diff --git a/CMakeLists.txt b/CMakeLists.txt
index fe45c1f..1ddaef3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,6 +35,14 @@ add_executable(ei_rp2040_firmware
     src/main.cpp
 )
 
+# Board selection: configure with -DBOARD_MICROMOD=ON to build for the MicroMod board (default OFF)
+OPTION(BOARD_MICROMOD "Select MicroMod board" OFF)
+
+# Expose the choice to the firmware sources as the BOARD_MICROMOD preprocessor macro
+if (BOARD_MICROMOD)
+    target_compile_definitions(ei_rp2040_firmware PRIVATE BOARD_MICROMOD=1)
+endif()
+
 OPTION(DEFINE_PIN_UART
     "Only use TX/RX pins and not USB UART"
     OFF)
@@ -100,6 +108,7 @@ target_include_directories(ei_rp2040_firmware PRIVATE
     ThirdParty/Seeed_Arduino_UltrasonicRanger
     ThirdParty/Wire/src
     ThirdParty/Arduino_LSM6DSOX/src
+    ThirdParty/LIS2DH12/src
     ThirdParty/rp2040_DHT11_lib
     ThirdParty/PDM/src/include
 )
diff --git a/ThirdParty/LIS2DH12/src/LIS2DH12.cpp b/ThirdParty/LIS2DH12/src/LIS2DH12.cpp
new file mode 100644
index 0000000..4fd4ee6
--- /dev/null
+++ b/ThirdParty/LIS2DH12/src/LIS2DH12.cpp
@@ -0,0 +1,161 @@
+/* Include ----------------------------------------------------------------- */
+#include "LIS2DH12.h"
+
+/* Constant defines -------------------------------------------------------- */
+// Registers
+#define LIS2DH12_STATUS_REG_AUX        0x07U
+#define LIS2DH12_OUT_TEMP_L            0x0CU
+#define LIS2DH12_WHO_AM_I_REG          0X0FU
+#define LIS2DH12_TEMP_CFG_REG          0x1FU
+#define LIS2DH12_CTRL_REG1             0x20U
+#define LIS2DH12_CTRL_REG4             0x23U
+#define LIS2DH12_STATUS_REG            0x27U
+#define LIS2DH12_OUT_X_L               0x28U
+
+// Register masks
+#define LIS2DH12_STATUS_REG_ZYXOR_MASK   0x80u
+#define LIS2DH12_STATUS_REG_AUX_TDA_MASK 0x04u
+
+LIS2DH12Class::LIS2DH12Class(TwoWire& wire, uint8_t slaveAddress)
+    : _wire(&wire)                  // bus is referenced, not owned (destructor leaves it alone)
+    , _slaveAddress(slaveAddress)   // address handed to beginTransmission()/requestFrom()
+{
+}
+
+LIS2DH12Class::~LIS2DH12Class() = default; // nothing to release: the I2C bus is not owned here
+
+// Probes the sensor over I2C and configures it; returns the WHO_AM_I value (non-zero) or 0 on failure.
+int LIS2DH12Class::begin()
+{
+    _wire->begin();
+
+    // readRegister() yields -1 on a bus error, which also fails this identity check.
+    const int type = readRegister(LIS2DH12_WHO_AM_I_REG);
+    if (type != MOTION_SENSOR_LIS2DH12) {
+        return 0;
+    }
+
+    // CTRL_REG1    = 0x57: data rate 100Hz, LPen = 0, X/Y/Z axes enabled
+    // CTRL_REG4    = 0x88: block data update, full-scale 2g, high-resolution (HR = 1)
+    // TEMP_CFG_REG = 0xC0: enable temperature sensor
+    if (!writeRegister(LIS2DH12_CTRL_REG1, 0x57) ||
+        !writeRegister(LIS2DH12_CTRL_REG4, 0x88) ||
+        !writeRegister(LIS2DH12_TEMP_CFG_REG, 0xC0)) {
+        return 0; // a configuration write failed: do not report a half-configured sensor as ready
+    }
+
+    return type;
+}
+
+void LIS2DH12Class::end()
+{
+    _wire->end(); // only the I2C bus is shut down; no register is written to the sensor here
+}
+
+// Reads one X/Y/Z sample in g. Returns 1 on success; on any bus error returns 0 with x/y/z = NAN.
+int LIS2DH12Class::readAcceleration(float& x, float& y, float& z)
+{
+    uint8_t raw[6]; // OUT_X_L onwards: X, Y, Z as 16-bit values, low byte first
+
+    // readRegisters() reports failure as 0 *or* -1, so only an exact 1 means `raw` is valid.
+    if (readRegisters(LIS2DH12_OUT_X_L, raw, sizeof(raw)) != 1) {
+        x = y = z = NAN;
+        return 0;
+    }
+
+    // Bytes are combined explicitly so the result does not depend on host endianness.
+    /* First convert fs2 hr to mg (raw / 16) and then to g (/ 1000) */
+    x = (static_cast<int16_t>(raw[0] | (raw[1] << 8)) / 16.0f) / 1000.0f;
+    y = (static_cast<int16_t>(raw[2] | (raw[3] << 8)) / 16.0f) / 1000.0f;
+    z = (static_cast<int16_t>(raw[4] | (raw[5] << 8)) / 16.0f) / 1000.0f;
+
+    return 1;
+}
+
+int LIS2DH12Class::accelerationAvailable()
+{
+    constexpr uint8_t zyxda_mask = 0x08u; // STATUS_REG bit 3 (ZYXDA): new X/Y/Z data ready
+    uint8_t status = 0;
+    if (readRegisters(LIS2DH12_STATUS_REG, &status, 1) != 1) {
+        return 0; // bus error: report no data
+    }
+    return (status & zyxda_mask) ? 1 : 0; // ZYXOR (bit 7) would only flag an overrun
+}
+
+float LIS2DH12Class::accelerationSampleRate()
+{
+    return 100.0F; // fixed value in Hz, matching the 100Hz data rate configured in begin()
+}
+
+int LIS2DH12Class::readTemperature(int & temperature_deg)
+{
+    // Two bytes starting at OUT_TEMP_L are read straight into a 16-bit signed value.
+    int16_t raw = 0;
+    uint8_t* const raw_bytes = reinterpret_cast<uint8_t*>(&raw);
+    if (readRegisters(LIS2DH12_OUT_TEMP_L, raw_bytes, sizeof(raw)) != 1) {
+        return 0; // read failed: temperature_deg is left untouched
+    }
+
+    // Scale by 1/64 then 1/4 and offset by 25; the float result is truncated to int.
+    temperature_deg = static_cast<int>(((static_cast<float>(raw) / 64.0f) / 4.0f) + 25.0f);
+    return 1;
+}
+
+int LIS2DH12Class::temperatureAvailable()
+{
+    uint8_t aux_status;
+
+    // Any result other than 1 from readRegisters() is a failed read: report "not available".
+    const bool read_ok = (readRegisters(LIS2DH12_STATUS_REG_AUX, &aux_status, 1) == 1);
+    const bool tda_set = read_ok && ((aux_status & LIS2DH12_STATUS_REG_AUX_TDA_MASK) != 0);
+    return tda_set ? 1 : 0;
+}
+
+int LIS2DH12Class::readRegister(uint8_t address)
+{
+    // Single-byte read: returns the register value, or -1 when readRegisters() does not report 1.
+    uint8_t value;
+    const int status = readRegisters(address, &value, sizeof(value));
+    if (status == 1) {
+        return value;
+    }
+    return -1;
+}
+
+int LIS2DH12Class::readRegisters(uint8_t address, uint8_t* data, size_t length)
+{
+    // Returns 1 on success, -1 if the address phase fails, 0 if fewer than `length` bytes arrive.
+    // Multi-byte reads need bit 7 (0x80) of the register address set.
+    const uint8_t subAddress = (length > 1) ? static_cast<uint8_t>(address | 0x80) : address;
+
+    _wire->beginTransmission(_slaveAddress);
+    _wire->write(subAddress);
+
+    // NOTE(review): the `false` argument presumably keeps the bus claimed (repeated start) - confirm in Wire.
+    if (_wire->endTransmission(false) != 0) {
+        return -1;
+    }
+
+    if (_wire->requestFrom(_slaveAddress, length) != length) {
+        return 0;
+    }
+
+    for (size_t i = 0; i < length; i++) {
+        data[i] = _wire->read();
+    }
+
+    return 1;
+}
+
+int LIS2DH12Class::writeRegister(uint8_t address, uint8_t value)
+{
+    // One transaction: the register address followed by the byte to store there.
+    _wire->beginTransmission(_slaveAddress);
+    _wire->write(address);
+    _wire->write(value);
+    // A zero result from endTransmission() is treated as success: 1 = ok, 0 = failed.
+    const bool ok = (_wire->endTransmission() == 0);
+    return ok ? 1 : 0;
+}
+
+LIS2DH12Class MOTION(Wire, LIS2DH12_ADDRESS); // global driver instance on the Wire bus at LIS2DH12_ADDRESS
diff --git a/ThirdParty/LIS2DH12/src/LIS2DH12.h b/ThirdParty/LIS2DH12/src/LIS2DH12.h
new file mode 100644
index 0000000..83ec317
--- /dev/null
+++ b/ThirdParty/LIS2DH12/src/LIS2DH12.h
@@ -0,0 +1,58 @@
+  /*
+    This file is part of the Arduino_LIS2DH12 library.
+    Copyright (c) 2021 Arduino SA. All rights reserved.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+  */
+  #pragma once  // this header had no include guard; a second inclusion would redefine LIS2DH12Class
+  #include <Wire.h>
+  #include <math.h>
+  #include <stdint.h>
+  #include <stdlib.h>
+  #include <string.h>
+  #include <stdio.h>
+
+  #define MOTION_SENSOR_LIS2DH12  0x33   // WHO_AM_I value that begin() expects from the sensor
+  #define LIS2DH12_ADDRESS        0x19U  // I2C address used by the global MOTION instance
+
+  // Driver for the LIS2DH12 sensor, talking to it through a TwoWire (I2C) bus.
+  class LIS2DH12Class {
+    public:
+      LIS2DH12Class(TwoWire& wire, uint8_t slaveAddress); // keeps a pointer to `wire`
+      ~LIS2DH12Class();
+
+      int begin();
+      void end();
+
+      // Accelerometer
+      int readAcceleration(float& x, float& y, float& z);
+      float accelerationSampleRate();
+      int accelerationAvailable();
+
+      // Temperature
+      int readTemperature(int & temperature_deg);
+      int temperatureAvailable();
+
+    private:
+      // Raw register access over the I2C bus
+      int readRegister(uint8_t address);
+      int readRegisters(uint8_t address, uint8_t* data, size_t length);
+      int writeRegister(uint8_t address, uint8_t value);
+
+      TwoWire* _wire;
+      uint8_t _slaveAddress;
+  };
+
+  extern LIS2DH12Class MOTION;  // shared instance, defined in LIS2DH12.cpp
diff --git a/ThirdParty/Wire/src/Wire.h b/ThirdParty/Wire/src/Wire.h
index 9db4868..de6e919 100644
--- a/ThirdParty/Wire/src/Wire.h
+++ b/ThirdParty/Wire/src/Wire.h
@@ -46,12 +46,19 @@ constexpr uint32_t __bitset(const int (&a)[N], size_t i = 0U) {
 #define WIRE_BUFFER_SIZE 128
 #endif
 
+#ifdef BOARD_MICROMOD  // set by the BOARD_MICROMOD CMake option
+#define SDA  4
+#define SCL  5
+#define PIN_WIRE1_SDA  26
+#define PIN_WIRE1_SCL  27
+#else  // pin assignment used when BOARD_MICROMOD is not defined
 // Wire
 #define SDA  12
 #define SCL  13
 
 #define PIN_WIRE1_SDA  6
 #define PIN_WIRE1_SCL  7
+#endif  // BOARD_MICROMOD
 
 class TwoWire {
 public:
diff --git a/edge-impulse-sdk/.gitignore b/edge-impulse-sdk/.gitignore
index b071295..ce7014e 100644
--- a/edge-impulse-sdk/.gitignore
+++ b/edge-impulse-sdk/.gitignore
@@ -7,3 +7,7 @@ utensor.lib
 utensor/libutensor.a
 *.o
 *.d
+doc/
+node_modules/
+package-lock.json
+package.json
\ No newline at end of file
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cachel1_armv7.h b/edge-impulse-sdk/CMSIS/Core/Include/cachel1_armv7.h
index d2c3e22..e8f4002 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cachel1_armv7.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cachel1_armv7.h
@@ -1,11 +1,11 @@
 /******************************************************************************
  * @file     cachel1_armv7.h
  * @brief    CMSIS Level 1 Cache API for Armv7-M and later
- * @version  V1.0.0
- * @date     03. March 2020
+ * @version  V1.0.2
+ * @date     22. June 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -48,7 +48,7 @@
 
 #ifndef __SCB_ICACHE_LINE_SIZE
 #define __SCB_ICACHE_LINE_SIZE  32U /*!< Cortex-M7 cache line size is fixed to 32 bytes (8 words). See also register SCB_CCSIDR */
-#endif 
+#endif
 
 /**
   \brief   Enable I-Cache
@@ -112,7 +112,7 @@ __STATIC_FORCEINLINE void SCB_InvalidateICache (void)
   \param[in]   addr    address
   \param[in]   isize   size of memory block (in number of bytes)
 */
-__STATIC_FORCEINLINE void SCB_InvalidateICache_by_Addr (void *addr, int32_t isize)
+__STATIC_FORCEINLINE void SCB_InvalidateICache_by_Addr (volatile void *addr, int32_t isize)
 {
   #if defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)
     if ( isize > 0 ) {
@@ -181,9 +181,15 @@ __STATIC_FORCEINLINE void SCB_EnableDCache (void)
 __STATIC_FORCEINLINE void SCB_DisableDCache (void)
 {
   #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
-    uint32_t ccsidr;
-    uint32_t sets;
-    uint32_t ways;
+    struct {
+      uint32_t ccsidr;
+      uint32_t sets;
+      uint32_t ways;
+    } locals
+    #if ((defined(__GNUC__) || defined(__clang__)) && !defined(__OPTIMIZE__))
+       __ALIGNED(__SCB_DCACHE_LINE_SIZE)
+    #endif
+    ;
 
     SCB->CSSELR = 0U;                       /* select Level 1 data cache */
     __DSB();
@@ -191,20 +197,37 @@ __STATIC_FORCEINLINE void SCB_DisableDCache (void)
     SCB->CCR &= ~(uint32_t)SCB_CCR_DC_Msk;  /* disable D-Cache */
     __DSB();
 
-    ccsidr = SCB->CCSIDR;
+    #if ((defined(__GNUC__) || defined(__clang__)) && !defined(__OPTIMIZE__))
+      /*
+       * For the endless loop issue with GCC and clang with O0.
+       * More details, see https://github.com/ARM-software/CMSIS_5/issues/620
+       *
+       * The issue only happens when local variables are in stack (GCC/clang O0). If
+       * local variables are saved in general purpose register, then the function
+       * is OK.
+       *
+       * When local variables are in stack, after disabling the cache, flush the
+       * local variables cache line for data consistency.
+       */
+      /* Clean and invalidate the local variable cache. */
+      SCB->DCCIMVAC = (uint32_t)&locals;
+      __DSB();
+      __ISB();
+    #endif
 
+    locals.ccsidr = SCB->CCSIDR;
                                             /* clean & invalidate D-Cache */
-    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    locals.sets = (uint32_t)(CCSIDR_SETS(locals.ccsidr));
     do {
-      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      locals.ways = (uint32_t)(CCSIDR_WAYS(locals.ccsidr));
       do {
-        SCB->DCCISW = (((sets << SCB_DCCISW_SET_Pos) & SCB_DCCISW_SET_Msk) |
-                       ((ways << SCB_DCCISW_WAY_Pos) & SCB_DCCISW_WAY_Msk)  );
+        SCB->DCCISW = (((locals.sets << SCB_DCCISW_SET_Pos) & SCB_DCCISW_SET_Msk) |
+                       ((locals.ways << SCB_DCCISW_WAY_Pos) & SCB_DCCISW_WAY_Msk)  );
         #if defined ( __CC_ARM )
           __schedule_barrier();
         #endif
-      } while (ways-- != 0U);
-    } while(sets-- != 0U);
+      } while (locals.ways-- != 0U);
+    } while(locals.sets-- != 0U);
 
     __DSB();
     __ISB();
@@ -325,13 +348,13 @@ __STATIC_FORCEINLINE void SCB_CleanInvalidateDCache (void)
   \param[in]   addr    address
   \param[in]   dsize   size of memory block (in number of bytes)
 */
-__STATIC_FORCEINLINE void SCB_InvalidateDCache_by_Addr (void *addr, int32_t dsize)
+__STATIC_FORCEINLINE void SCB_InvalidateDCache_by_Addr (volatile void *addr, int32_t dsize)
 {
   #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
-    if ( dsize > 0 ) { 
+    if ( dsize > 0 ) {
        int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
       uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
-    
+
       __DSB();
 
       do {
@@ -355,13 +378,13 @@ __STATIC_FORCEINLINE void SCB_InvalidateDCache_by_Addr (void *addr, int32_t dsiz
   \param[in]   addr    address
   \param[in]   dsize   size of memory block (in number of bytes)
 */
-__STATIC_FORCEINLINE void SCB_CleanDCache_by_Addr (uint32_t *addr, int32_t dsize)
+__STATIC_FORCEINLINE void SCB_CleanDCache_by_Addr (volatile void *addr, int32_t dsize)
 {
   #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
-    if ( dsize > 0 ) { 
+    if ( dsize > 0 ) {
        int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
       uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
-    
+
       __DSB();
 
       do {
@@ -385,13 +408,13 @@ __STATIC_FORCEINLINE void SCB_CleanDCache_by_Addr (uint32_t *addr, int32_t dsize
   \param[in]   addr    address (aligned to 32-byte boundary)
   \param[in]   dsize   size of memory block (in number of bytes)
 */
-__STATIC_FORCEINLINE void SCB_CleanInvalidateDCache_by_Addr (uint32_t *addr, int32_t dsize)
+__STATIC_FORCEINLINE void SCB_CleanInvalidateDCache_by_Addr (volatile void *addr, int32_t dsize)
 {
   #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
-    if ( dsize > 0 ) { 
+    if ( dsize > 0 ) {
        int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
       uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
-    
+
       __DSB();
 
       do {
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armcc.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armcc.h
index ced0a2c..a955d47 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armcc.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armcc.h
@@ -1,8 +1,8 @@
 /**************************************************************************//**
  * @file     cmsis_armcc.h
  * @brief    CMSIS compiler ARMCC (Arm Compiler 5) header file
- * @version  V5.3.0
- * @date     19. February 2021
+ * @version  V5.3.2
+ * @date     27. May 2021
  ******************************************************************************/
 /*
  * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
@@ -131,672 +131,673 @@
 #define __VECTOR_TABLE_ATTRIBUTE  __attribute__((used, section("RESET")))
 #endif
 
-/* ###########################  Core Function Access  ########################### */
-/** \ingroup  CMSIS_Core_FunctionInterface
-    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+/* ##########################  Core Instruction Access  ######################### */
+/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
+  Access to dedicated instructions
   @{
- */
+*/
 
 /**
-  \brief   Enable IRQ Interrupts
-  \details Enables IRQ interrupts by clearing the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   No Operation
+  \details No Operation does nothing. This instruction can be used for code alignment purposes.
  */
-/* intrinsic void __enable_irq();     */
+#define __NOP                             __nop
 
 
 /**
-  \brief   Disable IRQ Interrupts
-  \details Disables IRQ interrupts by setting the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Wait For Interrupt
+  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
  */
-/* intrinsic void __disable_irq();    */
+#define __WFI                             __wfi
+
 
 /**
-  \brief   Get Control Register
-  \details Returns the content of the Control Register.
-  \return               Control Register value
+  \brief   Wait For Event
+  \details Wait For Event is a hint instruction that permits the processor to enter
+           a low-power state until one of a number of events occurs.
  */
-__STATIC_INLINE uint32_t __get_CONTROL(void)
-{
-  register uint32_t __regControl         __ASM("control");
-  return(__regControl);
-}
+#define __WFE                             __wfe
 
 
 /**
-  \brief   Set Control Register
-  \details Writes the given value to the Control Register.
-  \param [in]    control  Control Register value to set
+  \brief   Send Event
+  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
  */
-__STATIC_INLINE void __set_CONTROL(uint32_t control)
-{
-  register uint32_t __regControl         __ASM("control");
-  __regControl = control;
-}
+#define __SEV                             __sev
 
 
 /**
-  \brief   Get IPSR Register
-  \details Returns the content of the IPSR Register.
-  \return               IPSR Register value
+  \brief   Instruction Synchronization Barrier
+  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
+           so that all instructions following the ISB are fetched from cache or memory,
+           after the instruction has been completed.
  */
-__STATIC_INLINE uint32_t __get_IPSR(void)
-{
-  register uint32_t __regIPSR          __ASM("ipsr");
-  return(__regIPSR);
-}
+#define __ISB()                           __isb(0xF)
 
+/**
+  \brief   Data Synchronization Barrier
+  \details Acts as a special kind of Data Memory Barrier.
+           It completes when all explicit memory accesses before this instruction complete.
+ */
+#define __DSB()                           __dsb(0xF)
 
 /**
-  \brief   Get APSR Register
-  \details Returns the content of the APSR Register.
-  \return               APSR Register value
+  \brief   Data Memory Barrier
+  \details Ensures the apparent order of the explicit memory operations before
+           and after the instruction, without ensuring their completion.
  */
-__STATIC_INLINE uint32_t __get_APSR(void)
-{
-  register uint32_t __regAPSR          __ASM("apsr");
-  return(__regAPSR);
-}
+#define __DMB()                           __dmb(0xF)
 
 
 /**
-  \brief   Get xPSR Register
-  \details Returns the content of the xPSR Register.
-  \return               xPSR Register value
+  \brief   Reverse byte order (32 bit)
+  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_INLINE uint32_t __get_xPSR(void)
-{
-  register uint32_t __regXPSR          __ASM("xpsr");
-  return(__regXPSR);
-}
+#define __REV                             __rev
 
 
 /**
-  \brief   Get Process Stack Pointer
-  \details Returns the current value of the Process Stack Pointer (PSP).
-  \return               PSP Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_INLINE uint32_t __get_PSP(void)
+#ifndef __NO_EMBEDDED_ASM
+__attribute__((section(".rev16_text"))) __STATIC_INLINE __ASM uint32_t __REV16(uint32_t value)
 {
-  register uint32_t __regProcessStackPointer  __ASM("psp");
-  return(__regProcessStackPointer);
+  rev16 r0, r0
+  bx lr
 }
+#endif
 
 
 /**
-  \brief   Set Process Stack Pointer
-  \details Assigns the given value to the Process Stack Pointer (PSP).
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_INLINE void __set_PSP(uint32_t topOfProcStack)
+#ifndef __NO_EMBEDDED_ASM
+__attribute__((section(".revsh_text"))) __STATIC_INLINE __ASM int16_t __REVSH(int16_t value)
 {
-  register uint32_t __regProcessStackPointer  __ASM("psp");
-  __regProcessStackPointer = topOfProcStack;
+  revsh r0, r0
+  bx lr
 }
+#endif
 
 
 /**
-  \brief   Get Main Stack Pointer
-  \details Returns the current value of the Main Stack Pointer (MSP).
-  \return               MSP Register value
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
  */
-__STATIC_INLINE uint32_t __get_MSP(void)
-{
-  register uint32_t __regMainStackPointer     __ASM("msp");
-  return(__regMainStackPointer);
-}
+#define __ROR                             __ror
 
 
 /**
-  \brief   Set Main Stack Pointer
-  \details Assigns the given value to the Main Stack Pointer (MSP).
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
+  \brief   Breakpoint
+  \details Causes the processor to enter Debug state.
+           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
+  \param [in]    value  is ignored by the processor.
+                 If required, a debugger can use it to store additional information about the breakpoint.
  */
-__STATIC_INLINE void __set_MSP(uint32_t topOfMainStack)
-{
-  register uint32_t __regMainStackPointer     __ASM("msp");
-  __regMainStackPointer = topOfMainStack;
-}
+#define __BKPT(value)                       __breakpoint(value)
 
 
 /**
-  \brief   Get Priority Mask
-  \details Returns the current state of the priority mask bit from the Priority Mask Register.
-  \return               Priority Mask value
+  \brief   Reverse bit order of value
+  \details Reverses the bit order of the given value.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_INLINE uint32_t __get_PRIMASK(void)
+#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
+     (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     )
+  #define __RBIT                          __rbit
+#else
+__attribute__((always_inline)) __STATIC_INLINE uint32_t __RBIT(uint32_t value)
 {
-  register uint32_t __regPriMask         __ASM("primask");
-  return(__regPriMask);
+  uint32_t result;
+  uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */
+
+  result = value;                      /* r will be reversed bits of v; first get LSB of v */
+  for (value >>= 1U; value != 0U; value >>= 1U)
+  {
+    result <<= 1U;
+    result |= value & 1U;
+    s--;
+  }
+  result <<= s;                        /* shift when v's highest bits are zero */
+  return result;
 }
+#endif
 
 
 /**
-  \brief   Set Priority Mask
-  \details Assigns the given value to the Priority Mask Register.
-  \param [in]    priMask  Priority Mask
+  \brief   Count leading zeros
+  \details Counts the number of leading zeros of a data value.
+  \param [in]  value  Value to count the leading zeros
+  \return             number of leading zeros in value
  */
-__STATIC_INLINE void __set_PRIMASK(uint32_t priMask)
-{
-  register uint32_t __regPriMask         __ASM("primask");
-  __regPriMask = (priMask);
-}
+#define __CLZ                             __clz
 
 
 #if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
      (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     )
 
 /**
-  \brief   Enable FIQ
-  \details Enables FIQ interrupts by clearing the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   LDR Exclusive (8 bit)
+  \details Executes a exclusive LDR instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-#define __enable_fault_irq                __enable_fiq
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __LDREXB(ptr)                                                        ((uint8_t ) __ldrex(ptr))
+#else
+  #define __LDREXB(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint8_t ) __ldrex(ptr))  _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Disable FIQ
-  \details Disables FIQ interrupts by setting the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   LDR Exclusive (16 bit)
+  \details Executes a exclusive LDR instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-#define __disable_fault_irq               __disable_fiq
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __LDREXH(ptr)                                                        ((uint16_t) __ldrex(ptr))
+#else
+  #define __LDREXH(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint16_t) __ldrex(ptr))  _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Get Base Priority
-  \details Returns the current value of the Base Priority register.
-  \return               Base Priority register value
+  \brief   LDR Exclusive (32 bit)
+  \details Executes a exclusive LDR instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_INLINE uint32_t  __get_BASEPRI(void)
-{
-  register uint32_t __regBasePri         __ASM("basepri");
-  return(__regBasePri);
-}
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __LDREXW(ptr)                                                        ((uint32_t ) __ldrex(ptr))
+#else
+  #define __LDREXW(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint32_t ) __ldrex(ptr))  _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Set Base Priority
-  \details Assigns the given value to the Base Priority register.
-  \param [in]    basePri  Base Priority value to set
+  \brief   STR Exclusive (8 bit)
+  \details Executes a exclusive STR instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_INLINE void __set_BASEPRI(uint32_t basePri)
-{
-  register uint32_t __regBasePri         __ASM("basepri");
-  __regBasePri = (basePri & 0xFFU);
-}
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __STREXB(value, ptr)                                                 __strex(value, ptr)
+#else
+  #define __STREXB(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Set Base Priority with condition
-  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
-           or the new value increases the BASEPRI priority level.
-  \param [in]    basePri  Base Priority value to set
- */
-__STATIC_INLINE void __set_BASEPRI_MAX(uint32_t basePri)
-{
-  register uint32_t __regBasePriMax      __ASM("basepri_max");
-  __regBasePriMax = (basePri & 0xFFU);
-}
+  \brief   STR Exclusive (16 bit)
+  \details Executes a exclusive STR instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
+ */
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __STREXH(value, ptr)                                                 __strex(value, ptr)
+#else
+  #define __STREXH(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Get Fault Mask
-  \details Returns the current value of the Fault Mask register.
-  \return               Fault Mask register value
+  \brief   STR Exclusive (32 bit)
+  \details Executes a exclusive STR instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_INLINE uint32_t __get_FAULTMASK(void)
-{
-  register uint32_t __regFaultMask       __ASM("faultmask");
-  return(__regFaultMask);
-}
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
+  #define __STREXW(value, ptr)                                                 __strex(value, ptr)
+#else
+  #define __STREXW(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
+#endif
 
 
 /**
-  \brief   Set Fault Mask
-  \details Assigns the given value to the Fault Mask register.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   Remove the exclusive lock
+  \details Removes the exclusive lock which is created by LDREX.
  */
-__STATIC_INLINE void __set_FAULTMASK(uint32_t faultMask)
-{
-  register uint32_t __regFaultMask       __ASM("faultmask");
-  __regFaultMask = (faultMask & (uint32_t)1U);
-}
-
-#endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
-           (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
+#define __CLREX                           __clrex
 
 
 /**
-  \brief   Get FPSCR
-  \details Returns the current value of the Floating Point Status/Control register.
-  \return               Floating Point Status/Control register value
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-__STATIC_INLINE uint32_t __get_FPSCR(void)
-{
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-  register uint32_t __regfpscr         __ASM("fpscr");
-  return(__regfpscr);
-#else
-   return(0U);
-#endif
-}
+#define __SSAT                            __ssat
 
 
 /**
-  \brief   Set FPSCR
-  \details Assigns the given value to the Floating Point Status/Control register.
-  \param [in]    fpscr  Floating Point Status/Control value to set
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-__STATIC_INLINE void __set_FPSCR(uint32_t fpscr)
-{
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-  register uint32_t __regfpscr         __ASM("fpscr");
-  __regfpscr = (fpscr);
-#else
-  (void)fpscr;
-#endif
-}
-
-
-/*@} end of CMSIS_Core_RegAccFunctions */
-
+#define __USAT                            __usat
 
-/* ##########################  Core Instruction Access  ######################### */
-/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
-  Access to dedicated instructions
-  @{
-*/
 
 /**
-  \brief   No Operation
-  \details No Operation does nothing. This instruction can be used for code alignment purposes.
+  \brief   Rotate Right with Extend (32 bit)
+  \details Moves each bit of a bitstring right by one bit.
+           The carry input is shifted in at the left end of the bitstring.
+  \param [in]    value  Value to rotate
+  \return               Rotated value
  */
-#define __NOP                             __nop
+#ifndef __NO_EMBEDDED_ASM
+__attribute__((section(".rrx_text"))) __STATIC_INLINE __ASM uint32_t __RRX(uint32_t value)
+{
+  rrx r0, r0
+  bx lr
+}
+#endif
 
 
 /**
-  \brief   Wait For Interrupt
-  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
+  \brief   LDRT Unprivileged (8 bit)
+  \details Executes a Unprivileged LDRT instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-#define __WFI                             __wfi
+#define __LDRBT(ptr)                      ((uint8_t )  __ldrt(ptr))
 
 
 /**
-  \brief   Wait For Event
-  \details Wait For Event is a hint instruction that permits the processor to enter
-           a low-power state until one of a number of events occurs.
+  \brief   LDRT Unprivileged (16 bit)
+  \details Executes an Unprivileged LDRT instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-#define __WFE                             __wfe
+#define __LDRHT(ptr)                      ((uint16_t)  __ldrt(ptr))
 
 
 /**
-  \brief   Send Event
-  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
+  \brief   LDRT Unprivileged (32 bit)
+  \details Executes an Unprivileged LDRT instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-#define __SEV                             __sev
+#define __LDRT(ptr)                       ((uint32_t ) __ldrt(ptr))
 
 
 /**
-  \brief   Instruction Synchronization Barrier
-  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
-           so that all instructions following the ISB are fetched from cache or memory,
-           after the instruction has been completed.
+  \brief   STRT Unprivileged (8 bit)
+  \details Executes an Unprivileged STRT instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-#define __ISB()                           __isb(0xF)
+#define __STRBT(value, ptr)               __strt(value, ptr)
 
-/**
-  \brief   Data Synchronization Barrier
-  \details Acts as a special kind of Data Memory Barrier.
-           It completes when all explicit memory accesses before this instruction complete.
- */
-#define __DSB()                           __dsb(0xF)
 
 /**
-  \brief   Data Memory Barrier
-  \details Ensures the apparent order of the explicit memory operations before
-           and after the instruction, without ensuring their completion.
+  \brief   STRT Unprivileged (16 bit)
+  \details Executes an Unprivileged STRT instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-#define __DMB()                           __dmb(0xF)
+#define __STRHT(value, ptr)               __strt(value, ptr)
 
 
 /**
-  \brief   Reverse byte order (32 bit)
-  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   STRT Unprivileged (32 bit)
+  \details Executes an Unprivileged STRT instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-#define __REV                             __rev
+#define __STRT(value, ptr)                __strt(value, ptr)
 
+#else  /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
+           (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
 
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-#ifndef __NO_EMBEDDED_ASM
-__attribute__((section(".rev16_text"))) __STATIC_INLINE __ASM uint32_t __REV16(uint32_t value)
+__attribute__((always_inline)) __STATIC_INLINE int32_t __SSAT(int32_t val, uint32_t sat)
 {
-  rev16 r0, r0
-  bx lr
+  if ((sat >= 1U) && (sat <= 32U))
+  {
+    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+    const int32_t min = -1 - max ;
+    if (val > max)
+    {
+      return max;
+    }
+    else if (val < min)
+    {
+      return min;
+    }
+  }
+  return val;
 }
-#endif
-
 
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-#ifndef __NO_EMBEDDED_ASM
-__attribute__((section(".revsh_text"))) __STATIC_INLINE __ASM int16_t __REVSH(int16_t value)
+__attribute__((always_inline)) __STATIC_INLINE uint32_t __USAT(int32_t val, uint32_t sat)
 {
-  revsh r0, r0
-  bx lr
+  if (sat <= 31U)
+  {
+    const uint32_t max = ((1U << sat) - 1U);
+    if (val > (int32_t)max)
+    {
+      return max;
+    }
+    else if (val < 0)
+    {
+      return 0U;
+    }
+  }
+  return (uint32_t)val;
 }
-#endif
 
+#endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
+           (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
 
-/**
-  \brief   Rotate Right in unsigned value (32 bit)
-  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
-  \param [in]    op1  Value to rotate
-  \param [in]    op2  Number of Bits to rotate
-  \return               Rotated value
- */
-#define __ROR                             __ror
+/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
 
 
-/**
-  \brief   Breakpoint
-  \details Causes the processor to enter Debug state.
-           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
-  \param [in]    value  is ignored by the processor.
-                 If required, a debugger can use it to store additional information about the breakpoint.
+/* ###########################  Core Function Access  ########################### */
+/** \ingroup  CMSIS_Core_FunctionInterface
+    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+  @{
  */
-#define __BKPT(value)                       __breakpoint(value)
-
 
 /**
-  \brief   Reverse bit order of value
-  \details Reverses the bit order of the given value.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Enable IRQ Interrupts
+  \details Enables IRQ interrupts by clearing special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
  */
-#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
-     (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     )
-  #define __RBIT                          __rbit
-#else
-__attribute__((always_inline)) __STATIC_INLINE uint32_t __RBIT(uint32_t value)
-{
-  uint32_t result;
-  uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */
-
-  result = value;                      /* r will be reversed bits of v; first get LSB of v */
-  for (value >>= 1U; value != 0U; value >>= 1U)
-  {
-    result <<= 1U;
-    result |= value & 1U;
-    s--;
-  }
-  result <<= s;                        /* shift when v's highest bits are zero */
-  return result;
-}
-#endif
+/* intrinsic void __enable_irq();     */
 
 
 /**
-  \brief   Count leading zeros
-  \details Counts the number of leading zeros of a data value.
-  \param [in]  value  Value to count the leading zeros
-  \return             number of leading zeros in value
+  \brief   Disable IRQ Interrupts
+  \details Disables IRQ interrupts by setting special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
  */
-#define __CLZ                             __clz
-
-
-#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
-     (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     )
+/* intrinsic void __disable_irq();    */
 
 /**
-  \brief   LDR Exclusive (8 bit)
-  \details Executes a exclusive LDR instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Control Register
+  \details Returns the content of the Control Register.
+  \return               Control Register value
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __LDREXB(ptr)                                                        ((uint8_t ) __ldrex(ptr))
-#else
-  #define __LDREXB(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint8_t ) __ldrex(ptr))  _Pragma("pop")
-#endif
+__STATIC_INLINE uint32_t __get_CONTROL(void)
+{
+  register uint32_t __regControl         __ASM("control");
+  return(__regControl);
+}
 
 
 /**
-  \brief   LDR Exclusive (16 bit)
-  \details Executes a exclusive LDR instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Control Register
+  \details Writes the given value to the Control Register.
+  \param [in]    control  Control Register value to set
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __LDREXH(ptr)                                                        ((uint16_t) __ldrex(ptr))
-#else
-  #define __LDREXH(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint16_t) __ldrex(ptr))  _Pragma("pop")
-#endif
+__STATIC_INLINE void __set_CONTROL(uint32_t control)
+{
+  register uint32_t __regControl         __ASM("control");
+  __regControl = control;
+  __ISB();
+}
 
 
 /**
-  \brief   LDR Exclusive (32 bit)
-  \details Executes a exclusive LDR instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Get IPSR Register
+  \details Returns the content of the IPSR Register.
+  \return               IPSR Register value
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __LDREXW(ptr)                                                        ((uint32_t ) __ldrex(ptr))
-#else
-  #define __LDREXW(ptr)          _Pragma("push") _Pragma("diag_suppress 3731") ((uint32_t ) __ldrex(ptr))  _Pragma("pop")
-#endif
+__STATIC_INLINE uint32_t __get_IPSR(void)
+{
+  register uint32_t __regIPSR          __ASM("ipsr");
+  return(__regIPSR);
+}
 
 
 /**
-  \brief   STR Exclusive (8 bit)
-  \details Executes a exclusive STR instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get APSR Register
+  \details Returns the content of the APSR Register.
+  \return               APSR Register value
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __STREXB(value, ptr)                                                 __strex(value, ptr)
-#else
-  #define __STREXB(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
-#endif
+__STATIC_INLINE uint32_t __get_APSR(void)
+{
+  register uint32_t __regAPSR          __ASM("apsr");
+  return(__regAPSR);
+}
 
 
 /**
-  \brief   STR Exclusive (16 bit)
-  \details Executes a exclusive STR instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get xPSR Register
+  \details Returns the content of the xPSR Register.
+  \return               xPSR Register value
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __STREXH(value, ptr)                                                 __strex(value, ptr)
-#else
-  #define __STREXH(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
-#endif
+__STATIC_INLINE uint32_t __get_xPSR(void)
+{
+  register uint32_t __regXPSR          __ASM("xpsr");
+  return(__regXPSR);
+}
 
 
 /**
-  \brief   STR Exclusive (32 bit)
-  \details Executes a exclusive STR instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Process Stack Pointer
+  \details Returns the current value of the Process Stack Pointer (PSP).
+  \return               PSP Register value
  */
-#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
-  #define __STREXW(value, ptr)                                                 __strex(value, ptr)
-#else
-  #define __STREXW(value, ptr)   _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr)        _Pragma("pop")
-#endif
+__STATIC_INLINE uint32_t __get_PSP(void)
+{
+  register uint32_t __regProcessStackPointer  __ASM("psp");
+  return(__regProcessStackPointer);
+}
 
 
 /**
-  \brief   Remove the exclusive lock
-  \details Removes the exclusive lock which is created by LDREX.
+  \brief   Set Process Stack Pointer
+  \details Assigns the given value to the Process Stack Pointer (PSP).
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-#define __CLREX                           __clrex
+__STATIC_INLINE void __set_PSP(uint32_t topOfProcStack)
+{
+  register uint32_t __regProcessStackPointer  __ASM("psp");
+  __regProcessStackPointer = topOfProcStack;
+}
 
 
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Get Main Stack Pointer
+  \details Returns the current value of the Main Stack Pointer (MSP).
+  \return               MSP Register value
  */
-#define __SSAT                            __ssat
+__STATIC_INLINE uint32_t __get_MSP(void)
+{
+  register uint32_t __regMainStackPointer     __ASM("msp");
+  return(__regMainStackPointer);
+}
 
 
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Set Main Stack Pointer
+  \details Assigns the given value to the Main Stack Pointer (MSP).
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-#define __USAT                            __usat
+__STATIC_INLINE void __set_MSP(uint32_t topOfMainStack)
+{
+  register uint32_t __regMainStackPointer     __ASM("msp");
+  __regMainStackPointer = topOfMainStack;
+}
 
 
 /**
-  \brief   Rotate Right with Extend (32 bit)
-  \details Moves each bit of a bitstring right by one bit.
-           The carry input is shifted in at the left end of the bitstring.
-  \param [in]    value  Value to rotate
-  \return               Rotated value
+  \brief   Get Priority Mask
+  \details Returns the current state of the priority mask bit from the Priority Mask Register.
+  \return               Priority Mask value
  */
-#ifndef __NO_EMBEDDED_ASM
-__attribute__((section(".rrx_text"))) __STATIC_INLINE __ASM uint32_t __RRX(uint32_t value)
+__STATIC_INLINE uint32_t __get_PRIMASK(void)
 {
-  rrx r0, r0
-  bx lr
+  register uint32_t __regPriMask         __ASM("primask");
+  return(__regPriMask);
 }
-#endif
 
 
 /**
-  \brief   LDRT Unprivileged (8 bit)
-  \details Executes a Unprivileged LDRT instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Set Priority Mask
+  \details Assigns the given value to the Priority Mask Register.
+  \param [in]    priMask  Priority Mask
  */
-#define __LDRBT(ptr)                      ((uint8_t )  __ldrt(ptr))
+__STATIC_INLINE void __set_PRIMASK(uint32_t priMask)
+{
+  register uint32_t __regPriMask         __ASM("primask");
+  __regPriMask = (priMask);
+}
 
 
+#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
+     (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     )
+
 /**
-  \brief   LDRT Unprivileged (16 bit)
-  \details Executes a Unprivileged LDRT instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Enable FIQ
+  \details Enables FIQ interrupts by clearing special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __LDRHT(ptr)                      ((uint16_t)  __ldrt(ptr))
+#define __enable_fault_irq                __enable_fiq
 
 
 /**
-  \brief   LDRT Unprivileged (32 bit)
-  \details Executes a Unprivileged LDRT instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Disable FIQ
+  \details Disables FIQ interrupts by setting special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __LDRT(ptr)                       ((uint32_t ) __ldrt(ptr))
+#define __disable_fault_irq               __disable_fiq
 
 
 /**
-  \brief   STRT Unprivileged (8 bit)
-  \details Executes a Unprivileged STRT instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Base Priority
+  \details Returns the current value of the Base Priority register.
+  \return               Base Priority register value
  */
-#define __STRBT(value, ptr)               __strt(value, ptr)
+__STATIC_INLINE uint32_t  __get_BASEPRI(void)
+{
+  register uint32_t __regBasePri         __ASM("basepri");
+  return(__regBasePri);
+}
 
 
 /**
-  \brief   STRT Unprivileged (16 bit)
-  \details Executes a Unprivileged STRT instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Base Priority
+  \details Assigns the given value to the Base Priority register.
+  \param [in]    basePri  Base Priority value to set
  */
-#define __STRHT(value, ptr)               __strt(value, ptr)
+__STATIC_INLINE void __set_BASEPRI(uint32_t basePri)
+{
+  register uint32_t __regBasePri         __ASM("basepri");
+  __regBasePri = (basePri & 0xFFU);
+}
 
 
 /**
-  \brief   STRT Unprivileged (32 bit)
-  \details Executes a Unprivileged STRT instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Base Priority with condition
+  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
+           or the new value increases the BASEPRI priority level.
+  \param [in]    basePri  Base Priority value to set
  */
-#define __STRT(value, ptr)                __strt(value, ptr)
+__STATIC_INLINE void __set_BASEPRI_MAX(uint32_t basePri)
+{
+  register uint32_t __regBasePriMax      __ASM("basepri_max");
+  __regBasePriMax = (basePri & 0xFFU);
+}
 
-#else  /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
-           (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
 
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Get Fault Mask
+  \details Returns the current value of the Fault Mask register.
+  \return               Fault Mask register value
  */
-__attribute__((always_inline)) __STATIC_INLINE int32_t __SSAT(int32_t val, uint32_t sat)
+__STATIC_INLINE uint32_t __get_FAULTMASK(void)
 {
-  if ((sat >= 1U) && (sat <= 32U))
-  {
-    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
-    const int32_t min = -1 - max ;
-    if (val > max)
-    {
-      return max;
-    }
-    else if (val < min)
-    {
-      return min;
-    }
-  }
-  return val;
+  register uint32_t __regFaultMask       __ASM("faultmask");
+  return(__regFaultMask);
 }
 
+
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Set Fault Mask
+  \details Assigns the given value to the Fault Mask register.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__attribute__((always_inline)) __STATIC_INLINE uint32_t __USAT(int32_t val, uint32_t sat)
+__STATIC_INLINE void __set_FAULTMASK(uint32_t faultMask)
 {
-  if (sat <= 31U)
-  {
-    const uint32_t max = ((1U << sat) - 1U);
-    if (val > (int32_t)max)
-    {
-      return max;
-    }
-    else if (val < 0)
-    {
-      return 0U;
-    }
-  }
-  return (uint32_t)val;
+  register uint32_t __regFaultMask       __ASM("faultmask");
+  __regFaultMask = (faultMask & (uint32_t)1U);
 }
 
 #endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__  == 1)) || \
            (defined (__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ == 1))     ) */
 
-/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
+
+/**
+  \brief   Get FPSCR
+  \details Returns the current value of the Floating Point Status/Control register.
+  \return               Floating Point Status/Control register value
+ */
+__STATIC_INLINE uint32_t __get_FPSCR(void)
+{
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+  register uint32_t __regfpscr         __ASM("fpscr");
+  return(__regfpscr);
+#else
+   return(0U);
+#endif
+}
+
+
+/**
+  \brief   Set FPSCR
+  \details Assigns the given value to the Floating Point Status/Control register.
+  \param [in]    fpscr  Floating Point Status/Control value to set
+ */
+__STATIC_INLINE void __set_FPSCR(uint32_t fpscr)
+{
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+  register uint32_t __regfpscr         __ASM("fpscr");
+  __regfpscr = (fpscr);
+#else
+  (void)fpscr;
+#endif
+}
+
+
+/*@} end of CMSIS_Core_RegAccFunctions */
 
 
 /* ###################  Compiler specific Intrinsics  ########################### */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang.h
index b14038c..b4a1200 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     cmsis_armclang.h
  * @brief    CMSIS compiler armclang (Arm Compiler 6) header file
- * @version  V5.4.0
- * @date     19. February 2020
+ * @version  V5.4.4
+ * @date     30. May 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,10 +29,6 @@
 
 #pragma clang system_header   /* treat file as system include file */
 
-#ifndef __ARM_COMPAT_H
-#include <arm_compat.h>    /* Compatibility header for Arm Compiler 5 intrinsics */
-#endif
-
 /* CMSIS compiler specific defines */
 #ifndef   __ASM
   #define __ASM                                  __asm
@@ -156,456 +152,423 @@ __STATIC_FORCEINLINE void __TZ_set_STACKSEAL_S (uint32_t* stackTop) {
 #endif
 
 
-/* ###########################  Core Function Access  ########################### */
-/** \ingroup  CMSIS_Core_FunctionInterface
-    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+/* ##########################  Core Instruction Access  ######################### */
+/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
+  Access to dedicated instructions
   @{
- */
+*/
+
+/* Define macros for porting to both thumb1 and thumb2.
+ * For thumb1, use low register (r0-r7), specified by constraint "l"
+ * Otherwise, use general registers, specified by constraint "r" */
+#if defined (__thumb__) && !defined (__thumb2__)
+#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
+#define __CMSIS_GCC_RW_REG(r) "+l" (r)
+#define __CMSIS_GCC_USE_REG(r) "l" (r)
+#else
+#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
+#define __CMSIS_GCC_RW_REG(r) "+r" (r)
+#define __CMSIS_GCC_USE_REG(r) "r" (r)
+#endif
 
 /**
-  \brief   Enable IRQ Interrupts
-  \details Enables IRQ interrupts by clearing the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   No Operation
+  \details No Operation does nothing. This instruction can be used for code alignment purposes.
  */
-/* intrinsic void __enable_irq();  see arm_compat.h */
-
+#define __NOP          __builtin_arm_nop
 
 /**
-  \brief   Disable IRQ Interrupts
-  \details Disables IRQ interrupts by setting the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Wait For Interrupt
+  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
  */
-/* intrinsic void __disable_irq();  see arm_compat.h */
+#define __WFI          __builtin_arm_wfi
 
 
 /**
-  \brief   Get Control Register
-  \details Returns the content of the Control Register.
-  \return               Control Register value
+  \brief   Wait For Event
+  \details Wait For Event is a hint instruction that permits the processor to enter
+           a low-power state until one of a number of events occurs.
  */
-__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, control" : "=r" (result) );
-  return(result);
-}
+#define __WFE          __builtin_arm_wfe
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Control Register (non-secure)
-  \details Returns the content of the non-secure Control Register when in secure mode.
-  \return               non-secure Control Register value
+  \brief   Send Event
+  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
-{
-  uint32_t result;
+#define __SEV          __builtin_arm_sev
 
-  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );
-  return(result);
-}
-#endif
 
+/**
+  \brief   Instruction Synchronization Barrier
+  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
+           so that all instructions following the ISB are fetched from cache or memory,
+           after the instruction has been completed.
+ */
+#define __ISB()        __builtin_arm_isb(0xF)
 
 /**
-  \brief   Set Control Register
-  \details Writes the given value to the Control Register.
-  \param [in]    control  Control Register value to set
+  \brief   Data Synchronization Barrier
+  \details Acts as a special kind of Data Memory Barrier.
+           It completes when all explicit memory accesses before this instruction complete.
  */
-__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
-{
-  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
-}
+#define __DSB()        __builtin_arm_dsb(0xF)
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Control Register (non-secure)
-  \details Writes the given value to the non-secure Control Register when in secure state.
-  \param [in]    control  Control Register value to set
+  \brief   Data Memory Barrier
+  \details Ensures the apparent order of the explicit memory operations before
+           and after the instruction, without ensuring their completion.
  */
-__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
-{
-  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
-}
-#endif
+#define __DMB()        __builtin_arm_dmb(0xF)
 
 
 /**
-  \brief   Get IPSR Register
-  \details Returns the content of the IPSR Register.
-  \return               IPSR Register value
+  \brief   Reverse byte order (32 bit)
+  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
-  return(result);
-}
+#define __REV(value)   __builtin_bswap32(value)
 
 
 /**
-  \brief   Get APSR Register
-  \details Returns the content of the APSR Register.
-  \return               APSR Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_APSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
-  return(result);
-}
+#define __REV16(value) __ROR(__REV(value), 16)
 
 
 /**
-  \brief   Get xPSR Register
-  \details Returns the content of the xPSR Register.
-  \return               xPSR Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
-  return(result);
-}
+#define __REVSH(value) (int16_t)__builtin_bswap16(value)
 
 
 /**
-  \brief   Get Process Stack Pointer
-  \details Returns the current value of the Process Stack Pointer (PSP).
-  \return               PSP Register value
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE uint32_t __get_PSP(void)
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
-  return(result);
+  op2 %= 32U;
+  if (op2 == 0U)
+  {
+    return op1;
+  }
+  return (op1 >> op2) | (op1 << (32U - op2));
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Process Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
-  \return               PSP Register value
+  \brief   Breakpoint
+  \details Causes the processor to enter Debug state.
+           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
+  \param [in]    value  is ignored by the processor.
+                 If required, a debugger can use it to store additional information about the breakpoint.
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
-  return(result);
-}
-#endif
+#define __BKPT(value)     __ASM volatile ("bkpt "#value)
 
 
 /**
-  \brief   Set Process Stack Pointer
-  \details Assigns the given value to the Process Stack Pointer (PSP).
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   Reverse bit order of value
+  \details Reverses the bit order of the given value.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
-{
-  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
-}
-
+#define __RBIT            __builtin_arm_rbit
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   Count leading zeros
+  \details Counts the number of leading zeros of a data value.
+  \param [in]  value  Value to count the leading zeros
+  \return             number of leading zeros in value
  */
-__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
+__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
 {
-  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
+  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
+     __builtin_clz(0) is undefined behaviour, so handle this case specially.
+     This guarantees ARM-compatible results if happening to compile on a non-ARM
+     target, and ensures the compiler doesn't decide to activate any
+     optimisations using the logic "value was passed to __builtin_clz, so it
+     is non-zero".
+     ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a
+     single CLZ instruction.
+   */
+  if (value == 0U)
+  {
+    return 32U;  /* every bit of a zero word is a leading zero */
+  }
+  return __builtin_clz(value);  /* value is known to be non-zero on this path */
 }
-#endif
 
 
+#if ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
+     (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
+     (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+     (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
+     (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
+
 /**
-  \brief   Get Main Stack Pointer
-  \details Returns the current value of the Main Stack Pointer (MSP).
-  \return               MSP Register value
+  \brief   LDR Exclusive (8 bit)
+  \details Executes an exclusive LDR instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_MSP(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, msp" : "=r" (result) );
-  return(result);
-}
+#define __LDREXB        (uint8_t)__builtin_arm_ldrex  /* at a call site the cast applies to the value returned by the builtin */
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Main Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
-  \return               MSP Register value
+  \brief   LDR Exclusive (16 bit)
+  \details Executes an exclusive LDR instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
-  return(result);
-}
-#endif
+#define __LDREXH        (uint16_t)__builtin_arm_ldrex  /* at a call site the cast applies to the value returned by the builtin */
 
 
 /**
-  \brief   Set Main Stack Pointer
-  \details Assigns the given value to the Main Stack Pointer (MSP).
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
- */
-__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
-{
-  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
-}
-
-
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Set Main Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
- */
-__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
-{
-  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
-}
-#endif
-
-
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Get Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
-  \return               SP Register value
+  \brief   LDR Exclusive (32 bit)
+  \details Executes an exclusive LDR instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
-  return(result);
-}
+#define __LDREXW        (uint32_t)__builtin_arm_ldrex  /* at a call site the cast applies to the value returned by the builtin */
 
 
 /**
-  \brief   Set Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
-  \param [in]    topOfStack  Stack Pointer value to set
+  \brief   STR Exclusive (8 bit)
+  \details Executes an exclusive STR instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
-{
-  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
-}
-#endif
+#define __STREXB        (uint32_t)__builtin_arm_strex  /* at a call site the cast applies to the status value returned by the builtin */
 
 
 /**
-  \brief   Get Priority Mask
-  \details Returns the current state of the priority mask bit from the Priority Mask Register.
-  \return               Priority Mask value
+  \brief   STR Exclusive (16 bit)
+  \details Executes an exclusive STR instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, primask" : "=r" (result) );
-  return(result);
-}
+#define __STREXH        (uint32_t)__builtin_arm_strex  /* at a call site the cast applies to the status value returned by the builtin */
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Priority Mask (non-secure)
-  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
-  \return               Priority Mask value
+  \brief   STR Exclusive (32 bit)
+  \details Executes an exclusive STR instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
-  return(result);
-}
-#endif
+#define __STREXW        (uint32_t)__builtin_arm_strex  /* at a call site the cast applies to the status value returned by the builtin */
 
 
 /**
-  \brief   Set Priority Mask
-  \details Assigns the given value to the Priority Mask Register.
-  \param [in]    priMask  Priority Mask
+  \brief   Remove the exclusive lock
+  \details Removes the exclusive lock which is created by LDREX.
  */
-__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
-{
-  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
-}
-
+#define __CLREX             __builtin_arm_clrex  /* object-like alias for the compiler builtin */
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Set Priority Mask (non-secure)
-  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
-  \param [in]    priMask  Priority Mask
- */
-__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
-{
-  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
-}
-#endif
+#endif /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
+           (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
+           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
 
 #if ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
      (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
      (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
      (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
+
 /**
-  \brief   Enable FIQ
-  \details Enables FIQ interrupts by clearing the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-#define __enable_fault_irq                __enable_fiq   /* see arm_compat.h */
+#define __SSAT             __builtin_arm_ssat  /* object-like alias: (value, sat) go straight to the builtin */
 
 
 /**
-  \brief   Disable FIQ
-  \details Disables FIQ interrupts by setting the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-#define __disable_fault_irq               __disable_fiq   /* see arm_compat.h */
+#define __USAT             __builtin_arm_usat  /* object-like alias: (value, sat) go straight to the builtin */
 
 
 /**
-  \brief   Get Base Priority
-  \details Returns the current value of the Base Priority register.
-  \return               Base Priority register value
+  \brief   Rotate Right with Extend (32 bit)
+  \details Moves each bit of a bitstring right by one bit.
+           The carry input is shifted in at the left end of the bitstring.
+  \param [in]    value  Value to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
+__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
+  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );  /* %0 = result (output), %1 = value (input); constraints come from the __CMSIS_GCC_* helper macros */
   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Base Priority (non-secure)
-  \details Returns the current value of the non-secure Base Priority register when in secure state.
-  \return               Base Priority register value
+  \brief   LDRT Unprivileged (8 bit)
+  \details Executes an unprivileged LDRT instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
+__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
-  return(result);
-}
-#endif
-
-
-/**
-  \brief   Set Base Priority
-  \details Assigns the given value to the Base Priority register.
-  \param [in]    basePri  Base Priority value to set
- */
-__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
-{
-  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
+  __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );  /* %0 = loaded value (register), %1 = memory operand addressed by ptr */
+  return ((uint8_t) result);    /* narrow the 32-bit temporary to the 8-bit return type */
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Base Priority (non-secure)
-  \details Assigns the given value to the non-secure Base Priority register when in secure state.
-  \param [in]    basePri  Base Priority value to set
+  \brief   LDRT Unprivileged (16 bit)
+  \details Executes an unprivileged LDRT instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
+__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
 {
-  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
-}
-#endif
-
+  uint32_t result;  /* 32-bit temporary that receives the loaded halfword */
 
-/**
-  \brief   Set Base Priority with condition
-  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
-           or the new value increases the BASEPRI priority level.
-  \param [in]    basePri  Base Priority value to set
- */
-__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
-{
-  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
+  __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );  /* %0 = loaded value (register), %1 = memory operand addressed by ptr */
+  return ((uint16_t) result);    /* narrow the 32-bit temporary to the 16-bit return type */
 }
 
 
 /**
-  \brief   Get Fault Mask
-  \details Returns the current value of the Fault Mask register.
-  \return               Fault Mask register value
+  \brief   LDRT Unprivileged (32 bit)
+  \details Executes an unprivileged LDRT instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
+__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
+  __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );  /* %0 = loaded value (register), %1 = memory operand addressed by ptr */
   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Fault Mask (non-secure)
-  \details Returns the current value of the non-secure Fault Mask register when in secure state.
-  \return               Fault Mask register value
+  \brief   STRT Unprivileged (8 bit)
+  \details Executes an unprivileged STRT instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
+__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
-  return(result);
+  __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );  /* %0 = memory operand (output), %1 = value widened to 32 bits */
 }
-#endif
 
 
 /**
-  \brief   Set Fault Mask
-  \details Assigns the given value to the Fault Mask register.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   STRT Unprivileged (16 bit)
+  \details Executes an unprivileged STRT instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
+__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
 {
-  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
+  __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );  /* %0 = memory operand (output), %1 = value widened to 32 bits */
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Fault Mask (non-secure)
-  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   STRT Unprivileged (32 bit)
+  \details Executes an unprivileged STRT instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
+__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
 {
-  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
+  __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );  /* %0 = memory operand (output), %1 = value to store */
 }
-#endif
 
-#endif /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
-           (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
+#else /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
+          (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
+          (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+          (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
+
+/**
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
+ */
+__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+{
+  if ((sat >= 1U) && (sat <= 32U))  /* clamp only when the bit position is in the documented 1..32 range */
+  {
+    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);  /* upper bound: 2^(sat-1) - 1 */
+    const int32_t min = -1 - max ;  /* lower bound: -2^(sat-1) */
+    if (val > max)
+    {
+      return max;
+    }
+    else if (val < min)
+    {
+      return min;
+    }
+  }
+  return val;  /* already within bounds, or sat outside 1..32: value passes through unchanged */
+}
+
+/**
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
+ */
+__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+{
+  if (sat <= 31U)  /* clamp only when the bit position is in the documented 0..31 range */
+  {
+    const uint32_t max = ((1U << sat) - 1U);  /* upper bound: 2^sat - 1 */
+    if (val > (int32_t)max)
+    {
+      return max;
+    }
+    else if (val < 0)
+    {
+      return 0U;  /* negative inputs saturate to zero */
+    }
+  }
+  return (uint32_t)val;  /* already within bounds, or sat above 31: value is returned converted to unsigned */
+}
+
+#endif /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
+           (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
 
 #if ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
@@ -613,631 +576,615 @@ __STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
      (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
 
 /**
-  \brief   Get Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
-
-  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
-  \return               PSPLIM Register value
+  \brief   Load-Acquire (8 bit)
+  \details Executes a LDAB instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
+__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-    // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
   uint32_t result;
-  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
-  return result;
-#endif
+
+  __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );  /* memory clobber: the compiler may not move memory accesses across this load */
+  return ((uint8_t) result);  /* narrow the 32-bit temporary to the 8-bit return type */
 }
 
-#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Get Process Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
 
-  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \return               PSPLIM Register value
+/**
+  \brief   Load-Acquire (16 bit)
+  \details Executes a LDAH instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
+__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
   uint32_t result;
-  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
-  return result;
-#endif
+
+  __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );  /* memory clobber: the compiler may not move memory accesses across this load */
+  return ((uint16_t) result);  /* narrow the 32-bit temporary to the 16-bit return type */
 }
-#endif
 
 
 /**
-  \brief   Set Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
-
-  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+  \brief   Load-Acquire (32 bit)
+  \details Executes a LDA instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
-#endif
+  uint32_t result;  /* receives the loaded word */
+
+  __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );  /* memory clobber: the compiler may not move memory accesses across this load */
+  return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
-
-  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+  \brief   Store-Release (8 bit)
+  \details Executes a STLB instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
-#endif
+  __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );  /* %0 = memory operand, %1 = value widened to 32 bits; memory clobber keeps the compiler from moving accesses across the store */
 }
-#endif
 
 
 /**
-  \brief   Get Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
-
-  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
-  \return               MSPLIM Register value
+  \brief   Store-Release (16 bit)
+  \details Executes a STLH instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
+__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
-  return result;
-#endif
+  __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );  /* %0 = memory operand, %1 = value widened to 32 bits; memory clobber keeps the compiler from moving accesses across the store */
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Get Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
-
-  \details Returns the current value of the non-secure Main Stack Pointer Limit(MSPLIM) when in secure state.
-  \return               MSPLIM Register value
+  \brief   Store-Release (32 bit)
+  \details Executes a STL instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
+__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
 {
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
-  return result;
-#endif
+  __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );  /* value is already uint32_t, so the cast changes nothing; it mirrors the 8/16-bit variants */
 }
-#endif
 
 
 /**
-  \brief   Set Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
+  \brief   Load-Acquire Exclusive (8 bit)
+  \details Executes a LDAB exclusive instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
+ */
+#define     __LDAEXB                 (uint8_t)__builtin_arm_ldaex  /* at a call site the cast applies to the value returned by the builtin */
 
-  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
-  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
+
+/**
+  \brief   Load-Acquire Exclusive (16 bit)
+  \details Executes a LDAH exclusive instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
-{
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
-#endif
-}
+#define     __LDAEXH                 (uint16_t)__builtin_arm_ldaex  /* at a call site the cast applies to the value returned by the builtin */
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
+  \brief   Load-Acquire Exclusive (32 bit)
+  \details Executes a LDA exclusive instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
+ */
+#define     __LDAEX                  (uint32_t)__builtin_arm_ldaex  /* at a call site the cast applies to the value returned by the builtin */
 
-  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
-  \param [in]    MainStackPtrLimit  Main Stack Pointer value to set
+
+/**
+  \brief   Store-Release Exclusive (8 bit)
+  \details Executes a STLB exclusive instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
-{
-#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
-#endif
-}
-#endif
+#define     __STLEXB                 (uint32_t)__builtin_arm_stlex  /* at a call site the cast applies to the status value returned by the builtin */
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
-           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
 /**
-  \brief   Get FPSCR
-  \details Returns the current value of the Floating Point Status/Control register.
-  \return               Floating Point Status/Control register value
+  \brief   Store-Release Exclusive (16 bit)
+  \details Executes a STLH exclusive instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#define __get_FPSCR      (uint32_t)__builtin_arm_get_fpscr
-#else
-#define __get_FPSCR()      ((uint32_t)0U)
-#endif
+#define     __STLEXH                 (uint32_t)__builtin_arm_stlex  /* at a call site the cast applies to the status value returned by the builtin */
+
 
 /**
-  \brief   Set FPSCR
-  \details Assigns the given value to the Floating Point Status/Control register.
-  \param [in]    fpscr  Floating Point Status/Control value to set
+  \brief   Store-Release Exclusive (32 bit)
+  \details Executes a STL exclusive instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#define __set_FPSCR      __builtin_arm_set_fpscr
-#else
-#define __set_FPSCR(x)      ((void)(x))
-#endif
+#define     __STLEX                  (uint32_t)__builtin_arm_stlex  /* at a call site the cast applies to the status value returned by the builtin */
 
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
+           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
-/*@} end of CMSIS_Core_RegAccFunctions */
+/** @}*/ /* end of group CMSIS_Core_InstructionInterface */
 
 
-/* ##########################  Core Instruction Access  ######################### */
-/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
-  Access to dedicated instructions
+/* ###########################  Core Function Access  ########################### */
+/** \ingroup  CMSIS_Core_FunctionInterface
+    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
   @{
-*/
+ */
 
-/* Define macros for porting to both thumb1 and thumb2.
- * For thumb1, use low register (r0-r7), specified by constraint "l"
- * Otherwise, use general registers, specified by constraint "r" */
-#if defined (__thumb__) && !defined (__thumb2__)
-#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
-#define __CMSIS_GCC_RW_REG(r) "+l" (r)
-#define __CMSIS_GCC_USE_REG(r) "l" (r)
-#else
-#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
-#define __CMSIS_GCC_RW_REG(r) "+r" (r)
-#define __CMSIS_GCC_USE_REG(r) "r" (r)
+/**
+  \brief   Enable IRQ Interrupts
+  \details Enables IRQ interrupts by clearing special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
+ */
+#ifndef __ARM_COMPAT_H  /* definition is skipped when __ARM_COMPAT_H is defined; presumably arm_compat.h supplies its own - confirm */
+__STATIC_FORCEINLINE void __enable_irq(void)
+{
+  __ASM volatile ("cpsie i" : : : "memory");  /* memory clobber: the compiler may not move memory accesses across this instruction */
+}
+#endif
+
+
+/**
+  \brief   Disable IRQ Interrupts
+  \details Disables IRQ interrupts by setting special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
+ */
+#ifndef __ARM_COMPAT_H  /* definition is skipped when __ARM_COMPAT_H is defined; presumably arm_compat.h supplies its own - confirm */
+__STATIC_FORCEINLINE void __disable_irq(void)
+{
+  __ASM volatile ("cpsid i" : : : "memory");  /* memory clobber: the compiler may not move memory accesses across this instruction */
+}
 #endif
 
+
 /**
-  \brief   No Operation
-  \details No Operation does nothing. This instruction can be used for code alignment purposes.
+  \brief   Get Control Register
+  \details Returns the content of the Control Register.
+  \return               Control Register value
  */
-#define __NOP          __builtin_arm_nop
+__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
+{
+  uint32_t result;  /* receives the register value read by MRS */
+
+  __ASM volatile ("MRS %0, control" : "=r" (result) );  /* %0 = result (output register) */
+  return(result);
+}
+
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Wait For Interrupt
-  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
+  \brief   Get Control Register (non-secure)
+  \details Returns the content of the non-secure Control Register when in secure state.
+  \return               non-secure Control Register value
  */
-#define __WFI          __builtin_arm_wfi
+__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
+{
+  uint32_t result;  /* receives the register value read by MRS */
+
+  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );  /* %0 = result (output register) */
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   Wait For Event
-  \details Wait For Event is a hint instruction that permits the processor to enter
-           a low-power state until one of a number of events occurs.
+  \brief   Set Control Register
+  \details Writes the given value to the Control Register.
+  \param [in]    control  Control Register value to set
  */
-#define __WFE          __builtin_arm_wfe
+__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
+{
+  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
+  __ISB();
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Send Event
-  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
+  \brief   Set Control Register (non-secure)
+  \details Writes the given value to the non-secure Control Register when in secure state.
+  \param [in]    control  Control Register value to set
  */
-#define __SEV          __builtin_arm_sev
+__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
+{
+  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
+  __ISB();
+}
+#endif
 
 
 /**
-  \brief   Instruction Synchronization Barrier
-  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
-           so that all instructions following the ISB are fetched from cache or memory,
-           after the instruction has been completed.
+  \brief   Get IPSR Register
+  \details Returns the content of the IPSR Register.
+  \return               IPSR Register value
  */
-#define __ISB()        __builtin_arm_isb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
+  return(result);
+}
+
 
 /**
-  \brief   Data Synchronization Barrier
-  \details Acts as a special kind of Data Memory Barrier.
-           It completes when all explicit memory accesses before this instruction complete.
+  \brief   Get APSR Register
+  \details Returns the content of the APSR Register.
+  \return               APSR Register value
  */
-#define __DSB()        __builtin_arm_dsb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_APSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Data Memory Barrier
-  \details Ensures the apparent order of the explicit memory operations before
-           and after the instruction, without ensuring their completion.
+  \brief   Get xPSR Register
+  \details Returns the content of the xPSR Register.
+  \return               xPSR Register value
  */
-#define __DMB()        __builtin_arm_dmb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Reverse byte order (32 bit)
-  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Process Stack Pointer
+  \details Returns the current value of the Process Stack Pointer (PSP).
+  \return               PSP Register value
  */
-#define __REV(value)   __builtin_bswap32(value)
+__STATIC_FORCEINLINE uint32_t __get_PSP(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Process Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
+  \return               PSP Register value
  */
-#define __REV16(value) __ROR(__REV(value), 16)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Set Process Stack Pointer
+  \details Assigns the given value to the Process Stack Pointer (PSP).
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-#define __REVSH(value) (int16_t)__builtin_bswap16(value)
+__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
+{
+  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Rotate Right in unsigned value (32 bit)
-  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
-  \param [in]    op1  Value to rotate
-  \param [in]    op2  Number of Bits to rotate
-  \return               Rotated value
+  \brief   Set Process Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
 {
-  op2 %= 32U;
-  if (op2 == 0U)
-  {
-    return op1;
-  }
-  return (op1 >> op2) | (op1 << (32U - op2));
+  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
 }
+#endif
 
 
 /**
-  \brief   Breakpoint
-  \details Causes the processor to enter Debug state.
-           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
-  \param [in]    value  is ignored by the processor.
-                 If required, a debugger can use it to store additional information about the breakpoint.
+  \brief   Get Main Stack Pointer
+  \details Returns the current value of the Main Stack Pointer (MSP).
+  \return               MSP Register value
  */
-#define __BKPT(value)     __ASM volatile ("bkpt "#value)
+__STATIC_FORCEINLINE uint32_t __get_MSP(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, msp" : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse bit order of value
-  \details Reverses the bit order of the given value.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Main Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
+  \return               MSP Register value
  */
-#define __RBIT            __builtin_arm_rbit
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
+  return(result);
+}
+#endif
+
 
 /**
-  \brief   Count leading zeros
-  \details Counts the number of leading zeros of a data value.
-  \param [in]  value  Value to count the leading zeros
-  \return             number of leading zeros in value
+  \brief   Set Main Stack Pointer
+  \details Assigns the given value to the Main Stack Pointer (MSP).
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
+__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
 {
-  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
-     __builtin_clz(0) is undefined behaviour, so handle this case specially.
-     This guarantees ARM-compatible results if happening to compile on a non-ARM
-     target, and ensures the compiler doesn't decide to activate any
-     optimisations using the logic "value was passed to __builtin_clz, so it
-     is non-zero".
-     ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a
-     single CLZ instruction.
-   */
-  if (value == 0U)
-  {
-    return 32U;
-  }
-  return __builtin_clz(value);
+  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
 }
 
 
-#if ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
-     (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
-     (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-     (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
-     (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
-
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (8 bit)
-  \details Executes a exclusive LDR instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Set Main Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-#define __LDREXB        (uint8_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
+{
+  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
+}
+#endif
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (16 bit)
-  \details Executes a exclusive LDR instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Get Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
+  \return               SP Register value
  */
-#define __LDREXH        (uint16_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   LDR Exclusive (32 bit)
-  \details Executes a exclusive LDR instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
+  \param [in]    topOfStack  Stack Pointer value to set
  */
-#define __LDREXW        (uint32_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
+{
+  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
+}
+#endif
 
 
 /**
-  \brief   STR Exclusive (8 bit)
-  \details Executes a exclusive STR instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Priority Mask
+  \details Returns the current state of the priority mask bit from the Priority Mask Register.
+  \return               Priority Mask value
  */
-#define __STREXB        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, primask" : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   STR Exclusive (16 bit)
-  \details Executes a exclusive STR instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Priority Mask (non-secure)
+  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
+  \return               Priority Mask value
  */
-#define __STREXH        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   STR Exclusive (32 bit)
-  \details Executes a exclusive STR instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set Priority Mask
+  \details Assigns the given value to the Priority Mask Register.
+  \param [in]    priMask  Priority Mask
  */
-#define __STREXW        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
+{
+  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Remove the exclusive lock
-  \details Removes the exclusive lock which is created by LDREX.
+  \brief   Set Priority Mask (non-secure)
+  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
+  \param [in]    priMask  Priority Mask
  */
-#define __CLREX             __builtin_arm_clrex
-
-#endif /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
-           (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
-           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
+__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
+{
+  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
+}
+#endif
 
 
 #if ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
      (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
      (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
      (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
-
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Enable FIQ
+  \details Enables FIQ interrupts by clearing special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __SSAT             __builtin_arm_ssat
+__STATIC_FORCEINLINE void __enable_fault_irq(void)
+{
+  __ASM volatile ("cpsie f" : : : "memory");
+}
 
 
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Disable FIQ
+  \details Disables FIQ interrupts by setting special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __USAT             __builtin_arm_usat
+__STATIC_FORCEINLINE void __disable_fault_irq(void)
+{
+  __ASM volatile ("cpsid f" : : : "memory");
+}
 
 
 /**
-  \brief   Rotate Right with Extend (32 bit)
-  \details Moves each bit of a bitstring right by one bit.
-           The carry input is shifted in at the left end of the bitstring.
-  \param [in]    value  Value to rotate
-  \return               Rotated value
+  \brief   Get Base Priority
+  \details Returns the current value of the Base Priority register.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
 {
   uint32_t result;
 
-  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
   return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (8 bit)
-  \details Executes a Unprivileged LDRT instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Base Priority (non-secure)
+  \details Returns the current value of the non-secure Base Priority register when in secure state.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
 {
   uint32_t result;
 
-  __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return ((uint8_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   LDRT Unprivileged (16 bit)
-  \details Executes a Unprivileged LDRT instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Base Priority
+  \details Assigns the given value to the Base Priority register.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
 {
-  uint32_t result;
-
-  __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return ((uint16_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (32 bit)
-  \details Executes a Unprivileged LDRT instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Base Priority (non-secure)
+  \details Assigns the given value to the non-secure Base Priority register when in secure state.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
 {
-  uint32_t result;
-
-  __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return(result);
+  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
 }
+#endif
 
 
 /**
-  \brief   STRT Unprivileged (8 bit)
-  \details Executes a Unprivileged STRT instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Base Priority with condition
+  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
+           or the new value increases the BASEPRI priority level.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
 {
-  __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
 }
 
 
 /**
-  \brief   STRT Unprivileged (16 bit)
-  \details Executes a Unprivileged STRT instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Fault Mask
+  \details Returns the current value of the Fault Mask register.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
 {
-  __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
+  return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   STRT Unprivileged (32 bit)
-  \details Executes a Unprivileged STRT instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Fault Mask (non-secure)
+  \details Returns the current value of the non-secure Fault Mask register when in secure state.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
 {
-  __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
-#else /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
-          (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
-          (defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-          (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Set Fault Mask
+  \details Assigns the given value to the Fault Mask register.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
 {
-  if ((sat >= 1U) && (sat <= 32U))
-  {
-    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
-    const int32_t min = -1 - max ;
-    if (val > max)
-    {
-      return max;
-    }
-    else if (val < min)
-    {
-      return min;
-    }
-  }
-  return val;
+  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
 }
 
-/**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
- */
-__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
-{
-  if (sat <= 31U)
-  {
-    const uint32_t max = ((1U << sat) - 1U);
-    if (val > (int32_t)max)
-    {
-      return max;
-    }
-    else if (val < 0)
-    {
-      return 0U;
-    }
-  }
-  return (uint32_t)val;
+
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
+/**
+  \brief   Set Fault Mask (non-secure)
+  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
+  \param [in]    faultMask  Fault Mask value to set
+ */
+__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
+{
+  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
 }
+#endif
 
 #endif /* ((defined (__ARM_ARCH_7M__       ) && (__ARM_ARCH_7M__        == 1)) || \
            (defined (__ARM_ARCH_7EM__      ) && (__ARM_ARCH_7EM__       == 1)) || \
@@ -1250,150 +1197,217 @@ __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
      (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     )
 
 /**
-  \brief   Load-Acquire (8 bit)
-  \details Executes a LDAB instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
 {
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+    // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
   uint32_t result;
-
-  __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return ((uint8_t) result);
+  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
+  return result;
+#endif
 }
 
-
+#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Load-Acquire (16 bit)
-  \details Executes a LDAH instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Get Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
 {
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
   uint32_t result;
-
-  __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return ((uint16_t) result);
+  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire (32 bit)
-  \details Executes a LDA instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
 {
-  uint32_t result;
-
-  __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return(result);
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
+#endif
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Store-Release (8 bit)
-  \details Executes a STLB instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
 {
-  __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
+#endif
 }
+#endif
 
 
 /**
-  \brief   Store-Release (16 bit)
-  \details Executes a STLH instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
+  \return               MSPLIM Register value
  */
-__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
 {
-  __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
+  return result;
+#endif
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Store-Release (32 bit)
-  \details Executes a STL instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \return               MSPLIM Register value
  */
-__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
 {
-  __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire Exclusive (8 bit)
-  \details Executes a LDAB exclusive instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
- */
-#define     __LDAEXB                 (uint8_t)__builtin_arm_ldaex
-
+  \brief   Set Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
 
-/**
-  \brief   Load-Acquire Exclusive (16 bit)
-  \details Executes a LDAH exclusive instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
  */
-#define     __LDAEXH                 (uint16_t)__builtin_arm_ldaex
+__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
+{
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
+#endif
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Load-Acquire Exclusive (32 bit)
-  \details Executes a LDA exclusive instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
- */
-#define     __LDAEX                  (uint32_t)__builtin_arm_ldaex
-
+  \brief   Set Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
 
-/**
-  \brief   Store-Release Exclusive (8 bit)
-  \details Executes a STLB exclusive instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
  */
-#define     __STLEXB                 (uint32_t)__builtin_arm_stlex
+__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
+{
+#if (!((defined (__ARM_ARCH_8M_MAIN__   ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+       (defined (__ARM_ARCH_8_1M_MAIN__ ) && (__ARM_ARCH_8_1M_MAIN__ == 1))   ) )
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
+#endif
+}
+#endif
 
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
+           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
 /**
-  \brief   Store-Release Exclusive (16 bit)
-  \details Executes a STLH exclusive instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get FPSCR
+  \details Returns the current value of the Floating Point Status/Control register.
+  \return               Floating Point Status/Control register value
  */
-#define     __STLEXH                 (uint32_t)__builtin_arm_stlex
-
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#define __get_FPSCR      (uint32_t)__builtin_arm_get_fpscr
+#else
+#define __get_FPSCR()      ((uint32_t)0U)
+#endif
 
 /**
-  \brief   Store-Release Exclusive (32 bit)
-  \details Executes a STL exclusive instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set FPSCR
+  \details Assigns the given value to the Floating Point Status/Control register.
+  \param [in]    fpscr  Floating Point Status/Control value to set
  */
-#define     __STLEX                  (uint32_t)__builtin_arm_stlex
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#define __set_FPSCR      __builtin_arm_set_fpscr
+#else
+#define __set_FPSCR(fpscr)      ((void)(fpscr))
+#endif
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__  ) && (__ARM_ARCH_8M_MAIN__   == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__  ) && (__ARM_ARCH_8M_BASE__   == 1)) || \
-           (defined (__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ == 1))     ) */
 
-/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
+/** @} end of CMSIS_Core_RegAccFunctions */
 
 
 /* ###################  Compiler specific Intrinsics  ########################### */
@@ -1483,7 +1497,7 @@ __STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
 }
 
 #endif /* (__ARM_FEATURE_DSP == 1) */
-/*@} end of group CMSIS_SIMD_intrinsics */
+/** @} end of group CMSIS_SIMD_intrinsics */
 
 
 #endif /* __CMSIS_ARMCLANG_H */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang_ltm.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang_ltm.h
index 3972d01..1e255d5 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang_ltm.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_armclang_ltm.h
@@ -1,8 +1,8 @@
 /**************************************************************************//**
  * @file     cmsis_armclang_ltm.h
  * @brief    CMSIS compiler armclang (Arm Compiler 6) header file
- * @version  V1.5.0
- * @date     19. February 2021
+ * @version  V1.5.3
+ * @date     27. May 2021
  ******************************************************************************/
 /*
  * Copyright (c) 2018-2021 Arm Limited. All rights reserved.
@@ -29,10 +29,6 @@
 
 #pragma clang system_header   /* treat file as system include file */
 
-#ifndef __ARM_COMPAT_H
-#include <arm_compat.h>    /* Compatibility header for Arm Compiler 5 intrinsics */
-#endif
-
 /* CMSIS compiler specific defines */
 #ifndef   __ASM
   #define __ASM                                  __asm
@@ -156,1069 +152,1027 @@ __STATIC_FORCEINLINE void __TZ_set_STACKSEAL_S (uint32_t* stackTop) {
 #endif
 
 
-/* ###########################  Core Function Access  ########################### */
-/** \ingroup  CMSIS_Core_FunctionInterface
-    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+/* ##########################  Core Instruction Access  ######################### */
+/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
+  Access to dedicated instructions
   @{
- */
+*/
+
+/* Define macros for porting to both thumb1 and thumb2.
+ * For thumb1, use low register (r0-r7), specified by constraint "l"
+ * Otherwise, use general registers, specified by constraint "r" */
+#if defined (__thumb__) && !defined (__thumb2__)
+#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
+#define __CMSIS_GCC_USE_REG(r) "l" (r)
+#else
+#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
+#define __CMSIS_GCC_USE_REG(r) "r" (r)
+#endif
 
 /**
-  \brief   Enable IRQ Interrupts
-  \details Enables IRQ interrupts by clearing the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   No Operation
+  \details No Operation does nothing. This instruction can be used for code alignment purposes.
  */
-/* intrinsic void __enable_irq();  see arm_compat.h */
-
+#define __NOP          __builtin_arm_nop
 
 /**
-  \brief   Disable IRQ Interrupts
-  \details Disables IRQ interrupts by setting the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Wait For Interrupt
+  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
  */
-/* intrinsic void __disable_irq();  see arm_compat.h */
+#define __WFI          __builtin_arm_wfi
 
 
 /**
-  \brief   Get Control Register
-  \details Returns the content of the Control Register.
-  \return               Control Register value
+  \brief   Wait For Event
+  \details Wait For Event is a hint instruction that permits the processor to enter
+           a low-power state until one of a number of events occurs.
  */
-__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, control" : "=r" (result) );
-  return(result);
-}
+#define __WFE          __builtin_arm_wfe
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Control Register (non-secure)
-  \details Returns the content of the non-secure Control Register when in secure mode.
-  \return               non-secure Control Register value
+  \brief   Send Event
+  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
-{
-  uint32_t result;
+#define __SEV          __builtin_arm_sev
 
-  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );
-  return(result);
-}
-#endif
 
+/**
+  \brief   Instruction Synchronization Barrier
+  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
+           so that all instructions following the ISB are fetched from cache or memory,
+           after the instruction has been completed.
+ */
+#define __ISB()        __builtin_arm_isb(0xF)
 
 /**
-  \brief   Set Control Register
-  \details Writes the given value to the Control Register.
-  \param [in]    control  Control Register value to set
+  \brief   Data Synchronization Barrier
+  \details Acts as a special kind of Data Memory Barrier.
+           It completes when all explicit memory accesses before this instruction complete.
  */
-__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
-{
-  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
-}
+#define __DSB()        __builtin_arm_dsb(0xF)
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Control Register (non-secure)
-  \details Writes the given value to the non-secure Control Register when in secure state.
-  \param [in]    control  Control Register value to set
+  \brief   Data Memory Barrier
+  \details Ensures the apparent order of the explicit memory operations before
+           and after the instruction, without ensuring their completion.
  */
-__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
-{
-  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
-}
-#endif
+#define __DMB()        __builtin_arm_dmb(0xF)
 
 
 /**
-  \brief   Get IPSR Register
-  \details Returns the content of the IPSR Register.
-  \return               IPSR Register value
+  \brief   Reverse byte order (32 bit)
+  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
-  return(result);
-}
+#define __REV(value)   __builtin_bswap32(value)
 
 
 /**
-  \brief   Get APSR Register
-  \details Returns the content of the APSR Register.
-  \return               APSR Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_APSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
-  return(result);
-}
+#define __REV16(value) __ROR(__REV(value), 16)
 
 
 /**
-  \brief   Get xPSR Register
-  \details Returns the content of the xPSR Register.
-  \return               xPSR Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
-  return(result);
-}
+#define __REVSH(value) (int16_t)__builtin_bswap16(value)
 
 
 /**
-  \brief   Get Process Stack Pointer
-  \details Returns the current value of the Process Stack Pointer (PSP).
-  \return               PSP Register value
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE uint32_t __get_PSP(void)
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
-  return(result);
+  op2 %= 32U;
+  if (op2 == 0U)
+  {
+    return op1;
+  }
+  return (op1 >> op2) | (op1 << (32U - op2));
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Process Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
-  \return               PSP Register value
+  \brief   Breakpoint
+  \details Causes the processor to enter Debug state.
+           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
+  \param [in]    value  is ignored by the processor.
+                 If required, a debugger can use it to store additional information about the breakpoint.
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
-{
-  uint32_t result;
+#define __BKPT(value)     __ASM volatile ("bkpt "#value)
 
-  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
-  return(result);
-}
-#endif
 
+/**
+  \brief   Reverse bit order of value
+  \details Reverses the bit order of the given value.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
+ */
+#define __RBIT            __builtin_arm_rbit
 
 /**
-  \brief   Set Process Stack Pointer
-  \details Assigns the given value to the Process Stack Pointer (PSP).
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   Count leading zeros
+  \details Counts the number of leading zeros of a data value.
+  \param [in]  value  Value to count the leading zeros
+  \return             number of leading zeros in value
  */
-__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
+__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
 {
-  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
+  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
+     __builtin_clz(0) is undefined behaviour, so handle this case specially.
+     This guarantees ARM-compatible results if happening to compile on a non-ARM
+     target, and ensures the compiler doesn't decide to activate any
+     optimisations using the logic "value was passed to __builtin_clz, so it
+     is non-zero".
+     ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a
+     single CLZ instruction.
+   */
+  if (value == 0U)
+  {
+    return 32U;
+  }
+  return __builtin_clz(value);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
+#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   LDR Exclusive (8 bit)
+  \details Executes an exclusive LDR instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
-{
-  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
-}
-#endif
+#define __LDREXB        (uint8_t)__builtin_arm_ldrex
 
 
 /**
-  \brief   Get Main Stack Pointer
-  \details Returns the current value of the Main Stack Pointer (MSP).
-  \return               MSP Register value
+  \brief   LDR Exclusive (16 bit)
+  \details Executes an exclusive LDR instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_MSP(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, msp" : "=r" (result) );
-  return(result);
-}
+#define __LDREXH        (uint16_t)__builtin_arm_ldrex
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Main Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
-  \return               MSP Register value
+  \brief   LDR Exclusive (32 bit)
+  \details Executes an exclusive LDR instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
-  return(result);
-}
-#endif
+#define __LDREXW        (uint32_t)__builtin_arm_ldrex
 
 
 /**
-  \brief   Set Main Stack Pointer
-  \details Assigns the given value to the Main Stack Pointer (MSP).
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
- */
-__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
-{
-  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
-}
-
-
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Set Main Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
- */
-__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
-{
-  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
-}
-#endif
-
-
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Get Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
-  \return               SP Register value
- */
-__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
-  return(result);
-}
-
-
-/**
-  \brief   Set Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
-  \param [in]    topOfStack  Stack Pointer value to set
+  \brief   STR Exclusive (8 bit)
+  \details Executes an exclusive STR instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
-{
-  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
-}
-#endif
+#define __STREXB        (uint32_t)__builtin_arm_strex
 
 
 /**
-  \brief   Get Priority Mask
-  \details Returns the current state of the priority mask bit from the Priority Mask Register.
-  \return               Priority Mask value
+  \brief   STR Exclusive (16 bit)
+  \details Executes an exclusive STR instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, primask" : "=r" (result) );
-  return(result);
-}
+#define __STREXH        (uint32_t)__builtin_arm_strex
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Priority Mask (non-secure)
-  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
-  \return               Priority Mask value
+  \brief   STR Exclusive (32 bit)
+  \details Executes an exclusive STR instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
-  return(result);
-}
-#endif
+#define __STREXW        (uint32_t)__builtin_arm_strex
 
 
 /**
-  \brief   Set Priority Mask
-  \details Assigns the given value to the Priority Mask Register.
-  \param [in]    priMask  Priority Mask
+  \brief   Remove the exclusive lock
+  \details Removes the exclusive lock which is created by LDREX.
  */
-__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
-{
-  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
-}
-
+#define __CLREX             __builtin_arm_clrex
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Set Priority Mask (non-secure)
-  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
-  \param [in]    priMask  Priority Mask
- */
-__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
-{
-  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
-}
-#endif
+#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
 
 #if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
      (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
      (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
+
 /**
-  \brief   Enable FIQ
-  \details Enables FIQ interrupts by clearing the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-#define __enable_fault_irq                __enable_fiq   /* see arm_compat.h */
+#define __SSAT             __builtin_arm_ssat
 
 
 /**
-  \brief   Disable FIQ
-  \details Disables FIQ interrupts by setting the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]  value  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-#define __disable_fault_irq               __disable_fiq   /* see arm_compat.h */
+#define __USAT             __builtin_arm_usat
 
 
 /**
-  \brief   Get Base Priority
-  \details Returns the current value of the Base Priority register.
-  \return               Base Priority register value
+  \brief   Rotate Right with Extend (32 bit)
+  \details Moves each bit of a bitstring right by one bit.
+           The carry input is shifted in at the left end of the bitstring.
+  \param [in]    value  Value to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
+__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
+  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Base Priority (non-secure)
-  \details Returns the current value of the non-secure Base Priority register when in secure state.
-  \return               Base Priority register value
+  \brief   LDRT Unprivileged (8 bit)
+  \details Executes an unprivileged LDRT instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
+__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
-  return(result);
-}
-#endif
-
-
-/**
-  \brief   Set Base Priority
-  \details Assigns the given value to the Base Priority register.
-  \param [in]    basePri  Base Priority value to set
- */
-__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
-{
-  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
+  __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );
+  return ((uint8_t) result);    /* Add explicit type cast here */
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Base Priority (non-secure)
-  \details Assigns the given value to the non-secure Base Priority register when in secure state.
-  \param [in]    basePri  Base Priority value to set
+  \brief   LDRT Unprivileged (16 bit)
+  \details Executes an unprivileged LDRT instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
+__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
 {
-  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
-}
-#endif
-
+  uint32_t result;
 
-/**
-  \brief   Set Base Priority with condition
-  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
-           or the new value increases the BASEPRI priority level.
-  \param [in]    basePri  Base Priority value to set
- */
-__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
-{
-  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
+  __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );
+  return ((uint16_t) result);    /* Add explicit type cast here */
 }
 
 
 /**
-  \brief   Get Fault Mask
-  \details Returns the current value of the Fault Mask register.
-  \return               Fault Mask register value
+  \brief   LDRT Unprivileged (32 bit)
+  \details Executes an unprivileged LDRT instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
+__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
+  __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );
   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Fault Mask (non-secure)
-  \details Returns the current value of the non-secure Fault Mask register when in secure state.
-  \return               Fault Mask register value
+  \brief   STRT Unprivileged (8 bit)
+  \details Executes an unprivileged STRT instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
+__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
-  return(result);
+  __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
 }
-#endif
 
 
 /**
-  \brief   Set Fault Mask
-  \details Assigns the given value to the Fault Mask register.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   STRT Unprivileged (16 bit)
+  \details Executes an unprivileged STRT instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
+__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
 {
-  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
+  __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Fault Mask (non-secure)
-  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   STRT Unprivileged (32 bit)
+  \details Executes an unprivileged STRT instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
+__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
 {
-  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
+  __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );
 }
-#endif
 
-#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+#else  /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
            (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
            (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
 
-
-#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
-
 /**
-  \brief   Get Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
-
-  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
-  \return               PSPLIM Register value
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
+__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-    // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
-  return result;
-#endif
+  if ((sat >= 1U) && (sat <= 32U))
+  {
+    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+    const int32_t min = -1 - max ;
+    if (val > max)
+    {
+      return max;
+    }
+    else if (val < min)
+    {
+      return min;
+    }
+  }
+  return val;
 }
 
-#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Process Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
-
-  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \return               PSPLIM Register value
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
+__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
-  return result;
-#endif
+  if (sat <= 31U)
+  {
+    const uint32_t max = ((1U << sat) - 1U);
+    if (val > (int32_t)max)
+    {
+      return max;
+    }
+    else if (val < 0)
+    {
+      return 0U;
+    }
+  }
+  return (uint32_t)val;
 }
-#endif
 
+#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
 
-/**
-  \brief   Set Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
 
-  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
+/**
+  \brief   Load-Acquire (8 bit)
+  \details Executes a LDAB instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
-#endif
+  uint32_t result;
+
+  __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+  return ((uint8_t) result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
-
-  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+  \brief   Load-Acquire (16 bit)
+  \details Executes a LDAH instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
-#endif
+  uint32_t result;
+
+  __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+  return ((uint16_t) result);
 }
-#endif
 
 
 /**
-  \brief   Get Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
-
-  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
-  \return               MSPLIM Register value
+  \brief   Load-Acquire (32 bit)
+  \details Executes a LDA instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
+__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
   uint32_t result;
-  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
-  return result;
-#endif
+
+  __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+  return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Get Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
-
-  \details Returns the current value of the non-secure Main Stack Pointer Limit(MSPLIM) when in secure state.
-  \return               MSPLIM Register value
+  \brief   Store-Release (8 bit)
+  \details Executes a STLB instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
+__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
-  return result;
-#endif
+  __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
-#endif
 
 
 /**
-  \brief   Set Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
-
-  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
-  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
+  \brief   Store-Release (16 bit)
+  \details Executes a STLH instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
+__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
-#endif
+  __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
-
-  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
-  \param [in]    MainStackPtrLimit  Main Stack Pointer value to set
+  \brief   Store-Release (32 bit)
+  \details Executes a STL instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
+__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
-#endif
+  __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
-#endif
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
 /**
-  \brief   Get FPSCR
-  \details Returns the current value of the Floating Point Status/Control register.
-  \return               Floating Point Status/Control register value
+  \brief   Load-Acquire Exclusive (8 bit)
+  \details Executes a LDAEXB instruction for 8 bit values.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#define __get_FPSCR      (uint32_t)__builtin_arm_get_fpscr
-#else
-#define __get_FPSCR()      ((uint32_t)0U)
-#endif
+#define     __LDAEXB                 (uint8_t)__builtin_arm_ldaex
+
 
 /**
-  \brief   Set FPSCR
-  \details Assigns the given value to the Floating Point Status/Control register.
-  \param [in]    fpscr  Floating Point Status/Control value to set
+  \brief   Load-Acquire Exclusive (16 bit)
+  \details Executes a LDAEXH instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#define __set_FPSCR      __builtin_arm_set_fpscr
-#else
-#define __set_FPSCR(x)      ((void)(x))
-#endif
+#define     __LDAEXH                 (uint16_t)__builtin_arm_ldaex
 
 
-/*@} end of CMSIS_Core_RegAccFunctions */
+/**
+  \brief   Load-Acquire Exclusive (32 bit)
+  \details Executes a LDAEX instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
+ */
+#define     __LDAEX                  (uint32_t)__builtin_arm_ldaex
+
+
+/**
+  \brief   Store-Release Exclusive (8 bit)
+  \details Executes a STLEXB instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
+ */
+#define     __STLEXB                 (uint32_t)__builtin_arm_stlex
+
+
+/**
+  \brief   Store-Release Exclusive (16 bit)
+  \details Executes a STLEXH instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
+ */
+#define     __STLEXH                 (uint32_t)__builtin_arm_stlex
+
+
+/**
+  \brief   Store-Release Exclusive (32 bit)
+  \details Executes a STLEX instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
+ */
+#define     __STLEX                  (uint32_t)__builtin_arm_stlex
+
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
+
+/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
+
 
+/* ###########################  Core Function Access  ########################### */
+/** \ingroup  CMSIS_Core_FunctionInterface
+    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+  @{
+ */
+
+/**
+  \brief   Enable IRQ Interrupts
+  \details Enables IRQ interrupts by clearing special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
+ */
+#ifndef __ARM_COMPAT_H
+__STATIC_FORCEINLINE void __enable_irq(void)
+{
+  __ASM volatile ("cpsie i" : : : "memory");
+}
+#endif
 
-/* ##########################  Core Instruction Access  ######################### */
-/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
-  Access to dedicated instructions
-  @{
-*/
 
-/* Define macros for porting to both thumb1 and thumb2.
- * For thumb1, use low register (r0-r7), specified by constraint "l"
- * Otherwise, use general registers, specified by constraint "r" */
-#if defined (__thumb__) && !defined (__thumb2__)
-#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
-#define __CMSIS_GCC_USE_REG(r) "l" (r)
-#else
-#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
-#define __CMSIS_GCC_USE_REG(r) "r" (r)
+/**
+  \brief   Disable IRQ Interrupts
+  \details Disables IRQ interrupts by setting special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
+ */
+#ifndef __ARM_COMPAT_H
+__STATIC_FORCEINLINE void __disable_irq(void)
+{
+  __ASM volatile ("cpsid i" : : : "memory");
+}
 #endif
 
+
 /**
-  \brief   No Operation
-  \details No Operation does nothing. This instruction can be used for code alignment purposes.
+  \brief   Get Control Register
+  \details Returns the content of the Control Register.
+  \return               Control Register value
  */
-#define __NOP          __builtin_arm_nop
+__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, control" : "=r" (result) );
+  return(result);
+}
+
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Wait For Interrupt
-  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
+  \brief   Get Control Register (non-secure)
+  \details Returns the content of the non-secure Control Register when in secure state.
+  \return               non-secure Control Register value
  */
-#define __WFI          __builtin_arm_wfi
+__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   Wait For Event
-  \details Wait For Event is a hint instruction that permits the processor to enter
-           a low-power state until one of a number of events occurs.
+  \brief   Set Control Register
+  \details Writes the given value to the Control Register.
+  \param [in]    control  Control Register value to set
  */
-#define __WFE          __builtin_arm_wfe
+__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
+{
+  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
+  __ISB();
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Send Event
-  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
+  \brief   Set Control Register (non-secure)
+  \details Writes the given value to the non-secure Control Register when in secure state.
+  \param [in]    control  Control Register value to set
  */
-#define __SEV          __builtin_arm_sev
+__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
+{
+  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
+  __ISB();
+}
+#endif
 
 
 /**
-  \brief   Instruction Synchronization Barrier
-  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
-           so that all instructions following the ISB are fetched from cache or memory,
-           after the instruction has been completed.
+  \brief   Get IPSR Register
+  \details Returns the content of the IPSR Register.
+  \return               IPSR Register value
  */
-#define __ISB()        __builtin_arm_isb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
+  return(result);
+}
+
 
 /**
-  \brief   Data Synchronization Barrier
-  \details Acts as a special kind of Data Memory Barrier.
-           It completes when all explicit memory accesses before this instruction complete.
+  \brief   Get APSR Register
+  \details Returns the content of the APSR Register.
+  \return               APSR Register value
  */
-#define __DSB()        __builtin_arm_dsb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_APSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Data Memory Barrier
-  \details Ensures the apparent order of the explicit memory operations before
-           and after the instruction, without ensuring their completion.
+  \brief   Get xPSR Register
+  \details Returns the content of the xPSR Register.
+  \return               xPSR Register value
  */
-#define __DMB()        __builtin_arm_dmb(0xF)
+__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Reverse byte order (32 bit)
-  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Process Stack Pointer
+  \details Returns the current value of the Process Stack Pointer (PSP).
+  \return               PSP Register value
  */
-#define __REV(value)   __builtin_bswap32(value)
+__STATIC_FORCEINLINE uint32_t __get_PSP(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Process Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
+  \return               PSP Register value
  */
-#define __REV16(value) __ROR(__REV(value), 16)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Set Process Stack Pointer
+  \details Assigns the given value to the Process Stack Pointer (PSP).
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-#define __REVSH(value) (int16_t)__builtin_bswap16(value)
+__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
+{
+  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Rotate Right in unsigned value (32 bit)
-  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
-  \param [in]    op1  Value to rotate
-  \param [in]    op2  Number of Bits to rotate
-  \return               Rotated value
+  \brief   Set Process Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
 {
-  op2 %= 32U;
-  if (op2 == 0U)
-  {
-    return op1;
-  }
-  return (op1 >> op2) | (op1 << (32U - op2));
+  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
 }
+#endif
 
 
 /**
-  \brief   Breakpoint
-  \details Causes the processor to enter Debug state.
-           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
-  \param [in]    value  is ignored by the processor.
-                 If required, a debugger can use it to store additional information about the breakpoint.
+  \brief   Get Main Stack Pointer
+  \details Returns the current value of the Main Stack Pointer (MSP).
+  \return               MSP Register value
  */
-#define __BKPT(value)     __ASM volatile ("bkpt "#value)
+__STATIC_FORCEINLINE uint32_t __get_MSP(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, msp" : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse bit order of value
-  \details Reverses the bit order of the given value.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Main Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
+  \return               MSP Register value
  */
-#define __RBIT            __builtin_arm_rbit
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
+  return(result);
+}
+#endif
+
 
 /**
-  \brief   Count leading zeros
-  \details Counts the number of leading zeros of a data value.
-  \param [in]  value  Value to count the leading zeros
-  \return             number of leading zeros in value
+  \brief   Set Main Stack Pointer
+  \details Assigns the given value to the Main Stack Pointer (MSP).
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
+__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
 {
-  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
-     __builtin_clz(0) is undefined behaviour, so handle this case specially.
-     This guarantees ARM-compatible results if happening to compile on a non-ARM
-     target, and ensures the compiler doesn't decide to activate any
-     optimisations using the logic "value was passed to __builtin_clz, so it
-     is non-zero".
-     ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a
-     single CLZ instruction.
-   */
-  if (value == 0U)
-  {
-    return 32U;
-  }
-  return __builtin_clz(value);
+  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
 }
 
 
-#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (8 bit)
-  \details Executes a exclusive LDR instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Set Main Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-#define __LDREXB        (uint8_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
+{
+  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
+}
+#endif
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (16 bit)
-  \details Executes a exclusive LDR instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Get Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
+  \return               SP Register value
  */
-#define __LDREXH        (uint16_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   LDR Exclusive (32 bit)
-  \details Executes a exclusive LDR instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
+  \param [in]    topOfStack  Stack Pointer value to set
  */
-#define __LDREXW        (uint32_t)__builtin_arm_ldrex
+__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
+{
+  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
+}
+#endif
 
 
 /**
-  \brief   STR Exclusive (8 bit)
-  \details Executes a exclusive STR instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Priority Mask
+  \details Returns the current state of the priority mask bit from the Priority Mask Register.
+  \return               Priority Mask value
  */
-#define __STREXB        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, primask" : "=r" (result) );
+  return(result);
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   STR Exclusive (16 bit)
-  \details Executes a exclusive STR instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Priority Mask (non-secure)
+  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
+  \return               Priority Mask value
  */
-#define __STREXH        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
+  return(result);
+}
+#endif
 
 
 /**
-  \brief   STR Exclusive (32 bit)
-  \details Executes a exclusive STR instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set Priority Mask
+  \details Assigns the given value to the Priority Mask Register.
+  \param [in]    priMask  Priority Mask
  */
-#define __STREXW        (uint32_t)__builtin_arm_strex
+__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
+{
+  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Remove the exclusive lock
-  \details Removes the exclusive lock which is created by LDREX.
+  \brief   Set Priority Mask (non-secure)
+  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
+  \param [in]    priMask  Priority Mask
  */
-#define __CLREX             __builtin_arm_clrex
-
-#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
+__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
+{
+  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
+}
+#endif
 
 
 #if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
      (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
      (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
-
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Enable fault interrupts
+  \details Enables fault interrupts by clearing special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __SSAT             __builtin_arm_ssat
+__STATIC_FORCEINLINE void __enable_fault_irq(void)
+{
+  __ASM volatile ("cpsie f" : : : "memory");
+}
 
 
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Disable fault interrupts
+  \details Disables fault interrupts by setting special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-#define __USAT             __builtin_arm_usat
+__STATIC_FORCEINLINE void __disable_fault_irq(void)
+{
+  __ASM volatile ("cpsid f" : : : "memory");
+}
 
 
 /**
-  \brief   Rotate Right with Extend (32 bit)
-  \details Moves each bit of a bitstring right by one bit.
-           The carry input is shifted in at the left end of the bitstring.
-  \param [in]    value  Value to rotate
-  \return               Rotated value
+  \brief   Get Base Priority
+  \details Returns the current value of the Base Priority register.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
 {
   uint32_t result;
 
-  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
   return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (8 bit)
-  \details Executes a Unprivileged LDRT instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Base Priority (non-secure)
+  \details Returns the current value of the non-secure Base Priority register when in secure state.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
 {
   uint32_t result;
 
-  __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return ((uint8_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   LDRT Unprivileged (16 bit)
-  \details Executes a Unprivileged LDRT instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Base Priority
+  \details Assigns the given value to the Base Priority register.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
 {
-  uint32_t result;
-
-  __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return ((uint16_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (32 bit)
-  \details Executes a Unprivileged LDRT instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Base Priority (non-secure)
+  \details Assigns the given value to the non-secure Base Priority register when in secure state.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
 {
-  uint32_t result;
-
-  __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );
-  return(result);
+  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
 }
+#endif
 
 
 /**
-  \brief   STRT Unprivileged (8 bit)
-  \details Executes a Unprivileged STRT instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Base Priority with condition
+  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
+           or the new value increases the BASEPRI priority level.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
 {
-  __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
 }
 
 
 /**
-  \brief   STRT Unprivileged (16 bit)
-  \details Executes a Unprivileged STRT instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Fault Mask
+  \details Returns the current value of the Fault Mask register.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
 {
-  __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
+  return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   STRT Unprivileged (32 bit)
-  \details Executes a Unprivileged STRT instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Fault Mask (non-secure)
+  \details Returns the current value of the non-secure Fault Mask register when in secure state.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
 {
-  __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
-#else  /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
 
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Set Fault Mask
+  \details Assigns the given value to the Fault Mask register.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
 {
-  if ((sat >= 1U) && (sat <= 32U))
-  {
-    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
-    const int32_t min = -1 - max ;
-    if (val > max)
-    {
-      return max;
-    }
-    else if (val < min)
-    {
-      return min;
-    }
-  }
-  return val;
+  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
 }
 
+
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Set Fault Mask (non-secure)
+  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
 {
-  if (sat <= 31U)
-  {
-    const uint32_t max = ((1U << sat) - 1U);
-    if (val > (int32_t)max)
-    {
-      return max;
-    }
-    else if (val < 0)
-    {
-      return 0U;
-    }
-  }
-  return (uint32_t)val;
+  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
 }
+#endif
 
 #endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
            (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
@@ -1227,150 +1181,210 @@ __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
 
 #if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
      (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
+
 /**
-  \brief   Load-Acquire (8 bit)
-  \details Executes a LDAB instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
 {
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+    // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
   uint32_t result;
-
-  __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return ((uint8_t) result);
+  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
+  return result;
+#endif
 }
 
-
+#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Load-Acquire (16 bit)
-  \details Executes a LDAH instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Get Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
 {
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
   uint32_t result;
-
-  __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return ((uint16_t) result);
+  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire (32 bit)
-  \details Executes a LDA instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
 {
-  uint32_t result;
-
-  __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-  return(result);
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
+#endif
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Store-Release (8 bit)
-  \details Executes a STLB instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
 {
-  __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
+#endif
 }
+#endif
 
 
 /**
-  \brief   Store-Release (16 bit)
-  \details Executes a STLH instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
+  \return               MSPLIM Register value
  */
-__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
 {
-  __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
+  return result;
+#endif
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Store-Release (32 bit)
-  \details Executes a STL instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \return               MSPLIM Register value
  */
-__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
 {
-  __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire Exclusive (8 bit)
-  \details Executes a LDAB exclusive instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
- */
-#define     __LDAEXB                 (uint8_t)__builtin_arm_ldaex
-
+  \brief   Set Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
 
-/**
-  \brief   Load-Acquire Exclusive (16 bit)
-  \details Executes a LDAH exclusive instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
  */
-#define     __LDAEXH                 (uint16_t)__builtin_arm_ldaex
+__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
+{
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
+#endif
+}
 
 
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Load-Acquire Exclusive (32 bit)
-  \details Executes a LDA exclusive instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
- */
-#define     __LDAEX                  (uint32_t)__builtin_arm_ldaex
-
+  \brief   Set Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
 
-/**
-  \brief   Store-Release Exclusive (8 bit)
-  \details Executes a STLB exclusive instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
  */
-#define     __STLEXB                 (uint32_t)__builtin_arm_stlex
+__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
+{
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
+#endif
+}
+#endif
 
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
 /**
-  \brief   Store-Release Exclusive (16 bit)
-  \details Executes a STLH exclusive instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get FPSCR
+  \details Returns the current value of the Floating Point Status/Control register.
+  \return               Floating Point Status/Control register value
  */
-#define     __STLEXH                 (uint32_t)__builtin_arm_stlex
-
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#define __get_FPSCR      (uint32_t)__builtin_arm_get_fpscr
+#else
+#define __get_FPSCR()      ((uint32_t)0U)
+#endif
 
 /**
-  \brief   Store-Release Exclusive (32 bit)
-  \details Executes a STL exclusive instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set FPSCR
+  \details Assigns the given value to the Floating Point Status/Control register.
+  \param [in]    fpscr  Floating Point Status/Control value to set
  */
-#define     __STLEX                  (uint32_t)__builtin_arm_stlex
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#define __set_FPSCR      __builtin_arm_set_fpscr
+#else
+#define __set_FPSCR(x)      ((void)(x))
+#endif
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
-/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
+/*@} end of CMSIS_Core_RegAccFunctions */
 
 
 /* ###################  Compiler specific Intrinsics  ########################### */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_gcc.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_gcc.h
index edc9f86..bf7cd11 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_gcc.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_gcc.h
@@ -1,8 +1,8 @@
 /**************************************************************************//**
  * @file     cmsis_gcc.h
  * @brief    CMSIS compiler GCC header file
- * @version  V5.3.2
- * @date     25. January 2021
+ * @version  V5.4.1
+ * @date     27. May 2021
  ******************************************************************************/
 /*
  * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
@@ -202,468 +202,549 @@ __STATIC_FORCEINLINE void __TZ_set_STACKSEAL_S (uint32_t* stackTop) {
 #endif
 
 
-/* ###########################  Core Function Access  ########################### */
-/** \ingroup  CMSIS_Core_FunctionInterface
-    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+/* ##########################  Core Instruction Access  ######################### */
+/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
+  Access to dedicated instructions
   @{
- */
+*/
 
-/**
-  \brief   Enable IRQ Interrupts
-  \details Enables IRQ interrupts by clearing the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
- */
-// Patched by Edge Impulse, fix for targets that already have __enable_irq
-#ifndef __enable_irq
-__STATIC_FORCEINLINE void __enable_irq(void)
-{
-  __ASM volatile ("cpsie i" : : : "memory");
-}
+/* Define macros for porting to both thumb1 and thumb2.
+ * For thumb1, use low register (r0-r7), specified by constraint "l"
+ * Otherwise, use general registers, specified by constraint "r" */
+#if defined (__thumb__) && !defined (__thumb2__)
+#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
+#define __CMSIS_GCC_RW_REG(r) "+l" (r)
+#define __CMSIS_GCC_USE_REG(r) "l" (r)
+#else
+#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
+#define __CMSIS_GCC_RW_REG(r) "+r" (r)
+#define __CMSIS_GCC_USE_REG(r) "r" (r)
 #endif
 
-
 /**
-  \brief   Disable IRQ Interrupts
-  \details Disables IRQ interrupts by setting the I-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   No Operation
+  \details No Operation does nothing. This instruction can be used for code alignment purposes.
  */
-// Patched by Edge Impulse, fix for targets that already have __disable_irq
-#ifndef __disable_irq
-__STATIC_FORCEINLINE void __disable_irq(void)
-{
-  __ASM volatile ("cpsid i" : : : "memory");
-}
-#endif
-
+#define __NOP()                             __ASM volatile ("nop")
 
 /**
-  \brief   Get Control Register
-  \details Returns the content of the Control Register.
-  \return               Control Register value
+  \brief   Wait For Interrupt
+  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
  */
-__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, control" : "=r" (result) );
-  return(result);
-}
+#define __WFI()                             __ASM volatile ("wfi":::"memory")
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Control Register (non-secure)
-  \details Returns the content of the non-secure Control Register when in secure mode.
-  \return               non-secure Control Register value
+  \brief   Wait For Event
+  \details Wait For Event is a hint instruction that permits the processor to enter
+           a low-power state until one of a number of events occurs.
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );
-  return(result);
-}
-#endif
+#define __WFE()                             __ASM volatile ("wfe":::"memory")
 
 
 /**
-  \brief   Set Control Register
-  \details Writes the given value to the Control Register.
-  \param [in]    control  Control Register value to set
+  \brief   Send Event
+  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
  */
-__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
-{
-  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
-}
+#define __SEV()                             __ASM volatile ("sev")
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Control Register (non-secure)
-  \details Writes the given value to the non-secure Control Register when in secure state.
-  \param [in]    control  Control Register value to set
+  \brief   Instruction Synchronization Barrier
+  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
+           so that all instructions following the ISB are fetched from cache or memory,
+           after the instruction has been completed.
  */
-__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
+__STATIC_FORCEINLINE void __ISB(void)
 {
-  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
+  __ASM volatile ("isb 0xF":::"memory");
 }
-#endif
 
 
 /**
-  \brief   Get IPSR Register
-  \details Returns the content of the IPSR Register.
-  \return               IPSR Register value
+  \brief   Data Synchronization Barrier
+  \details Acts as a special kind of Data Memory Barrier.
+           It completes when all explicit memory accesses before this instruction complete.
  */
-__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
+__STATIC_FORCEINLINE void __DSB(void)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
-  return(result);
+  __ASM volatile ("dsb 0xF":::"memory");
 }
 
 
 /**
-  \brief   Get APSR Register
-  \details Returns the content of the APSR Register.
-  \return               APSR Register value
+  \brief   Data Memory Barrier
+  \details Ensures the apparent order of the explicit memory operations before
+           and after the instruction, without ensuring their completion.
  */
-__STATIC_FORCEINLINE uint32_t __get_APSR(void)
+__STATIC_FORCEINLINE void __DMB(void)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
-  return(result);
+  __ASM volatile ("dmb 0xF":::"memory");
 }
 
 
 /**
-  \brief   Get xPSR Register
-  \details Returns the content of the xPSR Register.
-  \return               xPSR Register value
+  \brief   Reverse byte order (32 bit)
+  \details Reverses the byte order in an unsigned integer value. For example, 0x12345678 becomes 0x78563412.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
+__STATIC_FORCEINLINE uint32_t __REV(uint32_t value)
 {
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+  return __builtin_bswap32(value);
+#else
   uint32_t result;
 
-  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
-  return(result);
+  __ASM ("rev %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  return result;
+#endif
 }
 
 
 /**
-  \brief   Get Process Stack Pointer
-  \details Returns the current value of the Process Stack Pointer (PSP).
-  \return               PSP Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __get_PSP(void)
+__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
-  return(result);
+  __ASM ("rev16 %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  return result;
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Process Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
-  \return               PSP Register value
+  \brief   Reverse byte order (16 bit)
+  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
+__STATIC_FORCEINLINE int16_t __REVSH(int16_t value)
 {
-  uint32_t result;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+  return (int16_t)__builtin_bswap16(value);
+#else
+  int16_t result;
 
-  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
-  return(result);
-}
+  __ASM ("revsh %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  return result;
 #endif
-
-
-/**
-  \brief   Set Process Stack Pointer
-  \details Assigns the given value to the Process Stack Pointer (PSP).
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
- */
-__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
-{
-  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
-  \param [in]    topOfProcStack  Process Stack Pointer value to set
+  \brief   Rotate Right in unsigned value (32 bit)
+  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
+  \param [in]    op1  Value to rotate
+  \param [in]    op2  Number of Bits to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
+__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
 {
-  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
+  op2 %= 32U;
+  if (op2 == 0U)
+  {
+    return op1;
+  }
+  return (op1 >> op2) | (op1 << (32U - op2));
 }
-#endif
 
 
 /**
-  \brief   Get Main Stack Pointer
-  \details Returns the current value of the Main Stack Pointer (MSP).
-  \return               MSP Register value
+  \brief   Breakpoint
+  \details Causes the processor to enter Debug state.
+           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
+  \param [in]    value  is ignored by the processor.
+                 If required, a debugger can use it to store additional information about the breakpoint.
  */
-__STATIC_FORCEINLINE uint32_t __get_MSP(void)
-{
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, msp" : "=r" (result) );
-  return(result);
-}
+#define __BKPT(value)                       __ASM volatile ("bkpt "#value)
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Main Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
-  \return               MSP Register value
+  \brief   Reverse bit order of value
+  \details Reverses the bit order of the given value.
+  \param [in]    value  Value to reverse
+  \return               Reversed value
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
+__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
-  return(result);
-}
+#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
+   __ASM ("rbit %0, %1" : "=r" (result) : "r" (value) );
+#else
+  uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */
+
+  result = value;                      /* r will be reversed bits of v; first get LSB of v */
+  for (value >>= 1U; value != 0U; value >>= 1U)
+  {
+    result <<= 1U;
+    result |= value & 1U;
+    s--;
+  }
+  result <<= s;                        /* shift when v's highest bits are zero */
 #endif
+  return result;
+}
 
 
 /**
-  \brief   Set Main Stack Pointer
-  \details Assigns the given value to the Main Stack Pointer (MSP).
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
+  \brief   Count leading zeros
+  \details Counts the number of leading zeros of a data value.
+  \param [in]  value  Value to count the leading zeros
+  \return             number of leading zeros in value
  */
-__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
+__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
 {
-  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
+  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
+     __builtin_clz(0) is undefined behaviour, so handle this case specially.
+     This guarantees ARM-compatible results if happening to compile on a non-ARM
+     target, and ensures the compiler doesn't decide to activate any
+     optimisations using the logic "value was passed to __builtin_clz, so it
+     is non-zero".
+     ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a
+     single CLZ instruction.
+   */
+  if (value == 0U)
+  {
+    return 32U;
+  }
+  return __builtin_clz(value);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
+#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
 /**
-  \brief   Set Main Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
-  \param [in]    topOfMainStack  Main Stack Pointer value to set
+  \brief   LDR Exclusive (8 bit)
+  \details Executes an exclusive LDR instruction for an 8 bit value.
+  \param [in]   addr  Pointer to data
+  \return             value of type uint8_t at (*addr)
  */
-__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
+__STATIC_FORCEINLINE uint8_t __LDREXB(volatile uint8_t *addr)
 {
-  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
-}
+    uint32_t result;
+
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+   __ASM volatile ("ldrexb %0, %1" : "=r" (result) : "Q" (*addr) );
+#else
+    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
+       accepted by assembler. So has to use following less efficient pattern.
+    */
+   __ASM volatile ("ldrexb %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
 #endif
+   return ((uint8_t) result);    /* Add explicit type cast here */
+}
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Stack Pointer (non-secure)
-  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
-  \return               SP Register value
+  \brief   LDR Exclusive (16 bit)
+  \details Executes an exclusive LDR instruction for 16 bit values.
+  \param [in]   addr  Pointer to data
+  \return        value of type uint16_t at (*addr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
+__STATIC_FORCEINLINE uint16_t __LDREXH(volatile uint16_t *addr)
 {
-  uint32_t result;
+    uint32_t result;
 
-  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
-  return(result);
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+   __ASM volatile ("ldrexh %0, %1" : "=r" (result) : "Q" (*addr) );
+#else
+    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
+       accepted by assembler. So has to use following less efficient pattern.
+    */
+   __ASM volatile ("ldrexh %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
+#endif
+   return ((uint16_t) result);    /* Add explicit type cast here */
 }
 
 
 /**
-  \brief   Set Stack Pointer (non-secure)
-  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
-  \param [in]    topOfStack  Stack Pointer value to set
+  \brief   LDR Exclusive (32 bit)
+  \details Executes an exclusive LDR instruction for 32 bit values.
+  \param [in]   addr  Pointer to data
+  \return        value of type uint32_t at (*addr)
  */
-__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
+__STATIC_FORCEINLINE uint32_t __LDREXW(volatile uint32_t *addr)
 {
-  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
+    uint32_t result;
+
+   __ASM volatile ("ldrex %0, %1" : "=r" (result) : "Q" (*addr) );
+   return(result);
 }
-#endif
 
 
 /**
-  \brief   Get Priority Mask
-  \details Returns the current state of the priority mask bit from the Priority Mask Register.
-  \return               Priority Mask value
+  \brief   STR Exclusive (8 bit)
+  \details Executes an exclusive STR instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]   addr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
+__STATIC_FORCEINLINE uint32_t __STREXB(uint8_t value, volatile uint8_t *addr)
 {
-  uint32_t result;
+   uint32_t result;
 
-  __ASM volatile ("MRS %0, primask" : "=r" (result) );
-  return(result);
+   __ASM volatile ("strexb %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
+   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Priority Mask (non-secure)
-  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
-  \return               Priority Mask value
+  \brief   STR Exclusive (16 bit)
+  \details Executes an exclusive STR instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]   addr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
+__STATIC_FORCEINLINE uint32_t __STREXH(uint16_t value, volatile uint16_t *addr)
 {
-  uint32_t result;
+   uint32_t result;
 
-  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
-  return(result);
+   __ASM volatile ("strexh %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
+   return(result);
 }
-#endif
 
 
 /**
-  \brief   Set Priority Mask
-  \details Assigns the given value to the Priority Mask Register.
-  \param [in]    priMask  Priority Mask
+  \brief   STR Exclusive (32 bit)
+  \details Executes an exclusive STR instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]   addr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
+__STATIC_FORCEINLINE uint32_t __STREXW(uint32_t value, volatile uint32_t *addr)
 {
-  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
+   uint32_t result;
+
+   __ASM volatile ("strex %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" (value) );
+   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Priority Mask (non-secure)
-  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
-  \param [in]    priMask  Priority Mask
+  \brief   Remove the exclusive lock
+  \details Removes the exclusive lock which is created by LDREX.
  */
-__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
+__STATIC_FORCEINLINE void __CLREX(void)
 {
-  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
+  __ASM volatile ("clrex" ::: "memory");
 }
-#endif
+
+#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
 
 #if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
      (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
      (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
 /**
-  \brief   Enable FIQ
-  \details Enables FIQ interrupts by clearing the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]  ARG1  Value to be saturated
+  \param [in]  ARG2  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE void __enable_fault_irq(void)
-{
-  __ASM volatile ("cpsie f" : : : "memory");
-}
+#define __SSAT(ARG1, ARG2) \
+__extension__ \
+({                          \
+  int32_t __RES, __ARG1 = (ARG1); \
+  __ASM volatile ("ssat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
+  __RES; \
+ })
 
 
 /**
-  \brief   Disable FIQ
-  \details Disables FIQ interrupts by setting the F-bit in the CPSR.
-           Can only be executed in Privileged modes.
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]  ARG1  Value to be saturated
+  \param [in]  ARG2  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE void __disable_fault_irq(void)
-{
-  __ASM volatile ("cpsid f" : : : "memory");
-}
+#define __USAT(ARG1, ARG2) \
+__extension__ \
+({                          \
+  uint32_t __RES, __ARG1 = (ARG1); \
+  __ASM volatile ("usat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
+  __RES; \
+ })
 
 
 /**
-  \brief   Get Base Priority
-  \details Returns the current value of the Base Priority register.
-  \return               Base Priority register value
+  \brief   Rotate Right with Extend (32 bit)
+  \details Moves each bit of a bitstring right by one bit.
+           The carry input is shifted in at the left end of the bitstring.
+  \param [in]    value  Value to rotate
+  \return               Rotated value
  */
-__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
+__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
 {
   uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
+  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Base Priority (non-secure)
-  \details Returns the current value of the non-secure Base Priority register when in secure state.
-  \return               Base Priority register value
+  \brief   LDRT Unprivileged (8 bit)
+  \details Executes an unprivileged LDRBT instruction for an 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
+__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
 {
-  uint32_t result;
+    uint32_t result;
 
-  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
-  return(result);
-}
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+   __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );
+#else
+    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
+       accepted by assembler. So has to use following less efficient pattern.
+    */
+   __ASM volatile ("ldrbt %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" );
 #endif
+   return ((uint8_t) result);    /* Add explicit type cast here */
+}
 
 
 /**
-  \brief   Set Base Priority
-  \details Assigns the given value to the Base Priority register.
-  \param [in]    basePri  Base Priority value to set
+  \brief   LDRT Unprivileged (16 bit)
+  \details Executes an unprivileged LDRHT instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
+__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
 {
-  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
+    uint32_t result;
+
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
+   __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );
+#else
+    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
+       accepted by assembler. So has to use following less efficient pattern.
+    */
+   __ASM volatile ("ldrht %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" );
+#endif
+   return ((uint16_t) result);    /* Add explicit type cast here */
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Base Priority (non-secure)
-  \details Assigns the given value to the non-secure Base Priority register when in secure state.
-  \param [in]    basePri  Base Priority value to set
+  \brief   LDRT Unprivileged (32 bit)
+  \details Executes an unprivileged LDRT instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
+__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
 {
-  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
+    uint32_t result;
+
+   __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );
+   return(result);
 }
-#endif
 
 
 /**
-  \brief   Set Base Priority with condition
-  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
-           or the new value increases the BASEPRI priority level.
-  \param [in]    basePri  Base Priority value to set
+  \brief   STRT Unprivileged (8 bit)
+  \details Executes an unprivileged STRBT instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
+__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
 {
-  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
+   __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
 }
 
 
 /**
-  \brief   Get Fault Mask
-  \details Returns the current value of the Fault Mask register.
-  \return               Fault Mask register value
+  \brief   STRT Unprivileged (16 bit)
+  \details Executes an unprivileged STRHT instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
+__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
-  return(result);
+   __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Get Fault Mask (non-secure)
-  \details Returns the current value of the non-secure Fault Mask register when in secure state.
-  \return               Fault Mask register value
+  \brief   STRT Unprivileged (32 bit)
+  \details Executes an unprivileged STRT instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
+__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
 {
-  uint32_t result;
-
-  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
-  return(result);
+   __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );
 }
-#endif
 
+#else  /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
 
 /**
-  \brief   Set Fault Mask
-  \details Assigns the given value to the Fault Mask register.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   Signed Saturate
+  \details Saturates a signed value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (1..32)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
+__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
 {
-  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
+  if ((sat >= 1U) && (sat <= 32U))
+  {
+    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+    const int32_t min = -1 - max ;
+    if (val > max)
+    {
+      return max;
+    }
+    else if (val < min)
+    {
+      return min;
+    }
+  }
+  return val;
 }
 
-
-#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Set Fault Mask (non-secure)
-  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
-  \param [in]    faultMask  Fault Mask value to set
+  \brief   Unsigned Saturate
+  \details Saturates an unsigned value.
+  \param [in]    val  Value to be saturated
+  \param [in]    sat  Bit position to saturate to (0..31)
+  \return             Saturated value
  */
-__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
+__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
 {
-  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
+  if (sat <= 31U)
+  {
+    const uint32_t max = ((1U << sat) - 1U);
+    if (val > (int32_t)max)
+    {
+      return max;
+    }
+    else if (val < 0)
+    {
+      return 0U;
+    }
+  }
+  return (uint32_t)val;
 }
-#endif
 
 #endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
            (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
@@ -672,968 +753,889 @@ __STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
 
 #if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
      (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
-
 /**
-  \brief   Get Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
-
-  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
-  \return               PSPLIM Register value
+  \brief   Load-Acquire (8 bit)
+  \details Executes a LDAB instruction for 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
+__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-    // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
-  return result;
-#endif
+    uint32_t result;
+
+   __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return ((uint8_t) result);
 }
 
-#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
-/**
-  \brief   Get Process Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
 
-  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \return               PSPLIM Register value
+/**
+  \brief   Load-Acquire (16 bit)
+  \details Executes a LDAH instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
+__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
-  return result;
-#endif
+    uint32_t result;
+
+   __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return ((uint16_t) result);
 }
-#endif
 
 
 /**
-  \brief   Set Process Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
-
-  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+  \brief   Load-Acquire (32 bit)
+  \details Executes a LDA instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
-#endif
+    uint32_t result;
+
+   __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return(result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Process Stack Pointer (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
-
-  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
-  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+  \brief   Store-Release (8 bit)
+  \details Executes a STLB instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
+__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure PSPLIM is RAZ/WI
-  (void)ProcStackPtrLimit;
-#else
-  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
-#endif
+   __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
-#endif
 
 
 /**
-  \brief   Get Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always in non-secure
-  mode.
-
-  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
-  \return               MSPLIM Register value
+  \brief   Store-Release (16 bit)
+  \details Executes a STLH instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
+__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
-  return result;
-#endif
+   __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Get Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence zero is returned always.
-
-  \details Returns the current value of the non-secure Main Stack Pointer Limit(MSPLIM) when in secure state.
-  \return               MSPLIM Register value
+  \brief   Store-Release (32 bit)
+  \details Executes a STL instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
  */
-__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
+__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  return 0U;
-#else
-  uint32_t result;
-  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
-  return result;
-#endif
+   __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
 }
-#endif
 
 
 /**
-  \brief   Set Main Stack Pointer Limit
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored in non-secure
-  mode.
-
-  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
-  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
+  \brief   Load-Acquire Exclusive (8 bit)
+  \details Executes an LDAEXB (load-acquire exclusive) instruction for an 8 bit value.
+  \param [in]    ptr  Pointer to data
+  \return             value of type uint8_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
+__STATIC_FORCEINLINE uint8_t __LDAEXB(volatile uint8_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
-    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
-#endif
+    uint32_t result;
+
+   __ASM volatile ("ldaexb %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return ((uint8_t) result);
 }
 
 
-#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
 /**
-  \brief   Set Main Stack Pointer Limit (non-secure)
-  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
-  Stack Pointer Limit register hence the write is silently ignored.
-
-  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
-  \param [in]    MainStackPtrLimit  Main Stack Pointer value to set
+  \brief   Load-Acquire Exclusive (16 bit)
+  \details Executes an LDAEXH (load-acquire exclusive) instruction for 16 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint16_t at (*ptr)
  */
-__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
+__STATIC_FORCEINLINE uint16_t __LDAEXH(volatile uint16_t *ptr)
 {
-#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
-  // without main extensions, the non-secure MSPLIM is RAZ/WI
-  (void)MainStackPtrLimit;
-#else
-  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
-#endif
-}
-#endif
+    uint32_t result;
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
+   __ASM volatile ("ldaexh %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return ((uint16_t) result);
+}
 
 
 /**
-  \brief   Get FPSCR
-  \details Returns the current value of the Floating Point Status/Control register.
-  \return               Floating Point Status/Control register value
+  \brief   Load-Acquire Exclusive (32 bit)
+  \details Executes an LDAEX (load-acquire exclusive) instruction for 32 bit values.
+  \param [in]    ptr  Pointer to data
+  \return        value of type uint32_t at (*ptr)
  */
-__STATIC_FORCEINLINE uint32_t __get_FPSCR(void)
+__STATIC_FORCEINLINE uint32_t __LDAEX(volatile uint32_t *ptr)
 {
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#if __has_builtin(__builtin_arm_get_fpscr)
-// Re-enable using built-in when GCC has been fixed
-// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
-  /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
-  return __builtin_arm_get_fpscr();
-#else
-  uint32_t result;
+    uint32_t result;
 
-  __ASM volatile ("VMRS %0, fpscr" : "=r" (result) );
-  return(result);
-#endif
-#else
-  return(0U);
-#endif
+   __ASM volatile ("ldaex %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
+   return(result);
 }
 
 
 /**
-  \brief   Set FPSCR
-  \details Assigns the given value to the Floating Point Status/Control register.
-  \param [in]    fpscr  Floating Point Status/Control value to set
+  \brief   Store-Release Exclusive (8 bit)
+  \details Executes an STLEXB (store-release exclusive) instruction for 8 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-__STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr)
+__STATIC_FORCEINLINE uint32_t __STLEXB(uint8_t value, volatile uint8_t *ptr)
 {
-#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
-     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
-#if __has_builtin(__builtin_arm_set_fpscr)
-// Re-enable using built-in when GCC has been fixed
-// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
-  /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
-  __builtin_arm_set_fpscr(fpscr);
-#else
-  __ASM volatile ("VMSR fpscr, %0" : : "r" (fpscr) : "vfpcc", "memory");
-#endif
-#else
-  (void)fpscr;
-#endif
-}
+   uint32_t result;
 
+   __ASM volatile ("stlexb %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+   return(result);
+}
 
-/*@} end of CMSIS_Core_RegAccFunctions */
 
+/**
+  \brief   Store-Release Exclusive (16 bit)
+  \details Executes an STLEXH (store-release exclusive) instruction for 16 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
+ */
+__STATIC_FORCEINLINE uint32_t __STLEXH(uint16_t value, volatile uint16_t *ptr)
+{
+   uint32_t result;
 
-/* ##########################  Core Instruction Access  ######################### */
-/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface
-  Access to dedicated instructions
-  @{
-*/
+   __ASM volatile ("stlexh %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+   return(result);
+}
 
-/* Define macros for porting to both thumb1 and thumb2.
- * For thumb1, use low register (r0-r7), specified by constraint "l"
- * Otherwise, use general registers, specified by constraint "r" */
-#if defined (__thumb__) && !defined (__thumb2__)
-#define __CMSIS_GCC_OUT_REG(r) "=l" (r)
-#define __CMSIS_GCC_RW_REG(r) "+l" (r)
-#define __CMSIS_GCC_USE_REG(r) "l" (r)
-#else
-#define __CMSIS_GCC_OUT_REG(r) "=r" (r)
-#define __CMSIS_GCC_RW_REG(r) "+r" (r)
-#define __CMSIS_GCC_USE_REG(r) "r" (r)
-#endif
 
 /**
-  \brief   No Operation
-  \details No Operation does nothing. This instruction can be used for code alignment purposes.
+  \brief   Store-Release Exclusive (32 bit)
+  \details Executes an STLEX (store-release exclusive) instruction for 32 bit values.
+  \param [in]  value  Value to store
+  \param [in]    ptr  Pointer to location
+  \return          0  Function succeeded
+  \return          1  Function failed
  */
-#define __NOP()                             __ASM volatile ("nop")
+__STATIC_FORCEINLINE uint32_t __STLEX(uint32_t value, volatile uint32_t *ptr)
+{
+   uint32_t result;
 
-/**
-  \brief   Wait For Interrupt
-  \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs.
- */
-#define __WFI()                             __ASM volatile ("wfi":::"memory")
+   __ASM volatile ("stlex %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+   return(result);
+}
 
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
-/**
-  \brief   Wait For Event
-  \details Wait For Event is a hint instruction that permits the processor to enter
-           a low-power state until one of a number of events occurs.
- */
-#define __WFE()                             __ASM volatile ("wfe":::"memory")
+/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
 
 
-/**
-  \brief   Send Event
-  \details Send Event is a hint instruction. It causes an event to be signaled to the CPU.
+/* ###########################  Core Function Access  ########################### */
+/** \ingroup  CMSIS_Core_FunctionInterface
+    \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions
+  @{
  */
-#define __SEV()                             __ASM volatile ("sev")
-
 
 /**
-  \brief   Instruction Synchronization Barrier
-  \details Instruction Synchronization Barrier flushes the pipeline in the processor,
-           so that all instructions following the ISB are fetched from cache or memory,
-           after the instruction has been completed.
+  \brief   Enable IRQ Interrupts
+  \details Enables IRQ interrupts by clearing special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
  */
-__STATIC_FORCEINLINE void __ISB(void)
+// Patched by Edge Impulse: skip this definition on targets that already provide __enable_irq as a macro (#ifndef only detects macros, not functions)
+#ifndef __enable_irq
+__STATIC_FORCEINLINE void __enable_irq(void)
 {
-  __ASM volatile ("isb 0xF":::"memory");
+  __ASM volatile ("cpsie i" : : : "memory");
 }
+#endif
 
 
 /**
-  \brief   Data Synchronization Barrier
-  \details Acts as a special kind of Data Memory Barrier.
-           It completes when all explicit memory accesses before this instruction complete.
+  \brief   Disable IRQ Interrupts
+  \details Disables IRQ interrupts by setting special-purpose register PRIMASK.
+           Can only be executed in Privileged modes.
  */
-__STATIC_FORCEINLINE void __DSB(void)
+// Patched by Edge Impulse: skip this definition on targets that already provide __disable_irq as a macro (#ifndef only detects macros, not functions)
+#ifndef __disable_irq
+__STATIC_FORCEINLINE void __disable_irq(void)
 {
-  __ASM volatile ("dsb 0xF":::"memory");
+  __ASM volatile ("cpsid i" : : : "memory");
 }
+#endif
 
 
 /**
-  \brief   Data Memory Barrier
-  \details Ensures the apparent order of the explicit memory operations before
-           and after the instruction, without ensuring their completion.
+  \brief   Get Control Register
+  \details Returns the content of the Control Register.
+  \return               Control Register value
  */
-__STATIC_FORCEINLINE void __DMB(void)
+__STATIC_FORCEINLINE uint32_t __get_CONTROL(void)
 {
-  __ASM volatile ("dmb 0xF":::"memory");
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, control" : "=r" (result) );
+  return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse byte order (32 bit)
-  \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get Control Register (non-secure)
+  \details Returns the content of the non-secure Control Register when in secure state.
+  \return               non-secure Control Register value
  */
-__STATIC_FORCEINLINE uint32_t __REV(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void)
 {
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
-  return __builtin_bswap32(value);
-#else
   uint32_t result;
 
-  __ASM ("rev %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
-  return result;
-#endif
+  __ASM volatile ("MRS %0, control_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Set Control Register
+  \details Writes the given value to the Control Register.
+  \param [in]    control  Control Register value to set
  */
-__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value)
+__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)
 {
-  uint32_t result;
-
-  __ASM ("rev16 %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
-  return result;
+  __ASM volatile ("MSR control, %0" : : "r" (control) : "memory");
+  __ISB();
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Reverse byte order (16 bit)
-  \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Set Control Register (non-secure)
+  \details Writes the given value to the non-secure Control Register when in secure state.
+  \param [in]    control  Control Register value to set
  */
-__STATIC_FORCEINLINE int16_t __REVSH(int16_t value)
+__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)
 {
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-  return (int16_t)__builtin_bswap16(value);
-#else
-  int16_t result;
-
-  __ASM ("revsh %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
-  return result;
-#endif
+  __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory");
+  __ISB();
 }
+#endif
 
 
 /**
-  \brief   Rotate Right in unsigned value (32 bit)
-  \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
-  \param [in]    op1  Value to rotate
-  \param [in]    op2  Number of Bits to rotate
-  \return               Rotated value
+  \brief   Get IPSR Register
+  \details Returns the content of the IPSR Register.
+  \return               IPSR Register value
  */
-__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
+__STATIC_FORCEINLINE uint32_t __get_IPSR(void)
 {
-  op2 %= 32U;
-  if (op2 == 0U)
-  {
-    return op1;
-  }
-  return (op1 >> op2) | (op1 << (32U - op2));
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, ipsr" : "=r" (result) );
+  return(result);
 }
 
 
 /**
-  \brief   Breakpoint
-  \details Causes the processor to enter Debug state.
-           Debug tools can use this to investigate system state when the instruction at a particular address is reached.
-  \param [in]    value  is ignored by the processor.
-                 If required, a debugger can use it to store additional information about the breakpoint.
+  \brief   Get APSR Register
+  \details Returns the content of the APSR Register.
+  \return               APSR Register value
  */
-#define __BKPT(value)                       __ASM volatile ("bkpt "#value)
+__STATIC_FORCEINLINE uint32_t __get_APSR(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, apsr" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Reverse bit order of value
-  \details Reverses the bit order of the given value.
-  \param [in]    value  Value to reverse
-  \return               Reversed value
+  \brief   Get xPSR Register
+  \details Returns the content of the xPSR Register.
+  \return               xPSR Register value
  */
-__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __get_xPSR(void)
 {
   uint32_t result;
 
-#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
-   __ASM ("rbit %0, %1" : "=r" (result) : "r" (value) );
-#else
-  uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */
-
-  result = value;                      /* r will be reversed bits of v; first get LSB of v */
-  for (value >>= 1U; value != 0U; value >>= 1U)
-  {
-    result <<= 1U;
-    result |= value & 1U;
-    s--;
-  }
-  result <<= s;                        /* shift when v's highest bits are zero */
-#endif
-  return result;
+  __ASM volatile ("MRS %0, xpsr" : "=r" (result) );
+  return(result);
 }
 
 
 /**
-  \brief   Count leading zeros
-  \details Counts the number of leading zeros of a data value.
-  \param [in]  value  Value to count the leading zeros
-  \return             number of leading zeros in value
+  \brief   Get Process Stack Pointer
+  \details Returns the current value of the Process Stack Pointer (PSP).
+  \return               PSP Register value
  */
-__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __get_PSP(void)
 {
-  /* Even though __builtin_clz produces a CLZ instruction on ARM, formally
-     __builtin_clz(0) is undefined behaviour, so handle this case specially.
-     This guarantees ARM-compatible results if happening to compile on a non-ARM
-     target, and ensures the compiler doesn't decide to activate any
-     optimisations using the logic "value was passed to __builtin_clz, so it
-     is non-zero".
-     ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a
-     single CLZ instruction.
-   */
-  if (value == 0U)
-  {
-    return 32U;
-  }
-  return __builtin_clz(value);
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, psp"  : "=r" (result) );
+  return(result);
 }
 
 
-#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (8 bit)
-  \details Executes a exclusive LDR instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Process Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state.
+  \return               PSP Register value
  */
-__STATIC_FORCEINLINE uint8_t __LDREXB(volatile uint8_t *addr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void)
 {
-    uint32_t result;
+  uint32_t result;
 
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-   __ASM volatile ("ldrexb %0, %1" : "=r" (result) : "Q" (*addr) );
-#else
-    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
-       accepted by assembler. So has to use following less efficient pattern.
-    */
-   __ASM volatile ("ldrexb %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
-#endif
-   return ((uint8_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MRS %0, psp_ns"  : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   LDR Exclusive (16 bit)
-  \details Executes a exclusive LDR instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Process Stack Pointer
+  \details Assigns the given value to the Process Stack Pointer (PSP).
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint16_t __LDREXH(volatile uint16_t *addr)
+__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack)
 {
-    uint32_t result;
-
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-   __ASM volatile ("ldrexh %0, %1" : "=r" (result) : "Q" (*addr) );
-#else
-    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
-       accepted by assembler. So has to use following less efficient pattern.
-    */
-   __ASM volatile ("ldrexh %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
-#endif
-   return ((uint16_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : );
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDR Exclusive (32 bit)
-  \details Executes a exclusive LDR instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Process Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state.
+  \param [in]    topOfProcStack  Process Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint32_t __LDREXW(volatile uint32_t *addr)
+__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack)
 {
-    uint32_t result;
-
-   __ASM volatile ("ldrex %0, %1" : "=r" (result) : "Q" (*addr) );
-   return(result);
+  __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : );
 }
+#endif
 
 
 /**
-  \brief   STR Exclusive (8 bit)
-  \details Executes a exclusive STR instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Main Stack Pointer
+  \details Returns the current value of the Main Stack Pointer (MSP).
+  \return               MSP Register value
  */
-__STATIC_FORCEINLINE uint32_t __STREXB(uint8_t value, volatile uint8_t *addr)
+__STATIC_FORCEINLINE uint32_t __get_MSP(void)
 {
-   uint32_t result;
+  uint32_t result;
 
-   __ASM volatile ("strexb %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
-   return(result);
+  __ASM volatile ("MRS %0, msp" : "=r" (result) );
+  return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   STR Exclusive (16 bit)
-  \details Executes a exclusive STR instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get Main Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state.
+  \return               MSP Register value
  */
-__STATIC_FORCEINLINE uint32_t __STREXH(uint16_t value, volatile uint16_t *addr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void)
 {
-   uint32_t result;
+  uint32_t result;
 
-   __ASM volatile ("strexh %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
-   return(result);
+  __ASM volatile ("MRS %0, msp_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   STR Exclusive (32 bit)
-  \details Executes a exclusive STR instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set Main Stack Pointer
+  \details Assigns the given value to the Main Stack Pointer (MSP).
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-__STATIC_FORCEINLINE uint32_t __STREXW(uint32_t value, volatile uint32_t *addr)
+__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack)
 {
-   uint32_t result;
-
-   __ASM volatile ("strex %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" (value) );
-   return(result);
+  __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : );
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Remove the exclusive lock
-  \details Removes the exclusive lock which is created by LDREX.
+  \brief   Set Main Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state.
+  \param [in]    topOfMainStack  Main Stack Pointer value to set
  */
-__STATIC_FORCEINLINE void __CLREX(void)
+__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack)
 {
-  __ASM volatile ("clrex" ::: "memory");
+  __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : );
 }
-
-#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
+#endif
 
 
-#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  ARG1  Value to be saturated
-  \param [in]  ARG2  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Get Stack Pointer (non-secure)
+  \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state.
+  \return               SP Register value
  */
-#define __SSAT(ARG1, ARG2) \
-__extension__ \
-({                          \
-  int32_t __RES, __ARG1 = (ARG1); \
-  __ASM volatile ("ssat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
-  __RES; \
- })
+__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void)
+{
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, sp_ns" : "=r" (result) );
+  return(result);
+}
 
 
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  ARG1  Value to be saturated
-  \param [in]  ARG2  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Set Stack Pointer (non-secure)
+  \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state.
+  \param [in]    topOfStack  Stack Pointer value to set
  */
-#define __USAT(ARG1, ARG2) \
-__extension__ \
-({                          \
-  uint32_t __RES, __ARG1 = (ARG1); \
-  __ASM volatile ("usat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
-  __RES; \
- })
+__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack)
+{
+  __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : );
+}
+#endif
 
 
 /**
-  \brief   Rotate Right with Extend (32 bit)
-  \details Moves each bit of a bitstring right by one bit.
-           The carry input is shifted in at the left end of the bitstring.
-  \param [in]    value  Value to rotate
-  \return               Rotated value
+  \brief   Get Priority Mask
+  \details Returns the current state of the priority mask bit from the Priority Mask Register.
+  \return               Priority Mask value
  */
-__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value)
+__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void)
 {
   uint32_t result;
 
-  __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) );
+  __ASM volatile ("MRS %0, primask" : "=r" (result) );
   return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (8 bit)
-  \details Executes a Unprivileged LDRT instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Priority Mask (non-secure)
+  \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state.
+  \return               Priority Mask value
  */
-__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void)
 {
-    uint32_t result;
+  uint32_t result;
 
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-   __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) );
-#else
-    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
-       accepted by assembler. So has to use following less efficient pattern.
-    */
-   __ASM volatile ("ldrbt %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" );
-#endif
-   return ((uint8_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MRS %0, primask_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   LDRT Unprivileged (16 bit)
-  \details Executes a Unprivileged LDRT instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Priority Mask
+  \details Assigns the given value to the Priority Mask Register.
+  \param [in]    priMask  Priority Mask
  */
-__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask)
 {
-    uint32_t result;
-
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
-   __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) );
-#else
-    /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
-       accepted by assembler. So has to use following less efficient pattern.
-    */
-   __ASM volatile ("ldrht %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" );
-#endif
-   return ((uint16_t) result);    /* Add explicit type cast here */
+  __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory");
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   LDRT Unprivileged (32 bit)
-  \details Executes a Unprivileged LDRT instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Set Priority Mask (non-secure)
+  \details Assigns the given value to the non-secure Priority Mask Register when in secure state.
+  \param [in]    priMask  Priority Mask
  */
-__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask)
 {
-    uint32_t result;
-
-   __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) );
-   return(result);
+  __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory");
 }
+#endif
 
 
+#if ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+     (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+     (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    )
 /**
-  \brief   STRT Unprivileged (8 bit)
-  \details Executes a Unprivileged STRT instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Enable FIQ
+  \details Enables FIQ interrupts by clearing special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __enable_fault_irq(void)
 {
-   __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  __ASM volatile ("cpsie f" : : : "memory");
 }
 
 
 /**
-  \brief   STRT Unprivileged (16 bit)
-  \details Executes a Unprivileged STRT instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Disable FIQ
+  \details Disables FIQ interrupts by setting special-purpose register FAULTMASK.
+           Can only be executed in Privileged modes.
  */
-__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __disable_fault_irq(void)
 {
-   __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) );
+  __ASM volatile ("cpsid f" : : : "memory");
 }
 
 
 /**
-  \brief   STRT Unprivileged (32 bit)
-  \details Executes a Unprivileged STRT instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Base Priority
+  \details Returns the current value of the Base Priority register.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void)
 {
-   __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) );
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, basepri" : "=r" (result) );
+  return(result);
 }
 
-#else  /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Signed Saturate
-  \details Saturates a signed value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (1..32)
-  \return             Saturated value
+  \brief   Get Base Priority (non-secure)
+  \details Returns the current value of the non-secure Base Priority register when in secure state.
+  \return               Base Priority register value
  */
-__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void)
 {
-  if ((sat >= 1U) && (sat <= 32U))
-  {
-    const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
-    const int32_t min = -1 - max ;
-    if (val > max)
-    {
-      return max;
-    }
-    else if (val < min)
-    {
-      return min;
-    }
-  }
-  return val;
+  uint32_t result;
+
+  __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) );
+  return(result);
 }
+#endif
+
 
 /**
-  \brief   Unsigned Saturate
-  \details Saturates an unsigned value.
-  \param [in]  value  Value to be saturated
-  \param [in]    sat  Bit position to saturate to (0..31)
-  \return             Saturated value
+  \brief   Set Base Priority
+  \details Assigns the given value to the Base Priority register.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri)
 {
-  if (sat <= 31U)
-  {
-    const uint32_t max = ((1U << sat) - 1U);
-    if (val > (int32_t)max)
-    {
-      return max;
-    }
-    else if (val < 0)
-    {
-      return 0U;
-    }
-  }
-  return (uint32_t)val;
+  __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory");
 }
 
-#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
-           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
-           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
-
 
-#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Load-Acquire (8 bit)
-  \details Executes a LDAB instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Set Base Priority (non-secure)
+  \details Assigns the given value to the non-secure Base Priority register when in secure state.
+  \param [in]    basePri  Base Priority value to set
  */
-__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri)
 {
-    uint32_t result;
+  __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory");
+}
+#endif
 
-   __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return ((uint8_t) result);
+
+/**
+  \brief   Set Base Priority with condition
+  \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled,
+           or the new value increases the BASEPRI priority level.
+  \param [in]    basePri  Base Priority value to set
+ */
+__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri)
+{
+  __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory");
 }
 
 
 /**
-  \brief   Load-Acquire (16 bit)
-  \details Executes a LDAH instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Get Fault Mask
+  \details Returns the current value of the Fault Mask register.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void)
 {
-    uint32_t result;
+  uint32_t result;
 
-   __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return ((uint16_t) result);
+  __ASM volatile ("MRS %0, faultmask" : "=r" (result) );
+  return(result);
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Load-Acquire (32 bit)
-  \details Executes a LDA instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Get Fault Mask (non-secure)
+  \details Returns the current value of the non-secure Fault Mask register when in secure state.
+  \return               Fault Mask register value
  */
-__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void)
 {
-    uint32_t result;
+  uint32_t result;
 
-   __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return(result);
+  __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) );
+  return(result);
 }
+#endif
 
 
 /**
-  \brief   Store-Release (8 bit)
-  \details Executes a STLB instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Fault Mask
+  \details Assigns the given value to the Fault Mask register.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask)
 {
-   __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+  __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory");
 }
 
 
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Store-Release (16 bit)
-  \details Executes a STLH instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Set Fault Mask (non-secure)
+  \details Assigns the given value to the non-secure Fault Mask register when in secure state.
+  \param [in]    faultMask  Fault Mask value to set
  */
-__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask)
 {
-   __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+  __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory");
 }
+#endif
+
+#endif /* ((defined (__ARM_ARCH_7M__      ) && (__ARM_ARCH_7M__      == 1)) || \
+           (defined (__ARM_ARCH_7EM__     ) && (__ARM_ARCH_7EM__     == 1)) || \
+           (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))    ) */
+
 
+#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+     (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    )
 
 /**
-  \brief   Store-Release (32 bit)
-  \details Executes a STL instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
+  \brief   Get Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the Process Stack Pointer Limit (PSPLIM).
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void)
 {
-   __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+    // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, psplim"  : "=r" (result) );
+  return result;
+#endif
 }
 
-
+#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3))
 /**
-  \brief   Load-Acquire Exclusive (8 bit)
-  \details Executes a LDAB exclusive instruction for 8 bit value.
-  \param [in]    ptr  Pointer to data
-  \return             value of type uint8_t at (*ptr)
+  \brief   Get Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \return               PSPLIM Register value
  */
-__STATIC_FORCEINLINE uint8_t __LDAEXB(volatile uint8_t *ptr)
+__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void)
 {
-    uint32_t result;
-
-   __ASM volatile ("ldaexb %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return ((uint8_t) result);
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, psplim_ns"  : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire Exclusive (16 bit)
-  \details Executes a LDAH exclusive instruction for 16 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint16_t at (*ptr)
+  \brief   Set Process Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM).
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE uint16_t __LDAEXH(volatile uint16_t *ptr)
+__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit)
 {
-    uint32_t result;
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit));
+#endif
+}
 
-   __ASM volatile ("ldaexh %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return ((uint16_t) result);
+
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
+/**
+  \brief   Set Process Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
+
+  \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state.
+  \param [in]    ProcStackPtrLimit  Process Stack Pointer Limit value to set
+ */
+__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit)
+{
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure PSPLIM is RAZ/WI
+  (void)ProcStackPtrLimit;
+#else
+  __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit));
+#endif
 }
+#endif
 
 
 /**
-  \brief   Load-Acquire Exclusive (32 bit)
-  \details Executes a LDA exclusive instruction for 32 bit values.
-  \param [in]    ptr  Pointer to data
-  \return        value of type uint32_t at (*ptr)
+  \brief   Get Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always in non-secure
+  mode.
+
+  \details Returns the current value of the Main Stack Pointer Limit (MSPLIM).
+  \return               MSPLIM Register value
  */
-__STATIC_FORCEINLINE uint32_t __LDAEX(volatile uint32_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void)
 {
-    uint32_t result;
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim" : "=r" (result) );
+  return result;
+#endif
+}
 
-   __ASM volatile ("ldaex %0, %1" : "=r" (result) : "Q" (*ptr) : "memory" );
-   return(result);
+
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
+/**
+  \brief   Get Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence zero is returned always.
+
+  \details Returns the current value of the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \return               MSPLIM Register value
+ */
+__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void)
+{
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  return 0U;
+#else
+  uint32_t result;
+  __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) );
+  return result;
+#endif
 }
+#endif
 
 
 /**
-  \brief   Store-Release Exclusive (8 bit)
-  \details Executes a STLB exclusive instruction for 8 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set Main Stack Pointer Limit
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored in non-secure
+  mode.
+
+  \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM).
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
  */
-__STATIC_FORCEINLINE uint32_t __STLEXB(uint8_t value, volatile uint8_t *ptr)
+__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit)
 {
-   uint32_t result;
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \
+    (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit));
+#endif
+}
 
-   __ASM volatile ("stlexb %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
-   return(result);
+
+#if (defined (__ARM_FEATURE_CMSE  ) && (__ARM_FEATURE_CMSE   == 3))
+/**
+  \brief   Set Main Stack Pointer Limit (non-secure)
+  Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure
+  Stack Pointer Limit register hence the write is silently ignored.
+
+  \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state.
+  \param [in]    MainStackPtrLimit  Main Stack Pointer Limit value to set
+ */
+__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit)
+{
+#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)))
+  // without main extensions, the non-secure MSPLIM is RAZ/WI
+  (void)MainStackPtrLimit;
+#else
+  __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit));
+#endif
 }
+#endif
+
+#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
+           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
 
 /**
-  \brief   Store-Release Exclusive (16 bit)
-  \details Executes a STLH exclusive instruction for 16 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Get FPSCR
+  \details Returns the current value of the Floating Point Status/Control register.
+  \return               Floating Point Status/Control register value
  */
-__STATIC_FORCEINLINE uint32_t __STLEXH(uint16_t value, volatile uint16_t *ptr)
+__STATIC_FORCEINLINE uint32_t __get_FPSCR(void)
 {
-   uint32_t result;
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#if __has_builtin(__builtin_arm_get_fpscr)
+// Re-enable using built-in when GCC has been fixed
+// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
+  /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
+  return __builtin_arm_get_fpscr();
+#else
+  uint32_t result;
 
-   __ASM volatile ("stlexh %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
-   return(result);
+  __ASM volatile ("VMRS %0, fpscr" : "=r" (result) );
+  return(result);
+#endif
+#else
+  return(0U);
+#endif
 }
 
 
 /**
-  \brief   Store-Release Exclusive (32 bit)
-  \details Executes a STL exclusive instruction for 32 bit values.
-  \param [in]  value  Value to store
-  \param [in]    ptr  Pointer to location
-  \return          0  Function succeeded
-  \return          1  Function failed
+  \brief   Set FPSCR
+  \details Assigns the given value to the Floating Point Status/Control register.
+  \param [in]    fpscr  Floating Point Status/Control value to set
  */
-__STATIC_FORCEINLINE uint32_t __STLEX(uint32_t value, volatile uint32_t *ptr)
+__STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr)
 {
-   uint32_t result;
-
-   __ASM volatile ("stlex %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) : "memory" );
-   return(result);
+#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
+     (defined (__FPU_USED   ) && (__FPU_USED    == 1U))     )
+#if __has_builtin(__builtin_arm_set_fpscr)
+// Re-enable using built-in when GCC has been fixed
+// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
+  /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
+  __builtin_arm_set_fpscr(fpscr);
+#else
+  __ASM volatile ("VMSR fpscr, %0" : : "r" (fpscr) : "vfpcc", "memory");
+#endif
+#else
+  (void)fpscr;
+#endif
 }
 
-#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \
-           (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1))    ) */
 
-/*@}*/ /* end of group CMSIS_Core_InstructionInterface */
+/*@} end of CMSIS_Core_RegAccFunctions */
 
 
 /* ###################  Compiler specific Intrinsics  ########################### */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_iccarm.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_iccarm.h
index 45e90af..65b824b 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_iccarm.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_iccarm.h
@@ -1,14 +1,14 @@
 /**************************************************************************//**
  * @file     cmsis_iccarm.h
  * @brief    CMSIS compiler ICCARM (IAR Compiler for Arm) header file
- * @version  V5.2.0
- * @date     28. January 2020
+ * @version  V5.3.0
+ * @date     14. April 2021
  ******************************************************************************/
 
 //------------------------------------------------------------------------------
 //
-// Copyright (c) 2017-2020 IAR Systems
-// Copyright (c) 2017-2019 Arm Limited. All rights reserved.
+// Copyright (c) 2017-2021 IAR Systems
+// Copyright (c) 2017-2021 Arm Limited. All rights reserved.
 //
 // SPDX-License-Identifier: Apache-2.0
 //
@@ -267,6 +267,24 @@ __packed struct  __iar_u32 { uint32_t v; };
 #define __VECTOR_TABLE_ATTRIBUTE  @".intvec"
 #endif
 
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+#ifndef __STACK_SEAL
+#define __STACK_SEAL              STACKSEAL$$Base  /* default seal symbol; define __STACK_SEAL earlier to override */
+#endif
+
+#ifndef __TZ_STACK_SEAL_SIZE
+#define __TZ_STACK_SEAL_SIZE      8U  /* presumably bytes, matching the 64-bit seal value below - confirm */
+#endif
+
+#ifndef __TZ_STACK_SEAL_VALUE
+#define __TZ_STACK_SEAL_VALUE     0xFEF5EDA5FEF5EDA5ULL  /* 64-bit pattern stored by __TZ_set_STACKSEAL_S */
+#endif
+
+__STATIC_FORCEINLINE void __TZ_set_STACKSEAL_S (uint32_t* stackTop) {  /* stores the seal value at stackTop */
+  *((uint64_t *)stackTop) = __TZ_STACK_SEAL_VALUE;  /* one 64-bit store; NOTE(review): assumes stackTop is 8-byte aligned - confirm */
+}
+#endif  /* __ARM_FEATURE_CMSE == 3U */
+
 #ifndef __ICCARM_INTRINSICS_VERSION__
   #define __ICCARM_INTRINSICS_VERSION__  0
 #endif
@@ -337,7 +355,13 @@ __packed struct  __iar_u32 { uint32_t v; };
 
   #define __set_BASEPRI(VALUE)        (__arm_wsr("BASEPRI", (VALUE)))
   #define __set_BASEPRI_MAX(VALUE)    (__arm_wsr("BASEPRI_MAX", (VALUE)))
-  #define __set_CONTROL(VALUE)        (__arm_wsr("CONTROL", (VALUE)))
+
+__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control)  /* writes CONTROL, then issues an ISB */
+{
+  __arm_wsr("CONTROL", control);  /* special-register write through the IAR intrinsic */
+  __iar_builtin_ISB();  /* barrier right after the write; presumably so later instructions see the new CONTROL value - confirm against the Arm ARM */
+}
+
   #define __set_FAULTMASK(VALUE)      (__arm_wsr("FAULTMASK", (VALUE)))
   #define __set_MSP(VALUE)            (__arm_wsr("MSP", (VALUE)))
 
@@ -359,7 +383,13 @@ __packed struct  __iar_u32 { uint32_t v; };
   #endif
 
   #define __TZ_get_CONTROL_NS()       (__arm_rsr("CONTROL_NS"))
-  #define __TZ_set_CONTROL_NS(VALUE)  (__arm_wsr("CONTROL_NS", (VALUE)))
+
+__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control)  /* writes CONTROL_NS, then issues an ISB */
+{
+  __arm_wsr("CONTROL_NS", control);  /* special-register write through the IAR intrinsic */
+  __iar_builtin_ISB();  /* barrier right after the write, same pattern as __set_CONTROL in this header */
+}
+
   #define __TZ_get_PSP_NS()           (__arm_rsr("PSP_NS"))
   #define __TZ_set_PSP_NS(VALUE)      (__arm_wsr("PSP_NS", (VALUE)))
   #define __TZ_get_MSP_NS()           (__arm_rsr("MSP_NS"))
@@ -681,6 +711,7 @@ __packed struct  __iar_u32 { uint32_t v; };
     __IAR_FT void   __TZ_set_CONTROL_NS(uint32_t value)
     {
       __asm volatile("MSR      CONTROL_NS,%0" :: "r" (value));
+      __iar_builtin_ISB();  /* barrier right after the MSR, matching the intrinsic-based variant of this setter */
     }
 
     __IAR_FT uint32_t   __TZ_get_PSP_NS(void)
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_version.h b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_version.h
index 2f048e4..8b4765f 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/cmsis_version.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/cmsis_version.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     cmsis_version.h
  * @brief    CMSIS Core(M) Version definitions
- * @version  V5.0.4
- * @date     23. July 2019
+ * @version  V5.0.5
+ * @date     02. February 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2019 ARM Limited. All rights reserved.
+ * Copyright (c) 2009-2022 ARM Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,7 +33,7 @@
 
 /*  CMSIS Version definitions */
 #define __CM_CMSIS_VERSION_MAIN  ( 5U)                                      /*!< [31:16] CMSIS Core(M) main version */
-#define __CM_CMSIS_VERSION_SUB   ( 4U)                                      /*!< [15:0]  CMSIS Core(M) sub version */
+#define __CM_CMSIS_VERSION_SUB   ( 6U)                                      /*!< [15:0]  CMSIS Core(M) sub version */
 #define __CM_CMSIS_VERSION       ((__CM_CMSIS_VERSION_MAIN << 16U) | \
                                    __CM_CMSIS_VERSION_SUB           )       /*!< CMSIS Core(M) version number */
 #endif
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_armv81mml.h b/edge-impulse-sdk/CMSIS/Core/Include/core_armv81mml.h
index 18bcb04..fa1afb8 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_armv81mml.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_armv81mml.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_armv81mml.h
  * @brief    CMSIS Armv8.1-M Mainline Core Peripheral Access Layer Header File
- * @version  V1.4.0
- * @date     15. April 2020
+ * @version  V1.4.2
+ * @date     13. October 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2018-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2018-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -210,14 +210,14 @@
     #define __FPU_PRESENT             0U
     #warning "__FPU_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #if __FPU_PRESENT != 0U
     #ifndef __FPU_DP
       #define __FPU_DP             0U
       #warning "__FPU_DP not defined in device header file; using default!"
     #endif
   #endif
-  
+
   #ifndef __MPU_PRESENT
     #define __MPU_PRESENT             0U
     #warning "__MPU_PRESENT not defined in device header file; using default!"
@@ -232,7 +232,7 @@
     #define __DCACHE_PRESENT          0U
     #warning "__DCACHE_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __PMU_PRESENT
     #define __PMU_PRESENT             0U
     #warning "__PMU_PRESENT not defined in device header file; using default!"
@@ -261,7 +261,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -526,7 +526,7 @@ typedef struct
   __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
   __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
   __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
-  __IM  uint32_t ID_ADR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
   __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
   __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
   __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
@@ -535,7 +535,10 @@ typedef struct
   __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
   __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
   __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
-        uint32_t RESERVED3[92U];
+        uint32_t RESERVED7[21U];
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];
   __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
   __IOM uint32_t RFSR;                   /*!< Offset: 0x204 (R/W)  RAS Fault Status Register */
         uint32_t RESERVED4[14U];
@@ -766,22 +769,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
@@ -1490,15 +1493,14 @@ typedef struct
         uint32_t RESERVED11[108];
   __IOM uint32_t AUTHSTATUS;                        /*!< Offset: 0xFB8 (R/W)  PMU Authentication Status Register */
   __IOM uint32_t DEVARCH;                           /*!< Offset: 0xFBC (R/W)  PMU Device Architecture Register */
-        uint32_t RESERVED12[4];
+        uint32_t RESERVED12[3];
   __IOM uint32_t DEVTYPE;                           /*!< Offset: 0xFCC (R/W)  PMU Device Type Register */
   __IOM uint32_t PIDR4;                             /*!< Offset: 0xFD0 (R/W)  PMU Peripheral Identification Register 4 */
         uint32_t RESERVED13[3];
   __IOM uint32_t PIDR0;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 0 */
-  __IOM uint32_t PIDR1;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 1 */
-  __IOM uint32_t PIDR2;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 2 */
-  __IOM uint32_t PIDR3;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 3 */
-        uint32_t RESERVED14[3];
+  __IOM uint32_t PIDR1;                             /*!< Offset: 0xFE4 (R/W)  PMU Peripheral Identification Register 1 */
+  __IOM uint32_t PIDR2;                             /*!< Offset: 0xFE8 (R/W)  PMU Peripheral Identification Register 2 */
+  __IOM uint32_t PIDR3;                             /*!< Offset: 0xFEC (R/W)  PMU Peripheral Identification Register 3 */
   __IOM uint32_t CIDR0;                             /*!< Offset: 0xFF0 (R/W)  PMU Component Identification Register 0 */
   __IOM uint32_t CIDR1;                             /*!< Offset: 0xFF4 (R/W)  PMU Component Identification Register 1 */
   __IOM uint32_t CIDR2;                             /*!< Offset: 0xFF8 (R/W)  PMU Component Identification Register 2 */
@@ -3158,6 +3160,15 @@ typedef struct
 /*@} */
 
 
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+#define ID_ADR  (ID_AFR)    /*!< SCB Auxiliary Feature Register */
+/*@} */
+
 
 /*******************************************************************************
  *                Hardware Abstraction Layer
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_armv8mml.h b/edge-impulse-sdk/CMSIS/Core/Include/core_armv8mml.h
index 0632732..ede72ec 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_armv8mml.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_armv8mml.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_armv8mml.h
  * @brief    CMSIS Armv8-M Mainline Core Peripheral Access Layer Header File
- * @version  V5.2.1
- * @date     19. August 2020
+ * @version  V5.2.4
+ * @date     30. May 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -254,7 +254,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -287,7 +287,7 @@
 #define     __OM     volatile            /*! Defines 'write only' structure member permissions */
 #define     __IOM    volatile            /*! Defines 'read / write' structure member permissions */
 
-/*@} end of group ARMv8MML */
+/** @} end of group ARMv8MML */
 
 
 
@@ -452,7 +452,7 @@ typedef union
 #define CONTROL_nPRIV_Pos                   0U                                            /*!< CONTROL: nPRIV Position */
 #define CONTROL_nPRIV_Msk                  (1UL /*<< CONTROL_nPRIV_Pos*/)                 /*!< CONTROL: nPRIV Mask */
 
-/*@} end of group CMSIS_CORE */
+/** @} end of group CMSIS_CORE */
 
 
 /**
@@ -488,7 +488,7 @@ typedef struct
 #define NVIC_STIR_INTID_Pos                 0U                                         /*!< STIR: INTLINESNUM Position */
 #define NVIC_STIR_INTID_Msk                (0x1FFUL /*<< NVIC_STIR_INTID_Pos*/)        /*!< STIR: INTLINESNUM Mask */
 
-/*@} end of group CMSIS_NVIC */
+/** @} end of group CMSIS_NVIC */
 
 
 /**
@@ -519,7 +519,7 @@ typedef struct
   __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
   __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
   __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
-  __IM  uint32_t ID_ADR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
   __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
   __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
   __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
@@ -528,7 +528,10 @@ typedef struct
   __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
   __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
   __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
-        uint32_t RESERVED3[92U];
+        uint32_t RESERVED7[21U];
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];
   __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
         uint32_t RESERVED4[15U];
   __IM  uint32_t MVFR0;                  /*!< Offset: 0x240 (R/ )  Media and VFP Feature Register 0 */
@@ -746,22 +749,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
@@ -921,7 +924,7 @@ typedef struct
 #define SCB_DCCISW_SET_Pos                  5U                                            /*!< SCB DCCISW: Set Position */
 #define SCB_DCCISW_SET_Msk                 (0x1FFUL << SCB_DCCISW_SET_Pos)                /*!< SCB DCCISW: Set Mask */
 
-/*@} end of group CMSIS_SCB */
+/** @} end of group CMSIS_SCB */
 
 
 /**
@@ -946,7 +949,7 @@ typedef struct
 #define SCnSCB_ICTR_INTLINESNUM_Pos         0U                                         /*!< ICTR: INTLINESNUM Position */
 #define SCnSCB_ICTR_INTLINESNUM_Msk        (0xFUL /*<< SCnSCB_ICTR_INTLINESNUM_Pos*/)  /*!< ICTR: INTLINESNUM Mask */
 
-/*@} end of group CMSIS_SCnotSCB */
+/** @} end of group CMSIS_SCnotSCB */
 
 
 /**
@@ -998,7 +1001,7 @@ typedef struct
 #define SysTick_CALIB_TENMS_Pos             0U                                            /*!< SysTick CALIB: TENMS Position */
 #define SysTick_CALIB_TENMS_Msk            (0xFFFFFFUL /*<< SysTick_CALIB_TENMS_Pos*/)    /*!< SysTick CALIB: TENMS Mask */
 
-/*@} end of group CMSIS_SysTick */
+/** @} end of group CMSIS_SysTick */
 
 
 /**
@@ -1098,7 +1101,7 @@ typedef struct
 #define ITM_LSR_Present_Pos                 0U                                            /*!< ITM LSR: Present Position */
 #define ITM_LSR_Present_Msk                (1UL /*<< ITM_LSR_Present_Pos*/)               /*!< ITM LSR: Present Mask */
 
-/*@}*/ /* end of group CMSIS_ITM */
+/** @}*/ /* end of group CMSIS_ITM */
 
 
 /**
@@ -1284,7 +1287,7 @@ typedef struct
 #define DWT_FUNCTION_MATCH_Pos              0U                                         /*!< DWT FUNCTION: MATCH Position */
 #define DWT_FUNCTION_MATCH_Msk             (0xFUL /*<< DWT_FUNCTION_MATCH_Pos*/)       /*!< DWT FUNCTION: MATCH Mask */
 
-/*@}*/ /* end of group CMSIS_DWT */
+/** @}*/ /* end of group CMSIS_DWT */
 
 
 /**
@@ -1382,7 +1385,7 @@ typedef struct
 #define TPI_DEVTYPE_MajorType_Pos           0U                                         /*!< TPI DEVTYPE: MajorType Position */
 #define TPI_DEVTYPE_MajorType_Msk          (0xFUL << TPI_DEVTYPE_MajorType_Pos)        /*!< TPI DEVTYPE: MajorType Mask */
 
-/*@}*/ /* end of group CMSIS_TPI */
+/** @}*/ /* end of group CMSIS_TPI */
 
 
 #if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
@@ -1494,7 +1497,7 @@ typedef struct
 #define MPU_MAIR1_Attr4_Pos                 0U                                            /*!< MPU MAIR1: Attr4 Position */
 #define MPU_MAIR1_Attr4_Msk                (0xFFUL /*<< MPU_MAIR1_Attr4_Pos*/)            /*!< MPU MAIR1: Attr4 Mask */
 
-/*@} end of group CMSIS_MPU */
+/** @} end of group CMSIS_MPU */
 #endif
 
 
@@ -1581,7 +1584,7 @@ typedef struct
 #define SAU_SFSR_INVEP_Pos                  0U                                            /*!< SAU SFSR: INVEP Position */
 #define SAU_SFSR_INVEP_Msk                 (1UL /*<< SAU_SFSR_INVEP_Pos*/)                /*!< SAU SFSR: INVEP Mask */
 
-/*@} end of group CMSIS_SAU */
+/** @} end of group CMSIS_SAU */
 #endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
 
 
@@ -1717,7 +1720,7 @@ typedef struct
 #define FPU_MVFR2_FPMisc_Pos                4U                                            /*!< MVFR2: FPMisc bits Position */
 #define FPU_MVFR2_FPMisc_Msk               (0xFUL << FPU_MVFR2_FPMisc_Pos)                /*!< MVFR2: FPMisc bits Mask */
 
-/*@} end of group CMSIS_FPU */
+/** @} end of group CMSIS_FPU */
 
 /* CoreDebug is deprecated. replaced by DCB (Debug Control Block) */
 /**
@@ -1851,7 +1854,7 @@ typedef struct
 #define CoreDebug_DSCSR_SBRSELEN_Pos        0U                                            /*!< \deprecated CoreDebug DSCSR: SBRSELEN Position */
 #define CoreDebug_DSCSR_SBRSELEN_Msk       (1UL /*<< CoreDebug_DSCSR_SBRSELEN_Pos*/)      /*!< \deprecated CoreDebug DSCSR: SBRSELEN Mask */
 
-/*@} end of group CMSIS_CoreDebug */
+/** @} end of group CMSIS_CoreDebug */
 
 
 /**
@@ -2007,7 +2010,7 @@ typedef struct
 #define DCB_DSCSR_SBRSELEN_Pos              0U                                            /*!< DCB DSCSR: Secure banked register select enable Position */
 #define DCB_DSCSR_SBRSELEN_Msk             (0x1UL /*<< DCB_DSCSR_SBRSELEN_Pos*/)          /*!< DCB DSCSR: Secure banked register select enable Mask */
 
-/*@} end of group CMSIS_DCB */
+/** @} end of group CMSIS_DCB */
 
 
 
@@ -2081,7 +2084,7 @@ typedef struct
 #define DIB_DDEVTYPE_MAJOR_Msk             (0xFUL /*<< DIB_DDEVTYPE_MAJOR_Pos*/)          /*!< DIB DDEVTYPE: Major type Mask */
 
 
-/*@} end of group CMSIS_DIB */
+/** @} end of group CMSIS_DIB */
 
 
 /**
@@ -2107,7 +2110,7 @@ typedef struct
 */
 #define _FLD2VAL(field, value)    (((uint32_t)(value) & field ## _Msk) >> field ## _Pos)
 
-/*@} end of group CMSIS_core_bitfield */
+/** @} end of group CMSIS_core_bitfield */
 
 
 /**
@@ -2179,8 +2182,17 @@ typedef struct
   #define FPU_NS              ((FPU_Type       *)     FPU_BASE_NS      ) /*!< Floating Point Unit               (non-secure address space) */
 
 #endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
-/*@} */
+/** @} */
+
 
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+#define ID_ADR  (ID_AFR)    /*!< SCB Auxiliary Feature Register */
+/** @} */
 
 
 /*******************************************************************************
@@ -2838,7 +2850,7 @@ __STATIC_INLINE uint32_t TZ_NVIC_GetPriority_NS(IRQn_Type IRQn)
 }
 #endif /*  defined (__ARM_FEATURE_CMSE) &&(__ARM_FEATURE_CMSE == 3U) */
 
-/*@} end of CMSIS_Core_NVICFunctions */
+/** @} end of CMSIS_Core_NVICFunctions */
 
 /* ##########################  MPU functions  #################################### */
 
@@ -2884,7 +2896,7 @@ __STATIC_INLINE uint32_t SCB_GetFPUType(void)
 }
 
 
-/*@} end of CMSIS_Core_FpuFunctions */
+/** @} end of CMSIS_Core_FpuFunctions */
 
 
 /* ##########################  Cache functions  #################################### */
@@ -2927,7 +2939,7 @@ __STATIC_INLINE void TZ_SAU_Disable(void)
 
 #endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
 
-/*@} end of CMSIS_Core_SAUFunctions */
+/** @} end of CMSIS_Core_SAUFunctions */
 
 
 
@@ -2940,7 +2952,7 @@ __STATIC_INLINE void TZ_SAU_Disable(void)
   @{
  */
 
- 
+
 /**
   \brief   Set Debug Authentication Control Register
   \details writes to Debug Authentication Control register.
@@ -2994,7 +3006,7 @@ __STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
 }
 #endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
 
-/*@} end of CMSIS_Core_DCBFunctions */
+/** @} end of CMSIS_Core_DCBFunctions */
 
 
 
@@ -3007,7 +3019,7 @@ __STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
   @{
  */
 
- 
+
 /**
   \brief   Get Debug Authentication Status Register
   \details Reads Debug Authentication Status register.
@@ -3031,7 +3043,7 @@ __STATIC_INLINE uint32_t TZ_DIB_GetAuthStatus_NS(void)
 }
 #endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
 
-/*@} end of CMSIS_Core_DCBFunctions */
+/** @} end of CMSIS_Core_DIBFunctions */
 
 
 
@@ -3105,7 +3117,7 @@ __STATIC_INLINE uint32_t TZ_SysTick_Config_NS(uint32_t ticks)
 
 #endif
 
-/*@} end of CMSIS_Core_SysTickFunctions */
+/** @} end of CMSIS_Core_SysTickFunctions */
 
 
 
@@ -3183,7 +3195,7 @@ __STATIC_INLINE int32_t ITM_CheckChar (void)
   }
 }
 
-/*@} end of CMSIS_core_DebugFunctions */
+/** @} end of CMSIS_core_DebugFunctions */
 
 
 
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm3.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm3.h
index 33c0f57..b73615f 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm3.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm3.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_cm3.h
  * @brief    CMSIS Cortex-M3 Core Peripheral Access Layer Header File
- * @version  V5.1.1
- * @date     27. March 2020
+ * @version  V5.1.2
+ * @date     04. June 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -146,7 +146,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -565,19 +565,19 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm33.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm33.h
index 6294184..f964b15 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm33.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm33.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_cm33.h
  * @brief    CMSIS Cortex-M33 Core Peripheral Access Layer Header File
- * @version  V5.2.1
- * @date     19. August 2020
+ * @version  V5.2.3
+ * @date     13. October 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -254,7 +254,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -519,7 +519,7 @@ typedef struct
   __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
   __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
   __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
-  __IM  uint32_t ID_ADR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
   __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
   __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
   __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
@@ -528,7 +528,10 @@ typedef struct
   __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
   __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
   __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
-        uint32_t RESERVED3[92U];
+        uint32_t RESERVED7[21U];
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];
   __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
         uint32_t RESERVED4[15U];
   __IM  uint32_t MVFR0;                  /*!< Offset: 0x240 (R/ )  Media and VFP Feature Register 0 */
@@ -746,22 +749,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
@@ -2257,6 +2260,15 @@ typedef struct
 /*@} */
 
 
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+#define ID_ADR  (ID_AFR)    /*!< SCB Auxiliary Feature Register */
+/*@} */
+
 
 /*******************************************************************************
  *                Hardware Abstraction Layer
@@ -3008,7 +3020,7 @@ __STATIC_INLINE void TZ_SAU_Disable(void)
   @{
  */
 
- 
+
 /**
   \brief   Set Debug Authentication Control Register
   \details writes to Debug Authentication Control register.
@@ -3075,7 +3087,7 @@ __STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
   @{
  */
 
- 
+
 /**
   \brief   Get Debug Authentication Status Register
   \details Reads Debug Authentication Status register.
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm35p.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm35p.h
index a1e51ad..c8bfddd 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm35p.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm35p.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_cm35p.h
  * @brief    CMSIS Cortex-M35P Core Peripheral Access Layer Header File
- * @version  V1.1.1
- * @date     19. August 2020
+ * @version  V1.1.3
+ * @date     13. October 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2018-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2018-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -249,12 +249,12 @@
     #define __DSP_PRESENT             0U
     #warning "__DSP_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __VTOR_PRESENT
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -519,7 +519,7 @@ typedef struct
   __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
   __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
   __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
-  __IM  uint32_t ID_ADR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
   __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
   __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
   __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
@@ -528,7 +528,10 @@ typedef struct
   __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
   __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
   __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
-        uint32_t RESERVED3[92U];
+        uint32_t RESERVED7[21U];
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];
   __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
         uint32_t RESERVED4[15U];
   __IM  uint32_t MVFR0;                  /*!< Offset: 0x240 (R/ )  Media and VFP Feature Register 0 */
@@ -746,22 +749,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
@@ -2257,6 +2260,15 @@ typedef struct
 /*@} */
 
 
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+#define ID_ADR  (ID_AFR)    /*!< SCB Auxiliary Feature Register */
+/*@} */
+
 
 /*******************************************************************************
  *                Hardware Abstraction Layer
@@ -3008,7 +3020,7 @@ __STATIC_INLINE void TZ_SAU_Disable(void)
   @{
  */
 
- 
+
 /**
   \brief   Set Debug Authentication Control Register
   \details writes to Debug Authentication Control register.
@@ -3075,7 +3087,7 @@ __STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
   @{
  */
 
- 
+
 /**
   \brief   Get Debug Authentication Status Register
   \details Reads Debug Authentication Status register.
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm4.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm4.h
index dfdc41a..a347f36 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm4.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm4.h
@@ -1,8 +1,8 @@
 /**************************************************************************//**
  * @file     core_cm4.h
  * @brief    CMSIS Cortex-M4 Core Peripheral Access Layer Header File
- * @version  V5.1.1
- * @date     27. March 2020
+ * @version  V5.1.2
+ * @date     04. June 2021
  ******************************************************************************/
 /*
  * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
@@ -198,7 +198,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -623,22 +623,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm55.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm55.h
index 03c1aa5..2f40d61 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm55.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm55.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_cm55.h
  * @brief    CMSIS Cortex-M55 Core Peripheral Access Layer Header File
- * @version  V1.1.0
- * @date     15. April 2020
+ * @version  V1.2.5
+ * @date     12. May 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2018-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2018-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,7 +58,7 @@
  *                 CMSIS definitions
  ******************************************************************************/
 /**
-  \ingroup Cortex_CM55
+  \ingroup Cortex_M55
   @{
  */
 
@@ -210,7 +210,7 @@
     #define __FPU_PRESENT             0U
     #warning "__FPU_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #if __FPU_PRESENT != 0U
     #ifndef __FPU_DP
       #define __FPU_DP             0U
@@ -232,12 +232,12 @@
     #define __DCACHE_PRESENT          0U
     #warning "__DCACHE_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __VTOR_PRESENT
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __PMU_PRESENT
     #define __PMU_PRESENT             0U
     #warning "__PMU_PRESENT not defined in device header file; using default!"
@@ -303,9 +303,11 @@
   Core Register contain:
   - Core Register
   - Core NVIC Register
+  - Core EWIC Register
   - Core SCB Register
   - Core SysTick Register
   - Core Debug Register
+  - Core PMU Register
   - Core MPU Register
   - Core SAU Register
   - Core FPU Register
@@ -526,7 +528,7 @@ typedef struct
   __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
   __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
   __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
-  __IM  uint32_t ID_ADR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
   __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
   __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
   __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
@@ -535,7 +537,10 @@ typedef struct
   __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
   __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
   __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
-        uint32_t RESERVED3[92U];
+        uint32_t RESERVED7[21U];
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];
   __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
   __IOM uint32_t RFSR;                   /*!< Offset: 0x204 (R/W)  RAS Fault Status Register */
         uint32_t RESERVED4[14U];
@@ -766,22 +771,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
@@ -987,13 +992,13 @@ typedef struct
 
 /**
   \ingroup  CMSIS_core_register
-  \defgroup CMSIS_SCnSCB System Controls not in SCB (SCnSCB)
-  \brief    Type definitions for the System Control and ID Register not in the SCB
+  \defgroup CMSIS_ICB Implementation Control Block register (ICB)
+  \brief    Type definitions for the Implementation Control Block Register
   @{
  */
 
 /**
-  \brief  Structure type to access the System Control and ID Register not in the SCB.
+  \brief  Structure type to access the Implementation Control Block (ICB).
  */
 typedef struct
 {
@@ -1001,13 +1006,56 @@ typedef struct
   __IM  uint32_t ICTR;                   /*!< Offset: 0x004 (R/ )  Interrupt Controller Type Register */
   __IOM uint32_t ACTLR;                  /*!< Offset: 0x008 (R/W)  Auxiliary Control Register */
   __IOM uint32_t CPPWR;                  /*!< Offset: 0x00C (R/W)  Coprocessor Power Control  Register */
-} SCnSCB_Type;
+} ICB_Type;
+
+/* Auxiliary Control Register Definitions */
+#define ICB_ACTLR_DISCRITAXIRUW_Pos     27U                                               /*!< ACTLR: DISCRITAXIRUW Position */
+#define ICB_ACTLR_DISCRITAXIRUW_Msk     (1UL << ICB_ACTLR_DISCRITAXIRUW_Pos)              /*!< ACTLR: DISCRITAXIRUW Mask */
+
+#define ICB_ACTLR_DISDI_Pos             16U                                               /*!< ACTLR: DISDI Position */
+#define ICB_ACTLR_DISDI_Msk             (3UL << ICB_ACTLR_DISDI_Pos)                      /*!< ACTLR: DISDI Mask */
+
+#define ICB_ACTLR_DISCRITAXIRUR_Pos     15U                                               /*!< ACTLR: DISCRITAXIRUR Position */
+#define ICB_ACTLR_DISCRITAXIRUR_Msk     (1UL << ICB_ACTLR_DISCRITAXIRUR_Pos)              /*!< ACTLR: DISCRITAXIRUR Mask */
+
+#define ICB_ACTLR_EVENTBUSEN_Pos        14U                                               /*!< ACTLR: EVENTBUSEN Position */
+#define ICB_ACTLR_EVENTBUSEN_Msk        (1UL << ICB_ACTLR_EVENTBUSEN_Pos)                 /*!< ACTLR: EVENTBUSEN Mask */
+
+#define ICB_ACTLR_EVENTBUSEN_S_Pos      13U                                               /*!< ACTLR: EVENTBUSEN_S Position */
+#define ICB_ACTLR_EVENTBUSEN_S_Msk      (1UL << ICB_ACTLR_EVENTBUSEN_S_Pos)               /*!< ACTLR: EVENTBUSEN_S Mask */
+
+#define ICB_ACTLR_DISITMATBFLUSH_Pos    12U                                               /*!< ACTLR: DISITMATBFLUSH Position */
+#define ICB_ACTLR_DISITMATBFLUSH_Msk    (1UL << ICB_ACTLR_DISITMATBFLUSH_Pos)             /*!< ACTLR: DISITMATBFLUSH Mask */
+
+#define ICB_ACTLR_DISNWAMODE_Pos        11U                                               /*!< ACTLR: DISNWAMODE Position */
+#define ICB_ACTLR_DISNWAMODE_Msk        (1UL << ICB_ACTLR_DISNWAMODE_Pos)                 /*!< ACTLR: DISNWAMODE Mask */
+
+#define ICB_ACTLR_FPEXCODIS_Pos         10U                                               /*!< ACTLR: FPEXCODIS Position */
+#define ICB_ACTLR_FPEXCODIS_Msk         (1UL << ICB_ACTLR_FPEXCODIS_Pos)                  /*!< ACTLR: FPEXCODIS Mask */
+
+#define ICB_ACTLR_DISOLAP_Pos            7U                                               /*!< ACTLR: DISOLAP Position */
+#define ICB_ACTLR_DISOLAP_Msk           (1UL << ICB_ACTLR_DISOLAP_Pos)                    /*!< ACTLR: DISOLAP Mask */
+
+#define ICB_ACTLR_DISOLAPS_Pos           6U                                               /*!< ACTLR: DISOLAPS Position */
+#define ICB_ACTLR_DISOLAPS_Msk          (1UL << ICB_ACTLR_DISOLAPS_Pos)                   /*!< ACTLR: DISOLAPS Mask */
+
+#define ICB_ACTLR_DISLOBR_Pos            5U                                               /*!< ACTLR: DISLOBR Position */
+#define ICB_ACTLR_DISLOBR_Msk           (1UL << ICB_ACTLR_DISLOBR_Pos)                    /*!< ACTLR: DISLOBR Mask */
+
+#define ICB_ACTLR_DISLO_Pos              4U                                               /*!< ACTLR: DISLO Position */
+#define ICB_ACTLR_DISLO_Msk             (1UL << ICB_ACTLR_DISLO_Pos)                      /*!< ACTLR: DISLO Mask */
+
+#define ICB_ACTLR_DISLOLEP_Pos           3U                                               /*!< ACTLR: DISLOLEP Position */
+#define ICB_ACTLR_DISLOLEP_Msk          (1UL << ICB_ACTLR_DISLOLEP_Pos)                   /*!< ACTLR: DISLOLEP Mask */
+
+#define ICB_ACTLR_DISFOLD_Pos            2U                                               /*!< ACTLR: DISFOLD Position */
+#define ICB_ACTLR_DISFOLD_Msk           (1UL << ICB_ACTLR_DISFOLD_Pos)                    /*!< ACTLR: DISFOLD Mask */
 
 /* Interrupt Controller Type Register Definitions */
-#define SCnSCB_ICTR_INTLINESNUM_Pos         0U                                         /*!< ICTR: INTLINESNUM Position */
-#define SCnSCB_ICTR_INTLINESNUM_Msk        (0xFUL /*<< SCnSCB_ICTR_INTLINESNUM_Pos*/)  /*!< ICTR: INTLINESNUM Mask */
+#define ICB_ICTR_INTLINESNUM_Pos         0U                                               /*!< ICTR: INTLINESNUM Position */
+#define ICB_ICTR_INTLINESNUM_Msk        (0xFUL /*<< ICB_ICTR_INTLINESNUM_Pos*/)           /*!< ICTR: INTLINESNUM Mask */
 
-/*@} end of group CMSIS_SCnotSCB */
+/*@} end of group CMSIS_ICB */
 
 
 /**
@@ -1086,13 +1134,15 @@ typedef struct
   __IOM uint32_t TPR;                    /*!< Offset: 0xE40 (R/W)  ITM Trace Privilege Register */
         uint32_t RESERVED2[15U];
   __IOM uint32_t TCR;                    /*!< Offset: 0xE80 (R/W)  ITM Trace Control Register */
-        uint32_t RESERVED3[32U];
-        uint32_t RESERVED4[43U];
-  __OM  uint32_t LAR;                    /*!< Offset: 0xFB0 ( /W)  ITM Lock Access Register */
-  __IM  uint32_t LSR;                    /*!< Offset: 0xFB4 (R/ )  ITM Lock Status Register */
+        uint32_t RESERVED3[27U];
+  __IM  uint32_t ITREAD;                 /*!< Offset: 0xEF0 (R/ )  ITM Integration Read Register */
+        uint32_t RESERVED4[1U];
+  __OM  uint32_t ITWRITE;                /*!< Offset: 0xEF8 ( /W)  ITM Integration Write Register */
         uint32_t RESERVED5[1U];
+  __IOM uint32_t ITCTRL;                 /*!< Offset: 0xF00 (R/W)  ITM Integration Mode Control Register */
+        uint32_t RESERVED6[46U];
   __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  ITM Device Architecture Register */
-        uint32_t RESERVED6[3U];
+        uint32_t RESERVED7[3U];
   __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  ITM Device Type Register */
   __IM  uint32_t PID4;                   /*!< Offset: 0xFD0 (R/ )  ITM Peripheral Identification Register #4 */
   __IM  uint32_t PID5;                   /*!< Offset: 0xFD4 (R/ )  ITM Peripheral Identification Register #5 */
@@ -1150,15 +1200,23 @@ typedef struct
 #define ITM_TCR_ITMENA_Pos                  0U                                            /*!< ITM TCR: ITM Enable bit Position */
 #define ITM_TCR_ITMENA_Msk                 (1UL /*<< ITM_TCR_ITMENA_Pos*/)                /*!< ITM TCR: ITM Enable bit Mask */
 
-/* ITM Lock Status Register Definitions */
-#define ITM_LSR_ByteAcc_Pos                 2U                                            /*!< ITM LSR: ByteAcc Position */
-#define ITM_LSR_ByteAcc_Msk                (1UL << ITM_LSR_ByteAcc_Pos)                   /*!< ITM LSR: ByteAcc Mask */
+/* ITM Integration Read Register Definitions */
+#define ITM_ITREAD_AFVALID_Pos              1U                                            /*!< ITM ITREAD: AFVALID Position */
+#define ITM_ITREAD_AFVALID_Msk             (0x1UL << ITM_ITREAD_AFVALID_Pos)              /*!< ITM ITREAD: AFVALID Mask */
+
+#define ITM_ITREAD_ATREADY_Pos              0U                                            /*!< ITM ITREAD: ATREADY Position */
+#define ITM_ITREAD_ATREADY_Msk             (0x1UL /*<< ITM_ITREAD_ATREADY_Pos*/)          /*!< ITM ITREAD: ATREADY Mask */
+
+/* ITM Integration Write Register Definitions */
+#define ITM_ITWRITE_AFVALID_Pos             1U                                            /*!< ITM ITWRITE: AFVALID Position */
+#define ITM_ITWRITE_AFVALID_Msk            (0x1UL << ITM_ITWRITE_AFVALID_Pos)             /*!< ITM ITWRITE: AFVALID Mask */
 
-#define ITM_LSR_Access_Pos                  1U                                            /*!< ITM LSR: Access Position */
-#define ITM_LSR_Access_Msk                 (1UL << ITM_LSR_Access_Pos)                    /*!< ITM LSR: Access Mask */
+#define ITM_ITWRITE_ATREADY_Pos             0U                                            /*!< ITM ITWRITE: ATREADY Position */
+#define ITM_ITWRITE_ATREADY_Msk            (0x1UL /*<< ITM_ITWRITE_ATREADY_Pos*/)         /*!< ITM ITWRITE: ATREADY Mask */
 
-#define ITM_LSR_Present_Pos                 0U                                            /*!< ITM LSR: Present Position */
-#define ITM_LSR_Present_Msk                (1UL /*<< ITM_LSR_Present_Pos*/)               /*!< ITM LSR: Present Mask */
+/* ITM Integration Mode Control Register Definitions */
+#define ITM_ITCTRL_IME_Pos                  0U                                            /*!< ITM ITCTRL: IME Position */
+#define ITM_ITCTRL_IME_Msk                 (0x1UL /*<< ITM_ITCTRL_IME_Pos*/)              /*!< ITM ITCTRL: IME Mask */
 
 /*@}*/ /* end of group CMSIS_ITM */
 
@@ -1190,66 +1248,34 @@ typedef struct
   __IOM uint32_t COMP1;                  /*!< Offset: 0x030 (R/W)  Comparator Register 1 */
         uint32_t RESERVED3[1U];
   __IOM uint32_t FUNCTION1;              /*!< Offset: 0x038 (R/W)  Function Register 1 */
-        uint32_t RESERVED4[1U];
+  __IOM uint32_t VMASK1;                 /*!< Offset: 0x03C (R/W)  Comparator Value Mask 1 */
   __IOM uint32_t COMP2;                  /*!< Offset: 0x040 (R/W)  Comparator Register 2 */
-        uint32_t RESERVED5[1U];
+        uint32_t RESERVED4[1U];
   __IOM uint32_t FUNCTION2;              /*!< Offset: 0x048 (R/W)  Function Register 2 */
-        uint32_t RESERVED6[1U];
+        uint32_t RESERVED5[1U];
   __IOM uint32_t COMP3;                  /*!< Offset: 0x050 (R/W)  Comparator Register 3 */
-        uint32_t RESERVED7[1U];
+        uint32_t RESERVED6[1U];
   __IOM uint32_t FUNCTION3;              /*!< Offset: 0x058 (R/W)  Function Register 3 */
-        uint32_t RESERVED8[1U];
+  __IOM uint32_t VMASK3;                 /*!< Offset: 0x05C (R/W)  Comparator Value Mask 3 */
   __IOM uint32_t COMP4;                  /*!< Offset: 0x060 (R/W)  Comparator Register 4 */
-        uint32_t RESERVED9[1U];
+        uint32_t RESERVED7[1U];
   __IOM uint32_t FUNCTION4;              /*!< Offset: 0x068 (R/W)  Function Register 4 */
-        uint32_t RESERVED10[1U];
+        uint32_t RESERVED8[1U];
   __IOM uint32_t COMP5;                  /*!< Offset: 0x070 (R/W)  Comparator Register 5 */
-        uint32_t RESERVED11[1U];
+        uint32_t RESERVED9[1U];
   __IOM uint32_t FUNCTION5;              /*!< Offset: 0x078 (R/W)  Function Register 5 */
-        uint32_t RESERVED12[1U];
+        uint32_t RESERVED10[1U];
   __IOM uint32_t COMP6;                  /*!< Offset: 0x080 (R/W)  Comparator Register 6 */
-        uint32_t RESERVED13[1U];
+        uint32_t RESERVED11[1U];
   __IOM uint32_t FUNCTION6;              /*!< Offset: 0x088 (R/W)  Function Register 6 */
-        uint32_t RESERVED14[1U];
+        uint32_t RESERVED12[1U];
   __IOM uint32_t COMP7;                  /*!< Offset: 0x090 (R/W)  Comparator Register 7 */
-        uint32_t RESERVED15[1U];
+        uint32_t RESERVED13[1U];
   __IOM uint32_t FUNCTION7;              /*!< Offset: 0x098 (R/W)  Function Register 7 */
-        uint32_t RESERVED16[1U];
-  __IOM uint32_t COMP8;                  /*!< Offset: 0x0A0 (R/W)  Comparator Register 8 */
-        uint32_t RESERVED17[1U];
-  __IOM uint32_t FUNCTION8;              /*!< Offset: 0x0A8 (R/W)  Function Register 8 */
-        uint32_t RESERVED18[1U];
-  __IOM uint32_t COMP9;                  /*!< Offset: 0x0B0 (R/W)  Comparator Register 9 */
-        uint32_t RESERVED19[1U];
-  __IOM uint32_t FUNCTION9;              /*!< Offset: 0x0B8 (R/W)  Function Register 9 */
-        uint32_t RESERVED20[1U];
-  __IOM uint32_t COMP10;                 /*!< Offset: 0x0C0 (R/W)  Comparator Register 10 */
-        uint32_t RESERVED21[1U];
-  __IOM uint32_t FUNCTION10;             /*!< Offset: 0x0C8 (R/W)  Function Register 10 */
-        uint32_t RESERVED22[1U];
-  __IOM uint32_t COMP11;                 /*!< Offset: 0x0D0 (R/W)  Comparator Register 11 */
-        uint32_t RESERVED23[1U];
-  __IOM uint32_t FUNCTION11;             /*!< Offset: 0x0D8 (R/W)  Function Register 11 */
-        uint32_t RESERVED24[1U];
-  __IOM uint32_t COMP12;                 /*!< Offset: 0x0E0 (R/W)  Comparator Register 12 */
-        uint32_t RESERVED25[1U];
-  __IOM uint32_t FUNCTION12;             /*!< Offset: 0x0E8 (R/W)  Function Register 12 */
-        uint32_t RESERVED26[1U];
-  __IOM uint32_t COMP13;                 /*!< Offset: 0x0F0 (R/W)  Comparator Register 13 */
-        uint32_t RESERVED27[1U];
-  __IOM uint32_t FUNCTION13;             /*!< Offset: 0x0F8 (R/W)  Function Register 13 */
-        uint32_t RESERVED28[1U];
-  __IOM uint32_t COMP14;                 /*!< Offset: 0x100 (R/W)  Comparator Register 14 */
-        uint32_t RESERVED29[1U];
-  __IOM uint32_t FUNCTION14;             /*!< Offset: 0x108 (R/W)  Function Register 14 */
-        uint32_t RESERVED30[1U];
-  __IOM uint32_t COMP15;                 /*!< Offset: 0x110 (R/W)  Comparator Register 15 */
-        uint32_t RESERVED31[1U];
-  __IOM uint32_t FUNCTION15;             /*!< Offset: 0x118 (R/W)  Function Register 15 */
-        uint32_t RESERVED32[934U];
-  __IM  uint32_t LSR;                    /*!< Offset: 0xFB4 (R  )  Lock Status Register */
-        uint32_t RESERVED33[1U];
-  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  Device Architecture Register */
+        uint32_t RESERVED14[968U];
+  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  Device Type Architecture Register */
+        uint32_t RESERVED15[3U];
+  __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  Device Type Identifier Register */
 } DWT_Type;
 
 /* DWT Control Register Definitions */
@@ -1341,7 +1367,7 @@ typedef struct
 #define DWT_FUNCTION_DATAVSIZE_Msk         (0x3UL << DWT_FUNCTION_DATAVSIZE_Pos)       /*!< DWT FUNCTION: DATAVSIZE Mask */
 
 #define DWT_FUNCTION_ACTION_Pos             4U                                         /*!< DWT FUNCTION: ACTION Position */
-#define DWT_FUNCTION_ACTION_Msk            (0x1UL << DWT_FUNCTION_ACTION_Pos)          /*!< DWT FUNCTION: ACTION Mask */
+#define DWT_FUNCTION_ACTION_Msk            (0x3UL << DWT_FUNCTION_ACTION_Pos)          /*!< DWT FUNCTION: ACTION Mask */
 
 #define DWT_FUNCTION_MATCH_Pos              0U                                         /*!< DWT FUNCTION: MATCH Position */
 #define DWT_FUNCTION_MATCH_Msk             (0xFUL /*<< DWT_FUNCTION_MATCH_Pos*/)       /*!< DWT FUNCTION: MATCH Mask */
@@ -1349,6 +1375,456 @@ typedef struct
 /*@}*/ /* end of group CMSIS_DWT */
 
 
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup MemSysCtl_Type     Memory System Control Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Memory System Control Registers (MEMSYSCTL)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Memory System Control Registers (MEMSYSCTL).
+ */
+typedef struct
+{
+  __IOM uint32_t MSCR;                   /*!< Offset: 0x000 (R/W)  Memory System Control Register */
+  __IOM uint32_t PFCR;                   /*!< Offset: 0x004 (R/W)  Prefetcher Control Register */
+        uint32_t RESERVED1[2U];          /* 0x008-0x00F: gap so that ITCMCR sits at its documented 0x010 */
+  __IOM uint32_t ITCMCR;                 /*!< Offset: 0x010 (R/W)  ITCM Control Register */
+  __IOM uint32_t DTCMCR;                 /*!< Offset: 0x014 (R/W)  DTCM Control Register */
+  __IOM uint32_t PAHBCR;                 /*!< Offset: 0x018 (R/W)  P-AHB Control Register */
+        uint32_t RESERVED2[313U];        /* 0x01C-0x4FF: gap so that ITGU_CTRL sits at its documented 0x500 */
+  __IOM uint32_t ITGU_CTRL;              /*!< Offset: 0x500 (R/W)  ITGU Control Register */
+  __IOM uint32_t ITGU_CFG;               /*!< Offset: 0x504 (R/W)  ITGU Configuration Register */
+        uint32_t RESERVED3[2U];          /* 0x508-0x50F: gap so that ITGU_LUT starts at its documented 0x510 */
+  __IOM uint32_t ITGU_LUT[16U];          /*!< Offset: 0x510 (R/W)  ITGU Look Up Table Register */
+        uint32_t RESERVED4[44U];         /* 0x550-0x5FF: gap so that DTGU_CTRL sits at its documented 0x600 */
+  __IOM uint32_t DTGU_CTRL;              /*!< Offset: 0x600 (R/W)  DTGU Control Register */
+  __IOM uint32_t DTGU_CFG;               /*!< Offset: 0x604 (R/W)  DTGU Configuration Register */
+        uint32_t RESERVED5[2U];          /* 0x608-0x60F: gap so that DTGU_LUT starts at its documented 0x610 */
+  __IOM uint32_t DTGU_LUT[16U];          /*!< Offset: 0x610 (R/W)  DTGU Look Up Table Register */
+} MemSysCtl_Type;
+
+/* MEMSYSCTL Memory System Control Register (MSCR) Register Definitions */
+#define MEMSYSCTL_MSCR_CPWRDN_Pos          17U                                         /*!< MEMSYSCTL MSCR: CPWRDN Position */
+#define MEMSYSCTL_MSCR_CPWRDN_Msk          (0x1UL << MEMSYSCTL_MSCR_CPWRDN_Pos)        /*!< MEMSYSCTL MSCR: CPWRDN Mask */
+
+#define MEMSYSCTL_MSCR_DCCLEAN_Pos         16U                                         /*!< MEMSYSCTL MSCR: DCCLEAN Position */
+#define MEMSYSCTL_MSCR_DCCLEAN_Msk         (0x1UL << MEMSYSCTL_MSCR_DCCLEAN_Pos)       /*!< MEMSYSCTL MSCR: DCCLEAN Mask */
+
+#define MEMSYSCTL_MSCR_ICACTIVE_Pos        13U                                         /*!< MEMSYSCTL MSCR: ICACTIVE Position */
+#define MEMSYSCTL_MSCR_ICACTIVE_Msk        (0x1UL << MEMSYSCTL_MSCR_ICACTIVE_Pos)      /*!< MEMSYSCTL MSCR: ICACTIVE Mask */
+
+#define MEMSYSCTL_MSCR_DCACTIVE_Pos        12U                                         /*!< MEMSYSCTL MSCR: DCACTIVE Position */
+#define MEMSYSCTL_MSCR_DCACTIVE_Msk        (0x1UL << MEMSYSCTL_MSCR_DCACTIVE_Pos)      /*!< MEMSYSCTL MSCR: DCACTIVE Mask */
+
+#define MEMSYSCTL_MSCR_TECCCHKDIS_Pos       4U                                         /*!< MEMSYSCTL MSCR: TECCCHKDIS Position */
+#define MEMSYSCTL_MSCR_TECCCHKDIS_Msk      (0x1UL << MEMSYSCTL_MSCR_TECCCHKDIS_Pos)    /*!< MEMSYSCTL MSCR: TECCCHKDIS Mask */
+
+#define MEMSYSCTL_MSCR_EVECCFAULT_Pos       3U                                         /*!< MEMSYSCTL MSCR: EVECCFAULT Position */
+#define MEMSYSCTL_MSCR_EVECCFAULT_Msk      (0x1UL << MEMSYSCTL_MSCR_EVECCFAULT_Pos)    /*!< MEMSYSCTL MSCR: EVECCFAULT Mask */
+
+#define MEMSYSCTL_MSCR_FORCEWT_Pos          2U                                         /*!< MEMSYSCTL MSCR: FORCEWT Position */
+#define MEMSYSCTL_MSCR_FORCEWT_Msk         (0x1UL << MEMSYSCTL_MSCR_FORCEWT_Pos)       /*!< MEMSYSCTL MSCR: FORCEWT Mask */
+
+#define MEMSYSCTL_MSCR_ECCEN_Pos            1U                                         /*!< MEMSYSCTL MSCR: ECCEN Position */
+#define MEMSYSCTL_MSCR_ECCEN_Msk           (0x1UL << MEMSYSCTL_MSCR_ECCEN_Pos)         /*!< MEMSYSCTL MSCR: ECCEN Mask */
+
+/* MEMSYSCTL Prefetcher Control Register (PFCR) Register Definitions */
+#define MEMSYSCTL_PFCR_MAX_OS_Pos           7U                                         /*!< MEMSYSCTL PFCR: MAX_OS Position */
+#define MEMSYSCTL_PFCR_MAX_OS_Msk          (0x7UL << MEMSYSCTL_PFCR_MAX_OS_Pos)        /*!< MEMSYSCTL PFCR: MAX_OS Mask */
+
+#define MEMSYSCTL_PFCR_MAX_LA_Pos           4U                                         /*!< MEMSYSCTL PFCR: MAX_LA Position */
+#define MEMSYSCTL_PFCR_MAX_LA_Msk          (0x7UL << MEMSYSCTL_PFCR_MAX_LA_Pos)        /*!< MEMSYSCTL PFCR: MAX_LA Mask */
+
+#define MEMSYSCTL_PFCR_MIN_LA_Pos           1U                                         /*!< MEMSYSCTL PFCR: MIN_LA Position */
+#define MEMSYSCTL_PFCR_MIN_LA_Msk          (0x7UL << MEMSYSCTL_PFCR_MIN_LA_Pos)        /*!< MEMSYSCTL PFCR: MIN_LA Mask */
+
+#define MEMSYSCTL_PFCR_ENABLE_Pos           0U                                         /*!< MEMSYSCTL PFCR: ENABLE Position */
+#define MEMSYSCTL_PFCR_ENABLE_Msk          (0x1UL /*<< MEMSYSCTL_PFCR_ENABLE_Pos*/)    /*!< MEMSYSCTL PFCR: ENABLE Mask */
+
+/* MEMSYSCTL ITCM Control Register (ITCMCR) Register Definitions */
+#define MEMSYSCTL_ITCMCR_SZ_Pos             3U                                         /*!< MEMSYSCTL ITCMCR: SZ Position */
+#define MEMSYSCTL_ITCMCR_SZ_Msk            (0xFUL << MEMSYSCTL_ITCMCR_SZ_Pos)          /*!< MEMSYSCTL ITCMCR: SZ Mask */
+
+#define MEMSYSCTL_ITCMCR_EN_Pos             0U                                         /*!< MEMSYSCTL ITCMCR: EN Position */
+#define MEMSYSCTL_ITCMCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_ITCMCR_EN_Pos*/)      /*!< MEMSYSCTL ITCMCR: EN Mask */
+
+/* MEMSYSCTL DTCM Control Register (DTCMCR) Register Definitions */
+#define MEMSYSCTL_DTCMCR_SZ_Pos             3U                                         /*!< MEMSYSCTL DTCMCR: SZ Position */
+#define MEMSYSCTL_DTCMCR_SZ_Msk            (0xFUL << MEMSYSCTL_DTCMCR_SZ_Pos)          /*!< MEMSYSCTL DTCMCR: SZ Mask */
+
+#define MEMSYSCTL_DTCMCR_EN_Pos             0U                                         /*!< MEMSYSCTL DTCMCR: EN Position */
+#define MEMSYSCTL_DTCMCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_DTCMCR_EN_Pos*/)      /*!< MEMSYSCTL DTCMCR: EN Mask */
+
+/* MEMSYSCTL P-AHB Control Register (PAHBCR) Register Definitions */
+#define MEMSYSCTL_PAHBCR_SZ_Pos             1U                                         /*!< MEMSYSCTL PAHBCR: SZ Position */
+#define MEMSYSCTL_PAHBCR_SZ_Msk            (0x7UL << MEMSYSCTL_PAHBCR_SZ_Pos)          /*!< MEMSYSCTL PAHBCR: SZ Mask */
+
+#define MEMSYSCTL_PAHBCR_EN_Pos             0U                                         /*!< MEMSYSCTL PAHBCR: EN Position */
+#define MEMSYSCTL_PAHBCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_PAHBCR_EN_Pos*/)      /*!< MEMSYSCTL PAHBCR: EN Mask */
+
+/* MEMSYSCTL ITGU Control Register (ITGU_CTRL) Register Definitions */
+#define MEMSYSCTL_ITGU_CTRL_DEREN_Pos       1U                                         /*!< MEMSYSCTL ITGU_CTRL: DEREN Position */
+#define MEMSYSCTL_ITGU_CTRL_DEREN_Msk      (0x1UL << MEMSYSCTL_ITGU_CTRL_DEREN_Pos)    /*!< MEMSYSCTL ITGU_CTRL: DEREN Mask */
+
+#define MEMSYSCTL_ITGU_CTRL_DBFEN_Pos       0U                                         /*!< MEMSYSCTL ITGU_CTRL: DBFEN Position */
+#define MEMSYSCTL_ITGU_CTRL_DBFEN_Msk      (0x1UL /*<< MEMSYSCTL_ITGU_CTRL_DBFEN_Pos*/) /*!< MEMSYSCTL ITGU_CTRL: DBFEN Mask */
+
+/* MEMSYSCTL ITGU Configuration Register (ITGU_CFG) Register Definitions */
+#define MEMSYSCTL_ITGU_CFG_PRESENT_Pos     31U                                         /*!< MEMSYSCTL ITGU_CFG: PRESENT Position */
+#define MEMSYSCTL_ITGU_CFG_PRESENT_Msk     (0x1UL << MEMSYSCTL_ITGU_CFG_PRESENT_Pos)   /*!< MEMSYSCTL ITGU_CFG: PRESENT Mask */
+
+#define MEMSYSCTL_ITGU_CFG_NUMBLKS_Pos      8U                                         /*!< MEMSYSCTL ITGU_CFG: NUMBLKS Position */
+#define MEMSYSCTL_ITGU_CFG_NUMBLKS_Msk     (0xFUL << MEMSYSCTL_ITGU_CFG_NUMBLKS_Pos)   /*!< MEMSYSCTL ITGU_CFG: NUMBLKS Mask */
+
+#define MEMSYSCTL_ITGU_CFG_BLKSZ_Pos        0U                                         /*!< MEMSYSCTL ITGU_CFG: BLKSZ Position */
+#define MEMSYSCTL_ITGU_CFG_BLKSZ_Msk       (0xFUL /*<< MEMSYSCTL_ITGU_CFG_BLKSZ_Pos*/) /*!< MEMSYSCTL ITGU_CFG: BLKSZ Mask */
+
+/* MEMSYSCTL DTGU Control Register (DTGU_CTRL) Register Definitions */
+#define MEMSYSCTL_DTGU_CTRL_DEREN_Pos       1U                                         /*!< MEMSYSCTL DTGU_CTRL: DEREN Position */
+#define MEMSYSCTL_DTGU_CTRL_DEREN_Msk      (0x1UL << MEMSYSCTL_DTGU_CTRL_DEREN_Pos)    /*!< MEMSYSCTL DTGU_CTRL: DEREN Mask */
+
+#define MEMSYSCTL_DTGU_CTRL_DBFEN_Pos       0U                                         /*!< MEMSYSCTL DTGU_CTRL: DBFEN Position */
+#define MEMSYSCTL_DTGU_CTRL_DBFEN_Msk      (0x1UL /*<< MEMSYSCTL_DTGU_CTRL_DBFEN_Pos*/) /*!< MEMSYSCTL DTGU_CTRL: DBFEN Mask */
+
+/* MEMSYSCTL DTGU Configuration Register (DTGU_CFG) Register Definitions */
+#define MEMSYSCTL_DTGU_CFG_PRESENT_Pos     31U                                         /*!< MEMSYSCTL DTGU_CFG: PRESENT Position */
+#define MEMSYSCTL_DTGU_CFG_PRESENT_Msk     (0x1UL << MEMSYSCTL_DTGU_CFG_PRESENT_Pos)   /*!< MEMSYSCTL DTGU_CFG: PRESENT Mask */
+
+#define MEMSYSCTL_DTGU_CFG_NUMBLKS_Pos      8U                                         /*!< MEMSYSCTL DTGU_CFG: NUMBLKS Position */
+#define MEMSYSCTL_DTGU_CFG_NUMBLKS_Msk     (0xFUL << MEMSYSCTL_DTGU_CFG_NUMBLKS_Pos)   /*!< MEMSYSCTL DTGU_CFG: NUMBLKS Mask */
+
+#define MEMSYSCTL_DTGU_CFG_BLKSZ_Pos        0U                                         /*!< MEMSYSCTL DTGU_CFG: BLKSZ Position */
+#define MEMSYSCTL_DTGU_CFG_BLKSZ_Msk       (0xFUL /*<< MEMSYSCTL_DTGU_CFG_BLKSZ_Pos*/) /*!< MEMSYSCTL DTGU_CFG: BLKSZ Mask */
+
+
+/*@}*/ /* end of group MemSysCtl_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup PwrModCtl_Type     Power Mode Control Registers
+  \brief    Type definitions for the Power Mode Control Registers (PWRMODCTL)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Power Mode Control Registers (PWRMODCTL).
+ */
+typedef struct
+{
+  __IOM uint32_t CPDLPSTATE;             /*!< Offset: 0x000 (R/W)  Core Power Domain Low Power State Register */
+  __IOM uint32_t DPDLPSTATE;             /*!< Offset: 0x004 (R/W)  Debug Power Domain Low Power State Register */
+} PwrModCtl_Type;
+
+/* PWRMODCTL Core Power Domain Low Power State (CPDLPSTATE) Register Definitions */
+#define PWRMODCTL_CPDLPSTATE_RLPSTATE_Pos   8U                                              /*!< PWRMODCTL CPDLPSTATE: RLPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_RLPSTATE_Msk  (0x3UL << PWRMODCTL_CPDLPSTATE_RLPSTATE_Pos)     /*!< PWRMODCTL CPDLPSTATE: RLPSTATE Mask */
+
+#define PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos   4U                                              /*!< PWRMODCTL CPDLPSTATE: ELPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk  (0x3UL << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos)     /*!< PWRMODCTL CPDLPSTATE: ELPSTATE Mask */
+
+#define PWRMODCTL_CPDLPSTATE_CLPSTATE_Pos   0U                                              /*!< PWRMODCTL CPDLPSTATE: CLPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_CLPSTATE_Msk  (0x3UL /*<< PWRMODCTL_CPDLPSTATE_CLPSTATE_Pos*/) /*!< PWRMODCTL CPDLPSTATE: CLPSTATE Mask */
+
+/* PWRMODCTL Debug Power Domain Low Power State (DPDLPSTATE) Register Definitions */
+#define PWRMODCTL_DPDLPSTATE_DLPSTATE_Pos   0U                                              /*!< PWRMODCTL DPDLPSTATE: DLPSTATE Position */
+#define PWRMODCTL_DPDLPSTATE_DLPSTATE_Msk  (0x3UL /*<< PWRMODCTL_DPDLPSTATE_DLPSTATE_Pos*/) /*!< PWRMODCTL DPDLPSTATE: DLPSTATE Mask */
+
+/*@}*/ /* end of group PwrModCtl_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup EWIC_Type     External Wakeup Interrupt Controller Registers
+  \brief    Type definitions for the External Wakeup Interrupt Controller Registers (EWIC)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the External Wakeup Interrupt Controller Registers (EWIC).
+ */
+typedef struct
+{
+  __OM  uint32_t EVENTSPR;               /*!< Offset: 0x000 ( /W)  Event Set Pending Register */
+        uint32_t RESERVED0[31U];         /* 0x004-0x07F: gap so that EVENTMASKA sits at its documented 0x080 */
+  __IM  uint32_t EVENTMASKA;             /*!< Offset: 0x080 (R/W)  Event Mask A Register */ /* NOTE(review): qualifier is __IM but access is listed as R/W - confirm which is intended */
+  __IM  uint32_t EVENTMASK[15];          /*!< Offset: 0x084 (R/W)  Event Mask Register */ /* NOTE(review): qualifier is __IM but access is listed as R/W - confirm which is intended */
+} EWIC_Type;
+
+/* EWIC External Wakeup Interrupt Controller (EVENTSPR) Register Definitions */
+#define EWIC_EVENTSPR_EDBGREQ_Pos   2U                                                 /*!< EWIC EVENTSPR: EDBGREQ Position */
+#define EWIC_EVENTSPR_EDBGREQ_Msk  (0x1UL << EWIC_EVENTSPR_EDBGREQ_Pos)                /*!< EWIC EVENTSPR: EDBGREQ Mask */
+
+#define EWIC_EVENTSPR_NMI_Pos   1U                                                     /*!< EWIC EVENTSPR: NMI Position */
+#define EWIC_EVENTSPR_NMI_Msk  (0x1UL << EWIC_EVENTSPR_NMI_Pos)                        /*!< EWIC EVENTSPR: NMI Mask */
+
+#define EWIC_EVENTSPR_EVENT_Pos   0U                                                   /*!< EWIC EVENTSPR: EVENT Position */
+#define EWIC_EVENTSPR_EVENT_Msk  (0x1UL /*<< EWIC_EVENTSPR_EVENT_Pos*/)                /*!< EWIC EVENTSPR: EVENT Mask */
+
+/* EWIC External Wakeup Interrupt Controller (EVENTMASKA) Register Definitions */
+#define EWIC_EVENTMASKA_EDBGREQ_Pos   2U                                               /*!< EWIC EVENTMASKA: EDBGREQ Position */
+#define EWIC_EVENTMASKA_EDBGREQ_Msk  (0x1UL << EWIC_EVENTMASKA_EDBGREQ_Pos)            /*!< EWIC EVENTMASKA: EDBGREQ Mask */
+
+#define EWIC_EVENTMASKA_NMI_Pos   1U                                                   /*!< EWIC EVENTMASKA: NMI Position */
+#define EWIC_EVENTMASKA_NMI_Msk  (0x1UL << EWIC_EVENTMASKA_NMI_Pos)                    /*!< EWIC EVENTMASKA: NMI Mask */
+
+#define EWIC_EVENTMASKA_EVENT_Pos   0U                                                 /*!< EWIC EVENTMASKA: EVENT Position */
+#define EWIC_EVENTMASKA_EVENT_Msk  (0x1UL /*<< EWIC_EVENTMASKA_EVENT_Pos*/)            /*!< EWIC EVENTMASKA: EVENT Mask */
+
+/* EWIC External Wakeup Interrupt Controller (EVENTMASK) Register Definitions */
+#define EWIC_EVENTMASK_IRQ_Pos   0U                                                    /*!< EWIC EVENTMASK: IRQ Position */
+#define EWIC_EVENTMASK_IRQ_Msk  (0xFFFFFFFFUL /*<< EWIC_EVENTMASK_IRQ_Pos*/)           /*!< EWIC EVENTMASK: IRQ Mask */
+
+/*@}*/ /* end of group EWIC_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup ErrBnk_Type     Error Banking Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Error Banking Registers (ERRBNK)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Error Banking Registers (ERRBNK).
+ */
+typedef struct
+{
+  __IOM uint32_t IEBR0;                  /*!< Offset: 0x000 (R/W)  Instruction Cache Error Bank Register 0 */
+  __IOM uint32_t IEBR1;                  /*!< Offset: 0x004 (R/W)  Instruction Cache Error Bank Register 1 */
+        uint32_t RESERVED0[2U];          /* 0x008-0x00F: gap so that DEBR0 sits at its documented 0x010 */
+  __IOM uint32_t DEBR0;                  /*!< Offset: 0x010 (R/W)  Data Cache Error Bank Register 0 */
+  __IOM uint32_t DEBR1;                  /*!< Offset: 0x014 (R/W)  Data Cache Error Bank Register 1 */
+        uint32_t RESERVED1[2U];          /* 0x018-0x01F: gap so that TEBR0 sits at its documented 0x020 */
+  __IOM uint32_t TEBR0;                  /*!< Offset: 0x020 (R/W)  TCM Error Bank Register 0 */
+        uint32_t RESERVED2[1U];          /* 0x024-0x027: TEBR0 and TEBR1 are 8 bytes apart, unlike the IEBR/DEBR pairs */
+  __IOM uint32_t TEBR1;                  /*!< Offset: 0x028 (R/W)  TCM Error Bank Register 1 */
+} ErrBnk_Type;
+
+/* ERRBNK Instruction Cache Error Bank Register 0 (IEBR0) Register Definitions */
+#define ERRBNK_IEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK IEBR0: SWDEF Position */
+#define ERRBNK_IEBR0_SWDEF_Msk             (0x3UL << ERRBNK_IEBR0_SWDEF_Pos)           /*!< ERRBNK IEBR0: SWDEF Mask */
+
+#define ERRBNK_IEBR0_BANK_Pos              16U                                         /*!< ERRBNK IEBR0: BANK Position */
+#define ERRBNK_IEBR0_BANK_Msk              (0x1UL << ERRBNK_IEBR0_BANK_Pos)            /*!< ERRBNK IEBR0: BANK Mask */
+
+#define ERRBNK_IEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK IEBR0: LOCATION Position */
+#define ERRBNK_IEBR0_LOCATION_Msk          (0x3FFFUL << ERRBNK_IEBR0_LOCATION_Pos)     /*!< ERRBNK IEBR0: LOCATION Mask */
+
+#define ERRBNK_IEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK IEBR0: LOCKED Position */
+#define ERRBNK_IEBR0_LOCKED_Msk            (0x1UL << ERRBNK_IEBR0_LOCKED_Pos)          /*!< ERRBNK IEBR0: LOCKED Mask */
+
+#define ERRBNK_IEBR0_VALID_Pos              0U                                         /*!< ERRBNK IEBR0: VALID Position */
+#define ERRBNK_IEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_IEBR0_VALID_Pos*/)       /*!< ERRBNK IEBR0: VALID Mask */
+
+/* ERRBNK Instruction Cache Error Bank Register 1 (IEBR1) Register Definitions */
+#define ERRBNK_IEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK IEBR1: SWDEF Position */
+#define ERRBNK_IEBR1_SWDEF_Msk             (0x3UL << ERRBNK_IEBR1_SWDEF_Pos)           /*!< ERRBNK IEBR1: SWDEF Mask */
+
+#define ERRBNK_IEBR1_BANK_Pos              16U                                         /*!< ERRBNK IEBR1: BANK Position */
+#define ERRBNK_IEBR1_BANK_Msk              (0x1UL << ERRBNK_IEBR1_BANK_Pos)            /*!< ERRBNK IEBR1: BANK Mask */
+
+#define ERRBNK_IEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK IEBR1: LOCATION Position */
+#define ERRBNK_IEBR1_LOCATION_Msk          (0x3FFFUL << ERRBNK_IEBR1_LOCATION_Pos)     /*!< ERRBNK IEBR1: LOCATION Mask */
+
+#define ERRBNK_IEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK IEBR1: LOCKED Position */
+#define ERRBNK_IEBR1_LOCKED_Msk            (0x1UL << ERRBNK_IEBR1_LOCKED_Pos)          /*!< ERRBNK IEBR1: LOCKED Mask */
+
+#define ERRBNK_IEBR1_VALID_Pos              0U                                         /*!< ERRBNK IEBR1: VALID Position */
+#define ERRBNK_IEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_IEBR1_VALID_Pos*/)       /*!< ERRBNK IEBR1: VALID Mask */
+
+/* ERRBNK Data Cache Error Bank Register 0 (DEBR0) Register Definitions */
+#define ERRBNK_DEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK DEBR0: SWDEF Position */
+#define ERRBNK_DEBR0_SWDEF_Msk             (0x3UL << ERRBNK_DEBR0_SWDEF_Pos)           /*!< ERRBNK DEBR0: SWDEF Mask */
+
+#define ERRBNK_DEBR0_TYPE_Pos              17U                                         /*!< ERRBNK DEBR0: TYPE Position */
+#define ERRBNK_DEBR0_TYPE_Msk              (0x1UL << ERRBNK_DEBR0_TYPE_Pos)            /*!< ERRBNK DEBR0: TYPE Mask */
+
+#define ERRBNK_DEBR0_BANK_Pos              16U                                         /*!< ERRBNK DEBR0: BANK Position */
+#define ERRBNK_DEBR0_BANK_Msk              (0x1UL << ERRBNK_DEBR0_BANK_Pos)            /*!< ERRBNK DEBR0: BANK Mask */
+
+#define ERRBNK_DEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK DEBR0: LOCATION Position */
+#define ERRBNK_DEBR0_LOCATION_Msk          (0x3FFFUL << ERRBNK_DEBR0_LOCATION_Pos)     /*!< ERRBNK DEBR0: LOCATION Mask */
+
+#define ERRBNK_DEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK DEBR0: LOCKED Position */
+#define ERRBNK_DEBR0_LOCKED_Msk            (0x1UL << ERRBNK_DEBR0_LOCKED_Pos)          /*!< ERRBNK DEBR0: LOCKED Mask */
+
+#define ERRBNK_DEBR0_VALID_Pos              0U                                         /*!< ERRBNK DEBR0: VALID Position */
+#define ERRBNK_DEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_DEBR0_VALID_Pos*/)       /*!< ERRBNK DEBR0: VALID Mask */
+
+/* ERRBNK Data Cache Error Bank Register 1 (DEBR1) Register Definitions */
+#define ERRBNK_DEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK DEBR1: SWDEF Position */
+#define ERRBNK_DEBR1_SWDEF_Msk             (0x3UL << ERRBNK_DEBR1_SWDEF_Pos)           /*!< ERRBNK DEBR1: SWDEF Mask */
+
+#define ERRBNK_DEBR1_TYPE_Pos              17U                                         /*!< ERRBNK DEBR1: TYPE Position */
+#define ERRBNK_DEBR1_TYPE_Msk              (0x1UL << ERRBNK_DEBR1_TYPE_Pos)            /*!< ERRBNK DEBR1: TYPE Mask */
+
+#define ERRBNK_DEBR1_BANK_Pos              16U                                         /*!< ERRBNK DEBR1: BANK Position */
+#define ERRBNK_DEBR1_BANK_Msk              (0x1UL << ERRBNK_DEBR1_BANK_Pos)            /*!< ERRBNK DEBR1: BANK Mask */
+
+#define ERRBNK_DEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK DEBR1: LOCATION Position */
+#define ERRBNK_DEBR1_LOCATION_Msk          (0x3FFFUL << ERRBNK_DEBR1_LOCATION_Pos)     /*!< ERRBNK DEBR1: LOCATION Mask */
+
+#define ERRBNK_DEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK DEBR1: LOCKED Position */
+#define ERRBNK_DEBR1_LOCKED_Msk            (0x1UL << ERRBNK_DEBR1_LOCKED_Pos)          /*!< ERRBNK DEBR1: LOCKED Mask */
+
+#define ERRBNK_DEBR1_VALID_Pos              0U                                         /*!< ERRBNK DEBR1: VALID Position */
+#define ERRBNK_DEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_DEBR1_VALID_Pos*/)       /*!< ERRBNK DEBR1: VALID Mask */
+
+/* ERRBNK TCM Error Bank Register 0 (TEBR0) Register Definitions */
+#define ERRBNK_TEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK TEBR0: SWDEF Position */
+#define ERRBNK_TEBR0_SWDEF_Msk             (0x3UL << ERRBNK_TEBR0_SWDEF_Pos)           /*!< ERRBNK TEBR0: SWDEF Mask */
+
+#define ERRBNK_TEBR0_POISON_Pos            28U                                         /*!< ERRBNK TEBR0: POISON Position */
+#define ERRBNK_TEBR0_POISON_Msk            (0x1UL << ERRBNK_TEBR0_POISON_Pos)          /*!< ERRBNK TEBR0: POISON Mask */
+
+#define ERRBNK_TEBR0_TYPE_Pos              27U                                         /*!< ERRBNK TEBR0: TYPE Position */
+#define ERRBNK_TEBR0_TYPE_Msk              (0x1UL << ERRBNK_TEBR0_TYPE_Pos)            /*!< ERRBNK TEBR0: TYPE Mask */
+
+#define ERRBNK_TEBR0_BANK_Pos              24U                                         /*!< ERRBNK TEBR0: BANK Position */
+#define ERRBNK_TEBR0_BANK_Msk              (0x3UL << ERRBNK_TEBR0_BANK_Pos)            /*!< ERRBNK TEBR0: BANK Mask */
+
+#define ERRBNK_TEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK TEBR0: LOCATION Position */
+#define ERRBNK_TEBR0_LOCATION_Msk          (0x3FFFFFUL << ERRBNK_TEBR0_LOCATION_Pos)   /*!< ERRBNK TEBR0: LOCATION Mask */
+
+#define ERRBNK_TEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK TEBR0: LOCKED Position */
+#define ERRBNK_TEBR0_LOCKED_Msk            (0x1UL << ERRBNK_TEBR0_LOCKED_Pos)          /*!< ERRBNK TEBR0: LOCKED Mask */
+
+#define ERRBNK_TEBR0_VALID_Pos              0U                                         /*!< ERRBNK TEBR0: VALID Position */
+#define ERRBNK_TEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_TEBR0_VALID_Pos*/)       /*!< ERRBNK TEBR0: VALID Mask */
+
+/* ERRBNK TCM Error Bank Register 1 (TEBR1) Register Definitions */
+#define ERRBNK_TEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK TEBR1: SWDEF Position */
+#define ERRBNK_TEBR1_SWDEF_Msk             (0x3UL << ERRBNK_TEBR1_SWDEF_Pos)           /*!< ERRBNK TEBR1: SWDEF Mask */
+
+#define ERRBNK_TEBR1_POISON_Pos            28U                                         /*!< ERRBNK TEBR1: POISON Position */
+#define ERRBNK_TEBR1_POISON_Msk            (0x1UL << ERRBNK_TEBR1_POISON_Pos)          /*!< ERRBNK TEBR1: POISON Mask */
+
+#define ERRBNK_TEBR1_TYPE_Pos              27U                                         /*!< ERRBNK TEBR1: TYPE Position */
+#define ERRBNK_TEBR1_TYPE_Msk              (0x1UL << ERRBNK_TEBR1_TYPE_Pos)            /*!< ERRBNK TEBR1: TYPE Mask */
+
+#define ERRBNK_TEBR1_BANK_Pos              24U                                         /*!< ERRBNK TEBR1: BANK Position */
+#define ERRBNK_TEBR1_BANK_Msk              (0x3UL << ERRBNK_TEBR1_BANK_Pos)            /*!< ERRBNK TEBR1: BANK Mask */
+
+#define ERRBNK_TEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK TEBR1: LOCATION Position */
+#define ERRBNK_TEBR1_LOCATION_Msk          (0x3FFFFFUL << ERRBNK_TEBR1_LOCATION_Pos)   /*!< ERRBNK TEBR1: LOCATION Mask */
+
+#define ERRBNK_TEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK TEBR1: LOCKED Position */
+#define ERRBNK_TEBR1_LOCKED_Msk            (0x1UL << ERRBNK_TEBR1_LOCKED_Pos)          /*!< ERRBNK TEBR1: LOCKED Mask */
+
+#define ERRBNK_TEBR1_VALID_Pos              0U                                         /*!< ERRBNK TEBR1: VALID Position */
+#define ERRBNK_TEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_TEBR1_VALID_Pos*/)       /*!< ERRBNK TEBR1: VALID Mask */
+
+/*@}*/ /* end of group ErrBnk_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup PrcCfgInf_Type     Processor Configuration Information Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Processor Configuration Information Registers (PRCCFGINF)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Processor Configuration Information Registers (PRCCFGINF).
+ */
+typedef struct
+{
+  __OM  uint32_t CFGINFOSEL;             /*!< Offset: 0x000 ( /W)  Processor Configuration Information Selection Register */
+  __IM  uint32_t CFGINFORD;              /*!< Offset: 0x004 (R/ )  Processor Configuration Information Read Data Register */
+} PrcCfgInf_Type;
+
+/* PRCCFGINF Processor Configuration Information Selection Register (CFGINFOSEL) Definitions */
+
+/* PRCCFGINF Processor Configuration Information Read Data Register (CFGINFORD) Definitions */
+
+/*@}*/ /* end of group PrcCfgInf_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup STL_Type     Software Test Library Observation Registers
+  \brief    Type definitions for the Software Test Library Observation Registers (STL)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Software Test Library Observation Registers (STL).
+ */
+typedef struct
+{
+  __IM  uint32_t STLNVICPENDOR;          /*!< Offset: 0x000 (R/ )  NVIC Pending Priority Tree Register */
+  __IM  uint32_t STLNVICACTVOR;          /*!< Offset: 0x004 (R/ )  NVIC Active Priority Tree Register */
+        uint32_t RESERVED0[2U];          /* 0x008-0x00F: gap so that STLIDMPUSR sits at its documented 0x010 */
+  __OM  uint32_t STLIDMPUSR;             /*!< Offset: 0x010 ( /W)  MPU Sample Register */
+  __IM  uint32_t STLIMPUOR;              /*!< Offset: 0x014 (R/ )  MPU Region Hit Register */
+  __IM  uint32_t STLD0MPUOR;             /*!< Offset: 0x018 (R/ )  MPU Memory Attributes Register 0 */
+  __IM  uint32_t STLD1MPUOR;             /*!< Offset: 0x01C (R/ )  MPU Memory Attributes Register 1 */
+
+} STL_Type;
+
+/* STL Software Test Library Observation Register (STLNVICPENDOR) Definitions */
+#define STL_STLNVICPENDOR_VALID_Pos        18U                                         /*!< STL STLNVICPENDOR: VALID Position */
+#define STL_STLNVICPENDOR_VALID_Msk        (0x1UL << STL_STLNVICPENDOR_VALID_Pos)      /*!< STL STLNVICPENDOR: VALID Mask */
+
+#define STL_STLNVICPENDOR_TARGET_Pos       17U                                         /*!< STL STLNVICPENDOR: TARGET Position */
+#define STL_STLNVICPENDOR_TARGET_Msk       (0x1UL << STL_STLNVICPENDOR_TARGET_Pos)     /*!< STL STLNVICPENDOR: TARGET Mask */
+
+#define STL_STLNVICPENDOR_PRIORITY_Pos      9U                                         /*!< STL STLNVICPENDOR: PRIORITY Position */
+#define STL_STLNVICPENDOR_PRIORITY_Msk     (0xFFUL << STL_STLNVICPENDOR_PRIORITY_Pos)  /*!< STL STLNVICPENDOR: PRIORITY Mask */
+
+#define STL_STLNVICPENDOR_INTNUM_Pos        0U                                         /*!< STL STLNVICPENDOR: INTNUM Position */
+#define STL_STLNVICPENDOR_INTNUM_Msk       (0x1FFUL /*<< STL_STLNVICPENDOR_INTNUM_Pos*/) /*!< STL STLNVICPENDOR: INTNUM Mask */
+
+/* STL Software Test Library Observation Register (STLNVICACTVOR) Definitions */
+#define STL_STLNVICACTVOR_VALID_Pos        18U                                         /*!< STL STLNVICACTVOR: VALID Position */
+#define STL_STLNVICACTVOR_VALID_Msk        (0x1UL << STL_STLNVICACTVOR_VALID_Pos)      /*!< STL STLNVICACTVOR: VALID Mask */
+
+#define STL_STLNVICACTVOR_TARGET_Pos       17U                                         /*!< STL STLNVICACTVOR: TARGET Position */
+#define STL_STLNVICACTVOR_TARGET_Msk       (0x1UL << STL_STLNVICACTVOR_TARGET_Pos)     /*!< STL STLNVICACTVOR: TARGET Mask */
+
+#define STL_STLNVICACTVOR_PRIORITY_Pos      9U                                         /*!< STL STLNVICACTVOR: PRIORITY Position */
+#define STL_STLNVICACTVOR_PRIORITY_Msk     (0xFFUL << STL_STLNVICACTVOR_PRIORITY_Pos)  /*!< STL STLNVICACTVOR: PRIORITY Mask */
+
+#define STL_STLNVICACTVOR_INTNUM_Pos        0U                                         /*!< STL STLNVICACTVOR: INTNUM Position */
+#define STL_STLNVICACTVOR_INTNUM_Msk       (0x1FFUL /*<< STL_STLNVICACTVOR_INTNUM_Pos*/) /*!< STL STLNVICACTVOR: INTNUM Mask */
+
+/* STL Software Test Library Observation Register (STLIDMPUSR) Definitions */
+#define STL_STLIDMPUSR_ADDR_Pos             5U                                         /*!< STL STLIDMPUSR: ADDR Position */
+#define STL_STLIDMPUSR_ADDR_Msk            (0x7FFFFFFUL << STL_STLIDMPUSR_ADDR_Pos)    /*!< STL STLIDMPUSR: ADDR Mask */
+
+#define STL_STLIDMPUSR_INSTR_Pos            2U                                         /*!< STL STLIDMPUSR: INSTR Position */
+#define STL_STLIDMPUSR_INSTR_Msk           (0x1UL << STL_STLIDMPUSR_INSTR_Pos)         /*!< STL STLIDMPUSR: INSTR Mask */
+
+#define STL_STLIDMPUSR_DATA_Pos             1U                                         /*!< STL STLIDMPUSR: DATA Position */
+#define STL_STLIDMPUSR_DATA_Msk            (0x1UL << STL_STLIDMPUSR_DATA_Pos)          /*!< STL STLIDMPUSR: DATA Mask */
+
+/* STL Software Test Library Observation Register (STLIMPUOR) Definitions */
+#define STL_STLIMPUOR_HITREGION_Pos         9U                                         /*!< STL STLIMPUOR: HITREGION Position */
+#define STL_STLIMPUOR_HITREGION_Msk        (0xFFUL << STL_STLIMPUOR_HITREGION_Pos)     /*!< STL STLIMPUOR: HITREGION Mask */
+
+#define STL_STLIMPUOR_ATTR_Pos              0U                                         /*!< STL STLIMPUOR: ATTR Position */
+#define STL_STLIMPUOR_ATTR_Msk             (0x1FFUL /*<< STL_STLIMPUOR_ATTR_Pos*/)     /*!< STL STLIMPUOR: ATTR Mask */
+
+/* STL Software Test Library Observation Register (STLD0MPUOR) Definitions */
+#define STL_STLD0MPUOR_HITREGION_Pos        9U                                         /*!< STL STLD0MPUOR: HITREGION Position */
+#define STL_STLD0MPUOR_HITREGION_Msk       (0xFFUL << STL_STLD0MPUOR_HITREGION_Pos)    /*!< STL STLD0MPUOR: HITREGION Mask */
+
+#define STL_STLD0MPUOR_ATTR_Pos             0U                                         /*!< STL STLD0MPUOR: ATTR Position */
+#define STL_STLD0MPUOR_ATTR_Msk            (0x1FFUL /*<< STL_STLD0MPUOR_ATTR_Pos*/)    /*!< STL STLD0MPUOR: ATTR Mask */
+
+/* STL Software Test Library Observation Register (STLD1MPUOR) Definitions */
+#define STL_STLD1MPUOR_HITREGION_Pos        9U                                         /*!< STL STLD1MPUOR: HITREGION Position */
+#define STL_STLD1MPUOR_HITREGION_Msk       (0xFFUL << STL_STLD1MPUOR_HITREGION_Pos)    /*!< STL STLD1MPUOR: HITREGION Mask */
+
+#define STL_STLD1MPUOR_ATTR_Pos             0U                                         /*!< STL STLD1MPUOR: ATTR Position */
+#define STL_STLD1MPUOR_ATTR_Msk            (0x1FFUL /*<< STL_STLD1MPUOR_ATTR_Pos*/)    /*!< STL STLD1MPUOR: ATTR Mask */
+
+/*@}*/ /* end of group STL_Type */
+
+
 /**
   \ingroup  CMSIS_core_register
   \defgroup CMSIS_TPI     Trace Port Interface (TPI)
@@ -1490,15 +1966,14 @@ typedef struct
         uint32_t RESERVED11[108];
   __IOM uint32_t AUTHSTATUS;                        /*!< Offset: 0xFB8 (R/W)  PMU Authentication Status Register */
   __IOM uint32_t DEVARCH;                           /*!< Offset: 0xFBC (R/W)  PMU Device Architecture Register */
-        uint32_t RESERVED12[4];
+        uint32_t RESERVED12[3];
   __IOM uint32_t DEVTYPE;                           /*!< Offset: 0xFCC (R/W)  PMU Device Type Register */
   __IOM uint32_t PIDR4;                             /*!< Offset: 0xFD0 (R/W)  PMU Peripheral Identification Register 4 */
         uint32_t RESERVED13[3];
   __IOM uint32_t PIDR0;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 0 */
-  __IOM uint32_t PIDR1;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 1 */
-  __IOM uint32_t PIDR2;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 2 */
-  __IOM uint32_t PIDR3;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 3 */
-        uint32_t RESERVED14[3];
+  __IOM uint32_t PIDR1;                             /*!< Offset: 0xFE4 (R/W)  PMU Peripheral Identification Register 1 */
+  __IOM uint32_t PIDR2;                             /*!< Offset: 0xFE8 (R/W)  PMU Peripheral Identification Register 2 */
+  __IOM uint32_t PIDR3;                             /*!< Offset: 0xFEC (R/W)  PMU Peripheral Identification Register 3 */
   __IOM uint32_t CIDR0;                             /*!< Offset: 0xFF0 (R/W)  PMU Component Identification Register 0 */
   __IOM uint32_t CIDR1;                             /*!< Offset: 0xFF4 (R/W)  PMU Component Identification Register 1 */
   __IOM uint32_t CIDR2;                             /*!< Offset: 0xFF8 (R/W)  PMU Component Identification Register 2 */
@@ -2983,27 +3458,13 @@ typedef struct
  */
 typedef struct
 {
-  __OM  uint32_t DLAR;                   /*!< Offset: 0x000 ( /W)  SCS Software Lock Access Register */
-  __IM  uint32_t DLSR;                   /*!< Offset: 0x004 (R/ )  SCS Software Lock Status Register */
+        uint32_t RESERVED0[2U];
   __IM  uint32_t DAUTHSTATUS;            /*!< Offset: 0x008 (R/ )  Debug Authentication Status Register */
   __IM  uint32_t DDEVARCH;               /*!< Offset: 0x00C (R/ )  SCS Device Architecture Register */
-  __IM  uint32_t DDEVTYPE;               /*!< Offset: 0x010 (R/ )  SCS Device Type Register */
+        uint32_t RESERVED1[3U];
+  __IM  uint32_t DDEVTYPE;               /*!< Offset: 0x01C (R/ )  SCS Device Type Register */
 } DIB_Type;
 
-/* DLAR, SCS Software Lock Access Register Definitions */
-#define DIB_DLAR_KEY_Pos                    0U                                            /*!< DIB DLAR: KEY Position */
-#define DIB_DLAR_KEY_Msk                   (0xFFFFFFFFUL /*<< DIB_DLAR_KEY_Pos */)        /*!< DIB DLAR: KEY Mask */
-
-/* DLSR, SCS Software Lock Status Register Definitions */
-#define DIB_DLSR_nTT_Pos                    2U                                            /*!< DIB DLSR: Not thirty-two bit Position */
-#define DIB_DLSR_nTT_Msk                   (0x1UL << DIB_DLSR_nTT_Pos )                   /*!< DIB DLSR: Not thirty-two bit Mask */
-
-#define DIB_DLSR_SLK_Pos                    1U                                            /*!< DIB DLSR: Software Lock status Position */
-#define DIB_DLSR_SLK_Msk                   (0x1UL << DIB_DLSR_SLK_Pos )                   /*!< DIB DLSR: Software Lock status Mask */
-
-#define DIB_DLSR_SLI_Pos                    0U                                            /*!< DIB DLSR: Software Lock implemented Position */
-#define DIB_DLSR_SLI_Msk                   (0x1UL /*<< DIB_DLSR_SLI_Pos*/)                /*!< DIB DLSR: Software Lock implemented Mask */
-
 /* DAUTHSTATUS, Debug Authentication Status Register Definitions */
 #define DIB_DAUTHSTATUS_SUNID_Pos          22U                                            /*!< DIB DAUTHSTATUS: Secure Unprivileged Non-invasive Debug Allowed Position */
 #define DIB_DAUTHSTATUS_SUNID_Msk          (0x3UL << DIB_DAUTHSTATUS_SUNID_Pos )          /*!< DIB DAUTHSTATUS: Secure Unprivileged Non-invasive Debug Allowed Mask */
@@ -3093,6 +3554,12 @@ typedef struct
   #define SCS_BASE            (0xE000E000UL)                             /*!< System Control Space Base Address */
   #define ITM_BASE            (0xE0000000UL)                             /*!< ITM Base Address */
   #define DWT_BASE            (0xE0001000UL)                             /*!< DWT Base Address */
+  #define MEMSYSCTL_BASE      (0xE001E000UL)                             /*!< Memory System Control Base Address */
+  #define ERRBNK_BASE         (0xE001E100UL)                             /*!< Error Banking Base Address */
+  #define PWRMODCTL_BASE      (0xE001E300UL)                             /*!< Power Mode Control Base Address */
+  #define EWIC_BASE           (0xE001E400UL)                             /*!< External Wakeup Interrupt Controller Base Address */
+  #define PRCCFGINF_BASE      (0xE001E700UL)                             /*!< Processor Configuration Information Base Address */
+  #define STL_BASE            (0xE001E800UL)                             /*!< Software Test Library Base Address */
   #define TPI_BASE            (0xE0040000UL)                             /*!< TPI Base Address */
   #define CoreDebug_BASE      (0xE000EDF0UL)                             /*!< \deprecated Core Debug Base Address */
   #define DCB_BASE            (0xE000EDF0UL)                             /*!< DCB Base Address */
@@ -3101,13 +3568,19 @@ typedef struct
   #define NVIC_BASE           (SCS_BASE +  0x0100UL)                     /*!< NVIC Base Address */
   #define SCB_BASE            (SCS_BASE +  0x0D00UL)                     /*!< System Control Block Base Address */
 
-  #define SCnSCB              ((SCnSCB_Type    *)     SCS_BASE         ) /*!< System control Register not in SCB */
+  #define ICB                 ((ICB_Type       *)     SCS_BASE         ) /*!< System control Register not in SCB */
   #define SCB                 ((SCB_Type       *)     SCB_BASE         ) /*!< SCB configuration struct */
   #define SysTick             ((SysTick_Type   *)     SysTick_BASE     ) /*!< SysTick configuration struct */
   #define NVIC                ((NVIC_Type      *)     NVIC_BASE        ) /*!< NVIC configuration struct */
   #define ITM                 ((ITM_Type       *)     ITM_BASE         ) /*!< ITM configuration struct */
   #define DWT                 ((DWT_Type       *)     DWT_BASE         ) /*!< DWT configuration struct */
   #define TPI                 ((TPI_Type       *)     TPI_BASE         ) /*!< TPI configuration struct */
+  #define MEMSYSCTL           ((MemSysCtl_Type *)     MEMSYSCTL_BASE   ) /*!< Memory System Control configuration struct */
+  #define ERRBNK              ((ErrBnk_Type    *)     ERRBNK_BASE      ) /*!< Error Banking configuration struct */
+  #define PWRMODCTL           ((PwrModCtl_Type *)     PWRMODCTL_BASE   ) /*!< Power Mode Control configuration struct */
+  #define EWIC                ((EWIC_Type      *)     EWIC_BASE        ) /*!< EWIC configuration struct */
+  #define PRCCFGINF           ((PrcCfgInf_Type *)     PRCCFGINF_BASE   ) /*!< Processor Configuration Information configuration struct */
+  #define STL                 ((STL_Type       *)     STL_BASE         ) /*!< Software Test Library configuration struct */
   #define CoreDebug           ((CoreDebug_Type *)     CoreDebug_BASE   ) /*!< \deprecated Core Debug configuration struct */
   #define DCB                 ((DCB_Type       *)     DCB_BASE         ) /*!< DCB configuration struct */
   #define DIB                 ((DIB_Type       *)     DIB_BASE         ) /*!< DIB configuration struct */
@@ -3139,7 +3612,7 @@ typedef struct
   #define NVIC_BASE_NS        (SCS_BASE_NS +  0x0100UL)                  /*!< NVIC Base Address                 (non-secure address space) */
   #define SCB_BASE_NS         (SCS_BASE_NS +  0x0D00UL)                  /*!< System Control Block Base Address (non-secure address space) */
 
-  #define SCnSCB_NS           ((SCnSCB_Type    *)     SCS_BASE_NS      ) /*!< System control Register not in SCB(non-secure address space) */
+  #define ICB_NS              ((ICB_Type       *)     SCS_BASE_NS      ) /*!< System control Register not in SCB(non-secure address space) */
   #define SCB_NS              ((SCB_Type       *)     SCB_BASE_NS      ) /*!< SCB configuration struct          (non-secure address space) */
   #define SysTick_NS          ((SysTick_Type   *)     SysTick_BASE_NS  ) /*!< SysTick configuration struct      (non-secure address space) */
   #define NVIC_NS             ((NVIC_Type      *)     NVIC_BASE_NS     ) /*!< NVIC configuration struct         (non-secure address space) */
@@ -3159,6 +3632,69 @@ typedef struct
 /*@} */
 
 
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+#define ID_ADR  (ID_AFR)    /*!< SCB Auxiliary Feature Register */
+
+/* 'SCnSCB' is deprecated and replaced by 'ICB' */
+typedef ICB_Type SCnSCB_Type;
+
+/* Auxiliary Control Register Definitions */
+#define SCnSCB_ACTLR_DISCRITAXIRUW_Pos   (ICB_ACTLR_DISCRITAXIRUW_Pos)
+#define SCnSCB_ACTLR_DISCRITAXIRUW_Msk   (ICB_ACTLR_DISCRITAXIRUW_Msk)
+
+#define SCnSCB_ACTLR_DISDI_Pos           (ICB_ACTLR_DISDI_Pos)
+#define SCnSCB_ACTLR_DISDI_Msk           (ICB_ACTLR_DISDI_Msk)
+
+#define SCnSCB_ACTLR_DISCRITAXIRUR_Pos   (ICB_ACTLR_DISCRITAXIRUR_Pos)
+#define SCnSCB_ACTLR_DISCRITAXIRUR_Msk   (ICB_ACTLR_DISCRITAXIRUR_Msk)
+
+#define SCnSCB_ACTLR_EVENTBUSEN_Pos      (ICB_ACTLR_EVENTBUSEN_Pos)
+#define SCnSCB_ACTLR_EVENTBUSEN_Msk      (ICB_ACTLR_EVENTBUSEN_Msk)
+
+#define SCnSCB_ACTLR_EVENTBUSEN_S_Pos    (ICB_ACTLR_EVENTBUSEN_S_Pos)
+#define SCnSCB_ACTLR_EVENTBUSEN_S_Msk    (ICB_ACTLR_EVENTBUSEN_S_Msk)
+
+#define SCnSCB_ACTLR_DISITMATBFLUSH_Pos  (ICB_ACTLR_DISITMATBFLUSH_Pos)
+#define SCnSCB_ACTLR_DISITMATBFLUSH_Msk  (ICB_ACTLR_DISITMATBFLUSH_Msk)
+
+#define SCnSCB_ACTLR_DISNWAMODE_Pos      (ICB_ACTLR_DISNWAMODE_Pos)
+#define SCnSCB_ACTLR_DISNWAMODE_Msk      (ICB_ACTLR_DISNWAMODE_Msk)
+
+#define SCnSCB_ACTLR_FPEXCODIS_Pos       (ICB_ACTLR_FPEXCODIS_Pos)
+#define SCnSCB_ACTLR_FPEXCODIS_Msk       (ICB_ACTLR_FPEXCODIS_Msk)
+
+#define SCnSCB_ACTLR_DISOLAP_Pos         (ICB_ACTLR_DISOLAP_Pos)
+#define SCnSCB_ACTLR_DISOLAP_Msk         (ICB_ACTLR_DISOLAP_Msk)
+
+#define SCnSCB_ACTLR_DISOLAPS_Pos        (ICB_ACTLR_DISOLAPS_Pos)
+#define SCnSCB_ACTLR_DISOLAPS_Msk        (ICB_ACTLR_DISOLAPS_Msk)
+
+#define SCnSCB_ACTLR_DISLOBR_Pos         (ICB_ACTLR_DISLOBR_Pos)
+#define SCnSCB_ACTLR_DISLOBR_Msk         (ICB_ACTLR_DISLOBR_Msk)
+
+#define SCnSCB_ACTLR_DISLO_Pos           (ICB_ACTLR_DISLO_Pos)
+#define SCnSCB_ACTLR_DISLO_Msk           (ICB_ACTLR_DISLO_Msk)
+
+#define SCnSCB_ACTLR_DISLOLEP_Pos        (ICB_ACTLR_DISLOLEP_Pos)
+#define SCnSCB_ACTLR_DISLOLEP_Msk        (ICB_ACTLR_DISLOLEP_Msk)
+
+#define SCnSCB_ACTLR_DISFOLD_Pos         (ICB_ACTLR_DISFOLD_Pos)
+#define SCnSCB_ACTLR_DISFOLD_Msk         (ICB_ACTLR_DISFOLD_Msk)
+
+/* Interrupt Controller Type Register Definitions */
+#define SCnSCB_ICTR_INTLINESNUM_Pos      (ICB_ICTR_INTLINESNUM_Pos)
+#define SCnSCB_ICTR_INTLINESNUM_Msk      (ICB_ICTR_INTLINESNUM_Msk)
+
+#define SCnSCB                           (ICB)
+#define SCnSCB_NS                        (ICB_NS)
+
+/*@} */
+
 
 /*******************************************************************************
  *                Hardware Abstraction Layer
@@ -3852,6 +4388,9 @@ __STATIC_INLINE uint32_t TZ_NVIC_GetPriority_NS(IRQn_Type IRQn)
 #define ARMCM55_PMU_NWAMODE_ENTER                    0xC200             /*!< No write-allocate mode entry */
 #define ARMCM55_PMU_NWAMODE                          0xC201             /*!< Write-allocate store is not allocated into the data cache due to no-write-allocate mode */
 #define ARMCM55_PMU_SAHB_ACCESS                      0xC300             /*!< Read or write access on the S-AHB interface to the TCM */
+#define ARMCM55_PMU_PAHB_ACCESS                      0xC301             /*!< Read or write access to the P-AHB write interface */
+#define ARMCM55_PMU_AXI_WRITE_ACCESS                 0xC302             /*!< Any beat access to M-AXI write interface */
+#define ARMCM55_PMU_AXI_READ_ACCESS                  0xC303             /*!< Any beat access to M-AXI read interface */
 #define ARMCM55_PMU_DOSTIMEOUT_DOUBLE                0xC400             /*!< Denial of Service timeout has fired twice and caused buffers to drain to allow forward progress */
 #define ARMCM55_PMU_DOSTIMEOUT_TRIPLE                0xC401             /*!< Denial of Service timeout has fired three times and blocked the LSU to force forward progress */
 
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm7.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm7.h
index a82367a..649894a 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_cm7.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm7.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_cm7.h
  * @brief    CMSIS Cortex-M7 Core Peripheral Access Layer Header File
- * @version  V5.1.5
- * @date     03. November 2020
+ * @version  V5.1.6
+ * @date     04. June 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -213,7 +213,7 @@
     #define __VTOR_PRESENT             1U
     #warning "__VTOR_PRESENT not defined in device header file; using default!"
   #endif
-  
+
   #ifndef __NVIC_PRIO_BITS
     #define __NVIC_PRIO_BITS          3U
     #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
@@ -677,22 +677,22 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MLSPERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 5U)               /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
 #define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_cm85.h b/edge-impulse-sdk/CMSIS/Core/Include/core_cm85.h
new file mode 100644
index 0000000..acb2eb1
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_cm85.h
@@ -0,0 +1,4636 @@
+/**************************************************************************//**
+ * @file     core_cm85.h
+ * @brief    CMSIS Cortex-M85 Core Peripheral Access Layer Header File
+ * @version  V1.0.5
+ * @date     12. May 2022
+ ******************************************************************************/
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if   defined ( __ICCARM__ )
+  #pragma system_include                        /* treat file as system include file for MISRA check */
+#elif defined (__clang__)
+  #pragma clang system_header                   /* treat file as system include file */
+#elif defined ( __GNUC__ )
+  #pragma GCC diagnostic ignored "-Wpedantic"   /* disable pedantic warning due to unnamed structs/unions */
+#endif
+
+#ifndef __CORE_CM85_H_GENERIC
+#define __CORE_CM85_H_GENERIC
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/**
+  \page CMSIS_MISRA_Exceptions  MISRA-C:2004 Compliance Exceptions
+  CMSIS violates the following MISRA-C:2004 rules:
+
+   \li Required Rule 8.5, object/function definition in header file.<br>
+     Function definitions in header files are used to allow 'inlining'.
+
+   \li Required Rule 18.4, declaration of union type or object of union type: '{...}'.<br>
+     Unions are used for effective representation of core registers.
+
+   \li Advisory Rule 19.7, Function-like macro defined.<br>
+     Function-like macros are used to allow more efficient code.
+ */
+
+
+/*******************************************************************************
+ *                 CMSIS definitions
+ ******************************************************************************/
+/**
+  \ingroup Cortex_M85
+  @{
+ */
+
+#include "cmsis_version.h"
+
+/*  CMSIS CM85 definitions */
+
+#define __CORTEX_M                      (85U)                                 /*!< Cortex-M Core */
+
+#if defined ( __CC_ARM )
+  #error Legacy Arm Compiler does not support Armv8.1-M target architecture.
+#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+  #if defined __ARM_FP
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined(__ARM_FEATURE_DSP)
+    #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED       0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __GNUC__ )
+  #if defined (__VFP_FP__) && !defined(__SOFTFP__)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined(__ARM_FEATURE_DSP)
+    #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __ICCARM__ )
+  #if defined __ARMVFP__
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined(__ARM_FEATURE_DSP)
+    #if defined(__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __TI_ARM__ )
+  #if defined __TI_VFP_SUPPORT__
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#elif defined ( __TASKING__ )
+  #if defined __FPU_VFP__
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#elif defined ( __CSMC__ )
+  #if ( __CSMC__ & 0x400U)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#endif
+
+#include "edge-impulse-sdk/CMSIS/Core/Include/cmsis_compiler.h"               /* CMSIS compiler specific defines */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_CM85_H_GENERIC */
+
+#ifndef __CMSIS_GENERIC
+
+#ifndef __CORE_CM85_H_DEPENDANT
+#define __CORE_CM85_H_DEPENDANT
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/* check device defines and use defaults */
+#if defined __CHECK_DEVICE_DEFINES
+  #ifndef __CM85_REV
+    #define __CM85_REV               0x0001U
+    #warning "__CM85_REV not defined in device header file; using default!"
+  #endif
+
+  #ifndef __FPU_PRESENT
+    #define __FPU_PRESENT             0U
+    #warning "__FPU_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #if __FPU_PRESENT != 0U
+    #ifndef __FPU_DP
+      #define __FPU_DP             0U
+      #warning "__FPU_DP not defined in device header file; using default!"
+    #endif
+  #endif
+
+  #ifndef __MPU_PRESENT
+    #define __MPU_PRESENT             0U
+    #warning "__MPU_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __ICACHE_PRESENT
+    #define __ICACHE_PRESENT          0U
+    #warning "__ICACHE_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __DCACHE_PRESENT
+    #define __DCACHE_PRESENT          0U
+    #warning "__DCACHE_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __VTOR_PRESENT
+    #define __VTOR_PRESENT             1U
+    #warning "__VTOR_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __PMU_PRESENT
+    #define __PMU_PRESENT             0U
+    #warning "__PMU_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #if __PMU_PRESENT != 0U
+    #ifndef __PMU_NUM_EVENTCNT
+      #define __PMU_NUM_EVENTCNT      8U
+      #warning "__PMU_NUM_EVENTCNT not defined in device header file; using default!"
+    #elif (__PMU_NUM_EVENTCNT > 8 || __PMU_NUM_EVENTCNT < 2)
+      #error "__PMU_NUM_EVENTCNT is out of range in device header file!"
+    #endif
+  #endif
+
+  #ifndef __SAUREGION_PRESENT
+    #define __SAUREGION_PRESENT       0U
+    #warning "__SAUREGION_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __DSP_PRESENT
+    #define __DSP_PRESENT             0U
+    #warning "__DSP_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __NVIC_PRIO_BITS
+    #define __NVIC_PRIO_BITS          3U
+    #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
+  #endif
+
+  #ifndef __Vendor_SysTickConfig
+    #define __Vendor_SysTickConfig    0U
+    #warning "__Vendor_SysTickConfig not defined in device header file; using default!"
+  #endif
+#endif
+
+/* IO definitions (access restrictions to peripheral registers) */
+/**
+    \defgroup CMSIS_glob_defs CMSIS Global Defines
+
+    <strong>IO Type Qualifiers</strong> are used
+    \li to specify the access to peripheral variables.
+    \li for automatic generation of peripheral register debug information.
+*/
+#ifdef __cplusplus
+  #define   __I     volatile             /*!< Defines 'read only' permissions */
+#else
+  #define   __I     volatile const       /*!< Defines 'read only' permissions */
+#endif
+#define     __O     volatile             /*!< Defines 'write only' permissions */
+#define     __IO    volatile             /*!< Defines 'read / write' permissions */
+
+/* following defines should be used for structure members */
+#define     __IM     volatile const      /*!< Defines 'read only' structure member permissions */
+#define     __OM     volatile            /*!< Defines 'write only' structure member permissions */
+#define     __IOM    volatile            /*!< Defines 'read / write' structure member permissions */
+
+/*@} end of group Cortex_M85 */
+
+
+
+/*******************************************************************************
+ *                 Register Abstraction
+  Core Registers contain:
+  - Core Register
+  - Core NVIC Register
+  - Core EWIC Register
+  - Core SCB Register
+  - Core SysTick Register
+  - Core Debug Register
+  - Core PMU Register
+  - Core MPU Register
+  - Core SAU Register
+  - Core FPU Register
+ ******************************************************************************/
+/**
+  \defgroup CMSIS_core_register Defines and Type Definitions
+  \brief Type definitions and defines for Cortex-M processor based devices.
+*/
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_CORE  Status and Control Registers
+  \brief      Core Register type definitions.
+  @{
+ */
+
+/**
+  \brief  Union type to access the Application Program Status Register (APSR), either per flag through member \c b or as one 32-bit word through member \c w.
+ */
+typedef union
+{
+  struct
+  {
+    uint32_t _reserved0:16;              /*!< bit:  0..15  Reserved */
+    uint32_t GE:4;                       /*!< bit: 16..19  Greater than or Equal flags */
+    uint32_t _reserved1:7;               /*!< bit: 20..26  Reserved */
+    uint32_t Q:1;                        /*!< bit:     27  Saturation condition flag */
+    uint32_t V:1;                        /*!< bit:     28  Overflow condition code flag */
+    uint32_t C:1;                        /*!< bit:     29  Carry condition code flag */
+    uint32_t Z:1;                        /*!< bit:     30  Zero condition code flag */
+    uint32_t N:1;                        /*!< bit:     31  Negative condition code flag */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} APSR_Type;
+
+/* APSR Register Definitions: bit position (_Pos) and bit mask (_Msk) for each APSR_Type flag field */
+#define APSR_N_Pos                         31U                                            /*!< APSR: N Position */
+#define APSR_N_Msk                         (1UL << APSR_N_Pos)                            /*!< APSR: N Mask */
+
+#define APSR_Z_Pos                         30U                                            /*!< APSR: Z Position */
+#define APSR_Z_Msk                         (1UL << APSR_Z_Pos)                            /*!< APSR: Z Mask */
+
+#define APSR_C_Pos                         29U                                            /*!< APSR: C Position */
+#define APSR_C_Msk                         (1UL << APSR_C_Pos)                            /*!< APSR: C Mask */
+
+#define APSR_V_Pos                         28U                                            /*!< APSR: V Position */
+#define APSR_V_Msk                         (1UL << APSR_V_Pos)                            /*!< APSR: V Mask */
+
+#define APSR_Q_Pos                         27U                                            /*!< APSR: Q Position */
+#define APSR_Q_Msk                         (1UL << APSR_Q_Pos)                            /*!< APSR: Q Mask */
+
+#define APSR_GE_Pos                        16U                                            /*!< APSR: GE Position */
+#define APSR_GE_Msk                        (0xFUL << APSR_GE_Pos)                         /*!< APSR: GE Mask */
+
+
+/**
+  \brief  Union type to access the Interrupt Program Status Register (IPSR), either per field through member \c b or as one 32-bit word through member \c w.
+ */
+typedef union
+{
+  struct
+  {
+    uint32_t ISR:9;                      /*!< bit:  0.. 8  Exception number */
+    uint32_t _reserved0:23;              /*!< bit:  9..31  Reserved */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} IPSR_Type;
+
+/* IPSR Register Definitions (the shift is left commented out in the mask because the ISR field starts at bit 0) */
+#define IPSR_ISR_Pos                        0U                                            /*!< IPSR: ISR Position */
+#define IPSR_ISR_Msk                       (0x1FFUL /*<< IPSR_ISR_Pos*/)                  /*!< IPSR: ISR Mask */
+
+
+/**
+  \brief  Union type to access the Special-Purpose Program Status Registers (xPSR), either per field through member \c b or as one 32-bit word through member \c w.
+ */
+typedef union
+{
+  struct
+  {
+    uint32_t ISR:9;                      /*!< bit:  0.. 8  Exception number */
+    uint32_t _reserved0:7;               /*!< bit:  9..15  Reserved */
+    uint32_t GE:4;                       /*!< bit: 16..19  Greater than or Equal flags */
+    uint32_t _reserved1:1;               /*!< bit:     20  Reserved */
+    uint32_t B:1;                        /*!< bit:     21  BTI active       (read 0) */
+    uint32_t _reserved2:2;               /*!< bit: 22..23  Reserved */
+    uint32_t T:1;                        /*!< bit:     24  Thumb bit        (read 0) */
+    uint32_t IT:2;                       /*!< bit: 25..26  saved IT state   (read 0) */
+    uint32_t Q:1;                        /*!< bit:     27  Saturation condition flag */
+    uint32_t V:1;                        /*!< bit:     28  Overflow condition code flag */
+    uint32_t C:1;                        /*!< bit:     29  Carry condition code flag */
+    uint32_t Z:1;                        /*!< bit:     30  Zero condition code flag */
+    uint32_t N:1;                        /*!< bit:     31  Negative condition code flag */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} xPSR_Type;
+
+/* xPSR Register Definitions: bit position (_Pos) and bit mask (_Msk) for each non-reserved xPSR_Type field */
+#define xPSR_N_Pos                         31U                                            /*!< xPSR: N Position */
+#define xPSR_N_Msk                         (1UL << xPSR_N_Pos)                            /*!< xPSR: N Mask */
+
+#define xPSR_Z_Pos                         30U                                            /*!< xPSR: Z Position */
+#define xPSR_Z_Msk                         (1UL << xPSR_Z_Pos)                            /*!< xPSR: Z Mask */
+
+#define xPSR_C_Pos                         29U                                            /*!< xPSR: C Position */
+#define xPSR_C_Msk                         (1UL << xPSR_C_Pos)                            /*!< xPSR: C Mask */
+
+#define xPSR_V_Pos                         28U                                            /*!< xPSR: V Position */
+#define xPSR_V_Msk                         (1UL << xPSR_V_Pos)                            /*!< xPSR: V Mask */
+
+#define xPSR_Q_Pos                         27U                                            /*!< xPSR: Q Position */
+#define xPSR_Q_Msk                         (1UL << xPSR_Q_Pos)                            /*!< xPSR: Q Mask */
+
+#define xPSR_IT_Pos                        25U                                            /*!< xPSR: IT Position */
+#define xPSR_IT_Msk                        (3UL << xPSR_IT_Pos)                           /*!< xPSR: IT Mask */
+
+#define xPSR_T_Pos                         24U                                            /*!< xPSR: T Position */
+#define xPSR_T_Msk                         (1UL << xPSR_T_Pos)                            /*!< xPSR: T Mask */
+
+#define xPSR_B_Pos                         21U                                            /*!< xPSR: B Position */
+#define xPSR_B_Msk                         (1UL << xPSR_B_Pos)                            /*!< xPSR: B Mask */
+
+#define xPSR_GE_Pos                        16U                                            /*!< xPSR: GE Position */
+#define xPSR_GE_Msk                        (0xFUL << xPSR_GE_Pos)                         /*!< xPSR: GE Mask */
+
+#define xPSR_ISR_Pos                        0U                                            /*!< xPSR: ISR Position */
+#define xPSR_ISR_Msk                       (0x1FFUL /*<< xPSR_ISR_Pos*/)                  /*!< xPSR: ISR Mask */
+
+
+/**
+  \brief  Union type to access the Control Registers (CONTROL), either per field through member \c b or as one 32-bit word through member \c w.
+ */
+typedef union
+{
+  struct
+  {
+    uint32_t nPRIV:1;                    /*!< bit:      0  Execution privilege in Thread mode */
+    uint32_t SPSEL:1;                    /*!< bit:      1  Stack-pointer select */
+    uint32_t FPCA:1;                     /*!< bit:      2  Floating-point context active */
+    uint32_t SFPA:1;                     /*!< bit:      3  Secure floating-point active */
+    uint32_t BTI_EN:1;                   /*!< bit:      4  Privileged branch target identification enable */
+    uint32_t UBTI_EN:1;                  /*!< bit:      5  Unprivileged branch target identification enable */
+    uint32_t PAC_EN:1;                   /*!< bit:      6  Privileged pointer authentication enable */
+    uint32_t UPAC_EN:1;                  /*!< bit:      7  Unprivileged pointer authentication enable */
+    uint32_t _reserved1:24;              /*!< bit:  8..31  Reserved */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} CONTROL_Type;
+
+/* CONTROL Register Definitions: bit position (_Pos) and bit mask (_Msk) for each non-reserved CONTROL_Type field */
+#define CONTROL_UPAC_EN_Pos                 7U                                            /*!< CONTROL: UPAC_EN Position */
+#define CONTROL_UPAC_EN_Msk                (1UL << CONTROL_UPAC_EN_Pos)                   /*!< CONTROL: UPAC_EN Mask */
+
+#define CONTROL_PAC_EN_Pos                  6U                                            /*!< CONTROL: PAC_EN Position */
+#define CONTROL_PAC_EN_Msk                 (1UL << CONTROL_PAC_EN_Pos)                    /*!< CONTROL: PAC_EN Mask */
+
+#define CONTROL_UBTI_EN_Pos                 5U                                            /*!< CONTROL: UBTI_EN Position */
+#define CONTROL_UBTI_EN_Msk                (1UL << CONTROL_UBTI_EN_Pos)                   /*!< CONTROL: UBTI_EN Mask */
+
+#define CONTROL_BTI_EN_Pos                  4U                                            /*!< CONTROL: BTI_EN Position */
+#define CONTROL_BTI_EN_Msk                 (1UL << CONTROL_BTI_EN_Pos)                    /*!< CONTROL: BTI_EN Mask */
+
+#define CONTROL_SFPA_Pos                    3U                                            /*!< CONTROL: SFPA Position */
+#define CONTROL_SFPA_Msk                   (1UL << CONTROL_SFPA_Pos)                      /*!< CONTROL: SFPA Mask */
+
+#define CONTROL_FPCA_Pos                    2U                                            /*!< CONTROL: FPCA Position */
+#define CONTROL_FPCA_Msk                   (1UL << CONTROL_FPCA_Pos)                      /*!< CONTROL: FPCA Mask */
+
+#define CONTROL_SPSEL_Pos                   1U                                            /*!< CONTROL: SPSEL Position */
+#define CONTROL_SPSEL_Msk                  (1UL << CONTROL_SPSEL_Pos)                     /*!< CONTROL: SPSEL Mask */
+
+#define CONTROL_nPRIV_Pos                   0U                                            /*!< CONTROL: nPRIV Position */
+#define CONTROL_nPRIV_Msk                  (1UL /*<< CONTROL_nPRIV_Pos*/)                 /*!< CONTROL: nPRIV Mask */
+
+/*@} end of group CMSIS_CORE */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_NVIC  Nested Vectored Interrupt Controller (NVIC)
+  \brief      Type definitions for the NVIC Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Nested Vectored Interrupt Controller (NVIC).
+ */
+typedef struct
+{
+  __IOM uint32_t ISER[16U];              /*!< Offset: 0x000 (R/W)  Interrupt Set Enable Register */
+        uint32_t RESERVED0[16U];         /* padding: 0x040..0x07F */
+  __IOM uint32_t ICER[16U];              /*!< Offset: 0x080 (R/W)  Interrupt Clear Enable Register */
+        uint32_t RSERVED1[16U];          /* padding: 0x0C0..0x0FF (the member name is spelled "RSERVED1" in this header) */
+  __IOM uint32_t ISPR[16U];              /*!< Offset: 0x100 (R/W)  Interrupt Set Pending Register */
+        uint32_t RESERVED2[16U];         /* padding: 0x140..0x17F */
+  __IOM uint32_t ICPR[16U];              /*!< Offset: 0x180 (R/W)  Interrupt Clear Pending Register */
+        uint32_t RESERVED3[16U];         /* padding: 0x1C0..0x1FF */
+  __IOM uint32_t IABR[16U];              /*!< Offset: 0x200 (R/W)  Interrupt Active bit Register */
+        uint32_t RESERVED4[16U];         /* padding: 0x240..0x27F */
+  __IOM uint32_t ITNS[16U];              /*!< Offset: 0x280 (R/W)  Interrupt Non-Secure State Register */
+        uint32_t RESERVED5[16U];         /* padding: 0x2C0..0x2FF */
+  __IOM uint8_t  IPR[496U];              /*!< Offset: 0x300 (R/W)  Interrupt Priority Register (8Bit wide) */
+        uint32_t RESERVED6[580U];        /* padding: 0x4F0..0xDFF */
+  __OM  uint32_t STIR;                   /*!< Offset: 0xE00 ( /W)  Software Trigger Interrupt Register */
+}  NVIC_Type;
+
+/* Software Triggered Interrupt Register Definitions */
+#define NVIC_STIR_INTID_Pos                 0U                                         /*!< STIR: INTID Position */
+#define NVIC_STIR_INTID_Msk                (0x1FFUL /*<< NVIC_STIR_INTID_Pos*/)        /*!< STIR: INTID Mask */
+
+/*@} end of group CMSIS_NVIC */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SCB     System Control Block (SCB)
+  \brief    Type definitions for the System Control Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the System Control Block (SCB).
+ */
+typedef struct
+{
+  __IM  uint32_t CPUID;                  /*!< Offset: 0x000 (R/ )  CPUID Base Register */
+  __IOM uint32_t ICSR;                   /*!< Offset: 0x004 (R/W)  Interrupt Control and State Register */
+  __IOM uint32_t VTOR;                   /*!< Offset: 0x008 (R/W)  Vector Table Offset Register */
+  __IOM uint32_t AIRCR;                  /*!< Offset: 0x00C (R/W)  Application Interrupt and Reset Control Register */
+  __IOM uint32_t SCR;                    /*!< Offset: 0x010 (R/W)  System Control Register */
+  __IOM uint32_t CCR;                    /*!< Offset: 0x014 (R/W)  Configuration Control Register */
+  __IOM uint8_t  SHPR[12U];              /*!< Offset: 0x018 (R/W)  System Handlers Priority Registers (4-7, 8-11, 12-15) */
+  __IOM uint32_t SHCSR;                  /*!< Offset: 0x024 (R/W)  System Handler Control and State Register */
+  __IOM uint32_t CFSR;                   /*!< Offset: 0x028 (R/W)  Configurable Fault Status Register */
+  __IOM uint32_t HFSR;                   /*!< Offset: 0x02C (R/W)  HardFault Status Register */
+  __IOM uint32_t DFSR;                   /*!< Offset: 0x030 (R/W)  Debug Fault Status Register */
+  __IOM uint32_t MMFAR;                  /*!< Offset: 0x034 (R/W)  MemManage Fault Address Register */
+  __IOM uint32_t BFAR;                   /*!< Offset: 0x038 (R/W)  BusFault Address Register */
+  __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
+  __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
+  __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
+  __IM  uint32_t ID_ISAR[6U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
+  __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID Register */
+  __IM  uint32_t CTR;                    /*!< Offset: 0x07C (R/ )  Cache Type Register */
+  __IM  uint32_t CCSIDR;                 /*!< Offset: 0x080 (R/ )  Cache Size ID Register */
+  __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
+  __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
+  __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
+        uint32_t RESERVED7[21U];         /* padding: 0x090..0x0E3 */
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];         /* padding: 0x0EC..0x1FF */
+  __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
+  __IOM uint32_t RFSR;                   /*!< Offset: 0x204 (R/W)  RAS Fault Status Register */
+        uint32_t RESERVED4[14U];         /* padding: 0x208..0x23F */
+  __IM  uint32_t MVFR0;                  /*!< Offset: 0x240 (R/ )  Media and VFP Feature Register 0 */
+  __IM  uint32_t MVFR1;                  /*!< Offset: 0x244 (R/ )  Media and VFP Feature Register 1 */
+  __IM  uint32_t MVFR2;                  /*!< Offset: 0x248 (R/ )  Media and VFP Feature Register 2 */
+        uint32_t RESERVED5[1U];          /* padding: 0x24C..0x24F */
+  __OM  uint32_t ICIALLU;                /*!< Offset: 0x250 ( /W)  I-Cache Invalidate All to PoU */
+        uint32_t RESERVED6[1U];          /* padding: 0x254..0x257 */
+  __OM  uint32_t ICIMVAU;                /*!< Offset: 0x258 ( /W)  I-Cache Invalidate by MVA to PoU */
+  __OM  uint32_t DCIMVAC;                /*!< Offset: 0x25C ( /W)  D-Cache Invalidate by MVA to PoC */
+  __OM  uint32_t DCISW;                  /*!< Offset: 0x260 ( /W)  D-Cache Invalidate by Set-way */
+  __OM  uint32_t DCCMVAU;                /*!< Offset: 0x264 ( /W)  D-Cache Clean by MVA to PoU */
+  __OM  uint32_t DCCMVAC;                /*!< Offset: 0x268 ( /W)  D-Cache Clean by MVA to PoC */
+  __OM  uint32_t DCCSW;                  /*!< Offset: 0x26C ( /W)  D-Cache Clean by Set-way */
+  __OM  uint32_t DCCIMVAC;               /*!< Offset: 0x270 ( /W)  D-Cache Clean and Invalidate by MVA to PoC */
+  __OM  uint32_t DCCISW;                 /*!< Offset: 0x274 ( /W)  D-Cache Clean and Invalidate by Set-way */
+  __OM  uint32_t BPIALL;                 /*!< Offset: 0x278 ( /W)  Branch Predictor Invalidate All */
+} SCB_Type;
+
+/* SCB CPUID Base Register (CPUID) Definitions; the REVISION mask leaves its shift commented out because the field starts at bit 0 */
+#define SCB_CPUID_IMPLEMENTER_Pos          24U                                            /*!< SCB CPUID: IMPLEMENTER Position */
+#define SCB_CPUID_IMPLEMENTER_Msk          (0xFFUL << SCB_CPUID_IMPLEMENTER_Pos)          /*!< SCB CPUID: IMPLEMENTER Mask */
+
+#define SCB_CPUID_VARIANT_Pos              20U                                            /*!< SCB CPUID: VARIANT Position */
+#define SCB_CPUID_VARIANT_Msk              (0xFUL << SCB_CPUID_VARIANT_Pos)               /*!< SCB CPUID: VARIANT Mask */
+
+#define SCB_CPUID_ARCHITECTURE_Pos         16U                                            /*!< SCB CPUID: ARCHITECTURE Position */
+#define SCB_CPUID_ARCHITECTURE_Msk         (0xFUL << SCB_CPUID_ARCHITECTURE_Pos)          /*!< SCB CPUID: ARCHITECTURE Mask */
+
+#define SCB_CPUID_PARTNO_Pos                4U                                            /*!< SCB CPUID: PARTNO Position */
+#define SCB_CPUID_PARTNO_Msk               (0xFFFUL << SCB_CPUID_PARTNO_Pos)              /*!< SCB CPUID: PARTNO Mask */
+
+#define SCB_CPUID_REVISION_Pos              0U                                            /*!< SCB CPUID: REVISION Position */
+#define SCB_CPUID_REVISION_Msk             (0xFUL /*<< SCB_CPUID_REVISION_Pos*/)          /*!< SCB CPUID: REVISION Mask */
+
+/* SCB Interrupt Control and State Register (ICSR) Definitions */
+#define SCB_ICSR_PENDNMISET_Pos            31U                                            /*!< SCB ICSR: PENDNMISET Position */
+#define SCB_ICSR_PENDNMISET_Msk            (1UL << SCB_ICSR_PENDNMISET_Pos)               /*!< SCB ICSR: PENDNMISET Mask */
+
+#define SCB_ICSR_NMIPENDSET_Pos            SCB_ICSR_PENDNMISET_Pos                        /*!< SCB ICSR: NMIPENDSET Position, backward compatibility */
+#define SCB_ICSR_NMIPENDSET_Msk            SCB_ICSR_PENDNMISET_Msk                        /*!< SCB ICSR: NMIPENDSET Mask, backward compatibility */
+
+#define SCB_ICSR_PENDNMICLR_Pos            30U                                            /*!< SCB ICSR: PENDNMICLR Position */
+#define SCB_ICSR_PENDNMICLR_Msk            (1UL << SCB_ICSR_PENDNMICLR_Pos)               /*!< SCB ICSR: PENDNMICLR Mask */
+
+#define SCB_ICSR_PENDSVSET_Pos             28U                                            /*!< SCB ICSR: PENDSVSET Position */
+#define SCB_ICSR_PENDSVSET_Msk             (1UL << SCB_ICSR_PENDSVSET_Pos)                /*!< SCB ICSR: PENDSVSET Mask */
+
+#define SCB_ICSR_PENDSVCLR_Pos             27U                                            /*!< SCB ICSR: PENDSVCLR Position */
+#define SCB_ICSR_PENDSVCLR_Msk             (1UL << SCB_ICSR_PENDSVCLR_Pos)                /*!< SCB ICSR: PENDSVCLR Mask */
+
+#define SCB_ICSR_PENDSTSET_Pos             26U                                            /*!< SCB ICSR: PENDSTSET Position */
+#define SCB_ICSR_PENDSTSET_Msk             (1UL << SCB_ICSR_PENDSTSET_Pos)                /*!< SCB ICSR: PENDSTSET Mask */
+
+#define SCB_ICSR_PENDSTCLR_Pos             25U                                            /*!< SCB ICSR: PENDSTCLR Position */
+#define SCB_ICSR_PENDSTCLR_Msk             (1UL << SCB_ICSR_PENDSTCLR_Pos)                /*!< SCB ICSR: PENDSTCLR Mask */
+
+#define SCB_ICSR_STTNS_Pos                 24U                                            /*!< SCB ICSR: STTNS Position (Security Extension) */
+#define SCB_ICSR_STTNS_Msk                 (1UL << SCB_ICSR_STTNS_Pos)                    /*!< SCB ICSR: STTNS Mask (Security Extension) */
+
+#define SCB_ICSR_ISRPREEMPT_Pos            23U                                            /*!< SCB ICSR: ISRPREEMPT Position */
+#define SCB_ICSR_ISRPREEMPT_Msk            (1UL << SCB_ICSR_ISRPREEMPT_Pos)               /*!< SCB ICSR: ISRPREEMPT Mask */
+
+#define SCB_ICSR_ISRPENDING_Pos            22U                                            /*!< SCB ICSR: ISRPENDING Position */
+#define SCB_ICSR_ISRPENDING_Msk            (1UL << SCB_ICSR_ISRPENDING_Pos)               /*!< SCB ICSR: ISRPENDING Mask */
+
+#define SCB_ICSR_VECTPENDING_Pos           12U                                            /*!< SCB ICSR: VECTPENDING Position */
+#define SCB_ICSR_VECTPENDING_Msk           (0x1FFUL << SCB_ICSR_VECTPENDING_Pos)          /*!< SCB ICSR: VECTPENDING Mask */
+
+#define SCB_ICSR_RETTOBASE_Pos             11U                                            /*!< SCB ICSR: RETTOBASE Position */
+#define SCB_ICSR_RETTOBASE_Msk             (1UL << SCB_ICSR_RETTOBASE_Pos)                /*!< SCB ICSR: RETTOBASE Mask */
+
+#define SCB_ICSR_VECTACTIVE_Pos             0U                                            /*!< SCB ICSR: VECTACTIVE Position */
+#define SCB_ICSR_VECTACTIVE_Msk            (0x1FFUL /*<< SCB_ICSR_VECTACTIVE_Pos*/)       /*!< SCB ICSR: VECTACTIVE Mask */
+
+/* SCB Vector Table Offset Register (VTOR) Definitions; the TBLOFF mask covers bits 7..31 */
+#define SCB_VTOR_TBLOFF_Pos                 7U                                            /*!< SCB VTOR: TBLOFF Position */
+#define SCB_VTOR_TBLOFF_Msk                (0x1FFFFFFUL << SCB_VTOR_TBLOFF_Pos)           /*!< SCB VTOR: TBLOFF Mask */
+
+/* SCB Application Interrupt and Reset Control Register (AIRCR) Definitions; VECTKEY and VECTKEYSTAT describe the same bits 16..31 */
+#define SCB_AIRCR_VECTKEY_Pos              16U                                            /*!< SCB AIRCR: VECTKEY Position */
+#define SCB_AIRCR_VECTKEY_Msk              (0xFFFFUL << SCB_AIRCR_VECTKEY_Pos)            /*!< SCB AIRCR: VECTKEY Mask */
+
+#define SCB_AIRCR_VECTKEYSTAT_Pos          16U                                            /*!< SCB AIRCR: VECTKEYSTAT Position */
+#define SCB_AIRCR_VECTKEYSTAT_Msk          (0xFFFFUL << SCB_AIRCR_VECTKEYSTAT_Pos)        /*!< SCB AIRCR: VECTKEYSTAT Mask */
+
+#define SCB_AIRCR_ENDIANESS_Pos            15U                                            /*!< SCB AIRCR: ENDIANESS Position */
+#define SCB_AIRCR_ENDIANESS_Msk            (1UL << SCB_AIRCR_ENDIANESS_Pos)               /*!< SCB AIRCR: ENDIANESS Mask */
+
+#define SCB_AIRCR_PRIS_Pos                 14U                                            /*!< SCB AIRCR: PRIS Position */
+#define SCB_AIRCR_PRIS_Msk                 (1UL << SCB_AIRCR_PRIS_Pos)                    /*!< SCB AIRCR: PRIS Mask */
+
+#define SCB_AIRCR_BFHFNMINS_Pos            13U                                            /*!< SCB AIRCR: BFHFNMINS Position */
+#define SCB_AIRCR_BFHFNMINS_Msk            (1UL << SCB_AIRCR_BFHFNMINS_Pos)               /*!< SCB AIRCR: BFHFNMINS Mask */
+
+#define SCB_AIRCR_PRIGROUP_Pos              8U                                            /*!< SCB AIRCR: PRIGROUP Position */
+#define SCB_AIRCR_PRIGROUP_Msk             (7UL << SCB_AIRCR_PRIGROUP_Pos)                /*!< SCB AIRCR: PRIGROUP Mask */
+
+#define SCB_AIRCR_IESB_Pos                  5U                                            /*!< SCB AIRCR: Implicit ESB Enable Position */
+#define SCB_AIRCR_IESB_Msk                 (1UL << SCB_AIRCR_IESB_Pos)                    /*!< SCB AIRCR: Implicit ESB Enable Mask */
+
+#define SCB_AIRCR_DIT_Pos                   4U                                            /*!< SCB AIRCR: Data Independent Timing Position */
+#define SCB_AIRCR_DIT_Msk                  (1UL << SCB_AIRCR_DIT_Pos)                     /*!< SCB AIRCR: Data Independent Timing Mask */
+
+#define SCB_AIRCR_SYSRESETREQS_Pos          3U                                            /*!< SCB AIRCR: SYSRESETREQS Position */
+#define SCB_AIRCR_SYSRESETREQS_Msk         (1UL << SCB_AIRCR_SYSRESETREQS_Pos)            /*!< SCB AIRCR: SYSRESETREQS Mask */
+
+#define SCB_AIRCR_SYSRESETREQ_Pos           2U                                            /*!< SCB AIRCR: SYSRESETREQ Position */
+#define SCB_AIRCR_SYSRESETREQ_Msk          (1UL << SCB_AIRCR_SYSRESETREQ_Pos)             /*!< SCB AIRCR: SYSRESETREQ Mask */
+
+#define SCB_AIRCR_VECTCLRACTIVE_Pos         1U                                            /*!< SCB AIRCR: VECTCLRACTIVE Position */
+#define SCB_AIRCR_VECTCLRACTIVE_Msk        (1UL << SCB_AIRCR_VECTCLRACTIVE_Pos)           /*!< SCB AIRCR: VECTCLRACTIVE Mask */
+
+/* SCB System Control Register (SCR) Definitions */
+#define SCB_SCR_SEVONPEND_Pos               4U                                            /*!< SCB SCR: SEVONPEND Position */
+#define SCB_SCR_SEVONPEND_Msk              (1UL << SCB_SCR_SEVONPEND_Pos)                 /*!< SCB SCR: SEVONPEND Mask */
+
+#define SCB_SCR_SLEEPDEEPS_Pos              3U                                            /*!< SCB SCR: SLEEPDEEPS Position */
+#define SCB_SCR_SLEEPDEEPS_Msk             (1UL << SCB_SCR_SLEEPDEEPS_Pos)                /*!< SCB SCR: SLEEPDEEPS Mask */
+
+#define SCB_SCR_SLEEPDEEP_Pos               2U                                            /*!< SCB SCR: SLEEPDEEP Position */
+#define SCB_SCR_SLEEPDEEP_Msk              (1UL << SCB_SCR_SLEEPDEEP_Pos)                 /*!< SCB SCR: SLEEPDEEP Mask */
+
+#define SCB_SCR_SLEEPONEXIT_Pos             1U                                            /*!< SCB SCR: SLEEPONEXIT Position */
+#define SCB_SCR_SLEEPONEXIT_Msk            (1UL << SCB_SCR_SLEEPONEXIT_Pos)               /*!< SCB SCR: SLEEPONEXIT Mask */
+
+/* SCB Configuration Control Register (CCR) Definitions */
+#define SCB_CCR_TRD_Pos                    20U                                            /*!< SCB CCR: TRD Position */
+#define SCB_CCR_TRD_Msk                    (1UL << SCB_CCR_TRD_Pos)                       /*!< SCB CCR: TRD Mask */
+
+#define SCB_CCR_LOB_Pos                    19U                                            /*!< SCB CCR: LOB Position */
+#define SCB_CCR_LOB_Msk                    (1UL << SCB_CCR_LOB_Pos)                       /*!< SCB CCR: LOB Mask */
+
+#define SCB_CCR_BP_Pos                     18U                                            /*!< SCB CCR: BP Position */
+#define SCB_CCR_BP_Msk                     (1UL << SCB_CCR_BP_Pos)                        /*!< SCB CCR: BP Mask */
+
+#define SCB_CCR_IC_Pos                     17U                                            /*!< SCB CCR: IC Position */
+#define SCB_CCR_IC_Msk                     (1UL << SCB_CCR_IC_Pos)                        /*!< SCB CCR: IC Mask */
+
+#define SCB_CCR_DC_Pos                     16U                                            /*!< SCB CCR: DC Position */
+#define SCB_CCR_DC_Msk                     (1UL << SCB_CCR_DC_Pos)                        /*!< SCB CCR: DC Mask */
+
+#define SCB_CCR_STKOFHFNMIGN_Pos           10U                                            /*!< SCB CCR: STKOFHFNMIGN Position */
+#define SCB_CCR_STKOFHFNMIGN_Msk           (1UL << SCB_CCR_STKOFHFNMIGN_Pos)              /*!< SCB CCR: STKOFHFNMIGN Mask */
+
+#define SCB_CCR_BFHFNMIGN_Pos               8U                                            /*!< SCB CCR: BFHFNMIGN Position */
+#define SCB_CCR_BFHFNMIGN_Msk              (1UL << SCB_CCR_BFHFNMIGN_Pos)                 /*!< SCB CCR: BFHFNMIGN Mask */
+
+#define SCB_CCR_DIV_0_TRP_Pos               4U                                            /*!< SCB CCR: DIV_0_TRP Position */
+#define SCB_CCR_DIV_0_TRP_Msk              (1UL << SCB_CCR_DIV_0_TRP_Pos)                 /*!< SCB CCR: DIV_0_TRP Mask */
+
+#define SCB_CCR_UNALIGN_TRP_Pos             3U                                            /*!< SCB CCR: UNALIGN_TRP Position */
+#define SCB_CCR_UNALIGN_TRP_Msk            (1UL << SCB_CCR_UNALIGN_TRP_Pos)               /*!< SCB CCR: UNALIGN_TRP Mask */
+
+#define SCB_CCR_USERSETMPEND_Pos            1U                                            /*!< SCB CCR: USERSETMPEND Position */
+#define SCB_CCR_USERSETMPEND_Msk           (1UL << SCB_CCR_USERSETMPEND_Pos)              /*!< SCB CCR: USERSETMPEND Mask */
+
+/* SCB System Handler Control and State Register Definitions */
+#define SCB_SHCSR_HARDFAULTPENDED_Pos      21U                                            /*!< SCB SHCSR: HARDFAULTPENDED Position */
+#define SCB_SHCSR_HARDFAULTPENDED_Msk      (1UL << SCB_SHCSR_HARDFAULTPENDED_Pos)         /*!< SCB SHCSR: HARDFAULTPENDED Mask */
+
+#define SCB_SHCSR_SECUREFAULTPENDED_Pos    20U                                            /*!< SCB SHCSR: SECUREFAULTPENDED Position */
+#define SCB_SHCSR_SECUREFAULTPENDED_Msk    (1UL << SCB_SHCSR_SECUREFAULTPENDED_Pos)       /*!< SCB SHCSR: SECUREFAULTPENDED Mask */
+
+#define SCB_SHCSR_SECUREFAULTENA_Pos       19U                                            /*!< SCB SHCSR: SECUREFAULTENA Position */
+#define SCB_SHCSR_SECUREFAULTENA_Msk       (1UL << SCB_SHCSR_SECUREFAULTENA_Pos)          /*!< SCB SHCSR: SECUREFAULTENA Mask */
+
+#define SCB_SHCSR_USGFAULTENA_Pos          18U                                            /*!< SCB SHCSR: USGFAULTENA Position */
+#define SCB_SHCSR_USGFAULTENA_Msk          (1UL << SCB_SHCSR_USGFAULTENA_Pos)             /*!< SCB SHCSR: USGFAULTENA Mask */
+
+#define SCB_SHCSR_BUSFAULTENA_Pos          17U                                            /*!< SCB SHCSR: BUSFAULTENA Position */
+#define SCB_SHCSR_BUSFAULTENA_Msk          (1UL << SCB_SHCSR_BUSFAULTENA_Pos)             /*!< SCB SHCSR: BUSFAULTENA Mask (bit 17) */
+
+#define SCB_SHCSR_MEMFAULTENA_Pos          16U                                            /*!< SCB SHCSR: MEMFAULTENA Position */
+#define SCB_SHCSR_MEMFAULTENA_Msk          (1UL << SCB_SHCSR_MEMFAULTENA_Pos)             /*!< SCB SHCSR: MEMFAULTENA Mask (bit 16) */
+
+#define SCB_SHCSR_SVCALLPENDED_Pos         15U                                            /*!< SCB SHCSR: SVCALLPENDED Position */
+#define SCB_SHCSR_SVCALLPENDED_Msk         (1UL << SCB_SHCSR_SVCALLPENDED_Pos)            /*!< SCB SHCSR: SVCALLPENDED Mask (bit 15) */
+
+#define SCB_SHCSR_BUSFAULTPENDED_Pos       14U                                            /*!< SCB SHCSR: BUSFAULTPENDED Position */
+#define SCB_SHCSR_BUSFAULTPENDED_Msk       (1UL << SCB_SHCSR_BUSFAULTPENDED_Pos)          /*!< SCB SHCSR: BUSFAULTPENDED Mask (bit 14) */
+
+#define SCB_SHCSR_MEMFAULTPENDED_Pos       13U                                            /*!< SCB SHCSR: MEMFAULTPENDED Position */
+#define SCB_SHCSR_MEMFAULTPENDED_Msk       (1UL << SCB_SHCSR_MEMFAULTPENDED_Pos)          /*!< SCB SHCSR: MEMFAULTPENDED Mask (bit 13) */
+
+#define SCB_SHCSR_USGFAULTPENDED_Pos       12U                                            /*!< SCB SHCSR: USGFAULTPENDED Position */
+#define SCB_SHCSR_USGFAULTPENDED_Msk       (1UL << SCB_SHCSR_USGFAULTPENDED_Pos)          /*!< SCB SHCSR: USGFAULTPENDED Mask (bit 12) */
+
+#define SCB_SHCSR_SYSTICKACT_Pos           11U                                            /*!< SCB SHCSR: SYSTICKACT Position */
+#define SCB_SHCSR_SYSTICKACT_Msk           (1UL << SCB_SHCSR_SYSTICKACT_Pos)              /*!< SCB SHCSR: SYSTICKACT Mask (bit 11) */
+
+#define SCB_SHCSR_PENDSVACT_Pos            10U                                            /*!< SCB SHCSR: PENDSVACT Position */
+#define SCB_SHCSR_PENDSVACT_Msk            (1UL << SCB_SHCSR_PENDSVACT_Pos)               /*!< SCB SHCSR: PENDSVACT Mask (bit 10) */
+
+#define SCB_SHCSR_MONITORACT_Pos            8U                                            /*!< SCB SHCSR: MONITORACT Position */
+#define SCB_SHCSR_MONITORACT_Msk           (1UL << SCB_SHCSR_MONITORACT_Pos)              /*!< SCB SHCSR: MONITORACT Mask (bit 8) */
+
+#define SCB_SHCSR_SVCALLACT_Pos             7U                                            /*!< SCB SHCSR: SVCALLACT Position */
+#define SCB_SHCSR_SVCALLACT_Msk            (1UL << SCB_SHCSR_SVCALLACT_Pos)               /*!< SCB SHCSR: SVCALLACT Mask (bit 7) */
+
+#define SCB_SHCSR_NMIACT_Pos                5U                                            /*!< SCB SHCSR: NMIACT Position */
+#define SCB_SHCSR_NMIACT_Msk               (1UL << SCB_SHCSR_NMIACT_Pos)                  /*!< SCB SHCSR: NMIACT Mask (bit 5) */
+
+#define SCB_SHCSR_SECUREFAULTACT_Pos        4U                                            /*!< SCB SHCSR: SECUREFAULTACT Position */
+#define SCB_SHCSR_SECUREFAULTACT_Msk       (1UL << SCB_SHCSR_SECUREFAULTACT_Pos)          /*!< SCB SHCSR: SECUREFAULTACT Mask (bit 4) */
+
+#define SCB_SHCSR_USGFAULTACT_Pos           3U                                            /*!< SCB SHCSR: USGFAULTACT Position */
+#define SCB_SHCSR_USGFAULTACT_Msk          (1UL << SCB_SHCSR_USGFAULTACT_Pos)             /*!< SCB SHCSR: USGFAULTACT Mask (bit 3) */
+
+#define SCB_SHCSR_HARDFAULTACT_Pos          2U                                            /*!< SCB SHCSR: HARDFAULTACT Position */
+#define SCB_SHCSR_HARDFAULTACT_Msk         (1UL << SCB_SHCSR_HARDFAULTACT_Pos)            /*!< SCB SHCSR: HARDFAULTACT Mask (bit 2) */
+
+#define SCB_SHCSR_BUSFAULTACT_Pos           1U                                            /*!< SCB SHCSR: BUSFAULTACT Position */
+#define SCB_SHCSR_BUSFAULTACT_Msk          (1UL << SCB_SHCSR_BUSFAULTACT_Pos)             /*!< SCB SHCSR: BUSFAULTACT Mask (bit 1) */
+
+#define SCB_SHCSR_MEMFAULTACT_Pos           0U                                            /*!< SCB SHCSR: MEMFAULTACT Position */
+#define SCB_SHCSR_MEMFAULTACT_Msk          (1UL /*<< SCB_SHCSR_MEMFAULTACT_Pos*/)         /*!< SCB SHCSR: MEMFAULTACT Mask (bit 0) */
+
+/* SCB Configurable Fault Status Register (CFSR) Definitions */
+#define SCB_CFSR_USGFAULTSR_Pos            16U                                            /*!< SCB CFSR: Usage Fault Status Register Position */
+#define SCB_CFSR_USGFAULTSR_Msk            (0xFFFFUL << SCB_CFSR_USGFAULTSR_Pos)          /*!< SCB CFSR: Usage Fault Status Register Mask (bits [31:16]) */
+
+#define SCB_CFSR_BUSFAULTSR_Pos             8U                                            /*!< SCB CFSR: Bus Fault Status Register Position */
+#define SCB_CFSR_BUSFAULTSR_Msk            (0xFFUL << SCB_CFSR_BUSFAULTSR_Pos)            /*!< SCB CFSR: Bus Fault Status Register Mask (bits [15:8]) */
+
+#define SCB_CFSR_MEMFAULTSR_Pos             0U                                            /*!< SCB CFSR: Memory Manage Fault Status Register Position */
+#define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask (bits [7:0]) */
+
+/* MemManage Fault Status Register (MMFSR, part of SCB Configurable Fault Status Register) */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask (CFSR bit 7) */
+
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask (CFSR bit 5) */
+
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask (CFSR bit 4) */
+
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask (CFSR bit 3) */
+
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask (CFSR bit 1) */
+
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask (CFSR bit 0) */
+
+/* BusFault Status Register (BFSR, part of SCB Configurable Fault Status Register) */
+#define SCB_CFSR_BFARVALID_Pos            (SCB_CFSR_BUSFAULTSR_Pos + 7U)                  /*!< SCB CFSR (BFSR): BFARVALID Position */
+#define SCB_CFSR_BFARVALID_Msk            (1UL << SCB_CFSR_BFARVALID_Pos)                 /*!< SCB CFSR (BFSR): BFARVALID Mask (CFSR bit 15) */
+
+#define SCB_CFSR_LSPERR_Pos               (SCB_CFSR_BUSFAULTSR_Pos + 5U)                  /*!< SCB CFSR (BFSR): LSPERR Position */
+#define SCB_CFSR_LSPERR_Msk               (1UL << SCB_CFSR_LSPERR_Pos)                    /*!< SCB CFSR (BFSR): LSPERR Mask (CFSR bit 13) */
+
+#define SCB_CFSR_STKERR_Pos               (SCB_CFSR_BUSFAULTSR_Pos + 4U)                  /*!< SCB CFSR (BFSR): STKERR Position */
+#define SCB_CFSR_STKERR_Msk               (1UL << SCB_CFSR_STKERR_Pos)                    /*!< SCB CFSR (BFSR): STKERR Mask (CFSR bit 12) */
+
+#define SCB_CFSR_UNSTKERR_Pos             (SCB_CFSR_BUSFAULTSR_Pos + 3U)                  /*!< SCB CFSR (BFSR): UNSTKERR Position */
+#define SCB_CFSR_UNSTKERR_Msk             (1UL << SCB_CFSR_UNSTKERR_Pos)                  /*!< SCB CFSR (BFSR): UNSTKERR Mask (CFSR bit 11) */
+
+#define SCB_CFSR_IMPRECISERR_Pos          (SCB_CFSR_BUSFAULTSR_Pos + 2U)                  /*!< SCB CFSR (BFSR): IMPRECISERR Position */
+#define SCB_CFSR_IMPRECISERR_Msk          (1UL << SCB_CFSR_IMPRECISERR_Pos)               /*!< SCB CFSR (BFSR): IMPRECISERR Mask (CFSR bit 10) */
+
+#define SCB_CFSR_PRECISERR_Pos            (SCB_CFSR_BUSFAULTSR_Pos + 1U)                  /*!< SCB CFSR (BFSR): PRECISERR Position */
+#define SCB_CFSR_PRECISERR_Msk            (1UL << SCB_CFSR_PRECISERR_Pos)                 /*!< SCB CFSR (BFSR): PRECISERR Mask (CFSR bit 9) */
+
+#define SCB_CFSR_IBUSERR_Pos              (SCB_CFSR_BUSFAULTSR_Pos + 0U)                  /*!< SCB CFSR (BFSR): IBUSERR Position */
+#define SCB_CFSR_IBUSERR_Msk              (1UL << SCB_CFSR_IBUSERR_Pos)                   /*!< SCB CFSR (BFSR): IBUSERR Mask (CFSR bit 8) */
+
+/* UsageFault Status Register (UFSR, part of SCB Configurable Fault Status Register) */
+#define SCB_CFSR_DIVBYZERO_Pos            (SCB_CFSR_USGFAULTSR_Pos + 9U)                  /*!< SCB CFSR (UFSR): DIVBYZERO Position */
+#define SCB_CFSR_DIVBYZERO_Msk            (1UL << SCB_CFSR_DIVBYZERO_Pos)                 /*!< SCB CFSR (UFSR): DIVBYZERO Mask (CFSR bit 25) */
+
+#define SCB_CFSR_UNALIGNED_Pos            (SCB_CFSR_USGFAULTSR_Pos + 8U)                  /*!< SCB CFSR (UFSR): UNALIGNED Position */
+#define SCB_CFSR_UNALIGNED_Msk            (1UL << SCB_CFSR_UNALIGNED_Pos)                 /*!< SCB CFSR (UFSR): UNALIGNED Mask (CFSR bit 24) */
+
+#define SCB_CFSR_STKOF_Pos                (SCB_CFSR_USGFAULTSR_Pos + 4U)                  /*!< SCB CFSR (UFSR): STKOF Position */
+#define SCB_CFSR_STKOF_Msk                (1UL << SCB_CFSR_STKOF_Pos)                     /*!< SCB CFSR (UFSR): STKOF Mask (CFSR bit 20) */
+
+#define SCB_CFSR_NOCP_Pos                 (SCB_CFSR_USGFAULTSR_Pos + 3U)                  /*!< SCB CFSR (UFSR): NOCP Position */
+#define SCB_CFSR_NOCP_Msk                 (1UL << SCB_CFSR_NOCP_Pos)                      /*!< SCB CFSR (UFSR): NOCP Mask (CFSR bit 19) */
+
+#define SCB_CFSR_INVPC_Pos                (SCB_CFSR_USGFAULTSR_Pos + 2U)                  /*!< SCB CFSR (UFSR): INVPC Position */
+#define SCB_CFSR_INVPC_Msk                (1UL << SCB_CFSR_INVPC_Pos)                     /*!< SCB CFSR (UFSR): INVPC Mask (CFSR bit 18) */
+
+#define SCB_CFSR_INVSTATE_Pos             (SCB_CFSR_USGFAULTSR_Pos + 1U)                  /*!< SCB CFSR (UFSR): INVSTATE Position */
+#define SCB_CFSR_INVSTATE_Msk             (1UL << SCB_CFSR_INVSTATE_Pos)                  /*!< SCB CFSR (UFSR): INVSTATE Mask (CFSR bit 17) */
+
+#define SCB_CFSR_UNDEFINSTR_Pos           (SCB_CFSR_USGFAULTSR_Pos + 0U)                  /*!< SCB CFSR (UFSR): UNDEFINSTR Position */
+#define SCB_CFSR_UNDEFINSTR_Msk           (1UL << SCB_CFSR_UNDEFINSTR_Pos)                /*!< SCB CFSR (UFSR): UNDEFINSTR Mask (CFSR bit 16) */
+
+/* SCB Hard Fault Status Register (HFSR) Definitions */
+#define SCB_HFSR_DEBUGEVT_Pos              31U                                            /*!< SCB HFSR: DEBUGEVT Position */
+#define SCB_HFSR_DEBUGEVT_Msk              (1UL << SCB_HFSR_DEBUGEVT_Pos)                 /*!< SCB HFSR: DEBUGEVT Mask (bit 31) */
+
+#define SCB_HFSR_FORCED_Pos                30U                                            /*!< SCB HFSR: FORCED Position */
+#define SCB_HFSR_FORCED_Msk                (1UL << SCB_HFSR_FORCED_Pos)                   /*!< SCB HFSR: FORCED Mask (bit 30) */
+
+#define SCB_HFSR_VECTTBL_Pos                1U                                            /*!< SCB HFSR: VECTTBL Position */
+#define SCB_HFSR_VECTTBL_Msk               (1UL << SCB_HFSR_VECTTBL_Pos)                  /*!< SCB HFSR: VECTTBL Mask (bit 1) */
+
+/* SCB Debug Fault Status Register (DFSR) Definitions */
+#define SCB_DFSR_PMU_Pos                    5U                                            /*!< SCB DFSR: PMU Position */
+#define SCB_DFSR_PMU_Msk                   (1UL << SCB_DFSR_PMU_Pos)                      /*!< SCB DFSR: PMU Mask (bit 5) */
+
+#define SCB_DFSR_EXTERNAL_Pos               4U                                            /*!< SCB DFSR: EXTERNAL Position */
+#define SCB_DFSR_EXTERNAL_Msk              (1UL << SCB_DFSR_EXTERNAL_Pos)                 /*!< SCB DFSR: EXTERNAL Mask (bit 4) */
+
+#define SCB_DFSR_VCATCH_Pos                 3U                                            /*!< SCB DFSR: VCATCH Position */
+#define SCB_DFSR_VCATCH_Msk                (1UL << SCB_DFSR_VCATCH_Pos)                   /*!< SCB DFSR: VCATCH Mask (bit 3) */
+
+#define SCB_DFSR_DWTTRAP_Pos                2U                                            /*!< SCB DFSR: DWTTRAP Position */
+#define SCB_DFSR_DWTTRAP_Msk               (1UL << SCB_DFSR_DWTTRAP_Pos)                  /*!< SCB DFSR: DWTTRAP Mask (bit 2) */
+
+#define SCB_DFSR_BKPT_Pos                   1U                                            /*!< SCB DFSR: BKPT Position */
+#define SCB_DFSR_BKPT_Msk                  (1UL << SCB_DFSR_BKPT_Pos)                     /*!< SCB DFSR: BKPT Mask (bit 1) */
+
+#define SCB_DFSR_HALTED_Pos                 0U                                            /*!< SCB DFSR: HALTED Position */
+#define SCB_DFSR_HALTED_Msk                (1UL /*<< SCB_DFSR_HALTED_Pos*/)               /*!< SCB DFSR: HALTED Mask (bit 0) */
+
+/* SCB Non-Secure Access Control Register (NSACR) Definitions */
+#define SCB_NSACR_CP11_Pos                 11U                                            /*!< SCB NSACR: CP11 Position */
+#define SCB_NSACR_CP11_Msk                 (1UL << SCB_NSACR_CP11_Pos)                    /*!< SCB NSACR: CP11 Mask (bit 11) */
+
+#define SCB_NSACR_CP10_Pos                 10U                                            /*!< SCB NSACR: CP10 Position */
+#define SCB_NSACR_CP10_Msk                 (1UL << SCB_NSACR_CP10_Pos)                    /*!< SCB NSACR: CP10 Mask (bit 10) */
+
+#define SCB_NSACR_CP7_Pos                   7U                                            /*!< SCB NSACR: CP7 Position */
+#define SCB_NSACR_CP7_Msk                  (1UL << SCB_NSACR_CP7_Pos)                     /*!< SCB NSACR: CP7 Mask (bit 7) */
+
+#define SCB_NSACR_CP6_Pos                   6U                                            /*!< SCB NSACR: CP6 Position */
+#define SCB_NSACR_CP6_Msk                  (1UL << SCB_NSACR_CP6_Pos)                     /*!< SCB NSACR: CP6 Mask (bit 6) */
+
+#define SCB_NSACR_CP5_Pos                   5U                                            /*!< SCB NSACR: CP5 Position */
+#define SCB_NSACR_CP5_Msk                  (1UL << SCB_NSACR_CP5_Pos)                     /*!< SCB NSACR: CP5 Mask (bit 5) */
+
+#define SCB_NSACR_CP4_Pos                   4U                                            /*!< SCB NSACR: CP4 Position */
+#define SCB_NSACR_CP4_Msk                  (1UL << SCB_NSACR_CP4_Pos)                     /*!< SCB NSACR: CP4 Mask (bit 4) */
+
+#define SCB_NSACR_CP3_Pos                   3U                                            /*!< SCB NSACR: CP3 Position */
+#define SCB_NSACR_CP3_Msk                  (1UL << SCB_NSACR_CP3_Pos)                     /*!< SCB NSACR: CP3 Mask (bit 3) */
+
+#define SCB_NSACR_CP2_Pos                   2U                                            /*!< SCB NSACR: CP2 Position */
+#define SCB_NSACR_CP2_Msk                  (1UL << SCB_NSACR_CP2_Pos)                     /*!< SCB NSACR: CP2 Mask (bit 2) */
+
+#define SCB_NSACR_CP1_Pos                   1U                                            /*!< SCB NSACR: CP1 Position */
+#define SCB_NSACR_CP1_Msk                  (1UL << SCB_NSACR_CP1_Pos)                     /*!< SCB NSACR: CP1 Mask (bit 1) */
+
+#define SCB_NSACR_CP0_Pos                   0U                                            /*!< SCB NSACR: CP0 Position */
+#define SCB_NSACR_CP0_Msk                  (1UL /*<< SCB_NSACR_CP0_Pos*/)                 /*!< SCB NSACR: CP0 Mask (bit 0) */
+
+/* SCB Debug Feature Register 0 (ID_DFR) Definitions */
+#define SCB_ID_DFR_UDE_Pos                 28U                                            /*!< SCB ID_DFR: UDE Position */
+#define SCB_ID_DFR_UDE_Msk                 (0xFUL << SCB_ID_DFR_UDE_Pos)                  /*!< SCB ID_DFR: UDE Mask (bits [31:28]) */
+
+#define SCB_ID_DFR_MProfDbg_Pos            20U                                            /*!< SCB ID_DFR: MProfDbg Position */
+#define SCB_ID_DFR_MProfDbg_Msk            (0xFUL << SCB_ID_DFR_MProfDbg_Pos)             /*!< SCB ID_DFR: MProfDbg Mask (bits [23:20]) */
+
+/* SCB Cache Level ID Register (CLIDR) Definitions */
+#define SCB_CLIDR_LOUU_Pos                 27U                                            /*!< SCB CLIDR: LoUU Position */
+#define SCB_CLIDR_LOUU_Msk                 (7UL << SCB_CLIDR_LOUU_Pos)                    /*!< SCB CLIDR: LoUU Mask (bits [29:27]) */
+
+#define SCB_CLIDR_LOC_Pos                  24U                                            /*!< SCB CLIDR: LoC Position */
+#define SCB_CLIDR_LOC_Msk                  (7UL << SCB_CLIDR_LOC_Pos)                     /*!< SCB CLIDR: LoC Mask (bits [26:24]) */
+
+/* SCB Cache Type Register (CTR) Definitions */
+#define SCB_CTR_FORMAT_Pos                 29U                                            /*!< SCB CTR: Format Position */
+#define SCB_CTR_FORMAT_Msk                 (7UL << SCB_CTR_FORMAT_Pos)                    /*!< SCB CTR: Format Mask (bits [31:29]) */
+
+#define SCB_CTR_CWG_Pos                    24U                                            /*!< SCB CTR: CWG Position */
+#define SCB_CTR_CWG_Msk                    (0xFUL << SCB_CTR_CWG_Pos)                     /*!< SCB CTR: CWG Mask (bits [27:24]) */
+
+#define SCB_CTR_ERG_Pos                    20U                                            /*!< SCB CTR: ERG Position */
+#define SCB_CTR_ERG_Msk                    (0xFUL << SCB_CTR_ERG_Pos)                     /*!< SCB CTR: ERG Mask (bits [23:20]) */
+
+#define SCB_CTR_DMINLINE_Pos               16U                                            /*!< SCB CTR: DminLine Position */
+#define SCB_CTR_DMINLINE_Msk               (0xFUL << SCB_CTR_DMINLINE_Pos)                /*!< SCB CTR: DminLine Mask (bits [19:16]) */
+
+#define SCB_CTR_IMINLINE_Pos                0U                                            /*!< SCB CTR: IminLine Position */
+#define SCB_CTR_IMINLINE_Msk               (0xFUL /*<< SCB_CTR_IMINLINE_Pos*/)            /*!< SCB CTR: IminLine Mask (bits [3:0]) */
+
+/* SCB Cache Size ID Register (CCSIDR) Definitions */
+#define SCB_CCSIDR_WT_Pos                  31U                                            /*!< SCB CCSIDR: WT Position */
+#define SCB_CCSIDR_WT_Msk                  (1UL << SCB_CCSIDR_WT_Pos)                     /*!< SCB CCSIDR: WT Mask (bit 31) */
+
+#define SCB_CCSIDR_WB_Pos                  30U                                            /*!< SCB CCSIDR: WB Position */
+#define SCB_CCSIDR_WB_Msk                  (1UL << SCB_CCSIDR_WB_Pos)                     /*!< SCB CCSIDR: WB Mask (bit 30) */
+
+#define SCB_CCSIDR_RA_Pos                  29U                                            /*!< SCB CCSIDR: RA Position */
+#define SCB_CCSIDR_RA_Msk                  (1UL << SCB_CCSIDR_RA_Pos)                     /*!< SCB CCSIDR: RA Mask (bit 29) */
+
+#define SCB_CCSIDR_WA_Pos                  28U                                            /*!< SCB CCSIDR: WA Position */
+#define SCB_CCSIDR_WA_Msk                  (1UL << SCB_CCSIDR_WA_Pos)                     /*!< SCB CCSIDR: WA Mask (bit 28) */
+
+#define SCB_CCSIDR_NUMSETS_Pos             13U                                            /*!< SCB CCSIDR: NumSets Position */
+#define SCB_CCSIDR_NUMSETS_Msk             (0x7FFFUL << SCB_CCSIDR_NUMSETS_Pos)           /*!< SCB CCSIDR: NumSets Mask (bits [27:13]) */
+
+#define SCB_CCSIDR_ASSOCIATIVITY_Pos        3U                                            /*!< SCB CCSIDR: Associativity Position */
+#define SCB_CCSIDR_ASSOCIATIVITY_Msk       (0x3FFUL << SCB_CCSIDR_ASSOCIATIVITY_Pos)      /*!< SCB CCSIDR: Associativity Mask (bits [12:3]) */
+
+#define SCB_CCSIDR_LINESIZE_Pos             0U                                            /*!< SCB CCSIDR: LineSize Position */
+#define SCB_CCSIDR_LINESIZE_Msk            (7UL /*<< SCB_CCSIDR_LINESIZE_Pos*/)           /*!< SCB CCSIDR: LineSize Mask (bits [2:0]) */
+
+/* SCB Cache Size Selection Register (CSSELR) Definitions */
+#define SCB_CSSELR_LEVEL_Pos                1U                                            /*!< SCB CSSELR: Level Position */
+#define SCB_CSSELR_LEVEL_Msk               (7UL << SCB_CSSELR_LEVEL_Pos)                  /*!< SCB CSSELR: Level Mask (bits [3:1]) */
+
+#define SCB_CSSELR_IND_Pos                  0U                                            /*!< SCB CSSELR: InD Position */
+#define SCB_CSSELR_IND_Msk                 (1UL /*<< SCB_CSSELR_IND_Pos*/)                /*!< SCB CSSELR: InD Mask (bit 0) */
+
+/* SCB Software Triggered Interrupt Register (STIR) Definitions */
+#define SCB_STIR_INTID_Pos                  0U                                            /*!< SCB STIR: INTID Position */
+#define SCB_STIR_INTID_Msk                 (0x1FFUL /*<< SCB_STIR_INTID_Pos*/)            /*!< SCB STIR: INTID Mask (bits [8:0]) */
+
+/* SCB RAS Fault Status Register (RFSR) Definitions */
+#define SCB_RFSR_V_Pos                     31U                                            /*!< SCB RFSR: V Position */
+#define SCB_RFSR_V_Msk                     (1UL << SCB_RFSR_V_Pos)                        /*!< SCB RFSR: V Mask (bit 31) */
+
+#define SCB_RFSR_IS_Pos                    16U                                            /*!< SCB RFSR: IS Position */
+#define SCB_RFSR_IS_Msk                    (0x7FFFUL << SCB_RFSR_IS_Pos)                  /*!< SCB RFSR: IS Mask (bits [30:16]) */
+
+#define SCB_RFSR_UET_Pos                    0U                                            /*!< SCB RFSR: UET Position */
+#define SCB_RFSR_UET_Msk                   (3UL /*<< SCB_RFSR_UET_Pos*/)                  /*!< SCB RFSR: UET Mask (bits [1:0]) */
+
+/* SCB D-Cache Invalidate by Set-way Register (DCISW) Definitions */
+#define SCB_DCISW_WAY_Pos                  30U                                            /*!< SCB DCISW: Way Position */
+#define SCB_DCISW_WAY_Msk                  (3UL << SCB_DCISW_WAY_Pos)                     /*!< SCB DCISW: Way Mask (bits [31:30]) */
+
+#define SCB_DCISW_SET_Pos                   5U                                            /*!< SCB DCISW: Set Position */
+#define SCB_DCISW_SET_Msk                  (0x1FFUL << SCB_DCISW_SET_Pos)                 /*!< SCB DCISW: Set Mask (bits [13:5]) */
+
+/* SCB D-Cache Clean by Set-way Register (DCCSW) Definitions */
+#define SCB_DCCSW_WAY_Pos                  30U                                            /*!< SCB DCCSW: Way Position */
+#define SCB_DCCSW_WAY_Msk                  (3UL << SCB_DCCSW_WAY_Pos)                     /*!< SCB DCCSW: Way Mask (bits [31:30]) */
+
+#define SCB_DCCSW_SET_Pos                   5U                                            /*!< SCB DCCSW: Set Position */
+#define SCB_DCCSW_SET_Msk                  (0x1FFUL << SCB_DCCSW_SET_Pos)                 /*!< SCB DCCSW: Set Mask (bits [13:5]) */
+
+/* SCB D-Cache Clean and Invalidate by Set-way Register (DCCISW) Definitions */
+#define SCB_DCCISW_WAY_Pos                 30U                                            /*!< SCB DCCISW: Way Position */
+#define SCB_DCCISW_WAY_Msk                 (3UL << SCB_DCCISW_WAY_Pos)                    /*!< SCB DCCISW: Way Mask (bits [31:30]) */
+
+#define SCB_DCCISW_SET_Pos                  5U                                            /*!< SCB DCCISW: Set Position */
+#define SCB_DCCISW_SET_Msk                 (0x1FFUL << SCB_DCCISW_SET_Pos)                /*!< SCB DCCISW: Set Mask (bits [13:5]) */
+
+/*@} end of group CMSIS_SCB */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_ICB Implementation Control Block register (ICB)
+  \brief    Type definitions for the Implementation Control Block Register
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Implementation Control Block (ICB): one reserved word followed by three 32-bit registers (offsets 0x000-0x00C).
+ */
+typedef struct
+{
+        uint32_t RESERVED0[1U];          /*   Offset: 0x000        Reserved word; keeps ICTR at offset 0x004 */
+  __IM  uint32_t ICTR;                   /*!< Offset: 0x004 (R/ )  Interrupt Controller Type Register */
+  __IOM uint32_t ACTLR;                  /*!< Offset: 0x008 (R/W)  Auxiliary Control Register */
+  __IOM uint32_t CPPWR;                  /*!< Offset: 0x00C (R/W)  Coprocessor Power Control Register */
+} ICB_Type;
+
+/* ICB Auxiliary Control Register (ACTLR) Definitions */
+#define ICB_ACTLR_DISCRITAXIRUW_Pos     27U                                               /*!< ACTLR: DISCRITAXIRUW Position */
+#define ICB_ACTLR_DISCRITAXIRUW_Msk     (1UL << ICB_ACTLR_DISCRITAXIRUW_Pos)              /*!< ACTLR: DISCRITAXIRUW Mask (bit 27) */
+
+#define ICB_ACTLR_DISCRITAXIRUR_Pos     15U                                               /*!< ACTLR: DISCRITAXIRUR Position */
+#define ICB_ACTLR_DISCRITAXIRUR_Msk     (1UL << ICB_ACTLR_DISCRITAXIRUR_Pos)              /*!< ACTLR: DISCRITAXIRUR Mask (bit 15) */
+
+#define ICB_ACTLR_EVENTBUSEN_Pos        14U                                               /*!< ACTLR: EVENTBUSEN Position */
+#define ICB_ACTLR_EVENTBUSEN_Msk        (1UL << ICB_ACTLR_EVENTBUSEN_Pos)                 /*!< ACTLR: EVENTBUSEN Mask (bit 14) */
+
+#define ICB_ACTLR_EVENTBUSEN_S_Pos      13U                                               /*!< ACTLR: EVENTBUSEN_S Position */
+#define ICB_ACTLR_EVENTBUSEN_S_Msk      (1UL << ICB_ACTLR_EVENTBUSEN_S_Pos)               /*!< ACTLR: EVENTBUSEN_S Mask (bit 13) */
+
+#define ICB_ACTLR_DISITMATBFLUSH_Pos    12U                                               /*!< ACTLR: DISITMATBFLUSH Position */
+#define ICB_ACTLR_DISITMATBFLUSH_Msk    (1UL << ICB_ACTLR_DISITMATBFLUSH_Pos)             /*!< ACTLR: DISITMATBFLUSH Mask (bit 12) */
+
+#define ICB_ACTLR_DISNWAMODE_Pos        11U                                               /*!< ACTLR: DISNWAMODE Position */
+#define ICB_ACTLR_DISNWAMODE_Msk        (1UL << ICB_ACTLR_DISNWAMODE_Pos)                 /*!< ACTLR: DISNWAMODE Mask (bit 11) */
+
+#define ICB_ACTLR_FPEXCODIS_Pos         10U                                               /*!< ACTLR: FPEXCODIS Position */
+#define ICB_ACTLR_FPEXCODIS_Msk         (1UL << ICB_ACTLR_FPEXCODIS_Pos)                  /*!< ACTLR: FPEXCODIS Mask (bit 10) */
+
+/* ICB Interrupt Controller Type Register (ICTR) Definitions */
+#define ICB_ICTR_INTLINESNUM_Pos         0U                                               /*!< ICTR: INTLINESNUM Position */
+#define ICB_ICTR_INTLINESNUM_Msk        (0xFUL /*<< ICB_ICTR_INTLINESNUM_Pos*/)           /*!< ICTR: INTLINESNUM Mask (bits [3:0]) */
+
+/*@} end of group CMSIS_ICB */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SysTick     System Tick Timer (SysTick)
+  \brief    Type definitions for the System Timer Registers.
+  @{
+ */
+
+/**
+  \brief  Structure type to access the System Timer (SysTick): four consecutive 32-bit registers at offsets 0x000-0x00C.
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  SysTick Control and Status Register */
+  __IOM uint32_t LOAD;                   /*!< Offset: 0x004 (R/W)  SysTick Reload Value Register */
+  __IOM uint32_t VAL;                    /*!< Offset: 0x008 (R/W)  SysTick Current Value Register */
+  __IM  uint32_t CALIB;                  /*!< Offset: 0x00C (R/ )  SysTick Calibration Register */
+} SysTick_Type;
+
+/* SysTick Control / Status Register (CTRL) Definitions */
+#define SysTick_CTRL_COUNTFLAG_Pos         16U                                            /*!< SysTick CTRL: COUNTFLAG Position */
+#define SysTick_CTRL_COUNTFLAG_Msk         (1UL << SysTick_CTRL_COUNTFLAG_Pos)            /*!< SysTick CTRL: COUNTFLAG Mask (bit 16) */
+
+#define SysTick_CTRL_CLKSOURCE_Pos          2U                                            /*!< SysTick CTRL: CLKSOURCE Position */
+#define SysTick_CTRL_CLKSOURCE_Msk         (1UL << SysTick_CTRL_CLKSOURCE_Pos)            /*!< SysTick CTRL: CLKSOURCE Mask (bit 2) */
+
+#define SysTick_CTRL_TICKINT_Pos            1U                                            /*!< SysTick CTRL: TICKINT Position */
+#define SysTick_CTRL_TICKINT_Msk           (1UL << SysTick_CTRL_TICKINT_Pos)              /*!< SysTick CTRL: TICKINT Mask (bit 1) */
+
+#define SysTick_CTRL_ENABLE_Pos             0U                                            /*!< SysTick CTRL: ENABLE Position */
+#define SysTick_CTRL_ENABLE_Msk            (1UL /*<< SysTick_CTRL_ENABLE_Pos*/)           /*!< SysTick CTRL: ENABLE Mask (bit 0) */
+
+/* SysTick Reload Register (LOAD) Definitions */
+#define SysTick_LOAD_RELOAD_Pos             0U                                            /*!< SysTick LOAD: RELOAD Position */
+#define SysTick_LOAD_RELOAD_Msk            (0xFFFFFFUL /*<< SysTick_LOAD_RELOAD_Pos*/)    /*!< SysTick LOAD: RELOAD Mask (bits [23:0]) */
+
+/* SysTick Current Register (VAL) Definitions */
+#define SysTick_VAL_CURRENT_Pos             0U                                            /*!< SysTick VAL: CURRENT Position */
+#define SysTick_VAL_CURRENT_Msk            (0xFFFFFFUL /*<< SysTick_VAL_CURRENT_Pos*/)    /*!< SysTick VAL: CURRENT Mask (bits [23:0]) */
+
+/* SysTick Calibration Register (CALIB) Definitions */
+#define SysTick_CALIB_NOREF_Pos            31U                                            /*!< SysTick CALIB: NOREF Position */
+#define SysTick_CALIB_NOREF_Msk            (1UL << SysTick_CALIB_NOREF_Pos)               /*!< SysTick CALIB: NOREF Mask (bit 31) */
+
+#define SysTick_CALIB_SKEW_Pos             30U                                            /*!< SysTick CALIB: SKEW Position */
+#define SysTick_CALIB_SKEW_Msk             (1UL << SysTick_CALIB_SKEW_Pos)                /*!< SysTick CALIB: SKEW Mask (bit 30) */
+
+#define SysTick_CALIB_TENMS_Pos             0U                                            /*!< SysTick CALIB: TENMS Position */
+#define SysTick_CALIB_TENMS_Msk            (0xFFFFFFUL /*<< SysTick_CALIB_TENMS_Pos*/)    /*!< SysTick CALIB: TENMS Mask (bits [23:0]) */
+
+/*@} end of group CMSIS_SysTick */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_ITM     Instrumentation Trace Macrocell (ITM)
+  \brief    Type definitions for the Instrumentation Trace Macrocell (ITM)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Instrumentation Trace Macrocell Register (ITM); RESERVEDn arrays pad the gaps so each register lands on its documented offset.
+ */
+typedef struct
+{
+  __OM  union
+  {
+    __OM  uint8_t    u8;                 /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 8-bit */
+    __OM  uint16_t   u16;                /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 16-bit */
+    __OM  uint32_t   u32;                /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 32-bit */
+  }  PORT [32U];                         /*!< Offset: 0x000 ( /W)  ITM Stimulus Port Registers (32 ports, one 32-bit word each: 0x000-0x07C) */
+        uint32_t RESERVED0[864U];        /*   Offset: 0x080-0xDFC  Reserved */
+  __IOM uint32_t TER;                    /*!< Offset: 0xE00 (R/W)  ITM Trace Enable Register */
+        uint32_t RESERVED1[15U];         /*   Offset: 0xE04-0xE3C  Reserved */
+  __IOM uint32_t TPR;                    /*!< Offset: 0xE40 (R/W)  ITM Trace Privilege Register */
+        uint32_t RESERVED2[15U];         /*   Offset: 0xE44-0xE7C  Reserved */
+  __IOM uint32_t TCR;                    /*!< Offset: 0xE80 (R/W)  ITM Trace Control Register */
+        uint32_t RESERVED3[27U];         /*   Offset: 0xE84-0xEEC  Reserved */
+  __IM  uint32_t ITREAD;                 /*!< Offset: 0xEF0 (R/ )  ITM Integration Read Register */
+        uint32_t RESERVED4[1U];          /*   Offset: 0xEF4        Reserved */
+  __OM  uint32_t ITWRITE;                /*!< Offset: 0xEF8 ( /W)  ITM Integration Write Register */
+        uint32_t RESERVED5[1U];          /*   Offset: 0xEFC        Reserved */
+  __IOM uint32_t ITCTRL;                 /*!< Offset: 0xF00 (R/W)  ITM Integration Mode Control Register */
+        uint32_t RESERVED6[46U];         /*   Offset: 0xF04-0xFB8  Reserved */
+  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  ITM Device Architecture Register */
+        uint32_t RESERVED7[3U];          /*   Offset: 0xFC0-0xFC8  Reserved */
+  __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  ITM Device Type Register */
+  __IM  uint32_t PID4;                   /*!< Offset: 0xFD0 (R/ )  ITM Peripheral Identification Register #4 */
+  __IM  uint32_t PID5;                   /*!< Offset: 0xFD4 (R/ )  ITM Peripheral Identification Register #5 */
+  __IM  uint32_t PID6;                   /*!< Offset: 0xFD8 (R/ )  ITM Peripheral Identification Register #6 */
+  __IM  uint32_t PID7;                   /*!< Offset: 0xFDC (R/ )  ITM Peripheral Identification Register #7 */
+  __IM  uint32_t PID0;                   /*!< Offset: 0xFE0 (R/ )  ITM Peripheral Identification Register #0 */
+  __IM  uint32_t PID1;                   /*!< Offset: 0xFE4 (R/ )  ITM Peripheral Identification Register #1 */
+  __IM  uint32_t PID2;                   /*!< Offset: 0xFE8 (R/ )  ITM Peripheral Identification Register #2 */
+  __IM  uint32_t PID3;                   /*!< Offset: 0xFEC (R/ )  ITM Peripheral Identification Register #3 */
+  __IM  uint32_t CID0;                   /*!< Offset: 0xFF0 (R/ )  ITM Component  Identification Register #0 */
+  __IM  uint32_t CID1;                   /*!< Offset: 0xFF4 (R/ )  ITM Component  Identification Register #1 */
+  __IM  uint32_t CID2;                   /*!< Offset: 0xFF8 (R/ )  ITM Component  Identification Register #2 */
+  __IM  uint32_t CID3;                   /*!< Offset: 0xFFC (R/ )  ITM Component  Identification Register #3 */
+} ITM_Type;
+
+/* ITM Stimulus Port Register (STIM) Definitions */
+#define ITM_STIM_DISABLED_Pos               1U                                            /*!< ITM STIM: DISABLED Position */
+#define ITM_STIM_DISABLED_Msk              (0x1UL << ITM_STIM_DISABLED_Pos)               /*!< ITM STIM: DISABLED Mask (bit 1) */
+
+#define ITM_STIM_FIFOREADY_Pos              0U                                            /*!< ITM STIM: FIFOREADY Position */
+#define ITM_STIM_FIFOREADY_Msk             (0x1UL /*<< ITM_STIM_FIFOREADY_Pos*/)          /*!< ITM STIM: FIFOREADY Mask (bit 0) */
+
+/* ITM Trace Privilege Register (TPR) Definitions */
+#define ITM_TPR_PRIVMASK_Pos                0U                                            /*!< ITM TPR: PRIVMASK Position */
+#define ITM_TPR_PRIVMASK_Msk               (0xFUL /*<< ITM_TPR_PRIVMASK_Pos*/)            /*!< ITM TPR: PRIVMASK Mask (bits [3:0]) */
+
+/* ITM Trace Control Register (TCR) Definitions */
+#define ITM_TCR_BUSY_Pos                   23U                                            /*!< ITM TCR: BUSY Position */
+#define ITM_TCR_BUSY_Msk                   (1UL << ITM_TCR_BUSY_Pos)                      /*!< ITM TCR: BUSY Mask (bit 23) */
+
+#define ITM_TCR_TRACEBUSID_Pos             16U                                            /*!< ITM TCR: TRACEBUSID (ATBID) Position */
+#define ITM_TCR_TRACEBUSID_Msk             (0x7FUL << ITM_TCR_TRACEBUSID_Pos)             /*!< ITM TCR: TRACEBUSID (ATBID) Mask (bits [22:16]) */
+
+#define ITM_TCR_GTSFREQ_Pos                10U                                            /*!< ITM TCR: Global timestamp frequency Position */
+#define ITM_TCR_GTSFREQ_Msk                (3UL << ITM_TCR_GTSFREQ_Pos)                   /*!< ITM TCR: Global timestamp frequency Mask (bits [11:10]) */
+
+#define ITM_TCR_TSPRESCALE_Pos              8U                                            /*!< ITM TCR: TSPRESCALE Position */
+#define ITM_TCR_TSPRESCALE_Msk             (3UL << ITM_TCR_TSPRESCALE_Pos)                /*!< ITM TCR: TSPRESCALE Mask (bits [9:8]) */
+
+#define ITM_TCR_STALLENA_Pos                5U                                            /*!< ITM TCR: STALLENA Position */
+#define ITM_TCR_STALLENA_Msk               (1UL << ITM_TCR_STALLENA_Pos)                  /*!< ITM TCR: STALLENA Mask (bit 5) */
+
+#define ITM_TCR_SWOENA_Pos                  4U                                            /*!< ITM TCR: SWOENA Position */
+#define ITM_TCR_SWOENA_Msk                 (1UL << ITM_TCR_SWOENA_Pos)                    /*!< ITM TCR: SWOENA Mask (bit 4) */
+
+#define ITM_TCR_DWTENA_Pos                  3U                                            /*!< ITM TCR: DWTENA Position */
+#define ITM_TCR_DWTENA_Msk                 (1UL << ITM_TCR_DWTENA_Pos)                    /*!< ITM TCR: DWTENA Mask (bit 3) */
+
+#define ITM_TCR_SYNCENA_Pos                 2U                                            /*!< ITM TCR: SYNCENA Position */
+#define ITM_TCR_SYNCENA_Msk                (1UL << ITM_TCR_SYNCENA_Pos)                   /*!< ITM TCR: SYNCENA Mask (bit 2) */
+
+#define ITM_TCR_TSENA_Pos                   1U                                            /*!< ITM TCR: TSENA Position */
+#define ITM_TCR_TSENA_Msk                  (1UL << ITM_TCR_TSENA_Pos)                     /*!< ITM TCR: TSENA Mask (bit 1) */
+
+#define ITM_TCR_ITMENA_Pos                  0U                                            /*!< ITM TCR: ITM Enable bit Position */
+#define ITM_TCR_ITMENA_Msk                 (1UL /*<< ITM_TCR_ITMENA_Pos*/)                /*!< ITM TCR: ITM Enable bit Mask (bit 0) */
+
+/* ITM Integration Read Register Definitions */
+#define ITM_ITREAD_AFVALID_Pos              1U                                            /*!< ITM ITREAD: AFVALID Position */
+#define ITM_ITREAD_AFVALID_Msk             (0x1UL << ITM_ITREAD_AFVALID_Pos)              /*!< ITM ITREAD: AFVALID Mask */
+
+#define ITM_ITREAD_ATREADY_Pos              0U                                            /*!< ITM ITREAD: ATREADY Position */
+#define ITM_ITREAD_ATREADY_Msk             (0x1UL /*<< ITM_ITREAD_ATREADY_Pos*/)          /*!< ITM ITREAD: ATREADY Mask */
+
+/* ITM Integration Write Register Definitions */
+#define ITM_ITWRITE_AFVALID_Pos             1U                                            /*!< ITM ITWRITE: AFVALID Position */
+#define ITM_ITWRITE_AFVALID_Msk            (0x1UL << ITM_ITWRITE_AFVALID_Pos)             /*!< ITM ITWRITE: AFVALID Mask */
+
+#define ITM_ITWRITE_ATREADY_Pos             0U                                            /*!< ITM ITWRITE: ATREADY Position */
+#define ITM_ITWRITE_ATREADY_Msk            (0x1UL /*<< ITM_ITWRITE_ATREADY_Pos*/)         /*!< ITM ITWRITE: ATREADY Mask */
+
+/* ITM Integration Mode Control Register Definitions */
+#define ITM_ITCTRL_IME_Pos                  0U                                            /*!< ITM ITCTRL: IME Position */
+#define ITM_ITCTRL_IME_Msk                 (0x1UL /*<< ITM_ITCTRL_IME_Pos*/)              /*!< ITM ITCTRL: IME Mask */
+
+/*@}*/ /* end of group CMSIS_ITM */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_DWT     Data Watchpoint and Trace (DWT)
+  \brief    Type definitions for the Data Watchpoint and Trace (DWT)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Data Watchpoint and Trace Register (DWT).
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  Control Register */
+  __IOM uint32_t CYCCNT;                 /*!< Offset: 0x004 (R/W)  Cycle Count Register */
+  __IOM uint32_t CPICNT;                 /*!< Offset: 0x008 (R/W)  CPI Count Register */
+  __IOM uint32_t EXCCNT;                 /*!< Offset: 0x00C (R/W)  Exception Overhead Count Register */
+  __IOM uint32_t SLEEPCNT;               /*!< Offset: 0x010 (R/W)  Sleep Count Register */
+  __IOM uint32_t LSUCNT;                 /*!< Offset: 0x014 (R/W)  LSU Count Register */
+  __IOM uint32_t FOLDCNT;                /*!< Offset: 0x018 (R/W)  Folded-instruction Count Register */
+  __IM  uint32_t PCSR;                   /*!< Offset: 0x01C (R/ )  Program Counter Sample Register */
+  __IOM uint32_t COMP0;                  /*!< Offset: 0x020 (R/W)  Comparator Register 0 */
+        uint32_t RESERVED1[1U];          /*!< Offset: 0x024        Reserved padding */
+  __IOM uint32_t FUNCTION0;              /*!< Offset: 0x028 (R/W)  Function Register 0 */
+        uint32_t RESERVED2[1U];          /*!< Offset: 0x02C        Reserved padding */
+  __IOM uint32_t COMP1;                  /*!< Offset: 0x030 (R/W)  Comparator Register 1 */
+        uint32_t RESERVED3[1U];          /*!< Offset: 0x034        Reserved padding */
+  __IOM uint32_t FUNCTION1;              /*!< Offset: 0x038 (R/W)  Function Register 1 */
+  __IOM uint32_t VMASK1;                 /*!< Offset: 0x03C (R/W)  Comparator Value Mask 1 */
+  __IOM uint32_t COMP2;                  /*!< Offset: 0x040 (R/W)  Comparator Register 2 */
+        uint32_t RESERVED4[1U];          /*!< Offset: 0x044        Reserved padding */
+  __IOM uint32_t FUNCTION2;              /*!< Offset: 0x048 (R/W)  Function Register 2 */
+        uint32_t RESERVED5[1U];          /*!< Offset: 0x04C        Reserved padding */
+  __IOM uint32_t COMP3;                  /*!< Offset: 0x050 (R/W)  Comparator Register 3 */
+        uint32_t RESERVED6[1U];          /*!< Offset: 0x054        Reserved padding */
+  __IOM uint32_t FUNCTION3;              /*!< Offset: 0x058 (R/W)  Function Register 3 */
+  __IOM uint32_t VMASK3;                 /*!< Offset: 0x05C (R/W)  Comparator Value Mask 3 */
+  __IOM uint32_t COMP4;                  /*!< Offset: 0x060 (R/W)  Comparator Register 4 */
+        uint32_t RESERVED7[1U];          /*!< Offset: 0x064        Reserved padding */
+  __IOM uint32_t FUNCTION4;              /*!< Offset: 0x068 (R/W)  Function Register 4 */
+        uint32_t RESERVED8[1U];          /*!< Offset: 0x06C        Reserved padding */
+  __IOM uint32_t COMP5;                  /*!< Offset: 0x070 (R/W)  Comparator Register 5 */
+        uint32_t RESERVED9[1U];          /*!< Offset: 0x074        Reserved padding */
+  __IOM uint32_t FUNCTION5;              /*!< Offset: 0x078 (R/W)  Function Register 5 */
+        uint32_t RESERVED10[1U];         /*!< Offset: 0x07C        Reserved padding */
+  __IOM uint32_t COMP6;                  /*!< Offset: 0x080 (R/W)  Comparator Register 6 */
+        uint32_t RESERVED11[1U];         /*!< Offset: 0x084        Reserved padding */
+  __IOM uint32_t FUNCTION6;              /*!< Offset: 0x088 (R/W)  Function Register 6 */
+        uint32_t RESERVED12[1U];         /*!< Offset: 0x08C        Reserved padding */
+  __IOM uint32_t COMP7;                  /*!< Offset: 0x090 (R/W)  Comparator Register 7 */
+        uint32_t RESERVED13[1U];         /*!< Offset: 0x094        Reserved padding */
+  __IOM uint32_t FUNCTION7;              /*!< Offset: 0x098 (R/W)  Function Register 7 */
+        uint32_t RESERVED14[968U];       /*!< Offset: 0x09C-0xFB8  Reserved padding (968 words) up to DEVARCH */
+  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  Device Type Architecture Register */
+        uint32_t RESERVED15[3U];         /*!< Offset: 0xFC0-0xFC8  Reserved padding */
+  __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  Device Type Identifier Register */
+} DWT_Type;
+
+/* DWT Control Register Definitions */
+#define DWT_CTRL_NUMCOMP_Pos               28U                                         /*!< DWT CTRL: NUMCOMP Position */
+#define DWT_CTRL_NUMCOMP_Msk               (0xFUL << DWT_CTRL_NUMCOMP_Pos)             /*!< DWT CTRL: NUMCOMP Mask */
+
+#define DWT_CTRL_NOTRCPKT_Pos              27U                                         /*!< DWT CTRL: NOTRCPKT Position */
+#define DWT_CTRL_NOTRCPKT_Msk              (0x1UL << DWT_CTRL_NOTRCPKT_Pos)            /*!< DWT CTRL: NOTRCPKT Mask */
+
+#define DWT_CTRL_NOEXTTRIG_Pos             26U                                         /*!< DWT CTRL: NOEXTTRIG Position */
+#define DWT_CTRL_NOEXTTRIG_Msk             (0x1UL << DWT_CTRL_NOEXTTRIG_Pos)           /*!< DWT CTRL: NOEXTTRIG Mask */
+
+#define DWT_CTRL_NOCYCCNT_Pos              25U                                         /*!< DWT CTRL: NOCYCCNT Position */
+#define DWT_CTRL_NOCYCCNT_Msk              (0x1UL << DWT_CTRL_NOCYCCNT_Pos)            /*!< DWT CTRL: NOCYCCNT Mask */
+
+#define DWT_CTRL_NOPRFCNT_Pos              24U                                         /*!< DWT CTRL: NOPRFCNT Position */
+#define DWT_CTRL_NOPRFCNT_Msk              (0x1UL << DWT_CTRL_NOPRFCNT_Pos)            /*!< DWT CTRL: NOPRFCNT Mask */
+
+#define DWT_CTRL_CYCDISS_Pos               23U                                         /*!< DWT CTRL: CYCDISS Position */
+#define DWT_CTRL_CYCDISS_Msk               (0x1UL << DWT_CTRL_CYCDISS_Pos)             /*!< DWT CTRL: CYCDISS Mask */
+
+#define DWT_CTRL_CYCEVTENA_Pos             22U                                         /*!< DWT CTRL: CYCEVTENA Position */
+#define DWT_CTRL_CYCEVTENA_Msk             (0x1UL << DWT_CTRL_CYCEVTENA_Pos)           /*!< DWT CTRL: CYCEVTENA Mask */
+
+#define DWT_CTRL_FOLDEVTENA_Pos            21U                                         /*!< DWT CTRL: FOLDEVTENA Position */
+#define DWT_CTRL_FOLDEVTENA_Msk            (0x1UL << DWT_CTRL_FOLDEVTENA_Pos)          /*!< DWT CTRL: FOLDEVTENA Mask */
+
+#define DWT_CTRL_LSUEVTENA_Pos             20U                                         /*!< DWT CTRL: LSUEVTENA Position */
+#define DWT_CTRL_LSUEVTENA_Msk             (0x1UL << DWT_CTRL_LSUEVTENA_Pos)           /*!< DWT CTRL: LSUEVTENA Mask */
+
+#define DWT_CTRL_SLEEPEVTENA_Pos           19U                                         /*!< DWT CTRL: SLEEPEVTENA Position */
+#define DWT_CTRL_SLEEPEVTENA_Msk           (0x1UL << DWT_CTRL_SLEEPEVTENA_Pos)         /*!< DWT CTRL: SLEEPEVTENA Mask */
+
+#define DWT_CTRL_EXCEVTENA_Pos             18U                                         /*!< DWT CTRL: EXCEVTENA Position */
+#define DWT_CTRL_EXCEVTENA_Msk             (0x1UL << DWT_CTRL_EXCEVTENA_Pos)           /*!< DWT CTRL: EXCEVTENA Mask */
+
+#define DWT_CTRL_CPIEVTENA_Pos             17U                                         /*!< DWT CTRL: CPIEVTENA Position */
+#define DWT_CTRL_CPIEVTENA_Msk             (0x1UL << DWT_CTRL_CPIEVTENA_Pos)           /*!< DWT CTRL: CPIEVTENA Mask */
+
+#define DWT_CTRL_EXCTRCENA_Pos             16U                                         /*!< DWT CTRL: EXCTRCENA Position */
+#define DWT_CTRL_EXCTRCENA_Msk             (0x1UL << DWT_CTRL_EXCTRCENA_Pos)           /*!< DWT CTRL: EXCTRCENA Mask */
+
+#define DWT_CTRL_PCSAMPLENA_Pos            12U                                         /*!< DWT CTRL: PCSAMPLENA Position */
+#define DWT_CTRL_PCSAMPLENA_Msk            (0x1UL << DWT_CTRL_PCSAMPLENA_Pos)          /*!< DWT CTRL: PCSAMPLENA Mask */
+
+#define DWT_CTRL_SYNCTAP_Pos               10U                                         /*!< DWT CTRL: SYNCTAP Position */
+#define DWT_CTRL_SYNCTAP_Msk               (0x3UL << DWT_CTRL_SYNCTAP_Pos)             /*!< DWT CTRL: SYNCTAP Mask */
+
+#define DWT_CTRL_CYCTAP_Pos                 9U                                         /*!< DWT CTRL: CYCTAP Position */
+#define DWT_CTRL_CYCTAP_Msk                (0x1UL << DWT_CTRL_CYCTAP_Pos)              /*!< DWT CTRL: CYCTAP Mask */
+
+#define DWT_CTRL_POSTINIT_Pos               5U                                         /*!< DWT CTRL: POSTINIT Position */
+#define DWT_CTRL_POSTINIT_Msk              (0xFUL << DWT_CTRL_POSTINIT_Pos)            /*!< DWT CTRL: POSTINIT Mask */
+
+#define DWT_CTRL_POSTPRESET_Pos             1U                                         /*!< DWT CTRL: POSTPRESET Position */
+#define DWT_CTRL_POSTPRESET_Msk            (0xFUL << DWT_CTRL_POSTPRESET_Pos)          /*!< DWT CTRL: POSTPRESET Mask */
+
+#define DWT_CTRL_CYCCNTENA_Pos              0U                                         /*!< DWT CTRL: CYCCNTENA Position */
+#define DWT_CTRL_CYCCNTENA_Msk             (0x1UL /*<< DWT_CTRL_CYCCNTENA_Pos*/)       /*!< DWT CTRL: CYCCNTENA Mask */
+
+/* DWT CPI Count Register Definitions */
+#define DWT_CPICNT_CPICNT_Pos               0U                                         /*!< DWT CPICNT: CPICNT Position */
+#define DWT_CPICNT_CPICNT_Msk              (0xFFUL /*<< DWT_CPICNT_CPICNT_Pos*/)       /*!< DWT CPICNT: CPICNT Mask */
+
+/* DWT Exception Overhead Count Register Definitions */
+#define DWT_EXCCNT_EXCCNT_Pos               0U                                         /*!< DWT EXCCNT: EXCCNT Position */
+#define DWT_EXCCNT_EXCCNT_Msk              (0xFFUL /*<< DWT_EXCCNT_EXCCNT_Pos*/)       /*!< DWT EXCCNT: EXCCNT Mask */
+
+/* DWT Sleep Count Register Definitions */
+#define DWT_SLEEPCNT_SLEEPCNT_Pos           0U                                         /*!< DWT SLEEPCNT: SLEEPCNT Position */
+#define DWT_SLEEPCNT_SLEEPCNT_Msk          (0xFFUL /*<< DWT_SLEEPCNT_SLEEPCNT_Pos*/)   /*!< DWT SLEEPCNT: SLEEPCNT Mask */
+
+/* DWT LSU Count Register Definitions */
+#define DWT_LSUCNT_LSUCNT_Pos               0U                                         /*!< DWT LSUCNT: LSUCNT Position */
+#define DWT_LSUCNT_LSUCNT_Msk              (0xFFUL /*<< DWT_LSUCNT_LSUCNT_Pos*/)       /*!< DWT LSUCNT: LSUCNT Mask */
+
+/* DWT Folded-instruction Count Register Definitions */
+#define DWT_FOLDCNT_FOLDCNT_Pos             0U                                         /*!< DWT FOLDCNT: FOLDCNT Position */
+#define DWT_FOLDCNT_FOLDCNT_Msk            (0xFFUL /*<< DWT_FOLDCNT_FOLDCNT_Pos*/)     /*!< DWT FOLDCNT: FOLDCNT Mask */
+
+/* DWT Comparator Function Register Definitions */
+#define DWT_FUNCTION_ID_Pos                27U                                         /*!< DWT FUNCTION: ID Position */
+#define DWT_FUNCTION_ID_Msk                (0x1FUL << DWT_FUNCTION_ID_Pos)             /*!< DWT FUNCTION: ID Mask */
+
+#define DWT_FUNCTION_MATCHED_Pos           24U                                         /*!< DWT FUNCTION: MATCHED Position */
+#define DWT_FUNCTION_MATCHED_Msk           (0x1UL << DWT_FUNCTION_MATCHED_Pos)         /*!< DWT FUNCTION: MATCHED Mask */
+
+#define DWT_FUNCTION_DATAVSIZE_Pos         10U                                         /*!< DWT FUNCTION: DATAVSIZE Position */
+#define DWT_FUNCTION_DATAVSIZE_Msk         (0x3UL << DWT_FUNCTION_DATAVSIZE_Pos)       /*!< DWT FUNCTION: DATAVSIZE Mask */
+
+#define DWT_FUNCTION_ACTION_Pos             4U                                         /*!< DWT FUNCTION: ACTION Position */
+#define DWT_FUNCTION_ACTION_Msk            (0x3UL << DWT_FUNCTION_ACTION_Pos)          /*!< DWT FUNCTION: ACTION Mask */
+
+#define DWT_FUNCTION_MATCH_Pos              0U                                         /*!< DWT FUNCTION: MATCH Position */
+#define DWT_FUNCTION_MATCH_Msk             (0xFUL /*<< DWT_FUNCTION_MATCH_Pos*/)       /*!< DWT FUNCTION: MATCH Mask */
+
+/*@}*/ /* end of group CMSIS_DWT */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup MemSysCtl_Type     Memory System Control Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Memory System Control Registers (MEMSYSCTL)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Memory System Control Registers (MEMSYSCTL).
+ */
+typedef struct
+{
+  __IOM uint32_t MSCR;                   /*!< Offset: 0x000 (R/W)  Memory System Control Register */
+  __IOM uint32_t PFCR;                   /*!< Offset: 0x004 (R/W)  Prefetcher Control Register */
+        uint32_t RESERVED1[2U];          /*!< Offset: 0x008-0x00C  Reserved padding */
+  __IOM uint32_t ITCMCR;                 /*!< Offset: 0x010 (R/W)  ITCM Control Register */
+  __IOM uint32_t DTCMCR;                 /*!< Offset: 0x014 (R/W)  DTCM Control Register */
+  __IOM uint32_t PAHBCR;                 /*!< Offset: 0x018 (R/W)  P-AHB Control Register */
+        uint32_t RESERVED2[313U];        /*!< Offset: 0x01C-0x4FC  Reserved padding (313 words) up to ITGU_CTRL */
+  __IOM uint32_t ITGU_CTRL;              /*!< Offset: 0x500 (R/W)  ITGU Control Register */
+  __IOM uint32_t ITGU_CFG;               /*!< Offset: 0x504 (R/W)  ITGU Configuration Register */
+        uint32_t RESERVED3[2U];          /*!< Offset: 0x508-0x50C  Reserved padding */
+  __IOM uint32_t ITGU_LUT[16U];          /*!< Offset: 0x510 (R/W)  ITGU Look Up Table Register */
+        uint32_t RESERVED4[44U];         /*!< Offset: 0x550-0x5FC  Reserved padding (44 words) up to DTGU_CTRL */
+  __IOM uint32_t DTGU_CTRL;              /*!< Offset: 0x600 (R/W)  DTGU Control Registers */
+  __IOM uint32_t DTGU_CFG;               /*!< Offset: 0x604 (R/W)  DTGU Configuration Register */
+        uint32_t RESERVED5[2U];          /*!< Offset: 0x608-0x60C  Reserved padding */
+  __IOM uint32_t DTGU_LUT[16U];          /*!< Offset: 0x610 (R/W)  DTGU Look Up Table Register */
+} MemSysCtl_Type;
+
+/* MEMSYSCTL Memory System Control Register (MSCR) Register Definitions */
+#define MEMSYSCTL_MSCR_CPWRDN_Pos          17U                                         /*!< MEMSYSCTL MSCR: CPWRDN Position */
+#define MEMSYSCTL_MSCR_CPWRDN_Msk          (0x1UL << MEMSYSCTL_MSCR_CPWRDN_Pos)        /*!< MEMSYSCTL MSCR: CPWRDN Mask */
+
+#define MEMSYSCTL_MSCR_DCCLEAN_Pos         16U                                         /*!< MEMSYSCTL MSCR: DCCLEAN Position */
+#define MEMSYSCTL_MSCR_DCCLEAN_Msk         (0x1UL << MEMSYSCTL_MSCR_DCCLEAN_Pos)       /*!< MEMSYSCTL MSCR: DCCLEAN Mask */
+
+#define MEMSYSCTL_MSCR_ICACTIVE_Pos        13U                                         /*!< MEMSYSCTL MSCR: ICACTIVE Position */
+#define MEMSYSCTL_MSCR_ICACTIVE_Msk        (0x1UL << MEMSYSCTL_MSCR_ICACTIVE_Pos)      /*!< MEMSYSCTL MSCR: ICACTIVE Mask */
+
+#define MEMSYSCTL_MSCR_DCACTIVE_Pos        12U                                         /*!< MEMSYSCTL MSCR: DCACTIVE Position */
+#define MEMSYSCTL_MSCR_DCACTIVE_Msk        (0x1UL << MEMSYSCTL_MSCR_DCACTIVE_Pos)      /*!< MEMSYSCTL MSCR: DCACTIVE Mask */
+
+#define MEMSYSCTL_MSCR_EVECCFAULT_Pos       3U                                         /*!< MEMSYSCTL MSCR: EVECCFAULT Position */
+#define MEMSYSCTL_MSCR_EVECCFAULT_Msk      (0x1UL << MEMSYSCTL_MSCR_EVECCFAULT_Pos)    /*!< MEMSYSCTL MSCR: EVECCFAULT Mask */
+
+#define MEMSYSCTL_MSCR_FORCEWT_Pos          2U                                         /*!< MEMSYSCTL MSCR: FORCEWT Position */
+#define MEMSYSCTL_MSCR_FORCEWT_Msk         (0x1UL << MEMSYSCTL_MSCR_FORCEWT_Pos)       /*!< MEMSYSCTL MSCR: FORCEWT Mask */
+
+#define MEMSYSCTL_MSCR_ECCEN_Pos            1U                                         /*!< MEMSYSCTL MSCR: ECCEN Position */
+#define MEMSYSCTL_MSCR_ECCEN_Msk           (0x1UL << MEMSYSCTL_MSCR_ECCEN_Pos)         /*!< MEMSYSCTL MSCR: ECCEN Mask */
+
+/* MEMSYSCTL Prefetcher Control Register (PFCR) Register Definitions */
+#define MEMSYSCTL_PFCR_DIS_NLP_Pos          7U                                         /*!< MEMSYSCTL PFCR: DIS_NLP Position */
+#define MEMSYSCTL_PFCR_DIS_NLP_Msk         (0x1UL << MEMSYSCTL_PFCR_DIS_NLP_Pos)       /*!< MEMSYSCTL PFCR: DIS_NLP Mask */
+
+#define MEMSYSCTL_PFCR_ENABLE_Pos           0U                                         /*!< MEMSYSCTL PFCR: ENABLE Position */
+#define MEMSYSCTL_PFCR_ENABLE_Msk          (0x1UL /*<< MEMSYSCTL_PFCR_ENABLE_Pos*/)    /*!< MEMSYSCTL PFCR: ENABLE Mask */
+
+/* MEMSYSCTL ITCM Control Register (ITCMCR) Register Definitions */
+#define MEMSYSCTL_ITCMCR_SZ_Pos             3U                                         /*!< MEMSYSCTL ITCMCR: SZ Position */
+#define MEMSYSCTL_ITCMCR_SZ_Msk            (0xFUL << MEMSYSCTL_ITCMCR_SZ_Pos)          /*!< MEMSYSCTL ITCMCR: SZ Mask */
+
+#define MEMSYSCTL_ITCMCR_EN_Pos             0U                                         /*!< MEMSYSCTL ITCMCR: EN Position */
+#define MEMSYSCTL_ITCMCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_ITCMCR_EN_Pos*/)      /*!< MEMSYSCTL ITCMCR: EN Mask */
+
+/* MEMSYSCTL DTCM Control Register (DTCMCR) Register Definitions */
+#define MEMSYSCTL_DTCMCR_SZ_Pos             3U                                         /*!< MEMSYSCTL DTCMCR: SZ Position */
+#define MEMSYSCTL_DTCMCR_SZ_Msk            (0xFUL << MEMSYSCTL_DTCMCR_SZ_Pos)          /*!< MEMSYSCTL DTCMCR: SZ Mask */
+
+#define MEMSYSCTL_DTCMCR_EN_Pos             0U                                         /*!< MEMSYSCTL DTCMCR: EN Position */
+#define MEMSYSCTL_DTCMCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_DTCMCR_EN_Pos*/)      /*!< MEMSYSCTL DTCMCR: EN Mask */
+
+/* MEMSYSCTL P-AHB Control Register (PAHBCR) Register Definitions */
+#define MEMSYSCTL_PAHBCR_SZ_Pos             1U                                         /*!< MEMSYSCTL PAHBCR: SZ Position */
+#define MEMSYSCTL_PAHBCR_SZ_Msk            (0x7UL << MEMSYSCTL_PAHBCR_SZ_Pos)          /*!< MEMSYSCTL PAHBCR: SZ Mask */
+
+#define MEMSYSCTL_PAHBCR_EN_Pos             0U                                         /*!< MEMSYSCTL PAHBCR: EN Position */
+#define MEMSYSCTL_PAHBCR_EN_Msk            (0x1UL /*<< MEMSYSCTL_PAHBCR_EN_Pos*/)      /*!< MEMSYSCTL PAHBCR: EN Mask */
+
+/* MEMSYSCTL ITGU Control Register (ITGU_CTRL) Register Definitions */
+#define MEMSYSCTL_ITGU_CTRL_DEREN_Pos       1U                                         /*!< MEMSYSCTL ITGU_CTRL: DEREN Position */
+#define MEMSYSCTL_ITGU_CTRL_DEREN_Msk      (0x1UL << MEMSYSCTL_ITGU_CTRL_DEREN_Pos)    /*!< MEMSYSCTL ITGU_CTRL: DEREN Mask */
+
+#define MEMSYSCTL_ITGU_CTRL_DBFEN_Pos       0U                                         /*!< MEMSYSCTL ITGU_CTRL: DBFEN Position */
+#define MEMSYSCTL_ITGU_CTRL_DBFEN_Msk      (0x1UL /*<< MEMSYSCTL_ITGU_CTRL_DBFEN_Pos*/) /*!< MEMSYSCTL ITGU_CTRL: DBFEN Mask */
+
+/* MEMSYSCTL ITGU Configuration Register (ITGU_CFG) Register Definitions */
+#define MEMSYSCTL_ITGU_CFG_PRESENT_Pos     31U                                         /*!< MEMSYSCTL ITGU_CFG: PRESENT Position */
+#define MEMSYSCTL_ITGU_CFG_PRESENT_Msk     (0x1UL << MEMSYSCTL_ITGU_CFG_PRESENT_Pos)   /*!< MEMSYSCTL ITGU_CFG: PRESENT Mask */
+
+#define MEMSYSCTL_ITGU_CFG_NUMBLKS_Pos      8U                                         /*!< MEMSYSCTL ITGU_CFG: NUMBLKS Position */
+#define MEMSYSCTL_ITGU_CFG_NUMBLKS_Msk     (0xFUL << MEMSYSCTL_ITGU_CFG_NUMBLKS_Pos)   /*!< MEMSYSCTL ITGU_CFG: NUMBLKS Mask */
+
+#define MEMSYSCTL_ITGU_CFG_BLKSZ_Pos        0U                                         /*!< MEMSYSCTL ITGU_CFG: BLKSZ Position */
+#define MEMSYSCTL_ITGU_CFG_BLKSZ_Msk       (0xFUL /*<< MEMSYSCTL_ITGU_CFG_BLKSZ_Pos*/) /*!< MEMSYSCTL ITGU_CFG: BLKSZ Mask */
+
+/* MEMSYSCTL DTGU Control Registers (DTGU_CTRL) Register Definitions */
+#define MEMSYSCTL_DTGU_CTRL_DEREN_Pos       1U                                         /*!< MEMSYSCTL DTGU_CTRL: DEREN Position */
+#define MEMSYSCTL_DTGU_CTRL_DEREN_Msk      (0x1UL << MEMSYSCTL_DTGU_CTRL_DEREN_Pos)    /*!< MEMSYSCTL DTGU_CTRL: DEREN Mask */
+
+#define MEMSYSCTL_DTGU_CTRL_DBFEN_Pos       0U                                         /*!< MEMSYSCTL DTGU_CTRL: DBFEN Position */
+#define MEMSYSCTL_DTGU_CTRL_DBFEN_Msk      (0x1UL /*<< MEMSYSCTL_DTGU_CTRL_DBFEN_Pos*/) /*!< MEMSYSCTL DTGU_CTRL: DBFEN Mask */
+
+/* MEMSYSCTL DTGU Configuration Register (DTGU_CFG) Register Definitions */
+#define MEMSYSCTL_DTGU_CFG_PRESENT_Pos     31U                                         /*!< MEMSYSCTL DTGU_CFG: PRESENT Position */
+#define MEMSYSCTL_DTGU_CFG_PRESENT_Msk     (0x1UL << MEMSYSCTL_DTGU_CFG_PRESENT_Pos)   /*!< MEMSYSCTL DTGU_CFG: PRESENT Mask */
+
+#define MEMSYSCTL_DTGU_CFG_NUMBLKS_Pos      8U                                         /*!< MEMSYSCTL DTGU_CFG: NUMBLKS Position */
+#define MEMSYSCTL_DTGU_CFG_NUMBLKS_Msk     (0xFUL << MEMSYSCTL_DTGU_CFG_NUMBLKS_Pos)   /*!< MEMSYSCTL DTGU_CFG: NUMBLKS Mask */
+
+#define MEMSYSCTL_DTGU_CFG_BLKSZ_Pos        0U                                         /*!< MEMSYSCTL DTGU_CFG: BLKSZ Position */
+#define MEMSYSCTL_DTGU_CFG_BLKSZ_Msk       (0xFUL /*<< MEMSYSCTL_DTGU_CFG_BLKSZ_Pos*/) /*!< MEMSYSCTL DTGU_CFG: BLKSZ Mask */
+
+
+/*@}*/ /* end of group MemSysCtl_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup PwrModCtl_Type     Power Mode Control Registers
+  \brief    Type definitions for the Power Mode Control Registers (PWRMODCTL)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Power Mode Control Registers (PWRMODCTL).
+ */
+typedef struct
+{
+  __IOM uint32_t CPDLPSTATE;             /*!< Offset: 0x000 (R/W)  Core Power Domain Low Power State Register */
+  __IOM uint32_t DPDLPSTATE;             /*!< Offset: 0x004 (R/W)  Debug Power Domain Low Power State Register */
+} PwrModCtl_Type;
+
+/* PWRMODCTL Core Power Domain Low Power State (CPDLPSTATE) Register Definitions */
+#define PWRMODCTL_CPDLPSTATE_RLPSTATE_Pos   8U                                              /*!< PWRMODCTL CPDLPSTATE: RLPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_RLPSTATE_Msk  (0x3UL << PWRMODCTL_CPDLPSTATE_RLPSTATE_Pos)     /*!< PWRMODCTL CPDLPSTATE: RLPSTATE Mask */
+
+#define PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos   4U                                              /*!< PWRMODCTL CPDLPSTATE: ELPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_ELPSTATE_Msk  (0x3UL << PWRMODCTL_CPDLPSTATE_ELPSTATE_Pos)     /*!< PWRMODCTL CPDLPSTATE: ELPSTATE Mask */
+
+#define PWRMODCTL_CPDLPSTATE_CLPSTATE_Pos   0U                                              /*!< PWRMODCTL CPDLPSTATE: CLPSTATE Position */
+#define PWRMODCTL_CPDLPSTATE_CLPSTATE_Msk  (0x3UL /*<< PWRMODCTL_CPDLPSTATE_CLPSTATE_Pos*/) /*!< PWRMODCTL CPDLPSTATE: CLPSTATE Mask */
+
+/* PWRMODCTL Debug Power Domain Low Power State (DPDLPSTATE) Register Definitions */
+#define PWRMODCTL_DPDLPSTATE_DLPSTATE_Pos   0U                                              /*!< PWRMODCTL DPDLPSTATE: DLPSTATE Position */
+#define PWRMODCTL_DPDLPSTATE_DLPSTATE_Msk  (0x3UL /*<< PWRMODCTL_DPDLPSTATE_DLPSTATE_Pos*/) /*!< PWRMODCTL DPDLPSTATE: DLPSTATE Mask */
+
+/*@}*/ /* end of group PwrModCtl_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup EWIC_Type     External Wakeup Interrupt Controller Registers
+  \brief    Type definitions for the External Wakeup Interrupt Controller Registers (EWIC)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the External Wakeup Interrupt Controller Registers (EWIC).
+ */
+typedef struct
+{
+  __OM  uint32_t EVENTSPR;               /*!< Offset: 0x000 ( /W)  Event Set Pending Register */
+        uint32_t RESERVED0[31U];         /*!< Offset: 0x004-0x07C  Reserved padding (31 words) up to EVENTMASKA */
+  __IM  uint32_t EVENTMASKA;             /*!< Offset: 0x080 (R/ )  Event Mask A Register (declared read-only via __IM) */
+  __IM  uint32_t EVENTMASK[15];          /*!< Offset: 0x084 (R/ )  Event Mask Register (declared read-only via __IM) */
+} EWIC_Type;
+
+/* EWIC External Wakeup Interrupt Controller (EVENTSPR) Register Definitions */
+#define EWIC_EVENTSPR_EDBGREQ_Pos   2U                                                 /*!< EWIC EVENTSPR: EDBGREQ Position */
+#define EWIC_EVENTSPR_EDBGREQ_Msk  (0x1UL << EWIC_EVENTSPR_EDBGREQ_Pos)                /*!< EWIC EVENTSPR: EDBGREQ Mask */
+
+#define EWIC_EVENTSPR_NMI_Pos   1U                                                     /*!< EWIC EVENTSPR: NMI Position */
+#define EWIC_EVENTSPR_NMI_Msk  (0x1UL << EWIC_EVENTSPR_NMI_Pos)                        /*!< EWIC EVENTSPR: NMI Mask */
+
+#define EWIC_EVENTSPR_EVENT_Pos   0U                                                   /*!< EWIC EVENTSPR: EVENT Position */
+#define EWIC_EVENTSPR_EVENT_Msk  (0x1UL /*<< EWIC_EVENTSPR_EVENT_Pos*/)                /*!< EWIC EVENTSPR: EVENT Mask */
+
+/* EWIC External Wakeup Interrupt Controller (EVENTMASKA) Register Definitions */
+#define EWIC_EVENTMASKA_EDBGREQ_Pos   2U                                               /*!< EWIC EVENTMASKA: EDBGREQ Position */
+#define EWIC_EVENTMASKA_EDBGREQ_Msk  (0x1UL << EWIC_EVENTMASKA_EDBGREQ_Pos)            /*!< EWIC EVENTMASKA: EDBGREQ Mask */
+
+#define EWIC_EVENTMASKA_NMI_Pos   1U                                                   /*!< EWIC EVENTMASKA: NMI Position */
+#define EWIC_EVENTMASKA_NMI_Msk  (0x1UL << EWIC_EVENTMASKA_NMI_Pos)                    /*!< EWIC EVENTMASKA: NMI Mask */
+
+#define EWIC_EVENTMASKA_EVENT_Pos   0U                                                 /*!< EWIC EVENTMASKA: EVENT Position */
+#define EWIC_EVENTMASKA_EVENT_Msk  (0x1UL /*<< EWIC_EVENTMASKA_EVENT_Pos*/)            /*!< EWIC EVENTMASKA: EVENT Mask */
+
+/* EWIC External Wakeup Interrupt Controller (EVENTMASK) Register Definitions */
+#define EWIC_EVENTMASK_IRQ_Pos   0U                                                    /*!< EWIC EVENTMASK: IRQ Position */
+#define EWIC_EVENTMASK_IRQ_Msk  (0xFFFFFFFFUL /*<< EWIC_EVENTMASK_IRQ_Pos*/)           /*!< EWIC EVENTMASK: IRQ Mask */
+
+/*@}*/ /* end of group EWIC_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup ErrBnk_Type     Error Banking Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Error Banking Registers (ERRBNK)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Error Banking Registers (ERRBNK).
+ */
+typedef struct
+{
+  __IOM uint32_t IEBR0;                  /*!< Offset: 0x000 (R/W)  Instruction Cache Error Bank Register 0 */
+  __IOM uint32_t IEBR1;                  /*!< Offset: 0x004 (R/W)  Instruction Cache Error Bank Register 1 */
+        uint32_t RESERVED0[2U];          /*!< Offset: 0x008-0x00C  Reserved padding */
+  __IOM uint32_t DEBR0;                  /*!< Offset: 0x010 (R/W)  Data Cache Error Bank Register 0 */
+  __IOM uint32_t DEBR1;                  /*!< Offset: 0x014 (R/W)  Data Cache Error Bank Register 1 */
+        uint32_t RESERVED1[2U];          /*!< Offset: 0x018-0x01C  Reserved padding */
+  __IOM uint32_t TEBR0;                  /*!< Offset: 0x020 (R/W)  TCM Error Bank Register 0 */
+        uint32_t RESERVED2[1U];          /*!< Offset: 0x024        Reserved padding */
+  __IOM uint32_t TEBR1;                  /*!< Offset: 0x028 (R/W)  TCM Error Bank Register 1 */
+} ErrBnk_Type;
+
+/* ERRBNK Instruction Cache Error Bank Register 0 (IEBR0) Register Definitions */
+#define ERRBNK_IEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK IEBR0: SWDEF Position */
+#define ERRBNK_IEBR0_SWDEF_Msk             (0x3UL << ERRBNK_IEBR0_SWDEF_Pos)           /*!< ERRBNK IEBR0: SWDEF Mask */
+
+#define ERRBNK_IEBR0_BANK_Pos              16U                                         /*!< ERRBNK IEBR0: BANK Position */
+#define ERRBNK_IEBR0_BANK_Msk              (0x1UL << ERRBNK_IEBR0_BANK_Pos)            /*!< ERRBNK IEBR0: BANK Mask */
+
+#define ERRBNK_IEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK IEBR0: LOCATION Position */
+#define ERRBNK_IEBR0_LOCATION_Msk          (0x3FFFUL << ERRBNK_IEBR0_LOCATION_Pos)     /*!< ERRBNK IEBR0: LOCATION Mask */
+
+#define ERRBNK_IEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK IEBR0: LOCKED Position */
+#define ERRBNK_IEBR0_LOCKED_Msk            (0x1UL << ERRBNK_IEBR0_LOCKED_Pos)          /*!< ERRBNK IEBR0: LOCKED Mask */
+
+#define ERRBNK_IEBR0_VALID_Pos              0U                                         /*!< ERRBNK IEBR0: VALID Position */
+#define ERRBNK_IEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_IEBR0_VALID_Pos*/)       /*!< ERRBNK IEBR0: VALID Mask (bit 0; shift kept inside the comment so the macro expands to a complete expression) */
+
+/* ERRBNK Instruction Cache Error Bank Register 1 (IEBR1) Register Definitions */
+#define ERRBNK_IEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK IEBR1: SWDEF Position */
+#define ERRBNK_IEBR1_SWDEF_Msk             (0x3UL << ERRBNK_IEBR1_SWDEF_Pos)           /*!< ERRBNK IEBR1: SWDEF Mask */
+
+#define ERRBNK_IEBR1_BANK_Pos              16U                                         /*!< ERRBNK IEBR1: BANK Position */
+#define ERRBNK_IEBR1_BANK_Msk              (0x1UL << ERRBNK_IEBR1_BANK_Pos)            /*!< ERRBNK IEBR1: BANK Mask */
+
+#define ERRBNK_IEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK IEBR1: LOCATION Position */
+#define ERRBNK_IEBR1_LOCATION_Msk          (0x3FFFUL << ERRBNK_IEBR1_LOCATION_Pos)     /*!< ERRBNK IEBR1: LOCATION Mask */
+
+#define ERRBNK_IEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK IEBR1: LOCKED Position */
+#define ERRBNK_IEBR1_LOCKED_Msk            (0x1UL << ERRBNK_IEBR1_LOCKED_Pos)          /*!< ERRBNK IEBR1: LOCKED Mask */
+
+#define ERRBNK_IEBR1_VALID_Pos              0U                                         /*!< ERRBNK IEBR1: VALID Position */
+#define ERRBNK_IEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_IEBR1_VALID_Pos*/)       /*!< ERRBNK IEBR1: VALID Mask (bit 0; shift kept inside the comment so the macro expands to a complete expression) */
+
+/* ERRBNK Data Cache Error Bank Register 0 (DEBR0) Register Definitions */
+#define ERRBNK_DEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK DEBR0: SWDEF Position */
+#define ERRBNK_DEBR0_SWDEF_Msk             (0x3UL << ERRBNK_DEBR0_SWDEF_Pos)           /*!< ERRBNK DEBR0: SWDEF Mask */
+
+#define ERRBNK_DEBR0_TYPE_Pos              17U                                         /*!< ERRBNK DEBR0: TYPE Position */
+#define ERRBNK_DEBR0_TYPE_Msk              (0x1UL << ERRBNK_DEBR0_TYPE_Pos)            /*!< ERRBNK DEBR0: TYPE Mask */
+
+#define ERRBNK_DEBR0_BANK_Pos              16U                                         /*!< ERRBNK DEBR0: BANK Position */
+#define ERRBNK_DEBR0_BANK_Msk              (0x1UL << ERRBNK_DEBR0_BANK_Pos)            /*!< ERRBNK DEBR0: BANK Mask */
+
+#define ERRBNK_DEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK DEBR0: LOCATION Position */
+#define ERRBNK_DEBR0_LOCATION_Msk          (0x3FFFUL << ERRBNK_DEBR0_LOCATION_Pos)     /*!< ERRBNK DEBR0: LOCATION Mask */
+
+#define ERRBNK_DEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK DEBR0: LOCKED Position */
+#define ERRBNK_DEBR0_LOCKED_Msk            (0x1UL << ERRBNK_DEBR0_LOCKED_Pos)          /*!< ERRBNK DEBR0: LOCKED Mask */
+
+#define ERRBNK_DEBR0_VALID_Pos              0U                                         /*!< ERRBNK DEBR0: VALID Position */
+#define ERRBNK_DEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_DEBR0_VALID_Pos*/)       /*!< ERRBNK DEBR0: VALID Mask */
+
+/* ERRBNK Data Cache Error Bank Register 1 (DEBR1) Register Definitions */
+#define ERRBNK_DEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK DEBR1: SWDEF Position */
+#define ERRBNK_DEBR1_SWDEF_Msk             (0x3UL << ERRBNK_DEBR1_SWDEF_Pos)           /*!< ERRBNK DEBR1: SWDEF Mask */
+
+#define ERRBNK_DEBR1_TYPE_Pos              17U                                         /*!< ERRBNK DEBR1: TYPE Position */
+#define ERRBNK_DEBR1_TYPE_Msk              (0x1UL << ERRBNK_DEBR1_TYPE_Pos)            /*!< ERRBNK DEBR1: TYPE Mask */
+
+#define ERRBNK_DEBR1_BANK_Pos              16U                                         /*!< ERRBNK DEBR1: BANK Position */
+#define ERRBNK_DEBR1_BANK_Msk              (0x1UL << ERRBNK_DEBR1_BANK_Pos)            /*!< ERRBNK DEBR1: BANK Mask */
+
+#define ERRBNK_DEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK DEBR1: LOCATION Position */
+#define ERRBNK_DEBR1_LOCATION_Msk          (0x3FFFUL << ERRBNK_DEBR1_LOCATION_Pos)     /*!< ERRBNK DEBR1: LOCATION Mask */
+
+#define ERRBNK_DEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK DEBR1: LOCKED Position */
+#define ERRBNK_DEBR1_LOCKED_Msk            (0x1UL << ERRBNK_DEBR1_LOCKED_Pos)          /*!< ERRBNK DEBR1: LOCKED Mask */
+
+#define ERRBNK_DEBR1_VALID_Pos              0U                                         /*!< ERRBNK DEBR1: VALID Position */
+#define ERRBNK_DEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_DEBR1_VALID_Pos*/)       /*!< ERRBNK DEBR1: VALID Mask */
+
+/* ERRBNK TCM Error Bank Register 0 (TEBR0) Register Definitions */
+#define ERRBNK_TEBR0_SWDEF_Pos             30U                                         /*!< ERRBNK TEBR0: SWDEF Position */
+#define ERRBNK_TEBR0_SWDEF_Msk             (0x3UL << ERRBNK_TEBR0_SWDEF_Pos)           /*!< ERRBNK TEBR0: SWDEF Mask */
+
+#define ERRBNK_TEBR0_POISON_Pos            28U                                         /*!< ERRBNK TEBR0: POISON Position */
+#define ERRBNK_TEBR0_POISON_Msk            (0x1UL << ERRBNK_TEBR0_POISON_Pos)          /*!< ERRBNK TEBR0: POISON Mask */
+
+#define ERRBNK_TEBR0_TYPE_Pos              27U                                         /*!< ERRBNK TEBR0: TYPE Position */
+#define ERRBNK_TEBR0_TYPE_Msk              (0x1UL << ERRBNK_TEBR0_TYPE_Pos)            /*!< ERRBNK TEBR0: TYPE Mask */
+
+#define ERRBNK_TEBR0_BANK_Pos              24U                                         /*!< ERRBNK TEBR0: BANK Position */
+#define ERRBNK_TEBR0_BANK_Msk              (0x3UL << ERRBNK_TEBR0_BANK_Pos)            /*!< ERRBNK TEBR0: BANK Mask */
+
+#define ERRBNK_TEBR0_LOCATION_Pos           2U                                         /*!< ERRBNK TEBR0: LOCATION Position */
+#define ERRBNK_TEBR0_LOCATION_Msk          (0x3FFFFFUL << ERRBNK_TEBR0_LOCATION_Pos)   /*!< ERRBNK TEBR0: LOCATION Mask */
+
+#define ERRBNK_TEBR0_LOCKED_Pos             1U                                         /*!< ERRBNK TEBR0: LOCKED Position */
+#define ERRBNK_TEBR0_LOCKED_Msk            (0x1UL << ERRBNK_TEBR0_LOCKED_Pos)          /*!< ERRBNK TEBR0: LOCKED Mask */
+
+#define ERRBNK_TEBR0_VALID_Pos              0U                                         /*!< ERRBNK TEBR0: VALID Position */
+#define ERRBNK_TEBR0_VALID_Msk             (0x1UL /*<< ERRBNK_TEBR0_VALID_Pos*/)       /*!< ERRBNK TEBR0: VALID Mask */
+
+/* ERRBNK TCM Error Bank Register 1 (TEBR1) Register Definitions */
+#define ERRBNK_TEBR1_SWDEF_Pos             30U                                         /*!< ERRBNK TEBR1: SWDEF Position */
+#define ERRBNK_TEBR1_SWDEF_Msk             (0x3UL << ERRBNK_TEBR1_SWDEF_Pos)           /*!< ERRBNK TEBR1: SWDEF Mask */
+
+#define ERRBNK_TEBR1_POISON_Pos            28U                                         /*!< ERRBNK TEBR1: POISON Position */
+#define ERRBNK_TEBR1_POISON_Msk            (0x1UL << ERRBNK_TEBR1_POISON_Pos)          /*!< ERRBNK TEBR1: POISON Mask */
+
+#define ERRBNK_TEBR1_TYPE_Pos              27U                                         /*!< ERRBNK TEBR1: TYPE Position */
+#define ERRBNK_TEBR1_TYPE_Msk              (0x1UL << ERRBNK_TEBR1_TYPE_Pos)            /*!< ERRBNK TEBR1: TYPE Mask */
+
+#define ERRBNK_TEBR1_BANK_Pos              24U                                         /*!< ERRBNK TEBR1: BANK Position */
+#define ERRBNK_TEBR1_BANK_Msk              (0x3UL << ERRBNK_TEBR1_BANK_Pos)            /*!< ERRBNK TEBR1: BANK Mask */
+
+#define ERRBNK_TEBR1_LOCATION_Pos           2U                                         /*!< ERRBNK TEBR1: LOCATION Position */
+#define ERRBNK_TEBR1_LOCATION_Msk          (0x3FFFFFUL << ERRBNK_TEBR1_LOCATION_Pos)   /*!< ERRBNK TEBR1: LOCATION Mask */
+
+#define ERRBNK_TEBR1_LOCKED_Pos             1U                                         /*!< ERRBNK TEBR1: LOCKED Position */
+#define ERRBNK_TEBR1_LOCKED_Msk            (0x1UL << ERRBNK_TEBR1_LOCKED_Pos)          /*!< ERRBNK TEBR1: LOCKED Mask */
+
+#define ERRBNK_TEBR1_VALID_Pos              0U                                         /*!< ERRBNK TEBR1: VALID Position */
+#define ERRBNK_TEBR1_VALID_Msk             (0x1UL /*<< ERRBNK_TEBR1_VALID_Pos*/)       /*!< ERRBNK TEBR1: VALID Mask */
+
+/*@}*/ /* end of group ErrBnk_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup PrcCfgInf_Type     Processor Configuration Information Registers (IMPLEMENTATION DEFINED)
+  \brief    Type definitions for the Processor Configuration Information Registers (PRCCFGINF)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Processor Configuration Information Registers (PRCCFGINF).
+ */
+typedef struct
+{
+  __OM  uint32_t CFGINFOSEL;             /*!< Offset: 0x000 ( /W)  Processor Configuration Information Selection Register */
+  __IM  uint32_t CFGINFORD;              /*!< Offset: 0x004 (R/ )  Processor Configuration Information Read Data Register */
+} PrcCfgInf_Type;
+
+/* PRCCFGINF Processor Configuration Information Selection Register (CFGINFOSEL) Definitions */
+
+/* PRCCFGINF Processor Configuration Information Read Data Register (CFGINFORD) Definitions */
+
+/*@}*/ /* end of group PrcCfgInf_Type */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_TPI     Trace Port Interface (TPI)
+  \brief    Type definitions for the Trace Port Interface (TPI)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Trace Port Interface Register (TPI).
+ */
+typedef struct
+{
+  __IM  uint32_t SSPSR;                  /*!< Offset: 0x000 (R/ )  Supported Parallel Port Sizes Register */
+  __IOM uint32_t CSPSR;                  /*!< Offset: 0x004 (R/W)  Current Parallel Port Sizes Register */
+        uint32_t RESERVED0[2U];          /* Gap 0x008-0x00F so that ACPR lands at 0x010 */
+  __IOM uint32_t ACPR;                   /*!< Offset: 0x010 (R/W)  Asynchronous Clock Prescaler Register */
+        uint32_t RESERVED1[55U];         /* Gap 0x014-0x0EF so that SPPR lands at 0x0F0 */
+  __IOM uint32_t SPPR;                   /*!< Offset: 0x0F0 (R/W)  Selected Pin Protocol Register */
+        uint32_t RESERVED2[131U];        /* Gap 0x0F4-0x2FF so that FFSR lands at 0x300 */
+  __IM  uint32_t FFSR;                   /*!< Offset: 0x300 (R/ )  Formatter and Flush Status Register */
+  __IOM uint32_t FFCR;                   /*!< Offset: 0x304 (R/W)  Formatter and Flush Control Register */
+  __IOM uint32_t PSCR;                   /*!< Offset: 0x308 (R/W)  Periodic Synchronization Control Register */
+        uint32_t RESERVED3[809U];        /* Gap 0x30C-0xFAF so that LAR lands at 0xFB0 */
+  __OM  uint32_t LAR;                    /*!< Offset: 0xFB0 ( /W)  Software Lock Access Register */
+  __IM  uint32_t LSR;                    /*!< Offset: 0xFB4 (R/ )  Software Lock Status Register */
+        uint32_t RESERVED4[4U];          /* Gap 0xFB8-0xFC7 so that TYPE lands at 0xFC8 */
+  __IM  uint32_t TYPE;                   /*!< Offset: 0xFC8 (R/ )  Device Identifier Register */
+  __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  Device Type Register */
+} TPI_Type;
+
+/* TPI Asynchronous Clock Prescaler Register (ACPR) Definitions */
+#define TPI_ACPR_SWOSCALER_Pos              0U                                         /*!< TPI ACPR: SWOSCALER Position */
+#define TPI_ACPR_SWOSCALER_Msk             (0xFFFFUL /*<< TPI_ACPR_SWOSCALER_Pos*/)    /*!< TPI ACPR: SWOSCALER Mask */
+
+/* TPI Selected Pin Protocol Register (SPPR) Definitions */
+#define TPI_SPPR_TXMODE_Pos                 0U                                         /*!< TPI SPPR: TXMODE Position */
+#define TPI_SPPR_TXMODE_Msk                (0x3UL /*<< TPI_SPPR_TXMODE_Pos*/)          /*!< TPI SPPR: TXMODE Mask */
+
+/* TPI Formatter and Flush Status Register (FFSR) Definitions */
+#define TPI_FFSR_FtNonStop_Pos              3U                                         /*!< TPI FFSR: FtNonStop Position */
+#define TPI_FFSR_FtNonStop_Msk             (0x1UL << TPI_FFSR_FtNonStop_Pos)           /*!< TPI FFSR: FtNonStop Mask */
+
+#define TPI_FFSR_TCPresent_Pos              2U                                         /*!< TPI FFSR: TCPresent Position */
+#define TPI_FFSR_TCPresent_Msk             (0x1UL << TPI_FFSR_TCPresent_Pos)           /*!< TPI FFSR: TCPresent Mask */
+
+#define TPI_FFSR_FtStopped_Pos              1U                                         /*!< TPI FFSR: FtStopped Position */
+#define TPI_FFSR_FtStopped_Msk             (0x1UL << TPI_FFSR_FtStopped_Pos)           /*!< TPI FFSR: FtStopped Mask */
+
+#define TPI_FFSR_FlInProg_Pos               0U                                         /*!< TPI FFSR: FlInProg Position */
+#define TPI_FFSR_FlInProg_Msk              (0x1UL /*<< TPI_FFSR_FlInProg_Pos*/)        /*!< TPI FFSR: FlInProg Mask */
+
+/* TPI Formatter and Flush Control Register (FFCR) Definitions */
+#define TPI_FFCR_TrigIn_Pos                 8U                                         /*!< TPI FFCR: TrigIn Position */
+#define TPI_FFCR_TrigIn_Msk                (0x1UL << TPI_FFCR_TrigIn_Pos)              /*!< TPI FFCR: TrigIn Mask */
+
+#define TPI_FFCR_FOnMan_Pos                 6U                                         /*!< TPI FFCR: FOnMan Position */
+#define TPI_FFCR_FOnMan_Msk                (0x1UL << TPI_FFCR_FOnMan_Pos)              /*!< TPI FFCR: FOnMan Mask */
+
+#define TPI_FFCR_EnFmt_Pos                  0U                                         /*!< TPI FFCR: EnFmt Position */
+#define TPI_FFCR_EnFmt_Msk                 (0x3UL /*<< TPI_FFCR_EnFmt_Pos*/)           /*!< TPI FFCR: EnFmt Mask */
+
+/* TPI Periodic Synchronization Control Register (PSCR) Definitions */
+#define TPI_PSCR_PSCount_Pos                0U                                         /*!< TPI PSCR: PSCount Position */
+#define TPI_PSCR_PSCount_Msk               (0x1FUL /*<< TPI_PSCR_PSCount_Pos*/)        /*!< TPI PSCR: PSCount Mask */
+
+/* TPI Software Lock Status Register (LSR) Definitions */
+#define TPI_LSR_nTT_Pos                     2U                                         /*!< TPI LSR: Not thirty-two bit. Position */
+#define TPI_LSR_nTT_Msk                    (0x1UL << TPI_LSR_nTT_Pos)                  /*!< TPI LSR: Not thirty-two bit. Mask */
+
+#define TPI_LSR_SLK_Pos                     1U                                         /*!< TPI LSR: Software Lock status Position */
+#define TPI_LSR_SLK_Msk                    (0x1UL << TPI_LSR_SLK_Pos)                  /*!< TPI LSR: Software Lock status Mask */
+
+#define TPI_LSR_SLI_Pos                     0U                                         /*!< TPI LSR: Software Lock implemented Position */
+#define TPI_LSR_SLI_Msk                    (0x1UL /*<< TPI_LSR_SLI_Pos*/)              /*!< TPI LSR: Software Lock implemented Mask */
+
+/* TPI DEVID Register Definitions (NOTE(review): presumably these describe TPI_Type.TYPE, the Device Identifier Register -- confirm) */
+#define TPI_DEVID_NRZVALID_Pos             11U                                         /*!< TPI DEVID: NRZVALID Position */
+#define TPI_DEVID_NRZVALID_Msk             (0x1UL << TPI_DEVID_NRZVALID_Pos)           /*!< TPI DEVID: NRZVALID Mask */
+
+#define TPI_DEVID_MANCVALID_Pos            10U                                         /*!< TPI DEVID: MANCVALID Position */
+#define TPI_DEVID_MANCVALID_Msk            (0x1UL << TPI_DEVID_MANCVALID_Pos)          /*!< TPI DEVID: MANCVALID Mask */
+
+#define TPI_DEVID_PTINVALID_Pos             9U                                         /*!< TPI DEVID: PTINVALID Position */
+#define TPI_DEVID_PTINVALID_Msk            (0x1UL << TPI_DEVID_PTINVALID_Pos)          /*!< TPI DEVID: PTINVALID Mask */
+
+#define TPI_DEVID_FIFOSZ_Pos                6U                                         /*!< TPI DEVID: FIFO depth Position */
+#define TPI_DEVID_FIFOSZ_Msk               (0x7UL << TPI_DEVID_FIFOSZ_Pos)             /*!< TPI DEVID: FIFO depth Mask */
+
+/* TPI DEVTYPE Register Definitions */
+#define TPI_DEVTYPE_SubType_Pos             4U                                         /*!< TPI DEVTYPE: SubType Position */
+#define TPI_DEVTYPE_SubType_Msk            (0xFUL << TPI_DEVTYPE_SubType_Pos)          /*!< TPI DEVTYPE: SubType Mask */
+
+#define TPI_DEVTYPE_MajorType_Pos           0U                                         /*!< TPI DEVTYPE: MajorType Position */
+#define TPI_DEVTYPE_MajorType_Msk          (0xFUL /*<< TPI_DEVTYPE_MajorType_Pos*/)    /*!< TPI DEVTYPE: MajorType Mask */
+
+/*@}*/ /* end of group CMSIS_TPI */
+
+#if defined (__PMU_PRESENT) && (__PMU_PRESENT == 1U)
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_PMU     Performance Monitoring Unit (PMU)
+  \brief    Type definitions for the Performance Monitoring Unit (PMU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Performance Monitoring Unit (PMU).
+ */
+typedef struct
+{
+  __IOM uint32_t EVCNTR[__PMU_NUM_EVENTCNT];        /*!< Offset: 0x0 (R/W)    PMU Event Counter Registers */
+#if __PMU_NUM_EVENTCNT<31
+        uint32_t RESERVED0[31U-__PMU_NUM_EVENTCNT]; /* Fills the unused counter slots so EVCNTR plus padding is always 31 words */
+#endif
+  __IOM uint32_t CCNTR;                             /*!< Offset: 0x7C (R/W)   PMU Cycle Counter Register */
+        uint32_t RESERVED1[224];                    /* Gap 0x080-0x3FF so that EVTYPER lands at 0x400 */
+  __IOM uint32_t EVTYPER[__PMU_NUM_EVENTCNT];       /*!< Offset: 0x400 (R/W)  PMU Event Type and Filter Registers */
+#if __PMU_NUM_EVENTCNT<31
+        uint32_t RESERVED2[31U-__PMU_NUM_EVENTCNT]; /* Fills the unused type slots so EVTYPER plus padding is always 31 words */
+#endif
+  __IOM uint32_t CCFILTR;                           /*!< Offset: 0x47C (R/W)  PMU Cycle Counter Filter Register */
+        uint32_t RESERVED3[480];                    /* Gap 0x480-0xBFF so that CNTENSET lands at 0xC00 */
+  __IOM uint32_t CNTENSET;                          /*!< Offset: 0xC00 (R/W)  PMU Count Enable Set Register */
+        uint32_t RESERVED4[7];                      /* Gap 0xC04-0xC1F */
+  __IOM uint32_t CNTENCLR;                          /*!< Offset: 0xC20 (R/W)  PMU Count Enable Clear Register */
+        uint32_t RESERVED5[7];                      /* Gap 0xC24-0xC3F */
+  __IOM uint32_t INTENSET;                          /*!< Offset: 0xC40 (R/W)  PMU Interrupt Enable Set Register */
+        uint32_t RESERVED6[7];                      /* Gap 0xC44-0xC5F */
+  __IOM uint32_t INTENCLR;                          /*!< Offset: 0xC60 (R/W)  PMU Interrupt Enable Clear Register */
+        uint32_t RESERVED7[7];                      /* Gap 0xC64-0xC7F */
+  __IOM uint32_t OVSCLR;                            /*!< Offset: 0xC80 (R/W)  PMU Overflow Flag Status Clear Register */
+        uint32_t RESERVED8[7];                      /* Gap 0xC84-0xC9F */
+  __IOM uint32_t SWINC;                             /*!< Offset: 0xCA0 (R/W)  PMU Software Increment Register */
+        uint32_t RESERVED9[7];                      /* Gap 0xCA4-0xCBF */
+  __IOM uint32_t OVSSET;                            /*!< Offset: 0xCC0 (R/W)  PMU Overflow Flag Status Set Register */
+        uint32_t RESERVED10[79];                    /* Gap 0xCC4-0xDFF so that TYPE lands at 0xE00 */
+  __IOM uint32_t TYPE;                              /*!< Offset: 0xE00 (R/W)  PMU Type Register */
+  __IOM uint32_t CTRL;                              /*!< Offset: 0xE04 (R/W)  PMU Control Register */
+        uint32_t RESERVED11[108];                   /* Gap 0xE08-0xFB7 so that AUTHSTATUS lands at 0xFB8 */
+  __IOM uint32_t AUTHSTATUS;                        /*!< Offset: 0xFB8 (R/W)  PMU Authentication Status Register */
+  __IOM uint32_t DEVARCH;                           /*!< Offset: 0xFBC (R/W)  PMU Device Architecture Register */
+        uint32_t RESERVED12[3];                     /* Gap 0xFC0-0xFCB */
+  __IOM uint32_t DEVTYPE;                           /*!< Offset: 0xFCC (R/W)  PMU Device Type Register */
+  __IOM uint32_t PIDR4;                             /*!< Offset: 0xFD0 (R/W)  PMU Peripheral Identification Register 4 */
+        uint32_t RESERVED13[3];                     /* Gap 0xFD4-0xFDF */
+  __IOM uint32_t PIDR0;                             /*!< Offset: 0xFE0 (R/W)  PMU Peripheral Identification Register 0 */
+  __IOM uint32_t PIDR1;                             /*!< Offset: 0xFE4 (R/W)  PMU Peripheral Identification Register 1 */
+  __IOM uint32_t PIDR2;                             /*!< Offset: 0xFE8 (R/W)  PMU Peripheral Identification Register 2 */
+  __IOM uint32_t PIDR3;                             /*!< Offset: 0xFEC (R/W)  PMU Peripheral Identification Register 3 */
+  __IOM uint32_t CIDR0;                             /*!< Offset: 0xFF0 (R/W)  PMU Component Identification Register 0 */
+  __IOM uint32_t CIDR1;                             /*!< Offset: 0xFF4 (R/W)  PMU Component Identification Register 1 */
+  __IOM uint32_t CIDR2;                             /*!< Offset: 0xFF8 (R/W)  PMU Component Identification Register 2 */
+  __IOM uint32_t CIDR3;                             /*!< Offset: 0xFFC (R/W)  PMU Component Identification Register 3 */
+} PMU_Type;
+
+/** \brief PMU Event Counter Registers (0-30) Definitions  */
+
+#define PMU_EVCNTR_CNT_Pos                    0U                                           /*!< PMU EVCNTR: Counter Position */
+#define PMU_EVCNTR_CNT_Msk                   (0xFFFFUL /*<< PMU_EVCNTR_CNT_Pos*/)          /*!< PMU EVCNTR: Counter Mask */
+
+/** \brief PMU Event Type and Filter Registers (0-30) Definitions  */
+
+#define PMU_EVTYPER_EVENTTOCNT_Pos            0U                                           /*!< PMU EVTYPER: Event to Count Position */
+#define PMU_EVTYPER_EVENTTOCNT_Msk           (0xFFFFUL /*<< PMU_EVTYPER_EVENTTOCNT_Pos*/)  /*!< PMU EVTYPER: Event to Count Mask */
+
+/** \brief PMU Count Enable Set Register (CNTENSET) Definitions */
+
+#define PMU_CNTENSET_CNT0_ENABLE_Pos          0U                                           /*!< PMU CNTENSET: Event Counter 0 Enable Set Position */
+#define PMU_CNTENSET_CNT0_ENABLE_Msk         (1UL /*<< PMU_CNTENSET_CNT0_ENABLE_Pos*/)     /*!< PMU CNTENSET: Event Counter 0 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT1_ENABLE_Pos          1U                                           /*!< PMU CNTENSET: Event Counter 1 Enable Set Position */
+#define PMU_CNTENSET_CNT1_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT1_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 1 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT2_ENABLE_Pos          2U                                           /*!< PMU CNTENSET: Event Counter 2 Enable Set Position */
+#define PMU_CNTENSET_CNT2_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT2_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 2 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT3_ENABLE_Pos          3U                                           /*!< PMU CNTENSET: Event Counter 3 Enable Set Position */
+#define PMU_CNTENSET_CNT3_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT3_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 3 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT4_ENABLE_Pos          4U                                           /*!< PMU CNTENSET: Event Counter 4 Enable Set Position */
+#define PMU_CNTENSET_CNT4_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT4_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 4 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT5_ENABLE_Pos          5U                                           /*!< PMU CNTENSET: Event Counter 5 Enable Set Position */
+#define PMU_CNTENSET_CNT5_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT5_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 5 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT6_ENABLE_Pos          6U                                           /*!< PMU CNTENSET: Event Counter 6 Enable Set Position */
+#define PMU_CNTENSET_CNT6_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT6_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 6 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT7_ENABLE_Pos          7U                                           /*!< PMU CNTENSET: Event Counter 7 Enable Set Position */
+#define PMU_CNTENSET_CNT7_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT7_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 7 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT8_ENABLE_Pos          8U                                           /*!< PMU CNTENSET: Event Counter 8 Enable Set Position */
+#define PMU_CNTENSET_CNT8_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT8_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 8 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT9_ENABLE_Pos          9U                                           /*!< PMU CNTENSET: Event Counter 9 Enable Set Position */
+#define PMU_CNTENSET_CNT9_ENABLE_Msk         (1UL << PMU_CNTENSET_CNT9_ENABLE_Pos)         /*!< PMU CNTENSET: Event Counter 9 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT10_ENABLE_Pos         10U                                          /*!< PMU CNTENSET: Event Counter 10 Enable Set Position */
+#define PMU_CNTENSET_CNT10_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT10_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 10 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT11_ENABLE_Pos         11U                                          /*!< PMU CNTENSET: Event Counter 11 Enable Set Position */
+#define PMU_CNTENSET_CNT11_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT11_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 11 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT12_ENABLE_Pos         12U                                          /*!< PMU CNTENSET: Event Counter 12 Enable Set Position */
+#define PMU_CNTENSET_CNT12_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT12_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 12 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT13_ENABLE_Pos         13U                                          /*!< PMU CNTENSET: Event Counter 13 Enable Set Position */
+#define PMU_CNTENSET_CNT13_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT13_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 13 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT14_ENABLE_Pos         14U                                          /*!< PMU CNTENSET: Event Counter 14 Enable Set Position */
+#define PMU_CNTENSET_CNT14_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT14_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 14 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT15_ENABLE_Pos         15U                                          /*!< PMU CNTENSET: Event Counter 15 Enable Set Position */
+#define PMU_CNTENSET_CNT15_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT15_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 15 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT16_ENABLE_Pos         16U                                          /*!< PMU CNTENSET: Event Counter 16 Enable Set Position */
+#define PMU_CNTENSET_CNT16_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT16_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 16 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT17_ENABLE_Pos         17U                                          /*!< PMU CNTENSET: Event Counter 17 Enable Set Position */
+#define PMU_CNTENSET_CNT17_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT17_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 17 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT18_ENABLE_Pos         18U                                          /*!< PMU CNTENSET: Event Counter 18 Enable Set Position */
+#define PMU_CNTENSET_CNT18_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT18_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 18 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT19_ENABLE_Pos         19U                                          /*!< PMU CNTENSET: Event Counter 19 Enable Set Position */
+#define PMU_CNTENSET_CNT19_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT19_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 19 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT20_ENABLE_Pos         20U                                          /*!< PMU CNTENSET: Event Counter 20 Enable Set Position */
+#define PMU_CNTENSET_CNT20_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT20_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 20 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT21_ENABLE_Pos         21U                                          /*!< PMU CNTENSET: Event Counter 21 Enable Set Position */
+#define PMU_CNTENSET_CNT21_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT21_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 21 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT22_ENABLE_Pos         22U                                          /*!< PMU CNTENSET: Event Counter 22 Enable Set Position */
+#define PMU_CNTENSET_CNT22_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT22_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 22 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT23_ENABLE_Pos         23U                                          /*!< PMU CNTENSET: Event Counter 23 Enable Set Position */
+#define PMU_CNTENSET_CNT23_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT23_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 23 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT24_ENABLE_Pos         24U                                          /*!< PMU CNTENSET: Event Counter 24 Enable Set Position */
+#define PMU_CNTENSET_CNT24_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT24_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 24 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT25_ENABLE_Pos         25U                                          /*!< PMU CNTENSET: Event Counter 25 Enable Set Position */
+#define PMU_CNTENSET_CNT25_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT25_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 25 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT26_ENABLE_Pos         26U                                          /*!< PMU CNTENSET: Event Counter 26 Enable Set Position */
+#define PMU_CNTENSET_CNT26_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT26_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 26 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT27_ENABLE_Pos         27U                                          /*!< PMU CNTENSET: Event Counter 27 Enable Set Position */
+#define PMU_CNTENSET_CNT27_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT27_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 27 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT28_ENABLE_Pos         28U                                          /*!< PMU CNTENSET: Event Counter 28 Enable Set Position */
+#define PMU_CNTENSET_CNT28_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT28_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 28 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT29_ENABLE_Pos         29U                                          /*!< PMU CNTENSET: Event Counter 29 Enable Set Position */
+#define PMU_CNTENSET_CNT29_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT29_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 29 Enable Set Mask */
+
+#define PMU_CNTENSET_CNT30_ENABLE_Pos         30U                                          /*!< PMU CNTENSET: Event Counter 30 Enable Set Position */
+#define PMU_CNTENSET_CNT30_ENABLE_Msk        (1UL << PMU_CNTENSET_CNT30_ENABLE_Pos)        /*!< PMU CNTENSET: Event Counter 30 Enable Set Mask */
+
+#define PMU_CNTENSET_CCNTR_ENABLE_Pos         31U                                          /*!< PMU CNTENSET: Cycle Counter Enable Set Position */
+#define PMU_CNTENSET_CCNTR_ENABLE_Msk        (1UL << PMU_CNTENSET_CCNTR_ENABLE_Pos)        /*!< PMU CNTENSET: Cycle Counter Enable Set Mask */
+
+/** \brief PMU Count Enable Clear Register (CNTENCLR) Definitions */
+
+#define PMU_CNTENCLR_CNT0_ENABLE_Pos          0U                                           /*!< PMU CNTENCLR: Event Counter 0 Enable Clear Position */
+#define PMU_CNTENCLR_CNT0_ENABLE_Msk         (1UL /*<< PMU_CNTENCLR_CNT0_ENABLE_Pos*/)     /*!< PMU CNTENCLR: Event Counter 0 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT1_ENABLE_Pos          1U                                           /*!< PMU CNTENCLR: Event Counter 1 Enable Clear Position */
+#define PMU_CNTENCLR_CNT1_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT1_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 1 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT2_ENABLE_Pos          2U                                           /*!< PMU CNTENCLR: Event Counter 2 Enable Clear Position */
+#define PMU_CNTENCLR_CNT2_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT2_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 2 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT3_ENABLE_Pos          3U                                           /*!< PMU CNTENCLR: Event Counter 3 Enable Clear Position */
+#define PMU_CNTENCLR_CNT3_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT3_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 3 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT4_ENABLE_Pos          4U                                           /*!< PMU CNTENCLR: Event Counter 4 Enable Clear Position */
+#define PMU_CNTENCLR_CNT4_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT4_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 4 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT5_ENABLE_Pos          5U                                           /*!< PMU CNTENCLR: Event Counter 5 Enable Clear Position */
+#define PMU_CNTENCLR_CNT5_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT5_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 5 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT6_ENABLE_Pos          6U                                           /*!< PMU CNTENCLR: Event Counter 6 Enable Clear Position */
+#define PMU_CNTENCLR_CNT6_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT6_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 6 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT7_ENABLE_Pos          7U                                           /*!< PMU CNTENCLR: Event Counter 7 Enable Clear Position */
+#define PMU_CNTENCLR_CNT7_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT7_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 7 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT8_ENABLE_Pos          8U                                           /*!< PMU CNTENCLR: Event Counter 8 Enable Clear Position */
+#define PMU_CNTENCLR_CNT8_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT8_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 8 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT9_ENABLE_Pos          9U                                           /*!< PMU CNTENCLR: Event Counter 9 Enable Clear Position */
+#define PMU_CNTENCLR_CNT9_ENABLE_Msk         (1UL << PMU_CNTENCLR_CNT9_ENABLE_Pos)         /*!< PMU CNTENCLR: Event Counter 9 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT10_ENABLE_Pos         10U                                          /*!< PMU CNTENCLR: Event Counter 10 Enable Clear Position */
+#define PMU_CNTENCLR_CNT10_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT10_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 10 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT11_ENABLE_Pos         11U                                          /*!< PMU CNTENCLR: Event Counter 11 Enable Clear Position */
+#define PMU_CNTENCLR_CNT11_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT11_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 11 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT12_ENABLE_Pos         12U                                          /*!< PMU CNTENCLR: Event Counter 12 Enable Clear Position */
+#define PMU_CNTENCLR_CNT12_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT12_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 12 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT13_ENABLE_Pos         13U                                          /*!< PMU CNTENCLR: Event Counter 13 Enable Clear Position */
+#define PMU_CNTENCLR_CNT13_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT13_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 13 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT14_ENABLE_Pos         14U                                          /*!< PMU CNTENCLR: Event Counter 14 Enable Clear Position */
+#define PMU_CNTENCLR_CNT14_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT14_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 14 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT15_ENABLE_Pos         15U                                          /*!< PMU CNTENCLR: Event Counter 15 Enable Clear Position */
+#define PMU_CNTENCLR_CNT15_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT15_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 15 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT16_ENABLE_Pos         16U                                          /*!< PMU CNTENCLR: Event Counter 16 Enable Clear Position */
+#define PMU_CNTENCLR_CNT16_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT16_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 16 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT17_ENABLE_Pos         17U                                          /*!< PMU CNTENCLR: Event Counter 17 Enable Clear Position */
+#define PMU_CNTENCLR_CNT17_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT17_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 17 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT18_ENABLE_Pos         18U                                          /*!< PMU CNTENCLR: Event Counter 18 Enable Clear Position */
+#define PMU_CNTENCLR_CNT18_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT18_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 18 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT19_ENABLE_Pos         19U                                          /*!< PMU CNTENCLR: Event Counter 19 Enable Clear Position */
+#define PMU_CNTENCLR_CNT19_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT19_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 19 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT20_ENABLE_Pos         20U                                          /*!< PMU CNTENCLR: Event Counter 20 Enable Clear Position */
+#define PMU_CNTENCLR_CNT20_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT20_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 20 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT21_ENABLE_Pos         21U                                          /*!< PMU CNTENCLR: Event Counter 21 Enable Clear Position */
+#define PMU_CNTENCLR_CNT21_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT21_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 21 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT22_ENABLE_Pos         22U                                          /*!< PMU CNTENCLR: Event Counter 22 Enable Clear Position */
+#define PMU_CNTENCLR_CNT22_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT22_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 22 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT23_ENABLE_Pos         23U                                          /*!< PMU CNTENCLR: Event Counter 23 Enable Clear Position */
+#define PMU_CNTENCLR_CNT23_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT23_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 23 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT24_ENABLE_Pos         24U                                          /*!< PMU CNTENCLR: Event Counter 24 Enable Clear Position */
+#define PMU_CNTENCLR_CNT24_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT24_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 24 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT25_ENABLE_Pos         25U                                          /*!< PMU CNTENCLR: Event Counter 25 Enable Clear Position */
+#define PMU_CNTENCLR_CNT25_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT25_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 25 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT26_ENABLE_Pos         26U                                          /*!< PMU CNTENCLR: Event Counter 26 Enable Clear Position */
+#define PMU_CNTENCLR_CNT26_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT26_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 26 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT27_ENABLE_Pos         27U                                          /*!< PMU CNTENCLR: Event Counter 27 Enable Clear Position */
+#define PMU_CNTENCLR_CNT27_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT27_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 27 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT28_ENABLE_Pos         28U                                          /*!< PMU CNTENCLR: Event Counter 28 Enable Clear Position */
+#define PMU_CNTENCLR_CNT28_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT28_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 28 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT29_ENABLE_Pos         29U                                          /*!< PMU CNTENCLR: Event Counter 29 Enable Clear Position */
+#define PMU_CNTENCLR_CNT29_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT29_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 29 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CNT30_ENABLE_Pos         30U                                          /*!< PMU CNTENCLR: Event Counter 30 Enable Clear Position */
+#define PMU_CNTENCLR_CNT30_ENABLE_Msk        (1UL << PMU_CNTENCLR_CNT30_ENABLE_Pos)        /*!< PMU CNTENCLR: Event Counter 30 Enable Clear Mask */
+
+#define PMU_CNTENCLR_CCNTR_ENABLE_Pos         31U                                          /*!< PMU CNTENCLR: Cycle Counter Enable Clear Position */
+#define PMU_CNTENCLR_CCNTR_ENABLE_Msk        (1UL << PMU_CNTENCLR_CCNTR_ENABLE_Pos)        /*!< PMU CNTENCLR: Cycle Counter Enable Clear Mask */
+
+/** \brief PMU Interrupt Enable Set Register Definitions (bits 0-30: event counters 0-30, bit 31: cycle counter) */
+#define PMU_INTENSET_CNT0_ENABLE_Pos          0U                                           /*!< PMU INTENSET: Event Counter 0 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT0_ENABLE_Msk         (1UL /*<< PMU_INTENSET_CNT0_ENABLE_Pos*/)     /*!< PMU INTENSET: Event Counter 0 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT1_ENABLE_Pos          1U                                           /*!< PMU INTENSET: Event Counter 1 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT1_ENABLE_Msk         (1UL << PMU_INTENSET_CNT1_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 1 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT2_ENABLE_Pos          2U                                           /*!< PMU INTENSET: Event Counter 2 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT2_ENABLE_Msk         (1UL << PMU_INTENSET_CNT2_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 2 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT3_ENABLE_Pos          3U                                           /*!< PMU INTENSET: Event Counter 3 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT3_ENABLE_Msk         (1UL << PMU_INTENSET_CNT3_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 3 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT4_ENABLE_Pos          4U                                           /*!< PMU INTENSET: Event Counter 4 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT4_ENABLE_Msk         (1UL << PMU_INTENSET_CNT4_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 4 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT5_ENABLE_Pos          5U                                           /*!< PMU INTENSET: Event Counter 5 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT5_ENABLE_Msk         (1UL << PMU_INTENSET_CNT5_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 5 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT6_ENABLE_Pos          6U                                           /*!< PMU INTENSET: Event Counter 6 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT6_ENABLE_Msk         (1UL << PMU_INTENSET_CNT6_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 6 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT7_ENABLE_Pos          7U                                           /*!< PMU INTENSET: Event Counter 7 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT7_ENABLE_Msk         (1UL << PMU_INTENSET_CNT7_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 7 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT8_ENABLE_Pos          8U                                           /*!< PMU INTENSET: Event Counter 8 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT8_ENABLE_Msk         (1UL << PMU_INTENSET_CNT8_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 8 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT9_ENABLE_Pos          9U                                           /*!< PMU INTENSET: Event Counter 9 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT9_ENABLE_Msk         (1UL << PMU_INTENSET_CNT9_ENABLE_Pos)         /*!< PMU INTENSET: Event Counter 9 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT10_ENABLE_Pos         10U                                          /*!< PMU INTENSET: Event Counter 10 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT10_ENABLE_Msk        (1UL << PMU_INTENSET_CNT10_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 10 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT11_ENABLE_Pos         11U                                          /*!< PMU INTENSET: Event Counter 11 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT11_ENABLE_Msk        (1UL << PMU_INTENSET_CNT11_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 11 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT12_ENABLE_Pos         12U                                          /*!< PMU INTENSET: Event Counter 12 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT12_ENABLE_Msk        (1UL << PMU_INTENSET_CNT12_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 12 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT13_ENABLE_Pos         13U                                          /*!< PMU INTENSET: Event Counter 13 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT13_ENABLE_Msk        (1UL << PMU_INTENSET_CNT13_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 13 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT14_ENABLE_Pos         14U                                          /*!< PMU INTENSET: Event Counter 14 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT14_ENABLE_Msk        (1UL << PMU_INTENSET_CNT14_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 14 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT15_ENABLE_Pos         15U                                          /*!< PMU INTENSET: Event Counter 15 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT15_ENABLE_Msk        (1UL << PMU_INTENSET_CNT15_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 15 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT16_ENABLE_Pos         16U                                          /*!< PMU INTENSET: Event Counter 16 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT16_ENABLE_Msk        (1UL << PMU_INTENSET_CNT16_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 16 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT17_ENABLE_Pos         17U                                          /*!< PMU INTENSET: Event Counter 17 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT17_ENABLE_Msk        (1UL << PMU_INTENSET_CNT17_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 17 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT18_ENABLE_Pos         18U                                          /*!< PMU INTENSET: Event Counter 18 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT18_ENABLE_Msk        (1UL << PMU_INTENSET_CNT18_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 18 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT19_ENABLE_Pos         19U                                          /*!< PMU INTENSET: Event Counter 19 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT19_ENABLE_Msk        (1UL << PMU_INTENSET_CNT19_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 19 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT20_ENABLE_Pos         20U                                          /*!< PMU INTENSET: Event Counter 20 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT20_ENABLE_Msk        (1UL << PMU_INTENSET_CNT20_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 20 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT21_ENABLE_Pos         21U                                          /*!< PMU INTENSET: Event Counter 21 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT21_ENABLE_Msk        (1UL << PMU_INTENSET_CNT21_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 21 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT22_ENABLE_Pos         22U                                          /*!< PMU INTENSET: Event Counter 22 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT22_ENABLE_Msk        (1UL << PMU_INTENSET_CNT22_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 22 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT23_ENABLE_Pos         23U                                          /*!< PMU INTENSET: Event Counter 23 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT23_ENABLE_Msk        (1UL << PMU_INTENSET_CNT23_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 23 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT24_ENABLE_Pos         24U                                          /*!< PMU INTENSET: Event Counter 24 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT24_ENABLE_Msk        (1UL << PMU_INTENSET_CNT24_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 24 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT25_ENABLE_Pos         25U                                          /*!< PMU INTENSET: Event Counter 25 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT25_ENABLE_Msk        (1UL << PMU_INTENSET_CNT25_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 25 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT26_ENABLE_Pos         26U                                          /*!< PMU INTENSET: Event Counter 26 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT26_ENABLE_Msk        (1UL << PMU_INTENSET_CNT26_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 26 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT27_ENABLE_Pos         27U                                          /*!< PMU INTENSET: Event Counter 27 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT27_ENABLE_Msk        (1UL << PMU_INTENSET_CNT27_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 27 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT28_ENABLE_Pos         28U                                          /*!< PMU INTENSET: Event Counter 28 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT28_ENABLE_Msk        (1UL << PMU_INTENSET_CNT28_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 28 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT29_ENABLE_Pos         29U                                          /*!< PMU INTENSET: Event Counter 29 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT29_ENABLE_Msk        (1UL << PMU_INTENSET_CNT29_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 29 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CNT30_ENABLE_Pos         30U                                          /*!< PMU INTENSET: Event Counter 30 Interrupt Enable Set Position */
+#define PMU_INTENSET_CNT30_ENABLE_Msk        (1UL << PMU_INTENSET_CNT30_ENABLE_Pos)        /*!< PMU INTENSET: Event Counter 30 Interrupt Enable Set Mask */
+
+#define PMU_INTENSET_CYCCNT_ENABLE_Pos        31U                                          /*!< PMU INTENSET: Cycle Counter Interrupt Enable Set Position */
+#define PMU_INTENSET_CYCCNT_ENABLE_Msk       (1UL << PMU_INTENSET_CYCCNT_ENABLE_Pos)       /*!< PMU INTENSET: Cycle Counter Interrupt Enable Set Mask */
+#define PMU_INTENSET_CCYCNT_ENABLE_Msk        PMU_INTENSET_CYCCNT_ENABLE_Msk               /*!< PMU INTENSET: Misspelled legacy name of PMU_INTENSET_CYCCNT_ENABLE_Msk, kept so existing users still compile */
+
+/** \brief PMU Interrupt Enable Clear Register Definitions (bits 0-30: event counters 0-30, bit 31: cycle counter) */
+
+#define PMU_INTENCLR_CNT0_ENABLE_Pos          0U                                           /*!< PMU INTENCLR: Event Counter 0 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT0_ENABLE_Msk         (1UL /*<< PMU_INTENCLR_CNT0_ENABLE_Pos*/)     /*!< PMU INTENCLR: Event Counter 0 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT1_ENABLE_Pos          1U                                           /*!< PMU INTENCLR: Event Counter 1 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT1_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT1_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 1 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT2_ENABLE_Pos          2U                                           /*!< PMU INTENCLR: Event Counter 2 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT2_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT2_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 2 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT3_ENABLE_Pos          3U                                           /*!< PMU INTENCLR: Event Counter 3 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT3_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT3_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 3 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT4_ENABLE_Pos          4U                                           /*!< PMU INTENCLR: Event Counter 4 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT4_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT4_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 4 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT5_ENABLE_Pos          5U                                           /*!< PMU INTENCLR: Event Counter 5 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT5_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT5_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 5 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT6_ENABLE_Pos          6U                                           /*!< PMU INTENCLR: Event Counter 6 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT6_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT6_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 6 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT7_ENABLE_Pos          7U                                           /*!< PMU INTENCLR: Event Counter 7 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT7_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT7_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 7 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT8_ENABLE_Pos          8U                                           /*!< PMU INTENCLR: Event Counter 8 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT8_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT8_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 8 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT9_ENABLE_Pos          9U                                           /*!< PMU INTENCLR: Event Counter 9 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT9_ENABLE_Msk         (1UL << PMU_INTENCLR_CNT9_ENABLE_Pos)         /*!< PMU INTENCLR: Event Counter 9 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT10_ENABLE_Pos         10U                                          /*!< PMU INTENCLR: Event Counter 10 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT10_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT10_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 10 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT11_ENABLE_Pos         11U                                          /*!< PMU INTENCLR: Event Counter 11 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT11_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT11_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 11 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT12_ENABLE_Pos         12U                                          /*!< PMU INTENCLR: Event Counter 12 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT12_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT12_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 12 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT13_ENABLE_Pos         13U                                          /*!< PMU INTENCLR: Event Counter 13 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT13_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT13_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 13 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT14_ENABLE_Pos         14U                                          /*!< PMU INTENCLR: Event Counter 14 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT14_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT14_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 14 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT15_ENABLE_Pos         15U                                          /*!< PMU INTENCLR: Event Counter 15 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT15_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT15_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 15 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT16_ENABLE_Pos         16U                                          /*!< PMU INTENCLR: Event Counter 16 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT16_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT16_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 16 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT17_ENABLE_Pos         17U                                          /*!< PMU INTENCLR: Event Counter 17 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT17_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT17_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 17 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT18_ENABLE_Pos         18U                                          /*!< PMU INTENCLR: Event Counter 18 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT18_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT18_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 18 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT19_ENABLE_Pos         19U                                          /*!< PMU INTENCLR: Event Counter 19 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT19_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT19_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 19 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT20_ENABLE_Pos         20U                                          /*!< PMU INTENCLR: Event Counter 20 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT20_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT20_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 20 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT21_ENABLE_Pos         21U                                          /*!< PMU INTENCLR: Event Counter 21 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT21_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT21_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 21 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT22_ENABLE_Pos         22U                                          /*!< PMU INTENCLR: Event Counter 22 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT22_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT22_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 22 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT23_ENABLE_Pos         23U                                          /*!< PMU INTENCLR: Event Counter 23 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT23_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT23_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 23 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT24_ENABLE_Pos         24U                                          /*!< PMU INTENCLR: Event Counter 24 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT24_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT24_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 24 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT25_ENABLE_Pos         25U                                          /*!< PMU INTENCLR: Event Counter 25 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT25_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT25_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 25 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT26_ENABLE_Pos         26U                                          /*!< PMU INTENCLR: Event Counter 26 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT26_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT26_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 26 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT27_ENABLE_Pos         27U                                          /*!< PMU INTENCLR: Event Counter 27 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT27_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT27_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 27 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT28_ENABLE_Pos         28U                                          /*!< PMU INTENCLR: Event Counter 28 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT28_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT28_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 28 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT29_ENABLE_Pos         29U                                          /*!< PMU INTENCLR: Event Counter 29 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT29_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT29_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 29 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CNT30_ENABLE_Pos         30U                                          /*!< PMU INTENCLR: Event Counter 30 Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CNT30_ENABLE_Msk        (1UL << PMU_INTENCLR_CNT30_ENABLE_Pos)        /*!< PMU INTENCLR: Event Counter 30 Interrupt Enable Clear Mask */
+
+#define PMU_INTENCLR_CYCCNT_ENABLE_Pos        31U                                          /*!< PMU INTENCLR: Cycle Counter Interrupt Enable Clear Position */
+#define PMU_INTENCLR_CYCCNT_ENABLE_Msk       (1UL << PMU_INTENCLR_CYCCNT_ENABLE_Pos)       /*!< PMU INTENCLR: Cycle Counter Interrupt Enable Clear Mask */
+
+/** \brief PMU Overflow Flag Status Set Register Definitions */
+
+#define PMU_OVSSET_CNT0_STATUS_Pos            0U                                           /*!< PMU OVSSET: Event Counter 0 Overflow Set Position */
+#define PMU_OVSSET_CNT0_STATUS_Msk           (1UL /*<< PMU_OVSSET_CNT0_STATUS_Pos*/)       /*!< PMU OVSSET: Event Counter 0 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT1_STATUS_Pos            1U                                           /*!< PMU OVSSET: Event Counter 1 Overflow Set Position */
+#define PMU_OVSSET_CNT1_STATUS_Msk           (1UL << PMU_OVSSET_CNT1_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 1 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT2_STATUS_Pos            2U                                           /*!< PMU OVSSET: Event Counter 2 Overflow Set Position */
+#define PMU_OVSSET_CNT2_STATUS_Msk           (1UL << PMU_OVSSET_CNT2_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 2 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT3_STATUS_Pos            3U                                           /*!< PMU OVSSET: Event Counter 3 Overflow Set Position */
+#define PMU_OVSSET_CNT3_STATUS_Msk           (1UL << PMU_OVSSET_CNT3_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 3 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT4_STATUS_Pos            4U                                           /*!< PMU OVSSET: Event Counter 4 Overflow Set Position */
+#define PMU_OVSSET_CNT4_STATUS_Msk           (1UL << PMU_OVSSET_CNT4_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 4 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT5_STATUS_Pos            5U                                           /*!< PMU OVSSET: Event Counter 5 Overflow Set Position */
+#define PMU_OVSSET_CNT5_STATUS_Msk           (1UL << PMU_OVSSET_CNT5_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 5 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT6_STATUS_Pos            6U                                           /*!< PMU OVSSET: Event Counter 6 Overflow Set Position */
+#define PMU_OVSSET_CNT6_STATUS_Msk           (1UL << PMU_OVSSET_CNT6_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 6 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT7_STATUS_Pos            7U                                           /*!< PMU OVSSET: Event Counter 7 Overflow Set Position */
+#define PMU_OVSSET_CNT7_STATUS_Msk           (1UL << PMU_OVSSET_CNT7_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 7 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT8_STATUS_Pos            8U                                           /*!< PMU OVSSET: Event Counter 8 Overflow Set Position */
+#define PMU_OVSSET_CNT8_STATUS_Msk           (1UL << PMU_OVSSET_CNT8_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 8 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT9_STATUS_Pos            9U                                           /*!< PMU OVSSET: Event Counter 9 Overflow Set Position */
+#define PMU_OVSSET_CNT9_STATUS_Msk           (1UL << PMU_OVSSET_CNT9_STATUS_Pos)           /*!< PMU OVSSET: Event Counter 9 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT10_STATUS_Pos           10U                                          /*!< PMU OVSSET: Event Counter 10 Overflow Set Position */
+#define PMU_OVSSET_CNT10_STATUS_Msk          (1UL << PMU_OVSSET_CNT10_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 10 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT11_STATUS_Pos           11U                                          /*!< PMU OVSSET: Event Counter 11 Overflow Set Position */
+#define PMU_OVSSET_CNT11_STATUS_Msk          (1UL << PMU_OVSSET_CNT11_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 11 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT12_STATUS_Pos           12U                                          /*!< PMU OVSSET: Event Counter 12 Overflow Set Position */
+#define PMU_OVSSET_CNT12_STATUS_Msk          (1UL << PMU_OVSSET_CNT12_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 12 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT13_STATUS_Pos           13U                                          /*!< PMU OVSSET: Event Counter 13 Overflow Set Position */
+#define PMU_OVSSET_CNT13_STATUS_Msk          (1UL << PMU_OVSSET_CNT13_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 13 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT14_STATUS_Pos           14U                                          /*!< PMU OVSSET: Event Counter 14 Overflow Set Position */
+#define PMU_OVSSET_CNT14_STATUS_Msk          (1UL << PMU_OVSSET_CNT14_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 14 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT15_STATUS_Pos           15U                                          /*!< PMU OVSSET: Event Counter 15 Overflow Set Position */
+#define PMU_OVSSET_CNT15_STATUS_Msk          (1UL << PMU_OVSSET_CNT15_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 15 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT16_STATUS_Pos           16U                                          /*!< PMU OVSSET: Event Counter 16 Overflow Set Position */
+#define PMU_OVSSET_CNT16_STATUS_Msk          (1UL << PMU_OVSSET_CNT16_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 16 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT17_STATUS_Pos           17U                                          /*!< PMU OVSSET: Event Counter 17 Overflow Set Position */
+#define PMU_OVSSET_CNT17_STATUS_Msk          (1UL << PMU_OVSSET_CNT17_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 17 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT18_STATUS_Pos           18U                                          /*!< PMU OVSSET: Event Counter 18 Overflow Set Position */
+#define PMU_OVSSET_CNT18_STATUS_Msk          (1UL << PMU_OVSSET_CNT18_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 18 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT19_STATUS_Pos           19U                                          /*!< PMU OVSSET: Event Counter 19 Overflow Set Position */
+#define PMU_OVSSET_CNT19_STATUS_Msk          (1UL << PMU_OVSSET_CNT19_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 19 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT20_STATUS_Pos           20U                                          /*!< PMU OVSSET: Event Counter 20 Overflow Set Position */
+#define PMU_OVSSET_CNT20_STATUS_Msk          (1UL << PMU_OVSSET_CNT20_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 20 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT21_STATUS_Pos           21U                                          /*!< PMU OVSSET: Event Counter 21 Overflow Set Position */
+#define PMU_OVSSET_CNT21_STATUS_Msk          (1UL << PMU_OVSSET_CNT21_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 21 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT22_STATUS_Pos           22U                                          /*!< PMU OVSSET: Event Counter 22 Overflow Set Position */
+#define PMU_OVSSET_CNT22_STATUS_Msk          (1UL << PMU_OVSSET_CNT22_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 22 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT23_STATUS_Pos           23U                                          /*!< PMU OVSSET: Event Counter 23 Overflow Set Position */
+#define PMU_OVSSET_CNT23_STATUS_Msk          (1UL << PMU_OVSSET_CNT23_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 23 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT24_STATUS_Pos           24U                                          /*!< PMU OVSSET: Event Counter 24 Overflow Set Position */
+#define PMU_OVSSET_CNT24_STATUS_Msk          (1UL << PMU_OVSSET_CNT24_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 24 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT25_STATUS_Pos           25U                                          /*!< PMU OVSSET: Event Counter 25 Overflow Set Position */
+#define PMU_OVSSET_CNT25_STATUS_Msk          (1UL << PMU_OVSSET_CNT25_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 25 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT26_STATUS_Pos           26U                                          /*!< PMU OVSSET: Event Counter 26 Overflow Set Position */
+#define PMU_OVSSET_CNT26_STATUS_Msk          (1UL << PMU_OVSSET_CNT26_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 26 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT27_STATUS_Pos           27U                                          /*!< PMU OVSSET: Event Counter 27 Overflow Set Position */
+#define PMU_OVSSET_CNT27_STATUS_Msk          (1UL << PMU_OVSSET_CNT27_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 27 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT28_STATUS_Pos           28U                                          /*!< PMU OVSSET: Event Counter 28 Overflow Set Position */
+#define PMU_OVSSET_CNT28_STATUS_Msk          (1UL << PMU_OVSSET_CNT28_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 28 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT29_STATUS_Pos           29U                                          /*!< PMU OVSSET: Event Counter 29 Overflow Set Position */
+#define PMU_OVSSET_CNT29_STATUS_Msk          (1UL << PMU_OVSSET_CNT29_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 29 Overflow Set Mask */
+
+#define PMU_OVSSET_CNT30_STATUS_Pos           30U                                          /*!< PMU OVSSET: Event Counter 30 Overflow Set Position */
+#define PMU_OVSSET_CNT30_STATUS_Msk          (1UL << PMU_OVSSET_CNT30_STATUS_Pos)          /*!< PMU OVSSET: Event Counter 30 Overflow Set Mask */
+
+#define PMU_OVSSET_CYCCNT_STATUS_Pos          31U                                          /*!< PMU OVSSET: Cycle Counter Overflow Set Position */
+#define PMU_OVSSET_CYCCNT_STATUS_Msk         (1UL << PMU_OVSSET_CYCCNT_STATUS_Pos)         /*!< PMU OVSSET: Cycle Counter Overflow Set Mask */
+
+/** \brief PMU Overflow Flag Status Clear Register Definitions */
+
+#define PMU_OVSCLR_CNT0_STATUS_Pos            0U                                           /*!< PMU OVSCLR: Event Counter 0 Overflow Clear Position */
+#define PMU_OVSCLR_CNT0_STATUS_Msk           (1UL /*<< PMU_OVSCLR_CNT0_STATUS_Pos*/)       /*!< PMU OVSCLR: Event Counter 0 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT1_STATUS_Pos            1U                                           /*!< PMU OVSCLR: Event Counter 1 Overflow Clear Position */
+#define PMU_OVSCLR_CNT1_STATUS_Msk           (1UL << PMU_OVSCLR_CNT1_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 1 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT2_STATUS_Pos            2U                                           /*!< PMU OVSCLR: Event Counter 2 Overflow Clear Position */
+#define PMU_OVSCLR_CNT2_STATUS_Msk           (1UL << PMU_OVSCLR_CNT2_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 2 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT3_STATUS_Pos            3U                                           /*!< PMU OVSCLR: Event Counter 3 Overflow Clear Position */
+#define PMU_OVSCLR_CNT3_STATUS_Msk           (1UL << PMU_OVSCLR_CNT3_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 3 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT4_STATUS_Pos            4U                                           /*!< PMU OVSCLR: Event Counter 4 Overflow Clear Position */
+#define PMU_OVSCLR_CNT4_STATUS_Msk           (1UL << PMU_OVSCLR_CNT4_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 4 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT5_STATUS_Pos            5U                                           /*!< PMU OVSCLR: Event Counter 5 Overflow Clear Position */
+#define PMU_OVSCLR_CNT5_STATUS_Msk           (1UL << PMU_OVSCLR_CNT5_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 5 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT6_STATUS_Pos            6U                                           /*!< PMU OVSCLR: Event Counter 6 Overflow Clear Position */
+#define PMU_OVSCLR_CNT6_STATUS_Msk           (1UL << PMU_OVSCLR_CNT6_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 6 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT7_STATUS_Pos            7U                                           /*!< PMU OVSCLR: Event Counter 7 Overflow Clear Position */
+#define PMU_OVSCLR_CNT7_STATUS_Msk           (1UL << PMU_OVSCLR_CNT7_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 7 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT8_STATUS_Pos            8U                                           /*!< PMU OVSCLR: Event Counter 8 Overflow Clear Position */
+#define PMU_OVSCLR_CNT8_STATUS_Msk           (1UL << PMU_OVSCLR_CNT8_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 8 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT9_STATUS_Pos            9U                                           /*!< PMU OVSCLR: Event Counter 9 Overflow Clear Position */
+#define PMU_OVSCLR_CNT9_STATUS_Msk           (1UL << PMU_OVSCLR_CNT9_STATUS_Pos)           /*!< PMU OVSCLR: Event Counter 9 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT10_STATUS_Pos           10U                                          /*!< PMU OVSCLR: Event Counter 10 Overflow Clear Position */
+#define PMU_OVSCLR_CNT10_STATUS_Msk          (1UL << PMU_OVSCLR_CNT10_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 10 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT11_STATUS_Pos           11U                                          /*!< PMU OVSCLR: Event Counter 11 Overflow Clear Position */
+#define PMU_OVSCLR_CNT11_STATUS_Msk          (1UL << PMU_OVSCLR_CNT11_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 11 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT12_STATUS_Pos           12U                                          /*!< PMU OVSCLR: Event Counter 12 Overflow Clear Position */
+#define PMU_OVSCLR_CNT12_STATUS_Msk          (1UL << PMU_OVSCLR_CNT12_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 12 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT13_STATUS_Pos           13U                                          /*!< PMU OVSCLR: Event Counter 13 Overflow Clear Position */
+#define PMU_OVSCLR_CNT13_STATUS_Msk          (1UL << PMU_OVSCLR_CNT13_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 13 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT14_STATUS_Pos           14U                                          /*!< PMU OVSCLR: Event Counter 14 Overflow Clear Position */
+#define PMU_OVSCLR_CNT14_STATUS_Msk          (1UL << PMU_OVSCLR_CNT14_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 14 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT15_STATUS_Pos           15U                                          /*!< PMU OVSCLR: Event Counter 15 Overflow Clear Position */
+#define PMU_OVSCLR_CNT15_STATUS_Msk          (1UL << PMU_OVSCLR_CNT15_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 15 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT16_STATUS_Pos           16U                                          /*!< PMU OVSCLR: Event Counter 16 Overflow Clear Position */
+#define PMU_OVSCLR_CNT16_STATUS_Msk          (1UL << PMU_OVSCLR_CNT16_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 16 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT17_STATUS_Pos           17U                                          /*!< PMU OVSCLR: Event Counter 17 Overflow Clear Position */
+#define PMU_OVSCLR_CNT17_STATUS_Msk          (1UL << PMU_OVSCLR_CNT17_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 17 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT18_STATUS_Pos           18U                                          /*!< PMU OVSCLR: Event Counter 18 Overflow Clear Position */
+#define PMU_OVSCLR_CNT18_STATUS_Msk          (1UL << PMU_OVSCLR_CNT18_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 18 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT19_STATUS_Pos           19U                                          /*!< PMU OVSCLR: Event Counter 19 Overflow Clear Position */
+#define PMU_OVSCLR_CNT19_STATUS_Msk          (1UL << PMU_OVSCLR_CNT19_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 19 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT20_STATUS_Pos           20U                                          /*!< PMU OVSCLR: Event Counter 20 Overflow Clear Position */
+#define PMU_OVSCLR_CNT20_STATUS_Msk          (1UL << PMU_OVSCLR_CNT20_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 20 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT21_STATUS_Pos           21U                                          /*!< PMU OVSCLR: Event Counter 21 Overflow Clear Position */
+#define PMU_OVSCLR_CNT21_STATUS_Msk          (1UL << PMU_OVSCLR_CNT21_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 21 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT22_STATUS_Pos           22U                                          /*!< PMU OVSCLR: Event Counter 22 Overflow Clear Position */
+#define PMU_OVSCLR_CNT22_STATUS_Msk          (1UL << PMU_OVSCLR_CNT22_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 22 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT23_STATUS_Pos           23U                                          /*!< PMU OVSCLR: Event Counter 23 Overflow Clear Position */
+#define PMU_OVSCLR_CNT23_STATUS_Msk          (1UL << PMU_OVSCLR_CNT23_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 23 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT24_STATUS_Pos           24U                                          /*!< PMU OVSCLR: Event Counter 24 Overflow Clear Position */
+#define PMU_OVSCLR_CNT24_STATUS_Msk          (1UL << PMU_OVSCLR_CNT24_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 24 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT25_STATUS_Pos           25U                                          /*!< PMU OVSCLR: Event Counter 25 Overflow Clear Position */
+#define PMU_OVSCLR_CNT25_STATUS_Msk          (1UL << PMU_OVSCLR_CNT25_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 25 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT26_STATUS_Pos           26U                                          /*!< PMU OVSCLR: Event Counter 26 Overflow Clear Position */
+#define PMU_OVSCLR_CNT26_STATUS_Msk          (1UL << PMU_OVSCLR_CNT26_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 26 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT27_STATUS_Pos           27U                                          /*!< PMU OVSCLR: Event Counter 27 Overflow Clear Position */
+#define PMU_OVSCLR_CNT27_STATUS_Msk          (1UL << PMU_OVSCLR_CNT27_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 27 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT28_STATUS_Pos           28U                                          /*!< PMU OVSCLR: Event Counter 28 Overflow Clear Position */
+#define PMU_OVSCLR_CNT28_STATUS_Msk          (1UL << PMU_OVSCLR_CNT28_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 28 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT29_STATUS_Pos           29U                                          /*!< PMU OVSCLR: Event Counter 29 Overflow Clear Position */
+#define PMU_OVSCLR_CNT29_STATUS_Msk          (1UL << PMU_OVSCLR_CNT29_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 29 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CNT30_STATUS_Pos           30U                                          /*!< PMU OVSCLR: Event Counter 30 Overflow Clear Position */
+#define PMU_OVSCLR_CNT30_STATUS_Msk          (1UL << PMU_OVSCLR_CNT30_STATUS_Pos)          /*!< PMU OVSCLR: Event Counter 30 Overflow Clear Mask */
+
+#define PMU_OVSCLR_CYCCNT_STATUS_Pos          31U                                          /*!< PMU OVSCLR: Cycle Counter Overflow Clear Position */
+#define PMU_OVSCLR_CYCCNT_STATUS_Msk         (1UL << PMU_OVSCLR_CYCCNT_STATUS_Pos)         /*!< PMU OVSCLR: Cycle Counter Overflow Clear Mask */
+
+/** \brief PMU Software Increment Register Definitions */
+
+#define PMU_SWINC_CNT0_Pos                    0U                                           /*!< PMU SWINC: Event Counter 0 Software Increment Position */
+#define PMU_SWINC_CNT0_Msk                   (1UL /*<< PMU_SWINC_CNT0_Pos */)              /*!< PMU SWINC: Event Counter 0 Software Increment Mask */
+
+#define PMU_SWINC_CNT1_Pos                    1U                                           /*!< PMU SWINC: Event Counter 1 Software Increment Position */
+#define PMU_SWINC_CNT1_Msk                   (1UL << PMU_SWINC_CNT1_Pos)                   /*!< PMU SWINC: Event Counter 1 Software Increment Mask */
+
+#define PMU_SWINC_CNT2_Pos                    2U                                           /*!< PMU SWINC: Event Counter 2 Software Increment Position */
+#define PMU_SWINC_CNT2_Msk                   (1UL << PMU_SWINC_CNT2_Pos)                   /*!< PMU SWINC: Event Counter 2 Software Increment Mask */
+
+#define PMU_SWINC_CNT3_Pos                    3U                                           /*!< PMU SWINC: Event Counter 3 Software Increment Position */
+#define PMU_SWINC_CNT3_Msk                   (1UL << PMU_SWINC_CNT3_Pos)                   /*!< PMU SWINC: Event Counter 3 Software Increment Mask */
+
+#define PMU_SWINC_CNT4_Pos                    4U                                           /*!< PMU SWINC: Event Counter 4 Software Increment Position */
+#define PMU_SWINC_CNT4_Msk                   (1UL << PMU_SWINC_CNT4_Pos)                   /*!< PMU SWINC: Event Counter 4 Software Increment Mask */
+
+#define PMU_SWINC_CNT5_Pos                    5U                                           /*!< PMU SWINC: Event Counter 5 Software Increment Position */
+#define PMU_SWINC_CNT5_Msk                   (1UL << PMU_SWINC_CNT5_Pos)                   /*!< PMU SWINC: Event Counter 5 Software Increment Mask */
+
+#define PMU_SWINC_CNT6_Pos                    6U                                           /*!< PMU SWINC: Event Counter 6 Software Increment Position */
+#define PMU_SWINC_CNT6_Msk                   (1UL << PMU_SWINC_CNT6_Pos)                   /*!< PMU SWINC: Event Counter 6 Software Increment Mask */
+
+#define PMU_SWINC_CNT7_Pos                    7U                                           /*!< PMU SWINC: Event Counter 7 Software Increment Position */
+#define PMU_SWINC_CNT7_Msk                   (1UL << PMU_SWINC_CNT7_Pos)                   /*!< PMU SWINC: Event Counter 7 Software Increment Mask */
+
+#define PMU_SWINC_CNT8_Pos                    8U                                           /*!< PMU SWINC: Event Counter 8 Software Increment Position */
+#define PMU_SWINC_CNT8_Msk                   (1UL << PMU_SWINC_CNT8_Pos)                   /*!< PMU SWINC: Event Counter 8 Software Increment Mask */
+
+#define PMU_SWINC_CNT9_Pos                    9U                                           /*!< PMU SWINC: Event Counter 9 Software Increment Position */
+#define PMU_SWINC_CNT9_Msk                   (1UL << PMU_SWINC_CNT9_Pos)                   /*!< PMU SWINC: Event Counter 9 Software Increment Mask */
+
+#define PMU_SWINC_CNT10_Pos                   10U                                          /*!< PMU SWINC: Event Counter 10 Software Increment Position */
+#define PMU_SWINC_CNT10_Msk                  (1UL << PMU_SWINC_CNT10_Pos)                  /*!< PMU SWINC: Event Counter 10 Software Increment Mask */
+
+#define PMU_SWINC_CNT11_Pos                   11U                                          /*!< PMU SWINC: Event Counter 11 Software Increment Position */
+#define PMU_SWINC_CNT11_Msk                  (1UL << PMU_SWINC_CNT11_Pos)                  /*!< PMU SWINC: Event Counter 11 Software Increment Mask */
+
+#define PMU_SWINC_CNT12_Pos                   12U                                          /*!< PMU SWINC: Event Counter 12 Software Increment Position */
+#define PMU_SWINC_CNT12_Msk                  (1UL << PMU_SWINC_CNT12_Pos)                  /*!< PMU SWINC: Event Counter 12 Software Increment Mask */
+
+#define PMU_SWINC_CNT13_Pos                   13U                                          /*!< PMU SWINC: Event Counter 13 Software Increment Position */
+#define PMU_SWINC_CNT13_Msk                  (1UL << PMU_SWINC_CNT13_Pos)                  /*!< PMU SWINC: Event Counter 13 Software Increment Mask */
+
+#define PMU_SWINC_CNT14_Pos                   14U                                          /*!< PMU SWINC: Event Counter 14 Software Increment Position */
+#define PMU_SWINC_CNT14_Msk                  (1UL << PMU_SWINC_CNT14_Pos)                  /*!< PMU SWINC: Event Counter 14 Software Increment Mask */
+
+#define PMU_SWINC_CNT15_Pos                   15U                                          /*!< PMU SWINC: Event Counter 15 Software Increment Position */
+#define PMU_SWINC_CNT15_Msk                  (1UL << PMU_SWINC_CNT15_Pos)                  /*!< PMU SWINC: Event Counter 15 Software Increment Mask */
+
+#define PMU_SWINC_CNT16_Pos                   16U                                          /*!< PMU SWINC: Event Counter 16 Software Increment Position */
+#define PMU_SWINC_CNT16_Msk                  (1UL << PMU_SWINC_CNT16_Pos)                  /*!< PMU SWINC: Event Counter 16 Software Increment Mask */
+
+#define PMU_SWINC_CNT17_Pos                   17U                                          /*!< PMU SWINC: Event Counter 17 Software Increment Position */
+#define PMU_SWINC_CNT17_Msk                  (1UL << PMU_SWINC_CNT17_Pos)                  /*!< PMU SWINC: Event Counter 17 Software Increment Mask */
+
+#define PMU_SWINC_CNT18_Pos                   18U                                          /*!< PMU SWINC: Event Counter 18 Software Increment Position */
+#define PMU_SWINC_CNT18_Msk                  (1UL << PMU_SWINC_CNT18_Pos)                  /*!< PMU SWINC: Event Counter 18 Software Increment Mask */
+
+#define PMU_SWINC_CNT19_Pos                   19U                                          /*!< PMU SWINC: Event Counter 19 Software Increment Position */
+#define PMU_SWINC_CNT19_Msk                  (1UL << PMU_SWINC_CNT19_Pos)                  /*!< PMU SWINC: Event Counter 19 Software Increment Mask */
+
+#define PMU_SWINC_CNT20_Pos                   20U                                          /*!< PMU SWINC: Event Counter 20 Software Increment Position */
+#define PMU_SWINC_CNT20_Msk                  (1UL << PMU_SWINC_CNT20_Pos)                  /*!< PMU SWINC: Event Counter 20 Software Increment Mask */
+
+#define PMU_SWINC_CNT21_Pos                   21U                                          /*!< PMU SWINC: Event Counter 21 Software Increment Position */
+#define PMU_SWINC_CNT21_Msk                  (1UL << PMU_SWINC_CNT21_Pos)                  /*!< PMU SWINC: Event Counter 21 Software Increment Mask */
+
+#define PMU_SWINC_CNT22_Pos                   22U                                          /*!< PMU SWINC: Event Counter 22 Software Increment Position */
+#define PMU_SWINC_CNT22_Msk                  (1UL << PMU_SWINC_CNT22_Pos)                  /*!< PMU SWINC: Event Counter 22 Software Increment Mask */
+
+#define PMU_SWINC_CNT23_Pos                   23U                                          /*!< PMU SWINC: Event Counter 23 Software Increment Position */
+#define PMU_SWINC_CNT23_Msk                  (1UL << PMU_SWINC_CNT23_Pos)                  /*!< PMU SWINC: Event Counter 23 Software Increment Mask */
+
+#define PMU_SWINC_CNT24_Pos                   24U                                          /*!< PMU SWINC: Event Counter 24 Software Increment Position */
+#define PMU_SWINC_CNT24_Msk                  (1UL << PMU_SWINC_CNT24_Pos)                  /*!< PMU SWINC: Event Counter 24 Software Increment Mask */
+
+#define PMU_SWINC_CNT25_Pos                   25U                                          /*!< PMU SWINC: Event Counter 25 Software Increment Position */
+#define PMU_SWINC_CNT25_Msk                  (1UL << PMU_SWINC_CNT25_Pos)                  /*!< PMU SWINC: Event Counter 25 Software Increment Mask */
+
+#define PMU_SWINC_CNT26_Pos                   26U                                          /*!< PMU SWINC: Event Counter 26 Software Increment Position */
+#define PMU_SWINC_CNT26_Msk                  (1UL << PMU_SWINC_CNT26_Pos)                  /*!< PMU SWINC: Event Counter 26 Software Increment Mask */
+
+#define PMU_SWINC_CNT27_Pos                   27U                                          /*!< PMU SWINC: Event Counter 27 Software Increment Position */
+#define PMU_SWINC_CNT27_Msk                  (1UL << PMU_SWINC_CNT27_Pos)                  /*!< PMU SWINC: Event Counter 27 Software Increment Mask */
+
+#define PMU_SWINC_CNT28_Pos                   28U                                          /*!< PMU SWINC: Event Counter 28 Software Increment Position */
+#define PMU_SWINC_CNT28_Msk                  (1UL << PMU_SWINC_CNT28_Pos)                  /*!< PMU SWINC: Event Counter 28 Software Increment Mask */
+
+#define PMU_SWINC_CNT29_Pos                   29U                                          /*!< PMU SWINC: Event Counter 29 Software Increment Position */
+#define PMU_SWINC_CNT29_Msk                  (1UL << PMU_SWINC_CNT29_Pos)                  /*!< PMU SWINC: Event Counter 29 Software Increment Mask */
+
+#define PMU_SWINC_CNT30_Pos                   30U                                          /*!< PMU SWINC: Event Counter 30 Software Increment Position */
+#define PMU_SWINC_CNT30_Msk                  (1UL << PMU_SWINC_CNT30_Pos)                  /*!< PMU SWINC: Event Counter 30 Software Increment Mask */
+
+/** \brief PMU Control Register Definitions */
+
+#define PMU_CTRL_ENABLE_Pos                   0U                                           /*!< PMU CTRL: ENABLE Position */
+#define PMU_CTRL_ENABLE_Msk                  (1UL /*<< PMU_CTRL_ENABLE_Pos*/)              /*!< PMU CTRL: ENABLE Mask */
+
+#define PMU_CTRL_EVENTCNT_RESET_Pos           1U                                           /*!< PMU CTRL: Event Counter Reset Position */
+#define PMU_CTRL_EVENTCNT_RESET_Msk          (1UL << PMU_CTRL_EVENTCNT_RESET_Pos)          /*!< PMU CTRL: Event Counter Reset Mask */
+
+#define PMU_CTRL_CYCCNT_RESET_Pos             2U                                           /*!< PMU CTRL: Cycle Counter Reset Position */
+#define PMU_CTRL_CYCCNT_RESET_Msk            (1UL << PMU_CTRL_CYCCNT_RESET_Pos)            /*!< PMU CTRL: Cycle Counter Reset Mask */
+
+#define PMU_CTRL_CYCCNT_DISABLE_Pos           5U                                           /*!< PMU CTRL: Disable Cycle Counter Position */
+#define PMU_CTRL_CYCCNT_DISABLE_Msk          (1UL << PMU_CTRL_CYCCNT_DISABLE_Pos)          /*!< PMU CTRL: Disable Cycle Counter Mask */
+
+#define PMU_CTRL_FRZ_ON_OV_Pos                9U                                           /*!< PMU CTRL: Freeze-on-overflow Position */
+#define PMU_CTRL_FRZ_ON_OV_Msk               (1UL << PMU_CTRL_FRZ_ON_OV_Pos)               /*!< PMU CTRL: Freeze-on-overflow Mask */
+
+#define PMU_CTRL_TRACE_ON_OV_Pos              11U                                          /*!< PMU CTRL: Trace-on-overflow Position */
+#define PMU_CTRL_TRACE_ON_OV_Msk             (1UL << PMU_CTRL_TRACE_ON_OV_Pos)             /*!< PMU CTRL: Trace-on-overflow Mask */
+
+/** \brief PMU Type Register Definitions */
+
+#define PMU_TYPE_NUM_CNTS_Pos                 0U                                           /*!< PMU TYPE: Number of Counters Position */
+#define PMU_TYPE_NUM_CNTS_Msk                (0xFFUL /*<< PMU_TYPE_NUM_CNTS_Pos*/)         /*!< PMU TYPE: Number of Counters Mask */
+
+#define PMU_TYPE_SIZE_CNTS_Pos                8U                                           /*!< PMU TYPE: Size of Counters Position */
+#define PMU_TYPE_SIZE_CNTS_Msk               (0x3FUL << PMU_TYPE_SIZE_CNTS_Pos)            /*!< PMU TYPE: Size of Counters Mask */
+
+#define PMU_TYPE_CYCCNT_PRESENT_Pos           14U                                          /*!< PMU TYPE: Cycle Counter Present Position */
+#define PMU_TYPE_CYCCNT_PRESENT_Msk          (1UL << PMU_TYPE_CYCCNT_PRESENT_Pos)          /*!< PMU TYPE: Cycle Counter Present Mask */
+
+#define PMU_TYPE_FRZ_OV_SUPPORT_Pos           21U                                          /*!< PMU TYPE: Freeze-on-overflow Support Position */
+#define PMU_TYPE_FRZ_OV_SUPPORT_Msk          (1UL << PMU_TYPE_FRZ_OV_SUPPORT_Pos)          /*!< PMU TYPE: Freeze-on-overflow Support Mask */
+
+#define PMU_TYPE_TRACE_ON_OV_SUPPORT_Pos      23U                                          /*!< PMU TYPE: Trace-on-overflow Support Position */
+#define PMU_TYPE_TRACE_ON_OV_SUPPORT_Msk     (1UL << PMU_TYPE_TRACE_ON_OV_SUPPORT_Pos)     /*!< PMU TYPE: Trace-on-overflow Support Mask */
+
+/** \brief PMU Authentication Status Register Definitions */
+
+#define PMU_AUTHSTATUS_NSID_Pos               0U                                           /*!< PMU AUTHSTATUS: Non-secure Invasive Debug Position */
+#define PMU_AUTHSTATUS_NSID_Msk              (0x3UL /*<< PMU_AUTHSTATUS_NSID_Pos*/)        /*!< PMU AUTHSTATUS: Non-secure Invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_NSNID_Pos              2U                                           /*!< PMU AUTHSTATUS: Non-secure Non-invasive Debug Position */
+#define PMU_AUTHSTATUS_NSNID_Msk             (0x3UL << PMU_AUTHSTATUS_NSNID_Pos)           /*!< PMU AUTHSTATUS: Non-secure Non-invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_SID_Pos                4U                                           /*!< PMU AUTHSTATUS: Secure Invasive Debug Position */
+#define PMU_AUTHSTATUS_SID_Msk               (0x3UL << PMU_AUTHSTATUS_SID_Pos)             /*!< PMU AUTHSTATUS: Secure Invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_SNID_Pos               6U                                           /*!< PMU AUTHSTATUS: Secure Non-invasive Debug Position */
+#define PMU_AUTHSTATUS_SNID_Msk              (0x3UL << PMU_AUTHSTATUS_SNID_Pos)            /*!< PMU AUTHSTATUS: Secure Non-invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_NSUID_Pos              16U                                          /*!< PMU AUTHSTATUS: Non-secure Unprivileged Invasive Debug Position */
+#define PMU_AUTHSTATUS_NSUID_Msk             (0x3UL << PMU_AUTHSTATUS_NSUID_Pos)           /*!< PMU AUTHSTATUS: Non-secure Unprivileged Invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_NSUNID_Pos             18U                                          /*!< PMU AUTHSTATUS: Non-secure Unprivileged Non-invasive Debug Position */
+#define PMU_AUTHSTATUS_NSUNID_Msk            (0x3UL << PMU_AUTHSTATUS_NSUNID_Pos)          /*!< PMU AUTHSTATUS: Non-secure Unprivileged Non-invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_SUID_Pos               20U                                          /*!< PMU AUTHSTATUS: Secure Unprivileged Invasive Debug Position */
+#define PMU_AUTHSTATUS_SUID_Msk              (0x3UL << PMU_AUTHSTATUS_SUID_Pos)            /*!< PMU AUTHSTATUS: Secure Unprivileged Invasive Debug Mask */
+
+#define PMU_AUTHSTATUS_SUNID_Pos              22U                                          /*!< PMU AUTHSTATUS: Secure Unprivileged Non-invasive Debug Position */
+#define PMU_AUTHSTATUS_SUNID_Msk             (0x3UL << PMU_AUTHSTATUS_SUNID_Pos)           /*!< PMU AUTHSTATUS: Secure Unprivileged Non-invasive Debug Mask */
+
+
+/*@} end of group CMSIS_PMU */
+#endif
+
+#if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_MPU     Memory Protection Unit (MPU)
+  \brief    Type definitions for the Memory Protection Unit (MPU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Memory Protection Unit (MPU).
+ */
+typedef struct
+{
+  __IM  uint32_t TYPE;                   /*!< Offset: 0x000 (R/ )  MPU Type Register */
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x004 (R/W)  MPU Control Register */
+  __IOM uint32_t RNR;                    /*!< Offset: 0x008 (R/W)  MPU Region Number Register */
+  __IOM uint32_t RBAR;                   /*!< Offset: 0x00C (R/W)  MPU Region Base Address Register */
+  __IOM uint32_t RLAR;                   /*!< Offset: 0x010 (R/W)  MPU Region Limit Address Register */
+  __IOM uint32_t RBAR_A1;                /*!< Offset: 0x014 (R/W)  MPU Region Base Address Register Alias 1 */
+  __IOM uint32_t RLAR_A1;                /*!< Offset: 0x018 (R/W)  MPU Region Limit Address Register Alias 1 */
+  __IOM uint32_t RBAR_A2;                /*!< Offset: 0x01C (R/W)  MPU Region Base Address Register Alias 2 */
+  __IOM uint32_t RLAR_A2;                /*!< Offset: 0x020 (R/W)  MPU Region Limit Address Register Alias 2 */
+  __IOM uint32_t RBAR_A3;                /*!< Offset: 0x024 (R/W)  MPU Region Base Address Register Alias 3 */
+  __IOM uint32_t RLAR_A3;                /*!< Offset: 0x028 (R/W)  MPU Region Limit Address Register Alias 3 */
+        uint32_t RESERVED0[1];          /* Offset: 0x02C  one unused word; keeps MAIR0 at 0x030 */
+  union {                                /* Two views of the same two words: indexed (MAIR[]) and named (MAIR0/MAIR1) */
+  __IOM uint32_t MAIR[2];                /*!< Offset: 0x030 (R/W)  MPU Memory Attribute Indirection Registers 0 and 1, indexed view */
+  struct {                               /* Named view; anonymous so the members are reached directly through MPU_Type */
+  __IOM uint32_t MAIR0;                  /*!< Offset: 0x030 (R/W)  MPU Memory Attribute Indirection Register 0 */
+  __IOM uint32_t MAIR1;                  /*!< Offset: 0x034 (R/W)  MPU Memory Attribute Indirection Register 1 */
+  };                                     /* end of named MAIR0/MAIR1 view */
+  };                                     /* end of MAIR union */
+} MPU_Type;
+
+#define MPU_TYPE_RALIASES                  4U                                             /*!< Matches the four RBAR/RLAR pairs in MPU_Type: the base pair plus aliases A1..A3 */
+
+/* MPU Type Register Definitions */
+#define MPU_TYPE_IREGION_Pos               16U                                            /*!< MPU TYPE: IREGION Position */
+#define MPU_TYPE_IREGION_Msk               (0xFFUL << MPU_TYPE_IREGION_Pos)               /*!< MPU TYPE: IREGION Mask */
+
+#define MPU_TYPE_DREGION_Pos                8U                                            /*!< MPU TYPE: DREGION Position */
+#define MPU_TYPE_DREGION_Msk               (0xFFUL << MPU_TYPE_DREGION_Pos)               /*!< MPU TYPE: DREGION Mask */
+
+#define MPU_TYPE_SEPARATE_Pos               0U                                            /*!< MPU TYPE: SEPARATE Position */
+#define MPU_TYPE_SEPARATE_Msk              (1UL /*<< MPU_TYPE_SEPARATE_Pos*/)             /*!< MPU TYPE: SEPARATE Mask */
+
+/* MPU Control Register Definitions */
+#define MPU_CTRL_PRIVDEFENA_Pos             2U                                            /*!< MPU CTRL: PRIVDEFENA Position */
+#define MPU_CTRL_PRIVDEFENA_Msk            (1UL << MPU_CTRL_PRIVDEFENA_Pos)               /*!< MPU CTRL: PRIVDEFENA Mask */
+
+#define MPU_CTRL_HFNMIENA_Pos               1U                                            /*!< MPU CTRL: HFNMIENA Position */
+#define MPU_CTRL_HFNMIENA_Msk              (1UL << MPU_CTRL_HFNMIENA_Pos)                 /*!< MPU CTRL: HFNMIENA Mask */
+
+#define MPU_CTRL_ENABLE_Pos                 0U                                            /*!< MPU CTRL: ENABLE Position */
+#define MPU_CTRL_ENABLE_Msk                (1UL /*<< MPU_CTRL_ENABLE_Pos*/)               /*!< MPU CTRL: ENABLE Mask */
+
+/* MPU Region Number Register Definitions */
+#define MPU_RNR_REGION_Pos                  0U                                            /*!< MPU RNR: REGION Position */
+#define MPU_RNR_REGION_Msk                 (0xFFUL /*<< MPU_RNR_REGION_Pos*/)             /*!< MPU RNR: REGION Mask */
+
+/* MPU Region Base Address Register Definitions */
+#define MPU_RBAR_BASE_Pos                   5U                                            /*!< MPU RBAR: BASE Position */
+#define MPU_RBAR_BASE_Msk                  (0x7FFFFFFUL << MPU_RBAR_BASE_Pos)             /*!< MPU RBAR: BASE Mask */
+
+#define MPU_RBAR_SH_Pos                     3U                                            /*!< MPU RBAR: SH Position */
+#define MPU_RBAR_SH_Msk                    (0x3UL << MPU_RBAR_SH_Pos)                     /*!< MPU RBAR: SH Mask */
+
+#define MPU_RBAR_AP_Pos                     1U                                            /*!< MPU RBAR: AP Position */
+#define MPU_RBAR_AP_Msk                    (0x3UL << MPU_RBAR_AP_Pos)                     /*!< MPU RBAR: AP Mask */
+
+#define MPU_RBAR_XN_Pos                     0U                                            /*!< MPU RBAR: XN Position */
+#define MPU_RBAR_XN_Msk                    (01UL /*<< MPU_RBAR_XN_Pos*/)                  /*!< MPU RBAR: XN Mask */
+
+/* MPU Region Limit Address Register Definitions */
+#define MPU_RLAR_LIMIT_Pos                  5U                                            /*!< MPU RLAR: LIMIT Position */
+#define MPU_RLAR_LIMIT_Msk                 (0x7FFFFFFUL << MPU_RLAR_LIMIT_Pos)            /*!< MPU RLAR: LIMIT Mask */
+
+#define MPU_RLAR_PXN_Pos                    4U                                            /*!< MPU RLAR: PXN Position */
+#define MPU_RLAR_PXN_Msk                   (1UL << MPU_RLAR_PXN_Pos)                      /*!< MPU RLAR: PXN Mask */
+
+#define MPU_RLAR_AttrIndx_Pos               1U                                            /*!< MPU RLAR: AttrIndx Position */
+#define MPU_RLAR_AttrIndx_Msk              (7UL << MPU_RLAR_AttrIndx_Pos)                 /*!< MPU RLAR: AttrIndx Mask */
+
+#define MPU_RLAR_EN_Pos                     0U                                            /*!< MPU RLAR: Region enable bit Position */
+#define MPU_RLAR_EN_Msk                    (1UL /*<< MPU_RLAR_EN_Pos*/)                   /*!< MPU RLAR: Region enable bit Mask */
+
+/* MPU Memory Attribute Indirection Register 0 Definitions */
+#define MPU_MAIR0_Attr3_Pos                24U                                            /*!< MPU MAIR0: Attr3 Position */
+#define MPU_MAIR0_Attr3_Msk                (0xFFUL << MPU_MAIR0_Attr3_Pos)                /*!< MPU MAIR0: Attr3 Mask */
+
+#define MPU_MAIR0_Attr2_Pos                16U                                            /*!< MPU MAIR0: Attr2 Position */
+#define MPU_MAIR0_Attr2_Msk                (0xFFUL << MPU_MAIR0_Attr2_Pos)                /*!< MPU MAIR0: Attr2 Mask */
+
+#define MPU_MAIR0_Attr1_Pos                 8U                                            /*!< MPU MAIR0: Attr1 Position */
+#define MPU_MAIR0_Attr1_Msk                (0xFFUL << MPU_MAIR0_Attr1_Pos)                /*!< MPU MAIR0: Attr1 Mask */
+
+#define MPU_MAIR0_Attr0_Pos                 0U                                            /*!< MPU MAIR0: Attr0 Position */
+#define MPU_MAIR0_Attr0_Msk                (0xFFUL /*<< MPU_MAIR0_Attr0_Pos*/)            /*!< MPU MAIR0: Attr0 Mask */
+
+/* MPU Memory Attribute Indirection Register 1 Definitions */
+#define MPU_MAIR1_Attr7_Pos                24U                                            /*!< MPU MAIR1: Attr7 Position */
+#define MPU_MAIR1_Attr7_Msk                (0xFFUL << MPU_MAIR1_Attr7_Pos)                /*!< MPU MAIR1: Attr7 Mask */
+
+#define MPU_MAIR1_Attr6_Pos                16U                                            /*!< MPU MAIR1: Attr6 Position */
+#define MPU_MAIR1_Attr6_Msk                (0xFFUL << MPU_MAIR1_Attr6_Pos)                /*!< MPU MAIR1: Attr6 Mask */
+
+#define MPU_MAIR1_Attr5_Pos                 8U                                            /*!< MPU MAIR1: Attr5 Position */
+#define MPU_MAIR1_Attr5_Msk                (0xFFUL << MPU_MAIR1_Attr5_Pos)                /*!< MPU MAIR1: Attr5 Mask */
+
+#define MPU_MAIR1_Attr4_Pos                 0U                                            /*!< MPU MAIR1: Attr4 Position */
+#define MPU_MAIR1_Attr4_Msk                (0xFFUL /*<< MPU_MAIR1_Attr4_Pos*/)            /*!< MPU MAIR1: Attr4 Mask */
+
+/*@} end of group CMSIS_MPU */
+#endif
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SAU     Security Attribution Unit (SAU)
+  \brief    Type definitions for the Security Attribution Unit (SAU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Security Attribution Unit (SAU); RNR/RBAR/RLAR are only declared when __SAUREGION_PRESENT == 1U.
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  SAU Control Register */
+  __IM  uint32_t TYPE;                   /*!< Offset: 0x004 (R/ )  SAU Type Register */
+#if defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U)
+  __IOM uint32_t RNR;                    /*!< Offset: 0x008 (R/W)  SAU Region Number Register */
+  __IOM uint32_t RBAR;                   /*!< Offset: 0x00C (R/W)  SAU Region Base Address Register */
+  __IOM uint32_t RLAR;                   /*!< Offset: 0x010 (R/W)  SAU Region Limit Address Register */
+#else
+        uint32_t RESERVED0[3];           /*!< Offset: 0x008        Reserved; pads the RNR/RBAR/RLAR slots so SFSR stays at 0x014 */
+#endif /* defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U) */
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x014 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x018 (R/W)  Secure Fault Address Register */
+} SAU_Type;
+
+/* SAU Control Register Definitions */
+#define SAU_CTRL_ALLNS_Pos                  1U                                            /*!< SAU CTRL: ALLNS Position */
+#define SAU_CTRL_ALLNS_Msk                 (1UL << SAU_CTRL_ALLNS_Pos)                    /*!< SAU CTRL: ALLNS Mask */
+
+#define SAU_CTRL_ENABLE_Pos                 0U                                            /*!< SAU CTRL: ENABLE Position */
+#define SAU_CTRL_ENABLE_Msk                (1UL /*<< SAU_CTRL_ENABLE_Pos*/)               /*!< SAU CTRL: ENABLE Mask */
+
+/* SAU Type Register Definitions */
+#define SAU_TYPE_SREGION_Pos                0U                                            /*!< SAU TYPE: SREGION Position */
+#define SAU_TYPE_SREGION_Msk               (0xFFUL /*<< SAU_TYPE_SREGION_Pos*/)           /*!< SAU TYPE: SREGION Mask */
+
+#if defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U)
+/* SAU Region Number Register Definitions */
+#define SAU_RNR_REGION_Pos                  0U                                            /*!< SAU RNR: REGION Position */
+#define SAU_RNR_REGION_Msk                 (0xFFUL /*<< SAU_RNR_REGION_Pos*/)             /*!< SAU RNR: REGION Mask */
+
+/* SAU Region Base Address Register Definitions */
+#define SAU_RBAR_BADDR_Pos                  5U                                            /*!< SAU RBAR: BADDR Position */
+#define SAU_RBAR_BADDR_Msk                 (0x7FFFFFFUL << SAU_RBAR_BADDR_Pos)            /*!< SAU RBAR: BADDR Mask */
+
+/* SAU Region Limit Address Register Definitions */
+#define SAU_RLAR_LADDR_Pos                  5U                                            /*!< SAU RLAR: LADDR Position */
+#define SAU_RLAR_LADDR_Msk                 (0x7FFFFFFUL << SAU_RLAR_LADDR_Pos)            /*!< SAU RLAR: LADDR Mask */
+
+#define SAU_RLAR_NSC_Pos                    1U                                            /*!< SAU RLAR: NSC Position */
+#define SAU_RLAR_NSC_Msk                   (1UL << SAU_RLAR_NSC_Pos)                      /*!< SAU RLAR: NSC Mask */
+
+#define SAU_RLAR_ENABLE_Pos                 0U                                            /*!< SAU RLAR: ENABLE Position */
+#define SAU_RLAR_ENABLE_Msk                (1UL /*<< SAU_RLAR_ENABLE_Pos*/)               /*!< SAU RLAR: ENABLE Mask */
+
+#endif /* defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U) */
+
+/* Secure Fault Status Register Definitions */
+#define SAU_SFSR_LSERR_Pos                  7U                                            /*!< SAU SFSR: LSERR Position */
+#define SAU_SFSR_LSERR_Msk                 (1UL << SAU_SFSR_LSERR_Pos)                    /*!< SAU SFSR: LSERR Mask */
+
+#define SAU_SFSR_SFARVALID_Pos              6U                                            /*!< SAU SFSR: SFARVALID Position */
+#define SAU_SFSR_SFARVALID_Msk             (1UL << SAU_SFSR_SFARVALID_Pos)                /*!< SAU SFSR: SFARVALID Mask */
+
+#define SAU_SFSR_LSPERR_Pos                 5U                                            /*!< SAU SFSR: LSPERR Position */
+#define SAU_SFSR_LSPERR_Msk                (1UL << SAU_SFSR_LSPERR_Pos)                   /*!< SAU SFSR: LSPERR Mask */
+
+#define SAU_SFSR_INVTRAN_Pos                4U                                            /*!< SAU SFSR: INVTRAN Position */
+#define SAU_SFSR_INVTRAN_Msk               (1UL << SAU_SFSR_INVTRAN_Pos)                  /*!< SAU SFSR: INVTRAN Mask */
+
+#define SAU_SFSR_AUVIOL_Pos                 3U                                            /*!< SAU SFSR: AUVIOL Position */
+#define SAU_SFSR_AUVIOL_Msk                (1UL << SAU_SFSR_AUVIOL_Pos)                   /*!< SAU SFSR: AUVIOL Mask */
+
+#define SAU_SFSR_INVER_Pos                  2U                                            /*!< SAU SFSR: INVER Position */
+#define SAU_SFSR_INVER_Msk                 (1UL << SAU_SFSR_INVER_Pos)                    /*!< SAU SFSR: INVER Mask */
+
+#define SAU_SFSR_INVIS_Pos                  1U                                            /*!< SAU SFSR: INVIS Position */
+#define SAU_SFSR_INVIS_Msk                 (1UL << SAU_SFSR_INVIS_Pos)                    /*!< SAU SFSR: INVIS Mask */
+
+#define SAU_SFSR_INVEP_Pos                  0U                                            /*!< SAU SFSR: INVEP Position */
+#define SAU_SFSR_INVEP_Msk                 (1UL /*<< SAU_SFSR_INVEP_Pos*/)                /*!< SAU SFSR: INVEP Mask */
+
+/*@} end of group CMSIS_SAU */
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_FPU     Floating Point Unit (FPU)
+  \brief    Type definitions for the Floating Point Unit (FPU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Floating Point Unit (FPU).
+ */
+typedef struct
+{
+        uint32_t RESERVED0[1U];          /*!< Offset: 0x000        Reserved; pads the block so FPCCR lands at 0x004 */
+  __IOM uint32_t FPCCR;                  /*!< Offset: 0x004 (R/W)  Floating-Point Context Control Register */
+  __IOM uint32_t FPCAR;                  /*!< Offset: 0x008 (R/W)  Floating-Point Context Address Register */
+  __IOM uint32_t FPDSCR;                 /*!< Offset: 0x00C (R/W)  Floating-Point Default Status Control Register */
+  __IM  uint32_t MVFR0;                  /*!< Offset: 0x010 (R/ )  Media and VFP Feature Register 0 */
+  __IM  uint32_t MVFR1;                  /*!< Offset: 0x014 (R/ )  Media and VFP Feature Register 1 */
+  __IM  uint32_t MVFR2;                  /*!< Offset: 0x018 (R/ )  Media and VFP Feature Register 2 */
+} FPU_Type;
+
+/* Floating-Point Context Control Register Definitions */
+#define FPU_FPCCR_ASPEN_Pos                31U                                            /*!< FPCCR: ASPEN bit Position */
+#define FPU_FPCCR_ASPEN_Msk                (1UL << FPU_FPCCR_ASPEN_Pos)                   /*!< FPCCR: ASPEN bit Mask */
+
+#define FPU_FPCCR_LSPEN_Pos                30U                                            /*!< FPCCR: LSPEN Position */
+#define FPU_FPCCR_LSPEN_Msk                (1UL << FPU_FPCCR_LSPEN_Pos)                   /*!< FPCCR: LSPEN bit Mask */
+
+#define FPU_FPCCR_LSPENS_Pos               29U                                            /*!< FPCCR: LSPENS Position */
+#define FPU_FPCCR_LSPENS_Msk               (1UL << FPU_FPCCR_LSPENS_Pos)                  /*!< FPCCR: LSPENS bit Mask */
+
+#define FPU_FPCCR_CLRONRET_Pos             28U                                            /*!< FPCCR: CLRONRET Position */
+#define FPU_FPCCR_CLRONRET_Msk             (1UL << FPU_FPCCR_CLRONRET_Pos)                /*!< FPCCR: CLRONRET bit Mask */
+
+#define FPU_FPCCR_CLRONRETS_Pos            27U                                            /*!< FPCCR: CLRONRETS Position */
+#define FPU_FPCCR_CLRONRETS_Msk            (1UL << FPU_FPCCR_CLRONRETS_Pos)               /*!< FPCCR: CLRONRETS bit Mask */
+
+#define FPU_FPCCR_TS_Pos                   26U                                            /*!< FPCCR: TS Position */
+#define FPU_FPCCR_TS_Msk                   (1UL << FPU_FPCCR_TS_Pos)                      /*!< FPCCR: TS bit Mask */
+
+#define FPU_FPCCR_UFRDY_Pos                10U                                            /*!< FPCCR: UFRDY Position */
+#define FPU_FPCCR_UFRDY_Msk                (1UL << FPU_FPCCR_UFRDY_Pos)                   /*!< FPCCR: UFRDY bit Mask */
+
+#define FPU_FPCCR_SPLIMVIOL_Pos             9U                                            /*!< FPCCR: SPLIMVIOL Position */
+#define FPU_FPCCR_SPLIMVIOL_Msk            (1UL << FPU_FPCCR_SPLIMVIOL_Pos)               /*!< FPCCR: SPLIMVIOL bit Mask */
+
+#define FPU_FPCCR_MONRDY_Pos                8U                                            /*!< FPCCR: MONRDY Position */
+#define FPU_FPCCR_MONRDY_Msk               (1UL << FPU_FPCCR_MONRDY_Pos)                  /*!< FPCCR: MONRDY bit Mask */
+
+#define FPU_FPCCR_SFRDY_Pos                 7U                                            /*!< FPCCR: SFRDY Position */
+#define FPU_FPCCR_SFRDY_Msk                (1UL << FPU_FPCCR_SFRDY_Pos)                   /*!< FPCCR: SFRDY bit Mask */
+
+#define FPU_FPCCR_BFRDY_Pos                 6U                                            /*!< FPCCR: BFRDY Position */
+#define FPU_FPCCR_BFRDY_Msk                (1UL << FPU_FPCCR_BFRDY_Pos)                   /*!< FPCCR: BFRDY bit Mask */
+
+#define FPU_FPCCR_MMRDY_Pos                 5U                                            /*!< FPCCR: MMRDY Position */
+#define FPU_FPCCR_MMRDY_Msk                (1UL << FPU_FPCCR_MMRDY_Pos)                   /*!< FPCCR: MMRDY bit Mask */
+
+#define FPU_FPCCR_HFRDY_Pos                 4U                                            /*!< FPCCR: HFRDY Position */
+#define FPU_FPCCR_HFRDY_Msk                (1UL << FPU_FPCCR_HFRDY_Pos)                   /*!< FPCCR: HFRDY bit Mask */
+
+#define FPU_FPCCR_THREAD_Pos                3U                                            /*!< FPCCR: processor mode bit Position */
+#define FPU_FPCCR_THREAD_Msk               (1UL << FPU_FPCCR_THREAD_Pos)                  /*!< FPCCR: processor mode active bit Mask */
+
+#define FPU_FPCCR_S_Pos                     2U                                            /*!< FPCCR: Security status of the FP context bit Position */
+#define FPU_FPCCR_S_Msk                    (1UL << FPU_FPCCR_S_Pos)                       /*!< FPCCR: Security status of the FP context bit Mask */
+
+#define FPU_FPCCR_USER_Pos                  1U                                            /*!< FPCCR: privilege level bit Position */
+#define FPU_FPCCR_USER_Msk                 (1UL << FPU_FPCCR_USER_Pos)                    /*!< FPCCR: privilege level bit Mask */
+
+#define FPU_FPCCR_LSPACT_Pos                0U                                            /*!< FPCCR: Lazy state preservation active bit Position */
+#define FPU_FPCCR_LSPACT_Msk               (1UL /*<< FPU_FPCCR_LSPACT_Pos*/)              /*!< FPCCR: Lazy state preservation active bit Mask */
+
+/* Floating-Point Context Address Register Definitions */
+#define FPU_FPCAR_ADDRESS_Pos               3U                                            /*!< FPCAR: ADDRESS bits Position */
+#define FPU_FPCAR_ADDRESS_Msk              (0x1FFFFFFFUL << FPU_FPCAR_ADDRESS_Pos)        /*!< FPCAR: ADDRESS bits Mask */
+
+/* Floating-Point Default Status Control Register Definitions */
+#define FPU_FPDSCR_AHP_Pos                 26U                                            /*!< FPDSCR: AHP bit Position */
+#define FPU_FPDSCR_AHP_Msk                 (1UL << FPU_FPDSCR_AHP_Pos)                    /*!< FPDSCR: AHP bit Mask */
+
+#define FPU_FPDSCR_DN_Pos                  25U                                            /*!< FPDSCR: DN bit Position */
+#define FPU_FPDSCR_DN_Msk                  (1UL << FPU_FPDSCR_DN_Pos)                     /*!< FPDSCR: DN bit Mask */
+
+#define FPU_FPDSCR_FZ_Pos                  24U                                            /*!< FPDSCR: FZ bit Position */
+#define FPU_FPDSCR_FZ_Msk                  (1UL << FPU_FPDSCR_FZ_Pos)                     /*!< FPDSCR: FZ bit Mask */
+
+#define FPU_FPDSCR_RMode_Pos               22U                                            /*!< FPDSCR: RMode bits Position */
+#define FPU_FPDSCR_RMode_Msk               (3UL << FPU_FPDSCR_RMode_Pos)                  /*!< FPDSCR: RMode bits Mask */
+
+#define FPU_FPDSCR_FZ16_Pos                19U                                            /*!< FPDSCR: FZ16 bit Position */
+#define FPU_FPDSCR_FZ16_Msk                (1UL << FPU_FPDSCR_FZ16_Pos)                   /*!< FPDSCR: FZ16 bit Mask */
+
+#define FPU_FPDSCR_LTPSIZE_Pos             16U                                            /*!< FPDSCR: LTPSIZE bits Position */
+#define FPU_FPDSCR_LTPSIZE_Msk             (7UL << FPU_FPDSCR_LTPSIZE_Pos)                /*!< FPDSCR: LTPSIZE bits Mask */
+
+/* Media and VFP Feature Register 0 Definitions */
+#define FPU_MVFR0_FPRound_Pos              28U                                            /*!< MVFR0: FPRound bits Position */
+#define FPU_MVFR0_FPRound_Msk              (0xFUL << FPU_MVFR0_FPRound_Pos)               /*!< MVFR0: FPRound bits Mask */
+
+#define FPU_MVFR0_FPSqrt_Pos               20U                                            /*!< MVFR0: FPSqrt bits Position */
+#define FPU_MVFR0_FPSqrt_Msk               (0xFUL << FPU_MVFR0_FPSqrt_Pos)                 /*!< MVFR0: FPSqrt bits Mask */
+
+#define FPU_MVFR0_FPDivide_Pos             16U                                            /*!< MVFR0: FPDivide bits Position */
+#define FPU_MVFR0_FPDivide_Msk             (0xFUL << FPU_MVFR0_FPDivide_Pos)              /*!< MVFR0: FPDivide bits Mask */
+
+#define FPU_MVFR0_FPDP_Pos                  8U                                            /*!< MVFR0: FPDP bits Position */
+#define FPU_MVFR0_FPDP_Msk                 (0xFUL << FPU_MVFR0_FPDP_Pos)                  /*!< MVFR0: FPDP bits Mask */
+
+#define FPU_MVFR0_FPSP_Pos                  4U                                            /*!< MVFR0: FPSP bits Position */
+#define FPU_MVFR0_FPSP_Msk                 (0xFUL << FPU_MVFR0_FPSP_Pos)                  /*!< MVFR0: FPSP bits Mask */
+
+#define FPU_MVFR0_SIMDReg_Pos               0U                                            /*!< MVFR0: SIMDReg bits Position */
+#define FPU_MVFR0_SIMDReg_Msk              (0xFUL /*<< FPU_MVFR0_SIMDReg_Pos*/)           /*!< MVFR0: SIMDReg bits Mask */
+
+/* Media and VFP Feature Register 1 Definitions */
+#define FPU_MVFR1_FMAC_Pos                 28U                                            /*!< MVFR1: FMAC bits Position */
+#define FPU_MVFR1_FMAC_Msk                 (0xFUL << FPU_MVFR1_FMAC_Pos)                  /*!< MVFR1: FMAC bits Mask */
+
+#define FPU_MVFR1_FPHP_Pos                 24U                                            /*!< MVFR1: FPHP bits Position */
+#define FPU_MVFR1_FPHP_Msk                 (0xFUL << FPU_MVFR1_FPHP_Pos)                  /*!< MVFR1: FPHP bits Mask */
+
+#define FPU_MVFR1_FP16_Pos                 20U                                            /*!< MVFR1: FP16 bits Position */
+#define FPU_MVFR1_FP16_Msk                 (0xFUL << FPU_MVFR1_FP16_Pos)                  /*!< MVFR1: FP16 bits Mask */
+
+#define FPU_MVFR1_MVE_Pos                   8U                                            /*!< MVFR1: MVE bits Position */
+#define FPU_MVFR1_MVE_Msk                  (0xFUL << FPU_MVFR1_MVE_Pos)                   /*!< MVFR1: MVE bits Mask */
+
+#define FPU_MVFR1_FPDNaN_Pos                4U                                            /*!< MVFR1: FPDNaN bits Position */
+#define FPU_MVFR1_FPDNaN_Msk               (0xFUL << FPU_MVFR1_FPDNaN_Pos)                /*!< MVFR1: FPDNaN bits Mask */
+
+#define FPU_MVFR1_FPFtZ_Pos                 0U                                            /*!< MVFR1: FPFtZ bits Position */
+#define FPU_MVFR1_FPFtZ_Msk                (0xFUL /*<< FPU_MVFR1_FPFtZ_Pos*/)             /*!< MVFR1: FPFtZ bits Mask */
+
+/* Media and VFP Feature Register 2 Definitions */
+#define FPU_MVFR2_FPMisc_Pos                4U                                            /*!< MVFR2: FPMisc bits Position */
+#define FPU_MVFR2_FPMisc_Msk               (0xFUL << FPU_MVFR2_FPMisc_Pos)                /*!< MVFR2: FPMisc bits Mask */
+
+/*@} end of group CMSIS_FPU */
+
+/* CoreDebug is deprecated; it has been replaced by DCB (Debug Control Block). */
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_CoreDebug       Core Debug Registers (CoreDebug)
+  \brief    Type definitions for the Core Debug Registers
+  @{
+ */
+
+/**
+  \brief  \deprecated Structure type to access the Core Debug Registers (CoreDebug); replaced by DCB (Debug Control Block).
+ */
+typedef struct
+{
+  __IOM uint32_t DHCSR;                  /*!< Offset: 0x000 (R/W)  Debug Halting Control and Status Register */
+  __OM  uint32_t DCRSR;                  /*!< Offset: 0x004 ( /W)  Debug Core Register Selector Register */
+  __IOM uint32_t DCRDR;                  /*!< Offset: 0x008 (R/W)  Debug Core Register Data Register */
+  __IOM uint32_t DEMCR;                  /*!< Offset: 0x00C (R/W)  Debug Exception and Monitor Control Register */
+  __OM  uint32_t DSCEMCR;                /*!< Offset: 0x010 ( /W)  Debug Set Clear Exception and Monitor Control Register */
+  __IOM uint32_t DAUTHCTRL;              /*!< Offset: 0x014 (R/W)  Debug Authentication Control Register */
+  __IOM uint32_t DSCSR;                  /*!< Offset: 0x018 (R/W)  Debug Security Control and Status Register */
+} CoreDebug_Type;
+
+/* Debug Halting Control and Status Register Definitions */
+#define CoreDebug_DHCSR_DBGKEY_Pos         16U                                            /*!< \deprecated CoreDebug DHCSR: DBGKEY Position */
+#define CoreDebug_DHCSR_DBGKEY_Msk         (0xFFFFUL << CoreDebug_DHCSR_DBGKEY_Pos)       /*!< \deprecated CoreDebug DHCSR: DBGKEY Mask */
+
+#define CoreDebug_DHCSR_S_RESTART_ST_Pos   26U                                            /*!< \deprecated CoreDebug DHCSR: S_RESTART_ST Position */
+#define CoreDebug_DHCSR_S_RESTART_ST_Msk   (1UL << CoreDebug_DHCSR_S_RESTART_ST_Pos)      /*!< \deprecated CoreDebug DHCSR: S_RESTART_ST Mask */
+
+#define CoreDebug_DHCSR_S_RESET_ST_Pos     25U                                            /*!< \deprecated CoreDebug DHCSR: S_RESET_ST Position */
+#define CoreDebug_DHCSR_S_RESET_ST_Msk     (1UL << CoreDebug_DHCSR_S_RESET_ST_Pos)        /*!< \deprecated CoreDebug DHCSR: S_RESET_ST Mask */
+
+#define CoreDebug_DHCSR_S_RETIRE_ST_Pos    24U                                            /*!< \deprecated CoreDebug DHCSR: S_RETIRE_ST Position */
+#define CoreDebug_DHCSR_S_RETIRE_ST_Msk    (1UL << CoreDebug_DHCSR_S_RETIRE_ST_Pos)       /*!< \deprecated CoreDebug DHCSR: S_RETIRE_ST Mask */
+
+#define CoreDebug_DHCSR_S_FPD_Pos          23U                                            /*!< \deprecated CoreDebug DHCSR: S_FPD Position */
+#define CoreDebug_DHCSR_S_FPD_Msk          (1UL << CoreDebug_DHCSR_S_FPD_Pos)             /*!< \deprecated CoreDebug DHCSR: S_FPD Mask */
+
+#define CoreDebug_DHCSR_S_SUIDE_Pos        22U                                            /*!< \deprecated CoreDebug DHCSR: S_SUIDE Position */
+#define CoreDebug_DHCSR_S_SUIDE_Msk        (1UL << CoreDebug_DHCSR_S_SUIDE_Pos)           /*!< \deprecated CoreDebug DHCSR: S_SUIDE Mask */
+
+#define CoreDebug_DHCSR_S_NSUIDE_Pos       21U                                            /*!< \deprecated CoreDebug DHCSR: S_NSUIDE Position */
+#define CoreDebug_DHCSR_S_NSUIDE_Msk       (1UL << CoreDebug_DHCSR_S_NSUIDE_Pos)          /*!< \deprecated CoreDebug DHCSR: S_NSUIDE Mask */
+
+#define CoreDebug_DHCSR_S_SDE_Pos          20U                                            /*!< \deprecated CoreDebug DHCSR: S_SDE Position */
+#define CoreDebug_DHCSR_S_SDE_Msk          (1UL << CoreDebug_DHCSR_S_SDE_Pos)             /*!< \deprecated CoreDebug DHCSR: S_SDE Mask */
+
+#define CoreDebug_DHCSR_S_LOCKUP_Pos       19U                                            /*!< \deprecated CoreDebug DHCSR: S_LOCKUP Position */
+#define CoreDebug_DHCSR_S_LOCKUP_Msk       (1UL << CoreDebug_DHCSR_S_LOCKUP_Pos)          /*!< \deprecated CoreDebug DHCSR: S_LOCKUP Mask */
+
+#define CoreDebug_DHCSR_S_SLEEP_Pos        18U                                            /*!< \deprecated CoreDebug DHCSR: S_SLEEP Position */
+#define CoreDebug_DHCSR_S_SLEEP_Msk        (1UL << CoreDebug_DHCSR_S_SLEEP_Pos)           /*!< \deprecated CoreDebug DHCSR: S_SLEEP Mask */
+
+#define CoreDebug_DHCSR_S_HALT_Pos         17U                                            /*!< \deprecated CoreDebug DHCSR: S_HALT Position */
+#define CoreDebug_DHCSR_S_HALT_Msk         (1UL << CoreDebug_DHCSR_S_HALT_Pos)            /*!< \deprecated CoreDebug DHCSR: S_HALT Mask */
+
+#define CoreDebug_DHCSR_S_REGRDY_Pos       16U                                            /*!< \deprecated CoreDebug DHCSR: S_REGRDY Position */
+#define CoreDebug_DHCSR_S_REGRDY_Msk       (1UL << CoreDebug_DHCSR_S_REGRDY_Pos)          /*!< \deprecated CoreDebug DHCSR: S_REGRDY Mask */
+
+#define CoreDebug_DHCSR_C_PMOV_Pos          6U                                            /*!< \deprecated CoreDebug DHCSR: C_PMOV Position */
+#define CoreDebug_DHCSR_C_PMOV_Msk         (1UL << CoreDebug_DHCSR_C_PMOV_Pos)            /*!< \deprecated CoreDebug DHCSR: C_PMOV Mask */
+
+#define CoreDebug_DHCSR_C_SNAPSTALL_Pos     5U                                            /*!< \deprecated CoreDebug DHCSR: C_SNAPSTALL Position */
+#define CoreDebug_DHCSR_C_SNAPSTALL_Msk    (1UL << CoreDebug_DHCSR_C_SNAPSTALL_Pos)       /*!< \deprecated CoreDebug DHCSR: C_SNAPSTALL Mask */
+
+#define CoreDebug_DHCSR_C_MASKINTS_Pos      3U                                            /*!< \deprecated CoreDebug DHCSR: C_MASKINTS Position */
+#define CoreDebug_DHCSR_C_MASKINTS_Msk     (1UL << CoreDebug_DHCSR_C_MASKINTS_Pos)        /*!< \deprecated CoreDebug DHCSR: C_MASKINTS Mask */
+
+#define CoreDebug_DHCSR_C_STEP_Pos          2U                                            /*!< \deprecated CoreDebug DHCSR: C_STEP Position */
+#define CoreDebug_DHCSR_C_STEP_Msk         (1UL << CoreDebug_DHCSR_C_STEP_Pos)            /*!< \deprecated CoreDebug DHCSR: C_STEP Mask */
+
+#define CoreDebug_DHCSR_C_HALT_Pos          1U                                            /*!< \deprecated CoreDebug DHCSR: C_HALT Position */
+#define CoreDebug_DHCSR_C_HALT_Msk         (1UL << CoreDebug_DHCSR_C_HALT_Pos)            /*!< \deprecated CoreDebug DHCSR: C_HALT Mask */
+
+#define CoreDebug_DHCSR_C_DEBUGEN_Pos       0U                                            /*!< \deprecated CoreDebug DHCSR: C_DEBUGEN Position */
+#define CoreDebug_DHCSR_C_DEBUGEN_Msk      (1UL /*<< CoreDebug_DHCSR_C_DEBUGEN_Pos*/)     /*!< \deprecated CoreDebug DHCSR: C_DEBUGEN Mask */
+
+/* Debug Core Register Selector Register Definitions */
+#define CoreDebug_DCRSR_REGWnR_Pos         16U                                            /*!< \deprecated CoreDebug DCRSR: REGWnR Position */
+#define CoreDebug_DCRSR_REGWnR_Msk         (1UL << CoreDebug_DCRSR_REGWnR_Pos)            /*!< \deprecated CoreDebug DCRSR: REGWnR Mask */
+
+#define CoreDebug_DCRSR_REGSEL_Pos          0U                                            /*!< \deprecated CoreDebug DCRSR: REGSEL Position */
+#define CoreDebug_DCRSR_REGSEL_Msk         (0x1FUL /*<< CoreDebug_DCRSR_REGSEL_Pos*/)     /*!< \deprecated CoreDebug DCRSR: REGSEL Mask */
+
+/* Debug Exception and Monitor Control Register Definitions */
+#define CoreDebug_DEMCR_TRCENA_Pos         24U                                            /*!< \deprecated CoreDebug DEMCR: TRCENA Position */
+#define CoreDebug_DEMCR_TRCENA_Msk         (1UL << CoreDebug_DEMCR_TRCENA_Pos)            /*!< \deprecated CoreDebug DEMCR: TRCENA Mask */
+
+#define CoreDebug_DEMCR_MON_REQ_Pos        19U                                            /*!< \deprecated CoreDebug DEMCR: MON_REQ Position */
+#define CoreDebug_DEMCR_MON_REQ_Msk        (1UL << CoreDebug_DEMCR_MON_REQ_Pos)           /*!< \deprecated CoreDebug DEMCR: MON_REQ Mask */
+
+#define CoreDebug_DEMCR_MON_STEP_Pos       18U                                            /*!< \deprecated CoreDebug DEMCR: MON_STEP Position */
+#define CoreDebug_DEMCR_MON_STEP_Msk       (1UL << CoreDebug_DEMCR_MON_STEP_Pos)          /*!< \deprecated CoreDebug DEMCR: MON_STEP Mask */
+
+#define CoreDebug_DEMCR_MON_PEND_Pos       17U                                            /*!< \deprecated CoreDebug DEMCR: MON_PEND Position */
+#define CoreDebug_DEMCR_MON_PEND_Msk       (1UL << CoreDebug_DEMCR_MON_PEND_Pos)          /*!< \deprecated CoreDebug DEMCR: MON_PEND Mask */
+
+#define CoreDebug_DEMCR_MON_EN_Pos         16U                                            /*!< \deprecated CoreDebug DEMCR: MON_EN Position */
+#define CoreDebug_DEMCR_MON_EN_Msk         (1UL << CoreDebug_DEMCR_MON_EN_Pos)            /*!< \deprecated CoreDebug DEMCR: MON_EN Mask */
+
+#define CoreDebug_DEMCR_VC_HARDERR_Pos     10U                                            /*!< \deprecated CoreDebug DEMCR: VC_HARDERR Position */
+#define CoreDebug_DEMCR_VC_HARDERR_Msk     (1UL << CoreDebug_DEMCR_VC_HARDERR_Pos)        /*!< \deprecated CoreDebug DEMCR: VC_HARDERR Mask */
+
+#define CoreDebug_DEMCR_VC_INTERR_Pos       9U                                            /*!< \deprecated CoreDebug DEMCR: VC_INTERR Position */
+#define CoreDebug_DEMCR_VC_INTERR_Msk      (1UL << CoreDebug_DEMCR_VC_INTERR_Pos)         /*!< \deprecated CoreDebug DEMCR: VC_INTERR Mask */
+
+#define CoreDebug_DEMCR_VC_BUSERR_Pos       8U                                            /*!< \deprecated CoreDebug DEMCR: VC_BUSERR Position */
+#define CoreDebug_DEMCR_VC_BUSERR_Msk      (1UL << CoreDebug_DEMCR_VC_BUSERR_Pos)         /*!< \deprecated CoreDebug DEMCR: VC_BUSERR Mask */
+
+#define CoreDebug_DEMCR_VC_STATERR_Pos      7U                                            /*!< \deprecated CoreDebug DEMCR: VC_STATERR Position */
+#define CoreDebug_DEMCR_VC_STATERR_Msk     (1UL << CoreDebug_DEMCR_VC_STATERR_Pos)        /*!< \deprecated CoreDebug DEMCR: VC_STATERR Mask */
+
+#define CoreDebug_DEMCR_VC_CHKERR_Pos       6U                                            /*!< \deprecated CoreDebug DEMCR: VC_CHKERR Position */
+#define CoreDebug_DEMCR_VC_CHKERR_Msk      (1UL << CoreDebug_DEMCR_VC_CHKERR_Pos)         /*!< \deprecated CoreDebug DEMCR: VC_CHKERR Mask */
+
+#define CoreDebug_DEMCR_VC_NOCPERR_Pos      5U                                            /*!< \deprecated CoreDebug DEMCR: VC_NOCPERR Position */
+#define CoreDebug_DEMCR_VC_NOCPERR_Msk     (1UL << CoreDebug_DEMCR_VC_NOCPERR_Pos)        /*!< \deprecated CoreDebug DEMCR: VC_NOCPERR Mask */
+
+#define CoreDebug_DEMCR_VC_MMERR_Pos        4U                                            /*!< \deprecated CoreDebug DEMCR: VC_MMERR Position */
+#define CoreDebug_DEMCR_VC_MMERR_Msk       (1UL << CoreDebug_DEMCR_VC_MMERR_Pos)          /*!< \deprecated CoreDebug DEMCR: VC_MMERR Mask */
+
+#define CoreDebug_DEMCR_VC_CORERESET_Pos    0U                                            /*!< \deprecated CoreDebug DEMCR: VC_CORERESET Position */
+#define CoreDebug_DEMCR_VC_CORERESET_Msk   (1UL /*<< CoreDebug_DEMCR_VC_CORERESET_Pos*/)  /*!< \deprecated CoreDebug DEMCR: VC_CORERESET Mask */
+
+/* Debug Set Clear Exception and Monitor Control Register Definitions (deprecated CoreDebug_* names; the DCB_DSCEMCR_* macros define the same bit positions and masks) */
+#define CoreDebug_DSCEMCR_CLR_MON_REQ_Pos  19U                                            /*!< \deprecated CoreDebug DSCEMCR: CLR_MON_REQ Position */
+#define CoreDebug_DSCEMCR_CLR_MON_REQ_Msk  (1UL << CoreDebug_DSCEMCR_CLR_MON_REQ_Pos)     /*!< \deprecated CoreDebug DSCEMCR: CLR_MON_REQ Mask */
+
+#define CoreDebug_DSCEMCR_CLR_MON_PEND_Pos 17U                                            /*!< \deprecated CoreDebug DSCEMCR: CLR_MON_PEND Position */
+#define CoreDebug_DSCEMCR_CLR_MON_PEND_Msk (1UL << CoreDebug_DSCEMCR_CLR_MON_PEND_Pos)    /*!< \deprecated CoreDebug DSCEMCR: CLR_MON_PEND Mask */
+
+#define CoreDebug_DSCEMCR_SET_MON_REQ_Pos   3U                                            /*!< \deprecated CoreDebug DSCEMCR: SET_MON_REQ Position */
+#define CoreDebug_DSCEMCR_SET_MON_REQ_Msk  (1UL << CoreDebug_DSCEMCR_SET_MON_REQ_Pos)     /*!< \deprecated CoreDebug DSCEMCR: SET_MON_REQ Mask */
+
+#define CoreDebug_DSCEMCR_SET_MON_PEND_Pos  1U                                            /*!< \deprecated CoreDebug DSCEMCR: SET_MON_PEND Position */
+#define CoreDebug_DSCEMCR_SET_MON_PEND_Msk (1UL << CoreDebug_DSCEMCR_SET_MON_PEND_Pos)    /*!< \deprecated CoreDebug DSCEMCR: SET_MON_PEND Mask */
+
+/* Debug Authentication Control Register Definitions (deprecated CoreDebug_* names; the DCB_DAUTHCTRL_* macros define the same bit positions and masks) */
+#define CoreDebug_DAUTHCTRL_UIDEN_Pos      10U                                            /*!< \deprecated CoreDebug DAUTHCTRL: UIDEN Position */
+#define CoreDebug_DAUTHCTRL_UIDEN_Msk      (1UL << CoreDebug_DAUTHCTRL_UIDEN_Pos)         /*!< \deprecated CoreDebug DAUTHCTRL: UIDEN Mask */
+
+#define CoreDebug_DAUTHCTRL_UIDAPEN_Pos     9U                                            /*!< \deprecated CoreDebug DAUTHCTRL: UIDAPEN Position */
+#define CoreDebug_DAUTHCTRL_UIDAPEN_Msk    (1UL << CoreDebug_DAUTHCTRL_UIDAPEN_Pos)       /*!< \deprecated CoreDebug DAUTHCTRL: UIDAPEN Mask */
+
+#define CoreDebug_DAUTHCTRL_FSDMA_Pos       8U                                            /*!< \deprecated CoreDebug DAUTHCTRL: FSDMA Position */
+#define CoreDebug_DAUTHCTRL_FSDMA_Msk      (1UL << CoreDebug_DAUTHCTRL_FSDMA_Pos)         /*!< \deprecated CoreDebug DAUTHCTRL: FSDMA Mask */
+
+#define CoreDebug_DAUTHCTRL_INTSPNIDEN_Pos  3U                                            /*!< \deprecated CoreDebug DAUTHCTRL: INTSPNIDEN Position */
+#define CoreDebug_DAUTHCTRL_INTSPNIDEN_Msk (1UL << CoreDebug_DAUTHCTRL_INTSPNIDEN_Pos)    /*!< \deprecated CoreDebug DAUTHCTRL: INTSPNIDEN Mask */
+
+#define CoreDebug_DAUTHCTRL_SPNIDENSEL_Pos  2U                                            /*!< \deprecated CoreDebug DAUTHCTRL: SPNIDENSEL Position */
+#define CoreDebug_DAUTHCTRL_SPNIDENSEL_Msk (1UL << CoreDebug_DAUTHCTRL_SPNIDENSEL_Pos)    /*!< \deprecated CoreDebug DAUTHCTRL: SPNIDENSEL Mask */
+
+#define CoreDebug_DAUTHCTRL_INTSPIDEN_Pos   1U                                            /*!< \deprecated CoreDebug DAUTHCTRL: INTSPIDEN Position */
+#define CoreDebug_DAUTHCTRL_INTSPIDEN_Msk  (1UL << CoreDebug_DAUTHCTRL_INTSPIDEN_Pos)     /*!< \deprecated CoreDebug DAUTHCTRL: INTSPIDEN Mask */
+
+#define CoreDebug_DAUTHCTRL_SPIDENSEL_Pos   0U                                            /*!< \deprecated CoreDebug DAUTHCTRL: SPIDENSEL Position */
+#define CoreDebug_DAUTHCTRL_SPIDENSEL_Msk  (1UL /*<< CoreDebug_DAUTHCTRL_SPIDENSEL_Pos*/) /*!< \deprecated CoreDebug DAUTHCTRL: SPIDENSEL Mask (shift by 0 left commented out) */
+
+/* Debug Security Control and Status Register Definitions (deprecated CoreDebug_* names; the DCB_DSCSR_* macros define the same bit positions and masks, plus CDSKEY) */
+#define CoreDebug_DSCSR_CDS_Pos            16U                                            /*!< \deprecated CoreDebug DSCSR: CDS Position */
+#define CoreDebug_DSCSR_CDS_Msk            (1UL << CoreDebug_DSCSR_CDS_Pos)               /*!< \deprecated CoreDebug DSCSR: CDS Mask */
+
+#define CoreDebug_DSCSR_SBRSEL_Pos          1U                                            /*!< \deprecated CoreDebug DSCSR: SBRSEL Position */
+#define CoreDebug_DSCSR_SBRSEL_Msk         (1UL << CoreDebug_DSCSR_SBRSEL_Pos)            /*!< \deprecated CoreDebug DSCSR: SBRSEL Mask */
+
+#define CoreDebug_DSCSR_SBRSELEN_Pos        0U                                            /*!< \deprecated CoreDebug DSCSR: SBRSELEN Position */
+#define CoreDebug_DSCSR_SBRSELEN_Msk       (1UL /*<< CoreDebug_DSCSR_SBRSELEN_Pos*/)      /*!< \deprecated CoreDebug DSCSR: SBRSELEN Mask (shift by 0 left commented out) */
+
+/*@} end of group CMSIS_CoreDebug */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_DCB       Debug Control Block
+  \brief    Type definitions for the Debug Control Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Debug Control Block Registers (DCB): seven consecutive 32-bit registers at offsets 0x000-0x018, reached through the DCB pointer macro (DCB_BASE).
+ */
+typedef struct
+{
+  __IOM uint32_t DHCSR;                  /*!< Offset: 0x000 (R/W)  Debug Halting Control and Status Register (fields: DCB_DHCSR_*) */
+  __OM  uint32_t DCRSR;                  /*!< Offset: 0x004 ( /W)  Debug Core Register Selector Register (fields: DCB_DCRSR_*) */
+  __IOM uint32_t DCRDR;                  /*!< Offset: 0x008 (R/W)  Debug Core Register Data Register (fields: DCB_DCRDR_*) */
+  __IOM uint32_t DEMCR;                  /*!< Offset: 0x00C (R/W)  Debug Exception and Monitor Control Register (fields: DCB_DEMCR_*) */
+  __OM  uint32_t DSCEMCR;                /*!< Offset: 0x010 ( /W)  Debug Set Clear Exception and Monitor Control Register (fields: DCB_DSCEMCR_*) */
+  __IOM uint32_t DAUTHCTRL;              /*!< Offset: 0x014 (R/W)  Debug Authentication Control Register (fields: DCB_DAUTHCTRL_*) */
+  __IOM uint32_t DSCSR;                  /*!< Offset: 0x018 (R/W)  Debug Security Control and Status Register (fields: DCB_DSCSR_*) */
+} DCB_Type;
+
+/* DHCSR, Debug Halting Control and Status Register Definitions (DBGKEY first, then the S_* and C_* fields from bit 26 down to bit 0) */
+#define DCB_DHCSR_DBGKEY_Pos               16U                                            /*!< DCB DHCSR: Debug key Position (16-bit field, bits [31:16]; spans the same bits as the S_* fields below) */
+#define DCB_DHCSR_DBGKEY_Msk               (0xFFFFUL << DCB_DHCSR_DBGKEY_Pos)             /*!< DCB DHCSR: Debug key Mask (0xFFFF0000) */
+
+#define DCB_DHCSR_S_RESTART_ST_Pos         26U                                            /*!< DCB DHCSR: Restart sticky status Position */
+#define DCB_DHCSR_S_RESTART_ST_Msk         (0x1UL << DCB_DHCSR_S_RESTART_ST_Pos)          /*!< DCB DHCSR: Restart sticky status Mask */
+
+#define DCB_DHCSR_S_RESET_ST_Pos           25U                                            /*!< DCB DHCSR: Reset sticky status Position */
+#define DCB_DHCSR_S_RESET_ST_Msk           (0x1UL << DCB_DHCSR_S_RESET_ST_Pos)            /*!< DCB DHCSR: Reset sticky status Mask */
+
+#define DCB_DHCSR_S_RETIRE_ST_Pos          24U                                            /*!< DCB DHCSR: Retire sticky status Position */
+#define DCB_DHCSR_S_RETIRE_ST_Msk          (0x1UL << DCB_DHCSR_S_RETIRE_ST_Pos)           /*!< DCB DHCSR: Retire sticky status Mask */
+
+#define DCB_DHCSR_S_FPD_Pos                23U                                            /*!< DCB DHCSR: Floating-point registers Debuggable Position */
+#define DCB_DHCSR_S_FPD_Msk                (0x1UL << DCB_DHCSR_S_FPD_Pos)                 /*!< DCB DHCSR: Floating-point registers Debuggable Mask */
+
+#define DCB_DHCSR_S_SUIDE_Pos              22U                                            /*!< DCB DHCSR: Secure unprivileged halting debug enabled Position */
+#define DCB_DHCSR_S_SUIDE_Msk              (0x1UL << DCB_DHCSR_S_SUIDE_Pos)               /*!< DCB DHCSR: Secure unprivileged halting debug enabled Mask */
+
+#define DCB_DHCSR_S_NSUIDE_Pos             21U                                            /*!< DCB DHCSR: Non-secure unprivileged halting debug enabled Position */
+#define DCB_DHCSR_S_NSUIDE_Msk             (0x1UL << DCB_DHCSR_S_NSUIDE_Pos)              /*!< DCB DHCSR: Non-secure unprivileged halting debug enabled Mask */
+
+#define DCB_DHCSR_S_SDE_Pos                20U                                            /*!< DCB DHCSR: Secure debug enabled Position */
+#define DCB_DHCSR_S_SDE_Msk                (0x1UL << DCB_DHCSR_S_SDE_Pos)                 /*!< DCB DHCSR: Secure debug enabled Mask */
+
+#define DCB_DHCSR_S_LOCKUP_Pos             19U                                            /*!< DCB DHCSR: Lockup status Position */
+#define DCB_DHCSR_S_LOCKUP_Msk             (0x1UL << DCB_DHCSR_S_LOCKUP_Pos)              /*!< DCB DHCSR: Lockup status Mask */
+
+#define DCB_DHCSR_S_SLEEP_Pos              18U                                            /*!< DCB DHCSR: Sleeping status Position */
+#define DCB_DHCSR_S_SLEEP_Msk              (0x1UL << DCB_DHCSR_S_SLEEP_Pos)               /*!< DCB DHCSR: Sleeping status Mask */
+
+#define DCB_DHCSR_S_HALT_Pos               17U                                            /*!< DCB DHCSR: Halted status Position */
+#define DCB_DHCSR_S_HALT_Msk               (0x1UL << DCB_DHCSR_S_HALT_Pos)                /*!< DCB DHCSR: Halted status Mask */
+
+#define DCB_DHCSR_S_REGRDY_Pos             16U                                            /*!< DCB DHCSR: Register ready status Position */
+#define DCB_DHCSR_S_REGRDY_Msk             (0x1UL << DCB_DHCSR_S_REGRDY_Pos)              /*!< DCB DHCSR: Register ready status Mask */
+
+#define DCB_DHCSR_C_PMOV_Pos                6U                                            /*!< DCB DHCSR: Halt on PMU overflow control Position */
+#define DCB_DHCSR_C_PMOV_Msk               (0x1UL << DCB_DHCSR_C_PMOV_Pos)                /*!< DCB DHCSR: Halt on PMU overflow control Mask */
+
+#define DCB_DHCSR_C_SNAPSTALL_Pos           5U                                            /*!< DCB DHCSR: Snap stall control Position */
+#define DCB_DHCSR_C_SNAPSTALL_Msk          (0x1UL << DCB_DHCSR_C_SNAPSTALL_Pos)           /*!< DCB DHCSR: Snap stall control Mask */
+
+#define DCB_DHCSR_C_MASKINTS_Pos            3U                                            /*!< DCB DHCSR: Mask interrupts control Position */
+#define DCB_DHCSR_C_MASKINTS_Msk           (0x1UL << DCB_DHCSR_C_MASKINTS_Pos)            /*!< DCB DHCSR: Mask interrupts control Mask */
+
+#define DCB_DHCSR_C_STEP_Pos                2U                                            /*!< DCB DHCSR: Step control Position */
+#define DCB_DHCSR_C_STEP_Msk               (0x1UL << DCB_DHCSR_C_STEP_Pos)                /*!< DCB DHCSR: Step control Mask */
+
+#define DCB_DHCSR_C_HALT_Pos                1U                                            /*!< DCB DHCSR: Halt control Position */
+#define DCB_DHCSR_C_HALT_Msk               (0x1UL << DCB_DHCSR_C_HALT_Pos)                /*!< DCB DHCSR: Halt control Mask */
+
+#define DCB_DHCSR_C_DEBUGEN_Pos             0U                                            /*!< DCB DHCSR: Debug enable control Position */
+#define DCB_DHCSR_C_DEBUGEN_Msk            (0x1UL /*<< DCB_DHCSR_C_DEBUGEN_Pos*/)         /*!< DCB DHCSR: Debug enable control Mask (shift by 0 left commented out) */
+
+/* DCRSR, Debug Core Register Selector Register Definitions (declared write-only in DCB_Type) */
+#define DCB_DCRSR_REGWnR_Pos               16U                                            /*!< DCB DCRSR: Register write/not-read Position */
+#define DCB_DCRSR_REGWnR_Msk               (0x1UL << DCB_DCRSR_REGWnR_Pos)                /*!< DCB DCRSR: Register write/not-read Mask */
+
+#define DCB_DCRSR_REGSEL_Pos                0U                                            /*!< DCB DCRSR: Register selector Position (7-bit field, bits [6:0]) */
+#define DCB_DCRSR_REGSEL_Msk               (0x7FUL /*<< DCB_DCRSR_REGSEL_Pos*/)           /*!< DCB DCRSR: Register selector Mask (shift by 0 left commented out) */
+
+/* DCRDR, Debug Core Register Data Register Definitions */
+#define DCB_DCRDR_DBGTMP_Pos                0U                                            /*!< DCB DCRDR: Data temporary buffer Position (field covers all 32 bits) */
+#define DCB_DCRDR_DBGTMP_Msk               (0xFFFFFFFFUL /*<< DCB_DCRDR_DBGTMP_Pos*/)     /*!< DCB DCRDR: Data temporary buffer Mask (shift by 0 left commented out) */
+
+/* DEMCR, Debug Exception and Monitor Control Register Definitions (single-bit fields, listed from bit 24 down to bit 0) */
+#define DCB_DEMCR_TRCENA_Pos               24U                                            /*!< DCB DEMCR: Trace enable Position */
+#define DCB_DEMCR_TRCENA_Msk               (0x1UL << DCB_DEMCR_TRCENA_Pos)                /*!< DCB DEMCR: Trace enable Mask */
+
+#define DCB_DEMCR_MONPRKEY_Pos             23U                                            /*!< DCB DEMCR: Monitor pend request key Position */
+#define DCB_DEMCR_MONPRKEY_Msk             (0x1UL << DCB_DEMCR_MONPRKEY_Pos)              /*!< DCB DEMCR: Monitor pend request key Mask */
+
+#define DCB_DEMCR_UMON_EN_Pos              21U                                            /*!< DCB DEMCR: Unprivileged monitor enable Position */
+#define DCB_DEMCR_UMON_EN_Msk              (0x1UL << DCB_DEMCR_UMON_EN_Pos)               /*!< DCB DEMCR: Unprivileged monitor enable Mask */
+
+#define DCB_DEMCR_SDME_Pos                 20U                                            /*!< DCB DEMCR: Secure DebugMonitor enable Position */
+#define DCB_DEMCR_SDME_Msk                 (0x1UL << DCB_DEMCR_SDME_Pos)                  /*!< DCB DEMCR: Secure DebugMonitor enable Mask */
+
+#define DCB_DEMCR_MON_REQ_Pos              19U                                            /*!< DCB DEMCR: Monitor request Position */
+#define DCB_DEMCR_MON_REQ_Msk              (0x1UL << DCB_DEMCR_MON_REQ_Pos)               /*!< DCB DEMCR: Monitor request Mask */
+
+#define DCB_DEMCR_MON_STEP_Pos             18U                                            /*!< DCB DEMCR: Monitor step Position */
+#define DCB_DEMCR_MON_STEP_Msk             (0x1UL << DCB_DEMCR_MON_STEP_Pos)              /*!< DCB DEMCR: Monitor step Mask */
+
+#define DCB_DEMCR_MON_PEND_Pos             17U                                            /*!< DCB DEMCR: Monitor pend Position */
+#define DCB_DEMCR_MON_PEND_Msk             (0x1UL << DCB_DEMCR_MON_PEND_Pos)              /*!< DCB DEMCR: Monitor pend Mask */
+
+#define DCB_DEMCR_MON_EN_Pos               16U                                            /*!< DCB DEMCR: Monitor enable Position */
+#define DCB_DEMCR_MON_EN_Msk               (0x1UL << DCB_DEMCR_MON_EN_Pos)                /*!< DCB DEMCR: Monitor enable Mask */
+
+#define DCB_DEMCR_VC_SFERR_Pos             11U                                            /*!< DCB DEMCR: Vector Catch SecureFault Position */
+#define DCB_DEMCR_VC_SFERR_Msk             (0x1UL << DCB_DEMCR_VC_SFERR_Pos)              /*!< DCB DEMCR: Vector Catch SecureFault Mask */
+
+#define DCB_DEMCR_VC_HARDERR_Pos           10U                                            /*!< DCB DEMCR: Vector Catch HardFault errors Position */
+#define DCB_DEMCR_VC_HARDERR_Msk           (0x1UL << DCB_DEMCR_VC_HARDERR_Pos)            /*!< DCB DEMCR: Vector Catch HardFault errors Mask */
+
+#define DCB_DEMCR_VC_INTERR_Pos             9U                                            /*!< DCB DEMCR: Vector Catch interrupt errors Position */
+#define DCB_DEMCR_VC_INTERR_Msk            (0x1UL << DCB_DEMCR_VC_INTERR_Pos)             /*!< DCB DEMCR: Vector Catch interrupt errors Mask */
+
+#define DCB_DEMCR_VC_BUSERR_Pos             8U                                            /*!< DCB DEMCR: Vector Catch BusFault errors Position */
+#define DCB_DEMCR_VC_BUSERR_Msk            (0x1UL << DCB_DEMCR_VC_BUSERR_Pos)             /*!< DCB DEMCR: Vector Catch BusFault errors Mask */
+
+#define DCB_DEMCR_VC_STATERR_Pos            7U                                            /*!< DCB DEMCR: Vector Catch state errors Position */
+#define DCB_DEMCR_VC_STATERR_Msk           (0x1UL << DCB_DEMCR_VC_STATERR_Pos)            /*!< DCB DEMCR: Vector Catch state errors Mask */
+
+#define DCB_DEMCR_VC_CHKERR_Pos             6U                                            /*!< DCB DEMCR: Vector Catch check errors Position */
+#define DCB_DEMCR_VC_CHKERR_Msk            (0x1UL << DCB_DEMCR_VC_CHKERR_Pos)             /*!< DCB DEMCR: Vector Catch check errors Mask */
+
+#define DCB_DEMCR_VC_NOCPERR_Pos            5U                                            /*!< DCB DEMCR: Vector Catch NOCP errors Position */
+#define DCB_DEMCR_VC_NOCPERR_Msk           (0x1UL << DCB_DEMCR_VC_NOCPERR_Pos)            /*!< DCB DEMCR: Vector Catch NOCP errors Mask */
+
+#define DCB_DEMCR_VC_MMERR_Pos              4U                                            /*!< DCB DEMCR: Vector Catch MemManage errors Position */
+#define DCB_DEMCR_VC_MMERR_Msk             (0x1UL << DCB_DEMCR_VC_MMERR_Pos)              /*!< DCB DEMCR: Vector Catch MemManage errors Mask */
+
+#define DCB_DEMCR_VC_CORERESET_Pos          0U                                            /*!< DCB DEMCR: Vector Catch Core reset Position */
+#define DCB_DEMCR_VC_CORERESET_Msk         (0x1UL /*<< DCB_DEMCR_VC_CORERESET_Pos*/)      /*!< DCB DEMCR: Vector Catch Core reset Mask (shift by 0 left commented out) */
+
+/* DSCEMCR, Debug Set Clear Exception and Monitor Control Register Definitions (register is declared write-only in DCB_Type; all fields are single bits) */
+#define DCB_DSCEMCR_CLR_MON_REQ_Pos        19U                                            /*!< DCB DSCEMCR: Clear monitor request Position */
+#define DCB_DSCEMCR_CLR_MON_REQ_Msk        (0x1UL << DCB_DSCEMCR_CLR_MON_REQ_Pos)         /*!< DCB DSCEMCR: Clear monitor request Mask */
+
+#define DCB_DSCEMCR_CLR_MON_PEND_Pos       17U                                            /*!< DCB DSCEMCR: Clear monitor pend Position */
+#define DCB_DSCEMCR_CLR_MON_PEND_Msk       (0x1UL << DCB_DSCEMCR_CLR_MON_PEND_Pos)        /*!< DCB DSCEMCR: Clear monitor pend Mask */
+
+#define DCB_DSCEMCR_SET_MON_REQ_Pos         3U                                            /*!< DCB DSCEMCR: Set monitor request Position */
+#define DCB_DSCEMCR_SET_MON_REQ_Msk        (0x1UL << DCB_DSCEMCR_SET_MON_REQ_Pos)         /*!< DCB DSCEMCR: Set monitor request Mask */
+
+#define DCB_DSCEMCR_SET_MON_PEND_Pos        1U                                            /*!< DCB DSCEMCR: Set monitor pend Position */
+#define DCB_DSCEMCR_SET_MON_PEND_Msk       (0x1UL << DCB_DSCEMCR_SET_MON_PEND_Pos)        /*!< DCB DSCEMCR: Set monitor pend Mask */
+
+/* DAUTHCTRL, Debug Authentication Control Register Definitions (all fields are single bits, listed from bit 10 down to bit 0) */
+#define DCB_DAUTHCTRL_UIDEN_Pos            10U                                            /*!< DCB DAUTHCTRL: Unprivileged Invasive Debug Enable Position */
+#define DCB_DAUTHCTRL_UIDEN_Msk            (0x1UL << DCB_DAUTHCTRL_UIDEN_Pos)             /*!< DCB DAUTHCTRL: Unprivileged Invasive Debug Enable Mask */
+
+#define DCB_DAUTHCTRL_UIDAPEN_Pos           9U                                            /*!< DCB DAUTHCTRL: Unprivileged Invasive DAP Access Enable Position */
+#define DCB_DAUTHCTRL_UIDAPEN_Msk          (0x1UL << DCB_DAUTHCTRL_UIDAPEN_Pos)           /*!< DCB DAUTHCTRL: Unprivileged Invasive DAP Access Enable Mask */
+
+#define DCB_DAUTHCTRL_FSDMA_Pos             8U                                            /*!< DCB DAUTHCTRL: Force Secure DebugMonitor Allowed Position */
+#define DCB_DAUTHCTRL_FSDMA_Msk            (0x1UL << DCB_DAUTHCTRL_FSDMA_Pos)             /*!< DCB DAUTHCTRL: Force Secure DebugMonitor Allowed Mask */
+
+#define DCB_DAUTHCTRL_INTSPNIDEN_Pos        3U                                            /*!< DCB DAUTHCTRL: Internal Secure non-invasive debug enable Position */
+#define DCB_DAUTHCTRL_INTSPNIDEN_Msk       (0x1UL << DCB_DAUTHCTRL_INTSPNIDEN_Pos)        /*!< DCB DAUTHCTRL: Internal Secure non-invasive debug enable Mask */
+
+#define DCB_DAUTHCTRL_SPNIDENSEL_Pos        2U                                            /*!< DCB DAUTHCTRL: Secure non-invasive debug enable select Position */
+#define DCB_DAUTHCTRL_SPNIDENSEL_Msk       (0x1UL << DCB_DAUTHCTRL_SPNIDENSEL_Pos)        /*!< DCB DAUTHCTRL: Secure non-invasive debug enable select Mask */
+
+#define DCB_DAUTHCTRL_INTSPIDEN_Pos         1U                                            /*!< DCB DAUTHCTRL: Internal Secure invasive debug enable Position */
+#define DCB_DAUTHCTRL_INTSPIDEN_Msk        (0x1UL << DCB_DAUTHCTRL_INTSPIDEN_Pos)         /*!< DCB DAUTHCTRL: Internal Secure invasive debug enable Mask */
+
+#define DCB_DAUTHCTRL_SPIDENSEL_Pos         0U                                            /*!< DCB DAUTHCTRL: Secure invasive debug enable select Position */
+#define DCB_DAUTHCTRL_SPIDENSEL_Msk        (0x1UL /*<< DCB_DAUTHCTRL_SPIDENSEL_Pos*/)     /*!< DCB DAUTHCTRL: Secure invasive debug enable select Mask (shift by 0 left commented out) */
+
+/* DSCSR, Debug Security Control and Status Register Definitions (all fields are single bits) */
+#define DCB_DSCSR_CDSKEY_Pos               17U                                            /*!< DCB DSCSR: CDS write-enable key Position */
+#define DCB_DSCSR_CDSKEY_Msk               (0x1UL << DCB_DSCSR_CDSKEY_Pos)                /*!< DCB DSCSR: CDS write-enable key Mask */
+
+#define DCB_DSCSR_CDS_Pos                  16U                                            /*!< DCB DSCSR: Current domain Secure Position */
+#define DCB_DSCSR_CDS_Msk                  (0x1UL << DCB_DSCSR_CDS_Pos)                   /*!< DCB DSCSR: Current domain Secure Mask */
+
+#define DCB_DSCSR_SBRSEL_Pos                1U                                            /*!< DCB DSCSR: Secure banked register select Position */
+#define DCB_DSCSR_SBRSEL_Msk               (0x1UL << DCB_DSCSR_SBRSEL_Pos)                /*!< DCB DSCSR: Secure banked register select Mask */
+
+#define DCB_DSCSR_SBRSELEN_Pos              0U                                            /*!< DCB DSCSR: Secure banked register select enable Position */
+#define DCB_DSCSR_SBRSELEN_Msk             (0x1UL /*<< DCB_DSCSR_SBRSELEN_Pos*/)          /*!< DCB DSCSR: Secure banked register select enable Mask (shift by 0 left commented out) */
+
+/*@} end of group CMSIS_DCB */
+
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_DIB       Debug Identification Block
+  \brief    Type definitions for the Debug Identification Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Debug Identification Block Registers (DIB): three read-only registers separated by reserved words, reached through the DIB pointer macro (DIB_BASE).
+ */
+typedef struct
+{
+        uint32_t RESERVED0[2U];          /*!< Offset: 0x000        Reserved (2 words), so DAUTHSTATUS sits at 0x008 */
+  __IM  uint32_t DAUTHSTATUS;            /*!< Offset: 0x008 (R/ )  Debug Authentication Status Register (fields: DIB_DAUTHSTATUS_*) */
+  __IM  uint32_t DDEVARCH;               /*!< Offset: 0x00C (R/ )  SCS Device Architecture Register (fields: DIB_DDEVARCH_*) */
+        uint32_t RESERVED1[3U];          /*!< Offset: 0x010        Reserved (3 words), so DDEVTYPE sits at 0x01C */
+  __IM  uint32_t DDEVTYPE;               /*!< Offset: 0x01C (R/ )  SCS Device Type Register (fields: DIB_DDEVTYPE_*) */
+} DIB_Type;
+
+/* DAUTHSTATUS, Debug Authentication Status Register Definitions (every field is 2 bits wide) */
+#define DIB_DAUTHSTATUS_SUNID_Pos          22U                                            /*!< DIB DAUTHSTATUS: Secure Unprivileged Non-invasive Debug Allowed Position */
+#define DIB_DAUTHSTATUS_SUNID_Msk          (0x3UL << DIB_DAUTHSTATUS_SUNID_Pos )          /*!< DIB DAUTHSTATUS: Secure Unprivileged Non-invasive Debug Allowed Mask */
+
+#define DIB_DAUTHSTATUS_SUID_Pos           20U                                            /*!< DIB DAUTHSTATUS: Secure Unprivileged Invasive Debug Allowed Position */
+#define DIB_DAUTHSTATUS_SUID_Msk           (0x3UL << DIB_DAUTHSTATUS_SUID_Pos )           /*!< DIB DAUTHSTATUS: Secure Unprivileged Invasive Debug Allowed Mask */
+
+#define DIB_DAUTHSTATUS_NSUNID_Pos         18U                                            /*!< DIB DAUTHSTATUS: Non-secure Unprivileged Non-invasive Debug Allowed Position */
+#define DIB_DAUTHSTATUS_NSUNID_Msk         (0x3UL << DIB_DAUTHSTATUS_NSUNID_Pos )         /*!< DIB DAUTHSTATUS: Non-secure Unprivileged Non-invasive Debug Allowed Mask */
+
+#define DIB_DAUTHSTATUS_NSUID_Pos          16U                                            /*!< DIB DAUTHSTATUS: Non-secure Unprivileged Invasive Debug Allowed Position */
+#define DIB_DAUTHSTATUS_NSUID_Msk          (0x3UL << DIB_DAUTHSTATUS_NSUID_Pos )          /*!< DIB DAUTHSTATUS: Non-secure Unprivileged Invasive Debug Allowed Mask */
+
+#define DIB_DAUTHSTATUS_SNID_Pos            6U                                            /*!< DIB DAUTHSTATUS: Secure Non-invasive Debug Position */
+#define DIB_DAUTHSTATUS_SNID_Msk           (0x3UL << DIB_DAUTHSTATUS_SNID_Pos )           /*!< DIB DAUTHSTATUS: Secure Non-invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_SID_Pos             4U                                            /*!< DIB DAUTHSTATUS: Secure Invasive Debug Position */
+#define DIB_DAUTHSTATUS_SID_Msk            (0x3UL << DIB_DAUTHSTATUS_SID_Pos )            /*!< DIB DAUTHSTATUS: Secure Invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_NSNID_Pos           2U                                            /*!< DIB DAUTHSTATUS: Non-secure Non-invasive Debug Position */
+#define DIB_DAUTHSTATUS_NSNID_Msk          (0x3UL << DIB_DAUTHSTATUS_NSNID_Pos )          /*!< DIB DAUTHSTATUS: Non-secure Non-invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_NSID_Pos            0U                                            /*!< DIB DAUTHSTATUS: Non-secure Invasive Debug Position */
+#define DIB_DAUTHSTATUS_NSID_Msk           (0x3UL /*<< DIB_DAUTHSTATUS_NSID_Pos*/)        /*!< DIB DAUTHSTATUS: Non-secure Invasive Debug Mask (shift by 0 left commented out) */
+
+/* DDEVARCH, SCS Device Architecture Register Definitions (the five fields tile the 32-bit register without overlap) */
+#define DIB_DDEVARCH_ARCHITECT_Pos         21U                                            /*!< DIB DDEVARCH: Architect Position (11-bit field, bits [31:21]) */
+#define DIB_DDEVARCH_ARCHITECT_Msk         (0x7FFUL << DIB_DDEVARCH_ARCHITECT_Pos )       /*!< DIB DDEVARCH: Architect Mask */
+
+#define DIB_DDEVARCH_PRESENT_Pos           20U                                            /*!< DIB DDEVARCH: DEVARCH Present Position (single bit, bit [20]) */
+#define DIB_DDEVARCH_PRESENT_Msk           (0x1UL << DIB_DDEVARCH_PRESENT_Pos )           /*!< DIB DDEVARCH: DEVARCH Present Mask (one bit only; a wider mask would spill into ARCHITECT at bits [24:21]) */
+
+#define DIB_DDEVARCH_REVISION_Pos          16U                                            /*!< DIB DDEVARCH: Revision Position (4-bit field, bits [19:16]) */
+#define DIB_DDEVARCH_REVISION_Msk          (0xFUL << DIB_DDEVARCH_REVISION_Pos )          /*!< DIB DDEVARCH: Revision Mask */
+
+#define DIB_DDEVARCH_ARCHVER_Pos           12U                                            /*!< DIB DDEVARCH: Architecture Version Position (4-bit field, bits [15:12]) */
+#define DIB_DDEVARCH_ARCHVER_Msk           (0xFUL << DIB_DDEVARCH_ARCHVER_Pos )           /*!< DIB DDEVARCH: Architecture Version Mask */
+
+#define DIB_DDEVARCH_ARCHPART_Pos           0U                                            /*!< DIB DDEVARCH: Architecture Part Position (12-bit field, bits [11:0]) */
+#define DIB_DDEVARCH_ARCHPART_Msk          (0xFFFUL /*<< DIB_DDEVARCH_ARCHPART_Pos*/)     /*!< DIB DDEVARCH: Architecture Part Mask (shift by 0 left commented out) */
+
+/* DDEVTYPE, SCS Device Type Register Definitions (two 4-bit fields in the low byte) */
+#define DIB_DDEVTYPE_SUB_Pos                4U                                            /*!< DIB DDEVTYPE: Sub-type Position (bits [7:4]) */
+#define DIB_DDEVTYPE_SUB_Msk               (0xFUL << DIB_DDEVTYPE_SUB_Pos )               /*!< DIB DDEVTYPE: Sub-type Mask */
+
+#define DIB_DDEVTYPE_MAJOR_Pos              0U                                            /*!< DIB DDEVTYPE: Major type Position (bits [3:0]) */
+#define DIB_DDEVTYPE_MAJOR_Msk             (0xFUL /*<< DIB_DDEVTYPE_MAJOR_Pos*/)          /*!< DIB DDEVTYPE: Major type Mask (shift by 0 left commented out) */
+
+
+/*@} end of group CMSIS_DIB */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_core_bitfield     Core register bit field macros
+  \brief      Macros for use with bit field definitions (xxx_Pos, xxx_Msk).
+  @{
+ */
+
+/**
+  \brief   Place a value into a register bit field: shift it up to the field position, then clip it to the field mask.
+  \param[in] field  Name prefix of the bit field; the macro appends _Pos and _Msk to it.
+  \param[in] value  Value to store in the field. It is cast to uint32_t before the shift.
+  \return           The shifted value with every bit outside the field mask cleared.
+*/
+#define _VAL2FLD(field, value)    ((field ## _Msk) & ((uint32_t)(value) << (field ## _Pos)))
+
+/**
+  \brief     Read a bit field out of a register value: keep only the field's bits, then shift them down to bit 0.
+  \param[in] field  Name prefix of the bit field; the macro appends _Msk and _Pos to it.
+  \param[in] value  Register value to read from. It is cast to uint32_t before masking.
+  \return           The field contents, right-aligned.
+*/
+#define _FLD2VAL(field, value)    (((field ## _Msk) & (uint32_t)(value)) >> (field ## _Pos))
+
+/*@} end of group CMSIS_core_bitfield */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_core_base     Core Definitions
+  \brief      Definitions for base addresses, unions, and structures.
+  @{
+ */
+
+/* Memory mapping of Core Hardware: base addresses, either absolute or as an offset from SCS_BASE */
+  #define SCS_BASE            (0xE000E000UL)                             /*!< System Control Space Base Address */
+  #define ITM_BASE            (0xE0000000UL)                             /*!< ITM Base Address */
+  #define DWT_BASE            (0xE0001000UL)                             /*!< DWT Base Address */
+  #define MEMSYSCTL_BASE      (0xE001E000UL)                             /*!< Memory System Control Base Address */
+  #define ERRBNK_BASE         (0xE001E100UL)                             /*!< Error Banking Base Address */
+  #define PWRMODCTL_BASE      (0xE001E300UL)                             /*!< Power Mode Control Base Address */
+  #define EWIC_BASE           (0xE001E400UL)                             /*!< External Wakeup Interrupt Controller Base Address */
+  #define PRCCFGINF_BASE      (0xE001E700UL)                             /*!< Processor Configuration Information Base Address */
+  #define TPI_BASE            (0xE0040000UL)                             /*!< TPI Base Address */
+  #define CoreDebug_BASE      (0xE000EDF0UL)                             /*!< \deprecated Core Debug Base Address (same address as DCB_BASE) */
+  #define DCB_BASE            (0xE000EDF0UL)                             /*!< DCB Base Address (SCS_BASE + 0x0DF0) */
+  #define DIB_BASE            (0xE000EFB0UL)                             /*!< DIB Base Address (SCS_BASE + 0x0FB0) */
+  #define SysTick_BASE        (SCS_BASE +  0x0010UL)                     /*!< SysTick Base Address (0xE000E010) */
+  #define NVIC_BASE           (SCS_BASE +  0x0100UL)                     /*!< NVIC Base Address (0xE000E100) */
+  #define SCB_BASE            (SCS_BASE +  0x0D00UL)                     /*!< System Control Block Base Address (0xE000ED00) */
+
+  #define ICB                 ((ICB_Type       *)     SCS_BASE         ) /*!< System control registers not in the SCB (struct placed at SCS_BASE) */
+  #define SCB                 ((SCB_Type       *)     SCB_BASE         ) /*!< SCB configuration struct */
+  #define SysTick             ((SysTick_Type   *)     SysTick_BASE     ) /*!< SysTick configuration struct */
+  #define NVIC                ((NVIC_Type      *)     NVIC_BASE        ) /*!< NVIC configuration struct */
+  #define ITM                 ((ITM_Type       *)     ITM_BASE         ) /*!< ITM configuration struct */
+  #define DWT                 ((DWT_Type       *)     DWT_BASE         ) /*!< DWT configuration struct */
+  #define TPI                 ((TPI_Type       *)     TPI_BASE         ) /*!< TPI configuration struct */
+  #define MEMSYSCTL           ((MemSysCtl_Type *)     MEMSYSCTL_BASE   ) /*!< Memory System Control configuration struct */
+  #define ERRBNK              ((ErrBnk_Type    *)     ERRBNK_BASE      ) /*!< Error Banking configuration struct */
+  #define PWRMODCTL           ((PwrModCtl_Type *)     PWRMODCTL_BASE   ) /*!< Power Mode Control configuration struct */
+  #define EWIC                ((EWIC_Type      *)     EWIC_BASE        ) /*!< EWIC configuration struct */
+  #define PRCCFGINF           ((PrcCfgInf_Type *)     PRCCFGINF_BASE   ) /*!< Processor Configuration Information configuration struct */
+  #define CoreDebug           ((CoreDebug_Type *)     CoreDebug_BASE   ) /*!< \deprecated Core Debug configuration struct (points at the same address as DCB) */
+  #define DCB                 ((DCB_Type       *)     DCB_BASE         ) /*!< DCB configuration struct */
+  #define DIB                 ((DIB_Type       *)     DIB_BASE         ) /*!< DIB configuration struct */
+
+  #if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+    #define MPU_BASE          (SCS_BASE +  0x0D90UL)                     /*!< MPU (Memory Protection Unit) Base Address; defined only when __MPU_PRESENT == 1U */
+    #define MPU               ((MPU_Type       *)     MPU_BASE         ) /*!< MPU (Memory Protection Unit) configuration struct */
+  #endif
+
+  #if defined (__PMU_PRESENT) && (__PMU_PRESENT == 1U)
+    #define PMU_BASE          (0xE0003000UL)                             /*!< PMU Base Address; defined only when __PMU_PRESENT == 1U */
+    #define PMU               ((PMU_Type       *)     PMU_BASE         ) /*!< PMU configuration struct */
+  #endif
+
+  #if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+    #define SAU_BASE          (SCS_BASE +  0x0DD0UL)                     /*!< SAU (Security Attribution Unit) Base Address; defined only when __ARM_FEATURE_CMSE == 3U */
+    #define SAU               ((SAU_Type       *)     SAU_BASE         ) /*!< SAU (Security Attribution Unit) configuration struct */
+  #endif
+
+  #define FPU_BASE            (SCS_BASE +  0x0F30UL)                     /*!< FPU (Floating Point Unit) Base Address; defined unconditionally here */
+  #define FPU                 ((FPU_Type       *)     FPU_BASE         ) /*!< FPU (Floating Point Unit) configuration struct */
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+  #define SCS_BASE_NS         (0xE002E000UL)                             /*!< System Control Space Base Address (non-secure address space) */
+  #define CoreDebug_BASE_NS   (0xE002EDF0UL)                             /*!< \deprecated Core Debug Base Address           (non-secure address space) */
+  #define DCB_BASE_NS         (0xE002EDF0UL)                             /*!< DCB Base Address                  (non-secure address space) */
+  #define DIB_BASE_NS         (0xE002EFB0UL)                             /*!< DIB Base Address                  (non-secure address space) */
+  #define SysTick_BASE_NS     (SCS_BASE_NS +  0x0010UL)                  /*!< SysTick Base Address              (non-secure address space) */
+  #define NVIC_BASE_NS        (SCS_BASE_NS +  0x0100UL)                  /*!< NVIC Base Address                 (non-secure address space) */
+  #define SCB_BASE_NS         (SCS_BASE_NS +  0x0D00UL)                  /*!< System Control Block Base Address (non-secure address space) */
+
+  #define ICB_NS              ((ICB_Type       *)     SCS_BASE_NS      ) /*!< System control registers not in SCB (non-secure address space) */
+  #define SCB_NS              ((SCB_Type       *)     SCB_BASE_NS      ) /*!< SCB configuration struct          (non-secure address space) */
+  #define SysTick_NS          ((SysTick_Type   *)     SysTick_BASE_NS  ) /*!< SysTick configuration struct      (non-secure address space) */
+  #define NVIC_NS             ((NVIC_Type      *)     NVIC_BASE_NS     ) /*!< NVIC configuration struct         (non-secure address space) */
+  #define CoreDebug_NS        ((CoreDebug_Type *)     CoreDebug_BASE_NS) /*!< \deprecated Core Debug configuration struct   (non-secure address space) */
+  #define DCB_NS              ((DCB_Type       *)     DCB_BASE_NS      ) /*!< DCB configuration struct          (non-secure address space) */
+  #define DIB_NS              ((DIB_Type       *)     DIB_BASE_NS      ) /*!< DIB configuration struct          (non-secure address space) */
+
+  #if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+    #define MPU_BASE_NS       (SCS_BASE_NS +  0x0D90UL)                  /*!< Memory Protection Unit Base Address      (non-secure address space) */
+    #define MPU_NS            ((MPU_Type       *)     MPU_BASE_NS      ) /*!< Memory Protection Unit configuration struct (non-secure address space) */
+  #endif
+
+  #define FPU_BASE_NS         (SCS_BASE_NS +  0x0F30UL)                  /*!< Floating Point Unit Base Address         (non-secure address space) */
+  #define FPU_NS              ((FPU_Type       *)     FPU_BASE_NS      ) /*!< Floating Point Unit configuration struct (non-secure address space) */
+
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+/*@} */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_register_aliases     Backwards Compatibility Aliases
+  \brief      Register alias definitions for backwards compatibility.
+  @{
+ */
+
+/*@} */
+
+
+/*******************************************************************************
+ *                Hardware Abstraction Layer
+  Core Function Interface contains:
+  - Core NVIC Functions
+  - Core SysTick Functions
+  - Core Debug Functions
+  - Core Register Access Functions
+ ******************************************************************************/
+/**
+  \defgroup CMSIS_Core_FunctionInterface Functions and Instructions Reference
+*/
+
+
+
+/* ##########################   NVIC functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_NVICFunctions NVIC Functions
+  \brief    Functions that manage interrupts and exceptions via the NVIC.
+  @{
+ */
+
+#ifdef CMSIS_NVIC_VIRTUAL                                            /* NVIC_* names come from an included header */
+  #ifndef CMSIS_NVIC_VIRTUAL_HEADER_FILE
+    #define CMSIS_NVIC_VIRTUAL_HEADER_FILE "cmsis_nvic_virtual.h"    /* default header name when none was configured */
+  #endif
+  #include CMSIS_NVIC_VIRTUAL_HEADER_FILE
+#else                                                                /* map NVIC_* names onto the __NVIC_* functions of this file */
+  #define NVIC_SetPriorityGrouping    __NVIC_SetPriorityGrouping
+  #define NVIC_GetPriorityGrouping    __NVIC_GetPriorityGrouping
+  #define NVIC_EnableIRQ              __NVIC_EnableIRQ
+  #define NVIC_GetEnableIRQ           __NVIC_GetEnableIRQ
+  #define NVIC_DisableIRQ             __NVIC_DisableIRQ
+  #define NVIC_GetPendingIRQ          __NVIC_GetPendingIRQ
+  #define NVIC_SetPendingIRQ          __NVIC_SetPendingIRQ
+  #define NVIC_ClearPendingIRQ        __NVIC_ClearPendingIRQ
+  #define NVIC_GetActive              __NVIC_GetActive
+  #define NVIC_SetPriority            __NVIC_SetPriority
+  #define NVIC_GetPriority            __NVIC_GetPriority
+  #define NVIC_SystemReset            __NVIC_SystemReset
+#endif /* CMSIS_NVIC_VIRTUAL */
+
+#ifdef CMSIS_VECTAB_VIRTUAL                                          /* NVIC_SetVector/NVIC_GetVector come from an included header */
+  #ifndef CMSIS_VECTAB_VIRTUAL_HEADER_FILE
+    #define CMSIS_VECTAB_VIRTUAL_HEADER_FILE "cmsis_vectab_virtual.h" /* default header name when none was configured */
+  #endif
+  #include CMSIS_VECTAB_VIRTUAL_HEADER_FILE
+#else                                                                /* map the vector accessors onto the __NVIC_* functions of this file */
+  #define NVIC_SetVector              __NVIC_SetVector
+  #define NVIC_GetVector              __NVIC_GetVector
+#endif  /* (CMSIS_VECTAB_VIRTUAL) */
+
+#define NVIC_USER_IRQ_OFFSET          16                             /* added to IRQn to form the vector table index */
+
+
+/* Special LR values for Secure/Non-Secure call handling and exception handling                                               */
+
+/* Function Return Payload (from ARMv8-M Architecture Reference Manual) LR value on entry from Secure BLXNS                   */
+#define FNC_RETURN                 (0xFEFFFFFFUL)     /* bit [0] ignored when processing a branch                             */
+
+/* The following EXC_RETURN mask values are used to evaluate the LR on exception entry */
+#define EXC_RETURN_PREFIX          (0xFF000000UL)     /* bits [31:24] set to indicate an EXC_RETURN value                     */
+#define EXC_RETURN_S               (0x00000040UL)     /* bit [6] stack used to push registers: 0=Non-secure 1=Secure          */
+#define EXC_RETURN_DCRS            (0x00000020UL)     /* bit [5] stacking rules for called registers: 0=skipped 1=saved       */
+#define EXC_RETURN_FTYPE           (0x00000010UL)     /* bit [4] allocate stack for floating-point context: 0=done 1=skipped  */
+#define EXC_RETURN_MODE            (0x00000008UL)     /* bit [3] processor mode for return: 0=Handler mode 1=Thread mode      */
+#define EXC_RETURN_SPSEL           (0x00000004UL)     /* bit [2] stack pointer used to restore context: 0=MSP 1=PSP           */
+#define EXC_RETURN_ES              (0x00000001UL)     /* bit [0] security state exception was taken to: 0=Non-secure 1=Secure */
+
+/* Integrity Signature (from ARMv8-M Architecture Reference Manual) for exception context stacking                            */
+#if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)  /* Value for processors with floating-point extension:                  */
+#define EXC_INTEGRITY_SIGNATURE     (0xFEFA125AUL)     /* bit [0] SFTC must match LR bit[4] EXC_RETURN_FTYPE                   */
+#else
+#define EXC_INTEGRITY_SIGNATURE     (0xFEFA125BUL)     /* Value for processors without floating-point extension                */
+#endif /* defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U) */
+
+
+/**
+  \brief   Set Priority Grouping
+  \details Updates the PRIGROUP field (SCB->AIRCR [10:8]) with a single read-modify-write of AIRCR.
+           The value written carries the 0x5FA key in the VECTKEY field (the required unlock sequence);
+           all remaining AIRCR bits are written back as they were read.
+           Only values from 0..7 are used, higher bits of PriorityGroup are masked off.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS), the smallest possible priority group is set.
+  \param [in]      PriorityGroup  Priority grouping field.
+ */
+__STATIC_INLINE void __NVIC_SetPriorityGrouping(uint32_t PriorityGroup)
+{
+  const uint32_t group = (PriorityGroup & (uint32_t)0x07UL);                   /* only values 0..7 are used */
+  uint32_t       aircr = SCB->AIRCR;                                           /* current configuration     */
+
+  aircr &= ~((uint32_t)(SCB_AIRCR_VECTKEY_Msk | SCB_AIRCR_PRIGROUP_Msk));      /* drop old key and group    */
+  aircr |= ((uint32_t)0x5FAUL << SCB_AIRCR_VECTKEY_Pos);                       /* insert write key          */
+  aircr |= (group << SCB_AIRCR_PRIGROUP_Pos);                                  /* insert new priority group */
+
+  SCB->AIRCR = aircr;
+}
+
+
+/**
+  \brief   Get Priority Grouping
+  \return  Priority grouping field, i.e. the PRIGROUP field of SCB->AIRCR [10:8] shifted down to bit 0.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPriorityGrouping(void)
+{
+  const uint32_t aircr = SCB->AIRCR;
+  return ((uint32_t)((aircr & SCB_AIRCR_PRIGROUP_Msk) >> SCB_AIRCR_PRIGROUP_Pos));
+}
+
+
+/**
+  \brief   Enable Interrupt
+  \details Enables a device specific interrupt by writing its bit to the matching NVIC->ISER word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void __NVIC_EnableIRQ(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  __COMPILER_BARRIER();
+  NVIC->ISER[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));   /* 32 interrupts per ISER word */
+  __COMPILER_BARRIER();
+}
+
+
+/**
+  \brief   Get Interrupt Enable status
+  \details Tests the bit that belongs to IRQn in the matching NVIC->ISER word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt is not enabled (also returned for a negative IRQn).
+  \return             1  Interrupt is enabled.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetEnableIRQ(IRQn_Type IRQn)
+{
+  uint32_t enabled = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    enabled = (uint32_t)(((NVIC->ISER[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return enabled;
+}
+
+
+/**
+  \brief   Disable Interrupt
+  \details Disables a device specific interrupt by writing its bit to the matching NVIC->ICER word,
+           followed by __DSB() and __ISB().
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void __NVIC_DisableIRQ(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+  NVIC->ICER[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+  __DSB();
+  __ISB();
+}
+
+
+/**
+  \brief   Get Pending Interrupt
+  \details Tests the pending bit that belongs to IRQn in the matching NVIC->ISPR word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not pending (also returned for a negative IRQn).
+  \return             1  Interrupt status is pending.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPendingIRQ(IRQn_Type IRQn)
+{
+  uint32_t pending = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    pending = (uint32_t)(((NVIC->ISPR[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return pending;
+}
+
+
+/**
+  \brief   Set Pending Interrupt
+  \details Sets the pending bit of a device specific interrupt by writing its bit to the matching NVIC->ISPR word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void __NVIC_SetPendingIRQ(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC->ISPR[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Clear Pending Interrupt
+  \details Clears the pending bit of a device specific interrupt by writing its bit to the matching NVIC->ICPR word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void __NVIC_ClearPendingIRQ(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC->ICPR[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Get Active Interrupt
+  \details Tests the active bit that belongs to IRQn in the matching NVIC->IABR word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not active (also returned for a negative IRQn).
+  \return             1  Interrupt status is active.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetActive(IRQn_Type IRQn)
+{
+  uint32_t active = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    active = (uint32_t)(((NVIC->IABR[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return active;
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Get Interrupt Target State
+  \details Tests the interrupt target bit that belongs to IRQn in the matching NVIC->ITNS word.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure (also returned for a negative IRQn)
+  \return             1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t NVIC_GetTargetState(IRQn_Type IRQn)
+{
+  uint32_t target = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    target = (uint32_t)(((NVIC->ITNS[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return target;
+}
+
+
+/**
+  \brief   Set Interrupt Target State
+  \details Sets the interrupt target bit in NVIC->ITNS, then reads the register back and returns the resulting bit.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure
+  \return             1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative; for a negative IRQn nothing is written and 0 is returned.
+ */
+__STATIC_INLINE uint32_t NVIC_SetTargetState(IRQn_Type IRQn)
+{
+  uint32_t target = 0U;
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t word = ((uint32_t)IRQn) >> 5UL;
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    NVIC->ITNS[word] |= mask;
+    target = (uint32_t)(((NVIC->ITNS[word] & mask) != 0UL) ? 1UL : 0UL);   /* read back after the update */
+  }
+  return target;
+}
+
+
+/**
+  \brief   Clear Interrupt Target State
+  \details Clears the interrupt target bit in NVIC->ITNS, then reads the register back and returns the resulting bit.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure
+  \return             1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative; for a negative IRQn nothing is written and 0 is returned.
+ */
+__STATIC_INLINE uint32_t NVIC_ClearTargetState(IRQn_Type IRQn)
+{
+  uint32_t target = 0U;
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t word = ((uint32_t)IRQn) >> 5UL;
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    NVIC->ITNS[word] &= ~mask;
+    target = (uint32_t)(((NVIC->ITNS[word] & mask) != 0UL) ? 1UL : 0UL);   /* read back after the update */
+  }
+  return target;
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+
+/**
+  \brief   Set Interrupt Priority
+  \details Sets the priority of a device specific interrupt (NVIC->IPR) or a processor exception (SCB->SHPR).
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]      IRQn  Interrupt number.
+  \param [in]  priority  Priority to set.
+  \note    The priority cannot be set for every processor exception.
+ */
+__STATIC_INLINE void __NVIC_SetPriority(IRQn_Type IRQn, uint32_t priority)
+{
+  /* Left-align the priority inside the 8-bit register field. */
+  const uint8_t field = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  if ((int32_t)(IRQn) < 0)
+  {
+    SCB->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] = field;   /* processor exception       */
+    return;
+  }
+  NVIC->IPR[((uint32_t)IRQn)] = field;                   /* device specific interrupt */
+}
+
+
+/**
+  \brief   Get Interrupt Priority
+  \details Reads the priority of a device specific interrupt (NVIC->IPR) or a processor exception (SCB->SHPR).
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]   IRQn  Interrupt number.
+  \return             Interrupt Priority.
+                      Value is aligned automatically to the implemented priority bits of the microcontroller.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPriority(IRQn_Type IRQn)
+{
+  const uint32_t shift = (8U - __NVIC_PRIO_BITS);   /* undo the left alignment inside the 8-bit field */
+
+  if ((int32_t)(IRQn) < 0)
+  {
+    /* processor exception: the priority is held in SCB->SHPR */
+    return(((uint32_t)SCB->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] >> shift));
+  }
+
+  return(((uint32_t)NVIC->IPR[((uint32_t)IRQn)] >> shift));
+}
+
+
+/**
+  \brief   Encode Priority
+  \details Encodes the priority for an interrupt with the given priority group,
+           preemptive priority value, and subpriority value.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS), the smallest possible priority group is set.
+  \param [in]     PriorityGroup  Used priority group.
+  \param [in]   PreemptPriority  Preemptive priority value (starting from 0).
+  \param [in]       SubPriority  Subpriority value (starting from 0).
+  \return                        Encoded priority. Value can be used in the function \ref NVIC_SetPriority().
+ */
+__STATIC_INLINE uint32_t NVIC_EncodePriority (uint32_t PriorityGroup, uint32_t PreemptPriority, uint32_t SubPriority)
+{
+  const uint32_t group       = (PriorityGroup & (uint32_t)0x07UL);   /* only values 0..7 are used */
+  const uint32_t prioBits    = (uint32_t)(__NVIC_PRIO_BITS);
+  uint32_t       preemptBits = (uint32_t)(7UL - group);
+  uint32_t       subBits     = (uint32_t)0UL;
+
+  /* Limit both field widths to the number of implemented priority bits. */
+  if (preemptBits > prioBits)              { preemptBits = prioBits; }
+  if ((group + prioBits) >= (uint32_t)7UL) { subBits = (uint32_t)((group - 7UL) + prioBits); }
+
+  return (((PreemptPriority & (uint32_t)((1UL << (preemptBits)) - 1UL)) << subBits) |
+          ((SubPriority     & (uint32_t)((1UL << (subBits    )) - 1UL))));
+}
+
+
+/**
+  \brief   Decode Priority
+  \details Decodes an interrupt priority value with a given priority group to
+           preemptive priority value and subpriority value.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS) the smallest possible priority group is set.
+  \param [in]         Priority   Priority value, which can be retrieved with the function \ref NVIC_GetPriority().
+  \param [in]     PriorityGroup  Used priority group.
+  \param [out] pPreemptPriority  Preemptive priority value (starting from 0).
+  \param [out]     pSubPriority  Subpriority value (starting from 0).
+ */
+__STATIC_INLINE void NVIC_DecodePriority (uint32_t Priority, uint32_t PriorityGroup, uint32_t* const pPreemptPriority, uint32_t* const pSubPriority)
+{
+  const uint32_t group       = (PriorityGroup & (uint32_t)0x07UL);   /* only values 0..7 are used */
+  const uint32_t prioBits    = (uint32_t)(__NVIC_PRIO_BITS);
+  uint32_t       preemptBits = (uint32_t)(7UL - group);
+  uint32_t       subBits     = (uint32_t)0UL;
+
+  if (preemptBits > prioBits)              { preemptBits = prioBits; }
+  if ((group + prioBits) >= (uint32_t)7UL) { subBits = (uint32_t)((group - 7UL) + prioBits); }
+  *pPreemptPriority = (Priority >> subBits) & (uint32_t)((1UL << (preemptBits)) - 1UL);
+  *pSubPriority     = (Priority           ) & (uint32_t)((1UL << (subBits    )) - 1UL);
+}
+
+
+/**
+  \brief   Set Interrupt Vector
+  \details Sets an interrupt vector in an SRAM based interrupt vector table located at SCB->VTOR.
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception. VTOR must have been relocated to SRAM before.
+  \param [in]   IRQn      Interrupt number
+  \param [in]   vector    Address of interrupt handler function
+ */
+__STATIC_INLINE void __NVIC_SetVector(IRQn_Type IRQn, uint32_t vector)
+{
+  uint32_t * const table = (uint32_t *)SCB->VTOR;
+  const int32_t    slot  = (int32_t)IRQn + NVIC_USER_IRQ_OFFSET;
+  table[slot] = vector;
+  __DSB();
+}
+
+
+/**
+  \brief   Get Interrupt Vector
+  \details Reads an interrupt vector from the interrupt vector table located at SCB->VTOR.
+           IRQn can be positive (device specific interrupt) or negative (processor exception).
+  \param [in]   IRQn      Interrupt number.
+  \return                 Address of interrupt handler function
+ */
+__STATIC_INLINE uint32_t __NVIC_GetVector(IRQn_Type IRQn)
+{
+  uint32_t * const table = (uint32_t *)SCB->VTOR;
+  const int32_t    slot  = (int32_t)IRQn + NVIC_USER_IRQ_OFFSET;
+  return table[slot];
+}
+
+
+/**
+  \brief   System Reset
+  \details Initiates a system reset request to reset the MCU, then spins on NOP; the function never returns.
+ */
+__NO_RETURN __STATIC_INLINE void __NVIC_SystemReset(void)
+{
+  __DSB();                                                          /* Ensure all outstanding memory accesses including
+                                                                       buffered writes are completed before reset */
+  SCB->AIRCR  = (uint32_t)((0x5FAUL << SCB_AIRCR_VECTKEY_Pos)    |
+                           (SCB->AIRCR & SCB_AIRCR_PRIGROUP_Msk) |
+                            SCB_AIRCR_SYSRESETREQ_Msk    );         /* Keep priority group unchanged */
+  __DSB();                                                          /* Ensure completion of memory access */
+
+  for(;;)                                                           /* wait until reset */
+  {
+    __NOP();
+  }
+}
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Set Priority Grouping (non-secure)
+  \details Sets the non-secure priority grouping field when in secure state using the required unlock sequence:
+           SCB_NS->AIRCR is read, the key and PRIGROUP bits are replaced, and the result is written back once.
+           The parameter PriorityGroup is assigned to the field SCB_NS->AIRCR [10:8] PRIGROUP field.
+           Only values from 0..7 are used.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS), the smallest possible priority group is set.
+  \param [in]      PriorityGroup  Priority grouping field.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPriorityGrouping_NS(uint32_t PriorityGroup)
+{
+  const uint32_t group = (PriorityGroup & (uint32_t)0x07UL);                   /* only values 0..7 are used */
+  uint32_t       aircr = SCB_NS->AIRCR;                                        /* current configuration     */
+
+  aircr &= ~((uint32_t)(SCB_AIRCR_VECTKEY_Msk | SCB_AIRCR_PRIGROUP_Msk));      /* drop old key and group    */
+  aircr |= ((uint32_t)0x5FAUL << SCB_AIRCR_VECTKEY_Pos);                       /* insert write key          */
+  aircr |= (group << SCB_AIRCR_PRIGROUP_Pos);                                  /* insert new priority group */
+
+  SCB_NS->AIRCR = aircr;
+}
+
+
+/**
+  \brief   Get Priority Grouping (non-secure)
+  \return  Priority grouping field, i.e. the PRIGROUP field of SCB_NS->AIRCR [10:8] shifted down to bit 0.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPriorityGrouping_NS(void)
+{
+  const uint32_t aircr = SCB_NS->AIRCR;
+  return ((uint32_t)((aircr & SCB_AIRCR_PRIGROUP_Msk) >> SCB_AIRCR_PRIGROUP_Pos));
+}
+
+
+/**
+  \brief   Enable Interrupt (non-secure)
+  \details Enables a device specific interrupt by writing its bit to the matching NVIC_NS->ISER word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void TZ_NVIC_EnableIRQ_NS(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC_NS->ISER[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Get Interrupt Enable status (non-secure)
+  \details Tests the bit that belongs to IRQn in the matching NVIC_NS->ISER word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt is not enabled (also returned for a negative IRQn).
+  \return             1  Interrupt is enabled.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetEnableIRQ_NS(IRQn_Type IRQn)
+{
+  uint32_t enabled = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    enabled = (uint32_t)(((NVIC_NS->ISER[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return enabled;
+}
+
+
+/**
+  \brief   Disable Interrupt (non-secure)
+  \details Disables a device specific interrupt by writing its bit to the matching NVIC_NS->ICER word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void TZ_NVIC_DisableIRQ_NS(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC_NS->ICER[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Get Pending Interrupt (non-secure)
+  \details Tests the pending bit that belongs to IRQn in the matching NVIC_NS->ISPR word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not pending (also returned for a negative IRQn).
+  \return             1  Interrupt status is pending.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPendingIRQ_NS(IRQn_Type IRQn)
+{
+  uint32_t pending = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    pending = (uint32_t)(((NVIC_NS->ISPR[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return pending;
+}
+
+
+/**
+  \brief   Set Pending Interrupt (non-secure)
+  \details Sets the pending bit of a device specific interrupt by writing its bit to the matching NVIC_NS->ISPR word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPendingIRQ_NS(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC_NS->ISPR[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Clear Pending Interrupt (non-secure)
+  \details Clears the pending bit of a device specific interrupt by writing its bit to the matching NVIC_NS->ICPR word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored.
+ */
+__STATIC_INLINE void TZ_NVIC_ClearPendingIRQ_NS(IRQn_Type IRQn)
+{
+  const uint32_t irq = (uint32_t)IRQn;
+  if ((int32_t)(IRQn) < 0) { return; }
+
+  NVIC_NS->ICPR[irq >> 5UL] = (uint32_t)(1UL << (irq & 0x1FUL));
+}
+
+
+/**
+  \brief   Get Active Interrupt (non-secure)
+  \details Tests the active bit that belongs to IRQn in the matching NVIC_NS->IABR word when in secure state.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not active (also returned for a negative IRQn).
+  \return             1  Interrupt status is active.
+  \note    IRQn must not be negative.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetActive_NS(IRQn_Type IRQn)
+{
+  uint32_t active = 0U;
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    const uint32_t mask = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    active = (uint32_t)(((NVIC_NS->IABR[((uint32_t)IRQn) >> 5UL] & mask) != 0UL) ? 1UL : 0UL);
+  }
+  return active;
+}
+
+
+/**
+  \brief   Set Interrupt Priority (non-secure)
+  \details Sets the priority of a non-secure device specific interrupt (NVIC_NS->IPR) or a non-secure processor exception (SCB_NS->SHPR) when in secure state.
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]      IRQn  Interrupt number.
+  \param [in]  priority  Priority to set.
+  \note    The priority cannot be set for every non-secure processor exception.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPriority_NS(IRQn_Type IRQn, uint32_t priority)
+{
+  /* Left-align the priority inside the 8-bit register field. */
+  const uint8_t field = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  if ((int32_t)(IRQn) < 0)
+  {
+    SCB_NS->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] = field;   /* processor exception       */
+    return;
+  }
+  NVIC_NS->IPR[((uint32_t)IRQn)] = field;                   /* device specific interrupt */
+}
+
+
+/**
+  \brief   Get Interrupt Priority (non-secure)
+  \details Reads the priority of a non-secure device specific interrupt (NVIC_NS->IPR) or a non-secure processor exception (SCB_NS->SHPR) when in secure state.
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]   IRQn  Interrupt number.
+  \return             Interrupt Priority. Value is aligned automatically to the implemented priority bits of the microcontroller.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPriority_NS(IRQn_Type IRQn)
+{
+  const uint32_t shift = (8U - __NVIC_PRIO_BITS);   /* undo the left alignment inside the 8-bit field */
+
+  if ((int32_t)(IRQn) < 0)
+  {
+    /* processor exception: the priority is held in SCB_NS->SHPR */
+    return(((uint32_t)SCB_NS->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] >> shift));
+  }
+
+  return(((uint32_t)NVIC_NS->IPR[((uint32_t)IRQn)] >> shift));
+}
+#endif /*  defined (__ARM_FEATURE_CMSE) &&(__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_NVICFunctions */
+
+/* ##########################  MPU functions  #################################### */
+
+#if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+
+#include "mpu_armv8.h"
+
+#endif /* defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U) */
+
+/* ##########################  PMU functions and events  #################################### */
+
+#if defined (__PMU_PRESENT) && (__PMU_PRESENT == 1U)
+
+#include "pmu_armv8.h"
+
+/**
+  \brief   Cortex-M85 specific PMU event numbers (ARMCM85_PMU_*)
+  \note    Architectural PMU events can be found in pmu_armv8.h
+*/
+
+#define ARMCM85_PMU_ECC_ERR                          0xC000             /*!< One or more Error Correcting Code (ECC) errors detected */
+#define ARMCM85_PMU_ECC_ERR_MBIT                     0xC001             /*!< One or more multi-bit ECC errors detected */
+#define ARMCM85_PMU_ECC_ERR_DCACHE                   0xC010             /*!< One or more ECC errors in the data cache */
+#define ARMCM85_PMU_ECC_ERR_ICACHE                   0xC011             /*!< One or more ECC errors in the instruction cache */
+#define ARMCM85_PMU_ECC_ERR_MBIT_DCACHE              0xC012             /*!< One or more multi-bit ECC errors in the data cache */
+#define ARMCM85_PMU_ECC_ERR_MBIT_ICACHE              0xC013             /*!< One or more multi-bit ECC errors in the instruction cache */
+#define ARMCM85_PMU_ECC_ERR_DTCM                     0xC020             /*!< One or more ECC errors in the Data Tightly Coupled Memory (DTCM) */
+#define ARMCM85_PMU_ECC_ERR_ITCM                     0xC021             /*!< One or more ECC errors in the Instruction Tightly Coupled Memory (ITCM) */
+#define ARMCM85_PMU_ECC_ERR_MBIT_DTCM                0xC022             /*!< One or more multi-bit ECC errors in the DTCM */
+#define ARMCM85_PMU_ECC_ERR_MBIT_ITCM                0xC023             /*!< One or more multi-bit ECC errors in the ITCM */
+#define ARMCM85_PMU_PF_LINEFILL                      0xC100             /*!< The prefetcher starts a line-fill */
+#define ARMCM85_PMU_PF_CANCEL                        0xC101             /*!< The prefetcher stops prefetching */
+#define ARMCM85_PMU_PF_DROP_LINEFILL                 0xC102             /*!< A linefill triggered by a prefetcher has been dropped because of lack of buffering */
+#define ARMCM85_PMU_NWAMODE_ENTER                    0xC200             /*!< No write-allocate mode entry */
+#define ARMCM85_PMU_NWAMODE                          0xC201             /*!< Write-allocate store is not allocated into the data cache due to no-write-allocate mode */
+#define ARMCM85_PMU_SAHB_ACCESS                      0xC300             /*!< Read or write access on the S-AHB interface to the TCM */
+#define ARMCM85_PMU_PAHB_ACCESS                      0xC301             /*!< Read or write access on the P-AHB write interface */
+#define ARMCM85_PMU_AXI_WRITE_ACCESS                 0xC302             /*!< Any beat access to M-AXI write interface */
+#define ARMCM85_PMU_AXI_READ_ACCESS                  0xC303             /*!< Any beat access to M-AXI read interface */
+#define ARMCM85_PMU_DOSTIMEOUT_DOUBLE                0xC400             /*!< Denial of Service timeout has fired twice and caused buffers to drain to allow forward progress */
+#define ARMCM85_PMU_DOSTIMEOUT_TRIPLE                0xC401             /*!< Denial of Service timeout has fired three times and blocked the LSU to force forward progress */
+
+#endif /* defined (__PMU_PRESENT) && (__PMU_PRESENT == 1U) */
+
+/* ##########################  FPU functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_FpuFunctions FPU Functions
+  \brief    Function that provides FPU type.
+  @{
+ */
+
+/**
+  \brief   get FPU type
+  \details returns the FPU type
+  \returns
+   - \b  0: No FPU
+   - \b  1: Single precision FPU
+   - \b  2: Double + Single precision FPU
+ */
+__STATIC_INLINE uint32_t SCB_GetFPUType(void)
+{
+  /* Read MVFR0 once and keep only the single- and double-precision
+     support fields; their combined value selects the result below. */
+  const uint32_t fp_fields = FPU->MVFR0 & (FPU_MVFR0_FPSP_Msk | FPU_MVFR0_FPDP_Msk);
+
+  if (fp_fields == 0x220U)
+  {
+    return 2U;           /* Double + Single precision FPU */
+  }
+
+  if (fp_fields == 0x020U)
+  {
+    return 1U;           /* Single precision FPU */
+  }
+
+  return 0U;             /* No FPU */
+}
+
+
+/*@} end of CMSIS_Core_FpuFunctions */
+
+/* ##########################  MVE functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_MveFunctions MVE Functions
+  \brief    Function that provides MVE type.
+  @{
+ */
+
+/**
+  \brief   get MVE type
+  \details returns the MVE type
+  \returns
+   - \b  0: No Vector Extension (MVE)
+   - \b  1: Integer Vector Extension (MVE-I)
+   - \b  2: Floating-point Vector Extension (MVE-F)
+ */
+__STATIC_INLINE uint32_t SCB_GetMVEType(void)
+{
+  /* Only the MVE field of MVFR1 matters; it is compared in place (unshifted). */
+  const uint32_t mve_field = FPU->MVFR1 & FPU_MVFR1_MVE_Msk;
+
+  if (mve_field == (0x2U << FPU_MVFR1_MVE_Pos))
+  {
+    return 2U;           /* Floating-point Vector Extension (MVE-F) */
+  }
+  if (mve_field == (0x1U << FPU_MVFR1_MVE_Pos))
+  {
+    return 1U;           /* Integer Vector Extension (MVE-I) */
+  }
+
+  return 0U;             /* No Vector Extension (MVE) */
+}
+
+
+/*@} end of CMSIS_Core_MveFunctions */
+
+
+/* ##########################  Cache functions  #################################### */
+
+#if ((defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)) || \
+     (defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)))
+#include "cachel1_armv7.h"
+#endif
+
+
+/* ##########################   SAU functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_SAUFunctions SAU Functions
+  \brief    Functions that configure the SAU.
+  @{
+ */
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+
+/**
+  \brief   Enable SAU
+  \details Enables the Security Attribution Unit (SAU).
+ */
+__STATIC_INLINE void TZ_SAU_Enable(void)
+{
+    SAU->CTRL = SAU->CTRL | (SAU_CTRL_ENABLE_Msk);    /* read-modify-write: set the ENABLE bit, keep the others */
+}
+
+
+
+/**
+  \brief   Disable SAU
+  \details Disables the Security Attribution Unit (SAU).
+ */
+__STATIC_INLINE void TZ_SAU_Disable(void)
+{
+    SAU->CTRL = SAU->CTRL & ~(SAU_CTRL_ENABLE_Msk);   /* read-modify-write: clear the ENABLE bit, keep the others */
+}
+
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_SAUFunctions */
+
+
+
+/* ###################  PAC Key functions  ########################### */
+
+#if (defined (__ARM_FEATURE_PAUTH) && (__ARM_FEATURE_PAUTH == 1))
+#include "pac_armv81.h"
+#endif
+
+
+/* ##################################    Debug Control function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_DCBFunctions Debug Control Functions
+  \brief    Functions that access the Debug Control Block.
+  @{
+ */
+
+
+/**
+  \brief   Set Debug Authentication Control Register
+  \details writes to Debug Authentication Control register.
+  \param [in]  value  value to be written.
+ */
+__STATIC_INLINE void DCB_SetAuthCtrl(uint32_t value)
+{
+    __DSB();                    /* barrier pair (DSB, then ISB) issued before the register write */
+    __ISB();
+    DCB->DAUTHCTRL = value;     /* store the caller-supplied value into DAUTHCTRL */
+    __DSB();                    /* same barrier pair repeated after the write */
+    __ISB();
+}
+
+
+/**
+  \brief   Get Debug Authentication Control Register
+  \details Reads Debug Authentication Control register.
+  \return             Debug Authentication Control Register.
+ */
+__STATIC_INLINE uint32_t DCB_GetAuthCtrl(void)
+{
+    return (DCB->DAUTHCTRL);    /* plain register read; no barriers, unlike the setter */
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Set Debug Authentication Control Register (non-secure)
+  \details writes to non-secure Debug Authentication Control register when in secure state.
+  \param [in]  value  value to be written
+ */
+__STATIC_INLINE void TZ_DCB_SetAuthCtrl_NS(uint32_t value)
+{
+    __DSB();                    /* barrier pair (DSB, then ISB) issued before the register write */
+    __ISB();
+    DCB_NS->DAUTHCTRL = value;  /* store the caller-supplied value via the DCB_NS register block */
+    __DSB();                    /* same barrier pair repeated after the write */
+    __ISB();
+}
+
+
+/**
+  \brief   Get Debug Authentication Control Register (non-secure)
+  \details Reads non-secure Debug Authentication Control register when in secure state.
+  \return             Debug Authentication Control Register.
+ */
+__STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
+{
+    return (DCB_NS->DAUTHCTRL); /* plain register read through the DCB_NS register block */
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_DCBFunctions */
+
+
+
+
+/* ##################################    Debug Identification function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_DIBFunctions Debug Identification Functions
+  \brief    Functions that access the Debug Identification Block.
+  @{
+ */
+
+
+/**
+  \brief   Get Debug Authentication Status Register
+  \details Reads Debug Authentication Status register.
+  \return             Debug Authentication Status Register.
+ */
+__STATIC_INLINE uint32_t DIB_GetAuthStatus(void)
+{
+    return (DIB->DAUTHSTATUS);  /* plain register read; the raw value is returned unmodified */
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Get Debug Authentication Status Register (non-secure)
+  \details Reads non-secure Debug Authentication Status register when in secure state.
+  \return             Debug Authentication Status Register.
+ */
+__STATIC_INLINE uint32_t TZ_DIB_GetAuthStatus_NS(void)
+{
+    return (DIB_NS->DAUTHSTATUS); /* plain register read through the DIB_NS register block */
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_DIBFunctions */
+
+
+
+
+/* ##################################    SysTick function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_SysTickFunctions SysTick Functions
+  \brief    Functions that configure the System Tick Timer (SysTick).
+  @{
+ */
+
+#if defined (__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U)
+
+/**
+  \brief   System Tick Configuration
+  \details Initializes the System Timer and its interrupt, and starts the System Tick Timer.
+           Counter is in free running mode to generate periodic interrupts.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \return          0  Function succeeded.
+  \return          1  Function failed.
+  \note    When the variable <b>__Vendor_SysTickConfig</b> is set to 1, then the
+           function <b>SysTick_Config</b> is not included. In this case, the file <b><i>device</i>.h</b>
+           must contain a vendor-specific implementation of this function.
+ */
+__STATIC_INLINE uint32_t SysTick_Config(uint32_t ticks)
+{
+  if ((ticks - 1UL) > SysTick_LOAD_RELOAD_Msk)                      /* (ticks - 1) must not exceed the reload mask; unsigned, so ticks == 0 wraps to the maximum value */
+  {
+    return (1UL);                                                   /* Reload value impossible */
+  }
+
+  SysTick->LOAD  = (uint32_t)(ticks - 1UL);                         /* set reload register */
+  NVIC_SetPriority (SysTick_IRQn, (1UL << __NVIC_PRIO_BITS) - 1UL); /* set Priority for Systick Interrupt: largest value that fits in __NVIC_PRIO_BITS bits */
+  SysTick->VAL   = 0UL;                                             /* Load the SysTick Counter Value (written as 0) */
+  SysTick->CTRL  = SysTick_CTRL_CLKSOURCE_Msk |
+                   SysTick_CTRL_TICKINT_Msk   |
+                   SysTick_CTRL_ENABLE_Msk;                         /* Enable SysTick IRQ and SysTick Timer */
+  return (0UL);                                                     /* Function successful */
+}
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   System Tick Configuration (non-secure)
+  \details Initializes the non-secure System Timer and its interrupt when in secure state, and starts the System Tick Timer.
+           Counter is in free running mode to generate periodic interrupts.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \return          0  Function succeeded.
+  \return          1  Function failed.
+  \note    When the variable <b>__Vendor_SysTickConfig</b> is set to 1, then the
+           function <b>TZ_SysTick_Config_NS</b> is not included. In this case, the file <b><i>device</i>.h</b>
+           must contain a vendor-specific implementation of this function.
+
+ */
+__STATIC_INLINE uint32_t TZ_SysTick_Config_NS(uint32_t ticks)
+{
+  if ((ticks - 1UL) > SysTick_LOAD_RELOAD_Msk)                            /* (ticks - 1) must not exceed the reload mask; unsigned, so ticks == 0 wraps to the maximum value */
+  {
+    return (1UL);                                                         /* Reload value impossible */
+  }
+
+  SysTick_NS->LOAD  = (uint32_t)(ticks - 1UL);                            /* set reload register */
+  TZ_NVIC_SetPriority_NS (SysTick_IRQn, (1UL << __NVIC_PRIO_BITS) - 1UL); /* set Priority for Systick Interrupt: largest value that fits in __NVIC_PRIO_BITS bits */
+  SysTick_NS->VAL   = 0UL;                                                /* Load the SysTick Counter Value (written as 0) */
+  SysTick_NS->CTRL  = SysTick_CTRL_CLKSOURCE_Msk |
+                      SysTick_CTRL_TICKINT_Msk   |
+                      SysTick_CTRL_ENABLE_Msk;                            /* Enable SysTick IRQ and SysTick Timer */
+  return (0UL);                                                           /* Function successful */
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+#endif
+
+/*@} end of CMSIS_Core_SysTickFunctions */
+
+
+
+/* ##################################### Debug In/Output function ########################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_core_DebugFunctions ITM Functions
+  \brief    Functions that access the ITM debug interface.
+  @{
+ */
+
+extern volatile int32_t ITM_RxBuffer;                              /*!< External variable to receive characters. */
+#define                 ITM_RXBUFFER_EMPTY  ((int32_t)0x5AA55AA5U) /*!< Value identifying \ref ITM_RxBuffer is ready for next character. */
+
+
+/**
+  \brief   ITM Send Character
+  \details Transmits a character via the ITM channel 0, and
+           \li Just returns when no debugger is connected that has booked the output.
+           \li Is blocking when a debugger is connected, but the previous character sent has not been transmitted.
+  \param [in]     ch  Character to transmit.
+  \returns            Character to transmit.
+ */
+__STATIC_INLINE uint32_t ITM_SendChar (uint32_t ch)
+{
+  if (((ITM->TCR & ITM_TCR_ITMENA_Msk) != 0UL) &&      /* ITM enabled */
+      ((ITM->TER & 1UL               ) != 0UL)   )     /* ITM Port #0 enabled */
+  {
+    while (ITM->PORT[0U].u32 == 0UL)                   /* busy-wait for as long as port 0 reads back zero */
+    {
+      __NOP();
+    }
+    ITM->PORT[0U].u8 = (uint8_t)ch;                    /* only the low 8 bits of ch are written */
+  }
+  return (ch);                                         /* ch is returned unchanged, whether or not it was sent */
+}
+
+
+/**
+  \brief   ITM Receive Character
+  \details Inputs a character via the external variable \ref ITM_RxBuffer.
+  \return             Received character.
+  \return         -1  No character pending.
+ */
+__STATIC_INLINE int32_t ITM_ReceiveChar (void)
+{
+  int32_t received;
+
+  if (ITM_RxBuffer == ITM_RXBUFFER_EMPTY)
+  {
+    return (-1);                             /* nothing pending in the buffer */
+  }
+  received     = ITM_RxBuffer;               /* take the pending character ... */
+  ITM_RxBuffer = ITM_RXBUFFER_EMPTY;         /* ... and mark the buffer ready for the next one */
+  return (received);
+}
+
+
+/**
+  \brief   ITM Check Character
+  \details Checks whether a character is pending for reading in the variable \ref ITM_RxBuffer.
+  \return          0  No character available.
+  \return          1  Character available.
+ */
+__STATIC_INLINE int32_t ITM_CheckChar (void)
+{
+  /* Any buffer content other than the ITM_RXBUFFER_EMPTY marker counts
+     as a pending character. */
+  int32_t pending = 1;                       /*    character available */
+
+  if (ITM_RxBuffer == ITM_RXBUFFER_EMPTY)
+  {
+    pending = 0;                             /* no character available */
+  }
+  return (pending);
+}
+
+/*@} end of CMSIS_core_DebugFunctions */
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_CM85_H_DEPENDANT */
+
+#endif /* __CMSIS_GENERIC */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_sc300.h b/edge-impulse-sdk/CMSIS/Core/Include/core_sc300.h
index 03a02cc..f6c3bfd 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/core_sc300.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_sc300.h
@@ -1,11 +1,11 @@
 /**************************************************************************//**
  * @file     core_sc300.h
  * @brief    CMSIS SC300 Core Peripheral Access Layer Header File
- * @version  V5.0.9
- * @date     27. March 2020
+ * @version  V5.0.10
+ * @date     04. June 2021
  ******************************************************************************/
 /*
- * Copyright (c) 2009-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2009-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -562,19 +562,19 @@ typedef struct
 #define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask */
 
 /* MemManage Fault Status Register (part of SCB Configurable Fault Status Register) */
-#define SCB_CFSR_MMARVALID_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 7U)               /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
 #define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
 
-#define SCB_CFSR_MSTKERR_Pos               (SCB_SHCSR_MEMFAULTACT_Pos + 4U)               /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
 #define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
 
-#define SCB_CFSR_MUNSTKERR_Pos             (SCB_SHCSR_MEMFAULTACT_Pos + 3U)               /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
 #define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
 
-#define SCB_CFSR_DACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 1U)               /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
 #define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
 
-#define SCB_CFSR_IACCVIOL_Pos              (SCB_SHCSR_MEMFAULTACT_Pos + 0U)               /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
 #define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
 
 /* BusFault Status Register (part of SCB Configurable Fault Status Register) */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/core_starmc1.h b/edge-impulse-sdk/CMSIS/Core/Include/core_starmc1.h
new file mode 100644
index 0000000..a6a399d
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/Core/Include/core_starmc1.h
@@ -0,0 +1,3592 @@
+/**************************************************************************//**
+ * @file     core_starmc1.h
+ * @brief    CMSIS ArmChina STAR-MC1 Core Peripheral Access Layer Header File
+ * @version  V1.0.2
+ * @date     07. April 2022
+ ******************************************************************************/
+/*
+ * Copyright (c) 2009-2018 Arm Limited. 
+ * Copyright (c) 2018-2022 Arm China. 
+ * All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if   defined ( __ICCARM__ )
+  #pragma system_include         /* treat file as system include file for MISRA check */
+#elif defined (__clang__)
+  #pragma clang system_header                   /* treat file as system include file */
+#elif defined ( __GNUC__ )
+  #pragma GCC diagnostic ignored "-Wpedantic"   /* disable pedantic warning due to unnamed structs/unions */
+#endif
+
+#ifndef __CORE_STAR_H_GENERIC
+#define __CORE_STAR_H_GENERIC
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/**
+  \page CMSIS_MISRA_Exceptions  MISRA-C:2004 Compliance Exceptions
+  CMSIS violates the following MISRA-C:2004 rules:
+
+   \li Required Rule 8.5, object/function definition in header file.<br>
+     Function definitions in header files are used to allow 'inlining'.
+
+   \li Required Rule 18.4, declaration of union type or object of union type: '{...}'.<br>
+     Unions are used for effective representation of core registers.
+
+   \li Advisory Rule 19.7, Function-like macro defined.<br>
+     Function-like macros are used to allow more efficient code.
+ */
+
+
+/*******************************************************************************
+ *                 CMSIS definitions
+ ******************************************************************************/
+/**
+  \ingroup STAR-MC1
+  @{
+ */
+
+#include "cmsis_version.h"
+
+/* Macro Define for STAR-MC1 */
+#define __STAR_MC                 (1U)                                       /*!< STAR-MC Core */
+
+/** __FPU_USED indicates whether an FPU is used or not.
+    For this, __FPU_PRESENT has to be checked prior to making use of FPU specific registers and functions.
+*/
+#if defined ( __CC_ARM )
+  #if defined (__TARGET_FPU_VFP)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1U)
+    #if defined (__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+  #if defined (__ARM_FP)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1U)
+    #if defined (__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __GNUC__ )
+  #if defined (__VFP_FP__) && !defined(__SOFTFP__)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1U)
+    #if defined (__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __ICCARM__ )
+  #if defined (__ARMVFP__)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+  #if defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1U)
+    #if defined (__DSP_PRESENT) && (__DSP_PRESENT == 1U)
+      #define __DSP_USED       1U
+    #else
+      #error "Compiler generates DSP (SIMD) instructions for a devices without DSP extensions (check __DSP_PRESENT)"
+      #define __DSP_USED         0U
+    #endif
+  #else
+    #define __DSP_USED         0U
+  #endif
+
+#elif defined ( __TI_ARM__ )
+  #if defined (__TI_VFP_SUPPORT__)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#elif defined ( __TASKING__ )
+  #if defined (__FPU_VFP__)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#elif defined ( __CSMC__ )
+  #if ( __CSMC__ & 0x400U)
+    #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)
+      #define __FPU_USED       1U
+    #else
+      #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)"
+      #define __FPU_USED       0U
+    #endif
+  #else
+    #define __FPU_USED         0U
+  #endif
+
+#endif
+
+#include "edge-impulse-sdk/CMSIS/Core/Include/cmsis_compiler.h"               /* CMSIS compiler specific defines */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_STAR_H_GENERIC */
+
+#ifndef __CMSIS_GENERIC
+
+#ifndef __CORE_STAR_H_DEPENDANT
+#define __CORE_STAR_H_DEPENDANT
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/* check device defines and use defaults */
+#if defined __CHECK_DEVICE_DEFINES
+  #ifndef __STAR_REV
+    #define __STAR_REV                0x0000U
+    #warning "__STAR_REV not defined in device header file; using default!"
+  #endif
+
+  #ifndef __FPU_PRESENT
+    #define __FPU_PRESENT             0U
+    #warning "__FPU_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __MPU_PRESENT
+    #define __MPU_PRESENT             0U
+    #warning "__MPU_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __SAUREGION_PRESENT
+    #define __SAUREGION_PRESENT       0U
+    #warning "__SAUREGION_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __DSP_PRESENT
+    #define __DSP_PRESENT             0U
+    #warning "__DSP_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __ICACHE_PRESENT
+    #define __ICACHE_PRESENT          0U
+    #warning "__ICACHE_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __DCACHE_PRESENT
+    #define __DCACHE_PRESENT          0U
+    #warning "__DCACHE_PRESENT not defined in device header file; using default!"
+  #endif
+
+  #ifndef __DTCM_PRESENT
+    #define __DTCM_PRESENT            0U
+    #warning "__DTCM_PRESENT        not defined in device header file; using default!"
+  #endif
+
+  #ifndef __NVIC_PRIO_BITS
+    #define __NVIC_PRIO_BITS          3U
+    #warning "__NVIC_PRIO_BITS not defined in device header file; using default!"
+  #endif
+
+  #ifndef __Vendor_SysTickConfig
+    #define __Vendor_SysTickConfig    0U
+    #warning "__Vendor_SysTickConfig not defined in device header file; using default!"
+  #endif
+#endif
+
+/* IO definitions (access restrictions to peripheral registers) */
+/**
+    \defgroup CMSIS_glob_defs CMSIS Global Defines
+
+    <strong>IO Type Qualifiers</strong> are used
+    \li to specify the access to peripheral variables.
+    \li for automatic generation of peripheral register debug information.
+*/
+#ifdef __cplusplus
+  #define   __I     volatile             /*!< Defines 'read only' permissions */
+#else
+  #define   __I     volatile const       /*!< Defines 'read only' permissions */
+#endif
+#define     __O     volatile             /*!< Defines 'write only' permissions */
+#define     __IO    volatile             /*!< Defines 'read / write' permissions */
+
+/* following defines should be used for structure members */
+#define     __IM     volatile const      /*! Defines 'read only' structure member permissions */
+#define     __OM     volatile            /*! Defines 'write only' structure member permissions */
+#define     __IOM    volatile            /*! Defines 'read / write' structure member permissions */
+
+/*@} end of group STAR-MC1 */
+
+
+
+/*******************************************************************************
+ *                 Register Abstraction
+  Core Registers contain:
+  - Core Register
+  - Core NVIC Register
+  - Core SCB Register
+  - Core SysTick Register
+  - Core Debug Register
+  - Core MPU Register
+  - Core SAU Register
+  - Core FPU Register
+ ******************************************************************************/
+/**
+  \defgroup CMSIS_core_register Defines and Type Definitions
+  \brief Type definitions and defines for STAR-MC1 processor based devices.
+*/
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_CORE  Status and Control Registers
+  \brief      Core Register type definitions.
+  @{
+ */
+
+/**
+  \brief  Union type to access the Application Program Status Register (APSR).
+ */
+typedef union                            /* one 32-bit value, viewed as bit-fields (b) or as a whole word (w) */
+{
+  struct                                 /* field widths below add up to 32 bits */
+  {
+    uint32_t _reserved0:16;              /*!< bit:  0..15  Reserved */
+    uint32_t GE:4;                       /*!< bit: 16..19  Greater than or Equal flags */
+    uint32_t _reserved1:7;               /*!< bit: 20..26  Reserved */
+    uint32_t Q:1;                        /*!< bit:     27  Saturation condition flag */
+    uint32_t V:1;                        /*!< bit:     28  Overflow condition code flag */
+    uint32_t C:1;                        /*!< bit:     29  Carry condition code flag */
+    uint32_t Z:1;                        /*!< bit:     30  Zero condition code flag */
+    uint32_t N:1;                        /*!< bit:     31  Negative condition code flag */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} APSR_Type;
+
+/* APSR Register Definitions */
+#define APSR_N_Pos                         31U                                            /*!< APSR: N Position */
+#define APSR_N_Msk                         (1UL << APSR_N_Pos)                            /*!< APSR: N Mask */
+
+#define APSR_Z_Pos                         30U                                            /*!< APSR: Z Position */
+#define APSR_Z_Msk                         (1UL << APSR_Z_Pos)                            /*!< APSR: Z Mask */
+
+#define APSR_C_Pos                         29U                                            /*!< APSR: C Position */
+#define APSR_C_Msk                         (1UL << APSR_C_Pos)                            /*!< APSR: C Mask */
+
+#define APSR_V_Pos                         28U                                            /*!< APSR: V Position */
+#define APSR_V_Msk                         (1UL << APSR_V_Pos)                            /*!< APSR: V Mask */
+
+#define APSR_Q_Pos                         27U                                            /*!< APSR: Q Position */
+#define APSR_Q_Msk                         (1UL << APSR_Q_Pos)                            /*!< APSR: Q Mask */
+
+#define APSR_GE_Pos                        16U                                            /*!< APSR: GE Position */
+#define APSR_GE_Msk                        (0xFUL << APSR_GE_Pos)                         /*!< APSR: GE Mask */
+
+
+/**
+  \brief  Union type to access the Interrupt Program Status Register (IPSR).
+ */
+typedef union                            /* one 32-bit value, viewed as bit-fields (b) or as a whole word (w) */
+{
+  struct                                 /* field widths below add up to 32 bits */
+  {
+    uint32_t ISR:9;                      /*!< bit:  0.. 8  Exception number */
+    uint32_t _reserved0:23;              /*!< bit:  9..31  Reserved */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} IPSR_Type;
+
+/* IPSR Register Definitions */
+#define IPSR_ISR_Pos                        0U                                            /*!< IPSR: ISR Position */
+#define IPSR_ISR_Msk                       (0x1FFUL /*<< IPSR_ISR_Pos*/)                  /*!< IPSR: ISR Mask */
+
+
+/**
+  \brief  Union type to access the Special-Purpose Program Status Registers (xPSR).
+ */
+typedef union                            /* one 32-bit value, viewed as bit-fields (b) or as a whole word (w) */
+{
+  struct                                 /* field widths below add up to 32 bits */
+  {
+    uint32_t ISR:9;                      /*!< bit:  0.. 8  Exception number */
+    uint32_t _reserved0:7;               /*!< bit:  9..15  Reserved */
+    uint32_t GE:4;                       /*!< bit: 16..19  Greater than or Equal flags */
+    uint32_t _reserved1:4;               /*!< bit: 20..23  Reserved */
+    uint32_t T:1;                        /*!< bit:     24  Thumb bit        (read 0) */
+    uint32_t IT:2;                       /*!< bit: 25..26  saved IT state   (read 0) */
+    uint32_t Q:1;                        /*!< bit:     27  Saturation condition flag */
+    uint32_t V:1;                        /*!< bit:     28  Overflow condition code flag */
+    uint32_t C:1;                        /*!< bit:     29  Carry condition code flag */
+    uint32_t Z:1;                        /*!< bit:     30  Zero condition code flag */
+    uint32_t N:1;                        /*!< bit:     31  Negative condition code flag */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} xPSR_Type;
+
+/* xPSR Register Definitions: bit position (_Pos) and mask (_Msk) per field; the shift is left commented out where the position is 0 */
+#define xPSR_N_Pos                         31U                                            /*!< xPSR: N Position */
+#define xPSR_N_Msk                         (1UL << xPSR_N_Pos)                            /*!< xPSR: N Mask */
+
+#define xPSR_Z_Pos                         30U                                            /*!< xPSR: Z Position */
+#define xPSR_Z_Msk                         (1UL << xPSR_Z_Pos)                            /*!< xPSR: Z Mask */
+
+#define xPSR_C_Pos                         29U                                            /*!< xPSR: C Position */
+#define xPSR_C_Msk                         (1UL << xPSR_C_Pos)                            /*!< xPSR: C Mask */
+
+#define xPSR_V_Pos                         28U                                            /*!< xPSR: V Position */
+#define xPSR_V_Msk                         (1UL << xPSR_V_Pos)                            /*!< xPSR: V Mask */
+
+#define xPSR_Q_Pos                         27U                                            /*!< xPSR: Q Position */
+#define xPSR_Q_Msk                         (1UL << xPSR_Q_Pos)                            /*!< xPSR: Q Mask */
+
+#define xPSR_IT_Pos                        25U                                            /*!< xPSR: IT Position */
+#define xPSR_IT_Msk                        (3UL << xPSR_IT_Pos)                           /*!< xPSR: IT Mask (2 bits) */
+
+#define xPSR_T_Pos                         24U                                            /*!< xPSR: T Position */
+#define xPSR_T_Msk                         (1UL << xPSR_T_Pos)                            /*!< xPSR: T Mask */
+
+#define xPSR_GE_Pos                        16U                                            /*!< xPSR: GE Position */
+#define xPSR_GE_Msk                        (0xFUL << xPSR_GE_Pos)                         /*!< xPSR: GE Mask (4 bits) */
+
+#define xPSR_ISR_Pos                        0U                                            /*!< xPSR: ISR Position */
+#define xPSR_ISR_Msk                       (0x1FFUL /*<< xPSR_ISR_Pos*/)                  /*!< xPSR: ISR Mask (9 bits) */
+
+
+/**
+  \brief  Union type to access the CONTROL register, either field-by-field (member b) or as one 32-bit word (member w).
+ */
+typedef union
+{
+  struct
+  {
+    uint32_t nPRIV:1;                    /*!< bit:      0  Execution privilege in Thread mode */
+    uint32_t SPSEL:1;                    /*!< bit:      1  Stack-pointer select */
+    uint32_t FPCA:1;                     /*!< bit:      2  Floating-point context active */
+    uint32_t SFPA:1;                     /*!< bit:      3  Secure floating-point active */
+    uint32_t _reserved1:28;              /*!< bit:  4..31  Reserved */
+  } b;                                   /*!< Structure used for bit  access */
+  uint32_t w;                            /*!< Type      used for word access */
+} CONTROL_Type;
+
+/* CONTROL Register Definitions: bit position (_Pos) and mask (_Msk) per field; the shift is left commented out where the position is 0 */
+#define CONTROL_SFPA_Pos                    3U                                            /*!< CONTROL: SFPA Position */
+#define CONTROL_SFPA_Msk                   (1UL << CONTROL_SFPA_Pos)                      /*!< CONTROL: SFPA Mask */
+
+#define CONTROL_FPCA_Pos                    2U                                            /*!< CONTROL: FPCA Position */
+#define CONTROL_FPCA_Msk                   (1UL << CONTROL_FPCA_Pos)                      /*!< CONTROL: FPCA Mask */
+
+#define CONTROL_SPSEL_Pos                   1U                                            /*!< CONTROL: SPSEL Position */
+#define CONTROL_SPSEL_Msk                  (1UL << CONTROL_SPSEL_Pos)                     /*!< CONTROL: SPSEL Mask */
+
+#define CONTROL_nPRIV_Pos                   0U                                            /*!< CONTROL: nPRIV Position */
+#define CONTROL_nPRIV_Msk                  (1UL /*<< CONTROL_nPRIV_Pos*/)                 /*!< CONTROL: nPRIV Mask */
+
+/*@} end of group CMSIS_CORE */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_NVIC  Nested Vectored Interrupt Controller (NVIC)
+  \brief      Type definitions for the NVIC Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Nested Vectored Interrupt Controller (NVIC); the reserved arrays pad each register bank to the offset noted on the next member.
+ */
+typedef struct
+{
+  __IOM uint32_t ISER[16U];              /*!< Offset: 0x000 (R/W)  Interrupt Set Enable Register */
+        uint32_t RESERVED0[16U];         /*   Offset: 0x040        Reserved gap up to ICER */
+  __IOM uint32_t ICER[16U];              /*!< Offset: 0x080 (R/W)  Interrupt Clear Enable Register */
+        uint32_t RSERVED1[16U];          /*   Offset: 0x0C0        Reserved gap up to ISPR (identifier spelling "RSERVED1" is as declared) */
+  __IOM uint32_t ISPR[16U];              /*!< Offset: 0x100 (R/W)  Interrupt Set Pending Register */
+        uint32_t RESERVED2[16U];         /*   Offset: 0x140        Reserved gap up to ICPR */
+  __IOM uint32_t ICPR[16U];              /*!< Offset: 0x180 (R/W)  Interrupt Clear Pending Register */
+        uint32_t RESERVED3[16U];         /*   Offset: 0x1C0        Reserved gap up to IABR */
+  __IOM uint32_t IABR[16U];              /*!< Offset: 0x200 (R/W)  Interrupt Active bit Register */
+        uint32_t RESERVED4[16U];         /*   Offset: 0x240        Reserved gap up to ITNS */
+  __IOM uint32_t ITNS[16U];              /*!< Offset: 0x280 (R/W)  Interrupt Non-Secure State Register */
+        uint32_t RESERVED5[16U];         /*   Offset: 0x2C0        Reserved gap up to IPR */
+  __IOM uint8_t  IPR[496U];              /*!< Offset: 0x300 (R/W)  Interrupt Priority Register (8Bit wide) */
+        uint32_t RESERVED6[580U];        /*   Offset: 0x4F0        Reserved gap up to STIR */
+  __OM  uint32_t STIR;                   /*!< Offset: 0xE00 ( /W)  Software Trigger Interrupt Register */
+}  NVIC_Type;
+
+/* Software Triggered Interrupt Register (STIR) Definitions */
+#define NVIC_STIR_INTID_Pos                 0U                                         /*!< STIR: INTID Position */
+#define NVIC_STIR_INTID_Msk                (0x1FFUL /*<< NVIC_STIR_INTID_Pos*/)        /*!< STIR: INTID Mask (9 bits) */
+
+/*@} end of group CMSIS_NVIC */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SCB     System Control Block (SCB)
+  \brief    Type definitions for the System Control Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the System Control Block (SCB); the reserved arrays pad the layout to the offset noted on the next member.
+ */
+typedef struct
+{
+  __IM  uint32_t CPUID;                  /*!< Offset: 0x000 (R/ )  CPUID Base Register */
+  __IOM uint32_t ICSR;                   /*!< Offset: 0x004 (R/W)  Interrupt Control and State Register */
+  __IOM uint32_t VTOR;                   /*!< Offset: 0x008 (R/W)  Vector Table Offset Register */
+  __IOM uint32_t AIRCR;                  /*!< Offset: 0x00C (R/W)  Application Interrupt and Reset Control Register */
+  __IOM uint32_t SCR;                    /*!< Offset: 0x010 (R/W)  System Control Register */
+  __IOM uint32_t CCR;                    /*!< Offset: 0x014 (R/W)  Configuration Control Register */
+  __IOM uint8_t  SHPR[12U];              /*!< Offset: 0x018 (R/W)  System Handlers Priority Registers (4-7, 8-11, 12-15) */
+  __IOM uint32_t SHCSR;                  /*!< Offset: 0x024 (R/W)  System Handler Control and State Register */
+  __IOM uint32_t CFSR;                   /*!< Offset: 0x028 (R/W)  Configurable Fault Status Register */
+  __IOM uint32_t HFSR;                   /*!< Offset: 0x02C (R/W)  HardFault Status Register */
+  __IOM uint32_t DFSR;                   /*!< Offset: 0x030 (R/W)  Debug Fault Status Register */
+  __IOM uint32_t MMFAR;                  /*!< Offset: 0x034 (R/W)  MemManage Fault Address Register */
+  __IOM uint32_t BFAR;                   /*!< Offset: 0x038 (R/W)  BusFault Address Register */
+  __IOM uint32_t AFSR;                   /*!< Offset: 0x03C (R/W)  Auxiliary Fault Status Register */
+  __IM  uint32_t ID_PFR[2U];             /*!< Offset: 0x040 (R/ )  Processor Feature Register */
+  __IM  uint32_t ID_DFR;                 /*!< Offset: 0x048 (R/ )  Debug Feature Register */
+  __IM  uint32_t ID_AFR;                 /*!< Offset: 0x04C (R/ )  Auxiliary Feature Register */
+  __IM  uint32_t ID_MMFR[4U];            /*!< Offset: 0x050 (R/ )  Memory Model Feature Register */
+  __IM  uint32_t ID_ISAR[5U];            /*!< Offset: 0x060 (R/ )  Instruction Set Attributes Register */
+        uint32_t RESERVED0[1U];          /*   Offset: 0x074        Reserved word before CLIDR */
+  __IM  uint32_t CLIDR;                  /*!< Offset: 0x078 (R/ )  Cache Level ID register */
+  __IM  uint32_t CTR;                    /*!< Offset: 0x07C (R/ )  Cache Type register */
+  __IM  uint32_t CCSIDR;                 /*!< Offset: 0x080 (R/ )  Cache Size ID Register */
+  __IOM uint32_t CSSELR;                 /*!< Offset: 0x084 (R/W)  Cache Size Selection Register */
+  __IOM uint32_t CPACR;                  /*!< Offset: 0x088 (R/W)  Coprocessor Access Control Register */
+  __IOM uint32_t NSACR;                  /*!< Offset: 0x08C (R/W)  Non-Secure Access Control Register */
+        uint32_t RESERVED_ADD1[21U];     /*   Offset: 0x090        Reserved gap (21 words) up to SFSR */
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x0E4 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x0E8 (R/W)  Secure Fault Address Register */
+        uint32_t RESERVED3[69U];         /*   Offset: 0x0EC        Reserved gap (69 words) up to STIR */
+  __OM  uint32_t STIR;                   /*!< Offset: 0x200 ( /W)  Software Triggered Interrupt Register */
+        uint32_t RESERVED4[15U];         /*   Offset: 0x204        Reserved gap (15 words) up to MVFR0 */
+  __IM  uint32_t MVFR0;                  /*!< Offset: 0x240 (R/ )  Media and VFP Feature Register 0 */
+  __IM  uint32_t MVFR1;                  /*!< Offset: 0x244 (R/ )  Media and VFP Feature Register 1 */
+  __IM  uint32_t MVFR2;                  /*!< Offset: 0x248 (R/ )  Media and VFP Feature Register 2 */
+        uint32_t RESERVED5[1U];          /*   Offset: 0x24C        Reserved word before ICIALLU */
+  __OM  uint32_t ICIALLU;                /*!< Offset: 0x250 ( /W)  I-Cache Invalidate All to PoU */
+        uint32_t RESERVED6[1U];          /*   Offset: 0x254        Reserved word before ICIMVAU */
+  __OM  uint32_t ICIMVAU;                /*!< Offset: 0x258 ( /W)  I-Cache Invalidate by MVA to PoU */
+  __OM  uint32_t DCIMVAC;                /*!< Offset: 0x25C ( /W)  D-Cache Invalidate by MVA to PoC */
+  __OM  uint32_t DCISW;                  /*!< Offset: 0x260 ( /W)  D-Cache Invalidate by Set-way */
+  __OM  uint32_t DCCMVAU;                /*!< Offset: 0x264 ( /W)  D-Cache Clean by MVA to PoU */
+  __OM  uint32_t DCCMVAC;                /*!< Offset: 0x268 ( /W)  D-Cache Clean by MVA to PoC */
+  __OM  uint32_t DCCSW;                  /*!< Offset: 0x26C ( /W)  D-Cache Clean by Set-way */
+  __OM  uint32_t DCCIMVAC;               /*!< Offset: 0x270 ( /W)  D-Cache Clean and Invalidate by MVA to PoC */
+  __OM  uint32_t DCCISW;                 /*!< Offset: 0x274 ( /W)  D-Cache Clean and Invalidate by Set-way */
+} SCB_Type;
+
+typedef struct
+{
+  __IOM uint32_t CACR;				       /*!< Offset: 0x0 (R/W)  L1 Cache Control Register */
+  __IOM uint32_t ITCMCR;				   /*!< Offset: 0x10 (R/W)  Instruction Tightly-Coupled Memory Control Register. NOTE(review): with no padding after CACR this member is laid out at 0x04, not 0x10 - confirm against the core's TRM */
+  __IOM uint32_t DTCMCR;				   /*!< Offset: 0x14 (R/W)  Data Tightly-Coupled Memory Control Registers. NOTE(review): as declared this member is laid out at 0x08, not 0x14 - confirm against the core's TRM */
+}EMSS_Type;
+
+/* SCB CPUID Base Register (CPUID) Definitions: bit position (_Pos) and mask (_Msk) per field */
+#define SCB_CPUID_IMPLEMENTER_Pos          24U                                            /*!< SCB CPUID: IMPLEMENTER Position */
+#define SCB_CPUID_IMPLEMENTER_Msk          (0xFFUL << SCB_CPUID_IMPLEMENTER_Pos)          /*!< SCB CPUID: IMPLEMENTER Mask */
+
+#define SCB_CPUID_VARIANT_Pos              20U                                            /*!< SCB CPUID: VARIANT Position */
+#define SCB_CPUID_VARIANT_Msk              (0xFUL << SCB_CPUID_VARIANT_Pos)               /*!< SCB CPUID: VARIANT Mask */
+
+#define SCB_CPUID_ARCHITECTURE_Pos         16U                                            /*!< SCB CPUID: ARCHITECTURE Position */
+#define SCB_CPUID_ARCHITECTURE_Msk         (0xFUL << SCB_CPUID_ARCHITECTURE_Pos)          /*!< SCB CPUID: ARCHITECTURE Mask */
+
+#define SCB_CPUID_PARTNO_Pos                4U                                            /*!< SCB CPUID: PARTNO Position */
+#define SCB_CPUID_PARTNO_Msk               (0xFFFUL << SCB_CPUID_PARTNO_Pos)              /*!< SCB CPUID: PARTNO Mask */
+
+#define SCB_CPUID_REVISION_Pos              0U                                            /*!< SCB CPUID: REVISION Position */
+#define SCB_CPUID_REVISION_Msk             (0xFUL /*<< SCB_CPUID_REVISION_Pos*/)          /*!< SCB CPUID: REVISION Mask */
+
+/* SCB Interrupt Control and State Register (ICSR) Definitions */
+#define SCB_ICSR_PENDNMISET_Pos            31U                                            /*!< SCB ICSR: PENDNMISET Position */
+#define SCB_ICSR_PENDNMISET_Msk            (1UL << SCB_ICSR_PENDNMISET_Pos)               /*!< SCB ICSR: PENDNMISET Mask */
+
+#define SCB_ICSR_NMIPENDSET_Pos            SCB_ICSR_PENDNMISET_Pos                        /*!< SCB ICSR: NMIPENDSET Position, alias of PENDNMISET kept for backward compatibility */
+#define SCB_ICSR_NMIPENDSET_Msk            SCB_ICSR_PENDNMISET_Msk                        /*!< SCB ICSR: NMIPENDSET Mask, alias of PENDNMISET kept for backward compatibility */
+
+#define SCB_ICSR_PENDNMICLR_Pos            30U                                            /*!< SCB ICSR: PENDNMICLR Position */
+#define SCB_ICSR_PENDNMICLR_Msk            (1UL << SCB_ICSR_PENDNMICLR_Pos)               /*!< SCB ICSR: PENDNMICLR Mask */
+
+#define SCB_ICSR_PENDSVSET_Pos             28U                                            /*!< SCB ICSR: PENDSVSET Position */
+#define SCB_ICSR_PENDSVSET_Msk             (1UL << SCB_ICSR_PENDSVSET_Pos)                /*!< SCB ICSR: PENDSVSET Mask */
+
+#define SCB_ICSR_PENDSVCLR_Pos             27U                                            /*!< SCB ICSR: PENDSVCLR Position */
+#define SCB_ICSR_PENDSVCLR_Msk             (1UL << SCB_ICSR_PENDSVCLR_Pos)                /*!< SCB ICSR: PENDSVCLR Mask */
+
+#define SCB_ICSR_PENDSTSET_Pos             26U                                            /*!< SCB ICSR: PENDSTSET Position */
+#define SCB_ICSR_PENDSTSET_Msk             (1UL << SCB_ICSR_PENDSTSET_Pos)                /*!< SCB ICSR: PENDSTSET Mask */
+
+#define SCB_ICSR_PENDSTCLR_Pos             25U                                            /*!< SCB ICSR: PENDSTCLR Position */
+#define SCB_ICSR_PENDSTCLR_Msk             (1UL << SCB_ICSR_PENDSTCLR_Pos)                /*!< SCB ICSR: PENDSTCLR Mask */
+
+#define SCB_ICSR_STTNS_Pos                 24U                                            /*!< SCB ICSR: STTNS Position (Security Extension) */
+#define SCB_ICSR_STTNS_Msk                 (1UL << SCB_ICSR_STTNS_Pos)                    /*!< SCB ICSR: STTNS Mask (Security Extension) */
+
+#define SCB_ICSR_ISRPREEMPT_Pos            23U                                            /*!< SCB ICSR: ISRPREEMPT Position */
+#define SCB_ICSR_ISRPREEMPT_Msk            (1UL << SCB_ICSR_ISRPREEMPT_Pos)               /*!< SCB ICSR: ISRPREEMPT Mask */
+
+#define SCB_ICSR_ISRPENDING_Pos            22U                                            /*!< SCB ICSR: ISRPENDING Position */
+#define SCB_ICSR_ISRPENDING_Msk            (1UL << SCB_ICSR_ISRPENDING_Pos)               /*!< SCB ICSR: ISRPENDING Mask */
+
+#define SCB_ICSR_VECTPENDING_Pos           12U                                            /*!< SCB ICSR: VECTPENDING Position */
+#define SCB_ICSR_VECTPENDING_Msk           (0x1FFUL << SCB_ICSR_VECTPENDING_Pos)          /*!< SCB ICSR: VECTPENDING Mask (9 bits) */
+
+#define SCB_ICSR_RETTOBASE_Pos             11U                                            /*!< SCB ICSR: RETTOBASE Position */
+#define SCB_ICSR_RETTOBASE_Msk             (1UL << SCB_ICSR_RETTOBASE_Pos)                /*!< SCB ICSR: RETTOBASE Mask */
+
+#define SCB_ICSR_VECTACTIVE_Pos             0U                                            /*!< SCB ICSR: VECTACTIVE Position */
+#define SCB_ICSR_VECTACTIVE_Msk            (0x1FFUL /*<< SCB_ICSR_VECTACTIVE_Pos*/)       /*!< SCB ICSR: VECTACTIVE Mask (9 bits) */
+
+/* SCB Vector Table Offset Register (VTOR) Definitions */
+#define SCB_VTOR_TBLOFF_Pos                 7U                                            /*!< SCB VTOR: TBLOFF Position */
+#define SCB_VTOR_TBLOFF_Msk                (0x1FFFFFFUL << SCB_VTOR_TBLOFF_Pos)           /*!< SCB VTOR: TBLOFF Mask (25 bits, i.e. bits 31:7) */
+
+/* SCB Application Interrupt and Reset Control Register (AIRCR) Definitions */
+#define SCB_AIRCR_VECTKEY_Pos              16U                                            /*!< SCB AIRCR: VECTKEY Position */
+#define SCB_AIRCR_VECTKEY_Msk              (0xFFFFUL << SCB_AIRCR_VECTKEY_Pos)            /*!< SCB AIRCR: VECTKEY Mask */
+
+#define SCB_AIRCR_VECTKEYSTAT_Pos          16U                                            /*!< SCB AIRCR: VECTKEYSTAT Position (same bits as VECTKEY) */
+#define SCB_AIRCR_VECTKEYSTAT_Msk          (0xFFFFUL << SCB_AIRCR_VECTKEYSTAT_Pos)        /*!< SCB AIRCR: VECTKEYSTAT Mask (same bits as VECTKEY) */
+
+#define SCB_AIRCR_ENDIANESS_Pos            15U                                            /*!< SCB AIRCR: ENDIANESS Position */
+#define SCB_AIRCR_ENDIANESS_Msk            (1UL << SCB_AIRCR_ENDIANESS_Pos)               /*!< SCB AIRCR: ENDIANESS Mask */
+
+#define SCB_AIRCR_PRIS_Pos                 14U                                            /*!< SCB AIRCR: PRIS Position */
+#define SCB_AIRCR_PRIS_Msk                 (1UL << SCB_AIRCR_PRIS_Pos)                    /*!< SCB AIRCR: PRIS Mask */
+
+#define SCB_AIRCR_BFHFNMINS_Pos            13U                                            /*!< SCB AIRCR: BFHFNMINS Position */
+#define SCB_AIRCR_BFHFNMINS_Msk            (1UL << SCB_AIRCR_BFHFNMINS_Pos)               /*!< SCB AIRCR: BFHFNMINS Mask */
+
+#define SCB_AIRCR_PRIGROUP_Pos              8U                                            /*!< SCB AIRCR: PRIGROUP Position */
+#define SCB_AIRCR_PRIGROUP_Msk             (7UL << SCB_AIRCR_PRIGROUP_Pos)                /*!< SCB AIRCR: PRIGROUP Mask (3 bits) */
+
+#define SCB_AIRCR_SYSRESETREQS_Pos          3U                                            /*!< SCB AIRCR: SYSRESETREQS Position */
+#define SCB_AIRCR_SYSRESETREQS_Msk         (1UL << SCB_AIRCR_SYSRESETREQS_Pos)            /*!< SCB AIRCR: SYSRESETREQS Mask */
+
+#define SCB_AIRCR_SYSRESETREQ_Pos           2U                                            /*!< SCB AIRCR: SYSRESETREQ Position */
+#define SCB_AIRCR_SYSRESETREQ_Msk          (1UL << SCB_AIRCR_SYSRESETREQ_Pos)             /*!< SCB AIRCR: SYSRESETREQ Mask */
+
+#define SCB_AIRCR_VECTCLRACTIVE_Pos         1U                                            /*!< SCB AIRCR: VECTCLRACTIVE Position */
+#define SCB_AIRCR_VECTCLRACTIVE_Msk        (1UL << SCB_AIRCR_VECTCLRACTIVE_Pos)           /*!< SCB AIRCR: VECTCLRACTIVE Mask */
+
+/* SCB System Control Register (SCR) Definitions */
+#define SCB_SCR_SEVONPEND_Pos               4U                                            /*!< SCB SCR: SEVONPEND Position */
+#define SCB_SCR_SEVONPEND_Msk              (1UL << SCB_SCR_SEVONPEND_Pos)                 /*!< SCB SCR: SEVONPEND Mask */
+
+#define SCB_SCR_SLEEPDEEPS_Pos              3U                                            /*!< SCB SCR: SLEEPDEEPS Position */
+#define SCB_SCR_SLEEPDEEPS_Msk             (1UL << SCB_SCR_SLEEPDEEPS_Pos)                /*!< SCB SCR: SLEEPDEEPS Mask */
+
+#define SCB_SCR_SLEEPDEEP_Pos               2U                                            /*!< SCB SCR: SLEEPDEEP Position */
+#define SCB_SCR_SLEEPDEEP_Msk              (1UL << SCB_SCR_SLEEPDEEP_Pos)                 /*!< SCB SCR: SLEEPDEEP Mask */
+
+#define SCB_SCR_SLEEPONEXIT_Pos             1U                                            /*!< SCB SCR: SLEEPONEXIT Position */
+#define SCB_SCR_SLEEPONEXIT_Msk            (1UL << SCB_SCR_SLEEPONEXIT_Pos)               /*!< SCB SCR: SLEEPONEXIT Mask */
+
+/* SCB Configuration Control Register (CCR) Definitions */
+#define SCB_CCR_BP_Pos                     18U                                            /*!< SCB CCR: BP Position */
+#define SCB_CCR_BP_Msk                     (1UL << SCB_CCR_BP_Pos)                        /*!< SCB CCR: BP Mask */
+
+#define SCB_CCR_IC_Pos                     17U                                            /*!< SCB CCR: IC Position */
+#define SCB_CCR_IC_Msk                     (1UL << SCB_CCR_IC_Pos)                        /*!< SCB CCR: IC Mask */
+
+#define SCB_CCR_DC_Pos                     16U                                            /*!< SCB CCR: DC Position */
+#define SCB_CCR_DC_Msk                     (1UL << SCB_CCR_DC_Pos)                        /*!< SCB CCR: DC Mask */
+
+#define SCB_CCR_STKOFHFNMIGN_Pos           10U                                            /*!< SCB CCR: STKOFHFNMIGN Position */
+#define SCB_CCR_STKOFHFNMIGN_Msk           (1UL << SCB_CCR_STKOFHFNMIGN_Pos)              /*!< SCB CCR: STKOFHFNMIGN Mask */
+
+#define SCB_CCR_BFHFNMIGN_Pos               8U                                            /*!< SCB CCR: BFHFNMIGN Position */
+#define SCB_CCR_BFHFNMIGN_Msk              (1UL << SCB_CCR_BFHFNMIGN_Pos)                 /*!< SCB CCR: BFHFNMIGN Mask */
+
+#define SCB_CCR_DIV_0_TRP_Pos               4U                                            /*!< SCB CCR: DIV_0_TRP Position */
+#define SCB_CCR_DIV_0_TRP_Msk              (1UL << SCB_CCR_DIV_0_TRP_Pos)                 /*!< SCB CCR: DIV_0_TRP Mask */
+
+#define SCB_CCR_UNALIGN_TRP_Pos             3U                                            /*!< SCB CCR: UNALIGN_TRP Position */
+#define SCB_CCR_UNALIGN_TRP_Msk            (1UL << SCB_CCR_UNALIGN_TRP_Pos)               /*!< SCB CCR: UNALIGN_TRP Mask */
+
+#define SCB_CCR_USERSETMPEND_Pos            1U                                            /*!< SCB CCR: USERSETMPEND Position */
+#define SCB_CCR_USERSETMPEND_Msk           (1UL << SCB_CCR_USERSETMPEND_Pos)              /*!< SCB CCR: USERSETMPEND Mask */
+
+/* SCB System Handler Control and State Register (SHCSR) Definitions */
+#define SCB_SHCSR_HARDFAULTPENDED_Pos      21U                                            /*!< SCB SHCSR: HARDFAULTPENDED Position */
+#define SCB_SHCSR_HARDFAULTPENDED_Msk      (1UL << SCB_SHCSR_HARDFAULTPENDED_Pos)         /*!< SCB SHCSR: HARDFAULTPENDED Mask */
+
+#define SCB_SHCSR_SECUREFAULTPENDED_Pos    20U                                            /*!< SCB SHCSR: SECUREFAULTPENDED Position */
+#define SCB_SHCSR_SECUREFAULTPENDED_Msk    (1UL << SCB_SHCSR_SECUREFAULTPENDED_Pos)       /*!< SCB SHCSR: SECUREFAULTPENDED Mask */
+
+#define SCB_SHCSR_SECUREFAULTENA_Pos       19U                                            /*!< SCB SHCSR: SECUREFAULTENA Position */
+#define SCB_SHCSR_SECUREFAULTENA_Msk       (1UL << SCB_SHCSR_SECUREFAULTENA_Pos)          /*!< SCB SHCSR: SECUREFAULTENA Mask */
+
+#define SCB_SHCSR_USGFAULTENA_Pos          18U                                            /*!< SCB SHCSR: USGFAULTENA Position */
+#define SCB_SHCSR_USGFAULTENA_Msk          (1UL << SCB_SHCSR_USGFAULTENA_Pos)             /*!< SCB SHCSR: USGFAULTENA Mask */
+
+#define SCB_SHCSR_BUSFAULTENA_Pos          17U                                            /*!< SCB SHCSR: BUSFAULTENA Position */
+#define SCB_SHCSR_BUSFAULTENA_Msk          (1UL << SCB_SHCSR_BUSFAULTENA_Pos)             /*!< SCB SHCSR: BUSFAULTENA Mask */
+
+#define SCB_SHCSR_MEMFAULTENA_Pos          16U                                            /*!< SCB SHCSR: MEMFAULTENA Position */
+#define SCB_SHCSR_MEMFAULTENA_Msk          (1UL << SCB_SHCSR_MEMFAULTENA_Pos)             /*!< SCB SHCSR: MEMFAULTENA Mask */
+
+#define SCB_SHCSR_SVCALLPENDED_Pos         15U                                            /*!< SCB SHCSR: SVCALLPENDED Position */
+#define SCB_SHCSR_SVCALLPENDED_Msk         (1UL << SCB_SHCSR_SVCALLPENDED_Pos)            /*!< SCB SHCSR: SVCALLPENDED Mask */
+
+#define SCB_SHCSR_BUSFAULTPENDED_Pos       14U                                            /*!< SCB SHCSR: BUSFAULTPENDED Position */
+#define SCB_SHCSR_BUSFAULTPENDED_Msk       (1UL << SCB_SHCSR_BUSFAULTPENDED_Pos)          /*!< SCB SHCSR: BUSFAULTPENDED Mask */
+
+#define SCB_SHCSR_MEMFAULTPENDED_Pos       13U                                            /*!< SCB SHCSR: MEMFAULTPENDED Position */
+#define SCB_SHCSR_MEMFAULTPENDED_Msk       (1UL << SCB_SHCSR_MEMFAULTPENDED_Pos)          /*!< SCB SHCSR: MEMFAULTPENDED Mask */
+
+#define SCB_SHCSR_USGFAULTPENDED_Pos       12U                                            /*!< SCB SHCSR: USGFAULTPENDED Position */
+#define SCB_SHCSR_USGFAULTPENDED_Msk       (1UL << SCB_SHCSR_USGFAULTPENDED_Pos)          /*!< SCB SHCSR: USGFAULTPENDED Mask */
+
+#define SCB_SHCSR_SYSTICKACT_Pos           11U                                            /*!< SCB SHCSR: SYSTICKACT Position */
+#define SCB_SHCSR_SYSTICKACT_Msk           (1UL << SCB_SHCSR_SYSTICKACT_Pos)              /*!< SCB SHCSR: SYSTICKACT Mask */
+
+#define SCB_SHCSR_PENDSVACT_Pos            10U                                            /*!< SCB SHCSR: PENDSVACT Position */
+#define SCB_SHCSR_PENDSVACT_Msk            (1UL << SCB_SHCSR_PENDSVACT_Pos)               /*!< SCB SHCSR: PENDSVACT Mask */
+
+#define SCB_SHCSR_MONITORACT_Pos            8U                                            /*!< SCB SHCSR: MONITORACT Position */
+#define SCB_SHCSR_MONITORACT_Msk           (1UL << SCB_SHCSR_MONITORACT_Pos)              /*!< SCB SHCSR: MONITORACT Mask */
+
+#define SCB_SHCSR_SVCALLACT_Pos             7U                                            /*!< SCB SHCSR: SVCALLACT Position */
+#define SCB_SHCSR_SVCALLACT_Msk            (1UL << SCB_SHCSR_SVCALLACT_Pos)               /*!< SCB SHCSR: SVCALLACT Mask */
+
+#define SCB_SHCSR_NMIACT_Pos                5U                                            /*!< SCB SHCSR: NMIACT Position */
+#define SCB_SHCSR_NMIACT_Msk               (1UL << SCB_SHCSR_NMIACT_Pos)                  /*!< SCB SHCSR: NMIACT Mask */
+
+#define SCB_SHCSR_SECUREFAULTACT_Pos        4U                                            /*!< SCB SHCSR: SECUREFAULTACT Position */
+#define SCB_SHCSR_SECUREFAULTACT_Msk       (1UL << SCB_SHCSR_SECUREFAULTACT_Pos)          /*!< SCB SHCSR: SECUREFAULTACT Mask */
+
+#define SCB_SHCSR_USGFAULTACT_Pos           3U                                            /*!< SCB SHCSR: USGFAULTACT Position */
+#define SCB_SHCSR_USGFAULTACT_Msk          (1UL << SCB_SHCSR_USGFAULTACT_Pos)             /*!< SCB SHCSR: USGFAULTACT Mask */
+
+#define SCB_SHCSR_HARDFAULTACT_Pos          2U                                            /*!< SCB SHCSR: HARDFAULTACT Position */
+#define SCB_SHCSR_HARDFAULTACT_Msk         (1UL << SCB_SHCSR_HARDFAULTACT_Pos)            /*!< SCB SHCSR: HARDFAULTACT Mask */
+
+#define SCB_SHCSR_BUSFAULTACT_Pos           1U                                            /*!< SCB SHCSR: BUSFAULTACT Position */
+#define SCB_SHCSR_BUSFAULTACT_Msk          (1UL << SCB_SHCSR_BUSFAULTACT_Pos)             /*!< SCB SHCSR: BUSFAULTACT Mask */
+
+#define SCB_SHCSR_MEMFAULTACT_Pos           0U                                            /*!< SCB SHCSR: MEMFAULTACT Position */
+#define SCB_SHCSR_MEMFAULTACT_Msk          (1UL /*<< SCB_SHCSR_MEMFAULTACT_Pos*/)         /*!< SCB SHCSR: MEMFAULTACT Mask */
+
+/* SCB Configurable Fault Status Register (CFSR) Definitions: the usage, bus and memory-manage fault status fields packed into CFSR */
+#define SCB_CFSR_USGFAULTSR_Pos            16U                                            /*!< SCB CFSR: Usage Fault Status Register Position */
+#define SCB_CFSR_USGFAULTSR_Msk            (0xFFFFUL << SCB_CFSR_USGFAULTSR_Pos)          /*!< SCB CFSR: Usage Fault Status Register Mask (bits 31:16) */
+
+#define SCB_CFSR_BUSFAULTSR_Pos             8U                                            /*!< SCB CFSR: Bus Fault Status Register Position */
+#define SCB_CFSR_BUSFAULTSR_Msk            (0xFFUL << SCB_CFSR_BUSFAULTSR_Pos)            /*!< SCB CFSR: Bus Fault Status Register Mask (bits 15:8) */
+
+#define SCB_CFSR_MEMFAULTSR_Pos             0U                                            /*!< SCB CFSR: Memory Manage Fault Status Register Position */
+#define SCB_CFSR_MEMFAULTSR_Msk            (0xFFUL /*<< SCB_CFSR_MEMFAULTSR_Pos*/)        /*!< SCB CFSR: Memory Manage Fault Status Register Mask (bits 7:0) */
+
+/* MemManage Fault Status Register (MMFSR) bits; positions are CFSR-relative, i.e. SCB_CFSR_MEMFAULTSR_Pos plus the bit index within MMFSR */
+#define SCB_CFSR_MMARVALID_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 7U)                 /*!< SCB CFSR (MMFSR): MMARVALID Position */
+#define SCB_CFSR_MMARVALID_Msk             (1UL << SCB_CFSR_MMARVALID_Pos)                /*!< SCB CFSR (MMFSR): MMARVALID Mask */
+
+#define SCB_CFSR_MLSPERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 5U)                 /*!< SCB CFSR (MMFSR): MLSPERR Position */
+#define SCB_CFSR_MLSPERR_Msk               (1UL << SCB_CFSR_MLSPERR_Pos)                  /*!< SCB CFSR (MMFSR): MLSPERR Mask */
+
+#define SCB_CFSR_MSTKERR_Pos               (SCB_CFSR_MEMFAULTSR_Pos + 4U)                 /*!< SCB CFSR (MMFSR): MSTKERR Position */
+#define SCB_CFSR_MSTKERR_Msk               (1UL << SCB_CFSR_MSTKERR_Pos)                  /*!< SCB CFSR (MMFSR): MSTKERR Mask */
+
+#define SCB_CFSR_MUNSTKERR_Pos             (SCB_CFSR_MEMFAULTSR_Pos + 3U)                 /*!< SCB CFSR (MMFSR): MUNSTKERR Position */
+#define SCB_CFSR_MUNSTKERR_Msk             (1UL << SCB_CFSR_MUNSTKERR_Pos)                /*!< SCB CFSR (MMFSR): MUNSTKERR Mask */
+
+#define SCB_CFSR_DACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 1U)                 /*!< SCB CFSR (MMFSR): DACCVIOL Position */
+#define SCB_CFSR_DACCVIOL_Msk              (1UL << SCB_CFSR_DACCVIOL_Pos)                 /*!< SCB CFSR (MMFSR): DACCVIOL Mask */
+
+#define SCB_CFSR_IACCVIOL_Pos              (SCB_CFSR_MEMFAULTSR_Pos + 0U)                 /*!< SCB CFSR (MMFSR): IACCVIOL Position */
+#define SCB_CFSR_IACCVIOL_Msk              (1UL /*<< SCB_CFSR_IACCVIOL_Pos*/)             /*!< SCB CFSR (MMFSR): IACCVIOL Mask */
+
+/* BusFault Status Register (BFSR) bits; positions are CFSR-relative, i.e. SCB_CFSR_BUSFAULTSR_Pos plus the bit index within BFSR */
+#define SCB_CFSR_BFARVALID_Pos            (SCB_CFSR_BUSFAULTSR_Pos + 7U)                  /*!< SCB CFSR (BFSR): BFARVALID Position */
+#define SCB_CFSR_BFARVALID_Msk            (1UL << SCB_CFSR_BFARVALID_Pos)                 /*!< SCB CFSR (BFSR): BFARVALID Mask */
+
+#define SCB_CFSR_LSPERR_Pos               (SCB_CFSR_BUSFAULTSR_Pos + 5U)                  /*!< SCB CFSR (BFSR): LSPERR Position */
+#define SCB_CFSR_LSPERR_Msk               (1UL << SCB_CFSR_LSPERR_Pos)                    /*!< SCB CFSR (BFSR): LSPERR Mask */
+
+#define SCB_CFSR_STKERR_Pos               (SCB_CFSR_BUSFAULTSR_Pos + 4U)                  /*!< SCB CFSR (BFSR): STKERR Position */
+#define SCB_CFSR_STKERR_Msk               (1UL << SCB_CFSR_STKERR_Pos)                    /*!< SCB CFSR (BFSR): STKERR Mask */
+
+#define SCB_CFSR_UNSTKERR_Pos             (SCB_CFSR_BUSFAULTSR_Pos + 3U)                  /*!< SCB CFSR (BFSR): UNSTKERR Position */
+#define SCB_CFSR_UNSTKERR_Msk             (1UL << SCB_CFSR_UNSTKERR_Pos)                  /*!< SCB CFSR (BFSR): UNSTKERR Mask */
+
+#define SCB_CFSR_IMPRECISERR_Pos          (SCB_CFSR_BUSFAULTSR_Pos + 2U)                  /*!< SCB CFSR (BFSR): IMPRECISERR Position */
+#define SCB_CFSR_IMPRECISERR_Msk          (1UL << SCB_CFSR_IMPRECISERR_Pos)               /*!< SCB CFSR (BFSR): IMPRECISERR Mask */
+
+#define SCB_CFSR_PRECISERR_Pos            (SCB_CFSR_BUSFAULTSR_Pos + 1U)                  /*!< SCB CFSR (BFSR): PRECISERR Position */
+#define SCB_CFSR_PRECISERR_Msk            (1UL << SCB_CFSR_PRECISERR_Pos)                 /*!< SCB CFSR (BFSR): PRECISERR Mask */
+
+#define SCB_CFSR_IBUSERR_Pos              (SCB_CFSR_BUSFAULTSR_Pos + 0U)                  /*!< SCB CFSR (BFSR): IBUSERR Position */
+#define SCB_CFSR_IBUSERR_Msk              (1UL << SCB_CFSR_IBUSERR_Pos)                   /*!< SCB CFSR (BFSR): IBUSERR Mask */
+
+/* UsageFault Status Register (UFSR) bits; positions are CFSR-relative, i.e. SCB_CFSR_USGFAULTSR_Pos plus the bit index within UFSR */
+#define SCB_CFSR_DIVBYZERO_Pos            (SCB_CFSR_USGFAULTSR_Pos + 9U)                  /*!< SCB CFSR (UFSR): DIVBYZERO Position */
+#define SCB_CFSR_DIVBYZERO_Msk            (1UL << SCB_CFSR_DIVBYZERO_Pos)                 /*!< SCB CFSR (UFSR): DIVBYZERO Mask */
+
+#define SCB_CFSR_UNALIGNED_Pos            (SCB_CFSR_USGFAULTSR_Pos + 8U)                  /*!< SCB CFSR (UFSR): UNALIGNED Position */
+#define SCB_CFSR_UNALIGNED_Msk            (1UL << SCB_CFSR_UNALIGNED_Pos)                 /*!< SCB CFSR (UFSR): UNALIGNED Mask */
+
+#define SCB_CFSR_STKOF_Pos                (SCB_CFSR_USGFAULTSR_Pos + 4U)                  /*!< SCB CFSR (UFSR): STKOF Position */
+#define SCB_CFSR_STKOF_Msk                (1UL << SCB_CFSR_STKOF_Pos)                     /*!< SCB CFSR (UFSR): STKOF Mask */
+
+#define SCB_CFSR_NOCP_Pos                 (SCB_CFSR_USGFAULTSR_Pos + 3U)                  /*!< SCB CFSR (UFSR): NOCP Position */
+#define SCB_CFSR_NOCP_Msk                 (1UL << SCB_CFSR_NOCP_Pos)                      /*!< SCB CFSR (UFSR): NOCP Mask */
+
+#define SCB_CFSR_INVPC_Pos                (SCB_CFSR_USGFAULTSR_Pos + 2U)                  /*!< SCB CFSR (UFSR): INVPC Position */
+#define SCB_CFSR_INVPC_Msk                (1UL << SCB_CFSR_INVPC_Pos)                     /*!< SCB CFSR (UFSR): INVPC Mask */
+
+#define SCB_CFSR_INVSTATE_Pos             (SCB_CFSR_USGFAULTSR_Pos + 1U)                  /*!< SCB CFSR (UFSR): INVSTATE Position */
+#define SCB_CFSR_INVSTATE_Msk             (1UL << SCB_CFSR_INVSTATE_Pos)                  /*!< SCB CFSR (UFSR): INVSTATE Mask */
+
+#define SCB_CFSR_UNDEFINSTR_Pos           (SCB_CFSR_USGFAULTSR_Pos + 0U)                  /*!< SCB CFSR (UFSR): UNDEFINSTR Position */
+#define SCB_CFSR_UNDEFINSTR_Msk           (1UL << SCB_CFSR_UNDEFINSTR_Pos)                /*!< SCB CFSR (UFSR): UNDEFINSTR Mask */
+
+/* SCB HardFault Status Register (HFSR) Definitions */
+#define SCB_HFSR_DEBUGEVT_Pos              31U                                            /*!< SCB HFSR: DEBUGEVT Position */
+#define SCB_HFSR_DEBUGEVT_Msk              (1UL << SCB_HFSR_DEBUGEVT_Pos)                 /*!< SCB HFSR: DEBUGEVT Mask */
+
+#define SCB_HFSR_FORCED_Pos                30U                                            /*!< SCB HFSR: FORCED Position */
+#define SCB_HFSR_FORCED_Msk                (1UL << SCB_HFSR_FORCED_Pos)                   /*!< SCB HFSR: FORCED Mask */
+
+#define SCB_HFSR_VECTTBL_Pos                1U                                            /*!< SCB HFSR: VECTTBL Position */
+#define SCB_HFSR_VECTTBL_Msk               (1UL << SCB_HFSR_VECTTBL_Pos)                  /*!< SCB HFSR: VECTTBL Mask */
+
+/* SCB Debug Fault Status Register Definitions */
+#define SCB_DFSR_EXTERNAL_Pos               4U                                            /*!< SCB DFSR: EXTERNAL Position */
+#define SCB_DFSR_EXTERNAL_Msk              (1UL << SCB_DFSR_EXTERNAL_Pos)                 /*!< SCB DFSR: EXTERNAL Mask */
+
+#define SCB_DFSR_VCATCH_Pos                 3U                                            /*!< SCB DFSR: VCATCH Position */
+#define SCB_DFSR_VCATCH_Msk                (1UL << SCB_DFSR_VCATCH_Pos)                   /*!< SCB DFSR: VCATCH Mask */
+
+#define SCB_DFSR_DWTTRAP_Pos                2U                                            /*!< SCB DFSR: DWTTRAP Position */
+#define SCB_DFSR_DWTTRAP_Msk               (1UL << SCB_DFSR_DWTTRAP_Pos)                  /*!< SCB DFSR: DWTTRAP Mask */
+
+#define SCB_DFSR_BKPT_Pos                   1U                                            /*!< SCB DFSR: BKPT Position */
+#define SCB_DFSR_BKPT_Msk                  (1UL << SCB_DFSR_BKPT_Pos)                     /*!< SCB DFSR: BKPT Mask */
+
+#define SCB_DFSR_HALTED_Pos                 0U                                            /*!< SCB DFSR: HALTED Position */
+#define SCB_DFSR_HALTED_Msk                (1UL /*<< SCB_DFSR_HALTED_Pos*/)               /*!< SCB DFSR: HALTED Mask */
+
+/* SCB Non-Secure Access Control Register (NSACR) Definitions: bit position and mask of each field */
+#define SCB_NSACR_CP11_Pos                 11U                                            /*!< SCB NSACR: CP11 Position */
+#define SCB_NSACR_CP11_Msk                 (1UL << SCB_NSACR_CP11_Pos)                    /*!< SCB NSACR: CP11 Mask */
+
+#define SCB_NSACR_CP10_Pos                 10U                                            /*!< SCB NSACR: CP10 Position */
+#define SCB_NSACR_CP10_Msk                 (1UL << SCB_NSACR_CP10_Pos)                    /*!< SCB NSACR: CP10 Mask */
+
+#define SCB_NSACR_CPn_Pos                   0U                                            /*!< SCB NSACR: CPn Position */
+#define SCB_NSACR_CPn_Msk                  (1UL /*<< SCB_NSACR_CPn_Pos*/)                 /*!< SCB NSACR: CPn Mask (single bit at bit 0, shift omitted) */
+
+/* SCB Cache Level ID Register (CLIDR) Definitions: bit position and mask of each field */
+#define SCB_CLIDR_LOUU_Pos                 27U                                            /*!< SCB CLIDR: LoUU Position */
+#define SCB_CLIDR_LOUU_Msk                 (7UL << SCB_CLIDR_LOUU_Pos)                    /*!< SCB CLIDR: LoUU Mask (3-bit field) */
+
+#define SCB_CLIDR_LOC_Pos                  24U                                            /*!< SCB CLIDR: LoC Position */
+#define SCB_CLIDR_LOC_Msk                  (7UL << SCB_CLIDR_LOC_Pos)                     /*!< SCB CLIDR: LoC Mask (3-bit field) */
+
+#define SCB_CLIDR_IC_Pos                   0U                                             /*!< SCB CLIDR: IC Position */
+#define SCB_CLIDR_IC_Msk                   (1UL << SCB_CLIDR_IC_Pos)                      /*!< SCB CLIDR: IC Mask */
+
+#define SCB_CLIDR_DC_Pos                   1U                                             /*!< SCB CLIDR: DC Position */
+#define SCB_CLIDR_DC_Msk                   (1UL << SCB_CLIDR_DC_Pos)                      /*!< SCB CLIDR: DC Mask */
+
+
+
+/* SCB Cache Type Register (CTR) Definitions: bit position and mask of each field */
+#define SCB_CTR_FORMAT_Pos                 29U                                            /*!< SCB CTR: Format Position */
+#define SCB_CTR_FORMAT_Msk                 (7UL << SCB_CTR_FORMAT_Pos)                    /*!< SCB CTR: Format Mask (3-bit field) */
+
+#define SCB_CTR_CWG_Pos                    24U                                            /*!< SCB CTR: CWG Position */
+#define SCB_CTR_CWG_Msk                    (0xFUL << SCB_CTR_CWG_Pos)                     /*!< SCB CTR: CWG Mask (4-bit field) */
+
+#define SCB_CTR_ERG_Pos                    20U                                            /*!< SCB CTR: ERG Position */
+#define SCB_CTR_ERG_Msk                    (0xFUL << SCB_CTR_ERG_Pos)                     /*!< SCB CTR: ERG Mask (4-bit field) */
+
+#define SCB_CTR_DMINLINE_Pos               16U                                            /*!< SCB CTR: DminLine Position */
+#define SCB_CTR_DMINLINE_Msk               (0xFUL << SCB_CTR_DMINLINE_Pos)                /*!< SCB CTR: DminLine Mask (4-bit field) */
+
+#define SCB_CTR_IMINLINE_Pos                0U                                            /*!< SCB CTR: IminLine Position */
+#define SCB_CTR_IMINLINE_Msk               (0xFUL /*<< SCB_CTR_IMINLINE_Pos*/)            /*!< SCB CTR: IminLine Mask (4-bit field at bit 0, shift omitted) */
+
+/* SCB Cache Size ID Register (CCSIDR) Definitions: bit position and mask of each field */
+#define SCB_CCSIDR_WT_Pos                  31U                                            /*!< SCB CCSIDR: WT Position */
+#define SCB_CCSIDR_WT_Msk                  (1UL << SCB_CCSIDR_WT_Pos)                     /*!< SCB CCSIDR: WT Mask */
+
+#define SCB_CCSIDR_WB_Pos                  30U                                            /*!< SCB CCSIDR: WB Position */
+#define SCB_CCSIDR_WB_Msk                  (1UL << SCB_CCSIDR_WB_Pos)                     /*!< SCB CCSIDR: WB Mask */
+
+#define SCB_CCSIDR_RA_Pos                  29U                                            /*!< SCB CCSIDR: RA Position */
+#define SCB_CCSIDR_RA_Msk                  (1UL << SCB_CCSIDR_RA_Pos)                     /*!< SCB CCSIDR: RA Mask */
+
+#define SCB_CCSIDR_WA_Pos                  28U                                            /*!< SCB CCSIDR: WA Position */
+#define SCB_CCSIDR_WA_Msk                  (1UL << SCB_CCSIDR_WA_Pos)                     /*!< SCB CCSIDR: WA Mask */
+
+#define SCB_CCSIDR_NUMSETS_Pos             13U                                            /*!< SCB CCSIDR: NumSets Position */
+#define SCB_CCSIDR_NUMSETS_Msk             (0x7FFFUL << SCB_CCSIDR_NUMSETS_Pos)           /*!< SCB CCSIDR: NumSets Mask (15-bit field) */
+
+#define SCB_CCSIDR_ASSOCIATIVITY_Pos        3U                                            /*!< SCB CCSIDR: Associativity Position */
+#define SCB_CCSIDR_ASSOCIATIVITY_Msk       (0x3FFUL << SCB_CCSIDR_ASSOCIATIVITY_Pos)      /*!< SCB CCSIDR: Associativity Mask (10-bit field) */
+
+#define SCB_CCSIDR_LINESIZE_Pos             0U                                            /*!< SCB CCSIDR: LineSize Position */
+#define SCB_CCSIDR_LINESIZE_Msk            (7UL /*<< SCB_CCSIDR_LINESIZE_Pos*/)           /*!< SCB CCSIDR: LineSize Mask (3-bit field at bit 0, shift omitted) */
+
+/* SCB Cache Size Selection Register (CSSELR) Definitions: bit position and mask of each field */
+#define SCB_CSSELR_LEVEL_Pos                1U                                            /*!< SCB CSSELR: Level Position */
+#define SCB_CSSELR_LEVEL_Msk               (7UL << SCB_CSSELR_LEVEL_Pos)                  /*!< SCB CSSELR: Level Mask (3-bit field) */
+
+#define SCB_CSSELR_IND_Pos                  0U                                            /*!< SCB CSSELR: InD Position */
+#define SCB_CSSELR_IND_Msk                 (1UL /*<< SCB_CSSELR_IND_Pos*/)                /*!< SCB CSSELR: InD Mask (field at bit 0, shift omitted) */
+
+/* SCB Software Triggered Interrupt Register (STIR) Definitions */
+#define SCB_STIR_INTID_Pos                  0U                                            /*!< SCB STIR: INTID Position */
+#define SCB_STIR_INTID_Msk                 (0x1FFUL /*<< SCB_STIR_INTID_Pos*/)            /*!< SCB STIR: INTID Mask (9-bit field at bit 0, shift omitted) */
+
+/* SCB D-Cache line Invalidate by Set-way Register (DCISW) Definitions */
+#define SCB_DCISW_LEVEL_Pos                1U                                             /*!< SCB DCISW: Level Position */
+#define SCB_DCISW_LEVEL_Msk                (7UL << SCB_DCISW_LEVEL_Pos)                   /*!< SCB DCISW: Level Mask (3-bit field) */
+
+#define SCB_DCISW_WAY_Pos                  30U                                            /*!< SCB DCISW: Way Position */
+#define SCB_DCISW_WAY_Msk                  (3UL << SCB_DCISW_WAY_Pos)                     /*!< SCB DCISW: Way Mask (2-bit field) */
+
+#define SCB_DCISW_SET_Pos                   5U                                            /*!< SCB DCISW: Set Position */
+#define SCB_DCISW_SET_Msk                  (0xFFUL << SCB_DCISW_SET_Pos)                 /*!< SCB DCISW: Set Mask (8-bit field) */
+
+/* SCB D-Cache Clean line by Set-way Register (DCCSW) Definitions; same field layout as DCISW */
+#define SCB_DCCSW_LEVEL_Pos                1U                                             /*!< SCB DCCSW: Level Position */
+#define SCB_DCCSW_LEVEL_Msk                (7UL << SCB_DCCSW_LEVEL_Pos)                   /*!< SCB DCCSW: Level Mask (3-bit field) */
+
+#define SCB_DCCSW_WAY_Pos                  30U                                            /*!< SCB DCCSW: Way Position */
+#define SCB_DCCSW_WAY_Msk                  (3UL << SCB_DCCSW_WAY_Pos)                     /*!< SCB DCCSW: Way Mask (2-bit field) */
+
+#define SCB_DCCSW_SET_Pos                   5U                                            /*!< SCB DCCSW: Set Position */
+#define SCB_DCCSW_SET_Msk                  (0xFFUL << SCB_DCCSW_SET_Pos)                 /*!< SCB DCCSW: Set Mask (8-bit field) */
+
+/* SCB D-Cache Clean and Invalidate by Set-way Register (DCCISW) Definitions; same field layout as DCISW */
+#define SCB_DCCISW_LEVEL_Pos               1U                                             /*!< SCB DCCISW: Level Position */
+#define SCB_DCCISW_LEVEL_Msk               (7UL << SCB_DCCISW_LEVEL_Pos)                  /*!< SCB DCCISW: Level Mask (3-bit field) */
+
+#define SCB_DCCISW_WAY_Pos                 30U                                            /*!< SCB DCCISW: Way Position */
+#define SCB_DCCISW_WAY_Msk                 (3UL << SCB_DCCISW_WAY_Pos)                    /*!< SCB DCCISW: Way Mask (2-bit field) */
+
+#define SCB_DCCISW_SET_Pos                  5U                                            /*!< SCB DCCISW: Set Position */
+#define SCB_DCCISW_SET_Msk                 (0xFFUL << SCB_DCCISW_SET_Pos)                /*!< SCB DCCISW: Set Mask (8-bit field) */
+
+/* ArmChina: Implementation Defined */
+/* Instruction Tightly-Coupled Memory Control Register (ITCMCR) Definitions */
+#define SCB_ITCMCR_SZ_Pos                   3U                                            /*!< SCB ITCMCR: SZ Position */
+#define SCB_ITCMCR_SZ_Msk                  (0xFUL << SCB_ITCMCR_SZ_Pos)                   /*!< SCB ITCMCR: SZ Mask (4-bit field) */
+
+#define SCB_ITCMCR_EN_Pos                   0U                                            /*!< SCB ITCMCR: EN Position */
+#define SCB_ITCMCR_EN_Msk                  (1UL /*<< SCB_ITCMCR_EN_Pos*/)                 /*!< SCB ITCMCR: EN Mask (field at bit 0, shift omitted) */
+
+/* Data Tightly-Coupled Memory Control Register (DTCMCR) Definitions; same field layout as ITCMCR */
+#define SCB_DTCMCR_SZ_Pos                   3U                                            /*!< SCB DTCMCR: SZ Position */
+#define SCB_DTCMCR_SZ_Msk                  (0xFUL << SCB_DTCMCR_SZ_Pos)                   /*!< SCB DTCMCR: SZ Mask (4-bit field) */
+
+#define SCB_DTCMCR_EN_Pos                   0U                                            /*!< SCB DTCMCR: EN Position */
+#define SCB_DTCMCR_EN_Msk                  (1UL /*<< SCB_DTCMCR_EN_Pos*/)                 /*!< SCB DTCMCR: EN Mask (field at bit 0, shift omitted) */
+
+/* L1 Cache Control Register (CACR) Definitions: each mask is derived from its own field position */
+#define SCB_CACR_DCCLEAN_Pos               16U                                            /*!< SCB CACR: DCCLEAN Position */
+#define SCB_CACR_DCCLEAN_Msk               (1UL << SCB_CACR_DCCLEAN_Pos)                  /*!< SCB CACR: DCCLEAN Mask */
+
+#define SCB_CACR_ICACTIVE_Pos              13U                                            /*!< SCB CACR: ICACTIVE Position */
+#define SCB_CACR_ICACTIVE_Msk              (1UL << SCB_CACR_ICACTIVE_Pos)                 /*!< SCB CACR: ICACTIVE Mask */
+
+#define SCB_CACR_DCACTIVE_Pos              12U                                            /*!< SCB CACR: DCACTIVE Position */
+#define SCB_CACR_DCACTIVE_Msk              (1UL << SCB_CACR_DCACTIVE_Pos)                 /*!< SCB CACR: DCACTIVE Mask */
+
+#define SCB_CACR_FORCEWT_Pos                2U                                            /*!< SCB CACR: FORCEWT Position */
+#define SCB_CACR_FORCEWT_Msk               (1UL << SCB_CACR_FORCEWT_Pos)                  /*!< SCB CACR: FORCEWT Mask */
+
+/*@} end of group CMSIS_SCB */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SCnSCB System Controls not in SCB (SCnSCB)
+  \brief    Type definitions for the System Control and ID Register not in the SCB
+  @{
+ */
+
+/**
+  \brief  Structure type to access the System Control and ID Registers not in the SCB (one reserved word, then ICTR, ACTLR and CPPWR).
+ */
+typedef struct
+{
+        uint32_t RESERVED0[1U];          /*!< Offset: 0x000        Reserved word; keeps ICTR at offset 0x004 */
+  __IM  uint32_t ICTR;                   /*!< Offset: 0x004 (R/ )  Interrupt Controller Type Register */
+  __IOM uint32_t ACTLR;                  /*!< Offset: 0x008 (R/W)  Auxiliary Control Register */
+  __IOM uint32_t CPPWR;                  /*!< Offset: 0x00C (R/W)  Coprocessor Power Control Register */
+} SCnSCB_Type;
+
+/* Interrupt Controller Type Register (ICTR) Definitions */
+#define SCnSCB_ICTR_INTLINESNUM_Pos         0U                                         /*!< ICTR: INTLINESNUM Position */
+#define SCnSCB_ICTR_INTLINESNUM_Msk        (0xFUL /*<< SCnSCB_ICTR_INTLINESNUM_Pos*/)  /*!< ICTR: INTLINESNUM Mask (4-bit field at bit 0, shift omitted) */
+
+/*@} end of group CMSIS_SCnSCB */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SysTick     System Tick Timer (SysTick)
+  \brief    Type definitions for the System Timer Registers.
+  @{
+ */
+
+/**
+  \brief  Structure type to access the System Timer (SysTick): four consecutive 32-bit registers, CALIB being read-only.
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  SysTick Control and Status Register */
+  __IOM uint32_t LOAD;                   /*!< Offset: 0x004 (R/W)  SysTick Reload Value Register */
+  __IOM uint32_t VAL;                    /*!< Offset: 0x008 (R/W)  SysTick Current Value Register */
+  __IM  uint32_t CALIB;                  /*!< Offset: 0x00C (R/ )  SysTick Calibration Register */
+} SysTick_Type;
+
+/* SysTick Control / Status Register (CTRL) Definitions */
+#define SysTick_CTRL_COUNTFLAG_Pos         16U                                            /*!< SysTick CTRL: COUNTFLAG Position */
+#define SysTick_CTRL_COUNTFLAG_Msk         (1UL << SysTick_CTRL_COUNTFLAG_Pos)            /*!< SysTick CTRL: COUNTFLAG Mask */
+
+#define SysTick_CTRL_CLKSOURCE_Pos          2U                                            /*!< SysTick CTRL: CLKSOURCE Position */
+#define SysTick_CTRL_CLKSOURCE_Msk         (1UL << SysTick_CTRL_CLKSOURCE_Pos)            /*!< SysTick CTRL: CLKSOURCE Mask */
+
+#define SysTick_CTRL_TICKINT_Pos            1U                                            /*!< SysTick CTRL: TICKINT Position */
+#define SysTick_CTRL_TICKINT_Msk           (1UL << SysTick_CTRL_TICKINT_Pos)              /*!< SysTick CTRL: TICKINT Mask */
+
+#define SysTick_CTRL_ENABLE_Pos             0U                                            /*!< SysTick CTRL: ENABLE Position */
+#define SysTick_CTRL_ENABLE_Msk            (1UL /*<< SysTick_CTRL_ENABLE_Pos*/)           /*!< SysTick CTRL: ENABLE Mask (field at bit 0, shift omitted) */
+
+/* SysTick Reload Register (LOAD) Definitions */
+#define SysTick_LOAD_RELOAD_Pos             0U                                            /*!< SysTick LOAD: RELOAD Position */
+#define SysTick_LOAD_RELOAD_Msk            (0xFFFFFFUL /*<< SysTick_LOAD_RELOAD_Pos*/)    /*!< SysTick LOAD: RELOAD Mask (24-bit field at bit 0, shift omitted) */
+
+/* SysTick Current Register (VAL) Definitions */
+#define SysTick_VAL_CURRENT_Pos             0U                                            /*!< SysTick VAL: CURRENT Position */
+#define SysTick_VAL_CURRENT_Msk            (0xFFFFFFUL /*<< SysTick_VAL_CURRENT_Pos*/)    /*!< SysTick VAL: CURRENT Mask (24-bit field at bit 0, shift omitted) */
+
+/* SysTick Calibration Register (CALIB) Definitions */
+#define SysTick_CALIB_NOREF_Pos            31U                                            /*!< SysTick CALIB: NOREF Position */
+#define SysTick_CALIB_NOREF_Msk            (1UL << SysTick_CALIB_NOREF_Pos)               /*!< SysTick CALIB: NOREF Mask */
+
+#define SysTick_CALIB_SKEW_Pos             30U                                            /*!< SysTick CALIB: SKEW Position */
+#define SysTick_CALIB_SKEW_Msk             (1UL << SysTick_CALIB_SKEW_Pos)                /*!< SysTick CALIB: SKEW Mask */
+
+#define SysTick_CALIB_TENMS_Pos             0U                                            /*!< SysTick CALIB: TENMS Position */
+#define SysTick_CALIB_TENMS_Msk            (0xFFFFFFUL /*<< SysTick_CALIB_TENMS_Pos*/)    /*!< SysTick CALIB: TENMS Mask (24-bit field at bit 0, shift omitted) */
+
+/*@} end of group CMSIS_SysTick */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_ITM     Instrumentation Trace Macrocell (ITM)
+  \brief    Type definitions for the Instrumentation Trace Macrocell (ITM)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Instrumentation Trace Macrocell Register (ITM); reserved arrays pad the gaps so each register lands on its documented offset.
+ */
+typedef struct
+{
+  __OM  union
+  {
+    __OM  uint8_t    u8;                 /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 8-bit */
+    __OM  uint16_t   u16;                /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 16-bit */
+    __OM  uint32_t   u32;                /*!< Offset: 0x000 ( /W)  ITM Stimulus Port 32-bit */
+  }  PORT [32U];                         /*!< Offset: 0x000 ( /W)  ITM Stimulus Port Registers */
+        uint32_t RESERVED0[864U];        /*!< Offset: 0x080        Reserved up to 0xDFC */
+  __IOM uint32_t TER;                    /*!< Offset: 0xE00 (R/W)  ITM Trace Enable Register */
+        uint32_t RESERVED1[15U];         /*!< Offset: 0xE04        Reserved up to 0xE3C */
+  __IOM uint32_t TPR;                    /*!< Offset: 0xE40 (R/W)  ITM Trace Privilege Register */
+        uint32_t RESERVED2[15U];         /*!< Offset: 0xE44        Reserved up to 0xE7C */
+  __IOM uint32_t TCR;                    /*!< Offset: 0xE80 (R/W)  ITM Trace Control Register */
+        uint32_t RESERVED3[32U];         /*!< Offset: 0xE84        Reserved up to 0xF00 */
+        uint32_t RESERVED4[43U];         /*!< Offset: 0xF04        Reserved up to 0xFAC */
+  __OM  uint32_t LAR;                    /*!< Offset: 0xFB0 ( /W)  ITM Lock Access Register */
+  __IM  uint32_t LSR;                    /*!< Offset: 0xFB4 (R/ )  ITM Lock Status Register */
+        uint32_t RESERVED5[1U];          /*!< Offset: 0xFB8        Reserved */
+  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  ITM Device Architecture Register */
+        uint32_t RESERVED6[4U];          /*!< Offset: 0xFC0        Reserved up to 0xFCC */
+  __IM  uint32_t PID4;                   /*!< Offset: 0xFD0 (R/ )  ITM Peripheral Identification Register #4 */
+  __IM  uint32_t PID5;                   /*!< Offset: 0xFD4 (R/ )  ITM Peripheral Identification Register #5 */
+  __IM  uint32_t PID6;                   /*!< Offset: 0xFD8 (R/ )  ITM Peripheral Identification Register #6 */
+  __IM  uint32_t PID7;                   /*!< Offset: 0xFDC (R/ )  ITM Peripheral Identification Register #7 */
+  __IM  uint32_t PID0;                   /*!< Offset: 0xFE0 (R/ )  ITM Peripheral Identification Register #0 */
+  __IM  uint32_t PID1;                   /*!< Offset: 0xFE4 (R/ )  ITM Peripheral Identification Register #1 */
+  __IM  uint32_t PID2;                   /*!< Offset: 0xFE8 (R/ )  ITM Peripheral Identification Register #2 */
+  __IM  uint32_t PID3;                   /*!< Offset: 0xFEC (R/ )  ITM Peripheral Identification Register #3 */
+  __IM  uint32_t CID0;                   /*!< Offset: 0xFF0 (R/ )  ITM Component Identification Register #0 */
+  __IM  uint32_t CID1;                   /*!< Offset: 0xFF4 (R/ )  ITM Component Identification Register #1 */
+  __IM  uint32_t CID2;                   /*!< Offset: 0xFF8 (R/ )  ITM Component Identification Register #2 */
+  __IM  uint32_t CID3;                   /*!< Offset: 0xFFC (R/ )  ITM Component Identification Register #3 */
+} ITM_Type;
+
+/* ITM Stimulus Port Register (STIM) Definitions */
+#define ITM_STIM_DISABLED_Pos               1U                                            /*!< ITM STIM: DISABLED Position */
+#define ITM_STIM_DISABLED_Msk              (0x1UL << ITM_STIM_DISABLED_Pos)               /*!< ITM STIM: DISABLED Mask */
+
+#define ITM_STIM_FIFOREADY_Pos              0U                                            /*!< ITM STIM: FIFOREADY Position */
+#define ITM_STIM_FIFOREADY_Msk             (0x1UL /*<< ITM_STIM_FIFOREADY_Pos*/)          /*!< ITM STIM: FIFOREADY Mask (field at bit 0, shift omitted) */
+
+/* ITM Trace Privilege Register (TPR) Definitions */
+#define ITM_TPR_PRIVMASK_Pos                0U                                            /*!< ITM TPR: PRIVMASK Position */
+#define ITM_TPR_PRIVMASK_Msk               (0xFFFFFFFFUL /*<< ITM_TPR_PRIVMASK_Pos*/)     /*!< ITM TPR: PRIVMASK Mask (all 32 bits, shift omitted) */
+
+/* ITM Trace Control Register (TCR) Definitions */
+#define ITM_TCR_BUSY_Pos                   23U                                            /*!< ITM TCR: BUSY Position */
+#define ITM_TCR_BUSY_Msk                   (1UL << ITM_TCR_BUSY_Pos)                      /*!< ITM TCR: BUSY Mask */
+
+#define ITM_TCR_TRACEBUSID_Pos             16U                                            /*!< ITM TCR: TRACEBUSID (ATBID) Position */
+#define ITM_TCR_TRACEBUSID_Msk             (0x7FUL << ITM_TCR_TRACEBUSID_Pos)             /*!< ITM TCR: TRACEBUSID (ATBID) Mask (7-bit field) */
+
+#define ITM_TCR_GTSFREQ_Pos                10U                                            /*!< ITM TCR: Global timestamp frequency Position */
+#define ITM_TCR_GTSFREQ_Msk                (3UL << ITM_TCR_GTSFREQ_Pos)                   /*!< ITM TCR: Global timestamp frequency Mask (2-bit field) */
+
+#define ITM_TCR_TSPRESCALE_Pos              8U                                            /*!< ITM TCR: TSPRESCALE Position */
+#define ITM_TCR_TSPRESCALE_Msk             (3UL << ITM_TCR_TSPRESCALE_Pos)                /*!< ITM TCR: TSPRESCALE Mask (2-bit field) */
+
+#define ITM_TCR_STALLENA_Pos                5U                                            /*!< ITM TCR: STALLENA Position */
+#define ITM_TCR_STALLENA_Msk               (1UL << ITM_TCR_STALLENA_Pos)                  /*!< ITM TCR: STALLENA Mask */
+
+#define ITM_TCR_SWOENA_Pos                  4U                                            /*!< ITM TCR: SWOENA Position */
+#define ITM_TCR_SWOENA_Msk                 (1UL << ITM_TCR_SWOENA_Pos)                    /*!< ITM TCR: SWOENA Mask */
+
+#define ITM_TCR_DWTENA_Pos                  3U                                            /*!< ITM TCR: DWTENA Position */
+#define ITM_TCR_DWTENA_Msk                 (1UL << ITM_TCR_DWTENA_Pos)                    /*!< ITM TCR: DWTENA Mask */
+
+#define ITM_TCR_SYNCENA_Pos                 2U                                            /*!< ITM TCR: SYNCENA Position */
+#define ITM_TCR_SYNCENA_Msk                (1UL << ITM_TCR_SYNCENA_Pos)                   /*!< ITM TCR: SYNCENA Mask */
+
+#define ITM_TCR_TSENA_Pos                   1U                                            /*!< ITM TCR: TSENA Position */
+#define ITM_TCR_TSENA_Msk                  (1UL << ITM_TCR_TSENA_Pos)                     /*!< ITM TCR: TSENA Mask */
+
+#define ITM_TCR_ITMENA_Pos                  0U                                            /*!< ITM TCR: ITM Enable bit Position */
+#define ITM_TCR_ITMENA_Msk                 (1UL /*<< ITM_TCR_ITMENA_Pos*/)                /*!< ITM TCR: ITM Enable bit Mask (field at bit 0, shift omitted) */
+
+/* ITM Lock Status Register (LSR) Definitions */
+#define ITM_LSR_ByteAcc_Pos                 2U                                            /*!< ITM LSR: ByteAcc Position */
+#define ITM_LSR_ByteAcc_Msk                (1UL << ITM_LSR_ByteAcc_Pos)                   /*!< ITM LSR: ByteAcc Mask */
+
+#define ITM_LSR_Access_Pos                  1U                                            /*!< ITM LSR: Access Position */
+#define ITM_LSR_Access_Msk                 (1UL << ITM_LSR_Access_Pos)                    /*!< ITM LSR: Access Mask */
+
+#define ITM_LSR_Present_Pos                 0U                                            /*!< ITM LSR: Present Position */
+#define ITM_LSR_Present_Msk                (1UL /*<< ITM_LSR_Present_Pos*/)               /*!< ITM LSR: Present Mask (field at bit 0, shift omitted) */
+
+/*@}*/ /* end of group CMSIS_ITM */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_DWT     Data Watchpoint and Trace (DWT)
+  \brief    Type definitions for the Data Watchpoint and Trace (DWT)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Data Watchpoint and Trace Register (DWT); comparators 0-15 sit at a 0x10 stride, with reserved words padding each COMPn/FUNCTIONn pair.
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  Control Register */
+  __IOM uint32_t CYCCNT;                 /*!< Offset: 0x004 (R/W)  Cycle Count Register */
+  __IOM uint32_t CPICNT;                 /*!< Offset: 0x008 (R/W)  CPI Count Register */
+  __IOM uint32_t EXCCNT;                 /*!< Offset: 0x00C (R/W)  Exception Overhead Count Register */
+  __IOM uint32_t SLEEPCNT;               /*!< Offset: 0x010 (R/W)  Sleep Count Register */
+  __IOM uint32_t LSUCNT;                 /*!< Offset: 0x014 (R/W)  LSU Count Register */
+  __IOM uint32_t FOLDCNT;                /*!< Offset: 0x018 (R/W)  Folded-instruction Count Register */
+  __IM  uint32_t PCSR;                   /*!< Offset: 0x01C (R/ )  Program Counter Sample Register */
+  __IOM uint32_t COMP0;                  /*!< Offset: 0x020 (R/W)  Comparator Register 0 */
+        uint32_t RESERVED1[1U];          /*!< Offset: 0x024        Reserved */
+  __IOM uint32_t FUNCTION0;              /*!< Offset: 0x028 (R/W)  Function Register 0 */
+        uint32_t RESERVED2[1U];          /*!< Offset: 0x02C        Reserved */
+  __IOM uint32_t COMP1;                  /*!< Offset: 0x030 (R/W)  Comparator Register 1 */
+        uint32_t RESERVED3[1U];          /*!< Offset: 0x034        Reserved */
+  __IOM uint32_t FUNCTION1;              /*!< Offset: 0x038 (R/W)  Function Register 1 */
+        uint32_t RESERVED4[1U];          /*!< Offset: 0x03C        Reserved */
+  __IOM uint32_t COMP2;                  /*!< Offset: 0x040 (R/W)  Comparator Register 2 */
+        uint32_t RESERVED5[1U];          /*!< Offset: 0x044        Reserved */
+  __IOM uint32_t FUNCTION2;              /*!< Offset: 0x048 (R/W)  Function Register 2 */
+        uint32_t RESERVED6[1U];          /*!< Offset: 0x04C        Reserved */
+  __IOM uint32_t COMP3;                  /*!< Offset: 0x050 (R/W)  Comparator Register 3 */
+        uint32_t RESERVED7[1U];          /*!< Offset: 0x054        Reserved */
+  __IOM uint32_t FUNCTION3;              /*!< Offset: 0x058 (R/W)  Function Register 3 */
+        uint32_t RESERVED8[1U];          /*!< Offset: 0x05C        Reserved */
+  __IOM uint32_t COMP4;                  /*!< Offset: 0x060 (R/W)  Comparator Register 4 */
+        uint32_t RESERVED9[1U];          /*!< Offset: 0x064        Reserved */
+  __IOM uint32_t FUNCTION4;              /*!< Offset: 0x068 (R/W)  Function Register 4 */
+        uint32_t RESERVED10[1U];         /*!< Offset: 0x06C        Reserved */
+  __IOM uint32_t COMP5;                  /*!< Offset: 0x070 (R/W)  Comparator Register 5 */
+        uint32_t RESERVED11[1U];         /*!< Offset: 0x074        Reserved */
+  __IOM uint32_t FUNCTION5;              /*!< Offset: 0x078 (R/W)  Function Register 5 */
+        uint32_t RESERVED12[1U];         /*!< Offset: 0x07C        Reserved */
+  __IOM uint32_t COMP6;                  /*!< Offset: 0x080 (R/W)  Comparator Register 6 */
+        uint32_t RESERVED13[1U];         /*!< Offset: 0x084        Reserved */
+  __IOM uint32_t FUNCTION6;              /*!< Offset: 0x088 (R/W)  Function Register 6 */
+        uint32_t RESERVED14[1U];         /*!< Offset: 0x08C        Reserved */
+  __IOM uint32_t COMP7;                  /*!< Offset: 0x090 (R/W)  Comparator Register 7 */
+        uint32_t RESERVED15[1U];         /*!< Offset: 0x094        Reserved */
+  __IOM uint32_t FUNCTION7;              /*!< Offset: 0x098 (R/W)  Function Register 7 */
+        uint32_t RESERVED16[1U];         /*!< Offset: 0x09C        Reserved */
+  __IOM uint32_t COMP8;                  /*!< Offset: 0x0A0 (R/W)  Comparator Register 8 */
+        uint32_t RESERVED17[1U];         /*!< Offset: 0x0A4        Reserved */
+  __IOM uint32_t FUNCTION8;              /*!< Offset: 0x0A8 (R/W)  Function Register 8 */
+        uint32_t RESERVED18[1U];         /*!< Offset: 0x0AC        Reserved */
+  __IOM uint32_t COMP9;                  /*!< Offset: 0x0B0 (R/W)  Comparator Register 9 */
+        uint32_t RESERVED19[1U];         /*!< Offset: 0x0B4        Reserved */
+  __IOM uint32_t FUNCTION9;              /*!< Offset: 0x0B8 (R/W)  Function Register 9 */
+        uint32_t RESERVED20[1U];         /*!< Offset: 0x0BC        Reserved */
+  __IOM uint32_t COMP10;                 /*!< Offset: 0x0C0 (R/W)  Comparator Register 10 */
+        uint32_t RESERVED21[1U];         /*!< Offset: 0x0C4        Reserved */
+  __IOM uint32_t FUNCTION10;             /*!< Offset: 0x0C8 (R/W)  Function Register 10 */
+        uint32_t RESERVED22[1U];         /*!< Offset: 0x0CC        Reserved */
+  __IOM uint32_t COMP11;                 /*!< Offset: 0x0D0 (R/W)  Comparator Register 11 */
+        uint32_t RESERVED23[1U];         /*!< Offset: 0x0D4        Reserved */
+  __IOM uint32_t FUNCTION11;             /*!< Offset: 0x0D8 (R/W)  Function Register 11 */
+        uint32_t RESERVED24[1U];         /*!< Offset: 0x0DC        Reserved */
+  __IOM uint32_t COMP12;                 /*!< Offset: 0x0E0 (R/W)  Comparator Register 12 */
+        uint32_t RESERVED25[1U];         /*!< Offset: 0x0E4        Reserved */
+  __IOM uint32_t FUNCTION12;             /*!< Offset: 0x0E8 (R/W)  Function Register 12 */
+        uint32_t RESERVED26[1U];         /*!< Offset: 0x0EC        Reserved */
+  __IOM uint32_t COMP13;                 /*!< Offset: 0x0F0 (R/W)  Comparator Register 13 */
+        uint32_t RESERVED27[1U];         /*!< Offset: 0x0F4        Reserved */
+  __IOM uint32_t FUNCTION13;             /*!< Offset: 0x0F8 (R/W)  Function Register 13 */
+        uint32_t RESERVED28[1U];         /*!< Offset: 0x0FC        Reserved */
+  __IOM uint32_t COMP14;                 /*!< Offset: 0x100 (R/W)  Comparator Register 14 */
+        uint32_t RESERVED29[1U];         /*!< Offset: 0x104        Reserved */
+  __IOM uint32_t FUNCTION14;             /*!< Offset: 0x108 (R/W)  Function Register 14 */
+        uint32_t RESERVED30[1U];         /*!< Offset: 0x10C        Reserved */
+  __IOM uint32_t COMP15;                 /*!< Offset: 0x110 (R/W)  Comparator Register 15 */
+        uint32_t RESERVED31[1U];         /*!< Offset: 0x114        Reserved */
+  __IOM uint32_t FUNCTION15;             /*!< Offset: 0x118 (R/W)  Function Register 15 */
+        uint32_t RESERVED32[934U];       /*!< Offset: 0x11C        Reserved up to 0xFB0 */
+  __IM  uint32_t LSR;                    /*!< Offset: 0xFB4 (R/ )  Lock Status Register */
+        uint32_t RESERVED33[1U];         /*!< Offset: 0xFB8        Reserved */
+  __IM  uint32_t DEVARCH;                /*!< Offset: 0xFBC (R/ )  Device Architecture Register */
+} DWT_Type;
+
+/* DWT Control Register (CTRL) Definitions: bit position and mask of each field */
+#define DWT_CTRL_NUMCOMP_Pos               28U                                         /*!< DWT CTRL: NUMCOMP Position */
+#define DWT_CTRL_NUMCOMP_Msk               (0xFUL << DWT_CTRL_NUMCOMP_Pos)             /*!< DWT CTRL: NUMCOMP Mask (4-bit field) */
+
+#define DWT_CTRL_NOTRCPKT_Pos              27U                                         /*!< DWT CTRL: NOTRCPKT Position */
+#define DWT_CTRL_NOTRCPKT_Msk              (0x1UL << DWT_CTRL_NOTRCPKT_Pos)            /*!< DWT CTRL: NOTRCPKT Mask */
+
+#define DWT_CTRL_NOEXTTRIG_Pos             26U                                         /*!< DWT CTRL: NOEXTTRIG Position */
+#define DWT_CTRL_NOEXTTRIG_Msk             (0x1UL << DWT_CTRL_NOEXTTRIG_Pos)           /*!< DWT CTRL: NOEXTTRIG Mask */
+
+#define DWT_CTRL_NOCYCCNT_Pos              25U                                         /*!< DWT CTRL: NOCYCCNT Position */
+#define DWT_CTRL_NOCYCCNT_Msk              (0x1UL << DWT_CTRL_NOCYCCNT_Pos)            /*!< DWT CTRL: NOCYCCNT Mask */
+
+#define DWT_CTRL_NOPRFCNT_Pos              24U                                         /*!< DWT CTRL: NOPRFCNT Position */
+#define DWT_CTRL_NOPRFCNT_Msk              (0x1UL << DWT_CTRL_NOPRFCNT_Pos)            /*!< DWT CTRL: NOPRFCNT Mask */
+
+#define DWT_CTRL_CYCDISS_Pos               23U                                         /*!< DWT CTRL: CYCDISS Position */
+#define DWT_CTRL_CYCDISS_Msk               (0x1UL << DWT_CTRL_CYCDISS_Pos)             /*!< DWT CTRL: CYCDISS Mask */
+
+#define DWT_CTRL_CYCEVTENA_Pos             22U                                         /*!< DWT CTRL: CYCEVTENA Position */
+#define DWT_CTRL_CYCEVTENA_Msk             (0x1UL << DWT_CTRL_CYCEVTENA_Pos)           /*!< DWT CTRL: CYCEVTENA Mask */
+
+#define DWT_CTRL_FOLDEVTENA_Pos            21U                                         /*!< DWT CTRL: FOLDEVTENA Position */
+#define DWT_CTRL_FOLDEVTENA_Msk            (0x1UL << DWT_CTRL_FOLDEVTENA_Pos)          /*!< DWT CTRL: FOLDEVTENA Mask */
+
+#define DWT_CTRL_LSUEVTENA_Pos             20U                                         /*!< DWT CTRL: LSUEVTENA Position */
+#define DWT_CTRL_LSUEVTENA_Msk             (0x1UL << DWT_CTRL_LSUEVTENA_Pos)           /*!< DWT CTRL: LSUEVTENA Mask */
+
+#define DWT_CTRL_SLEEPEVTENA_Pos           19U                                         /*!< DWT CTRL: SLEEPEVTENA Position */
+#define DWT_CTRL_SLEEPEVTENA_Msk           (0x1UL << DWT_CTRL_SLEEPEVTENA_Pos)         /*!< DWT CTRL: SLEEPEVTENA Mask */
+
+#define DWT_CTRL_EXCEVTENA_Pos             18U                                         /*!< DWT CTRL: EXCEVTENA Position */
+#define DWT_CTRL_EXCEVTENA_Msk             (0x1UL << DWT_CTRL_EXCEVTENA_Pos)           /*!< DWT CTRL: EXCEVTENA Mask */
+
+#define DWT_CTRL_CPIEVTENA_Pos             17U                                         /*!< DWT CTRL: CPIEVTENA Position */
+#define DWT_CTRL_CPIEVTENA_Msk             (0x1UL << DWT_CTRL_CPIEVTENA_Pos)           /*!< DWT CTRL: CPIEVTENA Mask */
+
+#define DWT_CTRL_EXCTRCENA_Pos             16U                                         /*!< DWT CTRL: EXCTRCENA Position */
+#define DWT_CTRL_EXCTRCENA_Msk             (0x1UL << DWT_CTRL_EXCTRCENA_Pos)           /*!< DWT CTRL: EXCTRCENA Mask */
+
+#define DWT_CTRL_PCSAMPLENA_Pos            12U                                         /*!< DWT CTRL: PCSAMPLENA Position */
+#define DWT_CTRL_PCSAMPLENA_Msk            (0x1UL << DWT_CTRL_PCSAMPLENA_Pos)          /*!< DWT CTRL: PCSAMPLENA Mask */
+
+#define DWT_CTRL_SYNCTAP_Pos               10U                                         /*!< DWT CTRL: SYNCTAP Position */
+#define DWT_CTRL_SYNCTAP_Msk               (0x3UL << DWT_CTRL_SYNCTAP_Pos)             /*!< DWT CTRL: SYNCTAP Mask (2-bit field) */
+
+#define DWT_CTRL_CYCTAP_Pos                 9U                                         /*!< DWT CTRL: CYCTAP Position */
+#define DWT_CTRL_CYCTAP_Msk                (0x1UL << DWT_CTRL_CYCTAP_Pos)              /*!< DWT CTRL: CYCTAP Mask */
+
+#define DWT_CTRL_POSTINIT_Pos               5U                                         /*!< DWT CTRL: POSTINIT Position */
+#define DWT_CTRL_POSTINIT_Msk              (0xFUL << DWT_CTRL_POSTINIT_Pos)            /*!< DWT CTRL: POSTINIT Mask (4-bit field) */
+
+#define DWT_CTRL_POSTPRESET_Pos             1U                                         /*!< DWT CTRL: POSTPRESET Position */
+#define DWT_CTRL_POSTPRESET_Msk            (0xFUL << DWT_CTRL_POSTPRESET_Pos)          /*!< DWT CTRL: POSTPRESET Mask (4-bit field) */
+
+#define DWT_CTRL_CYCCNTENA_Pos              0U                                         /*!< DWT CTRL: CYCCNTENA Position */
+#define DWT_CTRL_CYCCNTENA_Msk             (0x1UL /*<< DWT_CTRL_CYCCNTENA_Pos*/)       /*!< DWT CTRL: CYCCNTENA Mask (field at bit 0, shift omitted) */
+
+/* DWT CPI Count Register (CPICNT) Definitions */
+#define DWT_CPICNT_CPICNT_Pos               0U                                         /*!< DWT CPICNT: CPICNT Position */
+#define DWT_CPICNT_CPICNT_Msk              (0xFFUL /*<< DWT_CPICNT_CPICNT_Pos*/)       /*!< DWT CPICNT: CPICNT Mask (8-bit field at bit 0, shift omitted) */
+
+/* DWT Exception Overhead Count Register Definitions */
+#define DWT_EXCCNT_EXCCNT_Pos               0U                                         /*!< DWT EXCCNT: EXCCNT Position */
+#define DWT_EXCCNT_EXCCNT_Msk              (0xFFUL /*<< DWT_EXCCNT_EXCCNT_Pos*/)       /*!< DWT EXCCNT: EXCCNT Mask */
+
+/* DWT Sleep Count Register Definitions */
+#define DWT_SLEEPCNT_SLEEPCNT_Pos           0U                                         /*!< DWT SLEEPCNT: SLEEPCNT Position */
+#define DWT_SLEEPCNT_SLEEPCNT_Msk          (0xFFUL /*<< DWT_SLEEPCNT_SLEEPCNT_Pos*/)   /*!< DWT SLEEPCNT: SLEEPCNT Mask */
+
+/* DWT LSU Count Register Definitions */
+#define DWT_LSUCNT_LSUCNT_Pos               0U                                         /*!< DWT LSUCNT: LSUCNT Position */
+#define DWT_LSUCNT_LSUCNT_Msk              (0xFFUL /*<< DWT_LSUCNT_LSUCNT_Pos*/)       /*!< DWT LSUCNT: LSUCNT Mask */
+
+/* DWT Folded-instruction Count Register Definitions */
+#define DWT_FOLDCNT_FOLDCNT_Pos             0U                                         /*!< DWT FOLDCNT: FOLDCNT Position */
+#define DWT_FOLDCNT_FOLDCNT_Msk            (0xFFUL /*<< DWT_FOLDCNT_FOLDCNT_Pos*/)     /*!< DWT FOLDCNT: FOLDCNT Mask */
+
+/* DWT Comparator Function Register Definitions */
+/* Field layout as defined below: ID [31:27], MATCHED [24], DATAVSIZE [11:10],
+   ACTION [5:4], MATCH [3:0]. Each _Msk is the field-width pattern shifted to _Pos;
+   fields at position 0 keep the shift commented out, as elsewhere in this header. */
+#define DWT_FUNCTION_ID_Pos                27U                                         /*!< DWT FUNCTION: ID Position */
+#define DWT_FUNCTION_ID_Msk                (0x1FUL << DWT_FUNCTION_ID_Pos)             /*!< DWT FUNCTION: ID Mask */
+
+#define DWT_FUNCTION_MATCHED_Pos           24U                                         /*!< DWT FUNCTION: MATCHED Position */
+#define DWT_FUNCTION_MATCHED_Msk           (0x1UL << DWT_FUNCTION_MATCHED_Pos)         /*!< DWT FUNCTION: MATCHED Mask */
+
+#define DWT_FUNCTION_DATAVSIZE_Pos         10U                                         /*!< DWT FUNCTION: DATAVSIZE Position */
+#define DWT_FUNCTION_DATAVSIZE_Msk         (0x3UL << DWT_FUNCTION_DATAVSIZE_Pos)       /*!< DWT FUNCTION: DATAVSIZE Mask */
+
+/* ACTION is a two-bit field (bits 5:4) in the ARMv8-M DWT_FUNCTIONn register, so the
+   mask must cover both bits; a one-bit mask would leave bit 5 untouched when the
+   field is cleared or extracted. */
+#define DWT_FUNCTION_ACTION_Pos             4U                                         /*!< DWT FUNCTION: ACTION Position */
+#define DWT_FUNCTION_ACTION_Msk            (0x3UL << DWT_FUNCTION_ACTION_Pos)          /*!< DWT FUNCTION: ACTION Mask */
+
+#define DWT_FUNCTION_MATCH_Pos              0U                                         /*!< DWT FUNCTION: MATCH Position */
+#define DWT_FUNCTION_MATCH_Msk             (0xFUL /*<< DWT_FUNCTION_MATCH_Pos*/)       /*!< DWT FUNCTION: MATCH Mask */
+
+/*@}*/ /* end of group CMSIS_DWT */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_TPI     Trace Port Interface (TPI)
+  \brief    Type definitions for the Trace Port Interface (TPI)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Trace Port Interface Register (TPI).
+  \note   Every member is a 32-bit word. The RESERVEDn arrays are padding only: their
+          lengths are what place the following register at the offset given in its
+          comment (e.g. ACPR at 0x010 plus RESERVED1[55] puts SPPR at 0x0F0).
+          Do not resize or reorder them. The reserved numbering skips RESERVED6.
+ */
+typedef struct
+{
+  __IM  uint32_t SSPSR;                  /*!< Offset: 0x000 (R/ )  Supported Parallel Port Size Register */
+  __IOM uint32_t CSPSR;                  /*!< Offset: 0x004 (R/W)  Current Parallel Port Size Register */
+        uint32_t RESERVED0[2U];
+  __IOM uint32_t ACPR;                   /*!< Offset: 0x010 (R/W)  Asynchronous Clock Prescaler Register */
+        uint32_t RESERVED1[55U];
+  __IOM uint32_t SPPR;                   /*!< Offset: 0x0F0 (R/W)  Selected Pin Protocol Register */
+        uint32_t RESERVED2[131U];
+  __IM  uint32_t FFSR;                   /*!< Offset: 0x300 (R/ )  Formatter and Flush Status Register */
+  __IOM uint32_t FFCR;                   /*!< Offset: 0x304 (R/W)  Formatter and Flush Control Register */
+  __IOM uint32_t PSCR;                   /*!< Offset: 0x308 (R/W)  Periodic Synchronization Control Register */
+        uint32_t RESERVED3[759U];
+  __IM  uint32_t TRIGGER;                /*!< Offset: 0xEE8 (R/ )  TRIGGER Register */
+  __IM  uint32_t ITFTTD0;                /*!< Offset: 0xEEC (R/ )  Integration Test FIFO Test Data 0 Register */
+  __IOM uint32_t ITATBCTR2;              /*!< Offset: 0xEF0 (R/W)  Integration Test ATB Control Register 2 */
+        uint32_t RESERVED4[1U];
+  __IM  uint32_t ITATBCTR0;              /*!< Offset: 0xEF8 (R/ )  Integration Test ATB Control Register 0 */
+  __IM  uint32_t ITFTTD1;                /*!< Offset: 0xEFC (R/ )  Integration Test FIFO Test Data 1 Register */
+  __IOM uint32_t ITCTRL;                 /*!< Offset: 0xF00 (R/W)  Integration Mode Control */
+        uint32_t RESERVED5[39U];
+  __IOM uint32_t CLAIMSET;               /*!< Offset: 0xFA0 (R/W)  Claim tag set */
+  __IOM uint32_t CLAIMCLR;               /*!< Offset: 0xFA4 (R/W)  Claim tag clear */
+        uint32_t RESERVED7[8U];
+  __IM  uint32_t DEVID;                  /*!< Offset: 0xFC8 (R/ )  Device Configuration Register */
+  __IM  uint32_t DEVTYPE;                /*!< Offset: 0xFCC (R/ )  Device Type Identifier Register */
+} TPI_Type;
+
+/* TPI Asynchronous Clock Prescaler Register Definitions */
+#define TPI_ACPR_PRESCALER_Pos              0U                                         /*!< TPI ACPR: PRESCALER Position */
+#define TPI_ACPR_PRESCALER_Msk             (0x1FFFUL /*<< TPI_ACPR_PRESCALER_Pos*/)    /*!< TPI ACPR: PRESCALER Mask */
+
+/* TPI Selected Pin Protocol Register Definitions */
+#define TPI_SPPR_TXMODE_Pos                 0U                                         /*!< TPI SPPR: TXMODE Position */
+#define TPI_SPPR_TXMODE_Msk                (0x3UL /*<< TPI_SPPR_TXMODE_Pos*/)          /*!< TPI SPPR: TXMODE Mask */
+
+/* TPI Formatter and Flush Status Register Definitions */
+#define TPI_FFSR_FtNonStop_Pos              3U                                         /*!< TPI FFSR: FtNonStop Position */
+#define TPI_FFSR_FtNonStop_Msk             (0x1UL << TPI_FFSR_FtNonStop_Pos)           /*!< TPI FFSR: FtNonStop Mask */
+
+#define TPI_FFSR_TCPresent_Pos              2U                                         /*!< TPI FFSR: TCPresent Position */
+#define TPI_FFSR_TCPresent_Msk             (0x1UL << TPI_FFSR_TCPresent_Pos)           /*!< TPI FFSR: TCPresent Mask */
+
+#define TPI_FFSR_FtStopped_Pos              1U                                         /*!< TPI FFSR: FtStopped Position */
+#define TPI_FFSR_FtStopped_Msk             (0x1UL << TPI_FFSR_FtStopped_Pos)           /*!< TPI FFSR: FtStopped Mask */
+
+#define TPI_FFSR_FlInProg_Pos               0U                                         /*!< TPI FFSR: FlInProg Position */
+#define TPI_FFSR_FlInProg_Msk              (0x1UL /*<< TPI_FFSR_FlInProg_Pos*/)        /*!< TPI FFSR: FlInProg Mask */
+
+/* TPI Formatter and Flush Control Register Definitions */
+#define TPI_FFCR_TrigIn_Pos                 8U                                         /*!< TPI FFCR: TrigIn Position */
+#define TPI_FFCR_TrigIn_Msk                (0x1UL << TPI_FFCR_TrigIn_Pos)              /*!< TPI FFCR: TrigIn Mask */
+
+#define TPI_FFCR_FOnMan_Pos                 6U                                         /*!< TPI FFCR: FOnMan Position */
+#define TPI_FFCR_FOnMan_Msk                (0x1UL << TPI_FFCR_FOnMan_Pos)              /*!< TPI FFCR: FOnMan Mask */
+
+#define TPI_FFCR_EnFCont_Pos                1U                                         /*!< TPI FFCR: EnFCont Position */
+#define TPI_FFCR_EnFCont_Msk               (0x1UL << TPI_FFCR_EnFCont_Pos)             /*!< TPI FFCR: EnFCont Mask */
+
+/* TPI TRIGGER Register Definitions */
+#define TPI_TRIGGER_TRIGGER_Pos             0U                                         /*!< TPI TRIGGER: TRIGGER Position */
+#define TPI_TRIGGER_TRIGGER_Msk            (0x1UL /*<< TPI_TRIGGER_TRIGGER_Pos*/)      /*!< TPI TRIGGER: TRIGGER Mask */
+
+/* TPI Integration Test FIFO Test Data 0 Register Definitions */
+/* NOTE(review): both ATVALID masks below are two bits wide (0x3). As defined, the
+   ATB Interface 1 ATVALID mask (bits 27:26) overlaps the ATB Interface 2 byte count
+   mask (bits 28:27). Left unchanged here - confirm the intended field width against
+   the TPIU register description before relying on these two masks. */
+#define TPI_ITFTTD0_ATB_IF2_ATVALID_Pos    29U                                         /*!< TPI ITFTTD0: ATB Interface 2 ATVALID Position */
+#define TPI_ITFTTD0_ATB_IF2_ATVALID_Msk    (0x3UL << TPI_ITFTTD0_ATB_IF2_ATVALID_Pos)  /*!< TPI ITFTTD0: ATB Interface 2 ATVALID Mask */
+
+#define TPI_ITFTTD0_ATB_IF2_bytecount_Pos  27U                                         /*!< TPI ITFTTD0: ATB Interface 2 byte count Position */
+#define TPI_ITFTTD0_ATB_IF2_bytecount_Msk  (0x3UL << TPI_ITFTTD0_ATB_IF2_bytecount_Pos) /*!< TPI ITFTTD0: ATB Interface 2 byte count Mask */
+
+#define TPI_ITFTTD0_ATB_IF1_ATVALID_Pos    26U                                         /*!< TPI ITFTTD0: ATB Interface 1 ATVALID Position */
+#define TPI_ITFTTD0_ATB_IF1_ATVALID_Msk    (0x3UL << TPI_ITFTTD0_ATB_IF1_ATVALID_Pos)  /*!< TPI ITFTTD0: ATB Interface 1 ATVALID Mask */
+
+#define TPI_ITFTTD0_ATB_IF1_bytecount_Pos  24U                                         /*!< TPI ITFTTD0: ATB Interface 1 byte count Position */
+#define TPI_ITFTTD0_ATB_IF1_bytecount_Msk  (0x3UL << TPI_ITFTTD0_ATB_IF1_bytecount_Pos) /*!< TPI ITFTTD0: ATB Interface 1 byte count Mask */
+
+/* data2 lives at bits 23:16, so its mask is built from data2_Pos. Shifting by
+   data1_Pos instead would make this mask identical to the data1 mask (bits 15:8). */
+#define TPI_ITFTTD0_ATB_IF1_data2_Pos      16U                                         /*!< TPI ITFTTD0: ATB Interface 1 data2 Position */
+#define TPI_ITFTTD0_ATB_IF1_data2_Msk      (0xFFUL << TPI_ITFTTD0_ATB_IF1_data2_Pos)   /*!< TPI ITFTTD0: ATB Interface 1 data2 Mask */
+
+#define TPI_ITFTTD0_ATB_IF1_data1_Pos       8U                                         /*!< TPI ITFTTD0: ATB Interface 1 data1 Position */
+#define TPI_ITFTTD0_ATB_IF1_data1_Msk      (0xFFUL << TPI_ITFTTD0_ATB_IF1_data1_Pos)   /*!< TPI ITFTTD0: ATB Interface 1 data1 Mask */
+
+#define TPI_ITFTTD0_ATB_IF1_data0_Pos       0U                                          /*!< TPI ITFTTD0: ATB Interface 1 data0 Position */
+#define TPI_ITFTTD0_ATB_IF1_data0_Msk      (0xFFUL /*<< TPI_ITFTTD0_ATB_IF1_data0_Pos*/) /*!< TPI ITFTTD0: ATB Interface 1 data0 Mask */
+
+/* TPI Integration Test ATB Control Register 2 Register Definitions */
+/* As defined here, AFVALID1S and AFVALID2S both name bit 1, and ATREADY1S and
+   ATREADY2S both name bit 0, so each pair of masks has the same value.
+   NOTE(review): presumably one register bit reflects both interfaces - confirm
+   against the TPIU register description. */
+#define TPI_ITATBCTR2_AFVALID2S_Pos         1U                                         /*!< TPI ITATBCTR2: AFVALID2S Position */
+#define TPI_ITATBCTR2_AFVALID2S_Msk        (0x1UL << TPI_ITATBCTR2_AFVALID2S_Pos)      /*!< TPI ITATBCTR2: AFVALID2S Mask */
+
+#define TPI_ITATBCTR2_AFVALID1S_Pos         1U                                         /*!< TPI ITATBCTR2: AFVALID1S Position */
+#define TPI_ITATBCTR2_AFVALID1S_Msk        (0x1UL << TPI_ITATBCTR2_AFVALID1S_Pos)      /*!< TPI ITATBCTR2: AFVALID1S Mask */
+
+#define TPI_ITATBCTR2_ATREADY2S_Pos         0U                                         /*!< TPI ITATBCTR2: ATREADY2S Position */
+#define TPI_ITATBCTR2_ATREADY2S_Msk        (0x1UL /*<< TPI_ITATBCTR2_ATREADY2S_Pos*/)  /*!< TPI ITATBCTR2: ATREADY2S Mask */
+
+#define TPI_ITATBCTR2_ATREADY1S_Pos         0U                                         /*!< TPI ITATBCTR2: ATREADY1S Position */
+#define TPI_ITATBCTR2_ATREADY1S_Msk        (0x1UL /*<< TPI_ITATBCTR2_ATREADY1S_Pos*/)  /*!< TPI ITATBCTR2: ATREADY1S Mask */
+
+/* TPI Integration Test FIFO Test Data 1 Register Definitions */
+/* NOTE(review): both ATVALID masks below are two bits wide (0x3). As defined, the
+   ATB Interface 1 ATVALID mask (bits 27:26) overlaps the ATB Interface 2 byte count
+   mask (bits 28:27). Left unchanged here - confirm the intended field width against
+   the TPIU register description before relying on these two masks. */
+#define TPI_ITFTTD1_ATB_IF2_ATVALID_Pos    29U                                         /*!< TPI ITFTTD1: ATB Interface 2 ATVALID Position */
+#define TPI_ITFTTD1_ATB_IF2_ATVALID_Msk    (0x3UL << TPI_ITFTTD1_ATB_IF2_ATVALID_Pos)  /*!< TPI ITFTTD1: ATB Interface 2 ATVALID Mask */
+
+#define TPI_ITFTTD1_ATB_IF2_bytecount_Pos  27U                                         /*!< TPI ITFTTD1: ATB Interface 2 byte count Position */
+#define TPI_ITFTTD1_ATB_IF2_bytecount_Msk  (0x3UL << TPI_ITFTTD1_ATB_IF2_bytecount_Pos) /*!< TPI ITFTTD1: ATB Interface 2 byte count Mask */
+
+#define TPI_ITFTTD1_ATB_IF1_ATVALID_Pos    26U                                         /*!< TPI ITFTTD1: ATB Interface 1 ATVALID Position */
+#define TPI_ITFTTD1_ATB_IF1_ATVALID_Msk    (0x3UL << TPI_ITFTTD1_ATB_IF1_ATVALID_Pos)  /*!< TPI ITFTTD1: ATB Interface 1 ATVALID Mask */
+
+#define TPI_ITFTTD1_ATB_IF1_bytecount_Pos  24U                                         /*!< TPI ITFTTD1: ATB Interface 1 byte count Position */
+#define TPI_ITFTTD1_ATB_IF1_bytecount_Msk  (0x3UL << TPI_ITFTTD1_ATB_IF1_bytecount_Pos) /*!< TPI ITFTTD1: ATB Interface 1 byte count Mask */
+
+/* data2 lives at bits 23:16, so its mask is built from data2_Pos. Shifting by
+   data1_Pos instead would make this mask identical to the data1 mask (bits 15:8). */
+#define TPI_ITFTTD1_ATB_IF2_data2_Pos      16U                                         /*!< TPI ITFTTD1: ATB Interface 2 data2 Position */
+#define TPI_ITFTTD1_ATB_IF2_data2_Msk      (0xFFUL << TPI_ITFTTD1_ATB_IF2_data2_Pos)   /*!< TPI ITFTTD1: ATB Interface 2 data2 Mask */
+
+#define TPI_ITFTTD1_ATB_IF2_data1_Pos       8U                                         /*!< TPI ITFTTD1: ATB Interface 2 data1 Position */
+#define TPI_ITFTTD1_ATB_IF2_data1_Msk      (0xFFUL << TPI_ITFTTD1_ATB_IF2_data1_Pos)   /*!< TPI ITFTTD1: ATB Interface 2 data1 Mask */
+
+#define TPI_ITFTTD1_ATB_IF2_data0_Pos       0U                                          /*!< TPI ITFTTD1: ATB Interface 2 data0 Position */
+#define TPI_ITFTTD1_ATB_IF2_data0_Msk      (0xFFUL /*<< TPI_ITFTTD1_ATB_IF2_data0_Pos*/) /*!< TPI ITFTTD1: ATB Interface 2 data0 Mask */
+
+/* TPI Integration Test ATB Control Register 0 Definitions */
+/* As defined here, AFVALID1S and AFVALID2S both name bit 1, and ATREADY1S and
+   ATREADY2S both name bit 0, so each pair of masks has the same value.
+   NOTE(review): presumably one register bit reflects both interfaces - confirm
+   against the TPIU register description. */
+#define TPI_ITATBCTR0_AFVALID2S_Pos         1U                                         /*!< TPI ITATBCTR0: AFVALID2S Position */
+#define TPI_ITATBCTR0_AFVALID2S_Msk        (0x1UL << TPI_ITATBCTR0_AFVALID2S_Pos)      /*!< TPI ITATBCTR0: AFVALID2S Mask */
+
+#define TPI_ITATBCTR0_AFVALID1S_Pos         1U                                         /*!< TPI ITATBCTR0: AFVALID1S Position */
+#define TPI_ITATBCTR0_AFVALID1S_Msk        (0x1UL << TPI_ITATBCTR0_AFVALID1S_Pos)      /*!< TPI ITATBCTR0: AFVALID1S Mask */
+
+#define TPI_ITATBCTR0_ATREADY2S_Pos         0U                                         /*!< TPI ITATBCTR0: ATREADY2S Position */
+#define TPI_ITATBCTR0_ATREADY2S_Msk        (0x1UL /*<< TPI_ITATBCTR0_ATREADY2S_Pos*/)  /*!< TPI ITATBCTR0: ATREADY2S Mask */
+
+#define TPI_ITATBCTR0_ATREADY1S_Pos         0U                                         /*!< TPI ITATBCTR0: ATREADY1S Position */
+#define TPI_ITATBCTR0_ATREADY1S_Msk        (0x1UL /*<< TPI_ITATBCTR0_ATREADY1S_Pos*/)  /*!< TPI ITATBCTR0: ATREADY1S Mask */
+
+/* TPI Integration Mode Control Register Definitions */
+#define TPI_ITCTRL_Mode_Pos                 0U                                         /*!< TPI ITCTRL: Mode Position */
+#define TPI_ITCTRL_Mode_Msk                (0x3UL /*<< TPI_ITCTRL_Mode_Pos*/)          /*!< TPI ITCTRL: Mode Mask */
+
+/* TPI DEVID Register Definitions */
+#define TPI_DEVID_NRZVALID_Pos             11U                                         /*!< TPI DEVID: NRZVALID Position */
+#define TPI_DEVID_NRZVALID_Msk             (0x1UL << TPI_DEVID_NRZVALID_Pos)           /*!< TPI DEVID: NRZVALID Mask */
+
+#define TPI_DEVID_MANCVALID_Pos            10U                                         /*!< TPI DEVID: MANCVALID Position */
+#define TPI_DEVID_MANCVALID_Msk            (0x1UL << TPI_DEVID_MANCVALID_Pos)          /*!< TPI DEVID: MANCVALID Mask */
+
+#define TPI_DEVID_PTINVALID_Pos             9U                                         /*!< TPI DEVID: PTINVALID Position */
+#define TPI_DEVID_PTINVALID_Msk            (0x1UL << TPI_DEVID_PTINVALID_Pos)          /*!< TPI DEVID: PTINVALID Mask */
+
+#define TPI_DEVID_FIFOSZ_Pos                6U                                         /*!< TPI DEVID: FIFOSZ Position */
+#define TPI_DEVID_FIFOSZ_Msk               (0x7UL << TPI_DEVID_FIFOSZ_Pos)             /*!< TPI DEVID: FIFOSZ Mask */
+
+#define TPI_DEVID_NrTraceInput_Pos          0U                                         /*!< TPI DEVID: NrTraceInput Position */
+#define TPI_DEVID_NrTraceInput_Msk         (0x3FUL /*<< TPI_DEVID_NrTraceInput_Pos*/)  /*!< TPI DEVID: NrTraceInput Mask */
+
+/* TPI DEVTYPE Register Definitions */
+/* SubType sits at bits 7:4 and MajorType at bits 3:0. The shift therefore has to be
+   applied to the SubType mask; with the shift omitted both masks evaluate to 0xF and
+   SubType cannot be distinguished from MajorType. The position-0 field keeps its
+   shift commented out, following the convention used throughout this header. */
+#define TPI_DEVTYPE_SubType_Pos             4U                                         /*!< TPI DEVTYPE: SubType Position */
+#define TPI_DEVTYPE_SubType_Msk            (0xFUL << TPI_DEVTYPE_SubType_Pos)          /*!< TPI DEVTYPE: SubType Mask */
+
+#define TPI_DEVTYPE_MajorType_Pos           0U                                         /*!< TPI DEVTYPE: MajorType Position */
+#define TPI_DEVTYPE_MajorType_Msk          (0xFUL /*<< TPI_DEVTYPE_MajorType_Pos*/)    /*!< TPI DEVTYPE: MajorType Mask */
+
+/*@}*/ /* end of group CMSIS_TPI */
+
+
+#if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_MPU     Memory Protection Unit (MPU)
+  \brief    Type definitions for the Memory Protection Unit (MPU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Memory Protection Unit (MPU).
+  \note   Every member is a 32-bit word. RESERVED0 pads offset 0x02C so the MAIR
+          registers start at 0x030. The trailing anonymous union overlays the array
+          MAIR[2] on the named pair MAIR0/MAIR1, so MAIR[0] and MAIR0 (and MAIR[1]
+          and MAIR1) refer to the same storage.
+ */
+typedef struct
+{
+  __IM  uint32_t TYPE;                   /*!< Offset: 0x000 (R/ )  MPU Type Register */
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x004 (R/W)  MPU Control Register */
+  __IOM uint32_t RNR;                    /*!< Offset: 0x008 (R/W)  MPU Region Number Register */
+  __IOM uint32_t RBAR;                   /*!< Offset: 0x00C (R/W)  MPU Region Base Address Register */
+  __IOM uint32_t RLAR;                   /*!< Offset: 0x010 (R/W)  MPU Region Limit Address Register */
+  __IOM uint32_t RBAR_A1;                /*!< Offset: 0x014 (R/W)  MPU Region Base Address Register Alias 1 */
+  __IOM uint32_t RLAR_A1;                /*!< Offset: 0x018 (R/W)  MPU Region Limit Address Register Alias 1 */
+  __IOM uint32_t RBAR_A2;                /*!< Offset: 0x01C (R/W)  MPU Region Base Address Register Alias 2 */
+  __IOM uint32_t RLAR_A2;                /*!< Offset: 0x020 (R/W)  MPU Region Limit Address Register Alias 2 */
+  __IOM uint32_t RBAR_A3;                /*!< Offset: 0x024 (R/W)  MPU Region Base Address Register Alias 3 */
+  __IOM uint32_t RLAR_A3;                /*!< Offset: 0x028 (R/W)  MPU Region Limit Address Register Alias 3 */
+        uint32_t RESERVED0[1];
+  union {
+  __IOM uint32_t MAIR[2];
+  struct {
+  __IOM uint32_t MAIR0;                  /*!< Offset: 0x030 (R/W)  MPU Memory Attribute Indirection Register 0 */
+  __IOM uint32_t MAIR1;                  /*!< Offset: 0x034 (R/W)  MPU Memory Attribute Indirection Register 1 */
+  };
+  };
+} MPU_Type;
+
+#define MPU_TYPE_RALIASES                  4U
+
+/* MPU Type Register Definitions */
+#define MPU_TYPE_IREGION_Pos               16U                                            /*!< MPU TYPE: IREGION Position */
+#define MPU_TYPE_IREGION_Msk               (0xFFUL << MPU_TYPE_IREGION_Pos)               /*!< MPU TYPE: IREGION Mask */
+
+#define MPU_TYPE_DREGION_Pos                8U                                            /*!< MPU TYPE: DREGION Position */
+#define MPU_TYPE_DREGION_Msk               (0xFFUL << MPU_TYPE_DREGION_Pos)               /*!< MPU TYPE: DREGION Mask */
+
+#define MPU_TYPE_SEPARATE_Pos               0U                                            /*!< MPU TYPE: SEPARATE Position */
+#define MPU_TYPE_SEPARATE_Msk              (1UL /*<< MPU_TYPE_SEPARATE_Pos*/)             /*!< MPU TYPE: SEPARATE Mask */
+
+/* MPU Control Register Definitions */
+#define MPU_CTRL_PRIVDEFENA_Pos             2U                                            /*!< MPU CTRL: PRIVDEFENA Position */
+#define MPU_CTRL_PRIVDEFENA_Msk            (1UL << MPU_CTRL_PRIVDEFENA_Pos)               /*!< MPU CTRL: PRIVDEFENA Mask */
+
+#define MPU_CTRL_HFNMIENA_Pos               1U                                            /*!< MPU CTRL: HFNMIENA Position */
+#define MPU_CTRL_HFNMIENA_Msk              (1UL << MPU_CTRL_HFNMIENA_Pos)                 /*!< MPU CTRL: HFNMIENA Mask */
+
+#define MPU_CTRL_ENABLE_Pos                 0U                                            /*!< MPU CTRL: ENABLE Position */
+#define MPU_CTRL_ENABLE_Msk                (1UL /*<< MPU_CTRL_ENABLE_Pos*/)               /*!< MPU CTRL: ENABLE Mask */
+
+/* MPU Region Number Register Definitions */
+#define MPU_RNR_REGION_Pos                  0U                                            /*!< MPU RNR: REGION Position */
+#define MPU_RNR_REGION_Msk                 (0xFFUL /*<< MPU_RNR_REGION_Pos*/)             /*!< MPU RNR: REGION Mask */
+
+/* MPU Region Base Address Register Definitions */
+#define MPU_RBAR_BASE_Pos                   5U                                            /*!< MPU RBAR: BASE Position */
+#define MPU_RBAR_BASE_Msk                  (0x7FFFFFFUL << MPU_RBAR_BASE_Pos)             /*!< MPU RBAR: BASE Mask */
+
+#define MPU_RBAR_SH_Pos                     3U                                            /*!< MPU RBAR: SH Position */
+#define MPU_RBAR_SH_Msk                    (0x3UL << MPU_RBAR_SH_Pos)                     /*!< MPU RBAR: SH Mask */
+
+#define MPU_RBAR_AP_Pos                     1U                                            /*!< MPU RBAR: AP Position */
+#define MPU_RBAR_AP_Msk                    (0x3UL << MPU_RBAR_AP_Pos)                     /*!< MPU RBAR: AP Mask */
+
+#define MPU_RBAR_XN_Pos                     0U                                            /*!< MPU RBAR: XN Position */
+#define MPU_RBAR_XN_Msk                    (01UL /*<< MPU_RBAR_XN_Pos*/)                  /*!< MPU RBAR: XN Mask */
+
+/* MPU Region Limit Address Register Definitions */
+/* Field layout as defined below: LIMIT [31:5], AttrIndx [3:1], EN [0]. */
+#define MPU_RLAR_LIMIT_Pos                  5U                                            /*!< MPU RLAR: LIMIT Position */
+#define MPU_RLAR_LIMIT_Msk                 (0x7FFFFFFUL << MPU_RLAR_LIMIT_Pos)            /*!< MPU RLAR: LIMIT Mask */
+
+#define MPU_RLAR_AttrIndx_Pos               1U                                            /*!< MPU RLAR: AttrIndx Position */
+#define MPU_RLAR_AttrIndx_Msk              (0x7UL << MPU_RLAR_AttrIndx_Pos)               /*!< MPU RLAR: AttrIndx Mask */
+
+#define MPU_RLAR_EN_Pos                     0U                                            /*!< MPU RLAR: Region enable bit Position */
+#define MPU_RLAR_EN_Msk                    (1UL /*<< MPU_RLAR_EN_Pos*/)                   /*!< MPU RLAR: Region enable bit Mask */
+
+/* MPU Memory Attribute Indirection Register 0 Definitions */
+#define MPU_MAIR0_Attr3_Pos                24U                                            /*!< MPU MAIR0: Attr3 Position */
+#define MPU_MAIR0_Attr3_Msk                (0xFFUL << MPU_MAIR0_Attr3_Pos)                /*!< MPU MAIR0: Attr3 Mask */
+
+#define MPU_MAIR0_Attr2_Pos                16U                                            /*!< MPU MAIR0: Attr2 Position */
+#define MPU_MAIR0_Attr2_Msk                (0xFFUL << MPU_MAIR0_Attr2_Pos)                /*!< MPU MAIR0: Attr2 Mask */
+
+#define MPU_MAIR0_Attr1_Pos                 8U                                            /*!< MPU MAIR0: Attr1 Position */
+#define MPU_MAIR0_Attr1_Msk                (0xFFUL << MPU_MAIR0_Attr1_Pos)                /*!< MPU MAIR0: Attr1 Mask */
+
+#define MPU_MAIR0_Attr0_Pos                 0U                                            /*!< MPU MAIR0: Attr0 Position */
+#define MPU_MAIR0_Attr0_Msk                (0xFFUL /*<< MPU_MAIR0_Attr0_Pos*/)            /*!< MPU MAIR0: Attr0 Mask */
+
+/* MPU Memory Attribute Indirection Register 1 Definitions */
+#define MPU_MAIR1_Attr7_Pos                24U                                            /*!< MPU MAIR1: Attr7 Position */
+#define MPU_MAIR1_Attr7_Msk                (0xFFUL << MPU_MAIR1_Attr7_Pos)                /*!< MPU MAIR1: Attr7 Mask */
+
+#define MPU_MAIR1_Attr6_Pos                16U                                            /*!< MPU MAIR1: Attr6 Position */
+#define MPU_MAIR1_Attr6_Msk                (0xFFUL << MPU_MAIR1_Attr6_Pos)                /*!< MPU MAIR1: Attr6 Mask */
+
+#define MPU_MAIR1_Attr5_Pos                 8U                                            /*!< MPU MAIR1: Attr5 Position */
+#define MPU_MAIR1_Attr5_Msk                (0xFFUL << MPU_MAIR1_Attr5_Pos)                /*!< MPU MAIR1: Attr5 Mask */
+
+#define MPU_MAIR1_Attr4_Pos                 0U                                            /*!< MPU MAIR1: Attr4 Position */
+#define MPU_MAIR1_Attr4_Msk                (0xFFUL /*<< MPU_MAIR1_Attr4_Pos*/)            /*!< MPU MAIR1: Attr4 Mask */
+
+/*@} end of group CMSIS_MPU */
+#endif
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_SAU     Security Attribution Unit (SAU)
+  \brief    Type definitions for the Security Attribution Unit (SAU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Security Attribution Unit (SAU).
+  \note   When __SAUREGION_PRESENT is not defined as 1, the RNR/RBAR/RLAR members are
+          replaced by three reserved words, so SFSR and SFAR stay at offsets
+          0x014 and 0x018 in both configurations.
+ */
+typedef struct
+{
+  __IOM uint32_t CTRL;                   /*!< Offset: 0x000 (R/W)  SAU Control Register */
+  __IM  uint32_t TYPE;                   /*!< Offset: 0x004 (R/ )  SAU Type Register */
+#if defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U)
+  __IOM uint32_t RNR;                    /*!< Offset: 0x008 (R/W)  SAU Region Number Register */
+  __IOM uint32_t RBAR;                   /*!< Offset: 0x00C (R/W)  SAU Region Base Address Register */
+  __IOM uint32_t RLAR;                   /*!< Offset: 0x010 (R/W)  SAU Region Limit Address Register */
+#else
+        uint32_t RESERVED0[3];
+#endif
+  __IOM uint32_t SFSR;                   /*!< Offset: 0x014 (R/W)  Secure Fault Status Register */
+  __IOM uint32_t SFAR;                   /*!< Offset: 0x018 (R/W)  Secure Fault Address Register */
+} SAU_Type;
+
+/* SAU Control Register Definitions */
+#define SAU_CTRL_ALLNS_Pos                  1U                                            /*!< SAU CTRL: ALLNS Position */
+#define SAU_CTRL_ALLNS_Msk                 (1UL << SAU_CTRL_ALLNS_Pos)                    /*!< SAU CTRL: ALLNS Mask */
+
+#define SAU_CTRL_ENABLE_Pos                 0U                                            /*!< SAU CTRL: ENABLE Position */
+#define SAU_CTRL_ENABLE_Msk                (1UL /*<< SAU_CTRL_ENABLE_Pos*/)               /*!< SAU CTRL: ENABLE Mask */
+
+/* SAU Type Register Definitions */
+#define SAU_TYPE_SREGION_Pos                0U                                            /*!< SAU TYPE: SREGION Position */
+#define SAU_TYPE_SREGION_Msk               (0xFFUL /*<< SAU_TYPE_SREGION_Pos*/)           /*!< SAU TYPE: SREGION Mask */
+
+#if defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U)
+/* SAU Region Number Register Definitions */
+#define SAU_RNR_REGION_Pos                  0U                                            /*!< SAU RNR: REGION Position */
+#define SAU_RNR_REGION_Msk                 (0xFFUL /*<< SAU_RNR_REGION_Pos*/)             /*!< SAU RNR: REGION Mask */
+
+/* SAU Region Base Address Register Definitions */
+#define SAU_RBAR_BADDR_Pos                  5U                                            /*!< SAU RBAR: BADDR Position */
+#define SAU_RBAR_BADDR_Msk                 (0x7FFFFFFUL << SAU_RBAR_BADDR_Pos)            /*!< SAU RBAR: BADDR Mask */
+
+/* SAU Region Limit Address Register Definitions */
+#define SAU_RLAR_LADDR_Pos                  5U                                            /*!< SAU RLAR: LADDR Position */
+#define SAU_RLAR_LADDR_Msk                 (0x7FFFFFFUL << SAU_RLAR_LADDR_Pos)            /*!< SAU RLAR: LADDR Mask */
+
+#define SAU_RLAR_NSC_Pos                    1U                                            /*!< SAU RLAR: NSC Position */
+#define SAU_RLAR_NSC_Msk                   (1UL << SAU_RLAR_NSC_Pos)                      /*!< SAU RLAR: NSC Mask */
+
+#define SAU_RLAR_ENABLE_Pos                 0U                                            /*!< SAU RLAR: ENABLE Position */
+#define SAU_RLAR_ENABLE_Msk                (1UL /*<< SAU_RLAR_ENABLE_Pos*/)               /*!< SAU RLAR: ENABLE Mask */
+
+#endif /* defined (__SAUREGION_PRESENT) && (__SAUREGION_PRESENT == 1U) */
+
+/* Secure Fault Status Register Definitions */
+#define SAU_SFSR_LSERR_Pos                  7U                                            /*!< SAU SFSR: LSERR Position */
+#define SAU_SFSR_LSERR_Msk                 (1UL << SAU_SFSR_LSERR_Pos)                    /*!< SAU SFSR: LSERR Mask */
+
+#define SAU_SFSR_SFARVALID_Pos              6U                                            /*!< SAU SFSR: SFARVALID Position */
+#define SAU_SFSR_SFARVALID_Msk             (1UL << SAU_SFSR_SFARVALID_Pos)                /*!< SAU SFSR: SFARVALID Mask */
+
+#define SAU_SFSR_LSPERR_Pos                 5U                                            /*!< SAU SFSR: LSPERR Position */
+#define SAU_SFSR_LSPERR_Msk                (1UL << SAU_SFSR_LSPERR_Pos)                   /*!< SAU SFSR: LSPERR Mask */
+
+#define SAU_SFSR_INVTRAN_Pos                4U                                            /*!< SAU SFSR: INVTRAN Position */
+#define SAU_SFSR_INVTRAN_Msk               (1UL << SAU_SFSR_INVTRAN_Pos)                  /*!< SAU SFSR: INVTRAN Mask */
+
+#define SAU_SFSR_AUVIOL_Pos                 3U                                            /*!< SAU SFSR: AUVIOL Position */
+#define SAU_SFSR_AUVIOL_Msk                (1UL << SAU_SFSR_AUVIOL_Pos)                   /*!< SAU SFSR: AUVIOL Mask */
+
+#define SAU_SFSR_INVER_Pos                  2U                                            /*!< SAU SFSR: INVER Position */
+#define SAU_SFSR_INVER_Msk                 (1UL << SAU_SFSR_INVER_Pos)                    /*!< SAU SFSR: INVER Mask */
+
+#define SAU_SFSR_INVIS_Pos                  1U                                            /*!< SAU SFSR: INVIS Position */
+#define SAU_SFSR_INVIS_Msk                 (1UL << SAU_SFSR_INVIS_Pos)                    /*!< SAU SFSR: INVIS Mask */
+
+#define SAU_SFSR_INVEP_Pos                  0U                                            /*!< SAU SFSR: INVEP Position */
+#define SAU_SFSR_INVEP_Msk                 (1UL /*<< SAU_SFSR_INVEP_Pos*/)                /*!< SAU SFSR: INVEP Mask */
+
+/*@} end of group CMSIS_SAU */
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_FPU     Floating Point Unit (FPU)
+  \brief    Type definitions for the Floating Point Unit (FPU)
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Floating Point Unit (FPU).
+  \note   Every member is a 32-bit word. RESERVED0 pads offset 0x000 so that FPCCR,
+          the first named register, lands at 0x004.
+ */
+typedef struct
+{
+        uint32_t RESERVED0[1U];
+  __IOM uint32_t FPCCR;                  /*!< Offset: 0x004 (R/W)  Floating-Point Context Control Register */
+  __IOM uint32_t FPCAR;                  /*!< Offset: 0x008 (R/W)  Floating-Point Context Address Register */
+  __IOM uint32_t FPDSCR;                 /*!< Offset: 0x00C (R/W)  Floating-Point Default Status Control Register */
+  __IM  uint32_t MVFR0;                  /*!< Offset: 0x010 (R/ )  Media and VFP Feature Register 0 */
+  __IM  uint32_t MVFR1;                  /*!< Offset: 0x014 (R/ )  Media and VFP Feature Register 1 */
+  __IM  uint32_t MVFR2;                  /*!< Offset: 0x018 (R/ )  Media and VFP Feature Register 2 */
+} FPU_Type;
+
+/* Floating-Point Context Control Register Definitions */
+#define FPU_FPCCR_ASPEN_Pos                31U                                            /*!< FPCCR: ASPEN bit Position */
+#define FPU_FPCCR_ASPEN_Msk                (1UL << FPU_FPCCR_ASPEN_Pos)                   /*!< FPCCR: ASPEN bit Mask */
+
+#define FPU_FPCCR_LSPEN_Pos                30U                                            /*!< FPCCR: LSPEN bit Position */
+#define FPU_FPCCR_LSPEN_Msk                (1UL << FPU_FPCCR_LSPEN_Pos)                   /*!< FPCCR: LSPEN bit Mask */
+
+#define FPU_FPCCR_LSPENS_Pos               29U                                            /*!< FPCCR: LSPENS bit Position */
+#define FPU_FPCCR_LSPENS_Msk               (1UL << FPU_FPCCR_LSPENS_Pos)                  /*!< FPCCR: LSPENS bit Mask */
+
+#define FPU_FPCCR_CLRONRET_Pos             28U                                            /*!< FPCCR: CLRONRET bit Position */
+#define FPU_FPCCR_CLRONRET_Msk             (1UL << FPU_FPCCR_CLRONRET_Pos)                /*!< FPCCR: CLRONRET bit Mask */
+
+#define FPU_FPCCR_CLRONRETS_Pos            27U                                            /*!< FPCCR: CLRONRETS bit Position */
+#define FPU_FPCCR_CLRONRETS_Msk            (1UL << FPU_FPCCR_CLRONRETS_Pos)               /*!< FPCCR: CLRONRETS bit Mask */
+
+#define FPU_FPCCR_TS_Pos                   26U                                            /*!< FPCCR: TS bit Position */
+#define FPU_FPCCR_TS_Msk                   (1UL << FPU_FPCCR_TS_Pos)                      /*!< FPCCR: TS bit Mask */
+
+#define FPU_FPCCR_UFRDY_Pos                10U                                            /*!< FPCCR: UFRDY bit Position */
+#define FPU_FPCCR_UFRDY_Msk                (1UL << FPU_FPCCR_UFRDY_Pos)                   /*!< FPCCR: UFRDY bit Mask */
+
+#define FPU_FPCCR_SPLIMVIOL_Pos             9U                                            /*!< FPCCR: SPLIMVIOL bit Position */
+#define FPU_FPCCR_SPLIMVIOL_Msk            (1UL << FPU_FPCCR_SPLIMVIOL_Pos)               /*!< FPCCR: SPLIMVIOL bit Mask */
+
+#define FPU_FPCCR_MONRDY_Pos                8U                                            /*!< FPCCR: MONRDY bit Position */
+#define FPU_FPCCR_MONRDY_Msk               (1UL << FPU_FPCCR_MONRDY_Pos)                  /*!< FPCCR: MONRDY bit Mask */
+
+#define FPU_FPCCR_SFRDY_Pos                 7U                                            /*!< FPCCR: SFRDY bit Position */
+#define FPU_FPCCR_SFRDY_Msk                (1UL << FPU_FPCCR_SFRDY_Pos)                   /*!< FPCCR: SFRDY bit Mask */
+
+#define FPU_FPCCR_BFRDY_Pos                 6U                                            /*!< FPCCR: BFRDY bit Position */
+#define FPU_FPCCR_BFRDY_Msk                (1UL << FPU_FPCCR_BFRDY_Pos)                   /*!< FPCCR: BFRDY bit Mask */
+
+#define FPU_FPCCR_MMRDY_Pos                 5U                                            /*!< FPCCR: MMRDY bit Position */
+#define FPU_FPCCR_MMRDY_Msk                (1UL << FPU_FPCCR_MMRDY_Pos)                   /*!< FPCCR: MMRDY bit Mask */
+
+#define FPU_FPCCR_HFRDY_Pos                 4U                                            /*!< FPCCR: HFRDY bit Position */
+#define FPU_FPCCR_HFRDY_Msk                (1UL << FPU_FPCCR_HFRDY_Pos)                   /*!< FPCCR: HFRDY bit Mask */
+
+#define FPU_FPCCR_THREAD_Pos                3U                                            /*!< FPCCR: processor mode bit Position */
+#define FPU_FPCCR_THREAD_Msk               (1UL << FPU_FPCCR_THREAD_Pos)                  /*!< FPCCR: processor mode active bit Mask */
+
+#define FPU_FPCCR_S_Pos                     2U                                            /*!< FPCCR: Security status of the FP context bit Position */
+#define FPU_FPCCR_S_Msk                    (1UL << FPU_FPCCR_S_Pos)                       /*!< FPCCR: Security status of the FP context bit Mask */
+
+#define FPU_FPCCR_USER_Pos                  1U                                            /*!< FPCCR: privilege level bit Position */
+#define FPU_FPCCR_USER_Msk                 (1UL << FPU_FPCCR_USER_Pos)                    /*!< FPCCR: privilege level bit Mask */
+
+#define FPU_FPCCR_LSPACT_Pos                0U                                            /*!< FPCCR: Lazy state preservation active bit Position */
+#define FPU_FPCCR_LSPACT_Msk               (1UL /*<< FPU_FPCCR_LSPACT_Pos*/)              /*!< FPCCR: Lazy state preservation active bit Mask */
+
+/* Floating-Point Context Address Register Definitions */
+#define FPU_FPCAR_ADDRESS_Pos               3U                                            /*!< FPCAR: ADDRESS bits Position */
+#define FPU_FPCAR_ADDRESS_Msk              (0x1FFFFFFFUL << FPU_FPCAR_ADDRESS_Pos)        /*!< FPCAR: ADDRESS bits Mask */
+
+/* Floating-Point Default Status Control Register Definitions */
+#define FPU_FPDSCR_AHP_Pos                 26U                                            /*!< FPDSCR: AHP bit Position */
+#define FPU_FPDSCR_AHP_Msk                 (1UL << FPU_FPDSCR_AHP_Pos)                    /*!< FPDSCR: AHP bit Mask */
+
+#define FPU_FPDSCR_DN_Pos                  25U                                            /*!< FPDSCR: DN bit Position */
+#define FPU_FPDSCR_DN_Msk                  (1UL << FPU_FPDSCR_DN_Pos)                     /*!< FPDSCR: DN bit Mask */
+
+#define FPU_FPDSCR_FZ_Pos                  24U                                            /*!< FPDSCR: FZ bit Position */
+#define FPU_FPDSCR_FZ_Msk                  (1UL << FPU_FPDSCR_FZ_Pos)                     /*!< FPDSCR: FZ bit Mask */
+
+#define FPU_FPDSCR_RMode_Pos               22U                                            /*!< FPDSCR: RMode bits Position */
+#define FPU_FPDSCR_RMode_Msk               (3UL << FPU_FPDSCR_RMode_Pos)                  /*!< FPDSCR: RMode bits Mask */
+
+/* Media and VFP Feature Register 0 Definitions */
+#define FPU_MVFR0_FP_rounding_modes_Pos    28U                                            /*!< MVFR0: FP rounding modes bits Position */
+#define FPU_MVFR0_FP_rounding_modes_Msk    (0xFUL << FPU_MVFR0_FP_rounding_modes_Pos)     /*!< MVFR0: FP rounding modes bits Mask */
+
+#define FPU_MVFR0_Short_vectors_Pos        24U                                            /*!< MVFR0: Short vectors bits Position */
+#define FPU_MVFR0_Short_vectors_Msk        (0xFUL << FPU_MVFR0_Short_vectors_Pos)         /*!< MVFR0: Short vectors bits Mask */
+
+#define FPU_MVFR0_Square_root_Pos          20U                                            /*!< MVFR0: Square root bits Position */
+#define FPU_MVFR0_Square_root_Msk          (0xFUL << FPU_MVFR0_Square_root_Pos)           /*!< MVFR0: Square root bits Mask */
+
+#define FPU_MVFR0_Divide_Pos               16U                                            /*!< MVFR0: Divide bits Position */
+#define FPU_MVFR0_Divide_Msk               (0xFUL << FPU_MVFR0_Divide_Pos)                /*!< MVFR0: Divide bits Mask */
+
+#define FPU_MVFR0_FP_excep_trapping_Pos    12U                                            /*!< MVFR0: FP exception trapping bits Position */
+#define FPU_MVFR0_FP_excep_trapping_Msk    (0xFUL << FPU_MVFR0_FP_excep_trapping_Pos)     /*!< MVFR0: FP exception trapping bits Mask */
+
+#define FPU_MVFR0_Double_precision_Pos      8U                                            /*!< MVFR0: Double-precision bits Position */
+#define FPU_MVFR0_Double_precision_Msk     (0xFUL << FPU_MVFR0_Double_precision_Pos)      /*!< MVFR0: Double-precision bits Mask */
+
+#define FPU_MVFR0_Single_precision_Pos      4U                                            /*!< MVFR0: Single-precision bits Position */
+#define FPU_MVFR0_Single_precision_Msk     (0xFUL << FPU_MVFR0_Single_precision_Pos)      /*!< MVFR0: Single-precision bits Mask */
+
+#define FPU_MVFR0_A_SIMD_registers_Pos      0U                                            /*!< MVFR0: A_SIMD registers bits Position */
+#define FPU_MVFR0_A_SIMD_registers_Msk     (0xFUL /*<< FPU_MVFR0_A_SIMD_registers_Pos*/)  /*!< MVFR0: A_SIMD registers bits Mask */
+
+/* Media and VFP Feature Register 1 Definitions */
+#define FPU_MVFR1_FP_fused_MAC_Pos         28U                                            /*!< MVFR1: FP fused MAC bits Position */
+#define FPU_MVFR1_FP_fused_MAC_Msk         (0xFUL << FPU_MVFR1_FP_fused_MAC_Pos)          /*!< MVFR1: FP fused MAC bits Mask */
+
+#define FPU_MVFR1_FP_HPFP_Pos              24U                                            /*!< MVFR1: FP HPFP bits Position */
+#define FPU_MVFR1_FP_HPFP_Msk              (0xFUL << FPU_MVFR1_FP_HPFP_Pos)               /*!< MVFR1: FP HPFP bits Mask */
+
+#define FPU_MVFR1_D_NaN_mode_Pos            4U                                            /*!< MVFR1: D_NaN mode bits Position */
+#define FPU_MVFR1_D_NaN_mode_Msk           (0xFUL << FPU_MVFR1_D_NaN_mode_Pos)            /*!< MVFR1: D_NaN mode bits Mask */
+
+#define FPU_MVFR1_FtZ_mode_Pos              0U                                            /*!< MVFR1: FtZ mode bits Position */
+#define FPU_MVFR1_FtZ_mode_Msk             (0xFUL /*<< FPU_MVFR1_FtZ_mode_Pos*/)          /*!< MVFR1: FtZ mode bits Mask */
+
+/* Media and VFP Feature Register 2 Definitions */
+#define FPU_MVFR2_FPMisc_Pos                4U                                            /*!< MVFR2: FPMisc bits Position */
+#define FPU_MVFR2_FPMisc_Msk               (0xFUL << FPU_MVFR2_FPMisc_Pos)                /*!< MVFR2: FPMisc bits Mask */
+
+/*@} end of group CMSIS_FPU */
+
+
+
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup CMSIS_DCB       Debug Control Block
+  \brief    Type definitions for the Debug Control Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Debug Control Block Registers (DCB).
+ */
+typedef struct
+{
+  __IOM uint32_t DHCSR;                  /*!< Offset: 0x000 (R/W)  Debug Halting Control and Status Register */
+  __OM  uint32_t DCRSR;                  /*!< Offset: 0x004 ( /W)  Debug Core Register Selector Register */
+  __IOM uint32_t DCRDR;                  /*!< Offset: 0x008 (R/W)  Debug Core Register Data Register */
+  __IOM uint32_t DEMCR;                  /*!< Offset: 0x00C (R/W)  Debug Exception and Monitor Control Register */
+        uint32_t RESERVED0[1U];         /* Offset: 0x010  Reserved word between DEMCR (0x00C) and DAUTHCTRL (0x014) */
+  __IOM uint32_t DAUTHCTRL;              /*!< Offset: 0x014 (R/W)  Debug Authentication Control Register */
+  __IOM uint32_t DSCSR;                  /*!< Offset: 0x018 (R/W)  Debug Security Control and Status Register */
+} DCB_Type;
+
+/* DHCSR, Debug Halting Control and Status Register Definitions (DBGKEY spans bits [31:16] and therefore overlaps the S_* fields below) */
+#define DCB_DHCSR_DBGKEY_Pos               16U                                            /*!< DCB DHCSR: Debug key Position */
+#define DCB_DHCSR_DBGKEY_Msk               (0xFFFFUL << DCB_DHCSR_DBGKEY_Pos)             /*!< DCB DHCSR: Debug key Mask */
+
+#define DCB_DHCSR_S_RESTART_ST_Pos         26U                                            /*!< DCB DHCSR: Restart sticky status Position */
+#define DCB_DHCSR_S_RESTART_ST_Msk         (0x1UL << DCB_DHCSR_S_RESTART_ST_Pos)          /*!< DCB DHCSR: Restart sticky status Mask */
+
+#define DCB_DHCSR_S_RESET_ST_Pos           25U                                            /*!< DCB DHCSR: Reset sticky status Position */
+#define DCB_DHCSR_S_RESET_ST_Msk           (0x1UL << DCB_DHCSR_S_RESET_ST_Pos)            /*!< DCB DHCSR: Reset sticky status Mask */
+
+#define DCB_DHCSR_S_RETIRE_ST_Pos          24U                                            /*!< DCB DHCSR: Retire sticky status Position */
+#define DCB_DHCSR_S_RETIRE_ST_Msk          (0x1UL << DCB_DHCSR_S_RETIRE_ST_Pos)           /*!< DCB DHCSR: Retire sticky status Mask */
+
+#define DCB_DHCSR_S_SDE_Pos                20U                                            /*!< DCB DHCSR: Secure debug enabled Position */
+#define DCB_DHCSR_S_SDE_Msk                (0x1UL << DCB_DHCSR_S_SDE_Pos)                 /*!< DCB DHCSR: Secure debug enabled Mask */
+
+#define DCB_DHCSR_S_LOCKUP_Pos             19U                                            /*!< DCB DHCSR: Lockup status Position */
+#define DCB_DHCSR_S_LOCKUP_Msk             (0x1UL << DCB_DHCSR_S_LOCKUP_Pos)              /*!< DCB DHCSR: Lockup status Mask */
+
+#define DCB_DHCSR_S_SLEEP_Pos              18U                                            /*!< DCB DHCSR: Sleeping status Position */
+#define DCB_DHCSR_S_SLEEP_Msk              (0x1UL << DCB_DHCSR_S_SLEEP_Pos)               /*!< DCB DHCSR: Sleeping status Mask */
+
+#define DCB_DHCSR_S_HALT_Pos               17U                                            /*!< DCB DHCSR: Halted status Position */
+#define DCB_DHCSR_S_HALT_Msk               (0x1UL << DCB_DHCSR_S_HALT_Pos)                /*!< DCB DHCSR: Halted status Mask */
+
+#define DCB_DHCSR_S_REGRDY_Pos             16U                                            /*!< DCB DHCSR: Register ready status Position */
+#define DCB_DHCSR_S_REGRDY_Msk             (0x1UL << DCB_DHCSR_S_REGRDY_Pos)              /*!< DCB DHCSR: Register ready status Mask */
+
+#define DCB_DHCSR_C_SNAPSTALL_Pos           5U                                            /*!< DCB DHCSR: Snap stall control Position */
+#define DCB_DHCSR_C_SNAPSTALL_Msk          (0x1UL << DCB_DHCSR_C_SNAPSTALL_Pos)           /*!< DCB DHCSR: Snap stall control Mask */
+
+#define DCB_DHCSR_C_MASKINTS_Pos            3U                                            /*!< DCB DHCSR: Mask interrupts control Position */
+#define DCB_DHCSR_C_MASKINTS_Msk           (0x1UL << DCB_DHCSR_C_MASKINTS_Pos)            /*!< DCB DHCSR: Mask interrupts control Mask */
+
+#define DCB_DHCSR_C_STEP_Pos                2U                                            /*!< DCB DHCSR: Step control Position */
+#define DCB_DHCSR_C_STEP_Msk               (0x1UL << DCB_DHCSR_C_STEP_Pos)                /*!< DCB DHCSR: Step control Mask */
+
+#define DCB_DHCSR_C_HALT_Pos                1U                                            /*!< DCB DHCSR: Halt control Position */
+#define DCB_DHCSR_C_HALT_Msk               (0x1UL << DCB_DHCSR_C_HALT_Pos)                /*!< DCB DHCSR: Halt control Mask */
+
+#define DCB_DHCSR_C_DEBUGEN_Pos             0U                                            /*!< DCB DHCSR: Debug enable control Position */
+#define DCB_DHCSR_C_DEBUGEN_Msk            (0x1UL /*<< DCB_DHCSR_C_DEBUGEN_Pos*/)         /*!< DCB DHCSR: Debug enable control Mask */
+
+/* DCRSR, Debug Core Register Select Register Definitions */
+#define DCB_DCRSR_REGWnR_Pos               16U                                            /*!< DCB DCRSR: Register write/not-read Position */
+#define DCB_DCRSR_REGWnR_Msk               (0x1UL << DCB_DCRSR_REGWnR_Pos)                /*!< DCB DCRSR: Register write/not-read Mask */
+
+#define DCB_DCRSR_REGSEL_Pos                0U                                            /*!< DCB DCRSR: Register selector Position */
+#define DCB_DCRSR_REGSEL_Msk               (0x7FUL /*<< DCB_DCRSR_REGSEL_Pos*/)           /*!< DCB DCRSR: Register selector Mask */
+
+/* DCRDR, Debug Core Register Data Register Definitions */
+#define DCB_DCRDR_DBGTMP_Pos                0U                                            /*!< DCB DCRDR: Data temporary buffer Position */
+#define DCB_DCRDR_DBGTMP_Msk               (0xFFFFFFFFUL /*<< DCB_DCRDR_DBGTMP_Pos*/)     /*!< DCB DCRDR: Data temporary buffer Mask */
+
+/* DEMCR, Debug Exception and Monitor Control Register Definitions */
+#define DCB_DEMCR_TRCENA_Pos               24U                                            /*!< DCB DEMCR: Trace enable Position */
+#define DCB_DEMCR_TRCENA_Msk               (0x1UL << DCB_DEMCR_TRCENA_Pos)                /*!< DCB DEMCR: Trace enable Mask */
+
+#define DCB_DEMCR_MONPRKEY_Pos             23U                                            /*!< DCB DEMCR: Monitor pend request key Position */
+#define DCB_DEMCR_MONPRKEY_Msk             (0x1UL << DCB_DEMCR_MONPRKEY_Pos)              /*!< DCB DEMCR: Monitor pend request key Mask */
+
+#define DCB_DEMCR_UMON_EN_Pos              21U                                            /*!< DCB DEMCR: Unprivileged monitor enable Position */
+#define DCB_DEMCR_UMON_EN_Msk              (0x1UL << DCB_DEMCR_UMON_EN_Pos)               /*!< DCB DEMCR: Unprivileged monitor enable Mask */
+
+#define DCB_DEMCR_SDME_Pos                 20U                                            /*!< DCB DEMCR: Secure DebugMonitor enable Position */
+#define DCB_DEMCR_SDME_Msk                 (0x1UL << DCB_DEMCR_SDME_Pos)                  /*!< DCB DEMCR: Secure DebugMonitor enable Mask */
+
+#define DCB_DEMCR_MON_REQ_Pos              19U                                            /*!< DCB DEMCR: Monitor request Position */
+#define DCB_DEMCR_MON_REQ_Msk              (0x1UL << DCB_DEMCR_MON_REQ_Pos)               /*!< DCB DEMCR: Monitor request Mask */
+
+#define DCB_DEMCR_MON_STEP_Pos             18U                                            /*!< DCB DEMCR: Monitor step Position */
+#define DCB_DEMCR_MON_STEP_Msk             (0x1UL << DCB_DEMCR_MON_STEP_Pos)              /*!< DCB DEMCR: Monitor step Mask */
+
+#define DCB_DEMCR_MON_PEND_Pos             17U                                            /*!< DCB DEMCR: Monitor pend Position */
+#define DCB_DEMCR_MON_PEND_Msk             (0x1UL << DCB_DEMCR_MON_PEND_Pos)              /*!< DCB DEMCR: Monitor pend Mask */
+
+#define DCB_DEMCR_MON_EN_Pos               16U                                            /*!< DCB DEMCR: Monitor enable Position */
+#define DCB_DEMCR_MON_EN_Msk               (0x1UL << DCB_DEMCR_MON_EN_Pos)                /*!< DCB DEMCR: Monitor enable Mask */
+
+#define DCB_DEMCR_VC_SFERR_Pos             11U                                            /*!< DCB DEMCR: Vector Catch SecureFault Position */
+#define DCB_DEMCR_VC_SFERR_Msk             (0x1UL << DCB_DEMCR_VC_SFERR_Pos)              /*!< DCB DEMCR: Vector Catch SecureFault Mask */
+
+#define DCB_DEMCR_VC_HARDERR_Pos           10U                                            /*!< DCB DEMCR: Vector Catch HardFault errors Position */
+#define DCB_DEMCR_VC_HARDERR_Msk           (0x1UL << DCB_DEMCR_VC_HARDERR_Pos)            /*!< DCB DEMCR: Vector Catch HardFault errors Mask */
+
+#define DCB_DEMCR_VC_INTERR_Pos             9U                                            /*!< DCB DEMCR: Vector Catch interrupt errors Position */
+#define DCB_DEMCR_VC_INTERR_Msk            (0x1UL << DCB_DEMCR_VC_INTERR_Pos)             /*!< DCB DEMCR: Vector Catch interrupt errors Mask */
+
+#define DCB_DEMCR_VC_BUSERR_Pos             8U                                            /*!< DCB DEMCR: Vector Catch BusFault errors Position */
+#define DCB_DEMCR_VC_BUSERR_Msk            (0x1UL << DCB_DEMCR_VC_BUSERR_Pos)             /*!< DCB DEMCR: Vector Catch BusFault errors Mask */
+
+#define DCB_DEMCR_VC_STATERR_Pos            7U                                            /*!< DCB DEMCR: Vector Catch state errors Position */
+#define DCB_DEMCR_VC_STATERR_Msk           (0x1UL << DCB_DEMCR_VC_STATERR_Pos)            /*!< DCB DEMCR: Vector Catch state errors Mask */
+
+#define DCB_DEMCR_VC_CHKERR_Pos             6U                                            /*!< DCB DEMCR: Vector Catch check errors Position */
+#define DCB_DEMCR_VC_CHKERR_Msk            (0x1UL << DCB_DEMCR_VC_CHKERR_Pos)             /*!< DCB DEMCR: Vector Catch check errors Mask */
+
+#define DCB_DEMCR_VC_NOCPERR_Pos            5U                                            /*!< DCB DEMCR: Vector Catch NOCP errors Position */
+#define DCB_DEMCR_VC_NOCPERR_Msk           (0x1UL << DCB_DEMCR_VC_NOCPERR_Pos)            /*!< DCB DEMCR: Vector Catch NOCP errors Mask */
+
+#define DCB_DEMCR_VC_MMERR_Pos              4U                                            /*!< DCB DEMCR: Vector Catch MemManage errors Position */
+#define DCB_DEMCR_VC_MMERR_Msk             (0x1UL << DCB_DEMCR_VC_MMERR_Pos)              /*!< DCB DEMCR: Vector Catch MemManage errors Mask */
+
+#define DCB_DEMCR_VC_CORERESET_Pos          0U                                            /*!< DCB DEMCR: Vector Catch Core reset Position */
+#define DCB_DEMCR_VC_CORERESET_Msk         (0x1UL /*<< DCB_DEMCR_VC_CORERESET_Pos*/)      /*!< DCB DEMCR: Vector Catch Core reset Mask */
+
+/* DAUTHCTRL, Debug Authentication Control Register Definitions */
+#define DCB_DAUTHCTRL_INTSPNIDEN_Pos        3U                                            /*!< DCB DAUTHCTRL: Internal Secure non-invasive debug enable Position */
+#define DCB_DAUTHCTRL_INTSPNIDEN_Msk       (0x1UL << DCB_DAUTHCTRL_INTSPNIDEN_Pos)        /*!< DCB DAUTHCTRL: Internal Secure non-invasive debug enable Mask */
+
+#define DCB_DAUTHCTRL_SPNIDENSEL_Pos        2U                                            /*!< DCB DAUTHCTRL: Secure non-invasive debug enable select Position */
+#define DCB_DAUTHCTRL_SPNIDENSEL_Msk       (0x1UL << DCB_DAUTHCTRL_SPNIDENSEL_Pos)        /*!< DCB DAUTHCTRL: Secure non-invasive debug enable select Mask */
+
+#define DCB_DAUTHCTRL_INTSPIDEN_Pos         1U                                            /*!< DCB DAUTHCTRL: Internal Secure invasive debug enable Position */
+#define DCB_DAUTHCTRL_INTSPIDEN_Msk        (0x1UL << DCB_DAUTHCTRL_INTSPIDEN_Pos)         /*!< DCB DAUTHCTRL: Internal Secure invasive debug enable Mask */
+
+#define DCB_DAUTHCTRL_SPIDENSEL_Pos         0U                                            /*!< DCB DAUTHCTRL: Secure invasive debug enable select Position */
+#define DCB_DAUTHCTRL_SPIDENSEL_Msk        (0x1UL /*<< DCB_DAUTHCTRL_SPIDENSEL_Pos*/)     /*!< DCB DAUTHCTRL: Secure invasive debug enable select Mask */
+
+/* DSCSR, Debug Security Control and Status Register Definitions */
+#define DCB_DSCSR_CDSKEY_Pos               17U                                            /*!< DCB DSCSR: CDS write-enable key Position */
+#define DCB_DSCSR_CDSKEY_Msk               (0x1UL << DCB_DSCSR_CDSKEY_Pos)                /*!< DCB DSCSR: CDS write-enable key Mask */
+
+#define DCB_DSCSR_CDS_Pos                  16U                                            /*!< DCB DSCSR: Current domain Secure Position */
+#define DCB_DSCSR_CDS_Msk                  (0x1UL << DCB_DSCSR_CDS_Pos)                   /*!< DCB DSCSR: Current domain Secure Mask */
+
+#define DCB_DSCSR_SBRSEL_Pos                1U                                            /*!< DCB DSCSR: Secure banked register select Position */
+#define DCB_DSCSR_SBRSEL_Msk               (0x1UL << DCB_DSCSR_SBRSEL_Pos)                /*!< DCB DSCSR: Secure banked register select Mask */
+
+#define DCB_DSCSR_SBRSELEN_Pos              0U                                            /*!< DCB DSCSR: Secure banked register select enable Position */
+#define DCB_DSCSR_SBRSELEN_Msk             (0x1UL /*<< DCB_DSCSR_SBRSELEN_Pos*/)          /*!< DCB DSCSR: Secure banked register select enable Mask */
+
+/*@} end of group CMSIS_DCB */
+
+
+
+/**
+  \ingroup  CMSIS_core_register
+  \defgroup CMSIS_DIB       Debug Identification Block
+  \brief    Type definitions for the Debug Identification Block Registers
+  @{
+ */
+
+/**
+  \brief  Structure type to access the Debug Identification Block Registers (DIB): DLAR is write-only, the remaining four registers are read-only.
+ */
+typedef struct
+{
+  __OM  uint32_t DLAR;                   /*!< Offset: 0x000 ( /W)  SCS Software Lock Access Register */
+  __IM  uint32_t DLSR;                   /*!< Offset: 0x004 (R/ )  SCS Software Lock Status Register */
+  __IM  uint32_t DAUTHSTATUS;            /*!< Offset: 0x008 (R/ )  Debug Authentication Status Register */
+  __IM  uint32_t DDEVARCH;               /*!< Offset: 0x00C (R/ )  SCS Device Architecture Register */
+  __IM  uint32_t DDEVTYPE;               /*!< Offset: 0x010 (R/ )  SCS Device Type Register */
+} DIB_Type;
+
+/* DLAR, SCS Software Lock Access Register Definitions */
+#define DIB_DLAR_KEY_Pos                    0U                                            /*!< DIB DLAR: KEY Position */
+#define DIB_DLAR_KEY_Msk                   (0xFFFFFFFFUL /*<< DIB_DLAR_KEY_Pos */)        /*!< DIB DLAR: KEY Mask */
+
+/* DLSR, SCS Software Lock Status Register Definitions */
+#define DIB_DLSR_nTT_Pos                    2U                                            /*!< DIB DLSR: Not thirty-two bit Position */
+#define DIB_DLSR_nTT_Msk                   (0x1UL << DIB_DLSR_nTT_Pos )                   /*!< DIB DLSR: Not thirty-two bit Mask */
+
+#define DIB_DLSR_SLK_Pos                    1U                                            /*!< DIB DLSR: Software Lock status Position */
+#define DIB_DLSR_SLK_Msk                   (0x1UL << DIB_DLSR_SLK_Pos )                   /*!< DIB DLSR: Software Lock status Mask */
+
+#define DIB_DLSR_SLI_Pos                    0U                                            /*!< DIB DLSR: Software Lock implemented Position */
+#define DIB_DLSR_SLI_Msk                   (0x1UL /*<< DIB_DLSR_SLI_Pos*/)                /*!< DIB DLSR: Software Lock implemented Mask */
+
+/* DAUTHSTATUS, Debug Authentication Status Register Definitions */
+#define DIB_DAUTHSTATUS_SNID_Pos            6U                                            /*!< DIB DAUTHSTATUS: Secure Non-invasive Debug Position */
+#define DIB_DAUTHSTATUS_SNID_Msk           (0x3UL << DIB_DAUTHSTATUS_SNID_Pos )           /*!< DIB DAUTHSTATUS: Secure Non-invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_SID_Pos             4U                                            /*!< DIB DAUTHSTATUS: Secure Invasive Debug Position */
+#define DIB_DAUTHSTATUS_SID_Msk            (0x3UL << DIB_DAUTHSTATUS_SID_Pos )            /*!< DIB DAUTHSTATUS: Secure Invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_NSNID_Pos           2U                                            /*!< DIB DAUTHSTATUS: Non-secure Non-invasive Debug Position */
+#define DIB_DAUTHSTATUS_NSNID_Msk          (0x3UL << DIB_DAUTHSTATUS_NSNID_Pos )          /*!< DIB DAUTHSTATUS: Non-secure Non-invasive Debug Mask */
+
+#define DIB_DAUTHSTATUS_NSID_Pos            0U                                            /*!< DIB DAUTHSTATUS: Non-secure Invasive Debug Position */
+#define DIB_DAUTHSTATUS_NSID_Msk           (0x3UL /*<< DIB_DAUTHSTATUS_NSID_Pos*/)        /*!< DIB DAUTHSTATUS: Non-secure Invasive Debug Mask */
+
+/* DDEVARCH, SCS Device Architecture Register Definitions (fields are contiguous and non-overlapping: ARCHITECT [31:21], PRESENT [20], REVISION [19:16], ARCHVER [15:12], ARCHPART [11:0]) */
+#define DIB_DDEVARCH_ARCHITECT_Pos         21U                                            /*!< DIB DDEVARCH: Architect Position */
+#define DIB_DDEVARCH_ARCHITECT_Msk         (0x7FFUL << DIB_DDEVARCH_ARCHITECT_Pos )       /*!< DIB DDEVARCH: Architect Mask */
+
+#define DIB_DDEVARCH_PRESENT_Pos           20U                                            /*!< DIB DDEVARCH: DEVARCH Present Position */
+#define DIB_DDEVARCH_PRESENT_Msk           (0x1UL << DIB_DDEVARCH_PRESENT_Pos )           /*!< DIB DDEVARCH: DEVARCH Present Mask (single bit; a wider mask would overlap ARCHITECT) */
+
+#define DIB_DDEVARCH_REVISION_Pos          16U                                            /*!< DIB DDEVARCH: Revision Position */
+#define DIB_DDEVARCH_REVISION_Msk          (0xFUL << DIB_DDEVARCH_REVISION_Pos )          /*!< DIB DDEVARCH: Revision Mask */
+
+#define DIB_DDEVARCH_ARCHVER_Pos           12U                                            /*!< DIB DDEVARCH: Architecture Version Position */
+#define DIB_DDEVARCH_ARCHVER_Msk           (0xFUL << DIB_DDEVARCH_ARCHVER_Pos )           /*!< DIB DDEVARCH: Architecture Version Mask */
+
+#define DIB_DDEVARCH_ARCHPART_Pos           0U                                            /*!< DIB DDEVARCH: Architecture Part Position */
+#define DIB_DDEVARCH_ARCHPART_Msk          (0xFFFUL /*<< DIB_DDEVARCH_ARCHPART_Pos*/)     /*!< DIB DDEVARCH: Architecture Part Mask */
+
+/* DDEVTYPE, SCS Device Type Register Definitions */
+#define DIB_DDEVTYPE_SUB_Pos                4U                                            /*!< DIB DDEVTYPE: Sub-type Position */
+#define DIB_DDEVTYPE_SUB_Msk               (0xFUL << DIB_DDEVTYPE_SUB_Pos )               /*!< DIB DDEVTYPE: Sub-type Mask */
+
+#define DIB_DDEVTYPE_MAJOR_Pos              0U                                            /*!< DIB DDEVTYPE: Major type Position */
+#define DIB_DDEVTYPE_MAJOR_Msk             (0xFUL /*<< DIB_DDEVTYPE_MAJOR_Pos*/)          /*!< DIB DDEVTYPE: Major type Mask */
+
+
+/*@} end of group CMSIS_DIB */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_core_bitfield     Core register bit field macros
+  \brief      Macros for use with bit field definitions (xxx_Pos, xxx_Msk).
+  @{
+ */
+
+/**
+  \brief   Mask and shift a bit field value for use in a register bit range.
+  \param[in] field  Name of the register bit field.
+  \param[in] value  Value of the bit field. This parameter is interpreted as a uint32_t type.
+  \return           Masked and shifted value.
+*/
+#define _VAL2FLD(field, value)    (((uint32_t)(value) << field ## _Pos) & field ## _Msk)
+
+/**
+  \brief     Mask and shift a register value to extract a bit field value.
+  \param[in] field  Name of the register bit field.
+  \param[in] value  Value of register. This parameter is interpreted as a uint32_t type.
+  \return           Masked and shifted bit field value.
+*/
+#define _FLD2VAL(field, value)    (((uint32_t)(value) & field ## _Msk) >> field ## _Pos)
+
+/*@} end of group CMSIS_core_bitfield */
+
+
+/**
+  \ingroup    CMSIS_core_register
+  \defgroup   CMSIS_core_base     Core Definitions
+  \brief      Definitions for base addresses, unions, and structures.
+  @{
+ */
+
+/* Memory mapping of Core Hardware */
+  #define SCS_BASE            (0xE000E000UL)                             /*!< System Control Space Base Address */
+  #define ITM_BASE            (0xE0000000UL)                             /*!< ITM Base Address */
+  #define DWT_BASE            (0xE0001000UL)                             /*!< DWT Base Address */
+  #define TPI_BASE            (0xE0040000UL)                             /*!< TPI Base Address */
+  #define DCB_BASE            (0xE000EDF0UL)                             /*!< DCB Base Address */
+  #define DIB_BASE            (0xE000EFB0UL)                             /*!< DIB Base Address */
+  #define EMSS_BASE           (0xE001E000UL)                             /*!< Enhanced Memory SubSystem Base Address */
+  
+  #define SysTick_BASE        (SCS_BASE +  0x0010UL)                     /*!< SysTick Base Address */
+  #define NVIC_BASE           (SCS_BASE +  0x0100UL)                     /*!< NVIC Base Address */
+  #define SCB_BASE            (SCS_BASE +  0x0D00UL)                     /*!< System Control Block Base Address */
+
+  #define SCnSCB              ((SCnSCB_Type    *)     SCS_BASE         ) /*!< System control Register not in SCB */
+  #define SCB                 ((SCB_Type       *)     SCB_BASE         ) /*!< SCB configuration struct */
+  #define SysTick             ((SysTick_Type   *)     SysTick_BASE     ) /*!< SysTick configuration struct */
+  #define NVIC                ((NVIC_Type      *)     NVIC_BASE        ) /*!< NVIC configuration struct */
+  #define ITM                 ((ITM_Type       *)     ITM_BASE         ) /*!< ITM configuration struct */
+  #define DWT                 ((DWT_Type       *)     DWT_BASE         ) /*!< DWT configuration struct */
+  #define TPI                 ((TPI_Type       *)     TPI_BASE         ) /*!< TPI configuration struct */
+  #define DCB                 ((DCB_Type       *)     DCB_BASE         ) /*!< DCB configuration struct */
+  #define DIB                 ((DIB_Type       *)     DIB_BASE         ) /*!< DIB configuration struct */
+  #define EMSS                ((EMSS_Type      *)     EMSS_BASE        ) /*!< Enhanced MSS Registers struct */
+
+  #if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+    #define MPU_BASE          (SCS_BASE +  0x0D90UL)                     /*!< Memory Protection Unit */
+    #define MPU               ((MPU_Type       *)     MPU_BASE         ) /*!< Memory Protection Unit */
+  #endif
+
+  #if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+    #define SAU_BASE          (SCS_BASE +  0x0DD0UL)                     /*!< Security Attribution Unit */
+    #define SAU               ((SAU_Type       *)     SAU_BASE         ) /*!< Security Attribution Unit */
+  #endif
+
+  #define FPU_BASE            (SCS_BASE +  0x0F30UL)                     /*!< Floating Point Unit */
+  #define FPU                 ((FPU_Type       *)     FPU_BASE         ) /*!< Floating Point Unit */
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)  /* non-secure aliases, used by the TZ_*_NS functions in this file */
+  #define SCS_BASE_NS         (0xE002E000UL)                             /*!< System Control Space Base Address (non-secure address space) */
+
+  #define DCB_BASE_NS         (0xE002EDF0UL)                             /*!< DCB Base Address                  (non-secure address space) */
+  #define DIB_BASE_NS         (0xE002EFB0UL)                             /*!< DIB Base Address                  (non-secure address space) */
+  #define SysTick_BASE_NS     (SCS_BASE_NS +  0x0010UL)                  /*!< SysTick Base Address              (non-secure address space) */
+  #define NVIC_BASE_NS        (SCS_BASE_NS +  0x0100UL)                  /*!< NVIC Base Address                 (non-secure address space) */
+  #define SCB_BASE_NS         (SCS_BASE_NS +  0x0D00UL)                  /*!< System Control Block Base Address (non-secure address space) */
+
+  #define SCnSCB_NS           ((SCnSCB_Type    *)     SCS_BASE_NS      ) /*!< System control Register not in SCB (non-secure address space) */
+  #define SCB_NS              ((SCB_Type       *)     SCB_BASE_NS      ) /*!< SCB configuration struct           (non-secure address space) */
+  #define SysTick_NS          ((SysTick_Type   *)     SysTick_BASE_NS  ) /*!< SysTick configuration struct       (non-secure address space) */
+  #define NVIC_NS             ((NVIC_Type      *)     NVIC_BASE_NS     ) /*!< NVIC configuration struct          (non-secure address space) */
+  #define DCB_NS              ((DCB_Type       *)     DCB_BASE_NS      ) /*!< DCB configuration struct           (non-secure address space) */
+  #define DIB_NS              ((DIB_Type       *)     DIB_BASE_NS      ) /*!< DIB configuration struct           (non-secure address space) */
+
+  #if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+    #define MPU_BASE_NS       (SCS_BASE_NS +  0x0D90UL)                  /*!< Memory Protection Unit            (non-secure address space) */
+    #define MPU_NS            ((MPU_Type       *)     MPU_BASE_NS      ) /*!< Memory Protection Unit            (non-secure address space) */
+  #endif
+
+  #define FPU_BASE_NS         (SCS_BASE_NS +  0x0F30UL)                  /*!< Floating Point Unit               (non-secure address space) */
+  #define FPU_NS              ((FPU_Type       *)     FPU_BASE_NS      ) /*!< Floating Point Unit               (non-secure address space) */
+
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+/*@} */
+
+
+
+/*******************************************************************************
+ *                Hardware Abstraction Layer
+  Core Function Interface contains:
+  - Core NVIC Functions
+  - Core SysTick Functions
+  - Core Debug Functions
+  - Core Register Access Functions
+ ******************************************************************************/
+/**
+  \defgroup CMSIS_Core_FunctionInterface Functions and Instructions Reference
+*/
+
+
+
+/* ##########################   NVIC functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_NVICFunctions NVIC Functions
+  \brief    Functions that manage interrupts and exceptions via the NVIC.
+  @{
+ */
+
+#ifdef CMSIS_NVIC_VIRTUAL                       /* the NVIC_* names come from a user-selectable header instead of the aliases below */
+  #ifndef CMSIS_NVIC_VIRTUAL_HEADER_FILE
+    #define CMSIS_NVIC_VIRTUAL_HEADER_FILE "cmsis_nvic_virtual.h"
+  #endif
+  #include CMSIS_NVIC_VIRTUAL_HEADER_FILE
+#else                                           /* default: map each public NVIC_* name onto the __NVIC_* inline defined in this file */
+  #define NVIC_SetPriorityGrouping    __NVIC_SetPriorityGrouping
+  #define NVIC_GetPriorityGrouping    __NVIC_GetPriorityGrouping
+  #define NVIC_EnableIRQ              __NVIC_EnableIRQ
+  #define NVIC_GetEnableIRQ           __NVIC_GetEnableIRQ
+  #define NVIC_DisableIRQ             __NVIC_DisableIRQ
+  #define NVIC_GetPendingIRQ          __NVIC_GetPendingIRQ
+  #define NVIC_SetPendingIRQ          __NVIC_SetPendingIRQ
+  #define NVIC_ClearPendingIRQ        __NVIC_ClearPendingIRQ
+  #define NVIC_GetActive              __NVIC_GetActive
+  #define NVIC_SetPriority            __NVIC_SetPriority
+  #define NVIC_GetPriority            __NVIC_GetPriority
+  #define NVIC_SystemReset            __NVIC_SystemReset
+  #define SW_SystemReset              __SW_SystemReset
+#endif /* CMSIS_NVIC_VIRTUAL */
+
+#ifdef CMSIS_VECTAB_VIRTUAL                     /* same scheme for the two vector-table accessors */
+  #ifndef CMSIS_VECTAB_VIRTUAL_HEADER_FILE
+    #define CMSIS_VECTAB_VIRTUAL_HEADER_FILE "cmsis_vectab_virtual.h"
+  #endif
+  #include CMSIS_VECTAB_VIRTUAL_HEADER_FILE
+#else
+  #define NVIC_SetVector              __NVIC_SetVector
+  #define NVIC_GetVector              __NVIC_GetVector
+#endif  /* CMSIS_VECTAB_VIRTUAL */
+
+#define NVIC_USER_IRQ_OFFSET          16        /* added to IRQn by __NVIC_SetVector/__NVIC_GetVector to form the vector-table index */
+
+
+/* Special LR values for Secure/Non-Secure call handling and exception handling                                               */
+
+/* Function Return Payload (from ARMv8-M Architecture Reference Manual): LR value on entry from Secure BLXNS                  */
+#define FNC_RETURN                 (0xFEFFFFFFUL)     /* bit [0] ignored when processing a branch                             */
+
+/* The following EXC_RETURN mask values are used to evaluate the LR on exception entry; each selects the bit(s) named below.  */
+#define EXC_RETURN_PREFIX          (0xFF000000UL)     /* bits [31:24] set to indicate an EXC_RETURN value                     */
+#define EXC_RETURN_S               (0x00000040UL)     /* bit [6] stack used to push registers: 0=Non-secure 1=Secure          */
+#define EXC_RETURN_DCRS            (0x00000020UL)     /* bit [5] stacking rules for called registers: 0=skipped 1=saved       */
+#define EXC_RETURN_FTYPE           (0x00000010UL)     /* bit [4] allocate stack for floating-point context: 0=done 1=skipped  */
+#define EXC_RETURN_MODE            (0x00000008UL)     /* bit [3] processor mode for return: 0=Handler mode 1=Thread mode      */
+#define EXC_RETURN_SPSEL           (0x00000004UL)     /* bit [2] stack pointer used to restore context: 0=MSP 1=PSP           */
+#define EXC_RETURN_ES              (0x00000001UL)     /* bit [0] security state exception was taken to: 0=Non-secure 1=Secure */
+
+/* Integrity Signature (from ARMv8-M Architecture Reference Manual) for exception context stacking                            */
+#if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)  /* Value for processors with floating-point extension:                  */
+#define EXC_INTEGRITY_SIGNATURE     (0xFEFA125AUL)     /* bit [0] SFTC must match LR bit[4] EXC_RETURN_FTYPE                   */
+#else                                                 /* __FPU_PRESENT undefined or not 1                                     */
+#define EXC_INTEGRITY_SIGNATURE     (0xFEFA125BUL)     /* Value for processors without floating-point extension                */
+#endif /* __FPU_PRESENT */
+
+
+/**
+  \brief   Set Priority Grouping
+  \details Read-modify-write of SCB->AIRCR: the VECTKEY and PRIGROUP fields are cleared, then the
+           0x5FA write key and the new PRIGROUP value (AIRCR bits [10:8]) are inserted and stored.
+           Only the low three bits of PriorityGroup (values 0..7) are used.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS), the smallest possible priority group is set.
+  \param [in]      PriorityGroup  Priority grouping field.
+ */
+__STATIC_INLINE void __NVIC_SetPriorityGrouping(uint32_t PriorityGroup)
+{
+  uint32_t reg_value;
+  uint32_t PriorityGroupTmp = (PriorityGroup & (uint32_t)0x07UL);             /* only values 0..7 are used          */
+
+  reg_value  =  SCB->AIRCR;                                                   /* read old register configuration    */
+  reg_value &= ~((uint32_t)(SCB_AIRCR_VECTKEY_Msk | SCB_AIRCR_PRIGROUP_Msk)); /* clear bits to change               */
+  reg_value  =  (reg_value                                   |
+                ((uint32_t)0x5FAUL << SCB_AIRCR_VECTKEY_Pos) |
+                (PriorityGroupTmp << SCB_AIRCR_PRIGROUP_Pos)  );              /* Insert write key and priority group */
+  SCB->AIRCR =  reg_value;                                                    /* one store carries key and new group */
+}
+
+
+/**
+  \brief   Get Priority Grouping
+  \details Reads SCB->AIRCR and extracts the PRIGROUP field (masked, then shifted down to bit 0).
+  \return                Priority grouping field (SCB->AIRCR [10:8] PRIGROUP field).
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPriorityGrouping(void)
+{
+  return ((uint32_t)((SCB->AIRCR & SCB_AIRCR_PRIGROUP_Msk) >> SCB_AIRCR_PRIGROUP_Pos));
+}
+
+
+/**
+  \brief   Enable Interrupt
+  \details Stores a word with only bit (IRQn mod 32) set to NVIC->ISER[IRQn / 32]; the store is bracketed by __COMPILER_BARRIER().
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void __NVIC_EnableIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    __COMPILER_BARRIER();
+    NVIC->ISER[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    __COMPILER_BARRIER();
+  }
+}
+
+
+/**
+  \brief   Get Interrupt Enable status
+  \details Tests bit (IRQn mod 32) of NVIC->ISER[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt is not enabled (also returned when IRQn is negative).
+  \return             1  Interrupt is enabled.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetEnableIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC->ISER[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Disable Interrupt
+  \details Stores a word with only bit (IRQn mod 32) set to NVIC->ICER[IRQn / 32], then executes __DSB() followed by __ISB().
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored (no write, no barriers).
+ */
+__STATIC_INLINE void __NVIC_DisableIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->ICER[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+    __DSB();
+    __ISB();
+  }
+}
+
+
+/**
+  \brief   Get Pending Interrupt
+  \details Tests bit (IRQn mod 32) of NVIC->ISPR[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not pending (also returned when IRQn is negative).
+  \return             1  Interrupt status is pending.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPendingIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC->ISPR[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Set Pending Interrupt
+  \details Stores a word with only bit (IRQn mod 32) set to NVIC->ISPR[IRQn / 32] (plain store, no read-back).
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void __NVIC_SetPendingIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->ISPR[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Clear Pending Interrupt
+  \details Stores a word with only bit (IRQn mod 32) set to NVIC->ICPR[IRQn / 32] (plain store, no read-back).
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void __NVIC_ClearPendingIRQ(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->ICPR[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Get Active Interrupt
+  \details Tests bit (IRQn mod 32) of NVIC->IABR[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not active (also returned when IRQn is negative).
+  \return             1  Interrupt status is active.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetActive(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC->IABR[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Get Interrupt Target State
+  \details Tests bit (IRQn mod 32) of NVIC->ITNS[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure (also returned when IRQn is negative)
+  \return             1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t NVIC_GetTargetState(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC->ITNS[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Set Interrupt Target State
+  \details ORs bit (IRQn mod 32) into NVIC->ITNS[IRQn / 32] (read-modify-write), then re-reads the register and returns that bit.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure (also returned, with no write, when IRQn is negative)
+                      1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative. The return value is the bit as read back, not merely the requested state.
+ */
+__STATIC_INLINE uint32_t NVIC_SetTargetState(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->ITNS[(((uint32_t)IRQn) >> 5UL)] |=  ((uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL)));
+    return((uint32_t)(((NVIC->ITNS[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Clear Interrupt Target State
+  \details Clears bit (IRQn mod 32) in NVIC->ITNS[IRQn / 32] (read-modify-write), then re-reads the register and returns that bit.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  if interrupt is assigned to Secure (also returned, with no write, when IRQn is negative)
+                      1  if interrupt is assigned to Non Secure
+  \note    IRQn must not be negative. The return value is the bit as read back, not merely the requested state.
+ */
+__STATIC_INLINE uint32_t NVIC_ClearTargetState(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->ITNS[(((uint32_t)IRQn) >> 5UL)] &= ~((uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL)));
+    return((uint32_t)(((NVIC->ITNS[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+
+/**
+  \brief   Set Interrupt Priority
+  \details Writes one priority byte: NVIC->IPR[IRQn] when IRQn >= 0, otherwise SCB->SHPR[(IRQn & 0xF) - 4].
+           The value stored is priority shifted left by (8 - __NVIC_PRIO_BITS) and truncated to 8 bits,
+           so bits of priority above __NVIC_PRIO_BITS are discarded.
+  \param [in]      IRQn  Interrupt number.
+  \param [in]  priority  Priority to set.
+  \note    The priority cannot be set for every processor exception.
+ */
+__STATIC_INLINE void __NVIC_SetPriority(IRQn_Type IRQn, uint32_t priority)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC->IPR[((uint32_t)IRQn)]               = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  }
+  else
+  {
+    SCB->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  }
+}
+
+
+/**
+  \brief   Get Interrupt Priority
+  \details Reads one priority byte: NVIC->IPR[IRQn] when IRQn >= 0, otherwise SCB->SHPR[(IRQn & 0xF) - 4].
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]   IRQn  Interrupt number.
+  \return             Interrupt Priority.
+                      The byte read is shifted right by (8 - __NVIC_PRIO_BITS) before it is returned.
+ */
+__STATIC_INLINE uint32_t __NVIC_GetPriority(IRQn_Type IRQn)
+{
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return(((uint32_t)NVIC->IPR[((uint32_t)IRQn)]               >> (8U - __NVIC_PRIO_BITS)));
+  }
+  else
+  {
+    return(((uint32_t)SCB->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] >> (8U - __NVIC_PRIO_BITS)));
+  }
+}
+
+
+/**
+  \brief   Encode Priority
+  \details Packs a preemptive priority and a subpriority into one priority value
+           for the given priority group. The preemptive field gets min(7 - group, __NVIC_PRIO_BITS)
+           bits; the subpriority field gets (group + __NVIC_PRIO_BITS - 7) bits, or none when that
+           would be negative. Each input is masked to its field width before being combined.
+  \param [in]     PriorityGroup  Used priority group (only the low three bits are used).
+  \param [in]   PreemptPriority  Preemptive priority value (starting from 0).
+  \param [in]       SubPriority  Subpriority value (starting from 0).
+  \return                        Encoded priority. Value can be used in the function \ref NVIC_SetPriority().
+ */
+__STATIC_INLINE uint32_t NVIC_EncodePriority (uint32_t PriorityGroup, uint32_t PreemptPriority, uint32_t SubPriority)
+{
+  const uint32_t prioBits    = (uint32_t)(__NVIC_PRIO_BITS);
+  const uint32_t group       = (PriorityGroup & (uint32_t)0x07UL);                       /* only values 0..7 are used */
+  const uint32_t groupBits   = (uint32_t)(7UL - group);
+  const uint32_t preemptBits = (groupBits > prioBits) ? prioBits : groupBits;            /* width of the preempt field */
+  const uint32_t subBits     = ((group + prioBits) < (uint32_t)7UL) ? (uint32_t)0UL : (uint32_t)((group + prioBits) - 7UL);
+  const uint32_t preemptMask = (uint32_t)((1UL << preemptBits) - 1UL);
+  const uint32_t subMask     = (uint32_t)((1UL << subBits) - 1UL);
+  const uint32_t preemptPart = (PreemptPriority & preemptMask) << subBits;               /* preempt field sits above sub */
+  const uint32_t subPart     = (SubPriority & subMask);
+
+  return (preemptPart | subPart);
+}
+
+
+/**
+  \brief   Decode Priority
+  \details Splits a priority value into its preemptive priority and subpriority parts
+           for the given priority group. Field widths are derived exactly as in
+           NVIC_EncodePriority(): min(7 - group, __NVIC_PRIO_BITS) preempt bits and
+           (group + __NVIC_PRIO_BITS - 7) subpriority bits, or none when that would be negative.
+  \param [in]         Priority   Priority value, which can be retrieved with the function \ref NVIC_GetPriority().
+  \param [in]     PriorityGroup  Used priority group (only the low three bits are used).
+  \param [out] pPreemptPriority  Preemptive priority value (starting from 0).
+  \param [out]     pSubPriority  Subpriority value (starting from 0).
+ */
+__STATIC_INLINE void NVIC_DecodePriority (uint32_t Priority, uint32_t PriorityGroup, uint32_t* const pPreemptPriority, uint32_t* const pSubPriority)
+{
+  const uint32_t prioBits    = (uint32_t)(__NVIC_PRIO_BITS);
+  const uint32_t group       = (PriorityGroup & (uint32_t)0x07UL);                       /* only values 0..7 are used */
+  const uint32_t groupBits   = (uint32_t)(7UL - group);
+  const uint32_t preemptBits = (groupBits > prioBits) ? prioBits : groupBits;            /* width of the preempt field */
+  const uint32_t subBits     = ((group + prioBits) < (uint32_t)7UL) ? (uint32_t)0UL : (uint32_t)((group + prioBits) - 7UL);
+  const uint32_t preemptMask = (uint32_t)((1UL << preemptBits) - 1UL);
+  const uint32_t subMask     = (uint32_t)((1UL << subBits) - 1UL);
+  *pPreemptPriority = (Priority >> subBits) & preemptMask;                               /* preempt field sits above sub */
+  *pSubPriority     = (Priority & subMask);
+}
+
+
+/**
+  \brief   Set Interrupt Vector
+  \details Writes one entry of the vector table whose base address is read from SCB->VTOR.
+           The entry index is IRQn + NVIC_USER_IRQ_OFFSET, so IRQn can be positive (device
+           specific interrupt) or negative (processor exception). A __DSB() follows the store.
+           VTOR must have been relocated to SRAM before calling this function.
+  \param [in]   IRQn      Interrupt number
+  \param [in]   vector    Address of interrupt handler function
+ */
+__STATIC_INLINE void __NVIC_SetVector(IRQn_Type IRQn, uint32_t vector)
+{
+  uint32_t *vectors = (uint32_t *)SCB->VTOR;
+  vectors[(int32_t)IRQn + NVIC_USER_IRQ_OFFSET] = vector;
+  __DSB();
+}
+
+
+/**
+  \brief   Get Interrupt Vector
+  \details Reads one entry of the vector table whose base address is read from SCB->VTOR.
+           The entry index is IRQn + NVIC_USER_IRQ_OFFSET, so IRQn can be positive (device
+           specific interrupt) or negative (processor exception).
+  \param [in]   IRQn      Interrupt number.
+  \return                 Address of interrupt handler function
+ */
+__STATIC_INLINE uint32_t __NVIC_GetVector(IRQn_Type IRQn)
+{
+  uint32_t *vectors = (uint32_t *)SCB->VTOR;
+  return vectors[(int32_t)IRQn + NVIC_USER_IRQ_OFFSET];
+}
+
+
+/**
+  \brief   System Reset
+  \details Requests a reset by writing SYSRESETREQ to SCB->AIRCR with the 0x5FA key and the current PRIGROUP value, then spins forever; never returns.
+ */
+__NO_RETURN __STATIC_INLINE void __NVIC_SystemReset(void)
+{
+  __DSB();                                                          /* Ensure all outstanding memory accesses including
+                                                                       buffered write are completed before reset */
+  SCB->AIRCR  = (uint32_t)((0x5FAUL << SCB_AIRCR_VECTKEY_Pos)    |
+                           (SCB->AIRCR & SCB_AIRCR_PRIGROUP_Msk) |
+                            SCB_AIRCR_SYSRESETREQ_Msk    );         /* Keep priority group unchanged */
+  __DSB();                                                          /* Ensure completion of memory access */
+
+  for(;;)                                                           /* wait until reset */
+  {
+    __NOP();
+  }
+}
+
+/**
+  \brief   Software Reset
+  \details Requests a reset like __NVIC_SystemReset(), but the AIRCR write also carries over the current BFHFNMINS bit; never returns.
+ */
+__NO_RETURN __STATIC_INLINE void __SW_SystemReset(void)
+{
+  __DSB();                                                          /* Ensure all outstanding memory accesses including
+                                                                       buffered write are completed before reset */
+  SCB->AIRCR  = (uint32_t)((0x5FAUL << SCB_AIRCR_VECTKEY_Pos)    |
+                           (SCB->AIRCR & SCB_AIRCR_BFHFNMINS_Msk) | /* Keep BFHFNMINS unchanged; use this reset variant when that bit must be preserved */
+                           (SCB->AIRCR & SCB_AIRCR_PRIGROUP_Msk) | /* Keep priority group unchanged */
+                            SCB_AIRCR_SYSRESETREQ_Msk    );         /* Request the reset */
+  __DSB();                                                          /* Ensure completion of memory access */
+
+  for(;;)                                                           /* wait until reset */
+  {
+    __NOP();
+  }
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Set Priority Grouping (non-secure)
+  \details Read-modify-write of SCB_NS->AIRCR when in secure state: the VECTKEY and PRIGROUP fields
+           are cleared, then the 0x5FA write key and the new PRIGROUP value (AIRCR bits [10:8])
+           are inserted and stored. Only the low three bits of PriorityGroup (values 0..7) are used.
+           In case of a conflict between priority grouping and available
+           priority bits (__NVIC_PRIO_BITS), the smallest possible priority group is set.
+  \param [in]      PriorityGroup  Priority grouping field.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPriorityGrouping_NS(uint32_t PriorityGroup)
+{
+  uint32_t reg_value;
+  uint32_t PriorityGroupTmp = (PriorityGroup & (uint32_t)0x07UL);             /* only values 0..7 are used          */
+
+  reg_value  =  SCB_NS->AIRCR;                                                /* read old register configuration    */
+  reg_value &= ~((uint32_t)(SCB_AIRCR_VECTKEY_Msk | SCB_AIRCR_PRIGROUP_Msk)); /* clear bits to change               */
+  reg_value  =  (reg_value                                   |
+                ((uint32_t)0x5FAUL << SCB_AIRCR_VECTKEY_Pos) |
+                (PriorityGroupTmp << SCB_AIRCR_PRIGROUP_Pos)  );              /* Insert write key and priority group */
+  SCB_NS->AIRCR =  reg_value;                                                 /* one store carries key and new group */
+}
+
+
+/**
+  \brief   Get Priority Grouping (non-secure)
+  \details Reads SCB_NS->AIRCR when in secure state and extracts the PRIGROUP field (masked, then shifted down to bit 0).
+  \return                Priority grouping field (SCB_NS->AIRCR [10:8] PRIGROUP field).
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPriorityGrouping_NS(void)
+{
+  return ((uint32_t)((SCB_NS->AIRCR & SCB_AIRCR_PRIGROUP_Msk) >> SCB_AIRCR_PRIGROUP_Pos));
+}
+
+
+/**
+  \brief   Enable Interrupt (non-secure)
+  \details When in secure state, stores a word with only bit (IRQn mod 32) set to NVIC_NS->ISER[IRQn / 32]; unlike __NVIC_EnableIRQ() no compiler barrier is used.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void TZ_NVIC_EnableIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC_NS->ISER[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Get Interrupt Enable status (non-secure)
+  \details When in secure state, tests bit (IRQn mod 32) of NVIC_NS->ISER[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt is not enabled (also returned when IRQn is negative).
+  \return             1  Interrupt is enabled.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetEnableIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC_NS->ISER[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Disable Interrupt (non-secure)
+  \details When in secure state, stores a word with only bit (IRQn mod 32) set to NVIC_NS->ICER[IRQn / 32]; unlike __NVIC_DisableIRQ() no __DSB()/__ISB() follows.
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void TZ_NVIC_DisableIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC_NS->ICER[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Get Pending Interrupt (non-secure)
+  \details When in secure state, tests bit (IRQn mod 32) of NVIC_NS->ISPR[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not pending (also returned when IRQn is negative).
+  \return             1  Interrupt status is pending.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPendingIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC_NS->ISPR[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Set Pending Interrupt (non-secure)
+  \details When in secure state, stores a word with only bit (IRQn mod 32) set to NVIC_NS->ISPR[IRQn / 32] (plain store, no read-back).
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPendingIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC_NS->ISPR[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Clear Pending Interrupt (non-secure)
+  \details When in secure state, stores a word with only bit (IRQn mod 32) set to NVIC_NS->ICPR[IRQn / 32] (plain store, no read-back).
+  \param [in]      IRQn  Device specific interrupt number.
+  \note    IRQn must not be negative; a negative IRQn is ignored and no register is written.
+ */
+__STATIC_INLINE void TZ_NVIC_ClearPendingIRQ_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC_NS->ICPR[(((uint32_t)IRQn) >> 5UL)] = (uint32_t)(1UL << (((uint32_t)IRQn) & 0x1FUL));
+  }
+}
+
+
+/**
+  \brief   Get Active Interrupt (non-secure)
+  \details When in secure state, tests bit (IRQn mod 32) of NVIC_NS->IABR[IRQn / 32] and normalises the result to 0 or 1.
+  \param [in]      IRQn  Device specific interrupt number.
+  \return             0  Interrupt status is not active (also returned when IRQn is negative).
+  \return             1  Interrupt status is active.
+  \note    IRQn must not be negative; negative values are not looked up.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetActive_NS(IRQn_Type IRQn)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return((uint32_t)(((NVIC_NS->IABR[(((uint32_t)IRQn) >> 5UL)] & (1UL << (((uint32_t)IRQn) & 0x1FUL))) != 0UL) ? 1UL : 0UL));
+  }
+  else
+  {
+    return(0U);
+  }
+}
+
+
+/**
+  \brief   Set Interrupt Priority (non-secure)
+  \details When in secure state, writes one priority byte: NVIC_NS->IPR[IRQn] when IRQn >= 0, otherwise
+           SCB_NS->SHPR[(IRQn & 0xF) - 4]. The value stored is priority shifted left by
+           (8 - __NVIC_PRIO_BITS) and truncated to 8 bits.
+  \param [in]      IRQn  Interrupt number.
+  \param [in]  priority  Priority to set.
+  \note    The priority cannot be set for every non-secure processor exception.
+ */
+__STATIC_INLINE void TZ_NVIC_SetPriority_NS(IRQn_Type IRQn, uint32_t priority)
+{
+  if ((int32_t)(IRQn) >= 0)
+  {
+    NVIC_NS->IPR[((uint32_t)IRQn)]               = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  }
+  else
+  {
+    SCB_NS->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] = (uint8_t)((priority << (8U - __NVIC_PRIO_BITS)) & (uint32_t)0xFFUL);
+  }
+}
+
+
+/**
+  \brief   Get Interrupt Priority (non-secure)
+  \details When in secure state, reads one priority byte: NVIC_NS->IPR[IRQn] when IRQn >= 0, otherwise SCB_NS->SHPR[(IRQn & 0xF) - 4].
+           The interrupt number can be positive to specify a device specific interrupt,
+           or negative to specify a processor exception.
+  \param [in]   IRQn  Interrupt number.
+  \return             Interrupt Priority. The byte read is shifted right by (8 - __NVIC_PRIO_BITS) before it is returned.
+ */
+__STATIC_INLINE uint32_t TZ_NVIC_GetPriority_NS(IRQn_Type IRQn)
+{
+
+  if ((int32_t)(IRQn) >= 0)
+  {
+    return(((uint32_t)NVIC_NS->IPR[((uint32_t)IRQn)]               >> (8U - __NVIC_PRIO_BITS)));
+  }
+  else
+  {
+    return(((uint32_t)SCB_NS->SHPR[(((uint32_t)IRQn) & 0xFUL)-4UL] >> (8U - __NVIC_PRIO_BITS)));
+  }
+}
+#endif /*  defined (__ARM_FEATURE_CMSE) &&(__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_NVICFunctions */
+
+/* ##########################  MPU functions  #################################### */
+
+#if defined (__MPU_PRESENT) && (__MPU_PRESENT == 1U)
+
+#include "mpu_armv8.h"
+
+#endif
+
+/* ##########################  FPU functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_FpuFunctions FPU Functions
+  \brief    Function that provides FPU type.
+  @{
+ */
+
+/**
+  \brief   Get FPU type
+  \details Reads FPU->MVFR0 once and classifies its single/double precision fields.
+  \returns
+   - \b  0: No FPU (any field combination other than the two below)
+   - \b  1: Single precision FPU (masked fields equal 0x020)
+   - \b  2: Double + Single precision FPU (masked fields equal 0x220)
+ */
+__STATIC_INLINE uint32_t SCB_GetFPUType(void)
+{
+  const uint32_t precision = FPU->MVFR0 & (FPU_MVFR0_Single_precision_Msk | FPU_MVFR0_Double_precision_Msk);
+  uint32_t fpuType;
+  switch (precision)
+  {
+    case 0x220U:
+      fpuType = 2U;      /* Double + Single precision FPU */
+      break;
+    case 0x020U:
+      fpuType = 1U;      /* Single precision FPU */
+      break;
+    default:
+      fpuType = 0U;      /* No FPU */
+      break;
+  }
+  return fpuType;
+}
+
+
+/*@} end of CMSIS_Core_FpuFunctions */
+
+
+
+/* ##########################   SAU functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_SAUFunctions SAU Functions
+  \brief    Functions that configure the SAU.
+  @{
+ */
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+
+/**
+  \brief   Enable SAU
+  \details Enables the Security Attribution Unit (SAU) by OR-ing SAU_CTRL_ENABLE_Msk into SAU->CTRL (read-modify-write).
+ */
+__STATIC_INLINE void TZ_SAU_Enable(void)
+{
+    SAU->CTRL |=  (SAU_CTRL_ENABLE_Msk);
+}
+
+
+
+/**
+  \brief   Disable SAU
+  \details Disables the Security Attribution Unit (SAU) by clearing SAU_CTRL_ENABLE_Msk in SAU->CTRL (read-modify-write).
+ */
+__STATIC_INLINE void TZ_SAU_Disable(void)
+{
+    SAU->CTRL &= ~(SAU_CTRL_ENABLE_Msk);
+}
+
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_SAUFunctions */
+
+
+
+/* ##################################    Debug Control function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_DCBFunctions Debug Control Functions
+  \brief    Functions that access the Debug Control Block.
+  @{
+ */
+
+
+/**
+  \brief   Set Debug Authentication Control Register
+  \details Writes DCB->DAUTHCTRL, with a DSB/ISB barrier pair issued both before and after the write.
+  \param [in]  value  value to be written.
+ */
+__STATIC_INLINE void DCB_SetAuthCtrl(uint32_t value)
+{
+    __DSB();
+    __ISB();
+    DCB->DAUTHCTRL = value;
+    __DSB();
+    __ISB();
+}
+
+
+/**
+  \brief   Get Debug Authentication Control Register
+  \details Reads DCB->DAUTHCTRL; unlike the setter, no barriers are issued.
+  \return             Current value of the Debug Authentication Control Register.
+ */
+__STATIC_INLINE uint32_t DCB_GetAuthCtrl(void)
+{
+    return (DCB->DAUTHCTRL);
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Set Debug Authentication Control Register (non-secure)
+  \details Writes DCB_NS->DAUTHCTRL (non-secure alias) when in secure state, with DSB/ISB barriers before and after the write.
+  \param [in]  value  value to be written
+ */
+__STATIC_INLINE void TZ_DCB_SetAuthCtrl_NS(uint32_t value)
+{
+    __DSB();
+    __ISB();
+    DCB_NS->DAUTHCTRL = value;
+    __DSB();
+    __ISB();
+}
+
+
+/**
+  \brief   Get Debug Authentication Control Register (non-secure)
+  \details Reads DCB_NS->DAUTHCTRL (non-secure alias) when in secure state; no barriers are issued.
+  \return             Current value of the non-secure Debug Authentication Control Register.
+ */
+__STATIC_INLINE uint32_t TZ_DCB_GetAuthCtrl_NS(void)
+{
+    return (DCB_NS->DAUTHCTRL);
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_DCBFunctions */
+
+
+
+
+/* ##################################    Debug Identification function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_DIBFunctions Debug Identification Functions
+  \brief    Functions that access the Debug Identification Block.
+  @{
+ */
+
+
+/**
+  \brief   Get Debug Authentication Status Register
+  \details Reads DIB->DAUTHSTATUS; the register is only read, never written, by this API.
+  \return             Current value of the Debug Authentication Status Register.
+ */
+__STATIC_INLINE uint32_t DIB_GetAuthStatus(void)
+{
+    return (DIB->DAUTHSTATUS);
+}
+
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   Get Debug Authentication Status Register (non-secure)
+  \details Reads DIB_NS->DAUTHSTATUS (non-secure alias) when in secure state.
+  \return             Current value of the non-secure Debug Authentication Status Register.
+ */
+__STATIC_INLINE uint32_t TZ_DIB_GetAuthStatus_NS(void)
+{
+    return (DIB_NS->DAUTHSTATUS);
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+/*@} end of CMSIS_Core_DIBFunctions */
+
+
+#if ((defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)) || \
+     (defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)))
+
+/* ##########################  Cache functions  #################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_CacheFunctions Cache Functions
+  \brief    Functions that configure Instruction and Data cache.
+  @{
+ */
+
+/* Cache Size ID Register Macros: extract the ASSOCIATIVITY and NUMSETS fields from a raw CCSIDR value x */
+#define CCSIDR_WAYS(x)         (((x) & SCB_CCSIDR_ASSOCIATIVITY_Msk) >> SCB_CCSIDR_ASSOCIATIVITY_Pos)
+#define CCSIDR_SETS(x)         (((x) & SCB_CCSIDR_NUMSETS_Msk      ) >> SCB_CCSIDR_NUMSETS_Pos      )
+
+#define __SCB_DCACHE_LINE_SIZE  32U /*!< STAR-MC1 D-cache line size is fixed to 32 bytes (8 words); step used by the *_by_Addr loops. See also register SCB_CCSIDR */
+#define __SCB_ICACHE_LINE_SIZE  32U /*!< STAR-MC1 I-cache line size is fixed to 32 bytes (8 words); step used by the *_by_Addr loops. See also register SCB_CCSIDR */
+
+/**
+  \brief   Enable I-Cache
+  \details Invalidates the whole I-Cache, then enables it. No-op if it is already enabled or __ICACHE_PRESENT is not 1.
+  */
+__STATIC_FORCEINLINE void SCB_EnableICache (void)
+{
+  #if defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)
+    if (SCB->CCR & SCB_CCR_IC_Msk) return;  /* return if ICache is already enabled */
+
+    __DSB();
+    __ISB();
+    SCB->ICIALLU = 0UL;                     /* invalidate I-Cache before it is switched on */
+    __DSB();
+    __ISB();
+    SCB->CCR |=  (uint32_t)SCB_CCR_IC_Msk;  /* enable I-Cache */
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Disable I-Cache
+  \details Clears the I-Cache enable bit in SCB->CCR, then invalidates the whole I-Cache. Empty body if __ICACHE_PRESENT is not 1.
+  */
+__STATIC_FORCEINLINE void SCB_DisableICache (void)
+{
+  #if defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)
+    __DSB();
+    __ISB();
+    SCB->CCR &= ~(uint32_t)SCB_CCR_IC_Msk;  /* disable I-Cache */
+    SCB->ICIALLU = 0UL;                     /* invalidate I-Cache (no barrier between the two writes) */
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Invalidate I-Cache
+  \details Invalidates the whole I-Cache via SCB->ICIALLU, bracketed by DSB/ISB. The enable state in SCB->CCR is not touched.
+  */
+__STATIC_FORCEINLINE void SCB_InvalidateICache (void)
+{
+  #if defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)
+    __DSB();
+    __ISB();
+    SCB->ICIALLU = 0UL;                     /* invalidate all I-Cache entries */
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   I-Cache Invalidate by address
+  \details Invalidates the I-Cache lines covering the given block by writing one address per 32-byte step to SCB->ICIMVAU.
+           addr is not aligned in software; the size is extended by addr's offset inside its 32-byte line instead.
+           Nothing is done (no barriers either) when isize <= 0 or __ICACHE_PRESENT is not 1.
+  \param[in]   addr    address
+  \param[in]   isize   size of memory block (in number of bytes)
+*/
+__STATIC_FORCEINLINE void SCB_InvalidateICache_by_Addr (void *addr, int32_t isize)
+{
+  #if defined (__ICACHE_PRESENT) && (__ICACHE_PRESENT == 1U)
+    if ( isize > 0 ) {
+       int32_t op_size = isize + (((uint32_t)addr) & (__SCB_ICACHE_LINE_SIZE - 1U));
+      uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_ICACHE_LINE_SIZE - 1U) */;
+
+      __DSB();
+
+      do {
+        SCB->ICIMVAU = op_addr;             /* register accepts only 32byte aligned values, only bits 31..5 are valid */
+        op_addr += __SCB_ICACHE_LINE_SIZE;
+        op_size -= __SCB_ICACHE_LINE_SIZE;
+      } while ( op_size > 0 );              /* one write per 32-byte step until the extended size is consumed */
+
+      __DSB();
+      __ISB();
+    }
+  #endif
+}
+
+
+/**
+  \brief   Enable D-Cache
+  \details Invalidates the Level 1 D-Cache by set/way, then enables it. No-op if already enabled or __DCACHE_PRESENT is not 1.
+  */
+__STATIC_FORCEINLINE void SCB_EnableDCache (void)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    uint32_t ccsidr;
+    uint32_t sets;
+    uint32_t ways;
+
+    if (SCB->CCR & SCB_CCR_DC_Msk) return;  /* return if DCache is already enabled */
+
+    SCB->CSSELR = 0U;                       /* select Level 1 data cache */
+    __DSB();
+
+    ccsidr = SCB->CCSIDR;
+
+                                            /* invalidate D-Cache: one DCISW write per (set, way) pair */
+    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    do {
+      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      do {
+        SCB->DCISW = (((sets << SCB_DCISW_SET_Pos) & SCB_DCISW_SET_Msk) |
+                      ((ways << SCB_DCISW_WAY_Pos) & SCB_DCISW_WAY_Msk)  );
+        #if defined ( __CC_ARM )
+          __schedule_barrier();
+        #endif
+      } while (ways-- != 0U);               /* runs for the CCSIDR field value down to 0, inclusive */
+    } while(sets-- != 0U);                  /* runs for the CCSIDR field value down to 0, inclusive */
+    __DSB();
+
+    SCB->CCR |=  (uint32_t)SCB_CCR_DC_Msk;  /* enable D-Cache */
+
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Disable D-Cache
+  \details Clears the D-Cache enable bit in SCB->CCR first, then cleans and invalidates the Level 1 D-Cache by set/way.
+  */
+__STATIC_FORCEINLINE void SCB_DisableDCache (void)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    uint32_t ccsidr;
+    uint32_t sets;
+    uint32_t ways;
+
+    SCB->CSSELR = 0U;                       /* select Level 1 data cache */
+    __DSB();
+
+    SCB->CCR &= ~(uint32_t)SCB_CCR_DC_Msk;  /* disable D-Cache */
+    __DSB();
+
+    ccsidr = SCB->CCSIDR;
+
+                                            /* clean & invalidate D-Cache: one DCCISW write per (set, way) pair */
+    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    do {
+      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      do {
+        SCB->DCCISW = (((sets << SCB_DCCISW_SET_Pos) & SCB_DCCISW_SET_Msk) |
+                       ((ways << SCB_DCCISW_WAY_Pos) & SCB_DCCISW_WAY_Msk)  );
+        #if defined ( __CC_ARM )
+          __schedule_barrier();
+        #endif
+      } while (ways-- != 0U);               /* runs for the CCSIDR field value down to 0, inclusive */
+    } while(sets-- != 0U);                  /* runs for the CCSIDR field value down to 0, inclusive */
+
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Invalidate D-Cache
+  \details Invalidates the Level 1 D-Cache by set/way via SCB->DCISW. The enable state in SCB->CCR is not touched.
+  */
+__STATIC_FORCEINLINE void SCB_InvalidateDCache (void)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    uint32_t ccsidr;
+    uint32_t sets;
+    uint32_t ways;
+
+    SCB->CSSELR = 0U;                       /* select Level 1 data cache */
+    __DSB();
+
+    ccsidr = SCB->CCSIDR;
+
+                                            /* invalidate D-Cache: one DCISW write per (set, way) pair */
+    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    do {
+      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      do {
+        SCB->DCISW = (((sets << SCB_DCISW_SET_Pos) & SCB_DCISW_SET_Msk) |
+                      ((ways << SCB_DCISW_WAY_Pos) & SCB_DCISW_WAY_Msk)  );
+        #if defined ( __CC_ARM )
+          __schedule_barrier();
+        #endif
+      } while (ways-- != 0U);               /* runs for the CCSIDR field value down to 0, inclusive */
+    } while(sets-- != 0U);                  /* runs for the CCSIDR field value down to 0, inclusive */
+
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Clean D-Cache
+  \details Cleans the Level 1 D-Cache by set/way via SCB->DCCSW. The enable state in SCB->CCR is not touched.
+  */
+__STATIC_FORCEINLINE void SCB_CleanDCache (void)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    uint32_t ccsidr;
+    uint32_t sets;
+    uint32_t ways;
+
+    SCB->CSSELR = 0U;                       /* select Level 1 data cache */
+    __DSB();
+
+    ccsidr = SCB->CCSIDR;
+
+                                            /* clean D-Cache: one DCCSW write per (set, way) pair */
+    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    do {
+      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      do {
+        SCB->DCCSW = (((sets << SCB_DCCSW_SET_Pos) & SCB_DCCSW_SET_Msk) |
+                      ((ways << SCB_DCCSW_WAY_Pos) & SCB_DCCSW_WAY_Msk)  );
+        #if defined ( __CC_ARM )
+          __schedule_barrier();
+        #endif
+      } while (ways-- != 0U);               /* runs for the CCSIDR field value down to 0, inclusive */
+    } while(sets-- != 0U);                  /* runs for the CCSIDR field value down to 0, inclusive */
+
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   Clean & Invalidate D-Cache
+  \details Cleans and invalidates the Level 1 D-Cache by set/way via SCB->DCCISW. The enable state in SCB->CCR is not touched.
+  */
+__STATIC_FORCEINLINE void SCB_CleanInvalidateDCache (void)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    uint32_t ccsidr;
+    uint32_t sets;
+    uint32_t ways;
+
+    SCB->CSSELR = 0U;                       /* select Level 1 data cache */
+    __DSB();
+
+    ccsidr = SCB->CCSIDR;
+
+                                            /* clean & invalidate D-Cache: one DCCISW write per (set, way) pair */
+    sets = (uint32_t)(CCSIDR_SETS(ccsidr));
+    do {
+      ways = (uint32_t)(CCSIDR_WAYS(ccsidr));
+      do {
+        SCB->DCCISW = (((sets << SCB_DCCISW_SET_Pos) & SCB_DCCISW_SET_Msk) |
+                       ((ways << SCB_DCCISW_WAY_Pos) & SCB_DCCISW_WAY_Msk)  );
+        #if defined ( __CC_ARM )
+          __schedule_barrier();
+        #endif
+      } while (ways-- != 0U);               /* runs for the CCSIDR field value down to 0, inclusive */
+    } while(sets-- != 0U);                  /* runs for the CCSIDR field value down to 0, inclusive */
+
+    __DSB();
+    __ISB();
+  #endif
+}
+
+
+/**
+  \brief   D-Cache Invalidate by address
+  \details Invalidates the D-Cache lines covering the given block by writing one address per 32-byte step to SCB->DCIMVAC.
+           addr is not aligned in software; the size is extended by addr's offset inside its 32-byte line instead.
+           Nothing is done (no barriers either) when dsize <= 0 or __DCACHE_PRESENT is not 1.
+  \param[in]   addr    address
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+__STATIC_FORCEINLINE void SCB_InvalidateDCache_by_Addr (void *addr, int32_t dsize)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    if ( dsize > 0 ) { 
+       int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
+      uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
+    
+      __DSB();
+
+      do {
+        SCB->DCIMVAC = op_addr;             /* register accepts only 32byte aligned values, only bits 31..5 are valid */
+        op_addr += __SCB_DCACHE_LINE_SIZE;
+        op_size -= __SCB_DCACHE_LINE_SIZE;
+      } while ( op_size > 0 );              /* one write per 32-byte step until the extended size is consumed */
+
+      __DSB();
+      __ISB();
+    }
+  #endif
+}
+
+
+/**
+  \brief   D-Cache Clean by address
+  \details Cleans the D-Cache lines covering the given block by writing one address per 32-byte step to SCB->DCCMVAC.
+           addr is not aligned in software; the size is extended by addr's offset inside its 32-byte line instead.
+           Nothing is done (no barriers either) when dsize <= 0 or __DCACHE_PRESENT is not 1.
+  \param[in]   addr    address
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+__STATIC_FORCEINLINE void SCB_CleanDCache_by_Addr (uint32_t *addr, int32_t dsize)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    if ( dsize > 0 ) { 
+       int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
+      uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
+    
+      __DSB();
+
+      do {
+        SCB->DCCMVAC = op_addr;             /* register accepts only 32byte aligned values, only bits 31..5 are valid */
+        op_addr += __SCB_DCACHE_LINE_SIZE;
+        op_size -= __SCB_DCACHE_LINE_SIZE;
+      } while ( op_size > 0 );              /* one write per 32-byte step until the extended size is consumed */
+
+      __DSB();
+      __ISB();
+    }
+  #endif
+}
+
+
+/**
+  \brief   D-Cache Clean and Invalidate by address
+  \details Cleans and invalidates the D-Cache lines covering the given block, one SCB->DCCIMVAC write per 32-byte step.
+           addr is not aligned in software; the size is extended by addr's offset inside its 32-byte line instead.
+           Nothing is done (no barriers either) when dsize <= 0 or __DCACHE_PRESENT is not 1.
+  \param[in]   addr    address (aligned to 32-byte boundary)
+  \param[in]   dsize   size of memory block (in number of bytes)
+*/
+__STATIC_FORCEINLINE void SCB_CleanInvalidateDCache_by_Addr (uint32_t *addr, int32_t dsize)
+{
+  #if defined (__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
+    if ( dsize > 0 ) { 
+       int32_t op_size = dsize + (((uint32_t)addr) & (__SCB_DCACHE_LINE_SIZE - 1U));
+      uint32_t op_addr = (uint32_t)addr /* & ~(__SCB_DCACHE_LINE_SIZE - 1U) */;
+    
+      __DSB();
+
+      do {
+        SCB->DCCIMVAC = op_addr;            /* register accepts only 32byte aligned values, only bits 31..5 are valid */
+        op_addr +=          __SCB_DCACHE_LINE_SIZE;
+        op_size -=          __SCB_DCACHE_LINE_SIZE;
+      } while ( op_size > 0 );              /* one write per 32-byte step until the extended size is consumed */
+
+      __DSB();
+      __ISB();
+    }
+  #endif
+}
+
+/*@} end of CMSIS_Core_CacheFunctions */
+#endif
+
+
+/* ##################################    SysTick function  ############################################ */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_SysTickFunctions SysTick Functions
+  \brief    Functions that configure the System.
+  @{
+ */
+
+#if defined (__Vendor_SysTickConfig) && (__Vendor_SysTickConfig == 0U)
+
+/**
+  \brief   System Tick Configuration
+  \details Initializes the System Timer and its interrupt, and starts the System Tick Timer.
+           Counter is in free running mode to generate periodic interrupts; the reload value written is ticks - 1.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \return          0  Function succeeded.
+  \return          1  Function failed: ticks - 1 exceeds SysTick_LOAD_RELOAD_Msk; no register is written in that case.
+  \note    When the variable <b>__Vendor_SysTickConfig</b> is set to 1, then the
+           function <b>SysTick_Config</b> is not included. In this case, the file <b><i>device</i>.h</b>
+           must contain a vendor-specific implementation of this function.
+ */
+__STATIC_INLINE uint32_t SysTick_Config(uint32_t ticks)
+{
+  if ((ticks - 1UL) > SysTick_LOAD_RELOAD_Msk)                      /* NOTE(review): ticks == 0 wraps in the unsigned subtraction, so it is presumably rejected too - confirm mask width */
+  {
+    return (1UL);                                                   /* Reload value impossible */
+  }
+
+  SysTick->LOAD  = (uint32_t)(ticks - 1UL);                         /* set reload register */
+  NVIC_SetPriority (SysTick_IRQn, (1UL << __NVIC_PRIO_BITS) - 1UL); /* SysTick priority = 2^__NVIC_PRIO_BITS - 1 */
+  SysTick->VAL   = 0UL;                                             /* Load the SysTick Counter Value */
+  SysTick->CTRL  = SysTick_CTRL_CLKSOURCE_Msk |
+                   SysTick_CTRL_TICKINT_Msk   |
+                   SysTick_CTRL_ENABLE_Msk;                         /* Enable SysTick IRQ and SysTick Timer */
+  return (0UL);                                                     /* Function successful */
+}
+
+#if defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U)
+/**
+  \brief   System Tick Configuration (non-secure)
+  \details Initializes the non-secure System Timer and its interrupt when in secure state, and starts the System Tick Timer.
+           Counter is in free running mode to generate periodic interrupts; the reload value written is ticks - 1.
+  \param [in]  ticks  Number of ticks between two interrupts.
+  \return          0  Function succeeded.
+  \return          1  Function failed: ticks - 1 exceeds SysTick_LOAD_RELOAD_Msk; no register is written in that case.
+  \note    When the variable <b>__Vendor_SysTickConfig</b> is set to 1, then the
+           function <b>TZ_SysTick_Config_NS</b> is not included. In this case, the file <b><i>device</i>.h</b>
+           must contain a vendor-specific implementation of this function.
+
+ */
+__STATIC_INLINE uint32_t TZ_SysTick_Config_NS(uint32_t ticks)
+{
+  if ((ticks - 1UL) > SysTick_LOAD_RELOAD_Msk)                            /* NOTE(review): ticks == 0 wraps in the unsigned subtraction, so it is presumably rejected too - confirm mask width */
+  {
+    return (1UL);                                                         /* Reload value impossible */
+  }
+
+  SysTick_NS->LOAD  = (uint32_t)(ticks - 1UL);                            /* set reload register */
+  TZ_NVIC_SetPriority_NS (SysTick_IRQn, (1UL << __NVIC_PRIO_BITS) - 1UL); /* SysTick priority = 2^__NVIC_PRIO_BITS - 1 */
+  SysTick_NS->VAL   = 0UL;                                                /* Load the SysTick Counter Value */
+  SysTick_NS->CTRL  = SysTick_CTRL_CLKSOURCE_Msk |
+                      SysTick_CTRL_TICKINT_Msk   |
+                      SysTick_CTRL_ENABLE_Msk;                            /* Enable SysTick IRQ and SysTick Timer */
+  return (0UL);                                                           /* Function successful */
+}
+#endif /* defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3U) */
+
+#endif
+
+/*@} end of CMSIS_Core_SysTickFunctions */
+
+
+
+/* ##################################### Debug In/Output function ########################################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_core_DebugFunctions ITM Functions
+  \brief    Functions that access the ITM debug interface.
+  @{
+ */
+
+extern volatile int32_t ITM_RxBuffer;                              /*!< External variable to receive characters. */
+#define                 ITM_RXBUFFER_EMPTY  ((int32_t)0x5AA55AA5U) /*!< Value identifying \ref ITM_RxBuffer is ready for next character. */
+
+
+/**
+  \brief   ITM Send Character
+  \details Transmits a character via the ITM channel 0, and
+           \li Just returns when no debugger is connected that has booked the output.
+           \li Is blocking when a debugger is connected, but the previous character sent has not been transmitted.
+  \param [in]     ch  Character to transmit; only its low 8 bits are written to the port.
+  \returns            The ch argument, unchanged, whether or not anything was sent.
+ */
+__STATIC_INLINE uint32_t ITM_SendChar (uint32_t ch)
+{
+  if (((ITM->TCR & ITM_TCR_ITMENA_Msk) != 0UL) &&      /* ITM enabled */
+      ((ITM->TER & 1UL               ) != 0UL)   )     /* ITM Port #0 enabled */
+  {
+    while (ITM->PORT[0U].u32 == 0UL)                   /* spin (unbounded) while port 0 reads back 0 */
+    {
+      __NOP();
+    }
+    ITM->PORT[0U].u8 = (uint8_t)ch;                    /* byte-wide write of the truncated character */
+  }
+  return (ch);
+}
+
+
+/**
+  \brief   ITM Receive Character
+  \details Inputs a character via the external variable \ref ITM_RxBuffer and marks the buffer empty again after reading it.
+  \return             Received character.
+  \return         -1  No character pending.
+ */
+__STATIC_INLINE int32_t ITM_ReceiveChar (void)
+{
+  int32_t ch = -1;                           /* no character available */
+
+  if (ITM_RxBuffer != ITM_RXBUFFER_EMPTY)
+  {
+    ch = ITM_RxBuffer;                       /* second read of the volatile buffer: this is the value returned */
+    ITM_RxBuffer = ITM_RXBUFFER_EMPTY;       /* ready for next character */
+  }
+
+  return (ch);
+}
+
+
+/**
+  \brief   ITM Check Character
+  \details Checks whether a character is pending in the variable \ref ITM_RxBuffer, without consuming it.
+  \return          0  No character available.
+  \return          1  Character available.
+ */
+__STATIC_INLINE int32_t ITM_CheckChar (void)
+{
+
+  if (ITM_RxBuffer == ITM_RXBUFFER_EMPTY)    /* buffer still holds the empty marker */
+  {
+    return (0);                              /* no character available */
+  }
+  else
+  {
+    return (1);                              /*    character available */
+  }
+}
+
+/*@} end of CMSIS_core_DebugFunctions */
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CORE_STAR_H_DEPENDANT */
+
+#endif /* __CMSIS_GENERIC */
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/mpu_armv8.h b/edge-impulse-sdk/CMSIS/Core/Include/mpu_armv8.h
index d4c6f7b..cb04a57 100644
--- a/edge-impulse-sdk/CMSIS/Core/Include/mpu_armv8.h
+++ b/edge-impulse-sdk/CMSIS/Core/Include/mpu_armv8.h
@@ -1,11 +1,11 @@
 /******************************************************************************
  * @file     mpu_armv8.h
  * @brief    CMSIS MPU API for Armv8-M and Armv8.1-M MPU
- * @version  V5.1.2
- * @date     10. February 2020
+ * @version  V5.1.4
+ * @date     30. May 2022
  ******************************************************************************/
 /*
- * Copyright (c) 2017-2020 Arm Limited. All rights reserved.
+ * Copyright (c) 2017-2022 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -84,7 +84,7 @@
 * \param SH Defines the Shareability domain for this memory region.
 * \param RO Read-Only: Set to 1 for a read-only memory region.
 * \param NP Non-Privileged: Set to 1 for a non-privileged memory region.
-* \oaram XN eXecute Never: Set to 1 for a non-executable memory region.
+* \param XN eXecute Never: Set to 1 for a non-executable memory region.
 */
 #define ARM_MPU_RBAR(BASE, SH, RO, NP, XN) \
   (((BASE) & MPU_RBAR_BASE_Msk) | \
diff --git a/edge-impulse-sdk/CMSIS/Core/Include/pac_armv81.h b/edge-impulse-sdk/CMSIS/Core/Include/pac_armv81.h
new file mode 100644
index 0000000..854b60a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/Core/Include/pac_armv81.h
@@ -0,0 +1,206 @@
+/******************************************************************************
+ * @file     pac_armv81.h
+ * @brief    CMSIS PAC key functions for Armv8.1-M PAC extension
+ * @version  V1.0.0
+ * @date     23. March 2022
+ ******************************************************************************/
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if   defined ( __ICCARM__ )
+  #pragma system_include         /* treat file as system include file for MISRA check */
+#elif defined (__clang__)
+  #pragma clang system_header    /* treat file as system include file */
+#endif
+
+#ifndef PAC_ARMV81_H
+#define PAC_ARMV81_H
+
+
+/* ###################  PAC Key functions  ########################### */
+/**
+  \ingroup  CMSIS_Core_FunctionInterface
+  \defgroup CMSIS_Core_PacKeyFunctions PAC Key functions
+  \brief    Functions that access the PAC keys.
+  @{
+ */
+
+#if (defined (__ARM_FEATURE_PAUTH) && (__ARM_FEATURE_PAUTH == 1))
+
+/**
+  \brief   read the PAC key used for privileged mode
+  \details Reads pac_key_p_0..pac_key_p_3 one at a time through r1 and stores them at byte offsets 0, 4, 8 and 12 of pPacKey.
+  \param [out]    pPacKey  128bit PAC key; four 32-bit words are written through this pointer
+ */
+__STATIC_FORCEINLINE void __get_PAC_KEY_P (uint32_t* pPacKey) {
+  __ASM volatile (
+  "mrs   r1, pac_key_p_0\n"
+  "str   r1,[%0,#0]\n"
+  "mrs   r1, pac_key_p_1\n"
+  "str   r1,[%0,#4]\n"
+  "mrs   r1, pac_key_p_2\n"
+  "str   r1,[%0,#8]\n"
+  "mrs   r1, pac_key_p_3\n"
+  "str   r1,[%0,#12]\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   write the PAC key used for privileged mode
+  \details Loads four words from byte offsets 0, 4, 8 and 12 of pPacKey through r1 and writes them to pac_key_p_0..pac_key_p_3.
+  \param [in]    pPacKey  128bit PAC key; four 32-bit words are read through this pointer
+ */
+__STATIC_FORCEINLINE void __set_PAC_KEY_P (uint32_t* pPacKey) {
+  __ASM volatile (
+  "ldr   r1,[%0,#0]\n"
+  "msr   pac_key_p_0, r1\n"
+  "ldr   r1,[%0,#4]\n"
+  "msr   pac_key_p_1, r1\n"
+  "ldr   r1,[%0,#8]\n"
+  "msr   pac_key_p_2, r1\n"
+  "ldr   r1,[%0,#12]\n"
+  "msr   pac_key_p_3, r1\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   read the PAC key used for unprivileged mode
+  \details Reads pac_key_u_0..pac_key_u_3 one at a time through r1 and stores them at byte offsets 0, 4, 8 and 12 of pPacKey.
+  \param [out]    pPacKey  128bit PAC key; four 32-bit words are written through this pointer
+ */
+__STATIC_FORCEINLINE void __get_PAC_KEY_U (uint32_t* pPacKey) {
+  __ASM volatile (
+  "mrs   r1, pac_key_u_0\n"
+  "str   r1,[%0,#0]\n"
+  "mrs   r1, pac_key_u_1\n"
+  "str   r1,[%0,#4]\n"
+  "mrs   r1, pac_key_u_2\n"
+  "str   r1,[%0,#8]\n"
+  "mrs   r1, pac_key_u_3\n"
+  "str   r1,[%0,#12]\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   write the PAC key used for unprivileged mode
+  \details Loads four words from byte offsets 0, 4, 8 and 12 of pPacKey through r1 and writes them to pac_key_u_0..pac_key_u_3.
+  \param [in]    pPacKey  128bit PAC key; four 32-bit words are read through this pointer
+ */
+__STATIC_FORCEINLINE void __set_PAC_KEY_U (uint32_t* pPacKey) {
+  __ASM volatile (
+  "ldr   r1,[%0,#0]\n"
+  "msr   pac_key_u_0, r1\n"
+  "ldr   r1,[%0,#4]\n"
+  "msr   pac_key_u_1, r1\n"
+  "ldr   r1,[%0,#8]\n"
+  "msr   pac_key_u_2, r1\n"
+  "ldr   r1,[%0,#12]\n"
+  "msr   pac_key_u_3, r1\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3))
+
+/**
+  \brief   read the PAC key used for privileged mode (non-secure)
+  \details Reads pac_key_p_0_ns..pac_key_p_3_ns through r1 when in secure mode and stores them at byte offsets 0, 4, 8, 12 of pPacKey.
+  \param [out]    pPacKey  128bit PAC key; four 32-bit words are written through this pointer
+ */
+__STATIC_FORCEINLINE void __TZ_get_PAC_KEY_P_NS (uint32_t* pPacKey) {
+  __ASM volatile (
+  "mrs   r1, pac_key_p_0_ns\n"
+  "str   r1,[%0,#0]\n"
+  "mrs   r1, pac_key_p_1_ns\n"
+  "str   r1,[%0,#4]\n"
+  "mrs   r1, pac_key_p_2_ns\n"
+  "str   r1,[%0,#8]\n"
+  "mrs   r1, pac_key_p_3_ns\n"
+  "str   r1,[%0,#12]\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   write the PAC key used for privileged mode (non-secure)
+  \details Loads four words from byte offsets 0, 4, 8, 12 of pPacKey through r1 and writes them to pac_key_p_0_ns..pac_key_p_3_ns when in secure mode.
+  \param [in]    pPacKey  128bit PAC key; four 32-bit words are read through this pointer
+ */
+__STATIC_FORCEINLINE void __TZ_set_PAC_KEY_P_NS (uint32_t* pPacKey) {
+  __ASM volatile (
+  "ldr   r1,[%0,#0]\n"
+  "msr   pac_key_p_0_ns, r1\n"
+  "ldr   r1,[%0,#4]\n"
+  "msr   pac_key_p_1_ns, r1\n"
+  "ldr   r1,[%0,#8]\n"
+  "msr   pac_key_p_2_ns, r1\n"
+  "ldr   r1,[%0,#12]\n"
+  "msr   pac_key_p_3_ns, r1\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   read the PAC key used for unprivileged mode (non-secure)
+  \details Reads pac_key_u_0_ns..pac_key_u_3_ns through r1 when in secure mode and stores them at byte offsets 0, 4, 8, 12 of pPacKey.
+  \param [out]    pPacKey  128bit PAC key; four 32-bit words are written through this pointer
+ */
+__STATIC_FORCEINLINE void __TZ_get_PAC_KEY_U_NS (uint32_t* pPacKey) {
+  __ASM volatile (
+  "mrs   r1, pac_key_u_0_ns\n"
+  "str   r1,[%0,#0]\n"
+  "mrs   r1, pac_key_u_1_ns\n"
+  "str   r1,[%0,#4]\n"
+  "mrs   r1, pac_key_u_2_ns\n"
+  "str   r1,[%0,#8]\n"
+  "mrs   r1, pac_key_u_3_ns\n"
+  "str   r1,[%0,#12]\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+/**
+  \brief   write the PAC key used for unprivileged mode (non-secure)
+  \details Loads four words from byte offsets 0, 4, 8, 12 of pPacKey through r1 and writes them to pac_key_u_0_ns..pac_key_u_3_ns when in secure mode.
+  \param [in]    pPacKey  128bit PAC key; four 32-bit words are read through this pointer
+ */
+__STATIC_FORCEINLINE void __TZ_set_PAC_KEY_U_NS (uint32_t* pPacKey) {
+  __ASM volatile (
+  "ldr   r1,[%0,#0]\n"
+  "msr   pac_key_u_0_ns, r1\n"
+  "ldr   r1,[%0,#4]\n"
+  "msr   pac_key_u_1_ns, r1\n"
+  "ldr   r1,[%0,#8]\n"
+  "msr   pac_key_u_2_ns, r1\n"
+  "ldr   r1,[%0,#12]\n"
+  "msr   pac_key_u_3_ns, r1\n"
+  : : "r" (pPacKey) : "memory", "r1"   /* r1 is the scratch register used above, hence the clobber */
+  );
+}
+
+#endif /* (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) */
+
+#endif /* (defined (__ARM_FEATURE_PAUTH) && (__ARM_FEATURE_PAUTH == 1)) */
+
+/*@} end of CMSIS_Core_PacKeyFunctions */
+
+
+#endif /* PAC_ARMV81_H */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h
index 4f7a5c7..55b789e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h
@@ -3,13 +3,13 @@
  * Title:        arm_common_tables.h
  * Description:  Extern declaration for common tables
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -498,10 +498,20 @@ extern "C"
     extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1];
   #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
 
+  /* Fast vector sqrt */
   #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
      #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
        extern const q31_t sqrtTable_Q31[256];
      #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */
+  #endif 
+
+  /* Accurate scalar sqrt */
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31) 
+       extern const q31_t sqrt_initial_lut_q31[32];
+  #endif
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15) 
+       extern const q15_t sqrt_initial_lut_q15[16];
   #endif
 
   #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables_f16.h
index a5b9454..9c48086 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables_f16.h
@@ -3,13 +3,13 @@
  * Title:        arm_common_tables_f16.h
  * Description:  Extern declaration for common tables
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h
index 2a0659f..2efc0a1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h
@@ -4,13 +4,13 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f32() function.
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs_f16.h
index 13f7b59..843f50e 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs_f16.h
@@ -4,13 +4,13 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f16() function.
  *
- * $Date:        20. April 2020
- * $Revision:    V.1.5.1
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -74,4 +74,4 @@ extern "C"
 }
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h
index 1479611..8706197 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h
@@ -3,13 +3,13 @@
  * Title:        arm_helium_utils.h
  * Description:  Utility functions for Helium development
  *
- * $Date:        09. September 2019
- * $Revision:    V.1.5.1
+ * @version  V1.10.0
+ * @date     08 July 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -335,7 +335,7 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(
         /*
          * Set status as ARM_MATH_SIZE_MISMATCH
          */
-        return = ARM_MATH_SIZE_MISMATCH;
+        return ARM_MATH_SIZE_MISMATCH;
     }
 #else
     (void)dstRows;
@@ -535,7 +535,7 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(
         /*
          * Set status as ARM_MATH_SIZE_MISMATCH
          */
-        return = ARM_MATH_SIZE_MISMATCH;
+        return ARM_MATH_SIZE_MISMATCH;
     }
 #else
     (void)dstRows;
@@ -620,7 +620,7 @@ __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
 
 
     vecSignBits = vclsq(vecIn);
-    vecSignBits = vbicq(vecSignBits, 1);
+    vecSignBits = vbicq_n_s32(vecSignBits, 1);
     /*
      * in = in << no_of_sign_bits;
      */
@@ -687,7 +687,7 @@ __STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)
     vecDst = vuninitializedq_s16();
 
     vecSignBits = vclsq(vecIn);
-    vecSignBits = vbicq(vecSignBits, 1);
+    vecSignBits = vbicq_n_s16(vecSignBits, 1);
     /*
      * in = in << no_of_sign_bits;
      */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h
index d1e68e5..989ba29 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_math.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.7.0
- * @date     18. March 2019
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -32,20 +33,20 @@
    * based devices.
    *
    * The library is divided into a number of functions each covering a specific category:
-   * - Basic math functions
-   * - Fast math functions
-   * - Complex math functions
-   * - Filtering functions
-   * - Matrix functions
-   * - Transform functions
-   * - Motor control functions
-   * - Statistical functions
-   * - Support functions
-   * - Interpolation functions
-   * - Support Vector Machine functions (SVM)
-   * - Bayes classifier functions
-   * - Distance functions
-   * - Quaternion functions
+   * - \ref groupMath "Basic math functions"
+   * - \ref groupFastMath "Fast math functions"
+   * - \ref groupCmplxMath "Complex math functions"
+   * - \ref groupFilters "Filtering functions"
+   * - \ref groupMatrix "Matrix functions"
+   * - \ref groupTransforms "Transform functions"
+   * - \ref groupController "Motor control functions"
+   * - \ref groupStats "Statistical functions"
+   * - \ref groupSupport "Support functions"
+   * - \ref groupInterpolation "Interpolation functions"
+   * - \ref groupSVM "Support Vector Machine functions (SVM)"
+   * - \ref groupBayes "Bayes classifier functions"
+   * - \ref groupDistance "Distance functions"
+   * - \ref groupQuaternionMath "Quaternion functions"
    *
    * The library has generally separate functions for operating on 8-bit integers, 16-bit integers,
    * 32-bit integer and 32-bit floating-point values.
@@ -60,129 +61,95 @@
    *
    * \section using Using the Library
    *
-   * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
-   *
-   * Here is the list of pre-built libraries :
-   * - arm_cortexM7lfdp_math.lib (Cortex-M7, Little endian, Double Precision Floating Point Unit)
-   * - arm_cortexM7bfdp_math.lib (Cortex-M7, Big endian, Double Precision Floating Point Unit)
-   * - arm_cortexM7lfsp_math.lib (Cortex-M7, Little endian, Single Precision Floating Point Unit)
-   * - arm_cortexM7bfsp_math.lib (Cortex-M7, Big endian and Single Precision Floating Point Unit on)
-   * - arm_cortexM7l_math.lib (Cortex-M7, Little endian)
-   * - arm_cortexM7b_math.lib (Cortex-M7, Big endian)
-   * - arm_cortexM4lf_math.lib (Cortex-M4, Little endian, Floating Point Unit)
-   * - arm_cortexM4bf_math.lib (Cortex-M4, Big endian, Floating Point Unit)
-   * - arm_cortexM4l_math.lib (Cortex-M4, Little endian)
-   * - arm_cortexM4b_math.lib (Cortex-M4, Big endian)
-   * - arm_cortexM3l_math.lib (Cortex-M3, Little endian)
-   * - arm_cortexM3b_math.lib (Cortex-M3, Big endian)
-   * - arm_cortexM0l_math.lib (Cortex-M0 / Cortex-M0+, Little endian)
-   * - arm_cortexM0b_math.lib (Cortex-M0 / Cortex-M0+, Big endian)
-   * - arm_ARMv8MBLl_math.lib (Armv8-M Baseline, Little endian)
-   * - arm_ARMv8MMLl_math.lib (Armv8-M Mainline, Little endian)
-   * - arm_ARMv8MMLlfsp_math.lib (Armv8-M Mainline, Little endian, Single Precision Floating Point Unit)
-   * - arm_ARMv8MMLld_math.lib (Armv8-M Mainline, Little endian, DSP instructions)
-   * - arm_ARMv8MMLldfsp_math.lib (Armv8-M Mainline, Little endian, DSP instructions, Single Precision Floating Point Unit)
-   *
-   * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
-   * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single
-   * public header file <code> arm_math.h</code> for Cortex-M cores with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
+   * The library is released in source form. It is strongly advised to compile the library using -Ofast to
+   * have the best performance.
    *
+   * The library functions are declared in the public file `arm_math.h` which is placed in the `Include` folder.
+   * Simply include this file. If you don't want to include everything, you can also rely
+   * on headers in `Include/dsp` folder and use only what you need.
    *
    * \section example Examples
    *
-   * The library ships with a number of examples which demonstrate how to use the library functions.
+   * The library ships with a number of examples which demonstrate how to use the library functions. Please refer to \ref groupExamples.
    *
    * \section toolchain Toolchain Support
    *
    * The library is now tested on Fast Models building with cmake.
-   * Core M0, M7, A5 are tested.
-   * 
-   * 
-   *
-   * \section building Building the Library
-   *
-   * The library installer contains a project file to rebuild libraries on MDK toolchain in the <code>CMSIS\\DSP\\Projects\\ARM</code> folder.
-   * - arm_cortexM_math.uvprojx
-   *
-   *
-   * The libraries can be built by opening the arm_cortexM_math.uvprojx project in MDK-ARM, selecting a specific target, and defining the optional preprocessor macros detailed above.
+   * Core M0, M4, M7, M33, M55, A32 are tested.
    *
-   * There is also a work in progress cmake build. The README file is giving more details.
    *
    * \section preprocessor Preprocessor Macros
    *
-   * Each library project have different preprocessor macros.
-   *
-   * - ARM_MATH_BIG_ENDIAN:
-   *
-   * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
-   *
-   * - ARM_MATH_MATRIX_CHECK:
-   *
-   * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
-   *
-   * - ARM_MATH_ROUNDING:
-   *
-   * Define macro ARM_MATH_ROUNDING for rounding on support functions
-   *
-   * - ARM_MATH_LOOPUNROLL:
-   *
-   * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
-   *
-   * - ARM_MATH_NEON:
-   *
-   * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions.
+   * Each library project has different preprocessor macros.
+   *
+   * - `ARM_MATH_BIG_ENDIAN`:
+   *  - Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
+   * .
+   * - `ARM_MATH_MATRIX_CHECK`:
+   *  - Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
+   * .
+   * - `ARM_MATH_ROUNDING`:
+   *  - Define macro ARM_MATH_ROUNDING for rounding on support functions
+   * .
+   * - `ARM_MATH_LOOPUNROLL`:
+   *  - Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
+   * .
+   * - `ARM_MATH_NEON`:
+   *  - Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions.
    * It is not enabled by default when Neon is available because performances are 
    * dependent on the compiler and target architecture.
-   *
-   * - ARM_MATH_NEON_EXPERIMENTAL:
-   *
-   * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions of 
+   * .
+   * - `ARM_MATH_NEON_EXPERIMENTAL`:
+   *  - Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions
    * of some DSP functions. Experimental Neon versions currently do not have better
    * performances than the scalar versions.
-   *
-   * - ARM_MATH_HELIUM:
-   *
-   * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_FLOAT16.
-   *
-   * - ARM_MATH_MVEF:
-   *
-   * Select Helium versions of the f32 algorithms.
+   * .
+   * - `ARM_MATH_HELIUM`:
+   *  - It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_MVE_FLOAT16.
+   * .
+   * - `ARM_MATH_HELIUM_EXPERIMENTAL`:
+   *  - Only taken into account when ARM_MATH_MVEF, ARM_MATH_MVEI or ARM_MATH_MVE_FLOAT16 are defined.
+   * Enable some vector versions which may have worse performance than scalar
+   * depending on the core / compiler configuration.
+   * .
+   * - `ARM_MATH_MVEF`:
+   *  - Select Helium versions of the f32 algorithms.
    * It implies ARM_MATH_FLOAT16 and ARM_MATH_MVEI.
-   *
-   * - ARM_MATH_MVEI:
-   *
-   * Select Helium versions of the int and fixed point algorithms.
-   *
-   * - ARM_MATH_MVE_FLOAT16:
-   *
-   * MVE Float16 implementations of some algorithms (Requires MVE extension).
-   *
-   * - DISABLEFLOAT16:
-   *
-   * Disable float16 algorithms when __fp16 is not supported for a
+   * .
+   * - `ARM_MATH_MVEI`:
+   *  - Select Helium versions of the int and fixed point algorithms.
+   * .
+   * - `ARM_MATH_MVE_FLOAT16`:
+   *  - MVE Float16 implementations of some algorithms (Requires MVE extension).
+   * .
+   * - `DISABLEFLOAT16`:
+   *  - Disable float16 algorithms when __fp16 is not supported for a
    * specific compiler / core configuration.
    * This is only valid for scalar. When vector architecture is
    * supporting f16 then it can't be disabled.
+   * .
+   * - `ARM_MATH_AUTOVECTORIZE`:
+   *  - With Helium or Neon, disable the use of vectorized code with C intrinsics
+   * and use pure C instead. The vectorization is then done by the compiler.
    *
-   * <hr>
    * \section pack CMSIS-DSP in ARM::CMSIS Pack
    *
    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
    * |File/Folder                      |Content                                                                 |
    * |---------------------------------|------------------------------------------------------------------------|
    * |\b CMSIS\\Documentation\\DSP     | This documentation                                                     |
-   * |\b CMSIS\\DSP\\DSP_Lib_TestSuite | DSP_Lib deprecated test suite                                                     |
    * |\b CMSIS\\DSP\\Examples          | Example projects demonstrating the usage of the library functions      |
-   * |\b CMSIS\\DSP\\Include           | DSP_Lib include files for using and building the lib
-   * |\b CMSIS\\DSP\\PrivateInclude    | DSP_Lib private include files for building the lib                                               |
-   * |\b CMSIS\\DSP\\Lib               | DSP_Lib binaries                                                       |
-   * |\b CMSIS\\DSP\\Projects          | Projects to rebuild DSP_Lib binaries                                   |
-   * |\b CMSIS\\DSP\\Source            | DSP_Lib source files                                                   |
+   * |\b CMSIS\\DSP\\ComputeLibrary    | Small Neon kernels when building on Cortex-A                           |
+   * |\b CMSIS\\DSP\\Include           | include files for using and building the lib                           |
+   * |\b CMSIS\\DSP\\PrivateInclude    | private include files for building the lib                                               |
+   * |\b CMSIS\\DSP\\Source            | source files                                                   |
    *
-   * <hr>
    * \section rev Revision History of CMSIS-DSP
    * Please refer to \ref ChangeLog_pg.
+   *
+   * \section license License
+   * 
+   * The CMSIS-DSP is provided free of charge under the <a href="LICENSE.txt">Apache 2.0 License</a>.
    */
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_f16.h
index 85b20df..166d7d6 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_f16.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_math_f16.h
  * @brief    Public header file for f16 function of the CMSIS DSP Library
- * @version  V1.8.1
- * @date     20. April 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h
index e750a8f..850d51e 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_math_memory.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -73,7 +74,7 @@ extern "C"
   @return        Q31 value
  */
 __STATIC_FORCEINLINE q31_t read_q15x2 (
-  q15_t * pQ15)
+  q15_t const * pQ15)
 {
   q31_t val;
 
@@ -91,40 +92,14 @@ __STATIC_FORCEINLINE q31_t read_q15x2 (
   @param[in]     pQ15      points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q15x2_ia (
-  q15_t ** pQ15)
-{
-  q31_t val;
-
-#ifdef __ARM_FEATURE_UNALIGNED
-  memcpy (&val, *pQ15, 4);
-#else
-  val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
-#endif
-
- *pQ15 += 2;
- return (val);
-}
+#define read_q15x2_ia(pQ15) read_q15x2((*(pQ15) += 2) - 2)
 
 /**
   @brief         Read 2 Q15 from Q15 pointer and decrement pointer afterwards.
   @param[in]     pQ15      points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q15x2_da (
-  q15_t ** pQ15)
-{
-  q31_t val;
-
-#ifdef __ARM_FEATURE_UNALIGNED
-  memcpy (&val, *pQ15, 4);
-#else
-  val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
-#endif
-
-  *pQ15 -= 2;
-  return (val);
-}
+#define read_q15x2_da(pQ15) read_q15x2((*(pQ15) -= 2) + 2)
 
 /**
   @brief         Write 2 Q15 to Q15 pointer and increment pointer afterwards.
@@ -140,8 +115,8 @@ __STATIC_FORCEINLINE void write_q15x2_ia (
 #ifdef __ARM_FEATURE_UNALIGNED
   memcpy (*pQ15, &val, 4);
 #else
-  (*pQ15)[0] = (val & 0x0FFFF);
-  (*pQ15)[1] = (val >> 16) & 0x0FFFF;
+  (*pQ15)[0] = (q15_t)(val & 0x0FFFF);
+  (*pQ15)[1] = (q15_t)((val >> 16) & 0x0FFFF);
 #endif
 
  *pQ15 += 2;
@@ -162,52 +137,43 @@ __STATIC_FORCEINLINE void write_q15x2 (
 #ifdef __ARM_FEATURE_UNALIGNED
   memcpy (pQ15, &val, 4);
 #else
-  pQ15[0] = val & 0x0FFFF;
-  pQ15[1] = val >> 16;
+  pQ15[0] = (q15_t)(val & 0x0FFFF);
+  pQ15[1] = (q15_t)(val >> 16);
 #endif
 }
 
 
 /**
-  @brief         Read 4 Q7 from Q7 pointer and increment pointer afterwards.
+  @brief         Read 4 Q7 from Q7 pointer
   @param[in]     pQ7       points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q7x4_ia (
-  q7_t ** pQ7)
+__STATIC_FORCEINLINE q31_t read_q7x4 (
+  q7_t const * pQ7)
 {
   q31_t val;
 
-
 #ifdef __ARM_FEATURE_UNALIGNED
-  memcpy (&val, *pQ7, 4);
+  memcpy (&val, pQ7, 4);
 #else
-  val =(((*pQ7)[3] & 0x0FF) << 24)  | (((*pQ7)[2] & 0x0FF) << 16)  | (((*pQ7)[1] & 0x0FF) << 8)  | ((*pQ7)[0] & 0x0FF);
+  val =((pQ7[3] & 0x0FF) << 24)  | ((pQ7[2] & 0x0FF) << 16)  | ((pQ7[1] & 0x0FF) << 8)  | (pQ7[0] & 0x0FF);
 #endif 
-
-  *pQ7 += 4;
-
   return (val);
 }
 
 /**
-  @brief         Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
+  @brief         Read 4 Q7 from Q7 pointer and increment pointer afterwards.
   @param[in]     pQ7       points to input value
   @return        Q31 value
  */
-__STATIC_FORCEINLINE q31_t read_q7x4_da (
-  q7_t ** pQ7)
-{
-  q31_t val;
-#ifdef __ARM_FEATURE_UNALIGNED
-  memcpy (&val, *pQ7, 4);
-#else
-  val = ((((*pQ7)[3]) & 0x0FF) << 24) | ((((*pQ7)[2]) & 0x0FF) << 16)   | ((((*pQ7)[1]) & 0x0FF) << 8)  | ((*pQ7)[0] & 0x0FF);
-#endif 
-  *pQ7 -= 4;
+#define read_q7x4_ia(pQ7) read_q7x4((*(pQ7) += 4) - 4)
 
-  return (val);
-}
+/**
+  @brief         Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
+  @param[in]     pQ7       points to input value
+  @return        Q31 value
+ */
+#define read_q7x4_da(pQ7) read_q7x4((*(pQ7) -= 4) + 4)
 
 /**
   @brief         Write 4 Q7 to Q7 pointer and increment pointer afterwards.
@@ -223,10 +189,10 @@ __STATIC_FORCEINLINE void write_q7x4_ia (
 #ifdef __ARM_FEATURE_UNALIGNED
   memcpy (*pQ7, &val, 4);
 #else
-  (*pQ7)[0] = val & 0x0FF;
-  (*pQ7)[1] = (val >> 8) & 0x0FF;
-  (*pQ7)[2] = (val >> 16) & 0x0FF;
-  (*pQ7)[3] = (val >> 24) & 0x0FF;
+  (*pQ7)[0] = (q7_t)(val & 0x0FF);
+  (*pQ7)[1] = (q7_t)((val >> 8) & 0x0FF);
+  (*pQ7)[2] = (q7_t)((val >> 16) & 0x0FF);
+  (*pQ7)[3] = (q7_t)((val >> 24) & 0x0FF);
 
 #endif
   *pQ7 += 4;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h
index a48b659..b3db6f7 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_math_types.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -36,6 +37,9 @@ extern "C"
 
 #elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
 
+#elif defined ( __APPLE_CC__ )
+  #pragma GCC diagnostic ignored "-Wold-style-cast"
+
 #elif defined ( __GNUC__ )
   #pragma GCC diagnostic push
   #pragma GCC diagnostic ignored "-Wsign-conversion"
@@ -63,7 +67,11 @@ extern "C"
 #define __STATIC_FORCEINLINE static __forceinline
 #define __STATIC_INLINE static __inline
 #define __ALIGNED(x) __declspec(align(x))
-
+#elif defined ( __APPLE_CC__ )
+#include <stdint.h>
+#define  __ALIGNED(x) __attribute__((aligned(x)))
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) 
+#define __STATIC_INLINE static inline
 #elif defined (__GNUC_PYTHON__)
 #include <stdint.h>
 #define  __ALIGNED(x) __attribute__((aligned(x)))
@@ -87,16 +95,22 @@ extern "C"
 #endif
 
 #if defined(ARM_MATH_NEON)
-#include <arm_neon.h>
-#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-  #if !defined(ARM_MATH_NEON_FLOAT16)
-  #define ARM_MATH_NEON_FLOAT16
+  #if defined(_MSC_VER) && defined(_M_ARM64EC)
+    #include <arm64_neon.h>
+  #else
+    #include <arm_neon.h>
+  #endif
+  #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+    #if !defined(ARM_MATH_NEON_FLOAT16)
+      #define ARM_MATH_NEON_FLOAT16
+    #endif
   #endif
-#endif
 #endif
 
 #if !defined(ARM_MATH_AUTOVECTORIZE)
 
+
+#if defined(__ARM_FEATURE_MVE)
 #if __ARM_FEATURE_MVE
   #if !defined(ARM_MATH_MVEI)
     #define ARM_MATH_MVEI
@@ -112,6 +126,7 @@ extern "C"
   #endif
 #endif
 
+#endif /*defined(__ARM_FEATURE_MVE)*/
 #endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/
 
 
@@ -160,6 +175,12 @@ extern "C"
   #define LOW_OPTIMIZATION_EXIT
   #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
   #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+  
+#elif defined ( __APPLE_CC__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 
 #elif defined ( __GNUC__ )
   #define LOW_OPTIMIZATION_ENTER \
@@ -223,6 +244,8 @@ extern "C"
 
 #elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
 
+#elif defined ( __APPLE_CC__ )
+
 #elif defined ( __GNUC__ )
 #pragma GCC diagnostic pop
 
@@ -244,7 +267,7 @@ extern "C"
 }
 #endif
 
-#if __ARM_FEATURE_MVE
+#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE
 #include <arm_mve.h>
 #endif
 
@@ -276,7 +299,9 @@ extern "C"
   /**
    * @brief 32-bit floating-point type definition.
    */
+#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2)
   typedef float float32_t;
+#endif
 
   /**
    * @brief 64-bit floating-point type definition.
@@ -298,12 +323,12 @@ extern "C"
   typedef int32x4_t q31x4_t;
 
   /**
-   * @brief 16-bit fractional 128-bit vector data type with 16-bit alignement in 1.15 format.
+   * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format.
    */
   typedef __ALIGNED(2) int16x8_t q15x8_t;
 
  /**
-   * @brief 8-bit fractional 128-bit vector data type with 8-bit alignement in 1.7 format.
+   * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format.
    */
   typedef __ALIGNED(1) int8x16_t q7x16_t;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h
index c83f761..771af5c 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_math_types_f16.h
  * @brief    Public header file for f16 function of the CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -61,7 +62,7 @@ won't be built.
 #endif
 
 #if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF)  && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
-  
+
 #if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16)
 
   /**
@@ -92,7 +93,7 @@ won't be built.
 #endif
 
 #if defined(ARM_MATH_NEON)
- 
+
 
 #if defined(ARM_MATH_NEON_FLOAT16)
   /**
@@ -128,21 +129,30 @@ won't be built.
       float16x4_t     f;
       int16x4_t       i;
   } any16x4_t;
-#endif 
+#endif
 
 #endif
 
 
 
 #if defined(ARM_FLOAT16_SUPPORTED)
+
+#if defined(__ICCARM__)
+
+#define F16INFINITY   ((float16_t) INFINITY)
+
+#else
+
+#define F16INFINITY ((float16_t)__builtin_inf())
+
+#endif
+
 #define F16_MAX   ((float16_t)__FLT16_MAX__)
-#define F16_MIN   (-(float16_t)__FLT16_MAX__)
+#define F16_MIN   (-(_Float16)__FLT16_MAX__)
 
 #define F16_ABSMAX   ((float16_t)__FLT16_MAX__)
 #define F16_ABSMIN   ((float16_t)0.0f16)
 
-#define F16INFINITY ((float16_t)__builtin_inf())
-  
 #endif /* ARM_FLOAT16_SUPPORTED*/
 #endif /* !defined( __CC_ARM ) */
 
@@ -151,5 +161,3 @@ won't be built.
 #endif
 
 #endif /* _ARM_MATH_F16_H */
-
-
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables.h
index 74f51b2..43456f0 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables.h
@@ -4,12 +4,13 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * $Date:        14. April 2020
+ * @version  V1.10.0
+ * @date     04 October 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables_f16.h
index 171a391..62b8d9b 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_mve_tables_f16.h
@@ -4,12 +4,13 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * $Date:        14. April 2020
+ * @version  V1.10.0
+ * @date     04 October 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -39,7 +40,7 @@ extern "C"
 
  
 
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
 
@@ -96,7 +97,7 @@ extern float16_t rearranged_twiddle_stride3_4096_f16[2728];
 
 #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
 
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h
index 4d15381..4994892 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h
@@ -40,13 +40,280 @@ extern "C"
 #define MVE_CMPLX_MULT_FLT_AxB(A,B)         vcmlaq_rot90(vcmulq(A, B), A, B)
 #define MVE_CMPLX_MULT_FLT_Conj_AxB(A,B)    vcmlaq_rot270(vcmulq(A, B), A, B)
 
-#define MVE_CMPLX_MULT_FX_AxB(A,B)          vqdmladhxq(vqdmlsdhq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
-#define MVE_CMPLX_MULT_FX_AxConjB(A,B)      vqdmladhq(vqdmlsdhxq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxB(A,B,TyA)      vqdmladhxq(vqdmlsdhq((TyA)vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxConjB(A,B,TyA)  vqdmladhq(vqdmlsdhxq((TyA)vuninitializedq_s32(), A, B), A, B)
 
 #define MVE_CMPLX_ADD_FX_A_ixB(A, B)        vhcaddq_rot90(A,B)
 #define MVE_CMPLX_SUB_FX_A_ixB(A,B)         vhcaddq_rot270(A,B)
 
 
+/**
+  @brief         In-place 32 bit reversal function for helium
+  @param[in,out] pSrc        points to in-place buffer of unknown 32-bit data type (accessed below as 64-bit elements)
+  @param[in]     bitRevLen   bit reversal table length (the loop consumes 8 table entries per iteration)
+  @param[in]     pBitRevTab  points to bit reversal table (entries are used as gather/scatter offsets into pSrc)
+  @return        none
+*/
+
+__STATIC_INLINE void arm_bitreversal_32_inpl_mve(
+        uint32_t *pSrc,
+  const uint16_t  bitRevLen,
+  const uint16_t *pBitRevTab)
+
+{
+    uint64_t       *src = (uint64_t *) pSrc;
+    int32_t         blkCnt;     /* loop counters */
+    uint32x4_t      bitRevTabOff;
+    uint32x4_t      one = vdupq_n_u32(1);
+    uint64x2_t      inLow, inHigh;
+    uint64x2_t      bitRevOff1Low, bitRevOff0Low;
+    uint64x2_t      bitRevOff1High, bitRevOff0High;
+
+    /* load scheduling to increase gather load idx update / gather load distance */
+    bitRevTabOff = vldrhq_u32(pBitRevTab);
+    pBitRevTab += 4;
+    /* multiply by one to expand the four table entries into two vectors of 64-bit offsets */
+    bitRevOff0Low = vmullbq_int_u32(bitRevTabOff, one);
+    bitRevOff0High = vmulltq_int_u32(bitRevTabOff, one);
+
+    /* each iteration swaps two batches of elements and pre-loads the offsets of the following batch */
+    blkCnt = bitRevLen / 8;
+    while (blkCnt > 0) {
+        bitRevTabOff = vldrhq_u32(pBitRevTab);
+        pBitRevTab += 4;
+
+        /* 64-bit index expansion */
+        bitRevOff1Low = vmullbq_int_u32(bitRevTabOff, one);
+        bitRevOff1High = vmulltq_int_u32(bitRevTabOff, one);
+        /* swap: read the elements at the Low/High offsets, then write each to the other's offsets */
+        inLow = vldrdq_gather_offset_u64(src, bitRevOff0Low);
+        inHigh = vldrdq_gather_offset_u64(src, bitRevOff0High);
+
+        vstrdq_scatter_offset_u64(src, bitRevOff0Low, inHigh);
+        vstrdq_scatter_offset_u64(src, bitRevOff0High, inLow);
+
+
+        /* unrolled: pre-load the offsets used by the next iteration (or by the tail below) */
+        bitRevTabOff = vldrhq_u32(pBitRevTab);
+        pBitRevTab += 4;
+        /* NOTE(review): 4 + 8 * (bitRevLen / 8) table entries are loaded in total; when bitRevLen is a multiple of 8 the last 4 are never used - confirm the tables tolerate this read-ahead */
+        bitRevOff0Low = vmullbq_int_u32(bitRevTabOff, one);
+        bitRevOff0High = vmulltq_int_u32(bitRevTabOff, one);
+
+        inLow = vldrdq_gather_offset_u64(src, bitRevOff1Low);
+        inHigh = vldrdq_gather_offset_u64(src, bitRevOff1High);
+
+        vstrdq_scatter_offset_u64(src, bitRevOff1Low, inHigh);
+        vstrdq_scatter_offset_u64(src, bitRevOff1High, inLow);
+
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /* tail: table length not a multiple of 8, so the offsets pre-loaded last are still pending */
+    if (bitRevLen & 7) {
+        /* FFT size = 16 */
+        inLow = vldrdq_gather_offset_u64(src, bitRevOff0Low);
+        inHigh = vldrdq_gather_offset_u64(src, bitRevOff0High);
+
+        vstrdq_scatter_offset_u64(src, bitRevOff0Low, inHigh);
+        vstrdq_scatter_offset_u64(src, bitRevOff0High, inLow);
+    }
+}
+
+
+
+/**
+  @brief         In-place 16 bit reversal function for helium
+  @param[in,out] pSrc        points to in-place buffer of unknown 16-bit data type (accessed below as 32-bit elements)
+  @param[in]     bitRevLen   bit reversal table length (the loop consumes 16 table entries per iteration)
+  @param[in]     pBitRevTab  points to bit reversal table (entries are shifted right by 3 before being used as offsets)
+  @return        none
+*/
+
+__STATIC_INLINE void arm_bitreversal_16_inpl_mve(
+        uint16_t *pSrc,
+  const uint16_t bitRevLen,
+  const uint16_t *pBitRevTab)
+
+{
+    uint32_t       *src = (uint32_t *) pSrc;
+    int32_t         blkCnt;     /* loop counters */
+    uint32x4_t      bitRevTabOff;   /* NOTE(review): declared uint32x4_t but loaded with vldrhq_u16 and cast to uint16x8_t at every use - presumably relies on lax vector conversions, confirm */
+    uint16x8_t      one = vdupq_n_u16(1);
+    uint32x4_t      bitRevOff1Low, bitRevOff0Low;
+    uint32x4_t      bitRevOff1High, bitRevOff0High;
+    uint32x4_t      inLow, inHigh;
+
+    /* load scheduling to increase gather load idx update / gather load distance */
+    bitRevTabOff = vldrhq_u16(pBitRevTab);
+    pBitRevTab += 8;
+    /* multiply by one to expand the eight table entries into two vectors of 32-bit offsets, then shift each right by 3 */
+    bitRevOff0Low = vmullbq_int_u16((uint16x8_t)bitRevTabOff, one);
+    bitRevOff0High = vmulltq_int_u16((uint16x8_t)bitRevTabOff, one);
+    bitRevOff0Low = vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+    bitRevOff0High = vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
+
+    blkCnt = (bitRevLen / 16);
+    while (blkCnt > 0) {
+        bitRevTabOff = vldrhq_u16(pBitRevTab);
+        pBitRevTab += 8;
+
+        bitRevOff1Low = vmullbq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff1High = vmulltq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff1Low = vshrq_n_u16((uint16x8_t)bitRevOff1Low, 3);
+        bitRevOff1High = vshrq_n_u16((uint16x8_t)bitRevOff1High, 3);
+        /* swap: read the 32-bit elements at the Low/High offsets, then write each to the other's offsets */
+        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
+        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
+
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0Low, inHigh);
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0High, inLow);
+
+        /* loop unrolling: pre-load the offsets used by the next iteration (or by the tail below) */
+        bitRevTabOff = vldrhq_u16(pBitRevTab);
+        pBitRevTab += 8;
+        /* NOTE(review): 8 + 16 * (bitRevLen / 16) table entries are loaded by this point; when bitRevLen is a multiple of 16 the last 8 are never used - confirm the tables tolerate this read-ahead */
+        bitRevOff0Low = vmullbq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff0High = vmulltq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff0Low = vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+        bitRevOff0High = vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
+
+        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff1Low);
+        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff1High);
+
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff1Low, inHigh);
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff1High, inLow);
+
+        blkCnt--;
+    }
+
+    /* tail handling: only remainders of 8 and 12 table entries are handled; any other remainder does nothing */
+    blkCnt = bitRevLen & 0xf;
+    if (blkCnt == 8) {
+        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
+        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
+
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0Low, inHigh);
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0High, inLow);
+    } else if (blkCnt == 12) {
+        /* FFT 16 special case */
+        mve_pred16_t    p = vctp16q(4);
+        /* predicated load of the last four table entries; the other lanes are zeroed */
+        bitRevTabOff = vldrhq_z_u16(pBitRevTab, p);
+        /* first swap the eight entries that were pre-loaded earlier */
+        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
+        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
+
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0Low, inHigh);
+        vstrwq_scatter_shifted_offset_u32(src, bitRevOff0High, inLow);
+        /* then expand the final four entries and swap them under predicate p */
+        bitRevOff0Low = vmullbq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff0High = vmulltq_int_u16((uint16x8_t)bitRevTabOff, one);
+        bitRevOff0Low = vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
+        bitRevOff0High = vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);
+
+        inLow = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0Low, p);
+        inHigh = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0High, p);
+
+        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff0Low, inHigh, p);
+        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff0High, inLow, p);
+    }
+}
+
+/**
+  @brief         Out-of-place 32 bit reversal function for helium
+  @param[out]   pDst        points to destination buffer of unknown 32-bit data type (written sequentially, 8 words per loop iteration)
+  @param[in]    pSrc        points to input buffer of unknown 32-bit data type (read with 64-bit gathers at bit-reversed offsets)
+  @param[in]    fftLen      FFT length (the loop runs fftLen / 4 times; presumably a power of two - confirm)
+  @return       none
+*/
+__STATIC_INLINE void arm_bitreversal_32_outpl_mve(void *pDst, void *pSrc, uint32_t fftLen)
+{
+    uint32x4_t      idxOffs0, idxOffs1, bitRevOffs0, bitRevOffs1;
+    uint32_t        bitRevPos, blkCnt;
+    uint32_t       *pDst32 = (uint32_t *) pDst;
+
+    /* fwd indexes: only lanes 0 and 2 are set; the vectors are reinterpreted as uint64x2_t at the gathers below */
+    idxOffs0 = vdupq_n_u32(0);
+    idxOffs1 = vdupq_n_u32(0);
+    idxOffs0[0] = 0;    idxOffs0[2] = 4;
+    idxOffs1[0] = 8;    idxOffs1[2] = 12;
+    /* 31 - __CLZ(fftLen) is the index of the highest set bit of fftLen */
+    bitRevPos = (31 - __CLZ(fftLen)) + 5;
+    blkCnt = fftLen >> 2;
+
+    /* issued earlier to increase gather load idx update / gather load distance */
+    /* bit-reverse fwd indexes */
+    bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
+    bitRevOffs1 = vbrsrq(idxOffs1, bitRevPos);
+    while (blkCnt > 0) {
+        uint64x2_t      vecIn;
+        /* gather two 64-bit elements, store them contiguously, then advance and re-reverse the forward indexes */
+        vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs0);
+        idxOffs0 = idxOffs0 + 16;
+        vst1q(pDst32, (uint32x4_t) vecIn);
+        pDst32 += 4;
+        bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
+        /* NOTE(review): the +16 is applied to all four lanes, including lanes 1 and 3 - presumably only the low 32 bits of each 64-bit offset are used, confirm */
+        vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs1);
+        idxOffs1 = idxOffs1 + 16;
+        vst1q(pDst32, (uint32x4_t) vecIn);
+        pDst32 += 4;
+        bitRevOffs1 = vbrsrq(idxOffs1, bitRevPos);
+
+        blkCnt--;
+    }
+}
+
+
+/**
+  @brief         Out-of-place 16 bit reversal function for helium
+  @param[out]   pDst        points to destination buffer of unknown 16-bit data type (written sequentially, 16 halfwords per loop iteration)
+  @param[in]    pSrc        points to input buffer of unknown 16-bit data type (read with 32-bit gathers at bit-reversed offsets)
+  @param[in]    fftLen      FFT length (the loop runs fftLen / 8 times; presumably a power of two - confirm)
+  @return       none
+*/
+
+__STATIC_INLINE void arm_bitreversal_16_outpl_mve(void *pDst, void *pSrc, uint32_t fftLen)
+{
+    uint32x4_t      idxOffs0, idxOffs1, bitRevOffs0, bitRevOffs1;
+    uint32_t        bitRevPos, blkCnt;
+    uint16_t       *pDst16 = (uint16_t *) pDst;
+    uint32_t        incrIdx = 0;
+
+    /* fwd indexes */
+    idxOffs0 = vidupq_wb_u32(&incrIdx, 4);    // {0, 4, 8, 12}
+    idxOffs1 = vidupq_wb_u32(&incrIdx, 4);    // {16, 20, 24, 28}
+    /* 31 - __CLZ(fftLen) is the index of the highest set bit of fftLen */
+    bitRevPos = (31 - __CLZ(fftLen)) + 4;
+    blkCnt = fftLen >> 3;
+
+    /* issued earlier to increase gather load idx update / gather load distance */
+    /* bit-reverse fwd indexes */
+    bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
+    bitRevOffs1 = vbrsrq(idxOffs1, bitRevPos);
+    while (blkCnt > 0) {
+        uint32x4_t      vecIn;  /* NOTE(review): filled by the _s32 gather variant below - presumably relies on lax vector conversions, confirm */
+        /* gather four 32-bit elements, store them contiguously, then advance and re-reverse the forward indexes */
+        vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
+        idxOffs0 = idxOffs0 + 32;
+        vst1q(pDst16, (uint16x8_t) vecIn);
+        pDst16 += 8;
+        bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);
+        /* same steps for the second index vector */
+        vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
+        idxOffs1 = idxOffs1 + 32;
+        vst1q(pDst16, (uint16x8_t) vecIn);
+        pDst16 += 8;
+        bitRevOffs1 = vbrsrq(idxOffs1, bitRevPos);
+
+        blkCnt--;
+    }
+}
+
+
 #endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
 
 
@@ -55,4 +322,4 @@ extern "C"
 #endif
 
 
-#endif /* _ARM_VEC_FFT_H_ */
\ No newline at end of file
+#endif /* _ARM_VEC_FFT_H_ */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h
index 43d8f46..dc32ca6 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h
@@ -1,11 +1,12 @@
 /******************************************************************************
  * @file     arm_vec_math.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.7.0
- * @date     15. October 2019
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h
index 71ff75d..bca9ef8 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h
@@ -1,9 +1,12 @@
 /******************************************************************************
  * @file     arm_vec_math_f16.h
  * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -65,11 +68,11 @@ __STATIC_INLINE f16x8_t vrecip_medprec_f16(
     b = 2.0f16 - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m_n_f16(xinv.f, F16INFINITY, vcmpeqq_n_f16(x, 0.0f));
     /*
      * restore sign
      */
-    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq_n_f16(x, 0.0f));
 
     return xinv.f;
 }
@@ -102,11 +105,11 @@ __STATIC_INLINE f16x8_t vrecip_hiprec_f16(
     b = 2.0f16 - xinv.f * ax;
     xinv.f = xinv.f * b;
 
-    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    xinv.f = vdupq_m_n_f16(xinv.f, F16INFINITY, vcmpeqq_n_f16(x, 0.0f));
     /*
      * restore sign
      */
-    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq_n_f16(x, 0.0f));
 
     return xinv.f;
 }
@@ -140,22 +143,17 @@ __STATIC_INLINE float16x8_t vtaylor_polyq_f16(
     return res;
 }
 
-__STATIC_INLINE float16x8_t vmant_exp_f16(
-    float16x8_t     x,
-    int16x8_t * e)
-{
-    any16x8_t       r;
-    int16x8_t       n;
-
-    r.f = x;
-    n = r.i >> 10;
-    n = n - 15;
-    r.i = r.i - (n << 10);
-
-    *e = n;
-    return r.f;
-}
-
+#define VMANT_EXP_F16(x)  /* statement macro: declares r and n at the expansion site */ \
+    any16x8_t       r;    \
+    int16x8_t       n;    \
+    /* n = (bits of x >> 10) - 15; r = x with (n << 10) subtracted from its bits */ \
+    r.f = x;              \
+    n = r.i >> 10;        \
+    n = n - 15;           \
+    r.i = r.i - (n << 10);\
+    /* results are written to vecExpUnBiased and vecTmpFlt1, which must exist at the expansion site */ \
+    vecExpUnBiased = n;   \
+    vecTmpFlt1 = r.f;
 
 __STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
 {
@@ -167,7 +165,7 @@ __STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
     /*
      * extract exponent
      */
-    vecTmpFlt1 = vmant_exp_f16(vecIn, &vecExpUnBiased);
+    VMANT_EXP_F16(vecIn);
 
     vecTmpFlt0 = vecTmpFlt1 * vecTmpFlt1;
     /*
@@ -213,7 +211,7 @@ __STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
      */
     vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f16);
     // set log0 down to -inf
-    vecAcc0 = vdupq_m(vecAcc0, -F16INFINITY, vcmpeqq(vecIn, 0.0f));
+    vecAcc0 = vdupq_m_n_f16(vecAcc0, -(_Float16)F16INFINITY, vcmpeqq_n_f16(vecIn, 0.0f));
     return vecAcc0;
 }
 
@@ -230,7 +228,7 @@ __STATIC_INLINE float16x8_t vexpq_f16(
     // Reconstruct
     poly = (float16x8_t) (vqaddq_s16((int16x8_t) (poly), vqshlq_n_s16(m, 10)));
 
-    poly = vdupq_m(poly, 0.0f, vcmpltq_n_s16(m, -14));
+    poly = vdupq_m_n_f16(poly, 0.0f16, vcmpltq_n_s16(m, -14));
     return poly;
 }
 
@@ -267,20 +265,20 @@ __STATIC_INLINE f16x8_t vrecip_f16(f16x8_t vecIn)
     vecW = vmulq(vecSx, v.f);
 
     // v.f = v.f * (8 + w * (-28 + w * (56 + w * (-70 + w *(56 + w * (-28 + w * (8 - w)))))));
-    vecTmp = vsubq(vdupq_n_f16(8.0f), vecW);
-    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
-    vecTmp = vfmasq(vecW, vecTmp, -70.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
-    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
-    vecTmp = vfmasq(vecW, vecTmp, 8.0f);
+    vecTmp = vsubq(vdupq_n_f16(8.0f16), vecW);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -28.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 56.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -70.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 56.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, -28.0f16);
+    vecTmp = vfmasq_n_f16(vecW, vecTmp, 8.0f16);
     v.f = vmulq(v.f,  vecTmp);
 
-    v.f = vdupq_m(v.f, F16INFINITY, vcmpeqq(vecIn, 0.0f));
+    v.f = vdupq_m_n_f16(v.f, F16INFINITY, vcmpeqq_n_f16(vecIn, 0.0f));
     /*
      * restore sign
      */
-    v.f = vnegq_m(v.f, v.f, vcmpltq(vecIn, 0.0f));
+    v.f = vnegq_m(v.f, v.f, vcmpltq_n_f16(vecIn, 0.0f));
     return v.f;
 }
 
@@ -288,10 +286,10 @@ __STATIC_INLINE f16x8_t vtanhq_f16(
     f16x8_t val)
 {
     f16x8_t         x =
-        vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f)), vdupq_n_f16(10.0f));
-    f16x8_t         exp2x = vexpq_f16(vmulq_n_f16(x, 2.f));
-    f16x8_t         num = vsubq_n_f16(exp2x, 1.f);
-    f16x8_t         den = vaddq_n_f16(exp2x, 1.f);
+        vminnmq_f16(vmaxnmq_f16(val, vdupq_n_f16(-10.f16)), vdupq_n_f16(10.0f16));
+    f16x8_t         exp2x = vexpq_f16(vmulq_n_f16(x, 2.f16));
+    f16x8_t         num = vsubq_n_f16(exp2x, 1.f16);
+    f16x8_t         den = vaddq_n_f16(exp2x, 1.f16);
     f16x8_t         tanh = vmulq_f16(num, vrecip_f16(den));
     return tanh;
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h
index fe20c48..30ad98d 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     basic_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -99,6 +100,21 @@ extern "C"
 
 
 
+/**
+ * @brief Floating-point (float64_t) vector multiplication.
+ * @param[in]  pSrcA      points to the first input vector
+ * @param[in]  pSrcB      points to the second input vector
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in each vector
+ */
+void arm_mult_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
  /**
    * @brief Floating-point vector addition.
    * @param[in]  pSrcA      points to the first input vector
@@ -114,6 +130,21 @@ extern "C"
 
 
 
+/**
+  * @brief Floating-point (float64_t) vector addition.
+  * @param[in]  pSrcA      points to the first input vector
+  * @param[in]  pSrcB      points to the second input vector
+  * @param[out] pDst       points to the output vector
+  * @param[in]  blockSize  number of samples in each vector
+  */
+ void arm_add_f64(
+ const float64_t * pSrcA,
+ const float64_t * pSrcB,
+	   float64_t * pDst,
+	   uint32_t blockSize);
+
+
+
   /**
    * @brief Q7 vector addition.
    * @param[in]  pSrcA      points to the first input vector
@@ -171,6 +202,21 @@ extern "C"
 
 
 
+  /**
+   * @brief Floating-point (float64_t) vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_sub_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
+
   /**
    * @brief Q7 vector subtraction.
    * @param[in]  pSrcA      points to the first input vector
@@ -228,6 +274,21 @@ extern "C"
 
 
 
+  /**
+   * @brief Multiplies a floating-point (float64_t) vector by a scalar.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  scale      scale factor to be applied
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void arm_scale_f64(
+  const float64_t * pSrc,
+        float64_t scale,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
+
   /**
    * @brief Multiplies a Q7 vector by a scalar.
    * @param[in]  pSrc        points to the input vector
@@ -301,6 +362,18 @@ extern "C"
 
 
 
+/**
+ * @brief Floating-point (float64_t) vector absolute value.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[out] pDst       points to the output buffer
+ * @param[in]  blockSize  number of samples in each vector
+ */
+void arm_abs_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
 
   /**
    * @brief Q15 vector absolute value.
@@ -341,6 +414,21 @@ extern "C"
 
 
 
+/**
+ * @brief Dot product of floating-point (float64_t) vectors.
+ * @param[in]  pSrcA      points to the first input vector
+ * @param[in]  pSrcB      points to the second input vector
+ * @param[in]  blockSize  number of samples in each vector
+ * @param[out] result     output result returned here
+ */
+void arm_dot_prod_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  uint32_t blockSize,
+	  float64_t * result);
+
+
+
   /**
    * @brief Dot product of Q7 vectors.
    * @param[in]  pSrcA      points to the first input vector
@@ -425,6 +513,21 @@ extern "C"
         uint32_t blockSize);
 
 
+/**
+ * @brief  Adds a constant offset to a floating-point (float64_t) vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  offset     is the offset to be added
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in the vector
+ */
+void arm_offset_f64(
+const float64_t * pSrc,
+	  float64_t offset,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
   /**
    * @brief  Adds a constant offset to a floating-point vector.
    * @param[in]  pSrc       points to the input vector
@@ -494,6 +597,20 @@ extern "C"
         uint32_t blockSize);
 
 
+
+/**
+ * @brief  Negates the elements of a floating-point (float64_t) vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in the vector
+ */
+void arm_negate_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
   /**
    * @brief  Negates the elements of a Q7 vector.
    * @param[in]  pSrc       points to the input vector
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
index f1d4aae..92f11da 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     basic_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions.h
index c527018..0d6d58b 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     bayes_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -66,9 +67,10 @@ typedef struct
 /**
  * @brief Naive Gaussian Bayesian Estimator
  *
- * @param[in]  S         points to a naive bayes instance structure
- * @param[in]  in        points to the elements of the input vector.
- * @param[in]  pBuffer   points to a buffer of length numberOfClasses
+ * @param[in]  S                        points to a naive bayes instance structure
+ * @param[in]  in                       points to the elements of the input vector.
+ * @param[out] pOutputProbabilities     points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                 points to a temporary buffer of length numberOfClasses
  * @return The predicted class
  *
  */
@@ -76,7 +78,8 @@ typedef struct
 
 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
    const float32_t * in, 
-   float32_t *pBuffer);
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB);
 
 
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
index 46dabab..a16c49b 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/bayes_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     bayes_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -57,9 +58,10 @@ typedef struct
 /**
  * @brief Naive Gaussian Bayesian Estimator
  *
- * @param[in]  S         points to a naive bayes instance structure
- * @param[in]  in        points to the elements of the input vector.
- * @param[in]  pBuffer   points to a buffer of length numberOfClasses
+ * @param[in]  S                        points to a naive bayes instance structure
+ * @param[in]  in                       points to the elements of the input vector.
+ * @param[out] pOutputProbabilities     points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                 points to a temporary buffer of length numberOfClasses
  * @return The predicted class
  *
  */
@@ -67,7 +69,8 @@ typedef struct
 
 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
    const float16_t * in, 
-   float16_t *pBuffer);
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB);
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h
index 5589a06..b4394de 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     complex_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -95,6 +96,18 @@ extern "C"
         uint32_t numSamples);
 
 
+  /**
+   * @brief  Floating-point (float64_t) complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void arm_cmplx_mag_squared_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
+
+
   /**
    * @brief  Q31 complex magnitude squared
    * @param[in]  pSrc        points to the complex input vector
@@ -131,6 +144,18 @@ extern "C"
         uint32_t numSamples);
 
 
+  /**
+   * @brief  Floating-point (float64_t) complex magnitude
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void arm_cmplx_mag_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
+
+
   /**
    * @brief  Q31 complex magnitude
    * @param[in]  pSrc        points to the complex input vector
@@ -154,6 +179,17 @@ extern "C"
         q15_t * pDst,
         uint32_t numSamples);
 
+  /**
+   * @brief  Q15 complex magnitude (fast variant)
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void arm_cmplx_mag_fast_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
+
 
   /**
    * @brief  Q15 complex dot product
@@ -287,6 +323,21 @@ extern "C"
 
 
 
+/**
+ * @brief  Floating-point (float64_t) complex-by-complex multiplication
+ * @param[in]  pSrcA       points to the first input vector
+ * @param[in]  pSrcB       points to the second input vector
+ * @param[out] pDst        points to the output vector
+ * @param[in]  numSamples  number of complex samples in each vector
+ */
+void arm_cmplx_mult_cmplx_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t numSamples);
+
+
+
 #ifdef   __cplusplus
 }
 #endif
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
index 39d9fa9..e0baa6f 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     complex_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions.h
index 39218ba..886a23c 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     controller_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -51,15 +52,35 @@ extern "C"
  */
 
 
- /**
-   * @ingroup groupController
-   */
-
-  /**
-   * @addtogroup SinCos
-   * @{
-   */
+/**
+  @ingroup groupController
+ */
 
+/**
+  @defgroup SinCos Sine Cosine
+
+  Computes the trigonometric sine and cosine values using a combination of table lookup
+  and linear interpolation.
+  There are separate functions for Q31 and floating-point data types.
+  The input to the floating-point version is in degrees while the
+  fixed-point Q31 version has a scaled input with the range
+  [-1 0.9999] mapping to [-180 +180] degrees.
+
+  The floating point function also allows values that are out of the usual range. When this happens, the function will
+  take extra time to adjust the input value to the range of [-180 180].
+
+  The result is accurate to 5 digits after the decimal point.
+
+  The implementation is based on table lookup using 360 values together with linear interpolation.
+  The steps used are:
+   -# Calculation of the nearest integer table index.
+   -# Compute the fractional portion (fract) of the input.
+   -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
+   -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
+   -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
+   -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
+ */
+ 
 /**
    * @brief  Floating-point sin_cos function.
    * @param[in]  theta   input value in degrees
@@ -83,14 +104,11 @@ extern "C"
         q31_t * pSinVal,
         q31_t * pCosVal);
 
-  /**
-   * @} end of SinCos group
-   */
-
- /**
-   * @ingroup groupController
-   */
 
+/**
+  @ingroup groupController
+ */
+  
 /**
    * @defgroup PID PID Motor Control
    *
@@ -151,6 +169,7 @@ extern "C"
 
 
   /**
+   * @ingroup PID
    * @brief Instance structure for the Q15 PID Control.
    */
   typedef struct
@@ -169,6 +188,7 @@ extern "C"
   } arm_pid_instance_q15;
 
   /**
+   * @ingroup PID
    * @brief Instance structure for the Q31 PID Control.
    */
   typedef struct
@@ -183,6 +203,7 @@ extern "C"
   } arm_pid_instance_q31;
 
   /**
+   * @ingroup PID
    * @brief Instance structure for the floating-point PID Control.
    */
   typedef struct
@@ -254,12 +275,10 @@ extern "C"
 
 
 
-  /**
-   * @addtogroup PID
-   * @{
-   */
+
 
   /**
+   * @ingroup PID
    * @brief         Process function for the floating-point PID Control.
    * @param[in,out] S   is an instance of the floating-point PID Control structure
    * @param[in]     in  input sample to process
@@ -286,6 +305,7 @@ extern "C"
   }
 
 /**
+  @ingroup PID
   @brief         Process function for the Q31 PID Control.
   @param[in,out] S  points to an instance of the Q31 PID Control structure
   @param[in]     in  input sample to process
@@ -331,6 +351,7 @@ __STATIC_FORCEINLINE q31_t arm_pid_q31(
 
 
 /**
+  @ingroup PID
   @brief         Process function for the Q15 PID Control.
   @param[in,out] S   points to an instance of the Q15 PID Control structure
   @param[in]     in  input sample to process
@@ -383,9 +404,7 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(
     return (out);
   }
 
-  /**
-   * @} end of PID group
-   */
+
 
   /**
    * @ingroup groupController
@@ -415,12 +434,10 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-  /**
-   * @addtogroup park
-   * @{
-   */
+ 
 
   /**
+   * @ingroup park
    * @brief Floating-point Park transform
    * @param[in]  Ialpha  input two-phase vector coordinate alpha
    * @param[in]  Ibeta   input two-phase vector coordinate beta
@@ -450,6 +467,7 @@ __STATIC_FORCEINLINE q15_t arm_pid_q15(
 
 
 /**
+  @ingroup park
   @brief  Park transform for Q31 version
   @param[in]  Ialpha  input two-phase vector coordinate alpha
   @param[in]  Ibeta   input two-phase vector coordinate beta
@@ -495,9 +513,6 @@ __STATIC_FORCEINLINE void arm_park_q31(
     *pIq = __QSUB(product4, product3);
   }
 
-  /**
-   * @} end of park group
-   */
 
 
   /**
@@ -521,12 +536,10 @@ __STATIC_FORCEINLINE void arm_park_q31(
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-  /**
-   * @addtogroup inv_park
-   * @{
-   */
+  
 
    /**
+   * @ingroup inv_park
    * @brief  Floating-point Inverse Park transform
    * @param[in]  Id       input coordinate of rotor reference frame d
    * @param[in]  Iq       input coordinate of rotor reference frame q
@@ -553,6 +566,7 @@ __STATIC_FORCEINLINE void arm_park_q31(
 
 
 /**
+  @ingroup inv_park
   @brief  Inverse Park transform for   Q31 version
   @param[in]  Id       input coordinate of rotor reference frame d
   @param[in]  Iq       input coordinate of rotor reference frame q
@@ -598,9 +612,6 @@ __STATIC_FORCEINLINE void arm_inv_park_q31(
     *pIbeta = __QADD(product4, product3);
   }
 
-  /**
-   * @} end of Inverse park group
-   */
 
 /**
    * @ingroup groupController
@@ -628,13 +639,10 @@ __STATIC_FORCEINLINE void arm_inv_park_q31(
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-  /**
-   * @addtogroup clarke
-   * @{
-   */
 
   /**
    *
+   * @ingroup clarke
    * @brief  Floating-point Clarke transform
    * @param[in]  Ia       input three-phase coordinate <code>a</code>
    * @param[in]  Ib       input three-phase coordinate <code>b</code>
@@ -657,6 +665,7 @@ __STATIC_FORCEINLINE void arm_inv_park_q31(
 
 
 /**
+  @ingroup clarke
   @brief  Clarke transform for Q31 version
   @param[in]  Ia       input three-phase coordinate <code>a</code>
   @param[in]  Ib       input three-phase coordinate <code>b</code>
@@ -690,9 +699,6 @@ __STATIC_FORCEINLINE void arm_clarke_q31(
     *pIbeta = __QADD(product1, product2);
   }
 
-  /**
-   * @} end of clarke group
-   */
 
 
   /**
@@ -715,12 +721,10 @@ __STATIC_FORCEINLINE void arm_clarke_q31(
    * Refer to the function specific documentation below for usage guidelines.
    */
 
-  /**
-   * @addtogroup inv_clarke
-   * @{
-   */
+ 
 
    /**
+   * @ingroup inv_clarke
    * @brief  Floating-point Inverse Clarke transform
    * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
@@ -743,6 +747,7 @@ __STATIC_FORCEINLINE void arm_clarke_q31(
 
 
 /**
+  @ingroup inv_clarke
   @brief  Inverse Clarke transform for Q31 version
   @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
   @param[in]  Ibeta   input two-phase orthogonal vector axis beta
@@ -776,9 +781,7 @@ __STATIC_FORCEINLINE void arm_inv_clarke_q31(
     *pIb = __QSUB(product2, product1);
   }
 
-  /**
-   * @} end of inv_clarke group
-   */
+
 
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions_f16.h
index a76e1f6..8fae483 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     controller_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/debug.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/debug.h
new file mode 100644
index 0000000..6fb7183
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/debug.h
@@ -0,0 +1,146 @@
+/******************************************************************************
+ * @file     debug.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _DEBUG_FUNCTIONS_H_
+#define _DEBUG_FUNCTIONS_H_
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+
+#include <stdio.h>
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+/* f16 helpers (built only with ARM_FLOAT16_SUPPORTED). PROW_f16(S,NB): print S[0..NB-1] as "{v0,v1,...}" with "%f"; S[0] is always read, so NB must be >= 1. Arguments may be evaluated more than once. */
+#if defined(ARM_FLOAT16_SUPPORTED)
+#define PROW_f16(S,NB)            \
+{                                 \
+    printf("{%f",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%f",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}
+/* PV_f16(S,V,NB): print the string S, then "=", then the NB values of V via PROW_f16, then ";\n". */
+#define PV_f16(S,V,NB)\
+{                     \
+    printf("%s=",(S));  \
+    PROW_f16((V),(NB));   \
+    printf(";\n");    \
+}
+/* PM_f16(S,M): print the matrix *M as "S={row0\n,row1...};\n", one PROW_f16 per row, reading pData as numRows rows of numCols values. */
+#define PM_f16(S,M)                                       \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f16((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+
+#endif 
+/* PROW_f32(S,NB): print S[0..NB-1] as "{v0,v1,...}" with "%f"; S[0] is always read, so NB must be >= 1. Arguments may be evaluated more than once. */
+#define PROW_f32(S,NB)            \
+{                                 \
+    printf("{%f",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%f",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}
+/* PV_f32(S,V,NB): print the string S, then "=", then the NB values of V via PROW_f32, then ";\n". */
+#define PV_f32(S,V,NB)\
+{                     \
+    printf("%s=",(S));  \
+    PROW_f32((V),(NB));   \
+    printf(";\n");    \
+}
+/* PM_f32(S,M): print the matrix *M as "S={row0\n,row1...};\n", one PROW_f32 per row, reading pData as numRows rows of numCols values. */
+#define PM_f32(S,M)                                       \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f32((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+/* PROW_f64(S,NB): print S[0..NB-1] as "{v0,v1,...}" with "%.20g"; S[0] is always read, so NB must be >= 1. Arguments may be evaluated more than once. */
+#define PROW_f64(S,NB)            \
+{                                 \
+    printf("{%.20g",(double)(S)[0]);   \
+    for(unsigned int i=1;i<(NB) ;i++)       \
+    {                             \
+       printf(",%.20g",(double)(S)[i]);\
+    }                             \
+    printf("}");                  \
+}
+/* PV_f64(S,V,NB): print the string S, then "=", then the NB values of V via PROW_f64, then ";\n". */
+#define PV_f64(S,V,NB) \
+{                      \
+    printf("%s=",(S)); \
+    PROW_f64((V),(NB));\
+    printf(";\n");     \
+}
+/* PM_f64(S,M): print the matrix *M as "S={row0\n,row1...};\n", one PROW_f64 per row, reading pData as numRows rows of numCols values. */
+#define PM_f64(S,M)                                       \
+{                                                         \
+    printf("%s={",(S));                                     \
+    for(unsigned int row=0;row<(M)->numRows;row++)                   \
+    {                                                     \
+        if (row != 0)                                     \
+        {                                                 \
+            printf("\n,");                                \
+        }                                                 \
+        PROW_f64((M)->pData + row * (M)->numCols, (M)->numCols);\
+    }                                                     \
+    printf("};\n");                                       \
+}
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _DEBUG_FUNCTIONS_H_ */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h
index c1580cb..a8cc19d 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     distance_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -68,6 +69,17 @@ __attribute__((weak)) float __powisf2(float a, int b);
 
 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
  * @brief        Bray-Curtis distance between two vectors
  * @param[in]    pA         First vector
@@ -105,6 +117,17 @@ float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uin
 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
 
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
+
 /**
  * @brief        Cityblock (Manhattan) distance between two vectors
  * @param[in]    pA         First vector
@@ -115,6 +138,16 @@ float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, ui
  */
 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
  * @brief        Correlation distance between two vectors
  *
@@ -140,6 +173,18 @@ float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blo
 
 float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);
 
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
  * @brief        Jensen-Shannon distance between two vectors
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions_f16.h
index 0d71b6b..46ad233 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     distance_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h
index 1828f3f..758b0fb 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     fast_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -32,6 +33,9 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"
 
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+
 #ifdef   __cplusplus
 extern "C"
 {
@@ -59,17 +63,8 @@ extern "C"
  *
  */
 
-  /**
-   * @ingroup groupFastMath
-   */
-
 
-/**
-  @addtogroup sin
-  @{
- */
-
-/**
+   /**
    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
    * @param[in] x  input value in radians.
    * @return  sin(x).
@@ -86,7 +81,6 @@ extern "C"
   q31_t arm_sin_q31(
   q31_t x);
 
-
   /**
    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
    * @param[in] x  Scaled input value in radians.
@@ -95,14 +89,6 @@ extern "C"
   q15_t arm_sin_q15(
   q15_t x);
 
-/**
-  @} end of sin group
- */
-
-/**
-  @addtogroup cos
-  @{
- */
 
   /**
    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
@@ -130,10 +116,6 @@ extern "C"
   q15_t arm_cos_q15(
   q15_t x);
 
-/**
-  @} end of cos group
- */
-
 
 /**
   @brief         Floating-point vector of log values.
@@ -147,6 +129,46 @@ extern "C"
         float32_t * pDst,
         uint32_t blockSize);
 
+
+
+/**
+  @brief         Floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vlog_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
+
+  /**
+   * @brief  q31 vector of log values.
+   * @param[in]     pSrc       points to the input vector in q31
+   * @param[out]    pDst       points to the output vector in q5.26
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void arm_vlog_q31(const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief  q15 vector of log values.
+   * @param[in]     pSrc       points to the input vector in q15
+   * @param[out]    pDst       points to the output vector in q4.11
+   * @param[in]     blockSize  number of samples in each vector
+   * @return        none
+   */
+  void arm_vlog_q15(const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
+
+
+
 /**
   @brief         Floating-point vector of exp values.
   @param[in]     pSrc       points to the input vector
@@ -159,6 +181,22 @@ extern "C"
         float32_t * pDst,
         uint32_t blockSize);
 
+
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vexp_f64(
+  const float64_t * pSrc,
+		float64_t * pDst,
+		uint32_t blockSize);
+
+
+
  /**
    * @defgroup SQRT Square Root
    *
@@ -194,7 +232,7 @@ extern "C"
                    - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
  */
 __STATIC_FORCEINLINE arm_status arm_sqrt_f32(
-  float32_t in,
+  const float32_t in,
   float32_t * pOut)
   {
     if (in >= 0.0f)
@@ -252,33 +290,75 @@ arm_status arm_sqrt_q15(
   q15_t in,
   q15_t * pOut);
 
+
+
   /**
-   * @brief  Vector Floating-point square root function.
-   * @param[in]  pIn   input vector.
-   * @param[out] pOut  vector of square roots of input elements.
-   * @param[in]  len   length of input vector.
-   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
-   * <code>in</code> is negative value and returns zero output for negative values.
+   * @} end of SQRT group
    */
-  void arm_vsqrt_f32(
-  float32_t * pIn,
-  float32_t * pOut,
-  uint16_t len);
 
-  void arm_vsqrt_q31(
-  q31_t * pIn,
-  q31_t * pOut,
-  uint16_t len);
+  /**
+  @brief         Fixed point division
+  @param[in]     numerator    Numerator
+  @param[in]     denominator  Denominator
+  @param[out]    quotient     Quotient value normalized between -1.0 and 1.0
+  @param[out]    shift        Shift left value to get the unnormalized quotient
+  @return        error status
+
+  When dividing by 0, the error ARM_MATH_NANINF is returned and the quotient is forced
+  to the saturated negative or positive value.
+ */
 
-  void arm_vsqrt_q15(
-  q15_t * pIn,
-  q15_t * pOut,
-  uint16_t len);
+arm_status arm_divide_q15(q15_t numerator,
+  q15_t denominator,
+  q15_t *quotient,
+  int16_t *shift);
 
   /**
-   * @} end of SQRT group
+  @brief         Fixed point division
+  @param[in]     numerator    Numerator
+  @param[in]     denominator  Denominator
+  @param[out]    quotient     Quotient value normalized between -1.0 and 1.0
+  @param[out]    shift        Shift left value to get the unnormalized quotient
+  @return        error status
+
+  When dividing by 0, the error ARM_MATH_NANINF is returned and the quotient is forced
+  to the saturated negative or positive value.
+ */
+
+arm_status arm_divide_q31(q31_t numerator,
+  q31_t denominator,
+  q31_t *quotient,
+  int16_t *shift);
+
+
+
+  /**
+     @brief  Arc tangent in radians of y/x, using the signs of x and y to determine the correct quadrant.
+     @param[in]   y  y coordinate
+     @param[in]   x  x coordinate
+     @param[out]  result  Result
+     @return  error status.
+   */
+  arm_status arm_atan2_f32(float32_t y,float32_t x,float32_t *result);
+
+
+  /**
+     @brief  Arc tangent in radians of y/x, using the signs of x and y to determine the correct quadrant.
+     @param[in]   y  y coordinate
+     @param[in]   x  x coordinate
+     @param[out]  result  Result in Q2.29
+     @return  error status.
    */
+  arm_status arm_atan2_q31(q31_t y,q31_t x,q31_t *result);
 
+  /**
+     @brief  Arc tangent in radians of y/x, using the signs of x and y to determine the correct quadrant.
+     @param[in]   y  y coordinate
+     @param[in]   x  x coordinate
+     @param[out]  result  Result in Q2.13
+     @return  error status.
+   */
+  arm_status arm_atan2_q15(q15_t y,q15_t x,q15_t *result);
 
 #ifdef   __cplusplus
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
index 3be576e..c97ec64 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     fast_math_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -107,6 +108,15 @@ __STATIC_FORCEINLINE arm_status arm_sqrt_f16(
         float16_t * pDst,
         uint32_t blockSize);
 
+  /**
+     @brief  Arc tangent in radians of y/x, using the signs of x and y to determine the correct quadrant.
+     @param[in]   y  y coordinate
+     @param[in]   x  x coordinate
+     @param[out]  result  Result
+     @return  error status.
+   */
+  arm_status arm_atan2_f16(float16_t y,float16_t x,float16_t *result);
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h
index 4d41606..38a40ba 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     filtering_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -33,6 +34,7 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
 
 #ifdef   __cplusplus
 extern "C"
@@ -88,6 +90,16 @@ extern "C"
     const float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
   } arm_fir_instance_f32;
 
+  /**
+   * @brief Instance structure for the floating-point FIR filter.
+   */
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float64_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float64_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f64;
+
   /**
    * @brief Processing function for the Q7 FIR filter.
    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
@@ -224,6 +236,19 @@ extern "C"
         float32_t * pDst,
         uint32_t blockSize);
 
+  /**
+   * @brief Processing function for the floating-point FIR filter.
+   * @param[in]  S          points to an instance of the floating-point FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void arm_fir_f64(
+  const arm_fir_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
   /**
    * @brief  Initialization function for the floating-point FIR filter.
    * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
@@ -239,6 +264,21 @@ extern "C"
         float32_t * pState,
         uint32_t blockSize);
 
+  /**
+   * @brief  Initialization function for the floating-point FIR filter.
+   * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
+   */
+  void arm_fir_init_f64(
+        arm_fir_instance_f64 * S,
+        uint16_t numTaps,
+  const float64_t * pCoeffs,
+        float64_t * pState,
+        uint32_t blockSize);
+
   /**
    * @brief Instance structure for the Q15 Biquad cascade filter.
    */
@@ -1171,10 +1211,17 @@ arm_status arm_fir_decimate_init_f32(
 
 
 #if defined(ARM_MATH_NEON) 
+/**
+  @brief         Compute new coefficient arrays for use in vectorized filter (Neon only).
+  @param[in]     numStages         number of 2nd order stages in the filter.
+  @param[in]     pCoeffs           points to the original filter coefficients.
+  @param[out]    pComputedCoeffs   points to the new computed coefficients for the vectorized version.
+  @return        none
+*/
 void arm_biquad_cascade_df2T_compute_coefs_f32(
-  arm_biquad_cascade_df2T_instance_f32 * S,
   uint8_t numStages,
-  float32_t * pCoeffs);
+  const float32_t * pCoeffs,
+  float32_t * pComputedCoeffs);
 #endif
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@@ -1787,6 +1834,22 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
         float32_t * pDst);
 
 
+  /**
+   * @brief Correlation of floating-point sequences.
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data, of length 2 * max(srcALen, srcBLen) - 1.
+   */
+  void arm_correlate_f64(
+  const float64_t * pSrcA,
+        uint32_t srcALen,
+  const float64_t * pSrcB,
+        uint32_t srcBLen,
+        float64_t * pDst);
+
+
 /**
  @brief Correlation of Q15 sequences
  @param[in]  pSrcA     points to the first input sequence
@@ -2432,8 +2495,33 @@ void arm_correlate_fast_q31(
   }
 
 
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+void arm_levinson_durbin_f32(const float32_t *phi,
+  float32_t *a, 
+  float32_t *err,
+  int nbCoefs);
+
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+void arm_levinson_durbin_q31(const q31_t *phi,
+  q31_t *a, 
+  q31_t *err,
+  int nbCoefs);
 
- 
 #ifdef   __cplusplus
 }
 #endif
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
index 9abb53a..21f33f4 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     filtering_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -214,6 +215,20 @@ extern "C"
         uint32_t srcBLen,
         float16_t * pDst);
 
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+void arm_levinson_durbin_f16(const float16_t *phi,
+  float16_t *a, 
+  float16_t *err,
+  int nbCoefs);
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions.h
index e7cf537..a650fe8 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     interpolation_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -120,18 +121,6 @@ extern "C"
   } arm_spline_instance_f32;
 
 
-
-
-  /**
-   * @ingroup groupInterpolation
-   */
-
-  /**
-   * @addtogroup SplineInterpolate
-   * @{
-   */
-
-  
   /**
    * @brief Processing function for the floating-point cubic spline interpolation.
    * @param[in]  S          points to an instance of the floating-point spline structure.
@@ -165,18 +154,7 @@ extern "C"
           float32_t * tempBuffer);
 
 
-  /**
-   * @} end of SplineInterpolate group
-   */
-
-
-  
-  /**
-   * @addtogroup LinearInterpolate
-   * @{
-   */
-
-    /**
+   /**
    * @brief  Process function for the floating-point Linear Interpolation Function.
    * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
    * @param[in]     x  input sample to process
@@ -201,7 +179,7 @@ extern "C"
    *
    */
   q31_t arm_linear_interp_q31(
-  q31_t * pYData,
+  const q31_t * pYData,
   q31_t x,
   uint32_t nValues);
 
@@ -219,7 +197,7 @@ extern "C"
    *
    */
   q15_t arm_linear_interp_q15(
-  q15_t * pYData,
+  const q15_t * pYData,
   q31_t x,
   uint32_t nValues);
 
@@ -236,27 +214,10 @@ extern "C"
    * This function can support maximum of table size 2^12.
    */
 q7_t arm_linear_interp_q7(
-  q7_t * pYData,
+  const q7_t * pYData,
   q31_t x,
   uint32_t nValues);
 
-  /**
-   * @} end of LinearInterpolate group
-   */
-
-  
-
-
-  /**
-   * @ingroup groupInterpolation
-   */
-
-
-  /**
-   * @addtogroup BilinearInterpolate
-   * @{
-   */
-
   /**
   * @brief  Floating-point bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
@@ -305,10 +266,6 @@ q7_t arm_linear_interp_q7(
   arm_bilinear_interp_instance_q7 * S,
   q31_t X,
   q31_t Y);
-  /**
-   * @} end of BilinearInterpolate group
-   */
-
 
 
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
index 46abd32..227ecb0 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/interpolation_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     interpolation_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h
index e5dce74..9bab8e6 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     matrix_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -108,6 +109,9 @@ extern "C"
  * return <code>ARM_MATH_SUCCESS</code>.
  */
 
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F64 (1.0e-16)  /* NOTE(review): presumably the default norm2 `threshold` for the f64 Householder/QR routines - confirm */
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F32 (1.0e-12f) /* NOTE(review): presumably the default norm2 `threshold` for the f32 Householder/QR routines - confirm */
+
   /**
    * @brief Instance structure for the floating-point matrix structure.
    */
@@ -443,6 +447,21 @@ arm_status arm_mat_mult_q31(
   const arm_matrix_instance_q31 * pSrcB,
         arm_matrix_instance_q31 * pDst);
 
+  /**
+   * @brief Q31 matrix multiplication using a caller-supplied buffer for intermediate results
+   * @param[in]     pSrcA  points to the first input matrix structure
+   * @param[in]     pSrcB  points to the second input matrix structure
+   * @param[out]    pDst   points to output matrix structure
+   * @param[in,out] pState points to the array for storing intermediate results
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+arm_status arm_mat_mult_opt_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst,
+        q31_t *pState);
+
   /**
    * @brief Q31 matrix and vector multiplication
    * @param[in]  pSrcMat  points to the input matrix structure
@@ -734,6 +753,88 @@ void arm_mat_init_f32(
   arm_matrix_instance_f64 * d,
   uint16_t * pp);
 
+/**
+  @brief         QR decomposition of a m x n single precision floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[in,out] pTmpA     points to a temporary vector of dimension m.
+  @param[in,out] pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+
+arm_status arm_mat_qr_f32(
+    const arm_matrix_instance_f32 * pSrc,
+    const float32_t threshold,
+    arm_matrix_instance_f32 * pOutR,
+    arm_matrix_instance_f32 * pOutQ,
+    float32_t * pOutTau,
+    float32_t *pTmpA,
+    float32_t *pTmpB
+    );
+
+/**
+  @brief         QR decomposition of a m x n double precision floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[in,out] pTmpA     points to a temporary vector of dimension m.
+  @param[in,out] pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+
+arm_status arm_mat_qr_f64(
+    const arm_matrix_instance_f64 * pSrc,
+    const float64_t threshold,
+    arm_matrix_instance_f64 * pOutR,
+    arm_matrix_instance_f64 * pOutQ,
+    float64_t * pOutTau,
+    float64_t *pTmpA,
+    float64_t *pTmpB
+    );
+
+/**
+  @brief         Householder transform of a floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        beta        return the scaling factor beta
+ */
+
+float32_t arm_householder_f32(
+    const float32_t * pSrc,
+    const float32_t threshold,
+    uint32_t    blockSize,
+    float32_t * pOut
+    );
+
+/**
+  @brief         Householder transform of a double floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        beta        return the scaling factor beta
+ */
+
+float64_t arm_householder_f64(
+    const float64_t * pSrc,
+    const float64_t threshold,
+    uint32_t    blockSize,
+    float64_t * pOut
+    );
+
 #ifdef   __cplusplus
 }
 #endif
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
index 0bc32b9..3f54651 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     matrix_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -40,6 +41,8 @@ extern "C"
     
 #if defined(ARM_FLOAT16_SUPPORTED)
 
+  #define DEFAULT_HOUSEHOLDER_THRESHOLD_F16 (1.0e-3f) /* NOTE(review): presumably the default norm2 `threshold` for the f16 Householder/QR routines - confirm */
+
  /**
    * @brief Instance structure for the floating-point matrix structure.
    */
@@ -211,6 +214,46 @@ void arm_mat_init_f16(
   arm_matrix_instance_f16 * dst);
 
 
+/**
+  @brief         QR decomposition of a m x n half precision floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[in,out] pTmpA     points to a temporary vector of dimension m.
+  @param[in,out] pTmpB     points to a temporary vector of dimension n.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+                   - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
+ */
+
+arm_status arm_mat_qr_f16(
+    const arm_matrix_instance_f16 * pSrc,
+    const float16_t threshold,
+    arm_matrix_instance_f16 * pOutR,
+    arm_matrix_instance_f16 * pOutQ,
+    float16_t * pOutTau,
+    float16_t *pTmpA,
+    float16_t *pTmpB
+    );
+
+/**
+  @brief         Householder transform of a half floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        beta        return the scaling factor beta
+ */
+
+float16_t arm_householder_f16(
+    const float16_t * pSrc,
+    const float16_t threshold,
+    uint32_t    blockSize,
+    float16_t * pOut
+    );
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h
new file mode 100644
index 0000000..5b0f55d
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h
@@ -0,0 +1,640 @@
+/******************************************************************************
+ * @file     matrix_utils.h
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.11.0
+ * @date     30 May 2022
+ * Target Processor: Cortex-M and Cortex-A cores
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ 
+#ifndef _MATRIX_UTILS_H_
+#define _MATRIX_UTILS_H_
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_memory.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#define ELEM(A,ROW,COL) (&((A)->pData[(A)->numCols* (ROW) + (COL)])) /* address of element (ROW,COL); outer parens keep postfix operators from binding inside the expansion */
+
+#define SCALE_COL_T(T,CAST,A,ROW,v,i)        \
+{ /* Scale column i of A by v, rows >= ROW */ \
+  int32_t w;                            \
+  T *data = (A)->pData;                 \
+  const int32_t numCols = (A)->numCols; \
+  const int32_t nb = (A)->numRows - (ROW);\
+  /* Start at element (ROW, i) */       \
+  data += (i) + numCols * (ROW);        \
+  /* Each step moves one row down */    \
+  for(w=0;w < nb; w++)                  \
+  {                                     \
+     *data *= CAST (v);                 \
+     data += numCols;                   \
+  }                                     \
+}
+
+#define COPY_COL_T(T,A,ROW,COL,DST)               \
+{ /* Copy column COL of A, rows >= ROW, to DST */ \
+    uint32_t row;                                 \
+    T *pb=(DST);                                  \
+    T *pa = (A)->pData + (ROW) * (A)->numCols + (COL);\
+    for(row = (ROW); row < (A)->numRows; row ++)  \
+    {                                             \
+         *pb++ = *pa;                             \
+         pa += (A)->numCols;                      \
+    }                                             \
+}
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F16(A,COL,i,j)                  \
+  { /* Swap rows i and j of A, columns >= COL */  \
+    int cnt = ((A)->numCols)-(COL);               \
+    int32_t w;                                   \
+    float16_t *data = (A)->pData;                 \
+    const int32_t numCols = (A)->numCols;        \
+    /* 8 half-precision lanes per iteration */    \
+    for(w=(COL);w < numCols; w+=8)                \
+    {                                             \
+       f16x8_t tmpa,tmpb;                         \
+       mve_pred16_t p0 = vctp16q(cnt);            \
+       /* p0 limits access to remaining cnt */    \
+       tmpa=vldrhq_z_f16(&data[(i)*numCols + w],p0);\
+       tmpb=vldrhq_z_f16(&data[(j)*numCols + w],p0);\
+                                                  \
+       vstrhq_p(&data[(i)*numCols + w], tmpb, p0);\
+       vstrhq_p(&data[(j)*numCols + w], tmpa, p0);\
+                                                  \
+       cnt -= 8;                                  \
+    }                                             \
+  }
+
+#define SCALE_ROW_F16(A,COL,v,i)                   \
+{ /* Scale row i of A by v, columns >= COL */      \
+  int cnt = ((A)->numCols)-(COL);                   \
+  int32_t w;                                       \
+  float16_t *data = (A)->pData;                     \
+  const int32_t numCols = (A)->numCols;            \
+  /* 8 half-precision lanes per iteration */        \
+  for(w=(COL);w < numCols; w+=8)                    \
+  {                                                 \
+       f16x8_t tmpa;                                \
+       mve_pred16_t p0 = vctp16q(cnt);              \
+       tmpa = vldrhq_z_f16(&data[(i)*numCols + w],p0);\
+       tmpa = vmulq_n_f16(tmpa,(_Float16)(v));      \
+       vstrhq_p(&data[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 8;                                    \
+  }                                                 \
+                                                    \
+}
+
+#define MAC_ROW_F16(COL,A,i,v,B,j)                   \
+{ /* A[i][c] += v * B[j][c] for columns c >= COL */  \
+  int cnt = ((A)->numCols)-(COL);                    \
+  int32_t w;                                        \
+  float16_t *dataA = (A)->pData;                     \
+  float16_t *dataB = (B)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  /* Row stride of A is also used to index B */      \
+  for(w=(COL);w < numCols; w+=8)                     \
+  {                                                  \
+       f16x8_t tmpa,tmpb;                            \
+       mve_pred16_t p0 = vctp16q(cnt);               \
+       tmpa = vldrhq_z_f16(&dataA[(i)*numCols + w],p0);\
+       tmpb = vldrhq_z_f16(&dataB[(j)*numCols + w],p0);\
+       tmpa = vfmaq_n_f16(tmpa,tmpb,(v));            \
+       vstrhq_p(&dataA[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 8;                                     \
+  }                                                  \
+                                                     \
+}
+
+#define MAS_ROW_F16(COL,A,i,v,B,j)                   \
+{ /* A[i][c] -= v * B[j][c] for columns c >= COL */  \
+  int cnt = ((A)->numCols)-(COL);                    \
+  int32_t w;                                        \
+  float16_t *dataA = (A)->pData;                     \
+  float16_t *dataB = (B)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  f16x8_t vec=vdupq_n_f16(v);                        \
+  /* Row stride of A is also used to index B */      \
+  for(w=(COL);w < numCols; w+=8)                     \
+  {                                                  \
+       f16x8_t tmpa,tmpb;                            \
+       mve_pred16_t p0 = vctp16q(cnt);               \
+       tmpa = vldrhq_z_f16(&dataA[(i)*numCols + w],p0);\
+       tmpb = vldrhq_z_f16(&dataB[(j)*numCols + w],p0);\
+       tmpa = vfmsq_f16(tmpa,tmpb,vec);              \
+       vstrhq_p(&dataA[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 8;                                     \
+  }                                                  \
+                                                     \
+}
+
+#else
+
+
+#define SWAP_ROWS_F16(A,COL,i,j)       \
+{ /* Swap rows i and j, cols >= COL */ \
+  int32_t w;                           \
+  float16_t *dataI = (A)->pData;       \
+  float16_t *dataJ = (A)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* Point at (i,COL) and (j,COL) */   \
+  dataI += (i)*numCols + (COL);        \
+  dataJ += (j)*numCols + (COL);        \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     float16_t tmp;                    \
+     tmp = *dataI;                     \
+     *dataI++ = *dataJ;                \
+     *dataJ++ = tmp;                   \
+  }                                    \
+}
+
+#define SCALE_ROW_F16(A,COL,v,i)       \
+{ /* Scale row i by v, cols >= COL */  \
+  int32_t w;                           \
+  float16_t *data = (A)->pData;        \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* Point at element (i,COL) */       \
+  data += (i)*numCols + (COL);         \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     *data++ *= (_Float16)(v);         \
+  }                                    \
+}
+
+
+#define MAC_ROW_F16(COL,A,i,v,B,j)                \
+{ /* A[i][c] += v * B[j][c] for columns >= COL */ \
+  int32_t w;                                      \
+  float16_t *dataA = (A)->pData;                  \
+  float16_t *dataB = (B)->pData;                  \
+  const int32_t numCols = (A)->numCols;           \
+  const int32_t nb = numCols-(COL);               \
+  /* Row stride of A is also used to index B */   \
+  dataA += (i)*numCols + (COL);                   \
+  dataB += (j)*numCols + (COL);                   \
+                                                  \
+  for(w=0;w < nb; w++)                            \
+  {                                               \
+     *dataA++ += (_Float16)(v) * (_Float16)*dataB++;\
+  }                                               \
+}
+
+#define MAS_ROW_F16(COL,A,i,v,B,j)                \
+{ /* A[i][c] -= v * B[j][c] for columns >= COL */ \
+  int32_t w;                                      \
+  float16_t *dataA = (A)->pData;                  \
+  float16_t *dataB = (B)->pData;                  \
+  const int32_t numCols = (A)->numCols;           \
+  const int32_t nb = numCols-(COL);               \
+  /* Row stride of A is also used to index B */   \
+  dataA += (i)*numCols + (COL);                   \
+  dataB += (j)*numCols + (COL);                   \
+                                                  \
+  for(w=0;w < nb; w++)                            \
+  {                                               \
+     *dataA++ -= (_Float16)(v) * (_Float16)*dataB++;\
+  }                                               \
+}
+
+#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+/* Functions with only a scalar version */
+#define COPY_COL_F16(A,ROW,COL,DST) /* float16_t instantiation of COPY_COL_T */ \
+  COPY_COL_T(float16_t,A,ROW,COL,DST)
+
+#define SCALE_COL_F16(A,ROW,v,i) /* float16_t instantiation of SCALE_COL_T; v is cast to _Float16 */ \
+  SCALE_COL_T(float16_t,(_Float16),A,ROW,v,i)
+  
+#endif /* defined(ARM_FLOAT16_SUPPORTED)*/
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F32(A,COL,i,j)                  \
+  { /* Swap rows i and j of A, columns >= COL */  \
+    int cnt = ((A)->numCols)-(COL);               \
+    float32_t *data = (A)->pData;                 \
+    const int32_t numCols = (A)->numCols;        \
+    int32_t w;                                   \
+    /* 4 single-precision lanes per iteration */  \
+    for(w=(COL);w < numCols; w+=4)                \
+    {                                             \
+       f32x4_t tmpa,tmpb;                         \
+       mve_pred16_t p0 = vctp32q(cnt);            \
+       /* p0 limits access to remaining cnt */    \
+       tmpa=vldrwq_z_f32(&data[(i)*numCols + w],p0);\
+       tmpb=vldrwq_z_f32(&data[(j)*numCols + w],p0);\
+                                                  \
+       vstrwq_p(&data[(i)*numCols + w], tmpb, p0);\
+       vstrwq_p(&data[(j)*numCols + w], tmpa, p0);\
+                                                  \
+       cnt -= 4;                                  \
+    }                                             \
+  }
+
+#define MAC_ROW_F32(COL,A,i,v,B,j)                   \
+{ /* A[i][c] += v * B[j][c] for columns c >= COL */  \
+  int cnt = ((A)->numCols)-(COL);                    \
+  float32_t *dataA = (A)->pData;                     \
+  float32_t *dataB = (B)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  int32_t w;                                        \
+  /* Row stride of A is also used to index B */      \
+  for(w=(COL);w < numCols; w+=4)                     \
+  {                                                  \
+       f32x4_t tmpa,tmpb;                            \
+       mve_pred16_t p0 = vctp32q(cnt);               \
+       tmpa = vldrwq_z_f32(&dataA[(i)*numCols + w],p0);\
+       tmpb = vldrwq_z_f32(&dataB[(j)*numCols + w],p0);\
+       tmpa = vfmaq_n_f32(tmpa,tmpb,(v));            \
+       vstrwq_p(&dataA[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 4;                                     \
+  }                                                  \
+                                                     \
+}
+
+#define MAS_ROW_F32(COL,A,i,v,B,j)                   \
+{ /* A[i][c] -= v * B[j][c] for columns c >= COL */  \
+  int cnt = ((A)->numCols)-(COL);                    \
+  float32_t *dataA = (A)->pData;                     \
+  float32_t *dataB = (B)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  int32_t w;                                        \
+  f32x4_t vec=vdupq_n_f32(v);                        \
+  /* Row stride of A is also used to index B */      \
+  for(w=(COL);w < numCols; w+=4)                     \
+  {                                                  \
+       f32x4_t tmpa,tmpb;                            \
+       mve_pred16_t p0 = vctp32q(cnt);               \
+       tmpa = vldrwq_z_f32(&dataA[(i)*numCols + w],p0);\
+       tmpb = vldrwq_z_f32(&dataB[(j)*numCols + w],p0);\
+       tmpa = vfmsq_f32(tmpa,tmpb,vec);              \
+       vstrwq_p(&dataA[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 4;                                     \
+  }                                                  \
+                                                     \
+}
+
+#define SCALE_ROW_F32(A,COL,v,i)                    \
+{ /* Scale row i of A by v, columns >= COL */       \
+  int cnt = ((A)->numCols)-(COL);                   \
+  float32_t *data = (A)->pData;                     \
+  const int32_t numCols = (A)->numCols;            \
+  int32_t w;                                       \
+  /* 4 single-precision lanes per iteration */      \
+  for(w=(COL);w < numCols; w+=4)                    \
+  {                                                 \
+       f32x4_t tmpa;                                \
+       mve_pred16_t p0 = vctp32q(cnt);              \
+       tmpa = vldrwq_z_f32(&data[(i)*numCols + w],p0);\
+       tmpa = vmulq_n_f32(tmpa,(v));                \
+       vstrwq_p(&data[(i)*numCols + w], tmpa, p0);  \
+       cnt -= 4;                                    \
+  }                                                 \
+                                                    \
+}
+
+#elif defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define SWAP_ROWS_F32(A,COL,i,j)       \
+{ /* Swap rows i and j, cols >= COL */ \
+  int32_t w;                           \
+  float32_t *dataI = (A)->pData;       \
+  float32_t *dataJ = (A)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols - (COL);  \
+  /* Point at (i,COL) and (j,COL) */   \
+  dataI += (i)*numCols + (COL);        \
+  dataJ += (j)*numCols + (COL);        \
+                                       \
+  float32_t tmp;                       \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     tmp = *dataI;                     \
+     *dataI++ = *dataJ;                \
+     *dataJ++ = tmp;                   \
+  }                                    \
+}
+
+#define MAC_ROW_F32(COL,A,i,v,B,j)     \
+{ /* A[i][c] += v*B[j][c], c >= COL */ \
+  float32_t *dataA = (A)->pData;       \
+  float32_t *dataB = (B)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols - (COL);  \
+  int32_t nbElems;                     \
+  f32x4_t vec = vdupq_n_f32(v);        \
+  /* Number of full 4-lane vectors */  \
+  nbElems = nb >> 2;                   \
+                                       \
+  dataA += (i)*numCols + (COL);        \
+  dataB += (j)*numCols + (COL);        \
+                                       \
+  while(nbElems>0)                     \
+  {                                    \
+       f32x4_t tmpa,tmpb;              \
+       tmpa = vld1q_f32(dataA);        \
+       tmpb = vld1q_f32(dataB);        \
+       tmpa = vmlaq_f32(tmpa,tmpb,vec);\
+       vst1q_f32(dataA, tmpa);         \
+       nbElems--;                      \
+       dataA += 4;                     \
+       dataB += 4;                     \
+  }                                    \
+  /* Scalar tail: remaining 0..3 */    \
+  nbElems = nb & 3;                    \
+  while(nbElems > 0)                   \
+  {                                    \
+     *dataA++ += (v) * *dataB++;       \
+     nbElems--;                        \
+  }                                    \
+}
+
+#define MAS_ROW_F32(COL,A,i,v,B,j)     \
+{ /* A[i][c] -= v*B[j][c], c >= COL */ \
+  float32_t *dataA = (A)->pData;       \
+  float32_t *dataB = (B)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols - (COL);  \
+  int32_t nbElems;                     \
+  f32x4_t vec = vdupq_n_f32(v);        \
+  /* Number of full 4-lane vectors */  \
+  nbElems = nb >> 2;                   \
+                                       \
+  dataA += (i)*numCols + (COL);        \
+  dataB += (j)*numCols + (COL);        \
+                                       \
+  while(nbElems>0)                     \
+  {                                    \
+       f32x4_t tmpa,tmpb;              \
+       tmpa = vld1q_f32(dataA);        \
+       tmpb = vld1q_f32(dataB);        \
+       tmpa = vmlsq_f32(tmpa,tmpb,vec);\
+       vst1q_f32(dataA, tmpa);         \
+       nbElems--;                      \
+       dataA += 4;                     \
+       dataB += 4;                     \
+  }                                    \
+  /* Scalar tail: remaining 0..3 */    \
+  nbElems = nb & 3;                    \
+  while(nbElems > 0)                   \
+  {                                    \
+     *dataA++ -= (v) * *dataB++;       \
+     nbElems--;                        \
+  }                                    \
+}
+
+#define SCALE_ROW_F32(A,COL,v,i)        \
+{ /* Scale row i by v, cols >= COL */   \
+  float32_t *data = (A)->pData;         \
+  const int32_t numCols = (A)->numCols; \
+  const int32_t nb = numCols - (COL);   \
+  int32_t nbElems;                      \
+  f32x4_t vec = vdupq_n_f32(v);         \
+  /* Number of full 4-lane vectors */   \
+  nbElems = nb >> 2;                    \
+                                        \
+  data += (i)*numCols + (COL);          \
+  while(nbElems>0)                      \
+  {                                     \
+       f32x4_t tmpa;                    \
+       tmpa = vld1q_f32(data);          \
+       tmpa = vmulq_f32(tmpa,vec);      \
+       vst1q_f32(data, tmpa);           \
+       data += 4;                       \
+       nbElems --;                      \
+  }                                     \
+  /* Scalar tail: remaining 0..3 */     \
+  nbElems = nb & 3;                     \
+  while(nbElems > 0)                    \
+  {                                     \
+     *data++ *= (v);                    \
+     nbElems--;                         \
+  }                                     \
+                                        \
+}
+
+#else
+
+#define SWAP_ROWS_F32(A,COL,i,j)       \
+{ /* Swap rows i and j, cols >= COL */ \
+  int32_t w;                           \
+  float32_t tmp;                       \
+  float32_t *dataI = (A)->pData;       \
+  float32_t *dataJ = (A)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols - (COL);  \
+  /* Point at (i,COL) and (j,COL) */   \
+  dataI += (i)*numCols + (COL);        \
+  dataJ += (j)*numCols + (COL);        \
+                                       \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     tmp = *dataI;                     \
+     *dataI++ = *dataJ;                \
+     *dataJ++ = tmp;                   \
+  }                                    \
+}
+
+#define SCALE_ROW_F32(A,COL,v,i)       \
+{ /* Scale row i by v, cols >= COL */  \
+  int32_t w;                           \
+  float32_t *data = (A)->pData;        \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols - (COL);  \
+  /* Point at element (i,COL) */       \
+  data += (i)*numCols + (COL);         \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     *data++ *= (v);                   \
+  }                                    \
+}
+
+
+#define MAC_ROW_F32(COL,A,i,v,B,j)     \
+{ /* A[i][c] += v*B[j][c], c >= COL */ \
+  int32_t w;                           \
+  float32_t *dataA = (A)->pData;       \
+  float32_t *dataB = (B)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* A's row stride also indexes B */  \
+  dataA = dataA + (i)*numCols + (COL); \
+  dataB = dataB + (j)*numCols + (COL); \
+                                       \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     *dataA++ += (v) * *dataB++;       \
+  }                                    \
+}
+
+#define MAS_ROW_F32(COL,A,i,v,B,j)     \
+{ /* A[i,COL..] -= v * B[j,COL..] */   \
+  int32_t w;                           \
+  float32_t *dataA = (A)->pData;       \
+  float32_t *dataB = (B)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* A's numCols is used for B too */  \
+  dataA = dataA + (i)*numCols + (COL); \
+  dataB = dataB + (j)*numCols + (COL); \
+  /* v is re-evaluated per element */  \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     *dataA++ -= (v) * *dataB++;       \
+  }                                    \
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/* Functions with only a scalar version */
+
+#define COPY_COL_F32(A,ROW,COL,DST) /* float32_t form of COPY_COL_T */ \
+  COPY_COL_T(float32_t,A,ROW,COL,DST)
+
+#define COPY_COL_F64(A,ROW,COL,DST) /* float64_t form of COPY_COL_T */ \
+  COPY_COL_T(float64_t,A,ROW,COL,DST)
+
+#define SWAP_COLS_F32(A,COL,i,j)                    \
+{ /* swap cols i,j in rows COL..numCols-1 */        \
+  int32_t w;                                        \
+  float32_t *data = (A)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  for(w=(COL);w < numCols; w++)                     \
+  { /* row bound is numCols (square A?) */          \
+     float32_t tmp;                                 \
+     tmp = data[w*numCols + (i)];                   \
+     data[w*numCols + (i)] = data[w*numCols + (j)]; \
+     data[w*numCols + (j)] = tmp;                   \
+  }                                                 \
+}
+
+#define SCALE_COL_F32(A,ROW,v,i) /* float32_t form of SCALE_COL_T */ \
+  SCALE_COL_T(float32_t,,A,ROW,v,i) /* note: 2nd argument is passed empty */
+
+#define SWAP_ROWS_F64(A,COL,i,j)       \
+{ /* swap rows i,j from col COL */     \
+  int32_t w;                           \
+  float64_t *dataI = (A)->pData;       \
+  float64_t *dataJ = (A)->pData;       \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* row cursors start at col COL */   \
+  dataI += (i)*numCols + (COL);        \
+  dataJ += (j)*numCols + (COL);        \
+  /* exchange nb elements via tmp */   \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     float64_t tmp;                    \
+     tmp = *dataI;                     \
+     *dataI++ = *dataJ;                \
+     *dataJ++ = tmp;                   \
+  }                                    \
+}
+
+#define SWAP_COLS_F64(A,COL,i,j)                    \
+{ /* swap cols i,j in rows COL..numCols-1 */        \
+  int32_t w;                                        \
+  float64_t *data = (A)->pData;                     \
+  const int32_t numCols = (A)->numCols;             \
+  for(w=(COL);w < numCols; w++)                     \
+  { /* row bound is numCols (square A?) */          \
+     float64_t tmp;                                 \
+     tmp = data[w*numCols + (i)];                   \
+     data[w*numCols + (i)] = data[w*numCols + (j)]; \
+     data[w*numCols + (j)] = tmp;                   \
+  }                                                 \
+}
+
+#define SCALE_ROW_F64(A,COL,v,i)       \
+{ /* scale row i by v from col COL */  \
+  int32_t w;                           \
+  float64_t *data = (A)->pData;        \
+  const int32_t numCols = (A)->numCols;\
+  const int32_t nb = numCols-(COL);    \
+  /* cursor starts at (i, COL) */      \
+  data += (i)*numCols + (COL);         \
+  /* v is re-evaluated per element */  \
+  for(w=0;w < nb; w++)                 \
+  {                                    \
+     *data++ *= (v);                   \
+  }                                    \
+}
+
+#define SCALE_COL_F64(A,ROW,v,i) /* float64_t form of SCALE_COL_T */ \
+  SCALE_COL_T(float64_t,,A,ROW,v,i) /* note: 2nd argument is passed empty */
+
+#define MAC_ROW_F64(COL,A,i,v,B,j)      \
+{ /* A[i,COL..] += v * B[j,COL..] */    \
+  int32_t w;                            \
+  float64_t *dataA = (A)->pData;        \
+  float64_t *dataB = (B)->pData;        \
+  const int32_t numCols = (A)->numCols; \
+  const int32_t nb = numCols-(COL);     \
+  /* A's numCols is used for B too */   \
+  dataA += (i)*numCols + (COL);         \
+  dataB += (j)*numCols + (COL);         \
+  /* v is re-evaluated per element */   \
+  for(w=0;w < nb; w++)                  \
+  {                                     \
+     *dataA++ += (v) * *dataB++;        \
+  }                                     \
+}
+
+#define MAS_ROW_F64(COL,A,i,v,B,j)      \
+{ /* A[i,COL..] -= v * B[j,COL..] */    \
+  int32_t w;                            \
+  float64_t *dataA = (A)->pData;        \
+  float64_t *dataB = (B)->pData;        \
+  const int32_t numCols = (A)->numCols; \
+  const int32_t nb = numCols-(COL);     \
+  /* A's numCols is used for B too */   \
+  dataA += (i)*numCols + (COL);         \
+  dataB += (j)*numCols + (COL);         \
+  /* v is re-evaluated per element */   \
+  for(w=0;w < nb; w++)                  \
+  {                                     \
+     *dataA++ -= (v) * *dataB++;        \
+  }                                     \
+}
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ifndef _MATRIX_UTILS_H_ */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h
index 332d3bd..1e36a51 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h
@@ -59,7 +59,7 @@ MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
 compiler file in Core or Core_A would not make sense.
 
 */
-#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__)
+#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
     __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
     {
       if (data == 0U) { return 32U; }
@@ -215,6 +215,7 @@ __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
   #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0xFFFF0000) | \
                                       (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
 
+  #define __SXTAB16_RORn(ARG1, ARG2, ARG3) __SXTAB16(ARG1, __ROR(ARG2, ARG3)) /* __SXTAB16(ARG1, x) with x = __ROR(ARG2, ARG3) */
 
   /*
    * @brief C custom defined SADD16 (by Edge Impulse)
@@ -584,10 +585,11 @@ __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
   {
     return (sum + (int32_t) (((int64_t) x * y) >> 32));
   }
-
+#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__)
   // Rotate right, dual extract 8-bits and sign extend each to 16-bits.
   // rotate value must be 8,16 or 24
   // Patched by Edge Impulse to polyfill x86 support
+  // Patched by Edge Impulse for IAR Workbench
   __STATIC_FORCEINLINE uint32_t __SXTB16_RORn(uint32_t val1, uint32_t rotate)
   {
     uint32_t ret;
@@ -601,7 +603,7 @@ __STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
     ret |= ((uint32_t)a16 & 0xffff);
     return ret;
   }
-
+#endif
   // Dual sign-extended 8 to 16-bit addition
   // Patched by Edge Impulse to polyfill x86 support
   __STATIC_FORCEINLINE uint32_t __SXTAB16(uint32_t val1, uint32_t val2)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/quaternion_math_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
index e7d08e9..8192cd8 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/quaternion_math_functions.h
@@ -1,6 +1,10 @@
 /******************************************************************************
  * @file     quaternion_math_functions.h
  * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h
index 337057a..866e467 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     statistics_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.1
+ * @date     14 July 2022
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -169,6 +170,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult);
 
 
+  /**
+   * @brief  Sum of the squares of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_power_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
   /**
    * @brief  Sum of the squares of the elements of a Q15 vector.
    * @param[in]  pSrc       is input pointer
@@ -241,6 +254,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult);
 
 
+  /**
+   * @brief  Mean value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_mean_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
   /**
    * @brief  Variance of the elements of a floating-point vector.
    * @param[in]  pSrc       is input pointer
@@ -253,6 +278,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult);
 
 
+  /**
+   * @brief  Variance of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_var_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
   /**
    * @brief  Variance of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
@@ -325,6 +362,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult);
 
 
+  /**
+   * @brief  Standard deviation of the elements of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_std_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
   /**
    * @brief  Standard deviation of the elements of a Q31 vector.
    * @param[in]  pSrc       is input pointer
@@ -363,6 +412,30 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q7_t * result,
         uint32_t * index);
 
+  /**
+   * @brief  Minimum value of absolute values of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] result     is output pointer
+   * @param[out] index      is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * result,
+        uint32_t * index);
+
+    /**
+   * @brief  Minimum value of absolute values of a Q7 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] result     is output pointer
+   */
+  void arm_absmin_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * result);
+
 
   /**
    * @brief  Minimum value of a Q15 vector.
@@ -377,6 +450,30 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q15_t * pResult,
         uint32_t * pIndex);
 
+/**
+   * @brief  Minimum value of absolute values of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a Q15 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   */
+  void arm_absmin_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
+
 
   /**
    * @brief  Minimum value of a Q31 vector.
@@ -391,6 +488,30 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q31_t * pResult,
         uint32_t * pIndex);
 
+  /**
+   * @brief  Minimum value of absolute values of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+ /**
+   * @brief  Minimum value of absolute values of a Q31 vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   */
+  void arm_absmin_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
+
 
   /**
    * @brief  Minimum value of a floating-point vector.
@@ -405,6 +526,68 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult,
         uint32_t * pIndex);
 
+  /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   */
+  void arm_absmin_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+
+  /**
+   * @brief  Minimum value of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_min_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   */
+  void arm_absmin_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
 
 /**
  * @brief Maximum value of a Q7 vector.
@@ -419,6 +602,30 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q7_t * pResult,
         uint32_t * pIndex);
 
+/**
+ * @brief Maximum value of absolute values of a Q7 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a Q7 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
+
 
 /**
  * @brief Maximum value of a Q15 vector.
@@ -433,6 +640,29 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q15_t * pResult,
         uint32_t * pIndex);
 
+/**
+ * @brief Maximum value of absolute values of a Q15 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+ * @brief Maximum value of absolute values of a Q15 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 /**
  * @brief Maximum value of a Q31 vector.
@@ -447,6 +677,29 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         q31_t * pResult,
         uint32_t * pIndex);
 
+/**
+ * @brief Maximum value of absolute values of a Q31 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
+
+ /**
+ * @brief Maximum value of absolute values of a Q31 vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 /**
  * @brief Maximum value of a floating-point vector.
@@ -461,6 +714,67 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
         float32_t * pResult,
         uint32_t * pIndex);
 
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
+
+ /**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_max_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
   /**
     @brief         Maximum value of a floating-point vector.
     @param[in]     pSrc       points to the input vector
@@ -473,7 +787,213 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
       uint32_t   blockSize,
       float32_t *pResult);
 
+  /**
+    @brief         Minimum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_f32(
+      const float32_t *pSrc,
+      uint32_t   blockSize,
+      float32_t *pResult);
+
+  /**
+    @brief         Maximum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_f64(
+      const float64_t *pSrc,
+      uint32_t   blockSize,
+      float64_t *pResult);
+
+  /**
+    @brief         Maximum value of a q31 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_q31(
+      const q31_t *pSrc,
+      uint32_t   blockSize,
+      q31_t *pResult);
+
+  /**
+    @brief         Maximum value of a q15 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_q15(
+      const q15_t *pSrc,
+      uint32_t   blockSize,
+      q15_t *pResult);
+
+  /**
+    @brief         Maximum value of a q7 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_q7(
+      const q7_t *pSrc,
+      uint32_t   blockSize,
+      q7_t *pResult);
+
+  /**
+    @brief         Minimum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_f64(
+      const float64_t *pSrc,
+      uint32_t   blockSize,
+      float64_t *pResult);
+
+/**
+    @brief         Minimum value of a q31 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_q31(
+      const q31_t *pSrc,
+      uint32_t   blockSize,
+      q31_t *pResult);
+
+  /**
+    @brief         Minimum value of a q15 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_q15(
+      const q15_t *pSrc,
+      uint32_t   blockSize,
+      q15_t *pResult);
+
+  /**
+    @brief         Minimum value of a q7 vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_q7(
+      const q7_t *pSrc,
+      uint32_t   blockSize,
+      q7_t *pResult);
+
+/**
+  @brief         Mean square error between two Q7 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q7_t * pResult);
+
+/**
+  @brief         Mean square error between two Q15 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+/**
+  @brief         Mean square error between two Q31 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+/**
+  @brief         Mean square error between two single precision float vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+/**
+  @brief         Mean square error between two double precision float vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
+/**
+ * @brief  Accumulation value of a floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    is output value.
+ */
+
+void arm_accumulate_f32(
+const float32_t * pSrc,
+      uint32_t blockSize,
+      float32_t * pResult);
+
+/**
+ * @brief  Accumulation value of a floating-point vector.
+ * @param[in]  pSrc       is input pointer
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    is output value.
+ */
 
+void arm_accumulate_f64(
+const float64_t * pSrc,
+      uint32_t blockSize,
+      float64_t * pResult);
 
 
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
index 055040f..a3db3ee 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     statistics_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.1
+ * @date     14 July 2022
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -110,6 +111,19 @@ extern "C"
         float16_t * pResult,
         uint32_t * pIndex);
 
+ /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
 /**
  * @brief Maximum value of a floating-point vector.
  * @param[in]  pSrc       points to the input buffer
@@ -123,6 +137,42 @@ extern "C"
         float16_t * pResult,
         uint32_t * pIndex);
 
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+    /**
+   * @brief  Minimum value of absolute values of a floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   */
+  void arm_absmin_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+/**
+ * @brief Maximum value of absolute values of a floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ */
+  void arm_absmax_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+
 /**
  * @brief Entropy
  *
@@ -181,6 +231,44 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
       uint32_t   blockSize,
       float16_t *pResult);
 
+/**
+    @brief         Minimum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    minimum value returned here
+    @return        none
+   */
+  void arm_min_no_idx_f16(
+      const float16_t *pSrc,
+      uint32_t   blockSize,
+      float16_t *pResult);
+
+/**
+  @brief         Mean square error between two half precision float vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none 
+*/
+  
+void arm_mse_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+
+/**
+  * @brief  Sum value of a floating-point vector.
+  * @param[in]  pSrc       is input pointer
+  * @param[in]  blockSize  is the number of samples to process
+  * @param[out] pResult    is output value.
+  */
+ void arm_accumulate_f16(
+ const float16_t * pSrc,
+       uint32_t blockSize,
+       float16_t * pResult);
 
 
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h
index 3a2e333..7b586e3 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     support_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -294,6 +295,20 @@ extern "C"
         float32_t * pDst,
         uint32_t blockSize);
 
+ 
+ 
+  /**
+   * @brief  Copies the elements of a floating-point vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_copy_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
 
   /**
    * @brief  Copies the elements of a Q7 vector.
@@ -343,6 +358,18 @@ extern "C"
         uint32_t blockSize);
 
 
+  /**
+   * @brief  Fills a constant value into a floating-point vector.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_fill_f64(
+        float64_t value,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
   /**
    * @brief  Fills a constant value into a Q7 vector.
    * @param[in]  value      input value to be filled
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions_f16.h
index 6858f82..f36d06f 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     support_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -120,6 +121,64 @@ void arm_barycenter_f16(const float16_t *in
   , uint32_t nbVectors
   , uint32_t vecDim);
 
+
+/**
+  @ingroup groupSupport
+ */
+
+/**
+ * @defgroup typecast Typecasting
+ */
+
+/**
+  @addtogroup typecast
+  @{
+ */
+
+/**
+   * @brief  Interpret a f16 as an s16 value
+   * @param[in] x  input value.
+   * @return  return value.
+   * 
+   * @par    Description
+   *            It is a typecast. No conversion of the float to int is done.
+   *            The memcpy will be optimized out by the compiler.
+   *            memcpy is used to prevent type punning issues.
+   *            With gcc, -fno-builtin MUST NOT be used or the
+   *            memcpy will not be optimized out.
+   */
+__STATIC_INLINE int16_t arm_typecast_s16_f16(float16_t x)
+{
+   int16_t res;
+   res=*(int16_t*)memcpy((char*)&res,(char*)&x,sizeof(float16_t));
+   return(res);
+}
+
+/**
+   * @brief  Interpret an s16 as an f16 value
+   * @param[in] x  input value.
+   * @return  return value.
+   * 
+   * @par    Description
+   *            It is a typecast. No conversion of the int to float is done.
+   *            The memcpy will be optimized out by the compiler.
+   *            memcpy is used to prevent type punning issues.
+   *            With gcc, -fno-builtin MUST NOT be used or the
+   *            memcpy will not be optimized out.
+   */
+__STATIC_INLINE float16_t arm_typecast_f16_s16(int16_t x)
+{
+   float16_t res;
+   res=*(float16_t*)memcpy((char*)&res,(char*)&x,sizeof(int16_t));
+   return(res);
+}
+
+
+/**
+  @} end of typecast group
+ */
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_defines.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_defines.h
index 71ad2f7..f93e953 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_defines.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_defines.h
@@ -1,6 +1,10 @@
 /******************************************************************************
  * @file     svm_defines.h
  * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.10.0
+ * @date     08 July 2021
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions.h
index 3e1038c..6576c93 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     svm_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -132,7 +133,7 @@ typedef struct
   const float32_t *dualCoefficients;      /**< Dual coefficients */
   const float32_t *supportVectors;        /**< Support vectors */
   const int32_t   *classes;               /**< The two SVM classes */
-  float32_t       coef0;                  /**< Independant constant */
+  float32_t       coef0;                  /**< Independent constant */
   float32_t       gamma;                  /**< Gamma factor */
 } arm_svm_sigmoid_instance_f32;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions_f16.h
index 9d28c74..67c97aa 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     svm_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -33,6 +34,7 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/svm_defines.h"
 
+
 #ifdef   __cplusplus
 extern "C"
 {
@@ -56,24 +58,6 @@ extern "C"
  * 
  */
 
-/**
- * @brief Integer exponentiation
- * @param[in]    x           value
- * @param[in]    nb          integer exponent >= 1
- * @return x^nb
- *
- */
-__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
-{
-    float16_t r = x;
-    nb --;
-    while(nb > 0)
-    {
-        r = r * x;
-        nb--;
-    }
-    return(r);
-}
 
 
 /**
@@ -131,7 +115,7 @@ typedef struct
   const float16_t *dualCoefficients;      /**< Dual coefficients */
   const float16_t *supportVectors;        /**< Support vectors */
   const int32_t   *classes;               /**< The two SVM classes */
-  float16_t       coef0;                  /**< Independant constant */
+  float16_t       coef0;                  /**< Independent constant */
   float16_t       gamma;                  /**< Gamma factor */
 } arm_svm_sigmoid_instance_f16;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h
index f64f5a4..2722620 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     transform_functions.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -598,6 +599,149 @@ typedef struct
         q15_t * pState,
         q15_t * pInlineBuffer);
 
+  /**
+   * @brief Instance structure for the Floating-point MFCC function.
+   */
+typedef struct
+  {
+     const float32_t *dctCoefs; /**< Internal DCT coefficients */
+     const float32_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const float32_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter  lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_f32 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_fast_instance_f32 rfft;
+#endif
+  } arm_mfcc_instance_f32 ;
+
+arm_status arm_mfcc_init_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC F32
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values
+  @param[inout]     pTmp  points to a temporary buffer of complex values
+  @return        none
+ */
+  void arm_mfcc_f32(
+  const arm_mfcc_instance_f32 * S,
+  float32_t *pSrc,
+  float32_t *pDst,
+  float32_t *pTmp
+  );
+
+typedef struct
+  {
+     const q31_t *dctCoefs; /**< Internal DCT coefficients */
+     const q31_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const q31_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter  lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_q31 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_instance_q31 rfft;
+#endif
+  } arm_mfcc_instance_q31 ;
+
+arm_status arm_mfcc_init_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC Q31
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values
+  @param[inout]     pTmp  points to a temporary buffer of complex values
+  @return        error status
+ */
+  arm_status arm_mfcc_q31(
+  const arm_mfcc_instance_q31 * S,
+  q31_t *pSrc,
+  q31_t *pDst,
+  q31_t *pTmp
+  );
+
+typedef struct
+  {
+     const q15_t *dctCoefs; /**< Internal DCT coefficients */
+     const q15_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const q15_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter  lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_q15 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_instance_q15 rfft;
+#endif
+  } arm_mfcc_instance_q15 ;
+
+arm_status arm_mfcc_init_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC Q15
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values in q8.7 format
+  @param[inout]     pTmp  points to a temporary buffer of complex values
+  @return        error status
+ */
+  arm_status arm_mfcc_q15(
+  const arm_mfcc_instance_q15 * S,
+  q15_t *pSrc,
+  q15_t *pDst,
+  q31_t *pTmp
+  );
 
 
 #ifdef   __cplusplus
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h
index cb2419a..b38a587 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h
@@ -1,8 +1,9 @@
 /******************************************************************************
  * @file     transform_functions_f16.h
  * @brief    Public header file for CMSIS DSP Library
- * @version  V1.9.0
- * @date     20. July 2020
+ * @version  V1.10.0
+ * @date     08 July 2021
+ * Target Processor: Cortex-M and Cortex-A cores
  ******************************************************************************/
 /*
  * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
@@ -146,6 +147,57 @@ arm_status arm_rfft_fast_init_f16 (
   void arm_cfft_radix2_f16(
   const arm_cfft_radix2_instance_f16 * S,
         float16_t * pSrc);
+
+  /**
+   * @brief Instance structure for the Floating-point MFCC function.
+   */
+typedef struct
+  {
+     const float16_t *dctCoefs; /**< Internal DCT coefficients */
+     const float16_t *filterCoefs; /**< Internal Mel filter coefficients */ 
+     const float16_t *windowCoefs; /**< Windowing coefficients */ 
+     const uint32_t *filterPos; /**< Internal Mel filter positions in spectrum */ 
+     const uint32_t *filterLengths; /**< Internal Mel filter  lengths */ 
+     uint32_t fftLen; /**< FFT length */
+     uint32_t nbMelFilters; /**< Number of Mel filters */
+     uint32_t nbDctOutputs; /**< Number of DCT outputs */
+#if defined(ARM_MFCC_CFFT_BASED)
+     /* Implementation of the MFCC is using a CFFT */
+     arm_cfft_instance_f16 cfft; /**< Internal CFFT instance */
+#else
+     /* Implementation of the MFCC is using a RFFT (default) */
+     arm_rfft_fast_instance_f16 rfft;
+#endif
+  } arm_mfcc_instance_f16 ;
+
+arm_status arm_mfcc_init_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  );
+
+
+/**
+  @brief         MFCC F16
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values
+  @param[inout]     pTmp  points to a temporary buffer of complex values
+  @return        none
+ */
+  void arm_mfcc_f16(
+  const arm_mfcc_instance_f16 * S,
+  float16_t *pSrc,
+  float16_t *pDst,
+  float16_t *pTmp
+  );
+
   
 #endif /* defined(ARM_FLOAT16_SUPPORTED)*/
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h
index e83da70..e6e24df 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h
@@ -27,6 +27,7 @@
 #define _ARM_MATH_UTILS_H_
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+#include <limits.h>
 
 #ifdef   __cplusplus
 extern "C"
@@ -47,6 +48,7 @@ extern "C"
 
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
+     It should not be used with negative values.
    */
   __STATIC_FORCEINLINE uint32_t arm_recip_q31(
         q31_t in,
@@ -60,11 +62,11 @@ extern "C"
 
     if (in > 0)
     {
-      signBits = ((uint32_t) (__CLZ( in) - 1));
+      signBits = ((uint32_t) (__CLZ( (uint32_t)in) - 1));
     }
     else
     {
-      signBits = ((uint32_t) (__CLZ(-in) - 1));
+      signBits = ((uint32_t) (__CLZ((uint32_t)(-in)) - 1));
     }
 
     /* Convert input sample to 1.31 format */
@@ -98,6 +100,7 @@ extern "C"
 
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
+     It should not be used with negative values.
    */
   __STATIC_FORCEINLINE uint32_t arm_recip_q15(
         q15_t in,
@@ -105,21 +108,21 @@ extern "C"
   const q15_t * pRecipTable)
   {
     q15_t out = 0;
-    uint32_t tempVal = 0;
+    int32_t tempVal = 0;
     uint32_t index = 0, i = 0;
     uint32_t signBits = 0;
 
     if (in > 0)
     {
-      signBits = ((uint32_t)(__CLZ( in) - 17));
+      signBits = ((uint32_t)(__CLZ( (uint32_t)in) - 17));
     }
     else
     {
-      signBits = ((uint32_t)(__CLZ(-in) - 17));
+      signBits = ((uint32_t)(__CLZ((uint32_t)(-in)) - 17));
     }
 
     /* Convert input sample to 1.15 format */
-    in = (in << signBits);
+    in = (q15_t)(in << signBits);
 
     /* calculation of index for initial approximated Val */
     index = (uint32_t)(in >>  8);
@@ -132,8 +135,8 @@ extern "C"
     /* running approximation for two iterations */
     for (i = 0U; i < 2U; i++)
     {
-      tempVal = (uint32_t) (((q31_t) in * out) >> 15);
-      tempVal = 0x7FFFu - tempVal;
+      tempVal = (((q31_t) in * out) >> 15);
+      tempVal = 0x7FFF - tempVal;
       /*      1.15 with exp 1 */
       out = (q15_t) (((q31_t) out * tempVal) >> 14);
       /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
@@ -159,13 +162,13 @@ __STATIC_INLINE  void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int3
     int32_t     hi = (int32_t) (in >> 32);
     int32_t     lo = (int32_t) ((in << 32) >> 32);
 
-    n1 = __CLZ(hi) - 32;
+    n1 = __CLZ((uint32_t)hi) - 32;
     if (!n1)
     {
         /*
          * input fits in 32-bit
          */
-        n1 = __CLZ(lo);
+        n1 = __CLZ((uint32_t)lo);
         if (!n1)
         {
             /*
@@ -201,13 +204,13 @@ __STATIC_INLINE  void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int3
         /*
          * 64 bit normalization
          */
-        *normalized = (((uint32_t) lo) >> n1) | (hi << (32 - n1));
+        *normalized = (int32_t)(((uint32_t)lo) >> n1) | (hi << (32 - n1));
     }
 }
 
-__STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
+__STATIC_INLINE int32_t arm_div_int64_to_int32(int64_t num, int32_t den)
 {
-    q31_t   result;
+    int32_t   result;
     uint64_t   absNum;
     int32_t   normalized;
     int32_t   norm;
@@ -216,18 +219,25 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
      * if sum fits in 32bits
      * avoid costly 64-bit division
      */
-    absNum = num > 0 ? num : -num;
+    if (num == (int64_t)LONG_MIN)
+    {
+        absNum = LONG_MAX;
+    }
+    else
+    {
+       absNum = (uint64_t) (num > 0 ? num : -num);
+    }
     arm_norm_64_to_32u(absNum, &normalized, &norm);
     if (norm > 0)
         /*
          * 32-bit division
          */
-        result = (q31_t) num / den;
+        result = (int32_t) num / den;
     else
         /*
          * 64-bit division
          */
-        result = (q31_t) (num / den);
+        result = (int32_t) (num / den);
 
     return result;
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
index 7df97b9..e974c82 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_abs_f16.c
  * Description:  Floating-point vector absolute value
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,19 +35,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicAbs Vector Absolute Value
-
-  Computes the absolute value of a vector on an element-by-element basis.
-
-  <pre>
-      pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
-  </pre>
-
-  The functions support in-place computation allowing the source and
-  destination pointers to reference the same memory buffer.
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicAbs
@@ -156,13 +145,13 @@ void arm_abs_f16(
     /* C = |A| */
 
     /* Calculate absolute and store result in destination buffer. */
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -184,7 +173,7 @@ void arm_abs_f16(
     /* C = |A| */
 
     /* Calculate absolute and store result in destination buffer. */
-    *pDst++ = fabsf(*pSrc++);
+    *pDst++ = (_Float16)fabsf((float32_t)*pSrc++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f32.c
index fde9ea5..3d27210 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_abs_f32.c
  * Description:  Floating-point vector absolute value
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f64.c
new file mode 100644
index 0000000..a0bd5f0
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_f64.c
@@ -0,0 +1,78 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_abs_f64.c
+ * Description:  Floating-point vector absolute value
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include <math.h>
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Floating-point vector absolute value.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void arm_abs_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = |A| */
+
+    /* Calculate absolute and store result in destination buffer. */
+    *pDst++ = fabs(*pSrc++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicAbs group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q15.c
index cce4f60..7c8ec53 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_abs_q15.c
  * Description:  Q15 vector absolute value
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q31.c
index 368e23e..fab95f2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_abs_q31.c
  * Description:  Q31 vector absolute value
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q7.c
index 8915683..f62d67a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_abs_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_abs_q7.c
  * Description:  Q7 vector absolute value
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
index 8f825c7..d9d6226 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_add_f16.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -32,17 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicAdd Vector Addition
-
-  Element-by-element addition of two vectors.
-
-  <pre>
-      pDst[n] = pSrcA[n] + pSrcB[n],   0 <= n < blockSize.
-  </pre>
-
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicAdd
@@ -130,10 +121,10 @@ void arm_add_f16(
     /* C = A + B */
 
     /* Add and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -154,7 +145,7 @@ void arm_add_f16(
     /* C = A + B */
 
     /* Add and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) + (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) + (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f32.c
index 2a56f0a..4e854f5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_add_f32.c
  * Description:  Floating-point vector addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f64.c
new file mode 100644
index 0000000..a1f01a7
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_add_f64.c
+ * Description:  Floating-point vector addition
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Element-wise addition of two float64_t vectors: pDst[n] = pSrcA[n] + pSrcB[n].
+  @param[in]     pSrcA      points to the first input vector (blockSize samples are read)
+  @param[in]     pSrcB      points to the second input vector (blockSize samples are read)
+  @param[out]    pDst       points to the output vector (blockSize samples are written)
+  @param[in]     blockSize  number of samples in each vector; 0 means nothing is read or written
+  @return        none
+ */
+
+void arm_add_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Samples still to process */
+
+  /* Start the countdown at the full vector length */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + B */
+
+    /* Add one sample pair, store the sum, and advance all three pointers. */
+    *pDst++ = (*pSrcA++) + (*pSrcB++);
+
+    /* One sample consumed */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicAdd group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q15.c
index a1b6d84..6265058 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_add_q15.c
  * Description:  Q15 vector addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -126,11 +126,11 @@ void arm_add_q15(
 
 #if defined (ARM_MATH_DSP)
     /* read 2 times 2 samples at a time from sourceA */
-    inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
-    inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
+    inA1 = read_q15x2_ia (&pSrcA);
+    inA2 = read_q15x2_ia (&pSrcA);
     /* read 2 times 2 samples at a time from sourceB */
-    inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
-    inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
+    inB1 = read_q15x2_ia (&pSrcB);
+    inB2 = read_q15x2_ia (&pSrcB);
 
     /* Add and store 2 times 2 samples at a time */
     write_q15x2_ia (&pDst, __QADD16(inA1, inB1));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q31.c
index fe85869..2d6e791 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_add_q31.c
  * Description:  Q31 vector addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q7.c
index 488b45d..46446c7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_add_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/dsp/config.hpp"
 #if EIDSP_LOAD_CMSIS_DSP_SOURCES
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,10 +23,10 @@
  * Title:        arm_add_q7.c
  * Description:  Q7 vector addition
  *
- * $Date:        May 29, 2020
- * $Revision:    V1.6.1
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
@@ -121,7 +121,7 @@ void arm_add_q7(
 
 #if defined (ARM_MATH_DSP)
     /* Add and store result in destination buffer (4 samples at a time). */
-    write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
+    write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia (&pSrcA), read_q7x4_ia (&pSrcB)));
 #else
     *pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
     *pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u16.c
index fb90af6..82aabc8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u16.c
@@ -5,13 +5,13 @@
  * Title:        arm_and_u16.c
  * Description:  uint16_t bitwise AND
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u32.c
index 73b8087..0c4b090 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u32.c
@@ -5,13 +5,13 @@
  * Title:        arm_and_u32.c
  * Description:  uint32_t bitwise AND
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u8.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u8.c
index f68e992..52ac33e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u8.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_and_u8.c
@@ -5,13 +5,13 @@
  * Title:        arm_and_u8.c
  * Description:  uint8_t bitwise AND
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
index 38bae53..bc4e732 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f16.c
@@ -5,8 +5,10 @@
  * Title:        arm_clip_f16.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -123,9 +125,9 @@ void arm_clip_f16(const float16_t * pSrc,
 {
     for (uint32_t i = 0; i < numSamples; i++)
     {                                        
-        if (pSrc[i] > high)                  
+        if ((_Float16)pSrc[i] > (_Float16)high)                  
             pDst[i] = high;                  
-        else if (pSrc[i] < low)              
+        else if ((_Float16)pSrc[i] < (_Float16)low)              
             pDst[i] = low;                   
         else                                 
             pDst[i] = pSrc[i];               
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
index b25896a..b2b1374 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_clip_f32.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -126,7 +128,8 @@ void arm_clip_f32(const float32_t * pSrc,
   float32_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
index 1ba2cfc..287109a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q15.c
@@ -5,8 +5,10 @@
  * Title:        arm_clip_q15.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -116,7 +118,8 @@ void arm_clip_q15(const q15_t * pSrc,
   q15_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
index 70d6d59..a82d2df 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q31.c
@@ -5,8 +5,10 @@
  * Title:        arm_clip_q31.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -116,7 +118,8 @@ void arm_clip_q31(const q31_t * pSrc,
   q31_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
index 006a7dc..f28678c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_clip_q7.c
@@ -5,8 +5,10 @@
  * Title:        arm_clip_q7.c
  * Description:  Floating-point vector addition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -116,7 +118,8 @@ void arm_clip_q7(const q7_t * pSrc,
   q7_t high, 
   uint32_t numSamples)
 {
-    for (uint32_t i = 0; i < numSamples; i++)
+    uint32_t i;
+    for (i = 0; i < numSamples; i++)
     {                                        
         if (pSrc[i] > high)                  
             pDst[i] = high;                  
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f16.c
index 11cbf9e..71ea70d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_dot_prod_f16.c
  * Description:  Floating-point dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,18 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicDotProd Vector Dot Product
-
-  Computes the dot product of two vectors.
-  The vectors are multiplied element-by-element and then summed.
-
-  <pre>
-      sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
-  </pre>
-
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicDotProd
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
index cd3b4f0..6f5e421 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_dot_prod_f32.c
  * Description:  Floating-point dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        05 October 2021
+ * $Revision:    V1.9.1
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -134,7 +134,9 @@ void arm_dot_prod_f32(
     f32x4_t vec1;
     f32x4_t vec2;
     f32x4_t accum = vdupq_n_f32(0);   
-    f32x2_t tmp = vdup_n_f32(0);    
+#if !defined(__aarch64__)
+    f32x2_t tmp = vdup_n_f32(0); 
+#endif   
 
     /* Compute 4 outputs at a time */
     blkCnt = blockSize >> 2U;
@@ -160,7 +162,7 @@ void arm_dot_prod_f32(
         blkCnt--;
     }
     
-#if __aarch64__
+#if defined(__aarch64__)
     sum = vpadds_f32(vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)));
 #else
     tmp = vpadd_f32(vget_low_f32(accum), vget_high_f32(accum));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f64.c
new file mode 100644
index 0000000..821931f
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_f64.c
@@ -0,0 +1,82 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_dot_prod_f64.c
+ * Description:  Floating-point dot product
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of two float64_t vectors, accumulated in index order.
+  @param[in]     pSrcA      points to the first input vector (blockSize samples are read).
+  @param[in]     pSrcB      points to the second input vector (blockSize samples are read).
+  @param[in]     blockSize  number of samples in each vector.
+  @param[out]    result     receives the sum of products; set to 0 when blockSize is 0.
+  @return        none
+ */
+
+void arm_dot_prod_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        uint32_t blockSize,
+        float64_t * result)
+{
+  uint32_t blkCnt;                               /* Samples still to process */
+  float64_t sum = 0.;                            /* Running sum of products */
+
+  /* Start the countdown at the full vector length */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Multiply one sample pair, add it to the running sum, and advance both pointers. */
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    /* One sample consumed */
+    blkCnt--;
+  }
+
+  /* Write the accumulated sum once, after the loop has finished */
+  *result = sum;
+}
+
+/**
+  @} end of BasicDotProd group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q15.c
index be944f3..a8faebc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_dot_prod_q15.c
  * Description:  Q15 dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -126,8 +126,8 @@ void arm_dot_prod_q15(
 
 #if defined (ARM_MATH_DSP)
     /* Calculate dot product and store result in a temporary buffer. */
-    sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
-    sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
+    sum = __SMLALD(read_q15x2_ia (&pSrcA), read_q15x2_ia (&pSrcB), sum);
+    sum = __SMLALD(read_q15x2_ia (&pSrcA), read_q15x2_ia (&pSrcB), sum);
 #else
     sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
     sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q31.c
index ee2d26d..bced7e8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_dot_prod_q31.c
  * Description:  Q31 dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q7.c
index d17f129..594bd01 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_dot_prod_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_dot_prod_q7.c
  * Description:  Q7 dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -131,9 +131,9 @@ void arm_dot_prod_q7(
 
 #if defined (ARM_MATH_DSP)
     /* read 4 samples at a time from sourceA */
-    input1 = read_q7x4_ia ((q7_t **) &pSrcA);
+    input1 = read_q7x4_ia (&pSrcA);
     /* read 4 samples at a time from sourceB */
-    input2 = read_q7x4_ia ((q7_t **) &pSrcB);
+    input2 = read_q7x4_ia (&pSrcB);
 
     /* extract two q7_t samples to q15_t samples */
     inA1 = __SXTB16(__ROR(input1, 8));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
index 9fc66e4..0b5994b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_mult_f16.c
  * Description:  Floating-point vector multiplication
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -32,17 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicMult Vector Multiplication
-
-  Element-by-element multiplication of two vectors.
-
-  <pre>
-      pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.
-  </pre>
-
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicMult
@@ -129,13 +120,13 @@ void arm_mult_f16(
     /* C = A * B */
 
     /* Multiply inputs and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -156,7 +147,7 @@ void arm_mult_f16(
     /* C = A * B */
 
     /* Multiply input and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) * (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) * (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f32.c
index 6441b3b..0744ac5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mult_f32.c
  * Description:  Floating-point vector multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f64.c
new file mode 100644
index 0000000..9b914aa
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mult_f64.c
+ * Description:  Floating-point vector multiplication
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Element-wise multiplication of two float64_t vectors: pDst[n] = pSrcA[n] * pSrcB[n].
+  @param[in]     pSrcA      points to the first input vector (blockSize samples are read).
+  @param[in]     pSrcB      points to the second input vector (blockSize samples are read).
+  @param[out]    pDst       points to the output vector (blockSize samples are written).
+  @param[in]     blockSize  number of samples in each vector; 0 means nothing is read or written.
+  @return        none
+ */
+
+void arm_mult_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Samples still to process */
+
+  /* Start the countdown at the full vector length */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * B */
+
+    /* Multiply one sample pair, store the product, and advance all three pointers. */
+    *pDst++ = (*pSrcA++) * (*pSrcB++);
+
+    /* One sample consumed */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicMult group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q15.c
index 079059b..d6ec9ec 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mult_q15.c
  * Description:  Q15 vector multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -125,13 +125,13 @@ void arm_mult_q15(
 
 #if defined (ARM_MATH_DSP)
     /* read 2 samples at a time from sourceA */
-    inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
+    inA1 = read_q15x2_ia (&pSrcA);
     /* read 2 samples at a time from sourceB */
-    inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
+    inB1 = read_q15x2_ia (&pSrcB);
     /* read 2 samples at a time from sourceA */
-    inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
+    inA2 = read_q15x2_ia (&pSrcA);
     /* read 2 samples at a time from sourceB */
-    inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
+    inB2 = read_q15x2_ia (&pSrcB);
 
     /* multiply mul = sourceA * sourceB */
     mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q31.c
index 9598133..60c103c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mult_q31.c
  * Description:  Q31 vector multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q7.c
index ce3f4a7..fd0bc3b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_mult_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_mult_q7.c
  * Description:  Q7 vector multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
index 36e2a88..c4d6ca0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_negate_f16.c
  * Description:  Negates floating-point vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -32,19 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicNegate Vector Negate
-
-  Negates the elements of a vector.
-
-  <pre>
-      pDst[n] = -pSrc[n],   0 <= n < blockSize.
-  </pre>
-
-  The functions support in-place computation allowing the source and
-  destination pointers to reference the same memory buffer.
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicNegate
@@ -124,13 +113,13 @@ void arm_negate_f16(
     /* C = -A */
 
     /* Negate and store result in destination buffer. */
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -151,7 +140,7 @@ void arm_negate_f16(
     /* C = -A */
 
     /* Negate and store result in destination buffer. */
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f32.c
index 3eec34e..e4df7ad 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_negate_f32.c
  * Description:  Negates floating-point vectors
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f64.c
new file mode 100644
index 0000000..870a767
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_f64.c
@@ -0,0 +1,77 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_negate_f64.c
+ * Description:  Negates floating-point vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Negates every element of a float64_t vector.
+  @param[in]     pSrc       points to the input vector; blockSize elements are read.
+  @param[out]    pDst       points to the output vector; pDst[n] = -pSrc[n].
+  @param[in]     blockSize  number of samples in each vector; 0 means nothing is written.
+  @return        none
+ */
+
+void arm_negate_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to process */
+
+  /* Start with the whole block; the loop below consumes one sample per pass. */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = -A */
+
+    /* Read one sample, flip its sign, store it, and advance both pointers. */
+    *pDst++ = -*pSrc++;
+
+    /* One sample done; the loop ends when the count reaches zero. */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicNegate group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q15.c
index 72d964b..c642c24 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_negate_q15.c
  * Description:  Negates Q15 vectors
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -120,10 +120,10 @@ void arm_negate_q15(
 
 #if defined (ARM_MATH_DSP)
     /* Negate and store result in destination buffer (2 samples at a time). */
-    in1 = read_q15x2_ia ((q15_t **) &pSrc);
+    in1 = read_q15x2_ia (&pSrc);
     write_q15x2_ia (&pDst, __QSUB16(0, in1));
 
-    in1 = read_q15x2_ia ((q15_t **) &pSrc);
+    in1 = read_q15x2_ia (&pSrc);
     write_q15x2_ia (&pDst, __QSUB16(0, in1));
 #else
     in = *pSrc++;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q31.c
index 539f890..e0048e7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_negate_q31.c
  * Description:  Negates Q31 vectors
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q7.c
index 181896f..3d3cae1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_negate_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_negate_q7.c
  * Description:  Negates Q7 vectors
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -118,7 +118,7 @@ void arm_negate_q7(
 
 #if defined (ARM_MATH_DSP)
     /* Negate and store result in destination buffer (4 samples at a time). */
-    in1 = read_q7x4_ia ((q7_t **) &pSrc);
+    in1 = read_q7x4_ia (&pSrc);
     write_q7x4_ia (&pDst, __QSUB8(0, in1));
 #else
     in = *pSrc++;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u16.c
index e583e49..5e58873 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u16.c
@@ -5,13 +5,13 @@
  * Title:        arm_not_u16.c
  * Description:  uint16_t bitwise NOT
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u32.c
index ce702dd..634800a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u32.c
@@ -5,13 +5,13 @@
  * Title:        arm_not_u32.c
  * Description:  uint32_t bitwise NOT
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u8.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u8.c
index 87a417d..b83fb0f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u8.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_not_u8.c
@@ -5,13 +5,13 @@
  * Title:        arm_not_u8.c
  * Description:  uint8_t bitwise NOT
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
index a8a9bd9..4bb665c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_offset_f16.c
  * Description:  Floating-point vector offset
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -32,20 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicOffset Vector Offset
-
-  Adds a constant offset to each element of a vector.
-
-  <pre>
-      pDst[n] = pSrc[n] + offset,   0 <= n < blockSize.
-  </pre>
-
-  The functions support in-place computation allowing the source and
-  destination pointers to reference the same memory buffer.
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
-
 /**
   @addtogroup BasicOffset
   @{
@@ -128,13 +116,13 @@ void arm_offset_f16(
     /* C = A + offset */
 
     /* Add offset and store result in destination buffer. */
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -155,7 +143,7 @@ void arm_offset_f16(
     /* C = A + offset */
 
     /* Add offset and store result in destination buffer. */
-    *pDst++ = (*pSrc++) + offset;
+    *pDst++ = (_Float16)(*pSrc++) + (_Float16)offset;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f32.c
index c32e7e4..3033def 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_offset_f32.c
  * Description:  Floating-point vector offset
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f64.c
new file mode 100644
index 0000000..36b9007
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_offset_f64.c
+ * Description:  Floating-point vector offset
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicOffset
+  @{
+ */
+
+/**
+  @brief         Adds the same constant offset to every element of a float64_t vector.
+  @param[in]     pSrc       points to the input vector; blockSize elements are read
+  @param[in]     offset     is the value added to each input element
+  @param[out]    pDst       points to the output vector; pDst[n] = pSrc[n] + offset
+  @param[in]     blockSize  number of samples in each vector; 0 means nothing is written
+  @return        none
+ */
+
+void arm_offset_f64(
+  const float64_t * pSrc,
+        float64_t offset,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to process */
+
+  /* Start with the whole block; the loop below consumes one sample per pass. */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A + offset */
+
+    /* Read one sample, add the offset, store the sum, and advance both pointers. */
+    *pDst++ = (*pSrc++) + offset;
+
+    /* One sample done; the loop ends when the count reaches zero. */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicOffset group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q15.c
index ecf0829..9423730 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_offset_q15.c
  * Description:  Q15 vector offset
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -124,8 +124,8 @@ void arm_offset_q15(
 
 #if defined (ARM_MATH_DSP)
     /* Add offset and store result in destination buffer (2 samples at a time). */
-    write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
-    write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
+    write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia (&pSrc), offset_packed));
+    write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia (&pSrc), offset_packed));
 #else
     *pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
     *pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q31.c
index b6ecb9c..b0a1c99 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_offset_q31.c
  * Description:  Q31 vector offset
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q7.c
index 452cdb2..dacfe48 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_offset_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_offset_q7.c
  * Description:  Q7 vector offset
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -123,7 +123,7 @@ void arm_offset_q7(
 
 #if defined (ARM_MATH_DSP)
     /* Add offset and store result in destination buffer (4 samples at a time). */
-    write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia ((q7_t **) &pSrc), offset_packed));
+    write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia (&pSrc), offset_packed));
 #else
     *pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
     *pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u16.c
index c7d12ba..2de542a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u16.c
@@ -5,13 +5,13 @@
  * Title:        arm_or_u16.c
  * Description:  uint16_t bitwise inclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u32.c
index 655b925..6e285dc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u32.c
@@ -5,13 +5,13 @@
  * Title:        arm_or_u32.c
  * Description:  uint32_t bitwise inclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u8.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u8.c
index 3eb9058..b9014a3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u8.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_or_u8.c
@@ -5,13 +5,13 @@
  * Title:        arm_or_u8.c
  * Description:  uint8_t bitwise inclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
index 240881a..ecd4180 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_scale_f16.c
  * Description:  Multiplies a floating-point vector by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,32 +34,7 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicScale Vector Scale
-
-  Multiply a vector by a scalar value.  For floating-point data, the algorithm used is:
-
-  <pre>
-      pDst[n] = pSrc[n] * scale,   0 <= n < blockSize.
-  </pre>
-
-  In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
-  a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
-  The shift allows the gain of the scaling operation to exceed 1.0.
-  The algorithm used with fixed-point data is:
 
-  <pre>
-      pDst[n] = (pSrc[n] * scaleFract) << shift,   0 <= n < blockSize.
-  </pre>
-
-  The overall scale factor applied to the fixed-point data is
-  <pre>
-      scale = scaleFract * 2^shift.
-  </pre>
-
-  The functions support in-place computation allowing the source and destination
-  pointers to reference the same memory buffer.
- */
 
 /**
   @addtogroup BasicScale
@@ -143,13 +118,13 @@ void arm_scale_f16(
     /* C = A * scale */
 
     /* Scale input and store result in destination buffer. */
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -170,7 +145,7 @@ void arm_scale_f16(
     /* C = A * scale */
 
     /* Scale input and store result in destination buffer. */
-    *pDst++ = (*pSrc++) * scale;
+    *pDst++ = (_Float16)(*pSrc++) * (_Float16)scale;
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f32.c
index 342b656..c5c5479 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_scale_f32.c
  * Description:  Multiplies a floating-point vector by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f64.c
new file mode 100644
index 0000000..747f06b
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_scale_f64.c
+ * Description:  Multiplies a floating-point vector by a scalar
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicScale
+  @{
+ */
+
+/**
+  @brief         Multiplies every element of a float64_t vector by the same scalar.
+  @param[in]     pSrc       points to the input vector; blockSize elements are read
+  @param[in]     scale      is the factor each input element is multiplied by
+  @param[out]    pDst       points to the output vector; pDst[n] = pSrc[n] * scale
+  @param[in]     blockSize  number of samples in each vector; 0 means nothing is written
+  @return        none
+ */
+
+void arm_scale_f64(
+  const float64_t *pSrc,
+        float64_t scale,
+        float64_t *pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to process */
+
+  /* Start with the whole block; the loop below consumes one sample per pass. */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A * scale */
+
+    /* Read one sample, multiply by scale, store the product, and advance both pointers. */
+    *pDst++ = (*pSrc++) * scale;
+
+    /* One sample done; the loop ends when the count reaches zero. */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicScale group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q15.c
index 5a53708..3443de5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_scale_q15.c
  * Description:  Multiplies a Q15 vector by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -138,8 +138,8 @@ void arm_scale_q15(
 
 #if defined (ARM_MATH_DSP)
     /* read 2 times 2 samples at a time from source */
-    inA1 = read_q15x2_ia ((q15_t **) &pSrc);
-    inA2 = read_q15x2_ia ((q15_t **) &pSrc);
+    inA1 = read_q15x2_ia (&pSrc);
+    inA2 = read_q15x2_ia (&pSrc);
 
     /* Scale inputs and store result in temporary variables
      * in single cycle by packing the outputs */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q31.c
index 6d0f7c7..271278b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_scale_q31.c
  * Description:  Multiplies a Q31 vector by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q7.c
index 5847500..f4383ee 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_scale_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_scale_q7.c
  * Description:  Multiplies a Q7 vector by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q15.c
index ce677b3..3579dad 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_shift_q15.c
  * Description:  Shifts the elements of a Q15 vector by a specified number of bits
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q31.c
index 53f01cd..c2fc8fb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_shift_q31.c
  * Description:  Shifts the elements of a Q31 vector by a specified number of bits
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c
index eedb7eb..87ef339 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_shift_q7.c
  * Description:  Processing function for the Q7 Shifting
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
index 0cba4bf..571fe5f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_sub_f16.c
  * Description:  Floating-point vector subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,17 +34,6 @@
   @ingroup groupMath
  */
 
-/**
-  @defgroup BasicSub Vector Subtraction
-
-  Element-by-element subtraction of two vectors.
-
-  <pre>
-      pDst[n] = pSrcA[n] - pSrcB[n],   0 <= n < blockSize.
-  </pre>
-
-  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup BasicSub
@@ -131,13 +120,13 @@ void arm_sub_f16(
     /* C = A - B */
 
     /* Subtract and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -158,7 +147,7 @@ void arm_sub_f16(
     /* C = A - B */
 
     /* Subtract and store result in destination buffer. */
-    *pDst++ = (*pSrcA++) - (*pSrcB++);
+    *pDst++ = (_Float16)(*pSrcA++) - (_Float16)(*pSrcB++);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f32.c
index 5dbd231..476d7ee 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_sub_f32.c
  * Description:  Floating-point vector subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f64.c
new file mode 100644
index 0000000..a956a14
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_sub_f64.c
+ * Description:  Floating-point vector subtraction
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @addtogroup BasicSub
+  @{
+ */
+
+/**
+  @brief         Floating-point (float64_t) vector subtraction: pDst[n] = pSrcA[n] - pSrcB[n] for 0 <= n < blockSize.
+  @param[in]     pSrcA      points to the first input vector (the minuend)
+  @param[in]     pSrcB      points to the second input vector (subtracted from pSrcA)
+  @param[out]    pDst       points to the output vector (blockSize elements are written)
+  @param[in]     blockSize  number of samples in each vector; when 0 nothing is read or written
+  @return        none
+ */
+
+void arm_sub_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to process */
+
+  /* Start with the full block; one sample is handled per loop iteration */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A - B */
+
+    /* Read one sample from each input, subtract, store, and advance all three pointers. */
+    *pDst++ = (*pSrcA++) - (*pSrcB++);
+
+    /* One sample done */
+    blkCnt--;
+  }
+
+}
+
+/**
+  @} end of BasicSub group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q15.c
index f5a2d2c..0892988 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_sub_q15.c
  * Description:  Q15 vector subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -127,11 +127,11 @@ void arm_sub_q15(
 
 #if defined (ARM_MATH_DSP)
     /* read 2 times 2 samples at a time from sourceA */
-    inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
-    inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
+    inA1 = read_q15x2_ia (&pSrcA);
+    inA2 = read_q15x2_ia (&pSrcA);
     /* read 2 times 2 samples at a time from sourceB */
-    inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
-    inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
+    inB1 = read_q15x2_ia (&pSrcB);
+    inB2 = read_q15x2_ia (&pSrcB);
 
     /* Subtract and store 2 times 2 samples at a time */
     write_q15x2_ia (&pDst, __QSUB16(inA1, inB1));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q31.c
index 79c291e..8aaae08 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_sub_q31.c
  * Description:  Q31 vector subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q7.c
index e2c1ecb..c2aea32 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_sub_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_sub_q7.c
  * Description:  Q7 vector subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -119,7 +119,7 @@ void arm_sub_q7(
 
 #if defined (ARM_MATH_DSP)
     /* Subtract and store result in destination buffer (4 samples at a time). */
-    write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
+    write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia (&pSrcA), read_q7x4_ia (&pSrcB)));
 #else
     *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
     *pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u16.c
index 002edf5..def6516 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u16.c
@@ -5,13 +5,13 @@
  * Title:        arm_xor_u16.c
  * Description:  uint16_t bitwise exclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u32.c
index f835b1f..74c3d1c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u32.c
@@ -5,13 +5,13 @@
  * Title:        arm_xor_u32.c
  * Description:  uint32_t bitwise exclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u8.c b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u8.c
index 6d57e3e..c1c8615 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u8.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BasicMathFunctions/arm_xor_u8.c
@@ -5,13 +5,13 @@
  * Title:        arm_xor_u8.c
  * Description:  uint8_t bitwise exclusive OR
  *
- * $Date:        14 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
index 1db5cfe..bfe2e28 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_naive_gaussian_bayes_predict_f16
  * Description:  Naive Gaussian Bayesian Estimator
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,7 +35,6 @@
 #include <limits.h>
 #include <math.h>
 
-#define PI_F 3.1415926535897932384626433832795f16
 
 /**
  * @addtogroup groupBayes
@@ -43,13 +44,12 @@
 /**
  * @brief Naive Gaussian Bayesian Estimator
  *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
+ * @param[in]  *S                       points to a naive bayes instance structure
+ * @param[in]  *in                      points to the elements of the input vector.
+ * @param[out] *pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] *pBufferB                points to a temporary buffer of length numberOfClasses
  * @return The predicted class
  *
- * @par If the number of classes is big, MVE version will consume lot of
- * stack since the log prior are computed on the stack.
  *
  */
 
@@ -60,19 +60,21 @@
 
 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
    const float16_t * in, 
-   float16_t *pBuffer)
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB
+   )
 {
     uint32_t         nbClass;
     const float16_t *pTheta = S->theta;
     const float16_t *pSigma = S->sigma;
-    float16_t      *buffer = pBuffer;
+    float16_t      *buffer = pOutputProbabilities;
     const float16_t *pIn = in;
     float16_t       result;
     f16x8_t         vsigma;
     _Float16       tmp;
     f16x8_t         vacc1, vacc2;
     uint32_t        index;
-    float16_t       logclassPriors[S->numberOfClasses];
+    float16_t       *logclassPriors=pBufferB;
     float16_t      *pLogPrior = logclassPriors;
 
     arm_vlog_f16((float16_t *) S->classPriors, logclassPriors, S->numberOfClasses);
@@ -131,42 +133,35 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
         tmp = -0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc1);
         tmp -= 0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc2);
 
-        *buffer = tmp + *pLogPrior++;
+        *buffer = (_Float16)tmp + (_Float16)*pLogPrior++;
         buffer++;
     }
 
-    arm_max_f16(pBuffer, S->numberOfClasses, &result, &index);
+    arm_max_f16(pOutputProbabilities, S->numberOfClasses, &result, &index);
 
     return (index);
 }
 
 #else
 
-/**
- * @brief Naive Gaussian Bayesian Estimator
- *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
- * @return The predicted class
- *
- */
 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
    const float16_t * in, 
-   float16_t *pBuffer)
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB)
 {
     uint32_t nbClass;
     uint32_t nbDim;
     const float16_t *pPrior = S->classPriors;
     const float16_t *pTheta = S->theta;
     const float16_t *pSigma = S->sigma;
-    float16_t *buffer = pBuffer;
+    float16_t *buffer = pOutputProbabilities;
     const float16_t *pIn=in;
     float16_t result;
     _Float16 sigma;
     _Float16 tmp;
     _Float16 acc1,acc2;
     uint32_t index;
+    (void)pBufferB;
 
     pTheta=S->theta;
     pSigma=S->sigma;
@@ -182,24 +177,24 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
         acc2 = 0.0f16;
         for(nbDim = 0; nbDim < S->vectorDimension; nbDim++)
         {
-           sigma = *pSigma + S->epsilon;
-           acc1 += logf(2.0f16 * (_Float16)PI_F * sigma);
-           acc2 += (*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
+           sigma = (_Float16)*pSigma + (_Float16)S->epsilon;
+           acc1 += (_Float16)logf(2.0f * PI * (float32_t)sigma);
+           acc2 += ((_Float16)*pIn - (_Float16)*pTheta) * ((_Float16)*pIn - (_Float16)*pTheta) / (_Float16)sigma;
 
            pIn++;
            pTheta++;
            pSigma++;
         }
 
-        tmp = -0.5f16 * acc1;
-        tmp -= 0.5f16 * acc2;
+        tmp = -0.5f16 * (_Float16)acc1;
+        tmp -= 0.5f16 * (_Float16)acc2;
 
 
-        *buffer = tmp + logf(*pPrior++);
+        *buffer = (_Float16)tmp + (_Float16)logf((float32_t)*pPrior++);
         buffer++;
     }
 
-    arm_max_f16(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f16(pOutputProbabilities,S->numberOfClasses,&result,&index);
 
     return(index);
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
index 24b89e7..30d1ab9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_naive_gaussian_bayes_predict_f32
  * Description:  Naive Gaussian Bayesian Estimator
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -41,13 +43,12 @@
 /**
  * @brief Naive Gaussian Bayesian Estimator
  *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
+ * @param[in]   *S                      points to a naive bayes instance structure
+ * @param[in]   *in                     points to the elements of the input vector.
+ * @param[out]  *pOutputProbabilities   points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out]  *pBufferB               points to a temporary buffer of length numberOfClasses
  * @return The predicted class
  *
- * @par If the number of classes is big, MVE version will consume lot of
- * stack since the log prior are computed on the stack.
  *
  */
 
@@ -58,19 +59,21 @@
 
 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
    const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB
+   )
 {
     uint32_t         nbClass;
     const float32_t *pTheta = S->theta;
     const float32_t *pSigma = S->sigma;
-    float32_t      *buffer = pBuffer;
+    float32_t      *buffer = pOutputProbabilities;
     const float32_t *pIn = in;
     float32_t       result;
     f32x4_t         vsigma;
     float32_t       tmp;
     f32x4_t         vacc1, vacc2;
     uint32_t        index;
-    float32_t       logclassPriors[S->numberOfClasses];
+    float32_t       *logclassPriors=pBufferB;
     float32_t      *pLogPrior = logclassPriors;
 
     arm_vlog_f32((float32_t *) S->classPriors, logclassPriors, S->numberOfClasses);
@@ -133,7 +136,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
         buffer++;
     }
 
-    arm_max_f32(pBuffer, S->numberOfClasses, &result, &index);
+    arm_max_f32(pOutputProbabilities, S->numberOfClasses, &result, &index);
 
     return (index);
 }
@@ -148,7 +151,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
 
 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
    const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB)
 {
     
     const float32_t *pPrior = S->classPriors;
@@ -159,7 +163,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
     const float32_t *pTheta1 = S->theta + S->vectorDimension;
     const float32_t *pSigma1 = S->sigma + S->vectorDimension;
 
-    float32_t *buffer = pBuffer;
+    float32_t *buffer = pOutputProbabilities;
     const float32_t *pIn=in;
 
     float32_t result;
@@ -174,6 +178,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
     float32x2_t tmpV2;
     float32x4_t thetaV,thetaV1;
     float32x4_t inV;
+    (void)pBufferB;
 
     epsilonV = vdupq_n_f32(S->epsilon);
 
@@ -322,32 +327,24 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
         classBlkCnt--;
     }
 
-    arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index);
 
     return(index);
 }
 
 #else
 
-/**
- * @brief Naive Gaussian Bayesian Estimator
- *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
- * @return The predicted class
- *
- */
 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
    const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB)
 {
     uint32_t nbClass;
     uint32_t nbDim;
     const float32_t *pPrior = S->classPriors;
     const float32_t *pTheta = S->theta;
     const float32_t *pSigma = S->sigma;
-    float32_t *buffer = pBuffer;
+    float32_t *buffer = pOutputProbabilities;
     const float32_t *pIn=in;
     float32_t result;
     float32_t sigma;
@@ -355,6 +352,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
     float32_t acc1,acc2;
     uint32_t index;
 
+    (void)pBufferB;
+
     pTheta=S->theta;
     pSigma=S->sigma;
 
@@ -386,7 +385,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
         buffer++;
     }
 
-    arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index);
 
     return(index);
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables.c
index c059075..28a2085 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables.c
@@ -5,13 +5,13 @@
  * Title:        arm_common_tables.c
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70509,8 +70509,36 @@ const q15_t sqrtTable_Q15[256] = {
      #endif
 #endif /* defined(ARM_MATH_MVEI) */
 
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31)
+/* Generator snippet (Wolfram Language syntax) for the sqrt_initial_lut tables: entry = Round[2^q / (8*Sqrt[i/2^q])], saturated to 2^q - 1.
+ClearAll[tofix];
+tofix[q_][a_] := With[{r = Round[a*2^q]},
+   If[r > (2^q - 1), 2^q - 1, r]
+   ];
+
+(* For q = format, 2^nb is length of the table. NOTE(review): as written (q = 15, nb = 4) this reproduces the 16-entry Q15 table; the Q31 values below match q = 31, nb = 5 with the quarter constant rescaled - confirm against the original generator. *)
+With[{q = 15, nb = 4, q12quarter = 16^^2000},
+  With[{shift = Echo[q - nb]},
+   Table[tofix[q][1.0/Sqrt[1.0*i/2^q]/8.0], {i, 2^(q - 2), 
+     2^q + q12quarter - 1, 2^shift}]]
+  ] // CopyToClipboard
+
+*/
+const q31_t sqrt_initial_lut_q31[32]={536870912, 506166750, 480191942, 457845052, 438353264, 421156193, \
+405836263, 392075079, 379625062, 368290407, 357913941, 348367849, \
+339546978, 331363921, 323745341, 316629190, 309962566, 303700050, \
+297802400, 292235509, 286969573, 281978417, 277238947, 272730696, \
+268435456, 264336964, 260420644, 256673389, 253083375, 249639903, \
+246333269, 243154642}; /* Q31 values of 1/(8*sqrt(x)) for x = 0.25 + k/32, k = 0..31 (536870912 == 2^29 == 0.25 in Q31) */
+#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31) */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15)
+const q15_t sqrt_initial_lut_q15[16]={8192, 7327, 6689, 6193, 5793, 5461, 5181, 4940, 4730, 4544, 4379, \
+4230, 4096, 3974, 3862, 3759}; /* Q15 values of 1/(8*sqrt(x)) for x = 0.25 + k/16, k = 0..15 (8192 == 2^13 == 0.25 in Q15) */
+#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15) */
+
 
-#endif /* if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_TABLES) */
+#endif /* #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES) */
 
 #if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
 const float32_t exp_tab[8] = {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables_f16.c
index 9541c33..d71efb8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_common_tables_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_common_tables_f16.c
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs.c
index 9dd7af5..30b810a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs.c
@@ -6,13 +6,13 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f32() or arm_rfft_f32() functions.
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs_f16.c
index 5e326cb..603e423 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_const_structs_f16.c
@@ -6,13 +6,13 @@
  * Description:  Constant structs that are initialized for user convenience.
  *               For example, some can be given as arguments to the arm_cfft_f32() or arm_rfft_f32() functions.
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables.c
index b6c845c..ba5aa16 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables.c
@@ -6,12 +6,13 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * $Date:        14. April 2020
+ * @version  V1.10.0
+ * @date     04 October 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -28,8 +29,10 @@
  * limitations under the License.
  */
 
+ #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+
  
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 
@@ -47,19 +50,19 @@ uint32_t rearranged_twiddle_tab_stride3_arr_16_f32[2]={
 0,0,};
 
 float32_t rearranged_twiddle_stride1_16_f32[8]={
-1.00000000000000000000f,0.00000000000000000000f,0.92387953251128673848f,
-0.38268343236508978178f,0.70710678118654757274f,0.70710678118654757274f,
-0.38268343236508983729f,0.92387953251128673848f,};
+1.00000000000000000000f,0.00000000000000000000f,0.92387950420379638672f,
+0.38268342614173889160f,0.70710676908493041992f,0.70710676908493041992f,
+0.38268342614173889160f,0.92387950420379638672f,};
 
 float32_t rearranged_twiddle_stride2_16_f32[8]={
-1.00000000000000000000f,0.00000000000000000000f,0.70710678118654757274f,
-0.70710678118654757274f,0.00000000000000006123f,1.00000000000000000000f,
--0.70710678118654746172f,0.70710678118654757274f,};
+1.00000000000000000000f,0.00000000000000000000f,0.70710676908493041992f,
+0.70710676908493041992f,0.00000000000000006123f,1.00000000000000000000f,
+-0.70710676908493041992f,0.70710676908493041992f,};
 
 float32_t rearranged_twiddle_stride3_16_f32[8]={
-1.00000000000000000000f,0.00000000000000000000f,0.38268343236508983729f,
-0.92387953251128673848f,-0.70710678118654746172f,0.70710678118654757274f,
--0.92387953251128684951f,-0.38268343236508967076f,};
+1.00000000000000000000f,0.00000000000000000000f,0.38268342614173889160f,
+0.92387950420379638672f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.92387950420379638672f,-0.38268342614173889160f,};
 
 #endif
 
@@ -75,52 +78,52 @@ uint32_t rearranged_twiddle_tab_stride3_arr_64_f32[3]={
 0,32,0,};
 
 float32_t rearranged_twiddle_stride1_64_f32[40]={
-1.00000000000000000000f,0.00000000000000000000f,0.99518472667219692873f,
-0.09801714032956060363f,0.98078528040323043058f,0.19509032201612824808f,
-0.95694033573220882438f,0.29028467725446233105f,0.92387953251128673848f,
-0.38268343236508978178f,0.88192126434835504956f,0.47139673682599764204f,
-0.83146961230254523567f,0.55557023301960217765f,0.77301045336273699338f,
-0.63439328416364548779f,0.70710678118654757274f,0.70710678118654757274f,
-0.63439328416364548779f,0.77301045336273688235f,0.55557023301960228867f,
-0.83146961230254523567f,0.47139673682599780857f,0.88192126434835493853f,
-0.38268343236508983729f,0.92387953251128673848f,0.29028467725446233105f,
-0.95694033573220893540f,0.19509032201612833135f,0.98078528040323043058f,
-0.09801714032956077016f,0.99518472667219681771f,1.00000000000000000000f,
-0.00000000000000000000f,0.92387953251128673848f,0.38268343236508978178f,
-0.70710678118654757274f,0.70710678118654757274f,0.38268343236508983729f,
-0.92387953251128673848f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99518471956253051758f,
+0.09801714122295379639f,0.98078525066375732422f,0.19509032368659973145f,
+0.95694035291671752930f,0.29028466343879699707f,0.92387950420379638672f,
+0.38268342614173889160f,0.88192129135131835938f,0.47139674425125122070f,
+0.83146959543228149414f,0.55557024478912353516f,0.77301043272018432617f,
+0.63439327478408813477f,0.70710676908493041992f,0.70710676908493041992f,
+0.63439327478408813477f,0.77301043272018432617f,0.55557024478912353516f,
+0.83146959543228149414f,0.47139674425125122070f,0.88192129135131835938f,
+0.38268342614173889160f,0.92387950420379638672f,0.29028466343879699707f,
+0.95694035291671752930f,0.19509032368659973145f,0.98078525066375732422f,
+0.09801714122295379639f,0.99518471956253051758f,1.00000000000000000000f,
+0.00000000000000000000f,0.92387950420379638672f,0.38268342614173889160f,
+0.70710676908493041992f,0.70710676908493041992f,0.38268342614173889160f,
+0.92387950420379638672f,};
 
 float32_t rearranged_twiddle_stride2_64_f32[40]={
-1.00000000000000000000f,0.00000000000000000000f,0.98078528040323043058f,
-0.19509032201612824808f,0.92387953251128673848f,0.38268343236508978178f,
-0.83146961230254523567f,0.55557023301960217765f,0.70710678118654757274f,
-0.70710678118654757274f,0.55557023301960228867f,0.83146961230254523567f,
-0.38268343236508983729f,0.92387953251128673848f,0.19509032201612833135f,
-0.98078528040323043058f,0.00000000000000006123f,1.00000000000000000000f,
--0.19509032201612819257f,0.98078528040323043058f,-0.38268343236508972627f,
-0.92387953251128673848f,-0.55557023301960195560f,0.83146961230254534669f,
--0.70710678118654746172f,0.70710678118654757274f,-0.83146961230254534669f,
-0.55557023301960217765f,-0.92387953251128673848f,0.38268343236508989280f,
--0.98078528040323043058f,0.19509032201612860891f,1.00000000000000000000f,
-0.00000000000000000000f,0.70710678118654757274f,0.70710678118654757274f,
-0.00000000000000006123f,1.00000000000000000000f,-0.70710678118654746172f,
-0.70710678118654757274f,};
+1.00000000000000000000f,0.00000000000000000000f,0.98078525066375732422f,
+0.19509032368659973145f,0.92387950420379638672f,0.38268342614173889160f,
+0.83146959543228149414f,0.55557024478912353516f,0.70710676908493041992f,
+0.70710676908493041992f,0.55557024478912353516f,0.83146959543228149414f,
+0.38268342614173889160f,0.92387950420379638672f,0.19509032368659973145f,
+0.98078525066375732422f,0.00000000000000006123f,1.00000000000000000000f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.38268342614173889160f,
+0.92387950420379638672f,-0.55557024478912353516f,0.83146959543228149414f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.83146959543228149414f,
+0.55557024478912353516f,-0.92387950420379638672f,0.38268342614173889160f,
+-0.98078525066375732422f,0.19509032368659973145f,1.00000000000000000000f,
+0.00000000000000000000f,0.70710676908493041992f,0.70710676908493041992f,
+0.00000000000000006123f,1.00000000000000000000f,-0.70710676908493041992f,
+0.70710676908493041992f,};
 
 float32_t rearranged_twiddle_stride3_64_f32[40]={
-1.00000000000000000000f,0.00000000000000000000f,0.95694033573220882438f,
-0.29028467725446233105f,0.83146961230254523567f,0.55557023301960217765f,
-0.63439328416364548779f,0.77301045336273688235f,0.38268343236508983729f,
-0.92387953251128673848f,0.09801714032956077016f,0.99518472667219681771f,
--0.19509032201612819257f,0.98078528040323043058f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.70710678118654746172f,0.70710678118654757274f,
--0.88192126434835493853f,0.47139673682599780857f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.99518472667219692873f,-0.09801714032956058975f,
--0.92387953251128684951f,-0.38268343236508967076f,-0.77301045336273710440f,
--0.63439328416364526575f,-0.55557023301960217765f,-0.83146961230254523567f,
--0.29028467725446244208f,-0.95694033573220882438f,1.00000000000000000000f,
-0.00000000000000000000f,0.38268343236508983729f,0.92387953251128673848f,
--0.70710678118654746172f,0.70710678118654757274f,-0.92387953251128684951f,
--0.38268343236508967076f,};
+1.00000000000000000000f,0.00000000000000000000f,0.95694035291671752930f,
+0.29028466343879699707f,0.83146959543228149414f,0.55557024478912353516f,
+0.63439327478408813477f,0.77301043272018432617f,0.38268342614173889160f,
+0.92387950420379638672f,0.09801714122295379639f,0.99518471956253051758f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.99518471956253051758f,-0.09801714122295379639f,
+-0.92387950420379638672f,-0.38268342614173889160f,-0.77301043272018432617f,
+-0.63439327478408813477f,-0.55557024478912353516f,-0.83146959543228149414f,
+-0.29028466343879699707f,-0.95694035291671752930f,1.00000000000000000000f,
+0.00000000000000000000f,0.38268342614173889160f,0.92387950420379638672f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.92387950420379638672f,
+-0.38268342614173889160f,};
 
 #endif
 
@@ -136,178 +139,178 @@ uint32_t rearranged_twiddle_tab_stride3_arr_256_f32[4]={
 0,128,160,0,};
 
 float32_t rearranged_twiddle_stride1_256_f32[168]={
-1.00000000000000000000f,0.00000000000000000000f,0.99969881869620424997f,
-0.02454122852291228812f,0.99879545620517240501f,0.04906767432741801493f,
-0.99729045667869020697f,0.07356456359966742631f,0.99518472667219692873f,
-0.09801714032956060363f,0.99247953459870996706f,0.12241067519921619566f,
-0.98917650996478101444f,0.14673047445536174793f,0.98527764238894122162f,
-0.17096188876030121717f,0.98078528040323043058f,0.19509032201612824808f,
-0.97570213003852857003f,0.21910124015686979759f,0.97003125319454397424f,
-0.24298017990326387094f,0.96377606579543984022f,0.26671275747489836538f,
-0.95694033573220882438f,0.29028467725446233105f,0.94952818059303667475f,
-0.31368174039889151761f,0.94154406518302080631f,0.33688985339222005111f,
-0.93299279883473895669f,0.35989503653498811087f,0.92387953251128673848f,
-0.38268343236508978178f,0.91420975570353069095f,0.40524131400498986100f,
-0.90398929312344333820f,0.42755509343028208491f,0.89322430119551532446f,
-0.44961132965460653965f,0.88192126434835504956f,0.47139673682599764204f,
-0.87008699110871146054f,0.49289819222978403790f,0.85772861000027211809f,
-0.51410274419322166128f,0.84485356524970711689f,0.53499761988709715332f,
-0.83146961230254523567f,0.55557023301960217765f,0.81758481315158371139f,
-0.57580819141784533866f,0.80320753148064494287f,0.59569930449243335691f,
-0.78834642762660622761f,0.61523159058062681925f,0.77301045336273699338f,
-0.63439328416364548779f,0.75720884650648456748f,0.65317284295377675551f,
-0.74095112535495921691f,0.67155895484701833009f,0.72424708295146700276f,
-0.68954054473706682948f,0.70710678118654757274f,0.70710678118654757274f,
-0.68954054473706694051f,0.72424708295146689174f,0.67155895484701833009f,
-0.74095112535495910588f,0.65317284295377686654f,0.75720884650648456748f,
-0.63439328416364548779f,0.77301045336273688235f,0.61523159058062681925f,
-0.78834642762660622761f,0.59569930449243346793f,0.80320753148064483184f,
-0.57580819141784533866f,0.81758481315158371139f,0.55557023301960228867f,
-0.83146961230254523567f,0.53499761988709726435f,0.84485356524970700587f,
-0.51410274419322166128f,0.85772861000027211809f,0.49289819222978409341f,
-0.87008699110871134952f,0.47139673682599780857f,0.88192126434835493853f,
-0.44961132965460659516f,0.89322430119551532446f,0.42755509343028219593f,
-0.90398929312344333820f,0.40524131400498986100f,0.91420975570353069095f,
-0.38268343236508983729f,0.92387953251128673848f,0.35989503653498827740f,
-0.93299279883473884567f,0.33688985339222005111f,0.94154406518302080631f,
-0.31368174039889157312f,0.94952818059303667475f,0.29028467725446233105f,
-0.95694033573220893540f,0.26671275747489842090f,0.96377606579543984022f,
-0.24298017990326398197f,0.97003125319454397424f,0.21910124015686976984f,
-0.97570213003852857003f,0.19509032201612833135f,0.98078528040323043058f,
-0.17096188876030135595f,0.98527764238894122162f,0.14673047445536174793f,
-0.98917650996478101444f,0.12241067519921627893f,0.99247953459870996706f,
-0.09801714032956077016f,0.99518472667219681771f,0.07356456359966745406f,
-0.99729045667869020697f,0.04906767432741812596f,0.99879545620517240501f,
-0.02454122852291226384f,0.99969881869620424997f,1.00000000000000000000f,
-0.00000000000000000000f,0.99518472667219692873f,0.09801714032956060363f,
-0.98078528040323043058f,0.19509032201612824808f,0.95694033573220882438f,
-0.29028467725446233105f,0.92387953251128673848f,0.38268343236508978178f,
-0.88192126434835504956f,0.47139673682599764204f,0.83146961230254523567f,
-0.55557023301960217765f,0.77301045336273699338f,0.63439328416364548779f,
-0.70710678118654757274f,0.70710678118654757274f,0.63439328416364548779f,
-0.77301045336273688235f,0.55557023301960228867f,0.83146961230254523567f,
-0.47139673682599780857f,0.88192126434835493853f,0.38268343236508983729f,
-0.92387953251128673848f,0.29028467725446233105f,0.95694033573220893540f,
-0.19509032201612833135f,0.98078528040323043058f,0.09801714032956077016f,
-0.99518472667219681771f,1.00000000000000000000f,0.00000000000000000000f,
-0.92387953251128673848f,0.38268343236508978178f,0.70710678118654757274f,
-0.70710678118654757274f,0.38268343236508983729f,0.92387953251128673848f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99969881772994995117f,
+0.02454122900962829590f,0.99879544973373413086f,0.04906767606735229492f,
+0.99729043245315551758f,0.07356456667184829712f,0.99518471956253051758f,
+0.09801714122295379639f,0.99247956275939941406f,0.12241067737340927124f,
+0.98917651176452636719f,0.14673046767711639404f,0.98527765274047851562f,
+0.17096188664436340332f,0.98078525066375732422f,0.19509032368659973145f,
+0.97570210695266723633f,0.21910123527050018311f,0.97003126144409179688f,
+0.24298018217086791992f,0.96377605199813842773f,0.26671275496482849121f,
+0.95694035291671752930f,0.29028466343879699707f,0.94952815771102905273f,
+0.31368175148963928223f,0.94154405593872070312f,0.33688986301422119141f,
+0.93299281597137451172f,0.35989505052566528320f,0.92387950420379638672f,
+0.38268342614173889160f,0.91420978307723999023f,0.40524131059646606445f,
+0.90398931503295898438f,0.42755508422851562500f,0.89322429895401000977f,
+0.44961133599281311035f,0.88192129135131835938f,0.47139674425125122070f,
+0.87008696794509887695f,0.49289819598197937012f,0.85772860050201416016f,
+0.51410275697708129883f,0.84485357999801635742f,0.53499764204025268555f,
+0.83146959543228149414f,0.55557024478912353516f,0.81758481264114379883f,
+0.57580816745758056641f,0.80320751667022705078f,0.59569931030273437500f,
+0.78834640979766845703f,0.61523157358169555664f,0.77301043272018432617f,
+0.63439327478408813477f,0.75720882415771484375f,0.65317285060882568359f,
+0.74095112085342407227f,0.67155897617340087891f,0.72424709796905517578f,
+0.68954056501388549805f,0.70710676908493041992f,0.70710676908493041992f,
+0.68954056501388549805f,0.72424709796905517578f,0.67155897617340087891f,
+0.74095112085342407227f,0.65317285060882568359f,0.75720882415771484375f,
+0.63439327478408813477f,0.77301043272018432617f,0.61523157358169555664f,
+0.78834640979766845703f,0.59569931030273437500f,0.80320751667022705078f,
+0.57580816745758056641f,0.81758481264114379883f,0.55557024478912353516f,
+0.83146959543228149414f,0.53499764204025268555f,0.84485357999801635742f,
+0.51410275697708129883f,0.85772860050201416016f,0.49289819598197937012f,
+0.87008696794509887695f,0.47139674425125122070f,0.88192129135131835938f,
+0.44961133599281311035f,0.89322429895401000977f,0.42755508422851562500f,
+0.90398931503295898438f,0.40524131059646606445f,0.91420978307723999023f,
+0.38268342614173889160f,0.92387950420379638672f,0.35989505052566528320f,
+0.93299281597137451172f,0.33688986301422119141f,0.94154405593872070312f,
+0.31368175148963928223f,0.94952815771102905273f,0.29028466343879699707f,
+0.95694035291671752930f,0.26671275496482849121f,0.96377605199813842773f,
+0.24298018217086791992f,0.97003126144409179688f,0.21910123527050018311f,
+0.97570210695266723633f,0.19509032368659973145f,0.98078525066375732422f,
+0.17096188664436340332f,0.98527765274047851562f,0.14673046767711639404f,
+0.98917651176452636719f,0.12241067737340927124f,0.99247956275939941406f,
+0.09801714122295379639f,0.99518471956253051758f,0.07356456667184829712f,
+0.99729043245315551758f,0.04906767606735229492f,0.99879544973373413086f,
+0.02454122900962829590f,0.99969881772994995117f,1.00000000000000000000f,
+0.00000000000000000000f,0.99518471956253051758f,0.09801714122295379639f,
+0.98078525066375732422f,0.19509032368659973145f,0.95694035291671752930f,
+0.29028466343879699707f,0.92387950420379638672f,0.38268342614173889160f,
+0.88192129135131835938f,0.47139674425125122070f,0.83146959543228149414f,
+0.55557024478912353516f,0.77301043272018432617f,0.63439327478408813477f,
+0.70710676908493041992f,0.70710676908493041992f,0.63439327478408813477f,
+0.77301043272018432617f,0.55557024478912353516f,0.83146959543228149414f,
+0.47139674425125122070f,0.88192129135131835938f,0.38268342614173889160f,
+0.92387950420379638672f,0.29028466343879699707f,0.95694035291671752930f,
+0.19509032368659973145f,0.98078525066375732422f,0.09801714122295379639f,
+0.99518471956253051758f,1.00000000000000000000f,0.00000000000000000000f,
+0.92387950420379638672f,0.38268342614173889160f,0.70710676908493041992f,
+0.70710676908493041992f,0.38268342614173889160f,0.92387950420379638672f,};
 
 float32_t rearranged_twiddle_stride2_256_f32[168]={
-1.00000000000000000000f,0.00000000000000000000f,0.99879545620517240501f,
-0.04906767432741801493f,0.99518472667219692873f,0.09801714032956060363f,
-0.98917650996478101444f,0.14673047445536174793f,0.98078528040323043058f,
-0.19509032201612824808f,0.97003125319454397424f,0.24298017990326387094f,
-0.95694033573220882438f,0.29028467725446233105f,0.94154406518302080631f,
-0.33688985339222005111f,0.92387953251128673848f,0.38268343236508978178f,
-0.90398929312344333820f,0.42755509343028208491f,0.88192126434835504956f,
-0.47139673682599764204f,0.85772861000027211809f,0.51410274419322166128f,
-0.83146961230254523567f,0.55557023301960217765f,0.80320753148064494287f,
-0.59569930449243335691f,0.77301045336273699338f,0.63439328416364548779f,
-0.74095112535495921691f,0.67155895484701833009f,0.70710678118654757274f,
-0.70710678118654757274f,0.67155895484701833009f,0.74095112535495910588f,
-0.63439328416364548779f,0.77301045336273688235f,0.59569930449243346793f,
-0.80320753148064483184f,0.55557023301960228867f,0.83146961230254523567f,
-0.51410274419322166128f,0.85772861000027211809f,0.47139673682599780857f,
-0.88192126434835493853f,0.42755509343028219593f,0.90398929312344333820f,
-0.38268343236508983729f,0.92387953251128673848f,0.33688985339222005111f,
-0.94154406518302080631f,0.29028467725446233105f,0.95694033573220893540f,
-0.24298017990326398197f,0.97003125319454397424f,0.19509032201612833135f,
-0.98078528040323043058f,0.14673047445536174793f,0.98917650996478101444f,
-0.09801714032956077016f,0.99518472667219681771f,0.04906767432741812596f,
-0.99879545620517240501f,0.00000000000000006123f,1.00000000000000000000f,
--0.04906767432741800800f,0.99879545620517240501f,-0.09801714032956064526f,
-0.99518472667219692873f,-0.14673047445536163691f,0.98917650996478101444f,
--0.19509032201612819257f,0.98078528040323043058f,-0.24298017990326387094f,
-0.97003125319454397424f,-0.29028467725446216452f,0.95694033573220893540f,
--0.33688985339221994009f,0.94154406518302080631f,-0.38268343236508972627f,
-0.92387953251128673848f,-0.42755509343028186287f,0.90398929312344344922f,
--0.47139673682599769755f,0.88192126434835504956f,-0.51410274419322155026f,
-0.85772861000027211809f,-0.55557023301960195560f,0.83146961230254534669f,
--0.59569930449243335691f,0.80320753148064494287f,-0.63439328416364537677f,
-0.77301045336273710440f,-0.67155895484701844111f,0.74095112535495899486f,
--0.70710678118654746172f,0.70710678118654757274f,-0.74095112535495888384f,
-0.67155895484701855214f,-0.77301045336273699338f,0.63439328416364548779f,
--0.80320753148064483184f,0.59569930449243346793f,-0.83146961230254534669f,
-0.55557023301960217765f,-0.85772861000027200706f,0.51410274419322177231f,
--0.88192126434835493853f,0.47139673682599780857f,-0.90398929312344333820f,
-0.42755509343028202940f,-0.92387953251128673848f,0.38268343236508989280f,
--0.94154406518302069529f,0.33688985339222032867f,-0.95694033573220882438f,
-0.29028467725446238656f,-0.97003125319454397424f,0.24298017990326406523f,
--0.98078528040323043058f,0.19509032201612860891f,-0.98917650996478101444f,
-0.14673047445536180344f,-0.99518472667219681771f,0.09801714032956082567f,
--0.99879545620517240501f,0.04906767432741796636f,1.00000000000000000000f,
-0.00000000000000000000f,0.98078528040323043058f,0.19509032201612824808f,
-0.92387953251128673848f,0.38268343236508978178f,0.83146961230254523567f,
-0.55557023301960217765f,0.70710678118654757274f,0.70710678118654757274f,
-0.55557023301960228867f,0.83146961230254523567f,0.38268343236508983729f,
-0.92387953251128673848f,0.19509032201612833135f,0.98078528040323043058f,
-0.00000000000000006123f,1.00000000000000000000f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.38268343236508972627f,0.92387953251128673848f,
--0.55557023301960195560f,0.83146961230254534669f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.83146961230254534669f,0.55557023301960217765f,
--0.92387953251128673848f,0.38268343236508989280f,-0.98078528040323043058f,
-0.19509032201612860891f,1.00000000000000000000f,0.00000000000000000000f,
-0.70710678118654757274f,0.70710678118654757274f,0.00000000000000006123f,
-1.00000000000000000000f,-0.70710678118654746172f,0.70710678118654757274f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99879544973373413086f,
+0.04906767606735229492f,0.99518471956253051758f,0.09801714122295379639f,
+0.98917651176452636719f,0.14673046767711639404f,0.98078525066375732422f,
+0.19509032368659973145f,0.97003126144409179688f,0.24298018217086791992f,
+0.95694035291671752930f,0.29028466343879699707f,0.94154405593872070312f,
+0.33688986301422119141f,0.92387950420379638672f,0.38268342614173889160f,
+0.90398931503295898438f,0.42755508422851562500f,0.88192129135131835938f,
+0.47139674425125122070f,0.85772860050201416016f,0.51410275697708129883f,
+0.83146959543228149414f,0.55557024478912353516f,0.80320751667022705078f,
+0.59569931030273437500f,0.77301043272018432617f,0.63439327478408813477f,
+0.74095112085342407227f,0.67155897617340087891f,0.70710676908493041992f,
+0.70710676908493041992f,0.67155897617340087891f,0.74095112085342407227f,
+0.63439327478408813477f,0.77301043272018432617f,0.59569931030273437500f,
+0.80320751667022705078f,0.55557024478912353516f,0.83146959543228149414f,
+0.51410275697708129883f,0.85772860050201416016f,0.47139674425125122070f,
+0.88192129135131835938f,0.42755508422851562500f,0.90398931503295898438f,
+0.38268342614173889160f,0.92387950420379638672f,0.33688986301422119141f,
+0.94154405593872070312f,0.29028466343879699707f,0.95694035291671752930f,
+0.24298018217086791992f,0.97003126144409179688f,0.19509032368659973145f,
+0.98078525066375732422f,0.14673046767711639404f,0.98917651176452636719f,
+0.09801714122295379639f,0.99518471956253051758f,0.04906767606735229492f,
+0.99879544973373413086f,0.00000000000000006123f,1.00000000000000000000f,
+-0.04906767606735229492f,0.99879544973373413086f,-0.09801714122295379639f,
+0.99518471956253051758f,-0.14673046767711639404f,0.98917651176452636719f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.24298018217086791992f,
+0.97003126144409179688f,-0.29028466343879699707f,0.95694035291671752930f,
+-0.33688986301422119141f,0.94154405593872070312f,-0.38268342614173889160f,
+0.92387950420379638672f,-0.42755508422851562500f,0.90398931503295898438f,
+-0.47139674425125122070f,0.88192129135131835938f,-0.51410275697708129883f,
+0.85772860050201416016f,-0.55557024478912353516f,0.83146959543228149414f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.63439327478408813477f,
+0.77301043272018432617f,-0.67155897617340087891f,0.74095112085342407227f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.74095112085342407227f,
+0.67155897617340087891f,-0.77301043272018432617f,0.63439327478408813477f,
+-0.80320751667022705078f,0.59569931030273437500f,-0.83146959543228149414f,
+0.55557024478912353516f,-0.85772860050201416016f,0.51410275697708129883f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.90398931503295898438f,
+0.42755508422851562500f,-0.92387950420379638672f,0.38268342614173889160f,
+-0.94154405593872070312f,0.33688986301422119141f,-0.95694035291671752930f,
+0.29028466343879699707f,-0.97003126144409179688f,0.24298018217086791992f,
+-0.98078525066375732422f,0.19509032368659973145f,-0.98917651176452636719f,
+0.14673046767711639404f,-0.99518471956253051758f,0.09801714122295379639f,
+-0.99879544973373413086f,0.04906767606735229492f,1.00000000000000000000f,
+0.00000000000000000000f,0.98078525066375732422f,0.19509032368659973145f,
+0.92387950420379638672f,0.38268342614173889160f,0.83146959543228149414f,
+0.55557024478912353516f,0.70710676908493041992f,0.70710676908493041992f,
+0.55557024478912353516f,0.83146959543228149414f,0.38268342614173889160f,
+0.92387950420379638672f,0.19509032368659973145f,0.98078525066375732422f,
+0.00000000000000006123f,1.00000000000000000000f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.38268342614173889160f,0.92387950420379638672f,
+-0.55557024478912353516f,0.83146959543228149414f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.83146959543228149414f,0.55557024478912353516f,
+-0.92387950420379638672f,0.38268342614173889160f,-0.98078525066375732422f,
+0.19509032368659973145f,1.00000000000000000000f,0.00000000000000000000f,
+0.70710676908493041992f,0.70710676908493041992f,0.00000000000000006123f,
+1.00000000000000000000f,-0.70710676908493041992f,0.70710676908493041992f,};
 
 float32_t rearranged_twiddle_stride3_256_f32[168]={
-1.00000000000000000000f,0.00000000000000000000f,0.99729045667869020697f,
-0.07356456359966742631f,0.98917650996478101444f,0.14673047445536174793f,
-0.97570213003852857003f,0.21910124015686979759f,0.95694033573220882438f,
-0.29028467725446233105f,0.93299279883473895669f,0.35989503653498811087f,
-0.90398929312344333820f,0.42755509343028208491f,0.87008699110871146054f,
-0.49289819222978403790f,0.83146961230254523567f,0.55557023301960217765f,
-0.78834642762660622761f,0.61523159058062681925f,0.74095112535495921691f,
-0.67155895484701833009f,0.68954054473706694051f,0.72424708295146689174f,
-0.63439328416364548779f,0.77301045336273688235f,0.57580819141784533866f,
-0.81758481315158371139f,0.51410274419322166128f,0.85772861000027211809f,
-0.44961132965460659516f,0.89322430119551532446f,0.38268343236508983729f,
-0.92387953251128673848f,0.31368174039889157312f,0.94952818059303667475f,
-0.24298017990326398197f,0.97003125319454397424f,0.17096188876030135595f,
-0.98527764238894122162f,0.09801714032956077016f,0.99518472667219681771f,
-0.02454122852291226384f,0.99969881869620424997f,-0.04906767432741800800f,
-0.99879545620517240501f,-0.12241067519921615403f,0.99247953459870996706f,
--0.19509032201612819257f,0.98078528040323043058f,-0.26671275747489830987f,
-0.96377606579543984022f,-0.33688985339221994009f,0.94154406518302080631f,
--0.40524131400498974998f,0.91420975570353069095f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.53499761988709704230f,0.84485356524970722791f,
--0.59569930449243335691f,0.80320753148064494287f,-0.65317284295377653347f,
-0.75720884650648467851f,-0.70710678118654746172f,0.70710678118654757274f,
--0.75720884650648467851f,0.65317284295377664449f,-0.80320753148064483184f,
-0.59569930449243346793f,-0.84485356524970711689f,0.53499761988709715332f,
--0.88192126434835493853f,0.47139673682599780857f,-0.91420975570353069095f,
-0.40524131400498991651f,-0.94154406518302069529f,0.33688985339222032867f,
--0.96377606579543984022f,0.26671275747489847641f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.99247953459870996706f,0.12241067519921634832f,
--0.99879545620517240501f,0.04906767432741796636f,-0.99969881869620424997f,
--0.02454122852291207996f,-0.99518472667219692873f,-0.09801714032956058975f,
--0.98527764238894133264f,-0.17096188876030096737f,-0.97003125319454397424f,
--0.24298017990326381543f,-0.94952818059303678577f,-0.31368174039889118454f,
--0.92387953251128684951f,-0.38268343236508967076f,-0.89322430119551532446f,
--0.44961132965460665067f,-0.85772861000027211809f,-0.51410274419322155026f,
--0.81758481315158371139f,-0.57580819141784533866f,-0.77301045336273710440f,
--0.63439328416364526575f,-0.72424708295146700276f,-0.68954054473706682948f,
--0.67155895484701866316f,-0.74095112535495888384f,-0.61523159058062726334f,
--0.78834642762660589455f,-0.55557023301960217765f,-0.83146961230254523567f,
--0.49289819222978420443f,-0.87008699110871134952f,-0.42755509343028247349f,
--0.90398929312344311615f,-0.35989503653498794433f,-0.93299279883473895669f,
--0.29028467725446244208f,-0.95694033573220882438f,-0.21910124015687010290f,
--0.97570213003852845901f,-0.14673047445536230304f,-0.98917650996478090342f,
--0.07356456359966735692f,-0.99729045667869020697f,1.00000000000000000000f,
-0.00000000000000000000f,0.95694033573220882438f,0.29028467725446233105f,
-0.83146961230254523567f,0.55557023301960217765f,0.63439328416364548779f,
-0.77301045336273688235f,0.38268343236508983729f,0.92387953251128673848f,
-0.09801714032956077016f,0.99518472667219681771f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.47139673682599769755f,0.88192126434835504956f,
--0.70710678118654746172f,0.70710678118654757274f,-0.88192126434835493853f,
-0.47139673682599780857f,-0.98078528040323043058f,0.19509032201612860891f,
--0.99518472667219692873f,-0.09801714032956058975f,-0.92387953251128684951f,
--0.38268343236508967076f,-0.77301045336273710440f,-0.63439328416364526575f,
--0.55557023301960217765f,-0.83146961230254523567f,-0.29028467725446244208f,
--0.95694033573220882438f,1.00000000000000000000f,0.00000000000000000000f,
-0.38268343236508983729f,0.92387953251128673848f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.92387953251128684951f,-0.38268343236508967076f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99729043245315551758f,
+0.07356456667184829712f,0.98917651176452636719f,0.14673046767711639404f,
+0.97570210695266723633f,0.21910123527050018311f,0.95694035291671752930f,
+0.29028466343879699707f,0.93299281597137451172f,0.35989505052566528320f,
+0.90398931503295898438f,0.42755508422851562500f,0.87008696794509887695f,
+0.49289819598197937012f,0.83146959543228149414f,0.55557024478912353516f,
+0.78834640979766845703f,0.61523157358169555664f,0.74095112085342407227f,
+0.67155897617340087891f,0.68954056501388549805f,0.72424709796905517578f,
+0.63439327478408813477f,0.77301043272018432617f,0.57580816745758056641f,
+0.81758481264114379883f,0.51410275697708129883f,0.85772860050201416016f,
+0.44961133599281311035f,0.89322429895401000977f,0.38268342614173889160f,
+0.92387950420379638672f,0.31368175148963928223f,0.94952815771102905273f,
+0.24298018217086791992f,0.97003126144409179688f,0.17096188664436340332f,
+0.98527765274047851562f,0.09801714122295379639f,0.99518471956253051758f,
+0.02454122900962829590f,0.99969881772994995117f,-0.04906767606735229492f,
+0.99879544973373413086f,-0.12241067737340927124f,0.99247956275939941406f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.26671275496482849121f,
+0.96377605199813842773f,-0.33688986301422119141f,0.94154405593872070312f,
+-0.40524131059646606445f,0.91420978307723999023f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.53499764204025268555f,0.84485357999801635742f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.65317285060882568359f,
+0.75720882415771484375f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.75720882415771484375f,0.65317285060882568359f,-0.80320751667022705078f,
+0.59569931030273437500f,-0.84485357999801635742f,0.53499764204025268555f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.91420978307723999023f,
+0.40524131059646606445f,-0.94154405593872070312f,0.33688986301422119141f,
+-0.96377605199813842773f,0.26671275496482849121f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.99247956275939941406f,0.12241067737340927124f,
+-0.99879544973373413086f,0.04906767606735229492f,-0.99969881772994995117f,
+-0.02454122900962829590f,-0.99518471956253051758f,-0.09801714122295379639f,
+-0.98527765274047851562f,-0.17096188664436340332f,-0.97003126144409179688f,
+-0.24298018217086791992f,-0.94952815771102905273f,-0.31368175148963928223f,
+-0.92387950420379638672f,-0.38268342614173889160f,-0.89322429895401000977f,
+-0.44961133599281311035f,-0.85772860050201416016f,-0.51410275697708129883f,
+-0.81758481264114379883f,-0.57580816745758056641f,-0.77301043272018432617f,
+-0.63439327478408813477f,-0.72424709796905517578f,-0.68954056501388549805f,
+-0.67155897617340087891f,-0.74095112085342407227f,-0.61523157358169555664f,
+-0.78834640979766845703f,-0.55557024478912353516f,-0.83146959543228149414f,
+-0.49289819598197937012f,-0.87008696794509887695f,-0.42755508422851562500f,
+-0.90398931503295898438f,-0.35989505052566528320f,-0.93299281597137451172f,
+-0.29028466343879699707f,-0.95694035291671752930f,-0.21910123527050018311f,
+-0.97570210695266723633f,-0.14673046767711639404f,-0.98917651176452636719f,
+-0.07356456667184829712f,-0.99729043245315551758f,1.00000000000000000000f,
+0.00000000000000000000f,0.95694035291671752930f,0.29028466343879699707f,
+0.83146959543228149414f,0.55557024478912353516f,0.63439327478408813477f,
+0.77301043272018432617f,0.38268342614173889160f,0.92387950420379638672f,
+0.09801714122295379639f,0.99518471956253051758f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.47139674425125122070f,0.88192129135131835938f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.88192129135131835938f,
+0.47139674425125122070f,-0.98078525066375732422f,0.19509032368659973145f,
+-0.99518471956253051758f,-0.09801714122295379639f,-0.92387950420379638672f,
+-0.38268342614173889160f,-0.77301043272018432617f,-0.63439327478408813477f,
+-0.55557024478912353516f,-0.83146959543228149414f,-0.29028466343879699707f,
+-0.95694035291671752930f,1.00000000000000000000f,0.00000000000000000000f,
+0.38268342614173889160f,0.92387950420379638672f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.92387950420379638672f,-0.38268342614173889160f,};
 
 #endif
 
@@ -323,691 +326,691 @@ uint32_t rearranged_twiddle_tab_stride3_arr_1024_f32[5]={
 0,512,640,672,0,};
 
 float32_t rearranged_twiddle_stride1_1024_f32[680]={
-1.00000000000000000000f,0.00000000000000000000f,0.99998117528260110909f,
-0.00613588464915447527f,0.99992470183914450299f,0.01227153828571992539f,
-0.99983058179582340319f,0.01840672990580482019f,0.99969881869620424997f,
-0.02454122852291228812f,0.99952941750109314256f,0.03067480317663662595f,
-0.99932238458834954375f,0.03680722294135883171f,0.99907772775264536147f,
-0.04293825693494082024f,0.99879545620517240501f,0.04906767432741801493f,
-0.99847558057329477421f,0.05519524434968993420f,0.99811811290014917919f,
-0.06132073630220857829f,0.99772306664419163624f,0.06744391956366405094f,
-0.99729045667869020697f,0.07356456359966742631f,0.99682029929116566791f,
-0.07968243797143012563f,0.99631261218277800129f,0.08579731234443989385f,
-0.99576741446765981713f,0.09190895649713272386f,0.99518472667219692873f,
-0.09801714032956060363f,0.99456457073425541537f,0.10412163387205458642f,
-0.99390697000235606051f,0.11022220729388305938f,0.99321194923479450001f,
-0.11631863091190475235f,0.99247953459870996706f,0.12241067519921619566f,
-0.99170975366909952520f,0.12849811079379316880f,0.99090263542778000971f,
-0.13458070850712616773f,0.99005821026229712256f,0.14065823933284921088f,
-0.98917650996478101444f,0.14673047445536174793f,0.98825756773074946437f,
-0.15279718525844343535f,0.98730141815785843473f,0.15885814333386144570f,
-0.98630809724459866938f,0.16491312048996989437f,0.98527764238894122162f,
-0.17096188876030121717f,0.98421009238692902521f,0.17700422041214874946f,
-0.98310548743121628501f,0.18303988795514095078f,0.98196386910955524296f,
-0.18906866414980619262f,0.98078528040323043058f,0.19509032201612824808f,
-0.97956976568544051887f,0.20110463484209190055f,0.97831737071962765473f,
-0.20711137619221856032f,0.97702814265775439484f,0.21311031991609136194f,
-0.97570213003852857003f,0.21910124015686979759f,0.97433938278557585821f,
-0.22508391135979283204f,0.97293995220556017678f,0.23105810828067110951f,
-0.97150389098625178352f,0.23702360599436719801f,0.97003125319454397424f,
-0.24298017990326387094f,0.96852209427441737777f,0.24892760574572014853f,
-0.96697647104485207059f,0.25486565960451457169f,0.96539444169768939830f,
-0.26079411791527551401f,0.96377606579543984022f,0.26671275747489836538f,
-0.96212140426904158019f,0.27262135544994897662f,0.96043051941556578655f,
-0.27851968938505305973f,0.95870347489587159906f,0.28440753721127187692f,
-0.95694033573220882438f,0.29028467725446233105f,0.95514116830577078243f,
-0.29615088824362378883f,0.95330604035419386211f,0.30200594931922808417f,
-0.95143502096900833820f,0.30784964004153486661f,0.94952818059303667475f,
-0.31368174039889151761f,0.94758559101774109124f,0.31950203081601569188f,
-0.94560732538052127971f,0.32531029216226292622f,0.94359345816196038559f,
-0.33110630575987642921f,0.94154406518302080631f,0.33688985339222005111f,
-0.93945922360218991898f,0.34266071731199437833f,0.93733901191257495977f,
-0.34841868024943456472f,0.93518350993894761025f,0.35416352542049034380f,
-0.93299279883473895669f,0.35989503653498811087f,0.93076696107898371224f,
-0.36561299780477385379f,0.92850608047321558924f,0.37131719395183754306f,
-0.92621024213831137928f,0.37700741021641825945f,0.92387953251128673848f,
-0.38268343236508978178f,0.92151403934204190183f,0.38834504669882624617f,
-0.91911385169005777040f,0.39399204006104809883f,0.91667905992104270485f,
-0.39962419984564678810f,0.91420975570353069095f,0.40524131400498986100f,
-0.91170603200542987832f,0.41084317105790391089f,0.90916798309052238025f,
-0.41642956009763715253f,0.90659570451491533483f,0.42200027079979968159f,
-0.90398929312344333820f,0.42755509343028208491f,0.90134884704602202810f,
-0.43309381885315195726f,0.89867446569395381673f,0.43861623853852765853f,
-0.89596624975618521791f,0.44412214457042920035f,0.89322430119551532446f,
-0.44961132965460653965f,0.89044872324475787817f,0.45508358712634383592f,
-0.88763962040285393496f,0.46053871095824000514f,0.88479709843093778954f,
-0.46597649576796618121f,0.88192126434835504956f,0.47139673682599764204f,
-0.87901222642863352519f,0.47679923006332208812f,0.87607009419540660122f,
-0.48218377207912271887f,0.87309497841829009079f,0.48755016014843599592f,
-0.87008699110871146054f,0.49289819222978403790f,0.86704624551569264845f,
-0.49822766697278181303f,0.86397285612158669643f,0.50353838372571757542f,
-0.86086693863776730939f,0.50883014254310698909f,0.85772861000027211809f,
-0.51410274419322166128f,0.85455798836540053376f,0.51935599016558964269f,
-0.85135519310526519554f,0.52458968267846894928f,0.84812034480329723252f,
-0.52980362468629460526f,0.84485356524970711689f,0.53499761988709715332f,
-0.84155497743689844370f,0.54017147272989285423f,0.83822470555483807875f,
-0.54532498842204646383f,0.83486287498638001026f,0.55045797293660481131f,
-0.83146961230254523567f,0.55557023301960217765f,0.82804504525775579626f,
-0.56066157619733603124f,0.82458930278502529099f,0.56573181078361312046f,
-0.82110251499110464835f,0.57078074588696725566f,0.81758481315158371139f,
-0.57580819141784533866f,0.81403632970594841378f,0.58081395809576452649f,
-0.81045719825259476821f,0.58579785745643886408f,0.80684755354379933401f,
-0.59075970185887416442f,0.80320753148064494287f,0.59569930449243335691f,
-0.79953726910790501314f,0.60061647938386897305f,0.79583690460888356633f,
-0.60551104140432554512f,0.79210657730021238887f,0.61038280627630947528f,
-0.78834642762660622761f,0.61523159058062681925f,0.78455659715557524159f,
-0.62005721176328909561f,0.78073722857209448822f,0.62485948814238634341f,
-0.77688846567323244230f,0.62963823891492698426f,0.77301045336273699338f,
-0.63439328416364548779f,0.76910333764557969882f,0.63912444486377573138f,
-0.76516726562245895860f,0.64383154288979138613f,0.76120238548426177871f,
-0.64851440102211244110f,0.75720884650648456748f,0.65317284295377675551f,
-0.75318679904361252042f,0.65780669329707863735f,0.74913639452345937020f,
-0.66241577759017178373f,0.74505778544146594733f,0.66699992230363747137f,
-0.74095112535495921691f,0.67155895484701833009f,0.73681656887736979300f,
-0.67609270357531592310f,0.73265427167241281570f,0.68060099779545302212f,
-0.72846439044822519637f,0.68508366777270035541f,0.72424708295146700276f,
-0.68954054473706682948f,0.72000250796138165477f,0.69397146088965389055f,
-0.71573082528381870571f,0.69837624940897280457f,0.71143219574521643356f,
-0.70275474445722529993f,0.70710678118654757274f,0.70710678118654757274f,
-0.70275474445722529993f,0.71143219574521643356f,0.69837624940897291559f,
-0.71573082528381859468f,0.69397146088965400157f,0.72000250796138165477f,
-0.68954054473706694051f,0.72424708295146689174f,0.68508366777270035541f,
-0.72846439044822519637f,0.68060099779545302212f,0.73265427167241281570f,
-0.67609270357531603413f,0.73681656887736979300f,0.67155895484701833009f,
-0.74095112535495910588f,0.66699992230363747137f,0.74505778544146594733f,
-0.66241577759017178373f,0.74913639452345925918f,0.65780669329707874837f,
-0.75318679904361252042f,0.65317284295377686654f,0.75720884650648456748f,
-0.64851440102211255212f,0.76120238548426177871f,0.64383154288979149715f,
-0.76516726562245895860f,0.63912444486377573138f,0.76910333764557958780f,
-0.63439328416364548779f,0.77301045336273688235f,0.62963823891492709528f,
-0.77688846567323244230f,0.62485948814238645443f,0.78073722857209448822f,
-0.62005721176328920663f,0.78455659715557524159f,0.61523159058062681925f,
-0.78834642762660622761f,0.61038280627630947528f,0.79210657730021227785f,
-0.60551104140432554512f,0.79583690460888345530f,0.60061647938386897305f,
-0.79953726910790501314f,0.59569930449243346793f,0.80320753148064483184f,
-0.59075970185887427544f,0.80684755354379922299f,0.58579785745643886408f,
-0.81045719825259476821f,0.58081395809576452649f,0.81403632970594830276f,
-0.57580819141784533866f,0.81758481315158371139f,0.57078074588696736669f,
-0.82110251499110464835f,0.56573181078361323149f,0.82458930278502529099f,
-0.56066157619733603124f,0.82804504525775579626f,0.55557023301960228867f,
-0.83146961230254523567f,0.55045797293660481131f,0.83486287498638001026f,
-0.54532498842204646383f,0.83822470555483796772f,0.54017147272989296525f,
-0.84155497743689833268f,0.53499761988709726435f,0.84485356524970700587f,
-0.52980362468629482731f,0.84812034480329712149f,0.52458968267846883826f,
-0.85135519310526519554f,0.51935599016558953167f,0.85455798836540053376f,
-0.51410274419322166128f,0.85772861000027211809f,0.50883014254310698909f,
-0.86086693863776730939f,0.50353838372571757542f,0.86397285612158669643f,
-0.49822766697278186854f,0.86704624551569264845f,0.49289819222978409341f,
-0.87008699110871134952f,0.48755016014843605143f,0.87309497841829009079f,
-0.48218377207912282989f,0.87607009419540660122f,0.47679923006332225466f,
-0.87901222642863341417f,0.47139673682599780857f,0.88192126434835493853f,
-0.46597649576796612569f,0.88479709843093778954f,0.46053871095824000514f,
-0.88763962040285393496f,0.45508358712634383592f,0.89044872324475787817f,
-0.44961132965460659516f,0.89322430119551532446f,0.44412214457042925586f,
-0.89596624975618510689f,0.43861623853852771404f,0.89867446569395381673f,
-0.43309381885315201277f,0.90134884704602202810f,0.42755509343028219593f,
-0.90398929312344333820f,0.42200027079979979261f,0.90659570451491533483f,
-0.41642956009763731906f,0.90916798309052226923f,0.41084317105790391089f,
-0.91170603200542987832f,0.40524131400498986100f,0.91420975570353069095f,
-0.39962419984564678810f,0.91667905992104270485f,0.39399204006104809883f,
-0.91911385169005777040f,0.38834504669882630168f,0.92151403934204190183f,
-0.38268343236508983729f,0.92387953251128673848f,0.37700741021641831496f,
-0.92621024213831126826f,0.37131719395183759858f,0.92850608047321558924f,
-0.36561299780477396482f,0.93076696107898371224f,0.35989503653498827740f,
-0.93299279883473884567f,0.35416352542049051033f,0.93518350993894749923f,
-0.34841868024943450921f,0.93733901191257495977f,0.34266071731199437833f,
-0.93945922360218991898f,0.33688985339222005111f,0.94154406518302080631f,
-0.33110630575987642921f,0.94359345816196038559f,0.32531029216226298173f,
-0.94560732538052127971f,0.31950203081601574739f,0.94758559101774109124f,
-0.31368174039889157312f,0.94952818059303667475f,0.30784964004153497763f,
-0.95143502096900833820f,0.30200594931922819519f,0.95330604035419375109f,
-0.29615088824362395536f,0.95514116830577067141f,0.29028467725446233105f,
-0.95694033573220893540f,0.28440753721127182141f,0.95870347489587159906f,
-0.27851968938505305973f,0.96043051941556578655f,0.27262135544994897662f,
-0.96212140426904158019f,0.26671275747489842090f,0.96377606579543984022f,
-0.26079411791527556952f,0.96539444169768939830f,0.25486565960451462720f,
-0.96697647104485207059f,0.24892760574572025956f,0.96852209427441726675f,
-0.24298017990326398197f,0.97003125319454397424f,0.23702360599436733679f,
-0.97150389098625178352f,0.23105810828067127605f,0.97293995220556006576f,
-0.22508391135979277653f,0.97433938278557585821f,0.21910124015686976984f,
-0.97570213003852857003f,0.21311031991609136194f,0.97702814265775439484f,
-0.20711137619221856032f,0.97831737071962765473f,0.20110463484209195606f,
-0.97956976568544051887f,0.19509032201612833135f,0.98078528040323043058f,
-0.18906866414980627589f,0.98196386910955524296f,0.18303988795514106180f,
-0.98310548743121628501f,0.17700422041214886049f,0.98421009238692902521f,
-0.17096188876030135595f,0.98527764238894122162f,0.16491312048997008866f,
-0.98630809724459866938f,0.15885814333386139019f,0.98730141815785843473f,
-0.15279718525844340760f,0.98825756773074946437f,0.14673047445536174793f,
-0.98917650996478101444f,0.14065823933284923863f,0.99005821026229712256f,
-0.13458070850712622324f,0.99090263542778000971f,0.12849811079379322432f,
-0.99170975366909952520f,0.12241067519921627893f,0.99247953459870996706f,
-0.11631863091190487725f,0.99321194923479450001f,0.11022220729388318428f,
-0.99390697000235606051f,0.10412163387205472520f,0.99456457073425541537f,
-0.09801714032956077016f,0.99518472667219681771f,0.09190895649713269611f,
-0.99576741446765981713f,0.08579731234443987997f,0.99631261218277800129f,
-0.07968243797143012563f,0.99682029929116566791f,0.07356456359966745406f,
-0.99729045667869020697f,0.06744391956366410645f,0.99772306664419163624f,
-0.06132073630220864768f,0.99811811290014917919f,0.05519524434969003135f,
-0.99847558057329477421f,0.04906767432741812596f,0.99879545620517240501f,
-0.04293825693494095902f,0.99907772775264536147f,0.03680722294135899131f,
-0.99932238458834954375f,0.03067480317663658085f,0.99952941750109314256f,
-0.02454122852291226384f,0.99969881869620424997f,0.01840672990580482019f,
-0.99983058179582340319f,0.01227153828571994447f,0.99992470183914450299f,
-0.00613588464915451517f,0.99998117528260110909f,1.00000000000000000000f,
-0.00000000000000000000f,0.99969881869620424997f,0.02454122852291228812f,
-0.99879545620517240501f,0.04906767432741801493f,0.99729045667869020697f,
-0.07356456359966742631f,0.99518472667219692873f,0.09801714032956060363f,
-0.99247953459870996706f,0.12241067519921619566f,0.98917650996478101444f,
-0.14673047445536174793f,0.98527764238894122162f,0.17096188876030121717f,
-0.98078528040323043058f,0.19509032201612824808f,0.97570213003852857003f,
-0.21910124015686979759f,0.97003125319454397424f,0.24298017990326387094f,
-0.96377606579543984022f,0.26671275747489836538f,0.95694033573220882438f,
-0.29028467725446233105f,0.94952818059303667475f,0.31368174039889151761f,
-0.94154406518302080631f,0.33688985339222005111f,0.93299279883473895669f,
-0.35989503653498811087f,0.92387953251128673848f,0.38268343236508978178f,
-0.91420975570353069095f,0.40524131400498986100f,0.90398929312344333820f,
-0.42755509343028208491f,0.89322430119551532446f,0.44961132965460653965f,
-0.88192126434835504956f,0.47139673682599764204f,0.87008699110871146054f,
-0.49289819222978403790f,0.85772861000027211809f,0.51410274419322166128f,
-0.84485356524970711689f,0.53499761988709715332f,0.83146961230254523567f,
-0.55557023301960217765f,0.81758481315158371139f,0.57580819141784533866f,
-0.80320753148064494287f,0.59569930449243335691f,0.78834642762660622761f,
-0.61523159058062681925f,0.77301045336273699338f,0.63439328416364548779f,
-0.75720884650648456748f,0.65317284295377675551f,0.74095112535495921691f,
-0.67155895484701833009f,0.72424708295146700276f,0.68954054473706682948f,
-0.70710678118654757274f,0.70710678118654757274f,0.68954054473706694051f,
-0.72424708295146689174f,0.67155895484701833009f,0.74095112535495910588f,
-0.65317284295377686654f,0.75720884650648456748f,0.63439328416364548779f,
-0.77301045336273688235f,0.61523159058062681925f,0.78834642762660622761f,
-0.59569930449243346793f,0.80320753148064483184f,0.57580819141784533866f,
-0.81758481315158371139f,0.55557023301960228867f,0.83146961230254523567f,
-0.53499761988709726435f,0.84485356524970700587f,0.51410274419322166128f,
-0.85772861000027211809f,0.49289819222978409341f,0.87008699110871134952f,
-0.47139673682599780857f,0.88192126434835493853f,0.44961132965460659516f,
-0.89322430119551532446f,0.42755509343028219593f,0.90398929312344333820f,
-0.40524131400498986100f,0.91420975570353069095f,0.38268343236508983729f,
-0.92387953251128673848f,0.35989503653498827740f,0.93299279883473884567f,
-0.33688985339222005111f,0.94154406518302080631f,0.31368174039889157312f,
-0.94952818059303667475f,0.29028467725446233105f,0.95694033573220893540f,
-0.26671275747489842090f,0.96377606579543984022f,0.24298017990326398197f,
-0.97003125319454397424f,0.21910124015686976984f,0.97570213003852857003f,
-0.19509032201612833135f,0.98078528040323043058f,0.17096188876030135595f,
-0.98527764238894122162f,0.14673047445536174793f,0.98917650996478101444f,
-0.12241067519921627893f,0.99247953459870996706f,0.09801714032956077016f,
-0.99518472667219681771f,0.07356456359966745406f,0.99729045667869020697f,
-0.04906767432741812596f,0.99879545620517240501f,0.02454122852291226384f,
-0.99969881869620424997f,1.00000000000000000000f,0.00000000000000000000f,
-0.99518472667219692873f,0.09801714032956060363f,0.98078528040323043058f,
-0.19509032201612824808f,0.95694033573220882438f,0.29028467725446233105f,
-0.92387953251128673848f,0.38268343236508978178f,0.88192126434835504956f,
-0.47139673682599764204f,0.83146961230254523567f,0.55557023301960217765f,
-0.77301045336273699338f,0.63439328416364548779f,0.70710678118654757274f,
-0.70710678118654757274f,0.63439328416364548779f,0.77301045336273688235f,
-0.55557023301960228867f,0.83146961230254523567f,0.47139673682599780857f,
-0.88192126434835493853f,0.38268343236508983729f,0.92387953251128673848f,
-0.29028467725446233105f,0.95694033573220893540f,0.19509032201612833135f,
-0.98078528040323043058f,0.09801714032956077016f,0.99518472667219681771f,
-1.00000000000000000000f,0.00000000000000000000f,0.92387953251128673848f,
-0.38268343236508978178f,0.70710678118654757274f,0.70710678118654757274f,
-0.38268343236508983729f,0.92387953251128673848f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99998116493225097656f,
+0.00613588467240333557f,0.99992471933364868164f,0.01227153837680816650f,
+0.99983060359954833984f,0.01840673014521598816f,0.99969881772994995117f,
+0.02454122900962829590f,0.99952942132949829102f,0.03067480400204658508f,
+0.99932235479354858398f,0.03680722415447235107f,0.99907773733139038086f,
+0.04293825849890708923f,0.99879544973373413086f,0.04906767606735229492f,
+0.99847555160522460938f,0.05519524589180946350f,0.99811810255050659180f,
+0.06132073700428009033f,0.99772304296493530273f,0.06744392216205596924f,
+0.99729043245315551758f,0.07356456667184829712f,0.99682027101516723633f,
+0.07968243956565856934f,0.99631261825561523438f,0.08579730987548828125f,
+0.99576741456985473633f,0.09190895408391952515f,0.99518471956253051758f,
+0.09801714122295379639f,0.99456459283828735352f,0.10412163287401199341f,
+0.99390697479248046875f,0.11022220551967620850f,0.99321192502975463867f,
+0.11631862819194793701f,0.99247956275939941406f,0.12241067737340927124f,
+0.99170976877212524414f,0.12849810719490051270f,0.99090266227722167969f,
+0.13458070158958435059f,0.99005818367004394531f,0.14065824449062347412f,
+0.98917651176452636719f,0.14673046767711639404f,0.98825758695602416992f,
+0.15279719233512878418f,0.98730140924453735352f,0.15885815024375915527f,
+0.98630809783935546875f,0.16491311788558959961f,0.98527765274047851562f,
+0.17096188664436340332f,0.98421007394790649414f,0.17700421810150146484f,
+0.98310548067092895508f,0.18303988873958587646f,0.98196387290954589844f,
+0.18906866014003753662f,0.98078525066375732422f,0.19509032368659973145f,
+0.97956979274749755859f,0.20110464096069335938f,0.97831737995147705078f,
+0.20711137354373931885f,0.97702813148498535156f,0.21311031281948089600f,
+0.97570210695266723633f,0.21910123527050018311f,0.97433936595916748047f,
+0.22508391737937927246f,0.97293996810913085938f,0.23105810582637786865f,
+0.97150391340255737305f,0.23702360689640045166f,0.97003126144409179688f,
+0.24298018217086791992f,0.96852207183837890625f,0.24892760813236236572f,
+0.96697646379470825195f,0.25486564636230468750f,0.96539443731307983398f,
+0.26079410314559936523f,0.96377605199813842773f,0.26671275496482849121f,
+0.96212142705917358398f,0.27262136340141296387f,0.96043050289154052734f,
+0.27851969003677368164f,0.95870345830917358398f,0.28440752625465393066f,
+0.95694035291671752930f,0.29028466343879699707f,0.95514118671417236328f,
+0.29615089297294616699f,0.95330601930618286133f,0.30200594663619995117f,
+0.95143502950668334961f,0.30784964561462402344f,0.94952815771102905273f,
+0.31368175148963928223f,0.94758558273315429688f,0.31950202584266662598f,
+0.94560730457305908203f,0.32531028985977172852f,0.94359344244003295898f,
+0.33110630512237548828f,0.94154405593872070312f,0.33688986301422119141f,
+0.93945920467376708984f,0.34266072511672973633f,0.93733900785446166992f,
+0.34841868281364440918f,0.93518352508544921875f,0.35416352748870849609f,
+0.93299281597137451172f,0.35989505052566528320f,0.93076694011688232422f,
+0.36561298370361328125f,0.92850607633590698242f,0.37131720781326293945f,
+0.92621022462844848633f,0.37700742483139038086f,0.92387950420379638672f,
+0.38268342614173889160f,0.92151403427124023438f,0.38834503293037414551f,
+0.91911387443542480469f,0.39399203658103942871f,0.91667908430099487305f,
+0.39962419867515563965f,0.91420978307723999023f,0.40524131059646606445f,
+0.91170603036880493164f,0.41084316372871398926f,0.90916800498962402344f,
+0.41642954945564270020f,0.90659570693969726562f,0.42200025916099548340f,
+0.90398931503295898438f,0.42755508422851562500f,0.90134882926940917969f,
+0.43309381604194641113f,0.89867448806762695312f,0.43861624598503112793f,
+0.89596623182296752930f,0.44412213563919067383f,0.89322429895401000977f,
+0.44961133599281311035f,0.89044874906539916992f,0.45508357882499694824f,
+0.88763964176177978516f,0.46053871512413024902f,0.88479709625244140625f,
+0.46597650647163391113f,0.88192129135131835938f,0.47139674425125122070f,
+0.87901222705841064453f,0.47679921984672546387f,0.87607008218765258789f,
+0.48218378424644470215f,0.87309497594833374023f,0.48755016922950744629f,
+0.87008696794509887695f,0.49289819598197937012f,0.86704623699188232422f,
+0.49822765588760375977f,0.86397284269332885742f,0.50353837013244628906f,
+0.86086696386337280273f,0.50883013010025024414f,0.85772860050201416016f,
+0.51410275697708129883f,0.85455799102783203125f,0.51935601234436035156f,
+0.85135519504547119141f,0.52458965778350830078f,0.84812033176422119141f,
+0.52980363368988037109f,0.84485357999801635742f,0.53499764204025268555f,
+0.84155499935150146484f,0.54017144441604614258f,0.83822470903396606445f,
+0.54532498121261596680f,0.83486288785934448242f,0.55045795440673828125f,
+0.83146959543228149414f,0.55557024478912353516f,0.82804507017135620117f,
+0.56066155433654785156f,0.82458931207656860352f,0.56573182344436645508f,
+0.82110249996185302734f,0.57078075408935546875f,0.81758481264114379883f,
+0.57580816745758056641f,0.81403630971908569336f,0.58081394433975219727f,
+0.81045717000961303711f,0.58579784631729125977f,0.80684757232666015625f,
+0.59075969457626342773f,0.80320751667022705078f,0.59569931030273437500f,
+0.79953724145889282227f,0.60061645507812500000f,0.79583692550659179688f,
+0.60551106929779052734f,0.79210656881332397461f,0.61038279533386230469f,
+0.78834640979766845703f,0.61523157358169555664f,0.78455656766891479492f,
+0.62005722522735595703f,0.78073722124099731445f,0.62485951185226440430f,
+0.77688848972320556641f,0.62963825464248657227f,0.77301043272018432617f,
+0.63439327478408813477f,0.76910334825515747070f,0.63912445306777954102f,
+0.76516723632812500000f,0.64383155107498168945f,0.76120239496231079102f,
+0.64851438999176025391f,0.75720882415771484375f,0.65317285060882568359f,
+0.75318682193756103516f,0.65780669450759887695f,0.74913638830184936523f,
+0.66241580247879028320f,0.74505776166915893555f,0.66699993610382080078f,
+0.74095112085342407227f,0.67155897617340087891f,0.73681658506393432617f,
+0.67609268426895141602f,0.73265427350997924805f,0.68060100078582763672f,
+0.72846436500549316406f,0.68508368730545043945f,0.72424709796905517578f,
+0.68954056501388549805f,0.72000253200531005859f,0.69397145509719848633f,
+0.71573084592819213867f,0.69837623834609985352f,0.71143221855163574219f,
+0.70275473594665527344f,0.70710676908493041992f,0.70710676908493041992f,
+0.70275473594665527344f,0.71143221855163574219f,0.69837623834609985352f,
+0.71573084592819213867f,0.69397145509719848633f,0.72000253200531005859f,
+0.68954056501388549805f,0.72424709796905517578f,0.68508368730545043945f,
+0.72846436500549316406f,0.68060100078582763672f,0.73265427350997924805f,
+0.67609268426895141602f,0.73681658506393432617f,0.67155897617340087891f,
+0.74095112085342407227f,0.66699993610382080078f,0.74505776166915893555f,
+0.66241580247879028320f,0.74913638830184936523f,0.65780669450759887695f,
+0.75318682193756103516f,0.65317285060882568359f,0.75720882415771484375f,
+0.64851438999176025391f,0.76120239496231079102f,0.64383155107498168945f,
+0.76516723632812500000f,0.63912445306777954102f,0.76910334825515747070f,
+0.63439327478408813477f,0.77301043272018432617f,0.62963825464248657227f,
+0.77688848972320556641f,0.62485951185226440430f,0.78073722124099731445f,
+0.62005722522735595703f,0.78455656766891479492f,0.61523157358169555664f,
+0.78834640979766845703f,0.61038279533386230469f,0.79210656881332397461f,
+0.60551106929779052734f,0.79583692550659179688f,0.60061645507812500000f,
+0.79953724145889282227f,0.59569931030273437500f,0.80320751667022705078f,
+0.59075969457626342773f,0.80684757232666015625f,0.58579784631729125977f,
+0.81045717000961303711f,0.58081394433975219727f,0.81403630971908569336f,
+0.57580816745758056641f,0.81758481264114379883f,0.57078075408935546875f,
+0.82110249996185302734f,0.56573182344436645508f,0.82458931207656860352f,
+0.56066155433654785156f,0.82804507017135620117f,0.55557024478912353516f,
+0.83146959543228149414f,0.55045795440673828125f,0.83486288785934448242f,
+0.54532498121261596680f,0.83822470903396606445f,0.54017144441604614258f,
+0.84155499935150146484f,0.53499764204025268555f,0.84485357999801635742f,
+0.52980363368988037109f,0.84812033176422119141f,0.52458965778350830078f,
+0.85135519504547119141f,0.51935601234436035156f,0.85455799102783203125f,
+0.51410275697708129883f,0.85772860050201416016f,0.50883013010025024414f,
+0.86086696386337280273f,0.50353837013244628906f,0.86397284269332885742f,
+0.49822765588760375977f,0.86704623699188232422f,0.49289819598197937012f,
+0.87008696794509887695f,0.48755016922950744629f,0.87309497594833374023f,
+0.48218378424644470215f,0.87607008218765258789f,0.47679921984672546387f,
+0.87901222705841064453f,0.47139674425125122070f,0.88192129135131835938f,
+0.46597650647163391113f,0.88479709625244140625f,0.46053871512413024902f,
+0.88763964176177978516f,0.45508357882499694824f,0.89044874906539916992f,
+0.44961133599281311035f,0.89322429895401000977f,0.44412213563919067383f,
+0.89596623182296752930f,0.43861624598503112793f,0.89867448806762695312f,
+0.43309381604194641113f,0.90134882926940917969f,0.42755508422851562500f,
+0.90398931503295898438f,0.42200025916099548340f,0.90659570693969726562f,
+0.41642954945564270020f,0.90916800498962402344f,0.41084316372871398926f,
+0.91170603036880493164f,0.40524131059646606445f,0.91420978307723999023f,
+0.39962419867515563965f,0.91667908430099487305f,0.39399203658103942871f,
+0.91911387443542480469f,0.38834503293037414551f,0.92151403427124023438f,
+0.38268342614173889160f,0.92387950420379638672f,0.37700742483139038086f,
+0.92621022462844848633f,0.37131720781326293945f,0.92850607633590698242f,
+0.36561298370361328125f,0.93076694011688232422f,0.35989505052566528320f,
+0.93299281597137451172f,0.35416352748870849609f,0.93518352508544921875f,
+0.34841868281364440918f,0.93733900785446166992f,0.34266072511672973633f,
+0.93945920467376708984f,0.33688986301422119141f,0.94154405593872070312f,
+0.33110630512237548828f,0.94359344244003295898f,0.32531028985977172852f,
+0.94560730457305908203f,0.31950202584266662598f,0.94758558273315429688f,
+0.31368175148963928223f,0.94952815771102905273f,0.30784964561462402344f,
+0.95143502950668334961f,0.30200594663619995117f,0.95330601930618286133f,
+0.29615089297294616699f,0.95514118671417236328f,0.29028466343879699707f,
+0.95694035291671752930f,0.28440752625465393066f,0.95870345830917358398f,
+0.27851969003677368164f,0.96043050289154052734f,0.27262136340141296387f,
+0.96212142705917358398f,0.26671275496482849121f,0.96377605199813842773f,
+0.26079410314559936523f,0.96539443731307983398f,0.25486564636230468750f,
+0.96697646379470825195f,0.24892760813236236572f,0.96852207183837890625f,
+0.24298018217086791992f,0.97003126144409179688f,0.23702360689640045166f,
+0.97150391340255737305f,0.23105810582637786865f,0.97293996810913085938f,
+0.22508391737937927246f,0.97433936595916748047f,0.21910123527050018311f,
+0.97570210695266723633f,0.21311031281948089600f,0.97702813148498535156f,
+0.20711137354373931885f,0.97831737995147705078f,0.20110464096069335938f,
+0.97956979274749755859f,0.19509032368659973145f,0.98078525066375732422f,
+0.18906866014003753662f,0.98196387290954589844f,0.18303988873958587646f,
+0.98310548067092895508f,0.17700421810150146484f,0.98421007394790649414f,
+0.17096188664436340332f,0.98527765274047851562f,0.16491311788558959961f,
+0.98630809783935546875f,0.15885815024375915527f,0.98730140924453735352f,
+0.15279719233512878418f,0.98825758695602416992f,0.14673046767711639404f,
+0.98917651176452636719f,0.14065824449062347412f,0.99005818367004394531f,
+0.13458070158958435059f,0.99090266227722167969f,0.12849810719490051270f,
+0.99170976877212524414f,0.12241067737340927124f,0.99247956275939941406f,
+0.11631862819194793701f,0.99321192502975463867f,0.11022220551967620850f,
+0.99390697479248046875f,0.10412163287401199341f,0.99456459283828735352f,
+0.09801714122295379639f,0.99518471956253051758f,0.09190895408391952515f,
+0.99576741456985473633f,0.08579730987548828125f,0.99631261825561523438f,
+0.07968243956565856934f,0.99682027101516723633f,0.07356456667184829712f,
+0.99729043245315551758f,0.06744392216205596924f,0.99772304296493530273f,
+0.06132073700428009033f,0.99811810255050659180f,0.05519524589180946350f,
+0.99847555160522460938f,0.04906767606735229492f,0.99879544973373413086f,
+0.04293825849890708923f,0.99907773733139038086f,0.03680722415447235107f,
+0.99932235479354858398f,0.03067480400204658508f,0.99952942132949829102f,
+0.02454122900962829590f,0.99969881772994995117f,0.01840673014521598816f,
+0.99983060359954833984f,0.01227153837680816650f,0.99992471933364868164f,
+0.00613588467240333557f,0.99998116493225097656f,1.00000000000000000000f,
+0.00000000000000000000f,0.99969881772994995117f,0.02454122900962829590f,
+0.99879544973373413086f,0.04906767606735229492f,0.99729043245315551758f,
+0.07356456667184829712f,0.99518471956253051758f,0.09801714122295379639f,
+0.99247956275939941406f,0.12241067737340927124f,0.98917651176452636719f,
+0.14673046767711639404f,0.98527765274047851562f,0.17096188664436340332f,
+0.98078525066375732422f,0.19509032368659973145f,0.97570210695266723633f,
+0.21910123527050018311f,0.97003126144409179688f,0.24298018217086791992f,
+0.96377605199813842773f,0.26671275496482849121f,0.95694035291671752930f,
+0.29028466343879699707f,0.94952815771102905273f,0.31368175148963928223f,
+0.94154405593872070312f,0.33688986301422119141f,0.93299281597137451172f,
+0.35989505052566528320f,0.92387950420379638672f,0.38268342614173889160f,
+0.91420978307723999023f,0.40524131059646606445f,0.90398931503295898438f,
+0.42755508422851562500f,0.89322429895401000977f,0.44961133599281311035f,
+0.88192129135131835938f,0.47139674425125122070f,0.87008696794509887695f,
+0.49289819598197937012f,0.85772860050201416016f,0.51410275697708129883f,
+0.84485357999801635742f,0.53499764204025268555f,0.83146959543228149414f,
+0.55557024478912353516f,0.81758481264114379883f,0.57580816745758056641f,
+0.80320751667022705078f,0.59569931030273437500f,0.78834640979766845703f,
+0.61523157358169555664f,0.77301043272018432617f,0.63439327478408813477f,
+0.75720882415771484375f,0.65317285060882568359f,0.74095112085342407227f,
+0.67155897617340087891f,0.72424709796905517578f,0.68954056501388549805f,
+0.70710676908493041992f,0.70710676908493041992f,0.68954056501388549805f,
+0.72424709796905517578f,0.67155897617340087891f,0.74095112085342407227f,
+0.65317285060882568359f,0.75720882415771484375f,0.63439327478408813477f,
+0.77301043272018432617f,0.61523157358169555664f,0.78834640979766845703f,
+0.59569931030273437500f,0.80320751667022705078f,0.57580816745758056641f,
+0.81758481264114379883f,0.55557024478912353516f,0.83146959543228149414f,
+0.53499764204025268555f,0.84485357999801635742f,0.51410275697708129883f,
+0.85772860050201416016f,0.49289819598197937012f,0.87008696794509887695f,
+0.47139674425125122070f,0.88192129135131835938f,0.44961133599281311035f,
+0.89322429895401000977f,0.42755508422851562500f,0.90398931503295898438f,
+0.40524131059646606445f,0.91420978307723999023f,0.38268342614173889160f,
+0.92387950420379638672f,0.35989505052566528320f,0.93299281597137451172f,
+0.33688986301422119141f,0.94154405593872070312f,0.31368175148963928223f,
+0.94952815771102905273f,0.29028466343879699707f,0.95694035291671752930f,
+0.26671275496482849121f,0.96377605199813842773f,0.24298018217086791992f,
+0.97003126144409179688f,0.21910123527050018311f,0.97570210695266723633f,
+0.19509032368659973145f,0.98078525066375732422f,0.17096188664436340332f,
+0.98527765274047851562f,0.14673046767711639404f,0.98917651176452636719f,
+0.12241067737340927124f,0.99247956275939941406f,0.09801714122295379639f,
+0.99518471956253051758f,0.07356456667184829712f,0.99729043245315551758f,
+0.04906767606735229492f,0.99879544973373413086f,0.02454122900962829590f,
+0.99969881772994995117f,1.00000000000000000000f,0.00000000000000000000f,
+0.99518471956253051758f,0.09801714122295379639f,0.98078525066375732422f,
+0.19509032368659973145f,0.95694035291671752930f,0.29028466343879699707f,
+0.92387950420379638672f,0.38268342614173889160f,0.88192129135131835938f,
+0.47139674425125122070f,0.83146959543228149414f,0.55557024478912353516f,
+0.77301043272018432617f,0.63439327478408813477f,0.70710676908493041992f,
+0.70710676908493041992f,0.63439327478408813477f,0.77301043272018432617f,
+0.55557024478912353516f,0.83146959543228149414f,0.47139674425125122070f,
+0.88192129135131835938f,0.38268342614173889160f,0.92387950420379638672f,
+0.29028466343879699707f,0.95694035291671752930f,0.19509032368659973145f,
+0.98078525066375732422f,0.09801714122295379639f,0.99518471956253051758f,
+1.00000000000000000000f,0.00000000000000000000f,0.92387950420379638672f,
+0.38268342614173889160f,0.70710676908493041992f,0.70710676908493041992f,
+0.38268342614173889160f,0.92387950420379638672f,};
 
 float32_t rearranged_twiddle_stride2_1024_f32[680]={
-1.00000000000000000000f,0.00000000000000000000f,0.99992470183914450299f,
-0.01227153828571992539f,0.99969881869620424997f,0.02454122852291228812f,
-0.99932238458834954375f,0.03680722294135883171f,0.99879545620517240501f,
-0.04906767432741801493f,0.99811811290014917919f,0.06132073630220857829f,
-0.99729045667869020697f,0.07356456359966742631f,0.99631261218277800129f,
-0.08579731234443989385f,0.99518472667219692873f,0.09801714032956060363f,
-0.99390697000235606051f,0.11022220729388305938f,0.99247953459870996706f,
-0.12241067519921619566f,0.99090263542778000971f,0.13458070850712616773f,
-0.98917650996478101444f,0.14673047445536174793f,0.98730141815785843473f,
-0.15885814333386144570f,0.98527764238894122162f,0.17096188876030121717f,
-0.98310548743121628501f,0.18303988795514095078f,0.98078528040323043058f,
-0.19509032201612824808f,0.97831737071962765473f,0.20711137619221856032f,
-0.97570213003852857003f,0.21910124015686979759f,0.97293995220556017678f,
-0.23105810828067110951f,0.97003125319454397424f,0.24298017990326387094f,
-0.96697647104485207059f,0.25486565960451457169f,0.96377606579543984022f,
-0.26671275747489836538f,0.96043051941556578655f,0.27851968938505305973f,
-0.95694033573220882438f,0.29028467725446233105f,0.95330604035419386211f,
-0.30200594931922808417f,0.94952818059303667475f,0.31368174039889151761f,
-0.94560732538052127971f,0.32531029216226292622f,0.94154406518302080631f,
-0.33688985339222005111f,0.93733901191257495977f,0.34841868024943456472f,
-0.93299279883473895669f,0.35989503653498811087f,0.92850608047321558924f,
-0.37131719395183754306f,0.92387953251128673848f,0.38268343236508978178f,
-0.91911385169005777040f,0.39399204006104809883f,0.91420975570353069095f,
-0.40524131400498986100f,0.90916798309052238025f,0.41642956009763715253f,
-0.90398929312344333820f,0.42755509343028208491f,0.89867446569395381673f,
-0.43861623853852765853f,0.89322430119551532446f,0.44961132965460653965f,
-0.88763962040285393496f,0.46053871095824000514f,0.88192126434835504956f,
-0.47139673682599764204f,0.87607009419540660122f,0.48218377207912271887f,
-0.87008699110871146054f,0.49289819222978403790f,0.86397285612158669643f,
-0.50353838372571757542f,0.85772861000027211809f,0.51410274419322166128f,
-0.85135519310526519554f,0.52458968267846894928f,0.84485356524970711689f,
-0.53499761988709715332f,0.83822470555483807875f,0.54532498842204646383f,
-0.83146961230254523567f,0.55557023301960217765f,0.82458930278502529099f,
-0.56573181078361312046f,0.81758481315158371139f,0.57580819141784533866f,
-0.81045719825259476821f,0.58579785745643886408f,0.80320753148064494287f,
-0.59569930449243335691f,0.79583690460888356633f,0.60551104140432554512f,
-0.78834642762660622761f,0.61523159058062681925f,0.78073722857209448822f,
-0.62485948814238634341f,0.77301045336273699338f,0.63439328416364548779f,
-0.76516726562245895860f,0.64383154288979138613f,0.75720884650648456748f,
-0.65317284295377675551f,0.74913639452345937020f,0.66241577759017178373f,
-0.74095112535495921691f,0.67155895484701833009f,0.73265427167241281570f,
-0.68060099779545302212f,0.72424708295146700276f,0.68954054473706682948f,
-0.71573082528381870571f,0.69837624940897280457f,0.70710678118654757274f,
-0.70710678118654757274f,0.69837624940897291559f,0.71573082528381859468f,
-0.68954054473706694051f,0.72424708295146689174f,0.68060099779545302212f,
-0.73265427167241281570f,0.67155895484701833009f,0.74095112535495910588f,
-0.66241577759017178373f,0.74913639452345925918f,0.65317284295377686654f,
-0.75720884650648456748f,0.64383154288979149715f,0.76516726562245895860f,
-0.63439328416364548779f,0.77301045336273688235f,0.62485948814238645443f,
-0.78073722857209448822f,0.61523159058062681925f,0.78834642762660622761f,
-0.60551104140432554512f,0.79583690460888345530f,0.59569930449243346793f,
-0.80320753148064483184f,0.58579785745643886408f,0.81045719825259476821f,
-0.57580819141784533866f,0.81758481315158371139f,0.56573181078361323149f,
-0.82458930278502529099f,0.55557023301960228867f,0.83146961230254523567f,
-0.54532498842204646383f,0.83822470555483796772f,0.53499761988709726435f,
-0.84485356524970700587f,0.52458968267846883826f,0.85135519310526519554f,
-0.51410274419322166128f,0.85772861000027211809f,0.50353838372571757542f,
-0.86397285612158669643f,0.49289819222978409341f,0.87008699110871134952f,
-0.48218377207912282989f,0.87607009419540660122f,0.47139673682599780857f,
-0.88192126434835493853f,0.46053871095824000514f,0.88763962040285393496f,
-0.44961132965460659516f,0.89322430119551532446f,0.43861623853852771404f,
-0.89867446569395381673f,0.42755509343028219593f,0.90398929312344333820f,
-0.41642956009763731906f,0.90916798309052226923f,0.40524131400498986100f,
-0.91420975570353069095f,0.39399204006104809883f,0.91911385169005777040f,
-0.38268343236508983729f,0.92387953251128673848f,0.37131719395183759858f,
-0.92850608047321558924f,0.35989503653498827740f,0.93299279883473884567f,
-0.34841868024943450921f,0.93733901191257495977f,0.33688985339222005111f,
-0.94154406518302080631f,0.32531029216226298173f,0.94560732538052127971f,
-0.31368174039889157312f,0.94952818059303667475f,0.30200594931922819519f,
-0.95330604035419375109f,0.29028467725446233105f,0.95694033573220893540f,
-0.27851968938505305973f,0.96043051941556578655f,0.26671275747489842090f,
-0.96377606579543984022f,0.25486565960451462720f,0.96697647104485207059f,
-0.24298017990326398197f,0.97003125319454397424f,0.23105810828067127605f,
-0.97293995220556006576f,0.21910124015686976984f,0.97570213003852857003f,
-0.20711137619221856032f,0.97831737071962765473f,0.19509032201612833135f,
-0.98078528040323043058f,0.18303988795514106180f,0.98310548743121628501f,
-0.17096188876030135595f,0.98527764238894122162f,0.15885814333386139019f,
-0.98730141815785843473f,0.14673047445536174793f,0.98917650996478101444f,
-0.13458070850712622324f,0.99090263542778000971f,0.12241067519921627893f,
-0.99247953459870996706f,0.11022220729388318428f,0.99390697000235606051f,
-0.09801714032956077016f,0.99518472667219681771f,0.08579731234443987997f,
-0.99631261218277800129f,0.07356456359966745406f,0.99729045667869020697f,
-0.06132073630220864768f,0.99811811290014917919f,0.04906767432741812596f,
-0.99879545620517240501f,0.03680722294135899131f,0.99932238458834954375f,
-0.02454122852291226384f,0.99969881869620424997f,0.01227153828571994447f,
-0.99992470183914450299f,0.00000000000000006123f,1.00000000000000000000f,
--0.01227153828571982304f,0.99992470183914450299f,-0.02454122852291214241f,
-0.99969881869620424997f,-0.03680722294135886641f,0.99932238458834954375f,
--0.04906767432741800800f,0.99879545620517240501f,-0.06132073630220852972f,
-0.99811811290014917919f,-0.07356456359966732916f,0.99729045667869020697f,
--0.08579731234443975507f,0.99631261218277800129f,-0.09801714032956064526f,
-0.99518472667219692873f,-0.11022220729388305938f,0.99390697000235606051f,
--0.12241067519921615403f,0.99247953459870996706f,-0.13458070850712611222f,
-0.99090263542778000971f,-0.14673047445536163691f,0.98917650996478101444f,
--0.15885814333386127917f,0.98730141815785843473f,-0.17096188876030124493f,
-0.98527764238894122162f,-0.18303988795514092303f,0.98310548743121628501f,
--0.19509032201612819257f,0.98078528040323043058f,-0.20711137619221844930f,
-0.97831737071962765473f,-0.21910124015686965881f,0.97570213003852857003f,
--0.23105810828067113727f,0.97293995220556017678f,-0.24298017990326387094f,
-0.97003125319454397424f,-0.25486565960451451618f,0.96697647104485207059f,
--0.26671275747489830987f,0.96377606579543984022f,-0.27851968938505294870f,
-0.96043051941556589757f,-0.29028467725446216452f,0.95694033573220893540f,
--0.30200594931922808417f,0.95330604035419386211f,-0.31368174039889140658f,
-0.94952818059303667475f,-0.32531029216226287071f,0.94560732538052139073f,
--0.33688985339221994009f,0.94154406518302080631f,-0.34841868024943439819f,
-0.93733901191257495977f,-0.35989503653498816638f,0.93299279883473884567f,
--0.37131719395183748755f,0.92850608047321558924f,-0.38268343236508972627f,
-0.92387953251128673848f,-0.39399204006104798781f,0.91911385169005777040f,
--0.40524131400498974998f,0.91420975570353069095f,-0.41642956009763698599f,
-0.90916798309052249127f,-0.42755509343028186287f,0.90398929312344344922f,
--0.43861623853852738097f,0.89867446569395392775f,-0.44961132965460670619f,
-0.89322430119551521344f,-0.46053871095824006066f,0.88763962040285393496f,
--0.47139673682599769755f,0.88192126434835504956f,-0.48218377207912271887f,
-0.87607009419540660122f,-0.49289819222978398239f,0.87008699110871146054f,
--0.50353838372571746440f,0.86397285612158680745f,-0.51410274419322155026f,
-0.85772861000027211809f,-0.52458968267846872724f,0.85135519310526519554f,
--0.53499761988709704230f,0.84485356524970722791f,-0.54532498842204624179f,
-0.83822470555483818977f,-0.55557023301960195560f,0.83146961230254534669f,
--0.56573181078361323149f,0.82458930278502517996f,-0.57580819141784533866f,
-0.81758481315158371139f,-0.58579785745643886408f,0.81045719825259476821f,
--0.59569930449243335691f,0.80320753148064494287f,-0.60551104140432543410f,
-0.79583690460888356633f,-0.61523159058062670823f,0.78834642762660633863f,
--0.62485948814238623239f,0.78073722857209459924f,-0.63439328416364537677f,
-0.77301045336273710440f,-0.64383154288979127511f,0.76516726562245906962f,
--0.65317284295377653347f,0.75720884650648467851f,-0.66241577759017189475f,
-0.74913639452345925918f,-0.67155895484701844111f,0.74095112535495899486f,
--0.68060099779545302212f,0.73265427167241281570f,-0.68954054473706694051f,
-0.72424708295146689174f,-0.69837624940897280457f,0.71573082528381870571f,
--0.70710678118654746172f,0.70710678118654757274f,-0.71573082528381859468f,
-0.69837624940897291559f,-0.72424708295146678072f,0.68954054473706705153f,
--0.73265427167241270467f,0.68060099779545324417f,-0.74095112535495888384f,
-0.67155895484701855214f,-0.74913639452345914815f,0.66241577759017200577f,
--0.75720884650648467851f,0.65317284295377664449f,-0.76516726562245895860f,
-0.64383154288979138613f,-0.77301045336273699338f,0.63439328416364548779f,
--0.78073722857209448822f,0.62485948814238634341f,-0.78834642762660622761f,
-0.61523159058062693028f,-0.79583690460888345530f,0.60551104140432565615f,
--0.80320753148064483184f,0.59569930449243346793f,-0.81045719825259465718f,
-0.58579785745643897510f,-0.81758481315158360037f,0.57580819141784544968f,
--0.82458930278502506894f,0.56573181078361345353f,-0.83146961230254534669f,
-0.55557023301960217765f,-0.83822470555483807875f,0.54532498842204635281f,
--0.84485356524970711689f,0.53499761988709715332f,-0.85135519310526519554f,
-0.52458968267846894928f,-0.85772861000027200706f,0.51410274419322177231f,
--0.86397285612158669643f,0.50353838372571757542f,-0.87008699110871134952f,
-0.49289819222978414892f,-0.87607009419540649020f,0.48218377207912288540f,
--0.88192126434835493853f,0.47139673682599780857f,-0.88763962040285382393f,
-0.46053871095824022719f,-0.89322430119551521344f,0.44961132965460687272f,
--0.89867446569395392775f,0.43861623853852754751f,-0.90398929312344333820f,
-0.42755509343028202940f,-0.90916798309052238025f,0.41642956009763715253f,
--0.91420975570353069095f,0.40524131400498991651f,-0.91911385169005777040f,
-0.39399204006104815434f,-0.92387953251128673848f,0.38268343236508989280f,
--0.92850608047321547822f,0.37131719395183770960f,-0.93299279883473884567f,
-0.35989503653498833291f,-0.93733901191257484875f,0.34841868024943478677f,
--0.94154406518302069529f,0.33688985339222032867f,-0.94560732538052116869f,
-0.32531029216226325929f,-0.94952818059303667475f,0.31368174039889140658f,
--0.95330604035419386211f,0.30200594931922802866f,-0.95694033573220882438f,
-0.29028467725446238656f,-0.96043051941556578655f,0.27851968938505317075f,
--0.96377606579543984022f,0.26671275747489847641f,-0.96697647104485207059f,
-0.25486565960451468271f,-0.97003125319454397424f,0.24298017990326406523f,
--0.97293995220556006576f,0.23105810828067133156f,-0.97570213003852845901f,
-0.21910124015687004739f,-0.97831737071962754371f,0.20711137619221883788f,
--0.98078528040323043058f,0.19509032201612860891f,-0.98310548743121628501f,
-0.18303988795514089527f,-0.98527764238894122162f,0.17096188876030121717f,
--0.98730141815785843473f,0.15885814333386147346f,-0.98917650996478101444f,
-0.14673047445536180344f,-0.99090263542778000971f,0.13458070850712627875f,
--0.99247953459870996706f,0.12241067519921634832f,-0.99390697000235606051f,
-0.11022220729388323979f,-0.99518472667219681771f,0.09801714032956082567f,
--0.99631261218277800129f,0.08579731234444015753f,-0.99729045667869020697f,
-0.07356456359966773162f,-0.99811811290014917919f,0.06132073630220848809f,
--0.99879545620517240501f,0.04906767432741796636f,-0.99932238458834954375f,
-0.03680722294135883171f,-0.99969881869620424997f,0.02454122852291232629f,
--0.99992470183914450299f,0.01227153828572000692f,1.00000000000000000000f,
-0.00000000000000000000f,0.99879545620517240501f,0.04906767432741801493f,
-0.99518472667219692873f,0.09801714032956060363f,0.98917650996478101444f,
-0.14673047445536174793f,0.98078528040323043058f,0.19509032201612824808f,
-0.97003125319454397424f,0.24298017990326387094f,0.95694033573220882438f,
-0.29028467725446233105f,0.94154406518302080631f,0.33688985339222005111f,
-0.92387953251128673848f,0.38268343236508978178f,0.90398929312344333820f,
-0.42755509343028208491f,0.88192126434835504956f,0.47139673682599764204f,
-0.85772861000027211809f,0.51410274419322166128f,0.83146961230254523567f,
-0.55557023301960217765f,0.80320753148064494287f,0.59569930449243335691f,
-0.77301045336273699338f,0.63439328416364548779f,0.74095112535495921691f,
-0.67155895484701833009f,0.70710678118654757274f,0.70710678118654757274f,
-0.67155895484701833009f,0.74095112535495910588f,0.63439328416364548779f,
-0.77301045336273688235f,0.59569930449243346793f,0.80320753148064483184f,
-0.55557023301960228867f,0.83146961230254523567f,0.51410274419322166128f,
-0.85772861000027211809f,0.47139673682599780857f,0.88192126434835493853f,
-0.42755509343028219593f,0.90398929312344333820f,0.38268343236508983729f,
-0.92387953251128673848f,0.33688985339222005111f,0.94154406518302080631f,
-0.29028467725446233105f,0.95694033573220893540f,0.24298017990326398197f,
-0.97003125319454397424f,0.19509032201612833135f,0.98078528040323043058f,
-0.14673047445536174793f,0.98917650996478101444f,0.09801714032956077016f,
-0.99518472667219681771f,0.04906767432741812596f,0.99879545620517240501f,
-0.00000000000000006123f,1.00000000000000000000f,-0.04906767432741800800f,
-0.99879545620517240501f,-0.09801714032956064526f,0.99518472667219692873f,
--0.14673047445536163691f,0.98917650996478101444f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.24298017990326387094f,0.97003125319454397424f,
--0.29028467725446216452f,0.95694033573220893540f,-0.33688985339221994009f,
-0.94154406518302080631f,-0.38268343236508972627f,0.92387953251128673848f,
--0.42755509343028186287f,0.90398929312344344922f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.51410274419322155026f,0.85772861000027211809f,
--0.55557023301960195560f,0.83146961230254534669f,-0.59569930449243335691f,
-0.80320753148064494287f,-0.63439328416364537677f,0.77301045336273710440f,
--0.67155895484701844111f,0.74095112535495899486f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.74095112535495888384f,0.67155895484701855214f,
--0.77301045336273699338f,0.63439328416364548779f,-0.80320753148064483184f,
-0.59569930449243346793f,-0.83146961230254534669f,0.55557023301960217765f,
--0.85772861000027200706f,0.51410274419322177231f,-0.88192126434835493853f,
-0.47139673682599780857f,-0.90398929312344333820f,0.42755509343028202940f,
--0.92387953251128673848f,0.38268343236508989280f,-0.94154406518302069529f,
-0.33688985339222032867f,-0.95694033573220882438f,0.29028467725446238656f,
--0.97003125319454397424f,0.24298017990326406523f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.98917650996478101444f,0.14673047445536180344f,
--0.99518472667219681771f,0.09801714032956082567f,-0.99879545620517240501f,
-0.04906767432741796636f,1.00000000000000000000f,0.00000000000000000000f,
-0.98078528040323043058f,0.19509032201612824808f,0.92387953251128673848f,
-0.38268343236508978178f,0.83146961230254523567f,0.55557023301960217765f,
-0.70710678118654757274f,0.70710678118654757274f,0.55557023301960228867f,
-0.83146961230254523567f,0.38268343236508983729f,0.92387953251128673848f,
-0.19509032201612833135f,0.98078528040323043058f,0.00000000000000006123f,
-1.00000000000000000000f,-0.19509032201612819257f,0.98078528040323043058f,
--0.38268343236508972627f,0.92387953251128673848f,-0.55557023301960195560f,
-0.83146961230254534669f,-0.70710678118654746172f,0.70710678118654757274f,
--0.83146961230254534669f,0.55557023301960217765f,-0.92387953251128673848f,
-0.38268343236508989280f,-0.98078528040323043058f,0.19509032201612860891f,
-1.00000000000000000000f,0.00000000000000000000f,0.70710678118654757274f,
-0.70710678118654757274f,0.00000000000000006123f,1.00000000000000000000f,
--0.70710678118654746172f,0.70710678118654757274f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99992471933364868164f,
+0.01227153837680816650f,0.99969881772994995117f,0.02454122900962829590f,
+0.99932235479354858398f,0.03680722415447235107f,0.99879544973373413086f,
+0.04906767606735229492f,0.99811810255050659180f,0.06132073700428009033f,
+0.99729043245315551758f,0.07356456667184829712f,0.99631261825561523438f,
+0.08579730987548828125f,0.99518471956253051758f,0.09801714122295379639f,
+0.99390697479248046875f,0.11022220551967620850f,0.99247956275939941406f,
+0.12241067737340927124f,0.99090266227722167969f,0.13458070158958435059f,
+0.98917651176452636719f,0.14673046767711639404f,0.98730140924453735352f,
+0.15885815024375915527f,0.98527765274047851562f,0.17096188664436340332f,
+0.98310548067092895508f,0.18303988873958587646f,0.98078525066375732422f,
+0.19509032368659973145f,0.97831737995147705078f,0.20711137354373931885f,
+0.97570210695266723633f,0.21910123527050018311f,0.97293996810913085938f,
+0.23105810582637786865f,0.97003126144409179688f,0.24298018217086791992f,
+0.96697646379470825195f,0.25486564636230468750f,0.96377605199813842773f,
+0.26671275496482849121f,0.96043050289154052734f,0.27851969003677368164f,
+0.95694035291671752930f,0.29028466343879699707f,0.95330601930618286133f,
+0.30200594663619995117f,0.94952815771102905273f,0.31368175148963928223f,
+0.94560730457305908203f,0.32531028985977172852f,0.94154405593872070312f,
+0.33688986301422119141f,0.93733900785446166992f,0.34841868281364440918f,
+0.93299281597137451172f,0.35989505052566528320f,0.92850607633590698242f,
+0.37131720781326293945f,0.92387950420379638672f,0.38268342614173889160f,
+0.91911387443542480469f,0.39399203658103942871f,0.91420978307723999023f,
+0.40524131059646606445f,0.90916800498962402344f,0.41642954945564270020f,
+0.90398931503295898438f,0.42755508422851562500f,0.89867448806762695312f,
+0.43861624598503112793f,0.89322429895401000977f,0.44961133599281311035f,
+0.88763964176177978516f,0.46053871512413024902f,0.88192129135131835938f,
+0.47139674425125122070f,0.87607008218765258789f,0.48218378424644470215f,
+0.87008696794509887695f,0.49289819598197937012f,0.86397284269332885742f,
+0.50353837013244628906f,0.85772860050201416016f,0.51410275697708129883f,
+0.85135519504547119141f,0.52458965778350830078f,0.84485357999801635742f,
+0.53499764204025268555f,0.83822470903396606445f,0.54532498121261596680f,
+0.83146959543228149414f,0.55557024478912353516f,0.82458931207656860352f,
+0.56573182344436645508f,0.81758481264114379883f,0.57580816745758056641f,
+0.81045717000961303711f,0.58579784631729125977f,0.80320751667022705078f,
+0.59569931030273437500f,0.79583692550659179688f,0.60551106929779052734f,
+0.78834640979766845703f,0.61523157358169555664f,0.78073722124099731445f,
+0.62485951185226440430f,0.77301043272018432617f,0.63439327478408813477f,
+0.76516723632812500000f,0.64383155107498168945f,0.75720882415771484375f,
+0.65317285060882568359f,0.74913638830184936523f,0.66241580247879028320f,
+0.74095112085342407227f,0.67155897617340087891f,0.73265427350997924805f,
+0.68060100078582763672f,0.72424709796905517578f,0.68954056501388549805f,
+0.71573084592819213867f,0.69837623834609985352f,0.70710676908493041992f,
+0.70710676908493041992f,0.69837623834609985352f,0.71573084592819213867f,
+0.68954056501388549805f,0.72424709796905517578f,0.68060100078582763672f,
+0.73265427350997924805f,0.67155897617340087891f,0.74095112085342407227f,
+0.66241580247879028320f,0.74913638830184936523f,0.65317285060882568359f,
+0.75720882415771484375f,0.64383155107498168945f,0.76516723632812500000f,
+0.63439327478408813477f,0.77301043272018432617f,0.62485951185226440430f,
+0.78073722124099731445f,0.61523157358169555664f,0.78834640979766845703f,
+0.60551106929779052734f,0.79583692550659179688f,0.59569931030273437500f,
+0.80320751667022705078f,0.58579784631729125977f,0.81045717000961303711f,
+0.57580816745758056641f,0.81758481264114379883f,0.56573182344436645508f,
+0.82458931207656860352f,0.55557024478912353516f,0.83146959543228149414f,
+0.54532498121261596680f,0.83822470903396606445f,0.53499764204025268555f,
+0.84485357999801635742f,0.52458965778350830078f,0.85135519504547119141f,
+0.51410275697708129883f,0.85772860050201416016f,0.50353837013244628906f,
+0.86397284269332885742f,0.49289819598197937012f,0.87008696794509887695f,
+0.48218378424644470215f,0.87607008218765258789f,0.47139674425125122070f,
+0.88192129135131835938f,0.46053871512413024902f,0.88763964176177978516f,
+0.44961133599281311035f,0.89322429895401000977f,0.43861624598503112793f,
+0.89867448806762695312f,0.42755508422851562500f,0.90398931503295898438f,
+0.41642954945564270020f,0.90916800498962402344f,0.40524131059646606445f,
+0.91420978307723999023f,0.39399203658103942871f,0.91911387443542480469f,
+0.38268342614173889160f,0.92387950420379638672f,0.37131720781326293945f,
+0.92850607633590698242f,0.35989505052566528320f,0.93299281597137451172f,
+0.34841868281364440918f,0.93733900785446166992f,0.33688986301422119141f,
+0.94154405593872070312f,0.32531028985977172852f,0.94560730457305908203f,
+0.31368175148963928223f,0.94952815771102905273f,0.30200594663619995117f,
+0.95330601930618286133f,0.29028466343879699707f,0.95694035291671752930f,
+0.27851969003677368164f,0.96043050289154052734f,0.26671275496482849121f,
+0.96377605199813842773f,0.25486564636230468750f,0.96697646379470825195f,
+0.24298018217086791992f,0.97003126144409179688f,0.23105810582637786865f,
+0.97293996810913085938f,0.21910123527050018311f,0.97570210695266723633f,
+0.20711137354373931885f,0.97831737995147705078f,0.19509032368659973145f,
+0.98078525066375732422f,0.18303988873958587646f,0.98310548067092895508f,
+0.17096188664436340332f,0.98527765274047851562f,0.15885815024375915527f,
+0.98730140924453735352f,0.14673046767711639404f,0.98917651176452636719f,
+0.13458070158958435059f,0.99090266227722167969f,0.12241067737340927124f,
+0.99247956275939941406f,0.11022220551967620850f,0.99390697479248046875f,
+0.09801714122295379639f,0.99518471956253051758f,0.08579730987548828125f,
+0.99631261825561523438f,0.07356456667184829712f,0.99729043245315551758f,
+0.06132073700428009033f,0.99811810255050659180f,0.04906767606735229492f,
+0.99879544973373413086f,0.03680722415447235107f,0.99932235479354858398f,
+0.02454122900962829590f,0.99969881772994995117f,0.01227153837680816650f,
+0.99992471933364868164f,0.00000000000000006123f,1.00000000000000000000f,
+-0.01227153837680816650f,0.99992471933364868164f,-0.02454122900962829590f,
+0.99969881772994995117f,-0.03680722415447235107f,0.99932235479354858398f,
+-0.04906767606735229492f,0.99879544973373413086f,-0.06132073700428009033f,
+0.99811810255050659180f,-0.07356456667184829712f,0.99729043245315551758f,
+-0.08579730987548828125f,0.99631261825561523438f,-0.09801714122295379639f,
+0.99518471956253051758f,-0.11022220551967620850f,0.99390697479248046875f,
+-0.12241067737340927124f,0.99247956275939941406f,-0.13458070158958435059f,
+0.99090266227722167969f,-0.14673046767711639404f,0.98917651176452636719f,
+-0.15885815024375915527f,0.98730140924453735352f,-0.17096188664436340332f,
+0.98527765274047851562f,-0.18303988873958587646f,0.98310548067092895508f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.20711137354373931885f,
+0.97831737995147705078f,-0.21910123527050018311f,0.97570210695266723633f,
+-0.23105810582637786865f,0.97293996810913085938f,-0.24298018217086791992f,
+0.97003126144409179688f,-0.25486564636230468750f,0.96697646379470825195f,
+-0.26671275496482849121f,0.96377605199813842773f,-0.27851969003677368164f,
+0.96043050289154052734f,-0.29028466343879699707f,0.95694035291671752930f,
+-0.30200594663619995117f,0.95330601930618286133f,-0.31368175148963928223f,
+0.94952815771102905273f,-0.32531028985977172852f,0.94560730457305908203f,
+-0.33688986301422119141f,0.94154405593872070312f,-0.34841868281364440918f,
+0.93733900785446166992f,-0.35989505052566528320f,0.93299281597137451172f,
+-0.37131720781326293945f,0.92850607633590698242f,-0.38268342614173889160f,
+0.92387950420379638672f,-0.39399203658103942871f,0.91911387443542480469f,
+-0.40524131059646606445f,0.91420978307723999023f,-0.41642954945564270020f,
+0.90916800498962402344f,-0.42755508422851562500f,0.90398931503295898438f,
+-0.43861624598503112793f,0.89867448806762695312f,-0.44961133599281311035f,
+0.89322429895401000977f,-0.46053871512413024902f,0.88763964176177978516f,
+-0.47139674425125122070f,0.88192129135131835938f,-0.48218378424644470215f,
+0.87607008218765258789f,-0.49289819598197937012f,0.87008696794509887695f,
+-0.50353837013244628906f,0.86397284269332885742f,-0.51410275697708129883f,
+0.85772860050201416016f,-0.52458965778350830078f,0.85135519504547119141f,
+-0.53499764204025268555f,0.84485357999801635742f,-0.54532498121261596680f,
+0.83822470903396606445f,-0.55557024478912353516f,0.83146959543228149414f,
+-0.56573182344436645508f,0.82458931207656860352f,-0.57580816745758056641f,
+0.81758481264114379883f,-0.58579784631729125977f,0.81045717000961303711f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.60551106929779052734f,
+0.79583692550659179688f,-0.61523157358169555664f,0.78834640979766845703f,
+-0.62485951185226440430f,0.78073722124099731445f,-0.63439327478408813477f,
+0.77301043272018432617f,-0.64383155107498168945f,0.76516723632812500000f,
+-0.65317285060882568359f,0.75720882415771484375f,-0.66241580247879028320f,
+0.74913638830184936523f,-0.67155897617340087891f,0.74095112085342407227f,
+-0.68060100078582763672f,0.73265427350997924805f,-0.68954056501388549805f,
+0.72424709796905517578f,-0.69837623834609985352f,0.71573084592819213867f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.71573084592819213867f,
+0.69837623834609985352f,-0.72424709796905517578f,0.68954056501388549805f,
+-0.73265427350997924805f,0.68060100078582763672f,-0.74095112085342407227f,
+0.67155897617340087891f,-0.74913638830184936523f,0.66241580247879028320f,
+-0.75720882415771484375f,0.65317285060882568359f,-0.76516723632812500000f,
+0.64383155107498168945f,-0.77301043272018432617f,0.63439327478408813477f,
+-0.78073722124099731445f,0.62485951185226440430f,-0.78834640979766845703f,
+0.61523157358169555664f,-0.79583692550659179688f,0.60551106929779052734f,
+-0.80320751667022705078f,0.59569931030273437500f,-0.81045717000961303711f,
+0.58579784631729125977f,-0.81758481264114379883f,0.57580816745758056641f,
+-0.82458931207656860352f,0.56573182344436645508f,-0.83146959543228149414f,
+0.55557024478912353516f,-0.83822470903396606445f,0.54532498121261596680f,
+-0.84485357999801635742f,0.53499764204025268555f,-0.85135519504547119141f,
+0.52458965778350830078f,-0.85772860050201416016f,0.51410275697708129883f,
+-0.86397284269332885742f,0.50353837013244628906f,-0.87008696794509887695f,
+0.49289819598197937012f,-0.87607008218765258789f,0.48218378424644470215f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.88763964176177978516f,
+0.46053871512413024902f,-0.89322429895401000977f,0.44961133599281311035f,
+-0.89867448806762695312f,0.43861624598503112793f,-0.90398931503295898438f,
+0.42755508422851562500f,-0.90916800498962402344f,0.41642954945564270020f,
+-0.91420978307723999023f,0.40524131059646606445f,-0.91911387443542480469f,
+0.39399203658103942871f,-0.92387950420379638672f,0.38268342614173889160f,
+-0.92850607633590698242f,0.37131720781326293945f,-0.93299281597137451172f,
+0.35989505052566528320f,-0.93733900785446166992f,0.34841868281364440918f,
+-0.94154405593872070312f,0.33688986301422119141f,-0.94560730457305908203f,
+0.32531028985977172852f,-0.94952815771102905273f,0.31368175148963928223f,
+-0.95330601930618286133f,0.30200594663619995117f,-0.95694035291671752930f,
+0.29028466343879699707f,-0.96043050289154052734f,0.27851969003677368164f,
+-0.96377605199813842773f,0.26671275496482849121f,-0.96697646379470825195f,
+0.25486564636230468750f,-0.97003126144409179688f,0.24298018217086791992f,
+-0.97293996810913085938f,0.23105810582637786865f,-0.97570210695266723633f,
+0.21910123527050018311f,-0.97831737995147705078f,0.20711137354373931885f,
+-0.98078525066375732422f,0.19509032368659973145f,-0.98310548067092895508f,
+0.18303988873958587646f,-0.98527765274047851562f,0.17096188664436340332f,
+-0.98730140924453735352f,0.15885815024375915527f,-0.98917651176452636719f,
+0.14673046767711639404f,-0.99090266227722167969f,0.13458070158958435059f,
+-0.99247956275939941406f,0.12241067737340927124f,-0.99390697479248046875f,
+0.11022220551967620850f,-0.99518471956253051758f,0.09801714122295379639f,
+-0.99631261825561523438f,0.08579730987548828125f,-0.99729043245315551758f,
+0.07356456667184829712f,-0.99811810255050659180f,0.06132073700428009033f,
+-0.99879544973373413086f,0.04906767606735229492f,-0.99932235479354858398f,
+0.03680722415447235107f,-0.99969881772994995117f,0.02454122900962829590f,
+-0.99992471933364868164f,0.01227153837680816650f,1.00000000000000000000f,
+0.00000000000000000000f,0.99879544973373413086f,0.04906767606735229492f,
+0.99518471956253051758f,0.09801714122295379639f,0.98917651176452636719f,
+0.14673046767711639404f,0.98078525066375732422f,0.19509032368659973145f,
+0.97003126144409179688f,0.24298018217086791992f,0.95694035291671752930f,
+0.29028466343879699707f,0.94154405593872070312f,0.33688986301422119141f,
+0.92387950420379638672f,0.38268342614173889160f,0.90398931503295898438f,
+0.42755508422851562500f,0.88192129135131835938f,0.47139674425125122070f,
+0.85772860050201416016f,0.51410275697708129883f,0.83146959543228149414f,
+0.55557024478912353516f,0.80320751667022705078f,0.59569931030273437500f,
+0.77301043272018432617f,0.63439327478408813477f,0.74095112085342407227f,
+0.67155897617340087891f,0.70710676908493041992f,0.70710676908493041992f,
+0.67155897617340087891f,0.74095112085342407227f,0.63439327478408813477f,
+0.77301043272018432617f,0.59569931030273437500f,0.80320751667022705078f,
+0.55557024478912353516f,0.83146959543228149414f,0.51410275697708129883f,
+0.85772860050201416016f,0.47139674425125122070f,0.88192129135131835938f,
+0.42755508422851562500f,0.90398931503295898438f,0.38268342614173889160f,
+0.92387950420379638672f,0.33688986301422119141f,0.94154405593872070312f,
+0.29028466343879699707f,0.95694035291671752930f,0.24298018217086791992f,
+0.97003126144409179688f,0.19509032368659973145f,0.98078525066375732422f,
+0.14673046767711639404f,0.98917651176452636719f,0.09801714122295379639f,
+0.99518471956253051758f,0.04906767606735229492f,0.99879544973373413086f,
+0.00000000000000006123f,1.00000000000000000000f,-0.04906767606735229492f,
+0.99879544973373413086f,-0.09801714122295379639f,0.99518471956253051758f,
+-0.14673046767711639404f,0.98917651176452636719f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.24298018217086791992f,0.97003126144409179688f,
+-0.29028466343879699707f,0.95694035291671752930f,-0.33688986301422119141f,
+0.94154405593872070312f,-0.38268342614173889160f,0.92387950420379638672f,
+-0.42755508422851562500f,0.90398931503295898438f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.51410275697708129883f,0.85772860050201416016f,
+-0.55557024478912353516f,0.83146959543228149414f,-0.59569931030273437500f,
+0.80320751667022705078f,-0.63439327478408813477f,0.77301043272018432617f,
+-0.67155897617340087891f,0.74095112085342407227f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.74095112085342407227f,0.67155897617340087891f,
+-0.77301043272018432617f,0.63439327478408813477f,-0.80320751667022705078f,
+0.59569931030273437500f,-0.83146959543228149414f,0.55557024478912353516f,
+-0.85772860050201416016f,0.51410275697708129883f,-0.88192129135131835938f,
+0.47139674425125122070f,-0.90398931503295898438f,0.42755508422851562500f,
+-0.92387950420379638672f,0.38268342614173889160f,-0.94154405593872070312f,
+0.33688986301422119141f,-0.95694035291671752930f,0.29028466343879699707f,
+-0.97003126144409179688f,0.24298018217086791992f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.98917651176452636719f,0.14673046767711639404f,
+-0.99518471956253051758f,0.09801714122295379639f,-0.99879544973373413086f,
+0.04906767606735229492f,1.00000000000000000000f,0.00000000000000000000f,
+0.98078525066375732422f,0.19509032368659973145f,0.92387950420379638672f,
+0.38268342614173889160f,0.83146959543228149414f,0.55557024478912353516f,
+0.70710676908493041992f,0.70710676908493041992f,0.55557024478912353516f,
+0.83146959543228149414f,0.38268342614173889160f,0.92387950420379638672f,
+0.19509032368659973145f,0.98078525066375732422f,0.00000000000000006123f,
+1.00000000000000000000f,-0.19509032368659973145f,0.98078525066375732422f,
+-0.38268342614173889160f,0.92387950420379638672f,-0.55557024478912353516f,
+0.83146959543228149414f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.83146959543228149414f,0.55557024478912353516f,-0.92387950420379638672f,
+0.38268342614173889160f,-0.98078525066375732422f,0.19509032368659973145f,
+1.00000000000000000000f,0.00000000000000000000f,0.70710676908493041992f,
+0.70710676908493041992f,0.00000000000000006123f,1.00000000000000000000f,
+-0.70710676908493041992f,0.70710676908493041992f,};
 
 float32_t rearranged_twiddle_stride3_1024_f32[680]={
-1.00000000000000000000f,0.00000000000000000000f,0.99983058179582340319f,
-0.01840672990580482019f,0.99932238458834954375f,0.03680722294135883171f,
-0.99847558057329477421f,0.05519524434968993420f,0.99729045667869020697f,
-0.07356456359966742631f,0.99576741446765981713f,0.09190895649713272386f,
-0.99390697000235606051f,0.11022220729388305938f,0.99170975366909952520f,
-0.12849811079379316880f,0.98917650996478101444f,0.14673047445536174793f,
-0.98630809724459866938f,0.16491312048996989437f,0.98310548743121628501f,
-0.18303988795514095078f,0.97956976568544051887f,0.20110463484209190055f,
-0.97570213003852857003f,0.21910124015686979759f,0.97150389098625178352f,
-0.23702360599436719801f,0.96697647104485207059f,0.25486565960451457169f,
-0.96212140426904158019f,0.27262135544994897662f,0.95694033573220882438f,
-0.29028467725446233105f,0.95143502096900833820f,0.30784964004153486661f,
-0.94560732538052127971f,0.32531029216226292622f,0.93945922360218991898f,
-0.34266071731199437833f,0.93299279883473895669f,0.35989503653498811087f,
-0.92621024213831137928f,0.37700741021641825945f,0.91911385169005777040f,
-0.39399204006104809883f,0.91170603200542987832f,0.41084317105790391089f,
-0.90398929312344333820f,0.42755509343028208491f,0.89596624975618521791f,
-0.44412214457042920035f,0.88763962040285393496f,0.46053871095824000514f,
-0.87901222642863352519f,0.47679923006332208812f,0.87008699110871146054f,
-0.49289819222978403790f,0.86086693863776730939f,0.50883014254310698909f,
-0.85135519310526519554f,0.52458968267846894928f,0.84155497743689844370f,
-0.54017147272989285423f,0.83146961230254523567f,0.55557023301960217765f,
-0.82110251499110464835f,0.57078074588696725566f,0.81045719825259476821f,
-0.58579785745643886408f,0.79953726910790501314f,0.60061647938386897305f,
-0.78834642762660622761f,0.61523159058062681925f,0.77688846567323244230f,
-0.62963823891492698426f,0.76516726562245895860f,0.64383154288979138613f,
-0.75318679904361252042f,0.65780669329707863735f,0.74095112535495921691f,
-0.67155895484701833009f,0.72846439044822519637f,0.68508366777270035541f,
-0.71573082528381870571f,0.69837624940897280457f,0.70275474445722529993f,
-0.71143219574521643356f,0.68954054473706694051f,0.72424708295146689174f,
-0.67609270357531603413f,0.73681656887736979300f,0.66241577759017178373f,
-0.74913639452345925918f,0.64851440102211255212f,0.76120238548426177871f,
-0.63439328416364548779f,0.77301045336273688235f,0.62005721176328920663f,
-0.78455659715557524159f,0.60551104140432554512f,0.79583690460888345530f,
-0.59075970185887427544f,0.80684755354379922299f,0.57580819141784533866f,
-0.81758481315158371139f,0.56066157619733603124f,0.82804504525775579626f,
-0.54532498842204646383f,0.83822470555483796772f,0.52980362468629482731f,
-0.84812034480329712149f,0.51410274419322166128f,0.85772861000027211809f,
-0.49822766697278186854f,0.86704624551569264845f,0.48218377207912282989f,
-0.87607009419540660122f,0.46597649576796612569f,0.88479709843093778954f,
-0.44961132965460659516f,0.89322430119551532446f,0.43309381885315201277f,
-0.90134884704602202810f,0.41642956009763731906f,0.90916798309052226923f,
-0.39962419984564678810f,0.91667905992104270485f,0.38268343236508983729f,
-0.92387953251128673848f,0.36561299780477396482f,0.93076696107898371224f,
-0.34841868024943450921f,0.93733901191257495977f,0.33110630575987642921f,
-0.94359345816196038559f,0.31368174039889157312f,0.94952818059303667475f,
-0.29615088824362395536f,0.95514116830577067141f,0.27851968938505305973f,
-0.96043051941556578655f,0.26079411791527556952f,0.96539444169768939830f,
-0.24298017990326398197f,0.97003125319454397424f,0.22508391135979277653f,
-0.97433938278557585821f,0.20711137619221856032f,0.97831737071962765473f,
-0.18906866414980627589f,0.98196386910955524296f,0.17096188876030135595f,
-0.98527764238894122162f,0.15279718525844340760f,0.98825756773074946437f,
-0.13458070850712622324f,0.99090263542778000971f,0.11631863091190487725f,
-0.99321194923479450001f,0.09801714032956077016f,0.99518472667219681771f,
-0.07968243797143012563f,0.99682029929116566791f,0.06132073630220864768f,
-0.99811811290014917919f,0.04293825693494095902f,0.99907772775264536147f,
-0.02454122852291226384f,0.99969881869620424997f,0.00613588464915451517f,
-0.99998117528260110909f,-0.01227153828571982304f,0.99992470183914450299f,
--0.03067480317663645942f,0.99952941750109314256f,-0.04906767432741800800f,
-0.99879545620517240501f,-0.06744391956366398155f,0.99772306664419163624f,
--0.08579731234443975507f,0.99631261218277800129f,-0.10412163387205460030f,
-0.99456457073425541537f,-0.12241067519921615403f,0.99247953459870996706f,
--0.14065823933284912761f,0.99005821026229712256f,-0.15885814333386127917f,
-0.98730141815785843473f,-0.17700422041214874946f,0.98421009238692902521f,
--0.19509032201612819257f,0.98078528040323043058f,-0.21311031991609125091f,
-0.97702814265775439484f,-0.23105810828067113727f,0.97293995220556017678f,
--0.24892760574572012078f,0.96852209427441737777f,-0.26671275747489830987f,
-0.96377606579543984022f,-0.28440753721127171039f,0.95870347489587159906f,
--0.30200594931922808417f,0.95330604035419386211f,-0.31950203081601563637f,
-0.94758559101774120226f,-0.33688985339221994009f,0.94154406518302080631f,
--0.35416352542049039931f,0.93518350993894761025f,-0.37131719395183748755f,
-0.92850608047321558924f,-0.38834504669882619066f,0.92151403934204201285f,
--0.40524131400498974998f,0.91420975570353069095f,-0.42200027079979968159f,
-0.90659570451491533483f,-0.43861623853852738097f,0.89867446569395392775f,
--0.45508358712634372489f,0.89044872324475798919f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.48755016014843571837f,0.87309497841829020182f,
--0.50353838372571746440f,0.86397285612158680745f,-0.51935599016558964269f,
-0.85455798836540053376f,-0.53499761988709704230f,0.84485356524970722791f,
--0.55045797293660470029f,0.83486287498638012128f,-0.56573181078361323149f,
-0.82458930278502517996f,-0.58081395809576441547f,0.81403632970594852480f,
--0.59569930449243335691f,0.80320753148064494287f,-0.61038280627630958630f,
-0.79210657730021227785f,-0.62485948814238623239f,0.78073722857209459924f,
--0.63912444486377573138f,0.76910333764557958780f,-0.65317284295377653347f,
-0.75720884650648467851f,-0.66699992230363736034f,0.74505778544146605835f,
--0.68060099779545302212f,0.73265427167241281570f,-0.69397146088965377952f,
-0.72000250796138176579f,-0.70710678118654746172f,0.70710678118654757274f,
--0.72000250796138165477f,0.69397146088965389055f,-0.73265427167241270467f,
-0.68060099779545324417f,-0.74505778544146594733f,0.66699992230363758239f,
--0.75720884650648467851f,0.65317284295377664449f,-0.76910333764557947678f,
-0.63912444486377584241f,-0.78073722857209448822f,0.62485948814238634341f,
--0.79210657730021216683f,0.61038280627630969732f,-0.80320753148064483184f,
-0.59569930449243346793f,-0.81403632970594841378f,0.58081395809576452649f,
--0.82458930278502506894f,0.56573181078361345353f,-0.83486287498638001026f,
-0.55045797293660492233f,-0.84485356524970711689f,0.53499761988709715332f,
--0.85455798836540042274f,0.51935599016558975372f,-0.86397285612158669643f,
-0.50353838372571757542f,-0.87309497841829009079f,0.48755016014843588490f,
--0.88192126434835493853f,0.47139673682599780857f,-0.89044872324475787817f,
-0.45508358712634389143f,-0.89867446569395392775f,0.43861623853852754751f,
--0.90659570451491533483f,0.42200027079979984812f,-0.91420975570353069095f,
-0.40524131400498991651f,-0.92151403934204179080f,0.38834504669882657923f,
--0.92850608047321547822f,0.37131719395183770960f,-0.93518350993894761025f,
-0.35416352542049039931f,-0.94154406518302069529f,0.33688985339222032867f,
--0.94758559101774109124f,0.31950203081601580291f,-0.95330604035419386211f,
-0.30200594931922802866f,-0.95870347489587148804f,0.28440753721127209896f,
--0.96377606579543984022f,0.26671275747489847641f,-0.96852209427441737777f,
-0.24892760574572009302f,-0.97293995220556006576f,0.23105810828067133156f,
--0.97702814265775439484f,0.21311031991609141745f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.98421009238692902521f,0.17700422041214894375f,
--0.98730141815785843473f,0.15885814333386147346f,-0.99005821026229701154f,
-0.14065823933284954395f,-0.99247953459870996706f,0.12241067519921634832f,
--0.99456457073425541537f,0.10412163387205457254f,-0.99631261218277800129f,
-0.08579731234444015753f,-0.99772306664419163624f,0.06744391956366417584f,
--0.99879545620517240501f,0.04906767432741796636f,-0.99952941750109314256f,
-0.03067480317663686534f,-0.99992470183914450299f,0.01227153828572000692f,
--0.99998117528260110909f,-0.00613588464915455420f,-0.99969881869620424997f,
--0.02454122852291207996f,-0.99907772775264536147f,-0.04293825693494077861f,
--0.99811811290014917919f,-0.06132073630220824523f,-0.99682029929116577893f,
--0.07968243797142994522f,-0.99518472667219692873f,-0.09801714032956058975f,
--0.99321194923479461103f,-0.11631863091190447479f,-0.99090263542778000971f,
--0.13458070850712605671f,-0.98825756773074946437f,-0.15279718525844343535f,
--0.98527764238894133264f,-0.17096188876030096737f,-0.98196386910955524296f,
--0.18906866414980610935f,-0.97831737071962765473f,-0.20711137619221858808f,
--0.97433938278557585821f,-0.22508391135979261000f,-0.97003125319454397424f,
--0.24298017990326381543f,-0.96539444169768939830f,-0.26079411791527562503f,
--0.96043051941556589757f,-0.27851968938505289319f,-0.95514116830577078243f,
--0.29615088824362378883f,-0.94952818059303678577f,-0.31368174039889118454f,
--0.94359345816196038559f,-0.33110630575987626267f,-0.93733901191257495977f,
--0.34841868024943456472f,-0.93076696107898382326f,-0.36561299780477357624f,
--0.92387953251128684951f,-0.38268343236508967076f,-0.91667905992104270485f,
--0.39962419984564684361f,-0.90916798309052249127f,-0.41642956009763693048f,
--0.90134884704602202810f,-0.43309381885315184624f,-0.89322430119551532446f,
--0.44961132965460665067f,-0.88479709843093790056f,-0.46597649576796595916f,
--0.87607009419540660122f,-0.48218377207912266336f,-0.86704624551569287050f,
--0.49822766697278153547f,-0.85772861000027211809f,-0.51410274419322155026f,
--0.84812034480329723252f,-0.52980362468629460526f,-0.83822470555483818977f,
--0.54532498842204613076f,-0.82804504525775590729f,-0.56066157619733592021f,
--0.81758481315158371139f,-0.57580819141784533866f,-0.80684755354379944503f,
--0.59075970185887394237f,-0.79583690460888356633f,-0.60551104140432543410f,
--0.78455659715557524159f,-0.62005721176328920663f,-0.77301045336273710440f,
--0.63439328416364526575f,-0.76120238548426188974f,-0.64851440102211233008f,
--0.74913639452345925918f,-0.66241577759017178373f,-0.73681656887737001504f,
--0.67609270357531581208f,-0.72424708295146700276f,-0.68954054473706682948f,
--0.71143219574521665560f,-0.70275474445722507788f,-0.69837624940897302661f,
--0.71573082528381848366f,-0.68508366777270035541f,-0.72846439044822519637f,
--0.67155895484701866316f,-0.74095112535495888384f,-0.65780669329707874837f,
--0.75318679904361240940f,-0.64383154288979149715f,-0.76516726562245895860f,
--0.62963823891492687324f,-0.77688846567323255332f,-0.61523159058062726334f,
--0.78834642762660589455f,-0.60061647938386930612f,-0.79953726910790479110f,
--0.58579785745643908612f,-0.81045719825259465718f,-0.57078074588696736669f,
--0.82110251499110464835f,-0.55557023301960217765f,-0.83146961230254523567f,
--0.54017147272989274320f,-0.84155497743689855472f,-0.52458968267846928235f,
--0.85135519310526486247f,-0.50883014254310732216f,-0.86086693863776708735f,
--0.49289819222978420443f,-0.87008699110871134952f,-0.47679923006332214364f,
--0.87901222642863341417f,-0.46053871095823989412f,-0.88763962040285404598f,
--0.44412214457042975546f,-0.89596624975618488484f,-0.42755509343028247349f,
--0.90398929312344311615f,-0.41084317105790418845f,-0.91170603200542976730f,
--0.39399204006104820985f,-0.91911385169005765938f,-0.37700741021641820394f,
--0.92621024213831137928f,-0.35989503653498794433f,-0.93299279883473895669f,
--0.34266071731199487793f,-0.93945922360218969693f,-0.32531029216226331480f,
--0.94560732538052116869f,-0.30784964004153508865f,-0.95143502096900833820f,
--0.29028467725446244208f,-0.95694033573220882438f,-0.27262135544994886560f,
--0.96212140426904158019f,-0.25486565960451434965f,-0.96697647104485218161f,
--0.23702360599436766986f,-0.97150389098625167250f,-0.21910124015687010290f,
--0.97570213003852845901f,-0.20110463484209206708f,-0.97956976568544051887f,
--0.18303988795514095078f,-0.98310548743121628501f,-0.16491312048996975559f,
--0.98630809724459866938f,-0.14673047445536230304f,-0.98917650996478090342f,
--0.12849811079379358514f,-0.99170975366909952520f,-0.11022220729388330918f,
--0.99390697000235606051f,-0.09190895649713282101f,-0.99576741446765981713f,
--0.07356456359966735692f,-0.99729045667869020697f,-0.05519524434968971216f,
--0.99847558057329477421f,-0.03680722294135933131f,-0.99932238458834943273f,
--0.01840672990580516366f,-0.99983058179582340319f,1.00000000000000000000f,
-0.00000000000000000000f,0.99729045667869020697f,0.07356456359966742631f,
-0.98917650996478101444f,0.14673047445536174793f,0.97570213003852857003f,
-0.21910124015686979759f,0.95694033573220882438f,0.29028467725446233105f,
-0.93299279883473895669f,0.35989503653498811087f,0.90398929312344333820f,
-0.42755509343028208491f,0.87008699110871146054f,0.49289819222978403790f,
-0.83146961230254523567f,0.55557023301960217765f,0.78834642762660622761f,
-0.61523159058062681925f,0.74095112535495921691f,0.67155895484701833009f,
-0.68954054473706694051f,0.72424708295146689174f,0.63439328416364548779f,
-0.77301045336273688235f,0.57580819141784533866f,0.81758481315158371139f,
-0.51410274419322166128f,0.85772861000027211809f,0.44961132965460659516f,
-0.89322430119551532446f,0.38268343236508983729f,0.92387953251128673848f,
-0.31368174039889157312f,0.94952818059303667475f,0.24298017990326398197f,
-0.97003125319454397424f,0.17096188876030135595f,0.98527764238894122162f,
-0.09801714032956077016f,0.99518472667219681771f,0.02454122852291226384f,
-0.99969881869620424997f,-0.04906767432741800800f,0.99879545620517240501f,
--0.12241067519921615403f,0.99247953459870996706f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.26671275747489830987f,0.96377606579543984022f,
--0.33688985339221994009f,0.94154406518302080631f,-0.40524131400498974998f,
-0.91420975570353069095f,-0.47139673682599769755f,0.88192126434835504956f,
--0.53499761988709704230f,0.84485356524970722791f,-0.59569930449243335691f,
-0.80320753148064494287f,-0.65317284295377653347f,0.75720884650648467851f,
--0.70710678118654746172f,0.70710678118654757274f,-0.75720884650648467851f,
-0.65317284295377664449f,-0.80320753148064483184f,0.59569930449243346793f,
--0.84485356524970711689f,0.53499761988709715332f,-0.88192126434835493853f,
-0.47139673682599780857f,-0.91420975570353069095f,0.40524131400498991651f,
--0.94154406518302069529f,0.33688985339222032867f,-0.96377606579543984022f,
-0.26671275747489847641f,-0.98078528040323043058f,0.19509032201612860891f,
--0.99247953459870996706f,0.12241067519921634832f,-0.99879545620517240501f,
-0.04906767432741796636f,-0.99969881869620424997f,-0.02454122852291207996f,
--0.99518472667219692873f,-0.09801714032956058975f,-0.98527764238894133264f,
--0.17096188876030096737f,-0.97003125319454397424f,-0.24298017990326381543f,
--0.94952818059303678577f,-0.31368174039889118454f,-0.92387953251128684951f,
--0.38268343236508967076f,-0.89322430119551532446f,-0.44961132965460665067f,
--0.85772861000027211809f,-0.51410274419322155026f,-0.81758481315158371139f,
--0.57580819141784533866f,-0.77301045336273710440f,-0.63439328416364526575f,
--0.72424708295146700276f,-0.68954054473706682948f,-0.67155895484701866316f,
--0.74095112535495888384f,-0.61523159058062726334f,-0.78834642762660589455f,
--0.55557023301960217765f,-0.83146961230254523567f,-0.49289819222978420443f,
--0.87008699110871134952f,-0.42755509343028247349f,-0.90398929312344311615f,
--0.35989503653498794433f,-0.93299279883473895669f,-0.29028467725446244208f,
--0.95694033573220882438f,-0.21910124015687010290f,-0.97570213003852845901f,
--0.14673047445536230304f,-0.98917650996478090342f,-0.07356456359966735692f,
--0.99729045667869020697f,1.00000000000000000000f,0.00000000000000000000f,
-0.95694033573220882438f,0.29028467725446233105f,0.83146961230254523567f,
-0.55557023301960217765f,0.63439328416364548779f,0.77301045336273688235f,
-0.38268343236508983729f,0.92387953251128673848f,0.09801714032956077016f,
-0.99518472667219681771f,-0.19509032201612819257f,0.98078528040323043058f,
--0.47139673682599769755f,0.88192126434835504956f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.88192126434835493853f,0.47139673682599780857f,
--0.98078528040323043058f,0.19509032201612860891f,-0.99518472667219692873f,
--0.09801714032956058975f,-0.92387953251128684951f,-0.38268343236508967076f,
--0.77301045336273710440f,-0.63439328416364526575f,-0.55557023301960217765f,
--0.83146961230254523567f,-0.29028467725446244208f,-0.95694033573220882438f,
-1.00000000000000000000f,0.00000000000000000000f,0.38268343236508983729f,
-0.92387953251128673848f,-0.70710678118654746172f,0.70710678118654757274f,
--0.92387953251128684951f,-0.38268343236508967076f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99983060359954833984f,
+0.01840673014521598816f,0.99932235479354858398f,0.03680722415447235107f,
+0.99847555160522460938f,0.05519524589180946350f,0.99729043245315551758f,
+0.07356456667184829712f,0.99576741456985473633f,0.09190895408391952515f,
+0.99390697479248046875f,0.11022220551967620850f,0.99170976877212524414f,
+0.12849810719490051270f,0.98917651176452636719f,0.14673046767711639404f,
+0.98630809783935546875f,0.16491311788558959961f,0.98310548067092895508f,
+0.18303988873958587646f,0.97956979274749755859f,0.20110464096069335938f,
+0.97570210695266723633f,0.21910123527050018311f,0.97150391340255737305f,
+0.23702360689640045166f,0.96697646379470825195f,0.25486564636230468750f,
+0.96212142705917358398f,0.27262136340141296387f,0.95694035291671752930f,
+0.29028466343879699707f,0.95143502950668334961f,0.30784964561462402344f,
+0.94560730457305908203f,0.32531028985977172852f,0.93945920467376708984f,
+0.34266072511672973633f,0.93299281597137451172f,0.35989505052566528320f,
+0.92621022462844848633f,0.37700742483139038086f,0.91911387443542480469f,
+0.39399203658103942871f,0.91170603036880493164f,0.41084316372871398926f,
+0.90398931503295898438f,0.42755508422851562500f,0.89596623182296752930f,
+0.44412213563919067383f,0.88763964176177978516f,0.46053871512413024902f,
+0.87901222705841064453f,0.47679921984672546387f,0.87008696794509887695f,
+0.49289819598197937012f,0.86086696386337280273f,0.50883013010025024414f,
+0.85135519504547119141f,0.52458965778350830078f,0.84155499935150146484f,
+0.54017144441604614258f,0.83146959543228149414f,0.55557024478912353516f,
+0.82110249996185302734f,0.57078075408935546875f,0.81045717000961303711f,
+0.58579784631729125977f,0.79953724145889282227f,0.60061645507812500000f,
+0.78834640979766845703f,0.61523157358169555664f,0.77688848972320556641f,
+0.62963825464248657227f,0.76516723632812500000f,0.64383155107498168945f,
+0.75318682193756103516f,0.65780669450759887695f,0.74095112085342407227f,
+0.67155897617340087891f,0.72846436500549316406f,0.68508368730545043945f,
+0.71573084592819213867f,0.69837623834609985352f,0.70275473594665527344f,
+0.71143221855163574219f,0.68954056501388549805f,0.72424709796905517578f,
+0.67609268426895141602f,0.73681658506393432617f,0.66241580247879028320f,
+0.74913638830184936523f,0.64851438999176025391f,0.76120239496231079102f,
+0.63439327478408813477f,0.77301043272018432617f,0.62005722522735595703f,
+0.78455656766891479492f,0.60551106929779052734f,0.79583692550659179688f,
+0.59075969457626342773f,0.80684757232666015625f,0.57580816745758056641f,
+0.81758481264114379883f,0.56066155433654785156f,0.82804507017135620117f,
+0.54532498121261596680f,0.83822470903396606445f,0.52980363368988037109f,
+0.84812033176422119141f,0.51410275697708129883f,0.85772860050201416016f,
+0.49822765588760375977f,0.86704623699188232422f,0.48218378424644470215f,
+0.87607008218765258789f,0.46597650647163391113f,0.88479709625244140625f,
+0.44961133599281311035f,0.89322429895401000977f,0.43309381604194641113f,
+0.90134882926940917969f,0.41642954945564270020f,0.90916800498962402344f,
+0.39962419867515563965f,0.91667908430099487305f,0.38268342614173889160f,
+0.92387950420379638672f,0.36561298370361328125f,0.93076694011688232422f,
+0.34841868281364440918f,0.93733900785446166992f,0.33110630512237548828f,
+0.94359344244003295898f,0.31368175148963928223f,0.94952815771102905273f,
+0.29615089297294616699f,0.95514118671417236328f,0.27851969003677368164f,
+0.96043050289154052734f,0.26079410314559936523f,0.96539443731307983398f,
+0.24298018217086791992f,0.97003126144409179688f,0.22508391737937927246f,
+0.97433936595916748047f,0.20711137354373931885f,0.97831737995147705078f,
+0.18906866014003753662f,0.98196387290954589844f,0.17096188664436340332f,
+0.98527765274047851562f,0.15279719233512878418f,0.98825758695602416992f,
+0.13458070158958435059f,0.99090266227722167969f,0.11631862819194793701f,
+0.99321192502975463867f,0.09801714122295379639f,0.99518471956253051758f,
+0.07968243956565856934f,0.99682027101516723633f,0.06132073700428009033f,
+0.99811810255050659180f,0.04293825849890708923f,0.99907773733139038086f,
+0.02454122900962829590f,0.99969881772994995117f,0.00613588467240333557f,
+0.99998116493225097656f,-0.01227153837680816650f,0.99992471933364868164f,
+-0.03067480400204658508f,0.99952942132949829102f,-0.04906767606735229492f,
+0.99879544973373413086f,-0.06744392216205596924f,0.99772304296493530273f,
+-0.08579730987548828125f,0.99631261825561523438f,-0.10412163287401199341f,
+0.99456459283828735352f,-0.12241067737340927124f,0.99247956275939941406f,
+-0.14065824449062347412f,0.99005818367004394531f,-0.15885815024375915527f,
+0.98730140924453735352f,-0.17700421810150146484f,0.98421007394790649414f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.21311031281948089600f,
+0.97702813148498535156f,-0.23105810582637786865f,0.97293996810913085938f,
+-0.24892760813236236572f,0.96852207183837890625f,-0.26671275496482849121f,
+0.96377605199813842773f,-0.28440752625465393066f,0.95870345830917358398f,
+-0.30200594663619995117f,0.95330601930618286133f,-0.31950202584266662598f,
+0.94758558273315429688f,-0.33688986301422119141f,0.94154405593872070312f,
+-0.35416352748870849609f,0.93518352508544921875f,-0.37131720781326293945f,
+0.92850607633590698242f,-0.38834503293037414551f,0.92151403427124023438f,
+-0.40524131059646606445f,0.91420978307723999023f,-0.42200025916099548340f,
+0.90659570693969726562f,-0.43861624598503112793f,0.89867448806762695312f,
+-0.45508357882499694824f,0.89044874906539916992f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.48755016922950744629f,0.87309497594833374023f,
+-0.50353837013244628906f,0.86397284269332885742f,-0.51935601234436035156f,
+0.85455799102783203125f,-0.53499764204025268555f,0.84485357999801635742f,
+-0.55045795440673828125f,0.83486288785934448242f,-0.56573182344436645508f,
+0.82458931207656860352f,-0.58081394433975219727f,0.81403630971908569336f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.61038279533386230469f,
+0.79210656881332397461f,-0.62485951185226440430f,0.78073722124099731445f,
+-0.63912445306777954102f,0.76910334825515747070f,-0.65317285060882568359f,
+0.75720882415771484375f,-0.66699993610382080078f,0.74505776166915893555f,
+-0.68060100078582763672f,0.73265427350997924805f,-0.69397145509719848633f,
+0.72000253200531005859f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.72000253200531005859f,0.69397145509719848633f,-0.73265427350997924805f,
+0.68060100078582763672f,-0.74505776166915893555f,0.66699993610382080078f,
+-0.75720882415771484375f,0.65317285060882568359f,-0.76910334825515747070f,
+0.63912445306777954102f,-0.78073722124099731445f,0.62485951185226440430f,
+-0.79210656881332397461f,0.61038279533386230469f,-0.80320751667022705078f,
+0.59569931030273437500f,-0.81403630971908569336f,0.58081394433975219727f,
+-0.82458931207656860352f,0.56573182344436645508f,-0.83486288785934448242f,
+0.55045795440673828125f,-0.84485357999801635742f,0.53499764204025268555f,
+-0.85455799102783203125f,0.51935601234436035156f,-0.86397284269332885742f,
+0.50353837013244628906f,-0.87309497594833374023f,0.48755016922950744629f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.89044874906539916992f,
+0.45508357882499694824f,-0.89867448806762695312f,0.43861624598503112793f,
+-0.90659570693969726562f,0.42200025916099548340f,-0.91420978307723999023f,
+0.40524131059646606445f,-0.92151403427124023438f,0.38834503293037414551f,
+-0.92850607633590698242f,0.37131720781326293945f,-0.93518352508544921875f,
+0.35416352748870849609f,-0.94154405593872070312f,0.33688986301422119141f,
+-0.94758558273315429688f,0.31950202584266662598f,-0.95330601930618286133f,
+0.30200594663619995117f,-0.95870345830917358398f,0.28440752625465393066f,
+-0.96377605199813842773f,0.26671275496482849121f,-0.96852207183837890625f,
+0.24892760813236236572f,-0.97293996810913085938f,0.23105810582637786865f,
+-0.97702813148498535156f,0.21311031281948089600f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.98421007394790649414f,0.17700421810150146484f,
+-0.98730140924453735352f,0.15885815024375915527f,-0.99005818367004394531f,
+0.14065824449062347412f,-0.99247956275939941406f,0.12241067737340927124f,
+-0.99456459283828735352f,0.10412163287401199341f,-0.99631261825561523438f,
+0.08579730987548828125f,-0.99772304296493530273f,0.06744392216205596924f,
+-0.99879544973373413086f,0.04906767606735229492f,-0.99952942132949829102f,
+0.03067480400204658508f,-0.99992471933364868164f,0.01227153837680816650f,
+-0.99998116493225097656f,-0.00613588467240333557f,-0.99969881772994995117f,
+-0.02454122900962829590f,-0.99907773733139038086f,-0.04293825849890708923f,
+-0.99811810255050659180f,-0.06132073700428009033f,-0.99682027101516723633f,
+-0.07968243956565856934f,-0.99518471956253051758f,-0.09801714122295379639f,
+-0.99321192502975463867f,-0.11631862819194793701f,-0.99090266227722167969f,
+-0.13458070158958435059f,-0.98825758695602416992f,-0.15279719233512878418f,
+-0.98527765274047851562f,-0.17096188664436340332f,-0.98196387290954589844f,
+-0.18906866014003753662f,-0.97831737995147705078f,-0.20711137354373931885f,
+-0.97433936595916748047f,-0.22508391737937927246f,-0.97003126144409179688f,
+-0.24298018217086791992f,-0.96539443731307983398f,-0.26079410314559936523f,
+-0.96043050289154052734f,-0.27851969003677368164f,-0.95514118671417236328f,
+-0.29615089297294616699f,-0.94952815771102905273f,-0.31368175148963928223f,
+-0.94359344244003295898f,-0.33110630512237548828f,-0.93733900785446166992f,
+-0.34841868281364440918f,-0.93076694011688232422f,-0.36561298370361328125f,
+-0.92387950420379638672f,-0.38268342614173889160f,-0.91667908430099487305f,
+-0.39962419867515563965f,-0.90916800498962402344f,-0.41642954945564270020f,
+-0.90134882926940917969f,-0.43309381604194641113f,-0.89322429895401000977f,
+-0.44961133599281311035f,-0.88479709625244140625f,-0.46597650647163391113f,
+-0.87607008218765258789f,-0.48218378424644470215f,-0.86704623699188232422f,
+-0.49822765588760375977f,-0.85772860050201416016f,-0.51410275697708129883f,
+-0.84812033176422119141f,-0.52980363368988037109f,-0.83822470903396606445f,
+-0.54532498121261596680f,-0.82804507017135620117f,-0.56066155433654785156f,
+-0.81758481264114379883f,-0.57580816745758056641f,-0.80684757232666015625f,
+-0.59075969457626342773f,-0.79583692550659179688f,-0.60551106929779052734f,
+-0.78455656766891479492f,-0.62005722522735595703f,-0.77301043272018432617f,
+-0.63439327478408813477f,-0.76120239496231079102f,-0.64851438999176025391f,
+-0.74913638830184936523f,-0.66241580247879028320f,-0.73681658506393432617f,
+-0.67609268426895141602f,-0.72424709796905517578f,-0.68954056501388549805f,
+-0.71143221855163574219f,-0.70275473594665527344f,-0.69837623834609985352f,
+-0.71573084592819213867f,-0.68508368730545043945f,-0.72846436500549316406f,
+-0.67155897617340087891f,-0.74095112085342407227f,-0.65780669450759887695f,
+-0.75318682193756103516f,-0.64383155107498168945f,-0.76516723632812500000f,
+-0.62963825464248657227f,-0.77688848972320556641f,-0.61523157358169555664f,
+-0.78834640979766845703f,-0.60061645507812500000f,-0.79953724145889282227f,
+-0.58579784631729125977f,-0.81045717000961303711f,-0.57078075408935546875f,
+-0.82110249996185302734f,-0.55557024478912353516f,-0.83146959543228149414f,
+-0.54017144441604614258f,-0.84155499935150146484f,-0.52458965778350830078f,
+-0.85135519504547119141f,-0.50883013010025024414f,-0.86086696386337280273f,
+-0.49289819598197937012f,-0.87008696794509887695f,-0.47679921984672546387f,
+-0.87901222705841064453f,-0.46053871512413024902f,-0.88763964176177978516f,
+-0.44412213563919067383f,-0.89596623182296752930f,-0.42755508422851562500f,
+-0.90398931503295898438f,-0.41084316372871398926f,-0.91170603036880493164f,
+-0.39399203658103942871f,-0.91911387443542480469f,-0.37700742483139038086f,
+-0.92621022462844848633f,-0.35989505052566528320f,-0.93299281597137451172f,
+-0.34266072511672973633f,-0.93945920467376708984f,-0.32531028985977172852f,
+-0.94560730457305908203f,-0.30784964561462402344f,-0.95143502950668334961f,
+-0.29028466343879699707f,-0.95694035291671752930f,-0.27262136340141296387f,
+-0.96212142705917358398f,-0.25486564636230468750f,-0.96697646379470825195f,
+-0.23702360689640045166f,-0.97150391340255737305f,-0.21910123527050018311f,
+-0.97570210695266723633f,-0.20110464096069335938f,-0.97956979274749755859f,
+-0.18303988873958587646f,-0.98310548067092895508f,-0.16491311788558959961f,
+-0.98630809783935546875f,-0.14673046767711639404f,-0.98917651176452636719f,
+-0.12849810719490051270f,-0.99170976877212524414f,-0.11022220551967620850f,
+-0.99390697479248046875f,-0.09190895408391952515f,-0.99576741456985473633f,
+-0.07356456667184829712f,-0.99729043245315551758f,-0.05519524589180946350f,
+-0.99847555160522460938f,-0.03680722415447235107f,-0.99932235479354858398f,
+-0.01840673014521598816f,-0.99983060359954833984f,1.00000000000000000000f,
+0.00000000000000000000f,0.99729043245315551758f,0.07356456667184829712f,
+0.98917651176452636719f,0.14673046767711639404f,0.97570210695266723633f,
+0.21910123527050018311f,0.95694035291671752930f,0.29028466343879699707f,
+0.93299281597137451172f,0.35989505052566528320f,0.90398931503295898438f,
+0.42755508422851562500f,0.87008696794509887695f,0.49289819598197937012f,
+0.83146959543228149414f,0.55557024478912353516f,0.78834640979766845703f,
+0.61523157358169555664f,0.74095112085342407227f,0.67155897617340087891f,
+0.68954056501388549805f,0.72424709796905517578f,0.63439327478408813477f,
+0.77301043272018432617f,0.57580816745758056641f,0.81758481264114379883f,
+0.51410275697708129883f,0.85772860050201416016f,0.44961133599281311035f,
+0.89322429895401000977f,0.38268342614173889160f,0.92387950420379638672f,
+0.31368175148963928223f,0.94952815771102905273f,0.24298018217086791992f,
+0.97003126144409179688f,0.17096188664436340332f,0.98527765274047851562f,
+0.09801714122295379639f,0.99518471956253051758f,0.02454122900962829590f,
+0.99969881772994995117f,-0.04906767606735229492f,0.99879544973373413086f,
+-0.12241067737340927124f,0.99247956275939941406f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.26671275496482849121f,0.96377605199813842773f,
+-0.33688986301422119141f,0.94154405593872070312f,-0.40524131059646606445f,
+0.91420978307723999023f,-0.47139674425125122070f,0.88192129135131835938f,
+-0.53499764204025268555f,0.84485357999801635742f,-0.59569931030273437500f,
+0.80320751667022705078f,-0.65317285060882568359f,0.75720882415771484375f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.75720882415771484375f,
+0.65317285060882568359f,-0.80320751667022705078f,0.59569931030273437500f,
+-0.84485357999801635742f,0.53499764204025268555f,-0.88192129135131835938f,
+0.47139674425125122070f,-0.91420978307723999023f,0.40524131059646606445f,
+-0.94154405593872070312f,0.33688986301422119141f,-0.96377605199813842773f,
+0.26671275496482849121f,-0.98078525066375732422f,0.19509032368659973145f,
+-0.99247956275939941406f,0.12241067737340927124f,-0.99879544973373413086f,
+0.04906767606735229492f,-0.99969881772994995117f,-0.02454122900962829590f,
+-0.99518471956253051758f,-0.09801714122295379639f,-0.98527765274047851562f,
+-0.17096188664436340332f,-0.97003126144409179688f,-0.24298018217086791992f,
+-0.94952815771102905273f,-0.31368175148963928223f,-0.92387950420379638672f,
+-0.38268342614173889160f,-0.89322429895401000977f,-0.44961133599281311035f,
+-0.85772860050201416016f,-0.51410275697708129883f,-0.81758481264114379883f,
+-0.57580816745758056641f,-0.77301043272018432617f,-0.63439327478408813477f,
+-0.72424709796905517578f,-0.68954056501388549805f,-0.67155897617340087891f,
+-0.74095112085342407227f,-0.61523157358169555664f,-0.78834640979766845703f,
+-0.55557024478912353516f,-0.83146959543228149414f,-0.49289819598197937012f,
+-0.87008696794509887695f,-0.42755508422851562500f,-0.90398931503295898438f,
+-0.35989505052566528320f,-0.93299281597137451172f,-0.29028466343879699707f,
+-0.95694035291671752930f,-0.21910123527050018311f,-0.97570210695266723633f,
+-0.14673046767711639404f,-0.98917651176452636719f,-0.07356456667184829712f,
+-0.99729043245315551758f,1.00000000000000000000f,0.00000000000000000000f,
+0.95694035291671752930f,0.29028466343879699707f,0.83146959543228149414f,
+0.55557024478912353516f,0.63439327478408813477f,0.77301043272018432617f,
+0.38268342614173889160f,0.92387950420379638672f,0.09801714122295379639f,
+0.99518471956253051758f,-0.19509032368659973145f,0.98078525066375732422f,
+-0.47139674425125122070f,0.88192129135131835938f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.88192129135131835938f,0.47139674425125122070f,
+-0.98078525066375732422f,0.19509032368659973145f,-0.99518471956253051758f,
+-0.09801714122295379639f,-0.92387950420379638672f,-0.38268342614173889160f,
+-0.77301043272018432617f,-0.63439327478408813477f,-0.55557024478912353516f,
+-0.83146959543228149414f,-0.29028466343879699707f,-0.95694035291671752930f,
+1.00000000000000000000f,0.00000000000000000000f,0.38268342614173889160f,
+0.92387950420379638672f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.92387950420379638672f,-0.38268342614173889160f,};
 
 #endif
 
@@ -1023,2740 +1026,2740 @@ uint32_t rearranged_twiddle_tab_stride3_arr_4096_f32[6]={
 0,2048,2560,2688,2720,0,};
 
 float32_t rearranged_twiddle_stride1_4096_f32[2728]={
-1.00000000000000000000f,0.00000000000000000000f,0.99999882345170187925f,
-0.00153398018628476550f,0.99999529380957619118f,0.00306795676296597614f,
-0.99998941108192840321f,0.00460192612044857050f,0.99998117528260110909f,
-0.00613588464915447527f,0.99997058643097413988f,0.00766982873953109701f,
-0.99995764455196389786f,0.00920375478205981944f,0.99994234967602391162f,
-0.01073765916726449055f,0.99992470183914450299f,0.01227153828571992539f,
-0.99990470108285289808f,0.01380538852806039059f,0.99988234745421256111f,
-0.01533920628498810015f,0.99985764100582386060f,0.01687298794728171042f,
-0.99983058179582340319f,0.01840672990580482019f,0.99980116988788425569f,
-0.01994042855151444138f,0.99976940535121527898f,0.02147408027546950787f,
-0.99973528826056168306f,0.02300768146883936868f,0.99969881869620424997f,
-0.02454122852291228812f,0.99965999674395922270f,0.02607471782910390085f,
-0.99961882249517863830f,0.02760814577896573974f,0.99957529604674921764f,
-0.02914150876419372219f,0.99952941750109314256f,0.03067480317663662595f,
-0.99948118696616694567f,0.03220802540830458582f,0.99943060455546173237f,
-0.03374117185137757990f,0.99937767038800284780f,0.03527423889821394709f,
-0.99932238458834954375f,0.03680722294135883171f,0.99926474728659442359f,
-0.03834012037355269409f,0.99920475861836388631f,0.03987292758773981066f,
-0.99914241872481690532f,0.04140564097707673946f,0.99907772775264536147f,
-0.04293825693494082024f,0.99901068585407337697f,0.04447077185493866769f,
-0.99894129318685687124f,0.04600318213091462299f,0.99886954991428356099f,
-0.04753548415695930257f,0.99879545620517240501f,0.04906767432741801493f,
-0.99871901223387293811f,0.05059974903689928166f,0.99864021818026527111f,
-0.05213170468028332366f,0.99855907422975931365f,0.05366353765273051968f,
-0.99847558057329477421f,0.05519524434968993420f,0.99838973740734016094f,
-0.05672682116690774823f,0.99830154493389289261f,0.05825826450043575244f,
-0.99821100336047818846f,0.05978957074663986820f,0.99811811290014917919f,
-0.06132073630220857829f,0.99802287377148624081f,0.06285175756416140624f,
-0.99792528619859599548f,0.06438263092985746505f,0.99782535041111164453f,
-0.06591335279700380467f,0.99772306664419163624f,0.06744391956366405094f,
-0.99761843513851955478f,0.06897432762826674613f,0.99751145614030345410f,
-0.07050457338961385600f,0.99740212990127530279f,0.07203465324688933247f,
-0.99729045667869020697f,0.07356456359966742631f,0.99717643673532618820f,
-0.07509430084792130533f,0.99706007033948296225f,0.07662386139203149205f,
-0.99694135776498216117f,0.07815324163279423197f,0.99682029929116566791f,
-0.07968243797143012563f,0.99669689520289606044f,0.08121144680959244133f,
-0.99657114579055483539f,0.08274026454937569164f,0.99644305135004263008f,
-0.08426888759332407108f,0.99631261218277800129f,0.08579731234443989385f,
-0.99617982859569698117f,0.08732553520619205922f,0.99604470090125196702f,
-0.08885355258252460031f,0.99590722941741172125f,0.09038136087786498296f,
-0.99576741446765981713f,0.09190895649713272386f,0.99562525638099430569f,
-0.09343633584574778661f,0.99548075549192693856f,0.09496349532963899165f,
-0.99533391214048227980f,0.09649043135525259274f,0.99518472667219692873f,
-0.09801714032956060363f,0.99503319943811863180f,0.09954361866006931903f,
-0.99487933079480561638f,0.10106986275482782167f,0.99472312110432570265f,
-0.10259586902243628126f,0.99456457073425541537f,0.10412163387205458642f,
-0.99440368005767909576f,0.10564715371341061589f,0.99424044945318790223f,
-0.10717242495680884273f,0.99407487930487936634f,0.10869744401313871651f,
-0.99390697000235606051f,0.11022220729388305938f,0.99373672194072459884f,
-0.11174671121112658700f,0.99356413552059530403f,0.11327095217756434631f,
-0.99338921114808065305f,0.11479492660651008373f,0.99321194923479450001f,
-0.11631863091190475235f,0.99303235019785141002f,0.11784206150832497728f,
-0.99285041445986510489f,0.11936521481099135467f,0.99266614244894801899f,
-0.12088808723577708359f,0.99247953459870996706f,0.12241067519921619566f,
-0.99229059134825736699f,0.12393297511851215920f,0.99209931314219179654f,
-0.12545498341154623367f,0.99190570043060932726f,0.12697669649688586579f,
-0.99170975366909952520f,0.12849811079379316880f,0.99151147331874389668f,
-0.13001922272223334631f,0.99131085984611544415f,0.13154002870288311611f,
-0.99110791372327688986f,0.13306052515713906459f,0.99090263542778000971f,
-0.13458070850712616773f,0.99069502544266463406f,0.13610057517570620100f,
-0.99048508425645709341f,0.13762012158648603832f,0.99027281236316910817f,
-0.13913934416382620074f,0.99005821026229712256f,0.14065823933284921088f,
-0.98984127845882052821f,0.14217680351944803063f,0.98962201746320088702f,
-0.14369503315029447110f,0.98940042779138037687f,0.14521292465284746376f,
-0.98917650996478101444f,0.14673047445536174793f,0.98895026451030298986f,
-0.14824767898689603096f,0.98872169196032377858f,0.14976453467732150915f,
-0.98849079285269658701f,0.15128103795733022219f,0.98825756773074946437f,
-0.15279718525844343535f,0.98802201714328352633f,0.15431297301302010494f,
-0.98778414164457217783f,0.15582839765426523271f,0.98754394179435922574f,
-0.15734345561623824805f,0.98730141815785843473f,0.15885814333386144570f,
-0.98705657130575097380f,0.16037245724292828464f,0.98680940181418552726f,
-0.16188639378011182579f,0.98655991026477540817f,0.16339994938297322524f,
-0.98630809724459866938f,0.16491312048996989437f,0.98605396334619543897f,
-0.16642590354046410406f,0.98579750916756747614f,0.16793829497473117263f,
-0.98553873531217606185f,0.16945029123396795900f,0.98527764238894122162f,
-0.17096188876030121717f,0.98501423101223983814f,0.17247308399679595059f,
-0.98474850180190420801f,0.17398387338746382214f,0.98448045538322093151f,
-0.17549425337727142526f,0.98421009238692902521f,0.17700422041214874946f,
-0.98393741344921892278f,0.17851377093899750692f,0.98366241921173025453f,
-0.18002290140569951471f,0.98338511032155118130f,0.18153160826112496595f,
-0.98310548743121628501f,0.18303988795514095078f,0.98282355119870523641f,
-0.18454773693861961648f,0.98253930228744124076f,0.18605515166344663291f,
-0.98225274136628937249f,0.18756212858252960252f,0.98196386910955524296f,
-0.18906866414980619262f,0.98167268619698311305f,0.19057475482025273972f,
-0.98137919331375456089f,0.19208039704989243734f,0.98108339115048670553f,
-0.19358558729580360724f,0.98078528040323043058f,0.19509032201612824808f,
-0.98048486177346938497f,0.19659459767008022335f,0.98018213596811742949f,
-0.19809841071795356027f,0.97987710369951763756f,0.19960175762113097075f,
-0.97956976568544051887f,0.20110463484209190055f,0.97926012264908202098f,
-0.20260703884442113343f,0.97894817531906219710f,0.20410896609281686809f,
-0.97863392442942320759f,0.20561041305309923910f,0.97831737071962765473f,
-0.20711137619221856032f,0.97799851493455713936f,0.20861185197826348503f,
-0.97767735782450992943f,0.21011183688046961016f,0.97735390014519996082f,
-0.21161132736922755315f,0.97702814265775439484f,0.21311031991609136194f,
-0.97670008612871184184f,0.21460881099378675829f,0.97636973133002114000f,
-0.21610679707621952006f,0.97603707903903902388f,0.21760427463848364127f,
-0.97570213003852857003f,0.21910124015686979759f,0.97536488511665697665f,
-0.22059769010887350649f,0.97502534506699412020f,0.22209362097320350937f,
-0.97468351068851066810f,0.22358902922978998729f,0.97433938278557585821f,
-0.22508391135979283204f,0.97399296216795583359f,0.22657826384561000066f,
-0.97364424965081197705f,0.22807208317088573102f,0.97329324605469824672f,
-0.22956536582051886852f,0.97293995220556017678f,0.23105810828067110951f,
-0.97258436893473221296f,0.23255030703877524467f,0.97222649707893626925f,
-0.23404195858354343018f,0.97186633748027939639f,0.23553305940497548665f,
-0.97150389098625178352f,0.23702360599436719801f,0.97113915844972509284f,
-0.23851359484431841618f,0.97077214072895035013f,0.24000302244874149871f,
-0.97040283868755550234f,0.24149188530286933019f,0.97003125319454397424f,
-0.24298017990326387094f,0.96965738512429244800f,0.24446790274782415064f,
-0.96928123535654853171f,0.24595505033579459497f,0.96890280477642887202f,
-0.24744161916777326904f,0.96852209427441737777f,0.24892760574572014853f,
-0.96813910474636244441f,0.25041300657296522436f,0.96775383709347551076f,
-0.25189781815421696809f,0.96736629222232850545f,0.25338203699557015902f,
-0.96697647104485207059f,0.25486565960451457169f,0.96658437447833311928f,
-0.25634868248994291395f,0.96619000344541250413f,0.25783110216215898713f,
-0.96579335887408368500f,0.25931291513288623474f,0.96539444169768939830f,
-0.26079411791527551401f,0.96499325285492032478f,0.26227470702391358914f,
-0.96458979328981275803f,0.26375467897483134694f,0.96418406395174582890f,
-0.26523403028551179039f,0.96377606579543984022f,0.26671275747489836538f,
-0.96336579978095404631f,0.26819085706340317632f,0.96295326687368387741f,
-0.26966832557291509076f,0.96253846804435916340f,0.27114515952680801059f,
-0.96212140426904158019f,0.27262135544994897662f,0.96170207652912254037f,
-0.27409690986870638429f,0.96128048581132063966f,0.27557181931095814376f,
-0.96085663310767965850f,0.27704608030609989555f,0.96043051941556578655f,
-0.27851968938505305973f,0.96000214573766595727f,0.27999264308027321801f,
-0.95957151308198451733f,0.28146493792575794091f,0.95913862246184189431f,
-0.28293657045705539188f,0.95870347489587159906f,0.28440753721127187692f,
-0.95826607140801767226f,0.28587783472708061527f,0.95782641302753290802f,
-0.28734745954472951102f,0.95738450078897585627f,0.28881640820604947972f,
-0.95694033573220882438f,0.29028467725446233105f,0.95649391890239510161f,
-0.29175226323498926195f,0.95604525134999640557f,0.29321916269425862822f,
-0.95559433413077110586f,0.29468537218051432669f,0.95514116830577078243f,
-0.29615088824362378883f,0.95468575494133833814f,0.29761570743508619641f,
-0.95422809510910566733f,0.29907982630804047508f,0.95376818988599032512f,
-0.30054324141727345454f,0.95330604035419386211f,0.30200594931922808417f,
-0.95284164760119871573f,0.30346794657201131562f,0.95237501271976587880f,
-0.30492922973540237397f,0.95190613680793234597f,0.30638979537086091787f,
-0.95143502096900833820f,0.30784964004153486661f,0.95096166631157508231f,
-0.30930876031226872680f,0.95048607394948170235f,0.31076715274961147495f,
-0.95000824500184299914f,0.31222481392182488413f,0.94952818059303667475f,
-0.31368174039889151761f,0.94904588185270055689f,0.31513792875252244485f,
-0.94856134991573026749f,0.31659337555616584581f,0.94807458592227622507f,
-0.31804807738501494896f,0.94758559101774109124f,0.31950203081601569188f,
-0.94709436635277721717f,0.32095523242787521445f,0.94660091308328353499f,
-0.32240767880106985244f,0.94610523237040344835f,0.32385936651785285356f,
-0.94560732538052127971f,0.32531029216226292622f,0.94510719328526060501f,
-0.32676045232013173347f,0.94460483726148025685f,0.32820984357909249729f,
-0.94410025849127265918f,0.32965846252858749255f,0.94359345816196038559f,
-0.33110630575987642921f,0.94308443746609349478f,0.33255336986604422389f,
-0.94257319760144686605f,0.33399965144200938205f,0.94205973977101731265f,
-0.33544514708453160301f,0.94154406518302080631f,0.33688985339222005111f,
-0.94102617505088925753f,0.33833376696554112728f,0.94050607059326829518f,
-0.33977688440682685123f,0.93998375303401404679f,0.34121920232028235542f,
-0.93945922360218991898f,0.34266071731199437833f,0.93893248353206459900f,
-0.34410142598993881391f,0.93840353406310805795f,0.34554132496398909380f,
-0.93787237643998988545f,0.34698041084592368133f,0.93733901191257495977f,
-0.34841868024943456472f,0.93680344173592156043f,0.34985612979013491763f,
-0.93626566717027825959f,0.35129275608556709276f,0.93572568948108036935f,
-0.35272855575521072646f,0.93518350993894761025f,0.35416352542049034380f,
-0.93463912981968078064f,0.35559766170478385172f,0.93409255040425887007f,
-0.35703096123342997759f,0.93354377297883617270f,0.35846342063373654030f,
-0.93299279883473895669f,0.35989503653498811087f,0.93243962926846235550f,
-0.36132580556845428355f,0.93188426558166814750f,0.36275572436739722537f,
-0.93132670908118042608f,0.36418478956707989180f,0.93076696107898371224f,
-0.36561299780477385379f,0.93020502289221906889f,0.36704034571976718038f,
-0.92964089584318121418f,0.36846682995337232125f,0.92907458125931585702f,
-0.36989244714893410038f,0.92850608047321558924f,0.37131719395183754306f,
-0.92793539482261788720f,0.37274106700951575855f,0.92736252565040111495f,
-0.37416406297145793358f,0.92678747430458174872f,0.37558617848921721505f,
-0.92621024213831137928f,0.37700741021641825945f,0.92563083050987271516f,
-0.37842775480876555960f,0.92504924078267758425f,0.37984720892405116066f,
-0.92446547432526260391f,0.38126576922216237620f,0.92387953251128673848f,
-0.38268343236508978178f,0.92329141671952763559f,0.38410019501693504207f,
-0.92270112833387862850f,0.38551605384391884890f,0.92210866874334518339f,
-0.38693100551438858181f,0.92151403934204190183f,0.38834504669882624617f,
-0.92091724152918941204f,0.38975817406985641123f,0.92031827670911059425f,
-0.39117038430225387069f,0.91971714629122736095f,0.39258167407295146978f,
-0.91911385169005777040f,0.39399204006104809883f,0.91850839432521225181f,
-0.39540147894781635385f,0.91790077562139049672f,0.39680998741671030805f,
-0.91729099700837790632f,0.39821756215337356100f,0.91667905992104270485f,
-0.39962419984564678810f,0.91606496579933172075f,0.40102989718357562321f,
-0.91544871608826783316f,0.40243465085941843018f,0.91483031223794619713f,
-0.40383845756765407442f,0.91420975570353069095f,0.40524131400498986100f,
-0.91358704794525080750f,0.40664321687036902864f,0.91296219042839821256f,
-0.40804416286497868782f,0.91233518462332274801f,0.40944414869225759235f,
-0.91170603200542987832f,0.41084317105790391089f,0.91107473405517636067f,
-0.41224122666988288755f,0.91044129225806724737f,0.41363831223843450235f,
-0.90980570810465222209f,0.41503442447608163146f,0.90916798309052238025f,
-0.41642956009763715253f,0.90852811871630612117f,0.41782371582021227141f,
-0.90788611648766626150f,0.41921688836322390515f,0.90724197791529581636f,
-0.42060907444840250902f,0.90659570451491533483f,0.42200027079979968159f,
-0.90594729780726845902f,0.42339047414379604728f,0.90529675931811881551f,
-0.42477968120910880589f,0.90464409057824624050f,0.42616788872679961520f,
-0.90398929312344333820f,0.42755509343028208491f,0.90333236849451181705f,
-0.42894129205532949278f,0.90267331823725882600f,0.43032648134008261165f,
-0.90201214390249317976f,0.43171065802505725895f,0.90134884704602202810f,
-0.43309381885315195726f,0.90068342922864685907f,0.43447596056965565037f,
-0.90001589201616016833f,0.43585707992225547480f,0.89934623697934157338f,
-0.43723717366104408732f,0.89867446569395381673f,0.43861623853852765853f,
-0.89800057974073987932f,0.43999427130963325583f,0.89732458070541831763f,
-0.44137126873171667052f,0.89664647017868015499f,0.44274722756457002282f,
-0.89596624975618521791f,0.44412214457042920035f,0.89528392103855758410f,
-0.44549601651398174074f,0.89459948563138269595f,0.44686884016237415906f,
-0.89391294514520325265f,0.44824061228521988598f,0.89322430119551532446f,
-0.44961132965460653965f,0.89253355540276457791f,0.45098098904510386387f,
-0.89184070939234272313f,0.45234958723377088896f,0.89114576479458318392f,
-0.45371712100016386993f,0.89044872324475787817f,0.45508358712634383592f,
-0.88974958638307277692f,0.45644898239688391772f,0.88904835585466457371f,
-0.45781330359887717485f,0.88834503330959635470f,0.45917654752194408951f,
-0.88763962040285393496f,0.46053871095824000514f,0.88693211879434219469f,
-0.46189979070246273141f,0.88622253014888063838f,0.46325978355186014923f,
-0.88551085613619995307f,0.46461868630623781584f,0.88479709843093778954f,
-0.46597649576796618121f,0.88408125871263498752f,0.46733320874198841510f,
-0.88336333866573157891f,0.46868882203582790114f,0.88264333997956279099f,
-0.47004333245959561971f,0.88192126434835504956f,0.47139673682599764204f,
-0.88119711347122209322f,0.47274903195034279069f,0.88047088905216075450f,
-0.47410021465054996703f,0.87974259280004740713f,0.47545028174715586733f,
-0.87901222642863352519f,0.47679923006332208812f,0.87827979165654157523f,
-0.47814705642484300885f,0.87754529020726135258f,0.47949375766015295275f,
-0.87680872380914565145f,0.48083933060033395845f,0.87607009419540660122f,
-0.48218377207912271887f,0.87532940310411089246f,0.48352707893291868579f,
-0.87458665227817611321f,0.48486924800079106435f,0.87384184346536686316f,
-0.48621027612448641797f,0.87309497841829009079f,0.48755016014843599592f,
-0.87234605889439154058f,0.48888889691976317176f,0.87159508665595097909f,
-0.49022648328829115938f,0.87084206347007897531f,0.49156291610654989643f,
-0.87008699110871146054f,0.49289819222978403790f,0.86932987134860684186f,
-0.49423230851595967295f,0.86857070597134089507f,0.49556526182577254058f,
-0.86780949676330332299f,0.49689704902265446895f,0.86704624551569264845f,
-0.49822766697278181303f,0.86628095402451299467f,0.49955711254508183838f,
-0.86551362409056908920f,0.50088538261124071482f,0.86474425751946237817f,
-0.50221247404571078832f,0.86397285612158669643f,0.50353838372571757542f,
-0.86319942171212415971f,0.50486310853126759035f,0.86242395611104050168f,
-0.50618664534515522835f,0.86164646114308129921f,0.50750899105297087033f,
-0.86086693863776730939f,0.50883014254310698909f,0.86008539042939013974f,
-0.51015009670676680908f,0.85930181835700847337f,0.51146885043797030157f,
-0.85851622426444273994f,0.51278640063356295542f,0.85772861000027211809f,
-0.51410274419322166128f,0.85693897741782876221f,0.51541787801946292724f,
-0.85614732837519447184f,0.51673179901764987321f,0.85535366473519602870f,
-0.51804450409599933636f,0.85455798836540053376f,0.51935599016558964269f,
-0.85376030113811141042f,0.52066625414036715735f,0.85296060493036363059f,
-0.52197529293715438925f,0.85215890162391982887f,0.52328310347565643035f,
-0.85135519310526519554f,0.52458968267846894928f,0.85054948126560347976f,
-0.52589502747108463065f,0.84974176800085254868f,0.52719913478190127964f,
-0.84893205521163961347f,0.52850200154222848337f,0.84812034480329723252f,
-0.52980362468629460526f,0.84730663868585831544f,0.53110400115125500076f,
-0.84649093877405212627f,0.53240312787719790144f,0.84567324698729906540f,
-0.53370100180715296379f,0.84485356524970711689f,0.53499761988709715332f,
-0.84403189549006640835f,0.53629297906596318235f,0.84320823964184543620f,
-0.53758707629564539410f,0.84238259964318584760f,0.53887990853100842248f,
-0.84155497743689844370f,0.54017147272989285423f,0.84072537497045807253f,
-0.54146176585312344454f,0.83989379419599952126f,0.54275078486451588944f,
-0.83906023707031274217f,0.54403852673088382019f,0.83822470555483807875f,
-0.54532498842204646383f,0.83738720161566193578f,0.54661016691083486041f,
-0.83654772722351200542f,0.54789405917310018967f,0.83570628435375260423f,
-0.54917666218771965525f,0.83486287498638001026f,0.55045797293660481131f,
-0.83401750110601813315f,0.55173798840470733573f,0.83317016470191318511f,
-0.55301670558002746780f,0.83232086776792968408f,0.55429412145362000341f,
-0.83146961230254523567f,0.55557023301960217765f,0.83061640030884631436f,
-0.55684503727516010407f,0.82976123379452304540f,0.55811853122055610221f,
-0.82890411477186487499f,0.55939071185913613604f,0.82804504525775579626f,
-0.56066157619733603124f,0.82718402727366913130f,0.56193112124468935775f,
-0.82632106284566353427f,0.56319934401383409117f,0.82545615400437755138f,
-0.56446624152051938506f,0.82458930278502529099f,0.56573181078361312046f,
-0.82372051122739142759f,0.56699604882510867832f,0.82284978137582642788f,
-0.56825895267013148970f,0.82197711527924155472f,0.56952051934694714053f,
-0.82110251499110464835f,0.57078074588696725566f,0.82022598256943468620f,
-0.57203962932475704850f,0.81934752007679700903f,0.57329716669804220430f,
-0.81846712958029865792f,0.57455335504771576360f,0.81758481315158371139f,
-0.57580819141784533866f,0.81670057286682784525f,0.57706167285567944170f,
-0.81581441080673378075f,0.57831379641165558958f,0.81492632905652662156f,
-0.57956455913940563285f,0.81403632970594841378f,0.58081395809576452649f,
-0.81314441484925359394f,0.58206199034077543697f,0.81225058658520399302f,
-0.58330865293769829094f,0.81135484701706372945f,0.58455394295301532637f,
-0.81045719825259476821f,0.58579785745643886408f,0.80955764240405125864f,
-0.58704039352091796911f,0.80865618158817498262f,0.58828154822264522306f,
-0.80775281792619035848f,0.58952131864106394055f,0.80684755354379933401f,
-0.59075970185887416442f,0.80594039057117627944f,0.59199669496204099239f,
-0.80503133114296365758f,0.59323229503979979516f,0.80412037739826569549f,
-0.59446649918466443197f,0.80320753148064494287f,0.59569930449243335691f,
-0.80229279553811572168f,0.59693070806219639124f,0.80137617172314024039f,
-0.59816070699634238395f,0.80045766219262282082f,0.59938929840056454079f,
-0.79953726910790501314f,0.60061647938386897305f,0.79861499463476093297f,
-0.60184224705858002658f,0.79769084094339115509f,0.60306659854034816437f,
-0.79676481020841882774f,0.60428953094815596181f,0.79583690460888356633f,
-0.60551104140432554512f,0.79490712632823701256f,0.60673112703452447558f,
-0.79397547755433717231f,0.60794978496777363208f,0.79304196047944364167f,
-0.60916701233645320634f,0.79210657730021238887f,0.61038280627630947528f,
-0.79116933021769020318f,0.61159716392646190641f,0.79023022143731003197f,
-0.61281008242940970820f,0.78928925316888565167f,0.61402155893103849138f,
-0.78834642762660622761f,0.61523159058062681925f,0.78740174702903142911f,
-0.61644017453085364622f,0.78645521359908576731f,0.61764730793780386886f,
-0.78550682956405393220f,0.61885298796097631957f,0.78455659715557524159f,
-0.62005721176328909561f,0.78360451860963820092f,0.62125997651108755271f,
-0.78265059616657572938f,0.62246127937414996723f,0.78169483207105938671f,
-0.62366111752569453053f,0.78073722857209448822f,0.62485948814238634341f,
-0.77977778792301455368f,0.62605638840434352232f,0.77881651238147597827f,
-0.62725181549514408275f,0.77785340420945314754f,0.62844576660183271155f,
-0.77688846567323244230f,0.62963823891492698426f,0.77592169904340768660f,
-0.63082922962842447046f,0.77495310659487393057f,0.63201873593980906207f,
-0.77398269060682289844f,0.63320675505005719064f,0.77301045336273699338f,
-0.63439328416364548779f,0.77203639715038452351f,0.63557832048855611440f,
-0.77106052426181381776f,0.63676186123628419899f,0.77008283699334789674f,
-0.63794390362184405507f,0.76910333764557969882f,0.63912444486377573138f,
-0.76812202852336541881f,0.64030348218415167327f,0.76713891193582040007f,
-0.64148101280858305095f,0.76615399019631291733f,0.64265703396622686494f,
-0.76516726562245895860f,0.64383154288979138613f,0.76417874053611667406f,
-0.64500453681554392737f,0.76318841726338138010f,0.64617601298331628357f,
-0.76219629813457900891f,0.64734596863651205911f,0.76120238548426177871f,
-0.64851440102211244110f,0.76020668165120242055f,0.64968130739068319368f,
-0.75920918897838796102f,0.65084668499638087535f,0.75820990981301528144f,
-0.65201053109695950027f,0.75720884650648456748f,0.65317284295377675551f,
-0.75620600141439453523f,0.65433361783180044036f,0.75520137689653654700f,
-0.65549285299961534967f,0.75419497531688917125f,0.65665054572942893607f,
-0.75318679904361252042f,0.65780669329707863735f,0.75217685044904269986f,
-0.65896129298203731661f,0.75116513190968636771f,0.66011434206742047870f,
-0.75015164580621507273f,0.66126583783999226540f,0.74913639452345937020f,
-0.66241577759017178373f,0.74811938045040360379f,0.66356415861203976725f,
-0.74710060598018013245f,0.66471097820334479334f,0.74608007351006377927f,
-0.66585623366550972246f,0.74505778544146594733f,0.66699992230363747137f,
-0.74403374417992929057f,0.66814204142651845153f,0.74300795213512171866f,
-0.66928258834663600929f,0.74198041172083106787f,0.67042156038017308717f,
-0.74095112535495921691f,0.67155895484701833009f,0.73992009545951620275f,
-0.67269476907077285777f,0.73888732446061511361f,0.67382900037875603783f,
-0.73785281478846598269f,0.67496164610201192513f,0.73681656887736979300f,
-0.67609270357531592310f,0.73577858916571359238f,0.67722217013718033485f,
-0.73473887809596349907f,0.67835004312986146857f,0.73369743811466026084f,
-0.67947631989936496666f,0.73265427167241281570f,0.68060099779545302212f,
-0.73160938122389262972f,0.68172407417164970767f,0.73056276922782759087f,
-0.68284554638524808112f,0.72951443814699701296f,0.68396541179731540350f,
-0.72846439044822519637f,0.68508366777270035541f,0.72741262860237576593f,
-0.68620031168003858824f,0.72635915508434600873f,0.68731534089175905233f,
-0.72530397237306076796f,0.68842875278409043638f,0.72424708295146700276f,
-0.68954054473706682948f,0.72318848930652745999f,0.69065071413453460458f,
-0.72212819392921534511f,0.69175925836415774750f,0.72106619931450810501f,
-0.69286617481742462932f,0.72000250796138165477f,0.69397146088965389055f,
-0.71893712237280449351f,0.69507511398000088043f,0.71787004505573170920f,
-0.69617713149146298601f,0.71680127852109953857f,0.69727751083088651551f,
-0.71573082528381870571f,0.69837624940897280457f,0.71465868786276909308f,
-0.69947334464028376733f,0.71358486878079352422f,0.70056879394324833576f,
-0.71250937056469243469f,0.70166259474016845488f,0.71143219574521643356f,
-0.70275474445722529993f,0.71035334685706241764f,0.70384524052448493858f,
-0.70927282643886568891f,0.70493408037590488124f,0.70819063703319540259f,
-0.70602126144933974317f,0.70710678118654757274f,0.70710678118654757274f,
-0.70602126144933974317f,0.70819063703319540259f,0.70493408037590499227f,
-0.70927282643886568891f,0.70384524052448493858f,0.71035334685706241764f,
-0.70275474445722529993f,0.71143219574521643356f,0.70166259474016845488f,
-0.71250937056469232367f,0.70056879394324844679f,0.71358486878079352422f,
-0.69947334464028376733f,0.71465868786276909308f,0.69837624940897291559f,
-0.71573082528381859468f,0.69727751083088662654f,0.71680127852109942754f,
-0.69617713149146298601f,0.71787004505573170920f,0.69507511398000088043f,
-0.71893712237280438249f,0.69397146088965400157f,0.72000250796138165477f,
-0.69286617481742474034f,0.72106619931450810501f,0.69175925836415774750f,
-0.72212819392921534511f,0.69065071413453460458f,0.72318848930652734897f,
-0.68954054473706694051f,0.72424708295146689174f,0.68842875278409043638f,
-0.72530397237306076796f,0.68731534089175905233f,0.72635915508434600873f,
-0.68620031168003858824f,0.72741262860237576593f,0.68508366777270035541f,
-0.72846439044822519637f,0.68396541179731551452f,0.72951443814699690193f,
-0.68284554638524808112f,0.73056276922782759087f,0.68172407417164981869f,
-0.73160938122389262972f,0.68060099779545302212f,0.73265427167241281570f,
-0.67947631989936496666f,0.73369743811466026084f,0.67835004312986146857f,
-0.73473887809596349907f,0.67722217013718044587f,0.73577858916571348136f,
-0.67609270357531603413f,0.73681656887736979300f,0.67496164610201203615f,
-0.73785281478846598269f,0.67382900037875614885f,0.73888732446061511361f,
-0.67269476907077296879f,0.73992009545951609173f,0.67155895484701833009f,
-0.74095112535495910588f,0.67042156038017308717f,0.74198041172083095685f,
-0.66928258834663600929f,0.74300795213512171866f,0.66814204142651856255f,
-0.74403374417992929057f,0.66699992230363747137f,0.74505778544146594733f,
-0.66585623366550972246f,0.74608007351006366825f,0.66471097820334490436f,
-0.74710060598018013245f,0.66356415861203987827f,0.74811938045040349277f,
-0.66241577759017178373f,0.74913639452345925918f,0.66126583783999226540f,
-0.75015164580621496171f,0.66011434206742047870f,0.75116513190968636771f,
-0.65896129298203731661f,0.75217685044904269986f,0.65780669329707874837f,
-0.75318679904361252042f,0.65665054572942904709f,0.75419497531688917125f,
-0.65549285299961546070f,0.75520137689653654700f,0.65433361783180055138f,
-0.75620600141439453523f,0.65317284295377686654f,0.75720884650648456748f,
-0.65201053109695950027f,0.75820990981301528144f,0.65084668499638098638f,
-0.75920918897838796102f,0.64968130739068319368f,0.76020668165120242055f,
-0.64851440102211255212f,0.76120238548426177871f,0.64734596863651205911f,
-0.76219629813457889789f,0.64617601298331639459f,0.76318841726338126907f,
-0.64500453681554403840f,0.76417874053611667406f,0.64383154288979149715f,
-0.76516726562245895860f,0.64265703396622686494f,0.76615399019631280630f,
-0.64148101280858316198f,0.76713891193582040007f,0.64030348218415167327f,
-0.76812202852336530778f,0.63912444486377573138f,0.76910333764557958780f,
-0.63794390362184416610f,0.77008283699334789674f,0.63676186123628419899f,
-0.77106052426181381776f,0.63557832048855622542f,0.77203639715038441249f,
-0.63439328416364548779f,0.77301045336273688235f,0.63320675505005719064f,
-0.77398269060682278742f,0.63201873593980906207f,0.77495310659487381955f,
-0.63082922962842458148f,0.77592169904340757558f,0.62963823891492709528f,
-0.77688846567323244230f,0.62844576660183271155f,0.77785340420945303652f,
-0.62725181549514419377f,0.77881651238147586724f,0.62605638840434352232f,
-0.77977778792301444266f,0.62485948814238645443f,0.78073722857209448822f,
-0.62366111752569464155f,0.78169483207105938671f,0.62246127937415007825f,
-0.78265059616657572938f,0.62125997651108766373f,0.78360451860963820092f,
-0.62005721176328920663f,0.78455659715557524159f,0.61885298796097631957f,
-0.78550682956405393220f,0.61764730793780397988f,0.78645521359908576731f,
-0.61644017453085364622f,0.78740174702903131809f,0.61523159058062681925f,
-0.78834642762660622761f,0.61402155893103849138f,0.78928925316888565167f,
-0.61281008242940970820f,0.79023022143731003197f,0.61159716392646201744f,
-0.79116933021769009216f,0.61038280627630947528f,0.79210657730021227785f,
-0.60916701233645320634f,0.79304196047944364167f,0.60794978496777374311f,
-0.79397547755433717231f,0.60673112703452447558f,0.79490712632823701256f,
-0.60551104140432554512f,0.79583690460888345530f,0.60428953094815607283f,
-0.79676481020841871672f,0.60306659854034827539f,0.79769084094339104407f,
-0.60184224705858002658f,0.79861499463476082195f,0.60061647938386897305f,
-0.79953726910790501314f,0.59938929840056454079f,0.80045766219262270980f,
-0.59816070699634238395f,0.80137617172314012937f,0.59693070806219650226f,
-0.80229279553811572168f,0.59569930449243346793f,0.80320753148064483184f,
-0.59446649918466454299f,0.80412037739826569549f,0.59323229503979979516f,
-0.80503133114296365758f,0.59199669496204099239f,0.80594039057117627944f,
-0.59075970185887427544f,0.80684755354379922299f,0.58952131864106394055f,
-0.80775281792619024746f,0.58828154822264533408f,0.80865618158817498262f,
-0.58704039352091808013f,0.80955764240405125864f,0.58579785745643886408f,
-0.81045719825259476821f,0.58455394295301532637f,0.81135484701706372945f,
-0.58330865293769829094f,0.81225058658520388200f,0.58206199034077554799f,
-0.81314441484925359394f,0.58081395809576452649f,0.81403632970594830276f,
-0.57956455913940574387f,0.81492632905652662156f,0.57831379641165558958f,
-0.81581441080673378075f,0.57706167285567955272f,0.81670057286682784525f,
-0.57580819141784533866f,0.81758481315158371139f,0.57455335504771576360f,
-0.81846712958029865792f,0.57329716669804231532f,0.81934752007679689800f,
-0.57203962932475704850f,0.82022598256943468620f,0.57078074588696736669f,
-0.82110251499110464835f,0.56952051934694725155f,0.82197711527924155472f,
-0.56825895267013148970f,0.82284978137582631685f,0.56699604882510867832f,
-0.82372051122739131657f,0.56573181078361323149f,0.82458930278502529099f,
-0.56446624152051949608f,0.82545615400437744036f,0.56319934401383409117f,
-0.82632106284566353427f,0.56193112124468946877f,0.82718402727366913130f,
-0.56066157619733603124f,0.82804504525775579626f,0.55939071185913613604f,
-0.82890411477186487499f,0.55811853122055610221f,0.82976123379452304540f,
-0.55684503727516010407f,0.83061640030884620334f,0.55557023301960228867f,
-0.83146961230254523567f,0.55429412145362011444f,0.83232086776792968408f,
-0.55301670558002757883f,0.83317016470191318511f,0.55173798840470744675f,
-0.83401750110601813315f,0.55045797293660481131f,0.83486287498638001026f,
-0.54917666218771976627f,0.83570628435375260423f,0.54789405917310018967f,
-0.83654772722351189440f,0.54661016691083486041f,0.83738720161566193578f,
-0.54532498842204646383f,0.83822470555483796772f,0.54403852673088393122f,
-0.83906023707031263115f,0.54275078486451600046f,0.83989379419599941023f,
-0.54146176585312355556f,0.84072537497045796151f,0.54017147272989296525f,
-0.84155497743689833268f,0.53887990853100842248f,0.84238259964318584760f,
-0.53758707629564550512f,0.84320823964184543620f,0.53629297906596318235f,
-0.84403189549006640835f,0.53499761988709726435f,0.84485356524970700587f,
-0.53370100180715296379f,0.84567324698729906540f,0.53240312787719801246f,
-0.84649093877405212627f,0.53110400115125500076f,0.84730663868585831544f,
-0.52980362468629482731f,0.84812034480329712149f,0.52850200154222848337f,
-0.84893205521163961347f,0.52719913478190139067f,0.84974176800085243766f,
-0.52589502747108474168f,0.85054948126560336874f,0.52458968267846883826f,
-0.85135519310526519554f,0.52328310347565643035f,0.85215890162391982887f,
-0.52197529293715438925f,0.85296060493036363059f,0.52066625414036726838f,
-0.85376030113811129940f,0.51935599016558953167f,0.85455798836540053376f,
-0.51804450409599933636f,0.85535366473519602870f,0.51673179901764998423f,
-0.85614732837519447184f,0.51541787801946314929f,0.85693897741782865118f,
-0.51410274419322166128f,0.85772861000027211809f,0.51278640063356306644f,
-0.85851622426444273994f,0.51146885043797052361f,0.85930181835700836235f,
-0.51015009670676669806f,0.86008539042939025077f,0.50883014254310698909f,
-0.86086693863776730939f,0.50750899105297087033f,0.86164646114308129921f,
-0.50618664534515533937f,0.86242395611104050168f,0.50486310853126747933f,
-0.86319942171212415971f,0.50353838372571757542f,0.86397285612158669643f,
-0.50221247404571089934f,0.86474425751946237817f,0.50088538261124093687f,
-0.86551362409056897818f,0.49955711254508183838f,0.86628095402451299467f,
-0.49822766697278186854f,0.86704624551569264845f,0.49689704902265463549f,
-0.86780949676330321196f,0.49556526182577248507f,0.86857070597134089507f,
-0.49423230851595972846f,0.86932987134860673084f,0.49289819222978409341f,
-0.87008699110871134952f,0.49156291610655006297f,0.87084206347007886428f,
-0.49022648328829110387f,0.87159508665595109012f,0.48888889691976322727f,
-0.87234605889439142956f,0.48755016014843605143f,0.87309497841829009079f,
-0.48621027612448652899f,0.87384184346536675214f,0.48486924800079111986f,
-0.87458665227817611321f,0.48352707893291874131f,0.87532940310411078144f,
-0.48218377207912282989f,0.87607009419540660122f,0.48083933060033390294f,
-0.87680872380914576247f,0.47949375766015300826f,0.87754529020726124156f,
-0.47814705642484311987f,0.87827979165654146421f,0.47679923006332225466f,
-0.87901222642863341417f,0.47545028174715586733f,0.87974259280004740713f,
-0.47410021465055002254f,0.88047088905216075450f,0.47274903195034290171f,
-0.88119711347122198219f,0.47139673682599780857f,0.88192126434835493853f,
-0.47004333245959561971f,0.88264333997956279099f,0.46868882203582795665f,
-0.88336333866573157891f,0.46733320874198852612f,0.88408125871263498752f,
-0.46597649576796612569f,0.88479709843093778954f,0.46461868630623781584f,
-0.88551085613619995307f,0.46325978355186026025f,0.88622253014888063838f,
-0.46189979070246284243f,0.88693211879434208367f,0.46053871095824000514f,
-0.88763962040285393496f,0.45917654752194414502f,0.88834503330959635470f,
-0.45781330359887728587f,0.88904835585466457371f,0.45644898239688386221f,
-0.88974958638307288794f,0.45508358712634383592f,0.89044872324475787817f,
-0.45371712100016392544f,0.89114576479458318392f,0.45234958723377099998f,
-0.89184070939234272313f,0.45098098904510380835f,0.89253355540276468894f,
-0.44961132965460659516f,0.89322430119551532446f,0.44824061228521999700f,
-0.89391294514520325265f,0.44686884016237432560f,0.89459948563138258493f,
-0.44549601651398174074f,0.89528392103855758410f,0.44412214457042925586f,
-0.89596624975618510689f,0.44274722756457013384f,0.89664647017868015499f,
-0.44137126873171661501f,0.89732458070541831763f,0.43999427130963325583f,
-0.89800057974073987932f,0.43861623853852771404f,0.89867446569395381673f,
-0.43723717366104419835f,0.89934623697934146236f,0.43585707992225547480f,
-0.90001589201616027935f,0.43447596056965570588f,0.90068342922864685907f,
-0.43309381885315201277f,0.90134884704602202810f,0.43171065802505736997f,
-0.90201214390249306874f,0.43032648134008261165f,0.90267331823725882600f,
-0.42894129205532954829f,0.90333236849451181705f,0.42755509343028219593f,
-0.90398929312344333820f,0.42616788872679961520f,0.90464409057824624050f,
-0.42477968120910880589f,0.90529675931811881551f,0.42339047414379610279f,
-0.90594729780726845902f,0.42200027079979979261f,0.90659570451491533483f,
-0.42060907444840250902f,0.90724197791529592738f,0.41921688836322396066f,
-0.90788611648766626150f,0.41782371582021238243f,0.90852811871630612117f,
-0.41642956009763731906f,0.90916798309052226923f,0.41503442447608163146f,
-0.90980570810465222209f,0.41363831223843455787f,0.91044129225806713634f,
-0.41224122666988299857f,0.91107473405517624965f,0.41084317105790391089f,
-0.91170603200542987832f,0.40944414869225764786f,0.91233518462332274801f,
-0.40804416286497874333f,0.91296219042839810154f,0.40664321687036913966f,
-0.91358704794525080750f,0.40524131400498986100f,0.91420975570353069095f,
-0.40383845756765412993f,0.91483031223794608611f,0.40243465085941854120f,
-0.91544871608826783316f,0.40102989718357578974f,0.91606496579933160973f,
-0.39962419984564678810f,0.91667905992104270485f,0.39821756215337361651f,
-0.91729099700837790632f,0.39680998741671041907f,0.91790077562139038569f,
-0.39540147894781629834f,0.91850839432521225181f,0.39399204006104809883f,
-0.91911385169005777040f,0.39258167407295152529f,0.91971714629122736095f,
-0.39117038430225398171f,0.92031827670911048322f,0.38975817406985641123f,
-0.92091724152918941204f,0.38834504669882630168f,0.92151403934204190183f,
-0.38693100551438869283f,0.92210866874334507237f,0.38551605384391901543f,
-0.92270112833387851747f,0.38410019501693504207f,0.92329141671952763559f,
-0.38268343236508983729f,0.92387953251128673848f,0.38126576922216248722f,
-0.92446547432526260391f,0.37984720892405110515f,0.92504924078267758425f,
-0.37842775480876561511f,0.92563083050987271516f,0.37700741021641831496f,
-0.92621024213831126826f,0.37558617848921732607f,0.92678747430458174872f,
-0.37416406297145798909f,0.92736252565040111495f,0.37274106700951581406f,
-0.92793539482261788720f,0.37131719395183759858f,0.92850608047321558924f,
-0.36989244714893426691f,0.92907458125931574600f,0.36846682995337232125f,
-0.92964089584318121418f,0.36704034571976723589f,0.93020502289221906889f,
-0.36561299780477396482f,0.93076696107898371224f,0.36418478956707983629f,
-0.93132670908118042608f,0.36275572436739722537f,0.93188426558166814750f,
-0.36132580556845433906f,0.93243962926846235550f,0.35989503653498827740f,
-0.93299279883473884567f,0.35846342063373654030f,0.93354377297883617270f,
-0.35703096123343003310f,0.93409255040425887007f,0.35559766170478396274f,
-0.93463912981968078064f,0.35416352542049051033f,0.93518350993894749923f,
-0.35272855575521072646f,0.93572568948108036935f,0.35129275608556714827f,
-0.93626566717027825959f,0.34985612979013502866f,0.93680344173592156043f,
-0.34841868024943450921f,0.93733901191257495977f,0.34698041084592368133f,
-0.93787237643998988545f,0.34554132496398914931f,0.93840353406310805795f,
-0.34410142598993898044f,0.93893248353206448797f,0.34266071731199437833f,
-0.93945922360218991898f,0.34121920232028241093f,0.93998375303401393577f,
-0.33977688440682696225f,0.94050607059326829518f,0.33833376696554129381f,
-0.94102617505088925753f,0.33688985339222005111f,0.94154406518302080631f,
-0.33544514708453165852f,0.94205973977101731265f,0.33399965144200949307f,
-0.94257319760144686605f,0.33255336986604422389f,0.94308443746609349478f,
-0.33110630575987642921f,0.94359345816196038559f,0.32965846252858754806f,
-0.94410025849127265918f,0.32820984357909266382f,0.94460483726148025685f,
-0.32676045232013178898f,0.94510719328526060501f,0.32531029216226298173f,
-0.94560732538052127971f,0.32385936651785296458f,0.94610523237040333733f,
-0.32240767880107001897f,0.94660091308328353499f,0.32095523242787521445f,
-0.94709436635277721717f,0.31950203081601574739f,0.94758559101774109124f,
-0.31804807738501505998f,0.94807458592227622507f,0.31659337555616584581f,
-0.94856134991573026749f,0.31513792875252244485f,0.94904588185270055689f,
-0.31368174039889157312f,0.94952818059303667475f,0.31222481392182505067f,
-0.95000824500184299914f,0.31076715274961147495f,0.95048607394948170235f,
-0.30930876031226878231f,0.95096166631157508231f,0.30784964004153497763f,
-0.95143502096900833820f,0.30638979537086108440f,0.95190613680793223494f,
-0.30492922973540242948f,0.95237501271976587880f,0.30346794657201137113f,
-0.95284164760119871573f,0.30200594931922819519f,0.95330604035419375109f,
-0.30054324141727339903f,0.95376818988599032512f,0.29907982630804047508f,
-0.95422809510910566733f,0.29761570743508630743f,0.95468575494133833814f,
-0.29615088824362395536f,0.95514116830577067141f,0.29468537218051432669f,
-0.95559433413077110586f,0.29321916269425868373f,0.95604525134999640557f,
-0.29175226323498937298f,0.95649391890239499059f,0.29028467725446233105f,
-0.95694033573220893540f,0.28881640820604947972f,0.95738450078897585627f,
-0.28734745954472956653f,0.95782641302753290802f,0.28587783472708072630f,
-0.95826607140801767226f,0.28440753721127182141f,0.95870347489587159906f,
-0.28293657045705539188f,0.95913862246184189431f,0.28146493792575805193f,
-0.95957151308198451733f,0.27999264308027338455f,0.96000214573766584625f,
-0.27851968938505305973f,0.96043051941556578655f,0.27704608030609995106f,
-0.96085663310767965850f,0.27557181931095825478f,0.96128048581132063966f,
-0.27409690986870632878f,0.96170207652912254037f,0.27262135544994897662f,
-0.96212140426904158019f,0.27114515952680806610f,0.96253846804435916340f,
-0.26966832557291520178f,0.96295326687368387741f,0.26819085706340317632f,
-0.96336579978095404631f,0.26671275747489842090f,0.96377606579543984022f,
-0.26523403028551190141f,0.96418406395174571788f,0.26375467897483151347f,
-0.96458979328981264700f,0.26227470702391358914f,0.96499325285492032478f,
-0.26079411791527556952f,0.96539444169768939830f,0.25931291513288634576f,
-0.96579335887408357397f,0.25783110216215893162f,0.96619000344541261516f,
-0.25634868248994291395f,0.96658437447833311928f,0.25486565960451462720f,
-0.96697647104485207059f,0.25338203699557027004f,0.96736629222232850545f,
-0.25189781815421691258f,0.96775383709347551076f,0.25041300657296527987f,
-0.96813910474636244441f,0.24892760574572025956f,0.96852209427441726675f,
-0.24744161916777343557f,0.96890280477642887202f,0.24595505033579459497f,
-0.96928123535654853171f,0.24446790274782420616f,0.96965738512429244800f,
-0.24298017990326398197f,0.97003125319454397424f,0.24149188530286930243f,
-0.97040283868755550234f,0.24000302244874149871f,0.97077214072895035013f,
-0.23851359484431849944f,0.97113915844972509284f,0.23702360599436733679f,
-0.97150389098625178352f,0.23553305940497545889f,0.97186633748027939639f,
-0.23404195858354345794f,0.97222649707893626925f,0.23255030703877532794f,
-0.97258436893473221296f,0.23105810828067127605f,0.97293995220556006576f,
-0.22956536582051886852f,0.97329324605469824672f,0.22807208317088578653f,
-0.97364424965081186603f,0.22657826384561011168f,0.97399296216795583359f,
-0.22508391135979277653f,0.97433938278557585821f,0.22358902922979001504f,
-0.97468351068851066810f,0.22209362097320359264f,0.97502534506699412020f,
-0.22059769010887364526f,0.97536488511665686563f,0.21910124015686976984f,
-0.97570213003852857003f,0.21760427463848366902f,0.97603707903903902388f,
-0.21610679707621960333f,0.97636973133002114000f,0.21460881099378692483f,
-0.97670008612871184184f,0.21311031991609136194f,0.97702814265775439484f,
-0.21161132736922760866f,0.97735390014519996082f,0.21011183688046972118f,
-0.97767735782450992943f,0.20861185197826345727f,0.97799851493455713936f,
-0.20711137619221856032f,0.97831737071962765473f,0.20561041305309932237f,
-0.97863392442942309657f,0.20410896609281700687f,0.97894817531906219710f,
-0.20260703884442110567f,0.97926012264908202098f,0.20110463484209195606f,
-0.97956976568544051887f,0.19960175762113105402f,0.97987710369951763756f,
-0.19809841071795372680f,0.98018213596811731847f,0.19659459767008022335f,
-0.98048486177346938497f,0.19509032201612833135f,0.98078528040323043058f,
-0.19358558729580374602f,0.98108339115048659451f,0.19208039704989238183f,
-0.98137919331375456089f,0.19057475482025279523f,0.98167268619698311305f,
-0.18906866414980627589f,0.98196386910955524296f,0.18756212858252974129f,
-0.98225274136628937249f,0.18605515166344663291f,0.98253930228744124076f,
-0.18454773693861964423f,0.98282355119870523641f,0.18303988795514106180f,
-0.98310548743121628501f,0.18153160826112513249f,0.98338511032155118130f,
-0.18002290140569951471f,0.98366241921173025453f,0.17851377093899759019f,
-0.98393741344921892278f,0.17700422041214886049f,0.98421009238692902521f,
-0.17549425337727139751f,0.98448045538322093151f,0.17398387338746384989f,
-0.98474850180190420801f,0.17247308399679603386f,0.98501423101223983814f,
-0.17096188876030135595f,0.98527764238894122162f,0.16945029123396793125f,
-0.98553873531217606185f,0.16793829497473122814f,0.98579750916756736512f,
-0.16642590354046421508f,0.98605396334619543897f,0.16491312048997008866f,
-0.98630809724459866938f,0.16339994938297322524f,0.98655991026477540817f,
-0.16188639378011188130f,0.98680940181418541624f,0.16037245724292839566f,
-0.98705657130575097380f,0.15885814333386139019f,0.98730141815785843473f,
-0.15734345561623827581f,0.98754394179435922574f,0.15582839765426531597f,
-0.98778414164457217783f,0.15431297301302024372f,0.98802201714328352633f,
-0.15279718525844340760f,0.98825756773074946437f,0.15128103795733024994f,
-0.98849079285269658701f,0.14976453467732162017f,0.98872169196032377858f,
-0.14824767898689619749f,0.98895026451030298986f,0.14673047445536174793f,
-0.98917650996478101444f,0.14521292465284751927f,0.98940042779138037687f,
-0.14369503315029458212f,0.98962201746320077600f,0.14217680351944800288f,
-0.98984127845882052821f,0.14065823933284923863f,0.99005821026229712256f,
-0.13913934416382628401f,0.99027281236316910817f,0.13762012158648617710f,
-0.99048508425645698239f,0.13610057517570620100f,0.99069502544266463406f,
-0.13458070850712622324f,0.99090263542778000971f,0.13306052515713917561f,
-0.99110791372327677884f,0.13154002870288328264f,0.99131085984611544415f,
-0.13001922272223334631f,0.99151147331874389668f,0.12849811079379322432f,
-0.99170975366909952520f,0.12697669649688597682f,0.99190570043060932726f,
-0.12545498341154620592f,0.99209931314219179654f,0.12393297511851220083f,
-0.99229059134825736699f,0.12241067519921627893f,0.99247953459870996706f,
-0.12088808723577722237f,0.99266614244894801899f,0.11936521481099135467f,
-0.99285041445986510489f,0.11784206150832501891f,0.99303235019785141002f,
-0.11631863091190487725f,0.99321194923479450001f,0.11479492660651025027f,
-0.99338921114808065305f,0.11327095217756436019f,0.99356413552059530403f,
-0.11174671121112665639f,0.99373672194072459884f,0.11022220729388318428f,
-0.99390697000235606051f,0.10869744401313867488f,0.99407487930487936634f,
-0.10717242495680887049f,0.99424044945318790223f,0.10564715371341069916f,
-0.99440368005767909576f,0.10412163387205472520f,0.99456457073425541537f,
-0.10259586902243628126f,0.99472312110432570265f,0.10106986275482787718f,
-0.99487933079480561638f,0.09954361866006944393f,0.99503319943811863180f,
-0.09801714032956077016f,0.99518472667219681771f,0.09649043135525260662f,
-0.99533391214048227980f,0.09496349532963906104f,0.99548075549192693856f,
-0.09343633584574791151f,0.99562525638099430569f,0.09190895649713269611f,
-0.99576741446765981713f,0.09038136087786501072f,0.99590722941741172125f,
-0.08885355258252468358f,0.99604470090125196702f,0.08732553520619222576f,
-0.99617982859569687015f,0.08579731234443987997f,0.99631261218277800129f,
-0.08426888759332412659f,0.99644305135004263008f,0.08274026454937580266f,
-0.99657114579055483539f,0.08121144680959238582f,0.99669689520289606044f,
-0.07968243797143012563f,0.99682029929116566791f,0.07815324163279431524f,
-0.99694135776498216117f,0.07662386139203161695f,0.99706007033948296225f,
-0.07509430084792129145f,0.99717643673532618820f,0.07356456359966745406f,
-0.99729045667869020697f,0.07203465324688941573f,0.99740212990127530279f,
-0.07050457338961400866f,0.99751145614030345410f,0.06897432762826673225f,
-0.99761843513851955478f,0.06744391956366410645f,0.99772306664419163624f,
-0.06591335279700392957f,0.99782535041111164453f,0.06438263092985740954f,
-0.99792528619859599548f,0.06285175756416142012f,0.99802287377148624081f,
-0.06132073630220864768f,0.99811811290014917919f,0.05978957074664000698f,
-0.99821100336047818846f,0.05825826450043573163f,0.99830154493389289261f,
-0.05672682116690778292f,0.99838973740734016094f,0.05519524434969003135f,
-0.99847558057329477421f,0.05366353765273067927f,0.99855907422975931365f,
-0.05213170468028331672f,0.99864021818026527111f,0.05059974903689933717f,
-0.99871901223387293811f,0.04906767432741812596f,0.99879545620517240501f,
-0.04753548415695926094f,0.99886954991428356099f,0.04600318213091464381f,
-0.99894129318685687124f,0.04447077185493874402f,0.99901068585407337697f,
-0.04293825693494095902f,0.99907772775264536147f,0.04140564097707671171f,
-0.99914241872481690532f,0.03987292758773984536f,0.99920475861836388631f,
-0.03834012037355279123f,0.99926474728659442359f,0.03680722294135899131f,
-0.99932238458834954375f,0.03527423889821394709f,0.99937767038800284780f,
-0.03374117185137764235f,0.99943060455546173237f,0.03220802540830470378f,
-0.99948118696616694567f,0.03067480317663658085f,0.99952941750109314256f,
-0.02914150876419373953f,0.99957529604674921764f,0.02760814577896581953f,
-0.99961882249517863830f,0.02607471782910403962f,0.99965999674395922270f,
-0.02454122852291226384f,0.99969881869620424997f,0.02300768146883941032f,
-0.99973528826056168306f,0.02147408027546960502f,0.99976940535121527898f,
-0.01994042855151459750f,0.99980116988788425569f,0.01840672990580482019f,
-0.99983058179582340319f,0.01687298794728177287f,0.99985764100582386060f,
-0.01533920628498821985f,0.99988234745421256111f,0.01380538852806034895f,
-0.99990470108285289808f,0.01227153828571994447f,0.99992470183914450299f,
-0.01073765916726457208f,0.99994234967602391162f,0.00920375478205995995f,
-0.99995764455196389786f,0.00766982873953107706f,0.99997058643097413988f,
-0.00613588464915451517f,0.99998117528260110909f,0.00460192612044867198f,
-0.99998941108192840321f,0.00306795676296613791f,0.99999529380957619118f,
-0.00153398018628476615f,0.99999882345170187925f,1.00000000000000000000f,
-0.00000000000000000000f,0.99998117528260110909f,0.00613588464915447527f,
-0.99992470183914450299f,0.01227153828571992539f,0.99983058179582340319f,
-0.01840672990580482019f,0.99969881869620424997f,0.02454122852291228812f,
-0.99952941750109314256f,0.03067480317663662595f,0.99932238458834954375f,
-0.03680722294135883171f,0.99907772775264536147f,0.04293825693494082024f,
-0.99879545620517240501f,0.04906767432741801493f,0.99847558057329477421f,
-0.05519524434968993420f,0.99811811290014917919f,0.06132073630220857829f,
-0.99772306664419163624f,0.06744391956366405094f,0.99729045667869020697f,
-0.07356456359966742631f,0.99682029929116566791f,0.07968243797143012563f,
-0.99631261218277800129f,0.08579731234443989385f,0.99576741446765981713f,
-0.09190895649713272386f,0.99518472667219692873f,0.09801714032956060363f,
-0.99456457073425541537f,0.10412163387205458642f,0.99390697000235606051f,
-0.11022220729388305938f,0.99321194923479450001f,0.11631863091190475235f,
-0.99247953459870996706f,0.12241067519921619566f,0.99170975366909952520f,
-0.12849811079379316880f,0.99090263542778000971f,0.13458070850712616773f,
-0.99005821026229712256f,0.14065823933284921088f,0.98917650996478101444f,
-0.14673047445536174793f,0.98825756773074946437f,0.15279718525844343535f,
-0.98730141815785843473f,0.15885814333386144570f,0.98630809724459866938f,
-0.16491312048996989437f,0.98527764238894122162f,0.17096188876030121717f,
-0.98421009238692902521f,0.17700422041214874946f,0.98310548743121628501f,
-0.18303988795514095078f,0.98196386910955524296f,0.18906866414980619262f,
-0.98078528040323043058f,0.19509032201612824808f,0.97956976568544051887f,
-0.20110463484209190055f,0.97831737071962765473f,0.20711137619221856032f,
-0.97702814265775439484f,0.21311031991609136194f,0.97570213003852857003f,
-0.21910124015686979759f,0.97433938278557585821f,0.22508391135979283204f,
-0.97293995220556017678f,0.23105810828067110951f,0.97150389098625178352f,
-0.23702360599436719801f,0.97003125319454397424f,0.24298017990326387094f,
-0.96852209427441737777f,0.24892760574572014853f,0.96697647104485207059f,
-0.25486565960451457169f,0.96539444169768939830f,0.26079411791527551401f,
-0.96377606579543984022f,0.26671275747489836538f,0.96212140426904158019f,
-0.27262135544994897662f,0.96043051941556578655f,0.27851968938505305973f,
-0.95870347489587159906f,0.28440753721127187692f,0.95694033573220882438f,
-0.29028467725446233105f,0.95514116830577078243f,0.29615088824362378883f,
-0.95330604035419386211f,0.30200594931922808417f,0.95143502096900833820f,
-0.30784964004153486661f,0.94952818059303667475f,0.31368174039889151761f,
-0.94758559101774109124f,0.31950203081601569188f,0.94560732538052127971f,
-0.32531029216226292622f,0.94359345816196038559f,0.33110630575987642921f,
-0.94154406518302080631f,0.33688985339222005111f,0.93945922360218991898f,
-0.34266071731199437833f,0.93733901191257495977f,0.34841868024943456472f,
-0.93518350993894761025f,0.35416352542049034380f,0.93299279883473895669f,
-0.35989503653498811087f,0.93076696107898371224f,0.36561299780477385379f,
-0.92850608047321558924f,0.37131719395183754306f,0.92621024213831137928f,
-0.37700741021641825945f,0.92387953251128673848f,0.38268343236508978178f,
-0.92151403934204190183f,0.38834504669882624617f,0.91911385169005777040f,
-0.39399204006104809883f,0.91667905992104270485f,0.39962419984564678810f,
-0.91420975570353069095f,0.40524131400498986100f,0.91170603200542987832f,
-0.41084317105790391089f,0.90916798309052238025f,0.41642956009763715253f,
-0.90659570451491533483f,0.42200027079979968159f,0.90398929312344333820f,
-0.42755509343028208491f,0.90134884704602202810f,0.43309381885315195726f,
-0.89867446569395381673f,0.43861623853852765853f,0.89596624975618521791f,
-0.44412214457042920035f,0.89322430119551532446f,0.44961132965460653965f,
-0.89044872324475787817f,0.45508358712634383592f,0.88763962040285393496f,
-0.46053871095824000514f,0.88479709843093778954f,0.46597649576796618121f,
-0.88192126434835504956f,0.47139673682599764204f,0.87901222642863352519f,
-0.47679923006332208812f,0.87607009419540660122f,0.48218377207912271887f,
-0.87309497841829009079f,0.48755016014843599592f,0.87008699110871146054f,
-0.49289819222978403790f,0.86704624551569264845f,0.49822766697278181303f,
-0.86397285612158669643f,0.50353838372571757542f,0.86086693863776730939f,
-0.50883014254310698909f,0.85772861000027211809f,0.51410274419322166128f,
-0.85455798836540053376f,0.51935599016558964269f,0.85135519310526519554f,
-0.52458968267846894928f,0.84812034480329723252f,0.52980362468629460526f,
-0.84485356524970711689f,0.53499761988709715332f,0.84155497743689844370f,
-0.54017147272989285423f,0.83822470555483807875f,0.54532498842204646383f,
-0.83486287498638001026f,0.55045797293660481131f,0.83146961230254523567f,
-0.55557023301960217765f,0.82804504525775579626f,0.56066157619733603124f,
-0.82458930278502529099f,0.56573181078361312046f,0.82110251499110464835f,
-0.57078074588696725566f,0.81758481315158371139f,0.57580819141784533866f,
-0.81403632970594841378f,0.58081395809576452649f,0.81045719825259476821f,
-0.58579785745643886408f,0.80684755354379933401f,0.59075970185887416442f,
-0.80320753148064494287f,0.59569930449243335691f,0.79953726910790501314f,
-0.60061647938386897305f,0.79583690460888356633f,0.60551104140432554512f,
-0.79210657730021238887f,0.61038280627630947528f,0.78834642762660622761f,
-0.61523159058062681925f,0.78455659715557524159f,0.62005721176328909561f,
-0.78073722857209448822f,0.62485948814238634341f,0.77688846567323244230f,
-0.62963823891492698426f,0.77301045336273699338f,0.63439328416364548779f,
-0.76910333764557969882f,0.63912444486377573138f,0.76516726562245895860f,
-0.64383154288979138613f,0.76120238548426177871f,0.64851440102211244110f,
-0.75720884650648456748f,0.65317284295377675551f,0.75318679904361252042f,
-0.65780669329707863735f,0.74913639452345937020f,0.66241577759017178373f,
-0.74505778544146594733f,0.66699992230363747137f,0.74095112535495921691f,
-0.67155895484701833009f,0.73681656887736979300f,0.67609270357531592310f,
-0.73265427167241281570f,0.68060099779545302212f,0.72846439044822519637f,
-0.68508366777270035541f,0.72424708295146700276f,0.68954054473706682948f,
-0.72000250796138165477f,0.69397146088965389055f,0.71573082528381870571f,
-0.69837624940897280457f,0.71143219574521643356f,0.70275474445722529993f,
-0.70710678118654757274f,0.70710678118654757274f,0.70275474445722529993f,
-0.71143219574521643356f,0.69837624940897291559f,0.71573082528381859468f,
-0.69397146088965400157f,0.72000250796138165477f,0.68954054473706694051f,
-0.72424708295146689174f,0.68508366777270035541f,0.72846439044822519637f,
-0.68060099779545302212f,0.73265427167241281570f,0.67609270357531603413f,
-0.73681656887736979300f,0.67155895484701833009f,0.74095112535495910588f,
-0.66699992230363747137f,0.74505778544146594733f,0.66241577759017178373f,
-0.74913639452345925918f,0.65780669329707874837f,0.75318679904361252042f,
-0.65317284295377686654f,0.75720884650648456748f,0.64851440102211255212f,
-0.76120238548426177871f,0.64383154288979149715f,0.76516726562245895860f,
-0.63912444486377573138f,0.76910333764557958780f,0.63439328416364548779f,
-0.77301045336273688235f,0.62963823891492709528f,0.77688846567323244230f,
-0.62485948814238645443f,0.78073722857209448822f,0.62005721176328920663f,
-0.78455659715557524159f,0.61523159058062681925f,0.78834642762660622761f,
-0.61038280627630947528f,0.79210657730021227785f,0.60551104140432554512f,
-0.79583690460888345530f,0.60061647938386897305f,0.79953726910790501314f,
-0.59569930449243346793f,0.80320753148064483184f,0.59075970185887427544f,
-0.80684755354379922299f,0.58579785745643886408f,0.81045719825259476821f,
-0.58081395809576452649f,0.81403632970594830276f,0.57580819141784533866f,
-0.81758481315158371139f,0.57078074588696736669f,0.82110251499110464835f,
-0.56573181078361323149f,0.82458930278502529099f,0.56066157619733603124f,
-0.82804504525775579626f,0.55557023301960228867f,0.83146961230254523567f,
-0.55045797293660481131f,0.83486287498638001026f,0.54532498842204646383f,
-0.83822470555483796772f,0.54017147272989296525f,0.84155497743689833268f,
-0.53499761988709726435f,0.84485356524970700587f,0.52980362468629482731f,
-0.84812034480329712149f,0.52458968267846883826f,0.85135519310526519554f,
-0.51935599016558953167f,0.85455798836540053376f,0.51410274419322166128f,
-0.85772861000027211809f,0.50883014254310698909f,0.86086693863776730939f,
-0.50353838372571757542f,0.86397285612158669643f,0.49822766697278186854f,
-0.86704624551569264845f,0.49289819222978409341f,0.87008699110871134952f,
-0.48755016014843605143f,0.87309497841829009079f,0.48218377207912282989f,
-0.87607009419540660122f,0.47679923006332225466f,0.87901222642863341417f,
-0.47139673682599780857f,0.88192126434835493853f,0.46597649576796612569f,
-0.88479709843093778954f,0.46053871095824000514f,0.88763962040285393496f,
-0.45508358712634383592f,0.89044872324475787817f,0.44961132965460659516f,
-0.89322430119551532446f,0.44412214457042925586f,0.89596624975618510689f,
-0.43861623853852771404f,0.89867446569395381673f,0.43309381885315201277f,
-0.90134884704602202810f,0.42755509343028219593f,0.90398929312344333820f,
-0.42200027079979979261f,0.90659570451491533483f,0.41642956009763731906f,
-0.90916798309052226923f,0.41084317105790391089f,0.91170603200542987832f,
-0.40524131400498986100f,0.91420975570353069095f,0.39962419984564678810f,
-0.91667905992104270485f,0.39399204006104809883f,0.91911385169005777040f,
-0.38834504669882630168f,0.92151403934204190183f,0.38268343236508983729f,
-0.92387953251128673848f,0.37700741021641831496f,0.92621024213831126826f,
-0.37131719395183759858f,0.92850608047321558924f,0.36561299780477396482f,
-0.93076696107898371224f,0.35989503653498827740f,0.93299279883473884567f,
-0.35416352542049051033f,0.93518350993894749923f,0.34841868024943450921f,
-0.93733901191257495977f,0.34266071731199437833f,0.93945922360218991898f,
-0.33688985339222005111f,0.94154406518302080631f,0.33110630575987642921f,
-0.94359345816196038559f,0.32531029216226298173f,0.94560732538052127971f,
-0.31950203081601574739f,0.94758559101774109124f,0.31368174039889157312f,
-0.94952818059303667475f,0.30784964004153497763f,0.95143502096900833820f,
-0.30200594931922819519f,0.95330604035419375109f,0.29615088824362395536f,
-0.95514116830577067141f,0.29028467725446233105f,0.95694033573220893540f,
-0.28440753721127182141f,0.95870347489587159906f,0.27851968938505305973f,
-0.96043051941556578655f,0.27262135544994897662f,0.96212140426904158019f,
-0.26671275747489842090f,0.96377606579543984022f,0.26079411791527556952f,
-0.96539444169768939830f,0.25486565960451462720f,0.96697647104485207059f,
-0.24892760574572025956f,0.96852209427441726675f,0.24298017990326398197f,
-0.97003125319454397424f,0.23702360599436733679f,0.97150389098625178352f,
-0.23105810828067127605f,0.97293995220556006576f,0.22508391135979277653f,
-0.97433938278557585821f,0.21910124015686976984f,0.97570213003852857003f,
-0.21311031991609136194f,0.97702814265775439484f,0.20711137619221856032f,
-0.97831737071962765473f,0.20110463484209195606f,0.97956976568544051887f,
-0.19509032201612833135f,0.98078528040323043058f,0.18906866414980627589f,
-0.98196386910955524296f,0.18303988795514106180f,0.98310548743121628501f,
-0.17700422041214886049f,0.98421009238692902521f,0.17096188876030135595f,
-0.98527764238894122162f,0.16491312048997008866f,0.98630809724459866938f,
-0.15885814333386139019f,0.98730141815785843473f,0.15279718525844340760f,
-0.98825756773074946437f,0.14673047445536174793f,0.98917650996478101444f,
-0.14065823933284923863f,0.99005821026229712256f,0.13458070850712622324f,
-0.99090263542778000971f,0.12849811079379322432f,0.99170975366909952520f,
-0.12241067519921627893f,0.99247953459870996706f,0.11631863091190487725f,
-0.99321194923479450001f,0.11022220729388318428f,0.99390697000235606051f,
-0.10412163387205472520f,0.99456457073425541537f,0.09801714032956077016f,
-0.99518472667219681771f,0.09190895649713269611f,0.99576741446765981713f,
-0.08579731234443987997f,0.99631261218277800129f,0.07968243797143012563f,
-0.99682029929116566791f,0.07356456359966745406f,0.99729045667869020697f,
-0.06744391956366410645f,0.99772306664419163624f,0.06132073630220864768f,
-0.99811811290014917919f,0.05519524434969003135f,0.99847558057329477421f,
-0.04906767432741812596f,0.99879545620517240501f,0.04293825693494095902f,
-0.99907772775264536147f,0.03680722294135899131f,0.99932238458834954375f,
-0.03067480317663658085f,0.99952941750109314256f,0.02454122852291226384f,
-0.99969881869620424997f,0.01840672990580482019f,0.99983058179582340319f,
-0.01227153828571994447f,0.99992470183914450299f,0.00613588464915451517f,
-0.99998117528260110909f,1.00000000000000000000f,0.00000000000000000000f,
-0.99969881869620424997f,0.02454122852291228812f,0.99879545620517240501f,
-0.04906767432741801493f,0.99729045667869020697f,0.07356456359966742631f,
-0.99518472667219692873f,0.09801714032956060363f,0.99247953459870996706f,
-0.12241067519921619566f,0.98917650996478101444f,0.14673047445536174793f,
-0.98527764238894122162f,0.17096188876030121717f,0.98078528040323043058f,
-0.19509032201612824808f,0.97570213003852857003f,0.21910124015686979759f,
-0.97003125319454397424f,0.24298017990326387094f,0.96377606579543984022f,
-0.26671275747489836538f,0.95694033573220882438f,0.29028467725446233105f,
-0.94952818059303667475f,0.31368174039889151761f,0.94154406518302080631f,
-0.33688985339222005111f,0.93299279883473895669f,0.35989503653498811087f,
-0.92387953251128673848f,0.38268343236508978178f,0.91420975570353069095f,
-0.40524131400498986100f,0.90398929312344333820f,0.42755509343028208491f,
-0.89322430119551532446f,0.44961132965460653965f,0.88192126434835504956f,
-0.47139673682599764204f,0.87008699110871146054f,0.49289819222978403790f,
-0.85772861000027211809f,0.51410274419322166128f,0.84485356524970711689f,
-0.53499761988709715332f,0.83146961230254523567f,0.55557023301960217765f,
-0.81758481315158371139f,0.57580819141784533866f,0.80320753148064494287f,
-0.59569930449243335691f,0.78834642762660622761f,0.61523159058062681925f,
-0.77301045336273699338f,0.63439328416364548779f,0.75720884650648456748f,
-0.65317284295377675551f,0.74095112535495921691f,0.67155895484701833009f,
-0.72424708295146700276f,0.68954054473706682948f,0.70710678118654757274f,
-0.70710678118654757274f,0.68954054473706694051f,0.72424708295146689174f,
-0.67155895484701833009f,0.74095112535495910588f,0.65317284295377686654f,
-0.75720884650648456748f,0.63439328416364548779f,0.77301045336273688235f,
-0.61523159058062681925f,0.78834642762660622761f,0.59569930449243346793f,
-0.80320753148064483184f,0.57580819141784533866f,0.81758481315158371139f,
-0.55557023301960228867f,0.83146961230254523567f,0.53499761988709726435f,
-0.84485356524970700587f,0.51410274419322166128f,0.85772861000027211809f,
-0.49289819222978409341f,0.87008699110871134952f,0.47139673682599780857f,
-0.88192126434835493853f,0.44961132965460659516f,0.89322430119551532446f,
-0.42755509343028219593f,0.90398929312344333820f,0.40524131400498986100f,
-0.91420975570353069095f,0.38268343236508983729f,0.92387953251128673848f,
-0.35989503653498827740f,0.93299279883473884567f,0.33688985339222005111f,
-0.94154406518302080631f,0.31368174039889157312f,0.94952818059303667475f,
-0.29028467725446233105f,0.95694033573220893540f,0.26671275747489842090f,
-0.96377606579543984022f,0.24298017990326398197f,0.97003125319454397424f,
-0.21910124015686976984f,0.97570213003852857003f,0.19509032201612833135f,
-0.98078528040323043058f,0.17096188876030135595f,0.98527764238894122162f,
-0.14673047445536174793f,0.98917650996478101444f,0.12241067519921627893f,
-0.99247953459870996706f,0.09801714032956077016f,0.99518472667219681771f,
-0.07356456359966745406f,0.99729045667869020697f,0.04906767432741812596f,
-0.99879545620517240501f,0.02454122852291226384f,0.99969881869620424997f,
-1.00000000000000000000f,0.00000000000000000000f,0.99518472667219692873f,
-0.09801714032956060363f,0.98078528040323043058f,0.19509032201612824808f,
-0.95694033573220882438f,0.29028467725446233105f,0.92387953251128673848f,
-0.38268343236508978178f,0.88192126434835504956f,0.47139673682599764204f,
-0.83146961230254523567f,0.55557023301960217765f,0.77301045336273699338f,
-0.63439328416364548779f,0.70710678118654757274f,0.70710678118654757274f,
-0.63439328416364548779f,0.77301045336273688235f,0.55557023301960228867f,
-0.83146961230254523567f,0.47139673682599780857f,0.88192126434835493853f,
-0.38268343236508983729f,0.92387953251128673848f,0.29028467725446233105f,
-0.95694033573220893540f,0.19509032201612833135f,0.98078528040323043058f,
-0.09801714032956077016f,0.99518472667219681771f,1.00000000000000000000f,
-0.00000000000000000000f,0.92387953251128673848f,0.38268343236508978178f,
-0.70710678118654757274f,0.70710678118654757274f,0.38268343236508983729f,
-0.92387953251128673848f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99999880790710449219f,
+0.00153398013208061457f,0.99999529123306274414f,0.00306795677170157433f,
+0.99998939037322998047f,0.00460192607715725899f,0.99998116493225097656f,
+0.00613588467240333557f,0.99997061491012573242f,0.00766982883214950562f,
+0.99995762109756469727f,0.00920375436544418335f,0.99994236230850219727f,
+0.01073765940964221954f,0.99992471933364868164f,0.01227153837680816650f,
+0.99990469217300415039f,0.01380538847297430038f,0.99988234043121337891f,
+0.01533920597285032272f,0.99985766410827636719f,0.01687298715114593506f,
+0.99983060359954833984f,0.01840673014521598816f,0.99980115890502929688f,
+0.01994042843580245972f,0.99976938962936401367f,0.02147408016026020050f,
+0.99973529577255249023f,0.02300768159329891205f,0.99969881772994995117f,
+0.02454122900962829590f,0.99966001510620117188f,0.02607471868395805359f,
+0.99961882829666137695f,0.02760814502835273743f,0.99957531690597534180f,
+0.02914150804281234741f,0.99952942132949829102f,0.03067480400204658508f,
+0.99948120117187500000f,0.03220802545547485352f,0.99943059682846069336f,
+0.03374117240309715271f,0.99937766790390014648f,0.03527423739433288574f,
+0.99932235479354858398f,0.03680722415447235107f,0.99926477670669555664f,
+0.03834012150764465332f,0.99920475482940673828f,0.03987292572855949402f,
+0.99914240837097167969f,0.04140564054250717163f,0.99907773733139038086f,
+0.04293825849890708923f,0.99901068210601806641f,0.04447077214717864990f,
+0.99894130229949951172f,0.04600318148732185364f,0.99886953830718994141f,
+0.04753548279404640198f,0.99879544973373413086f,0.04906767606735229492f,
+0.99871903657913208008f,0.05059975013136863708f,0.99864023923873901367f,
+0.05213170498609542847f,0.99855905771255493164f,0.05366353690624237061f,
+0.99847555160522460938f,0.05519524589180946350f,0.99838972091674804688f,
+0.05672682076692581177f,0.99830156564712524414f,0.05825826525688171387f,
+0.99821102619171142578f,0.05978957191109657288f,0.99811810255050659180f,
+0.06132073700428009033f,0.99802285432815551758f,0.06285175681114196777f,
+0.99792528152465820312f,0.06438262760639190674f,0.99782532453536987305f,
+0.06591334939002990723f,0.99772304296493530273f,0.06744392216205596924f,
+0.99761843681335449219f,0.06897433102130889893f,0.99751144647598266602f,
+0.07050457596778869629f,0.99740213155746459961f,0.07203464955091476440f,
+0.99729043245315551758f,0.07356456667184829712f,0.99717640876770019531f,
+0.07509429752826690674f,0.99706006050109863281f,0.07662386447191238403f,
+0.99694132804870605469f,0.07815324515104293823f,0.99682027101516723633f,
+0.07968243956565856934f,0.99669688940048217773f,0.08121144771575927734f,
+0.99657112360000610352f,0.08274026215076446533f,0.99644303321838378906f,
+0.08426889032125473022f,0.99631261825561523438f,0.08579730987548828125f,
+0.99617981910705566406f,0.08732553571462631226f,0.99604469537734985352f,
+0.08885355293750762939f,0.99590724706649780273f,0.09038136154413223267f,
+0.99576741456985473633f,0.09190895408391952515f,0.99562525749206542969f,
+0.09343633800745010376f,0.99548077583312988281f,0.09496349841356277466f,
+0.99533390998840332031f,0.09649042785167694092f,0.99518471956253051758f,
+0.09801714122295379639f,0.99503320455551147461f,0.09954361617565155029f,
+0.99487930536270141602f,0.10106986016035079956f,0.99472314119338989258f,
+0.10259586572647094727f,0.99456459283828735352f,0.10412163287401199341f,
+0.99440366029739379883f,0.10564715415239334106f,0.99424046277999877930f,
+0.10717242211103439331f,0.99407488107681274414f,0.10869744420051574707f,
+0.99390697479248046875f,0.11022220551967620850f,0.99373674392700195312f,
+0.11174671351909637451f,0.99356412887573242188f,0.11327095329761505127f,
+0.99338918924331665039f,0.11479492485523223877f,0.99321192502975463867f,
+0.11631862819194793701f,0.99303233623504638672f,0.11784206330776214600f,
+0.99285042285919189453f,0.11936521530151367188f,0.99266612529754638672f,
+0.12088808417320251465f,0.99247956275939941406f,0.12241067737340927124f,
+0.99229061603546142578f,0.12393297255039215088f,0.99209928512573242188f,
+0.12545497715473175049f,0.99190568923950195312f,0.12697669863700866699f,
+0.99170976877212524414f,0.12849810719490051270f,0.99151146411895751953f,
+0.13001921772956848145f,0.99131083488464355469f,0.13154003024101257324f,
+0.99110794067382812500f,0.13306052982807159424f,0.99090266227722167969f,
+0.13458070158958435059f,0.99069499969482421875f,0.13610057532787322998f,
+0.99048507213592529297f,0.13762012124061584473f,0.99027281999588012695f,
+0.13913933932781219482f,0.99005818367004394531f,0.14065824449062347412f,
+0.98984128236770629883f,0.14217680692672729492f,0.98962199687957763672f,
+0.14369502663612365723f,0.98940044641494750977f,0.14521291851997375488f,
+0.98917651176452636719f,0.14673046767711639404f,0.98895025253295898438f,
+0.14824767410755157471f,0.98872166872024536133f,0.14976453781127929688f,
+0.98849081993103027344f,0.15128104388713836670f,0.98825758695602416992f,
+0.15279719233512878418f,0.98802202939987182617f,0.15431296825408935547f,
+0.98778414726257324219f,0.15582840144634246826f,0.98754394054412841797f,
+0.15734346210956573486f,0.98730140924453735352f,0.15885815024375915527f,
+0.98705655336380004883f,0.16037245094776153564f,0.98680937290191650391f,
+0.16188639402389526367f,0.98655992746353149414f,0.16339994966983795166f,
+0.98630809783935546875f,0.16491311788558959961f,0.98605394363403320312f,
+0.16642589867115020752f,0.98579752445220947266f,0.16793829202651977539f,
+0.98553872108459472656f,0.16945029795169830322f,0.98527765274047851562f,
+0.17096188664436340332f,0.98501425981521606445f,0.17247308790683746338f,
+0.98474848270416259766f,0.17398387193679809570f,0.98448044061660766602f,
+0.17549425363540649414f,0.98421007394790649414f,0.17700421810150146484f,
+0.98393744230270385742f,0.17851376533508300781f,0.98366242647171020508f,
+0.18002289533615112305f,0.98338508605957031250f,0.18153160810470581055f,
+0.98310548067092895508f,0.18303988873958587646f,0.98282355070114135742f,
+0.18454773724079132080f,0.98253929615020751953f,0.18605515360832214355f,
+0.98225271701812744141f,0.18756212294101715088f,0.98196387290954589844f,
+0.18906866014003753662f,0.98167270421981811523f,0.19057475030422210693f,
+0.98137921094894409180f,0.19208039343357086182f,0.98108339309692382812f,
+0.19358558952808380127f,0.98078525066375732422f,0.19509032368659973145f,
+0.98048484325408935547f,0.19659459590911865234f,0.98018211126327514648f,
+0.19809840619564056396f,0.97987711429595947266f,0.19960175454616546631f,
+0.97956979274749755859f,0.20110464096069335938f,0.97926014661788940430f,
+0.20260703563690185547f,0.97894817590713500977f,0.20410896837711334229f,
+0.97863394021987915039f,0.20561040937900543213f,0.97831737995147705078f,
+0.20711137354373931885f,0.97799849510192871094f,0.20861184597015380859f,
+0.97767734527587890625f,0.21011184155941009521f,0.97735387086868286133f,
+0.21161133050918579102f,0.97702813148498535156f,0.21311031281948089600f,
+0.97670006752014160156f,0.21460881829261779785f,0.97636973857879638672f,
+0.21610680222511291504f,0.97603708505630493164f,0.21760427951812744141f,
+0.97570210695266723633f,0.21910123527050018311f,0.97536486387252807617f,
+0.22059768438339233398f,0.97502535581588745117f,0.22209362685680389404f,
+0.97468352317810058594f,0.22358903288841247559f,0.97433936595916748047f,
+0.22508391737937927246f,0.97399294376373291016f,0.22657826542854309082f,
+0.97364425659179687500f,0.22807207703590393066f,0.97329324483871459961f,
+0.22956536710262298584f,0.97293996810913085938f,0.23105810582637786865f,
+0.97258436679840087891f,0.23255030810832977295f,0.97222650051116943359f,
+0.23404195904731750488f,0.97186630964279174805f,0.23553305864334106445f,
+0.97150391340255737305f,0.23702360689640045166f,0.97113913297653198242f,
+0.23851358890533447266f,0.97077214717864990234f,0.24000301957130432129f,
+0.97040283679962158203f,0.24149188399314880371f,0.97003126144409179688f,
+0.24298018217086791992f,0.96965736150741577148f,0.24446789920330047607f,
+0.96928125619888305664f,0.24595504999160766602f,0.96890282630920410156f,
+0.24744161963462829590f,0.96852207183837890625f,0.24892760813236236572f,
+0.96813911199569702148f,0.25041300058364868164f,0.96775382757186889648f,
+0.25189781188964843750f,0.96736627817153930664f,0.25338202714920043945f,
+0.96697646379470825195f,0.25486564636230468750f,0.96658438444137573242f,
+0.25634866952896118164f,0.96618998050689697266f,0.25783109664916992188f,
+0.96579337120056152344f,0.25931292772293090820f,0.96539443731307983398f,
+0.26079410314559936523f,0.96499323844909667969f,0.26227471232414245605f,
+0.96458977460861206055f,0.26375466585159301758f,0.96418404579162597656f,
+0.26523402333259582520f,0.96377605199813842773f,0.26671275496482849121f,
+0.96336579322814941406f,0.26819086074829101562f,0.96295326948165893555f,
+0.26966831088066101074f,0.96253848075866699219f,0.27114516496658325195f,
+0.96212142705917358398f,0.27262136340141296387f,0.96170204877853393555f,
+0.27409690618515014648f,0.96128046512603759766f,0.27557182312011718750f,
+0.96085661649703979492f,0.27704608440399169922f,0.96043050289154052734f,
+0.27851969003677368164f,0.96000212430953979492f,0.27999264001846313477f,
+0.95957154035568237305f,0.28146493434906005859f,0.95913863182067871094f,
+0.28293657302856445312f,0.95870345830917358398f,0.28440752625465393066f,
+0.95826607942581176758f,0.28587782382965087891f,0.95782643556594848633f,
+0.28734746575355529785f,0.95738452672958374023f,0.28881642222404479980f,
+0.95694035291671752930f,0.29028466343879699707f,0.95649391412734985352f,
+0.29175224900245666504f,0.95604526996612548828f,0.29321914911270141602f,
+0.95559436082839965820f,0.29468536376953125000f,0.95514118671417236328f,
+0.29615089297294616699f,0.95468574762344360352f,0.29761570692062377930f,
+0.95422810316085815430f,0.29907983541488647461f,0.95376819372177124023f,
+0.30054324865341186523f,0.95330601930618286133f,0.30200594663619995117f,
+0.95284163951873779297f,0.30346795916557312012f,0.95237499475479125977f,
+0.30492922663688659668f,0.95190614461898803711f,0.30638980865478515625f,
+0.95143502950668334961f,0.30784964561462402344f,0.95096164941787719727f,
+0.30930876731872558594f,0.95048606395721435547f,0.31076714396476745605f,
+0.95000827312469482422f,0.31222480535507202148f,0.94952815771102905273f,
+0.31368175148963928223f,0.94904589653015136719f,0.31513792276382446289f,
+0.94856137037277221680f,0.31659337878227233887f,0.94807457923889160156f,
+0.31804808974266052246f,0.94758558273315429688f,0.31950202584266662598f,
+0.94709438085556030273f,0.32095524668693542480f,0.94660091400146484375f,
+0.32240769267082214355f,0.94610524177551269531f,0.32385936379432678223f,
+0.94560730457305908203f,0.32531028985977172852f,0.94510722160339355469f,
+0.32676044106483459473f,0.94460481405258178711f,0.32820984721183776855f,
+0.94410026073455810547f,0.32965844869613647461f,0.94359344244003295898f,
+0.33110630512237548828f,0.94308441877365112305f,0.33255335688591003418f,
+0.94257318973541259766f,0.33399966359138488770f,0.94205975532531738281f,
+0.33544513583183288574f,0.94154405593872070312f,0.33688986301422119141f,
+0.94102615118026733398f,0.33833375573158264160f,0.94050604104995727539f,
+0.33977687358856201172f,0.93998372554779052734f,0.34121921658515930176f,
+0.93945920467376708984f,0.34266072511672973633f,0.93893247842788696289f,
+0.34410142898559570312f,0.93840354681015014648f,0.34554132819175720215f,
+0.93787235021591186523f,0.34698042273521423340f,0.93733900785446166992f,
+0.34841868281364440918f,0.93680346012115478516f,0.34985613822937011719f,
+0.93626564741134643555f,0.35129275918006896973f,0.93572568893432617188f,
+0.35272854566574096680f,0.93518352508544921875f,0.35416352748870849609f,
+0.93463915586471557617f,0.35559767484664916992f,0.93409252166748046875f,
+0.35703095793724060059f,0.93354380130767822266f,0.35846340656280517578f,
+0.93299281597137451172f,0.35989505052566528320f,0.93243962526321411133f,
+0.36132580041885375977f,0.93188428878784179688f,0.36275571584701538086f,
+0.93132668733596801758f,0.36418479681015014648f,0.93076694011688232422f,
+0.36561298370361328125f,0.93020504713058471680f,0.36704033613204956055f,
+0.92964088916778564453f,0.36846682429313659668f,0.92907458543777465820f,
+0.36989244818687438965f,0.92850607633590698242f,0.37131720781326293945f,
+0.92793542146682739258f,0.37274107336997985840f,0.92736250162124633789f,
+0.37416407465934753418f,0.92678749561309814453f,0.37558618187904357910f,
+0.92621022462844848633f,0.37700742483139038086f,0.92563080787658691406f,
+0.37842774391174316406f,0.92504924535751342773f,0.37984719872474670410f,
+0.92446547746658325195f,0.38126575946807861328f,0.92387950420379638672f,
+0.38268342614173889160f,0.92329144477844238281f,0.38410019874572753906f,
+0.92270112037658691406f,0.38551604747772216797f,0.92210865020751953125f,
+0.38693100214004516602f,0.92151403427124023438f,0.38834503293037414551f,
+0.92091721296310424805f,0.38975816965103149414f,0.92031830549240112305f,
+0.39117038249969482422f,0.91971713304519653320f,0.39258167147636413574f,
+0.91911387443542480469f,0.39399203658103942871f,0.91850841045379638672f,
+0.39540147781372070312f,0.91790080070495605469f,0.39680999517440795898f,
+0.91729098558425903320f,0.39821755886077880859f,0.91667908430099487305f,
+0.39962419867515563965f,0.91606497764587402344f,0.40102988481521606445f,
+0.91544872522354125977f,0.40243464708328247070f,0.91483032703399658203f,
+0.40383845567703247070f,0.91420978307723999023f,0.40524131059646606445f,
+0.91358703374862670898f,0.40664321184158325195f,0.91296219825744628906f,
+0.40804415941238403320f,0.91233515739440917969f,0.40944415330886840820f,
+0.91170603036880493164f,0.41084316372871398926f,0.91107475757598876953f,
+0.41224122047424316406f,0.91044127941131591797f,0.41363832354545593262f,
+0.90980571508407592773f,0.41503441333770751953f,0.90916800498962402344f,
+0.41642954945564270020f,0.90852808952331542969f,0.41782370209693908691f,
+0.90788608789443969727f,0.41921690106391906738f,0.90724200010299682617f,
+0.42060908675193786621f,0.90659570693969726562f,0.42200025916099548340f,
+0.90594726800918579102f,0.42339047789573669434f,0.90529674291610717773f,
+0.42477968335151672363f,0.90464407205581665039f,0.42616787552833557129f,
+0.90398931503295898438f,0.42755508422851562500f,0.90333235263824462891f,
+0.42894127964973449707f,0.90267330408096313477f,0.43032649159431457520f,
+0.90201216936111450195f,0.43171066045761108398f,0.90134882926940917969f,
+0.43309381604194641113f,0.90068340301513671875f,0.43447595834732055664f,
+0.90001589059829711914f,0.43585708737373352051f,0.89934623241424560547f,
+0.43723717331886291504f,0.89867448806762695312f,0.43861624598503112793f,
+0.89800059795379638672f,0.43999427556991577148f,0.89732456207275390625f,
+0.44137126207351684570f,0.89664649963378906250f,0.44274723529815673828f,
+0.89596623182296752930f,0.44412213563919067383f,0.89528393745422363281f,
+0.44549602270126342773f,0.89459949731826782227f,0.44686883687973022461f,
+0.89391297101974487305f,0.44824060797691345215f,0.89322429895401000977f,
+0.44961133599281311035f,0.89253354072570800781f,0.45098099112510681152f,
+0.89184069633483886719f,0.45234957337379455566f,0.89114576578140258789f,
+0.45371711254119873047f,0.89044874906539916992f,0.45508357882499694824f,
+0.88974958658218383789f,0.45644897222518920898f,0.88904833793640136719f,
+0.45781329274177551270f,0.88834506273269653320f,0.45917654037475585938f,
+0.88763964176177978516f,0.46053871512413024902f,0.88693213462829589844f,
+0.46189978718757629395f,0.88622254133224487305f,0.46325978636741638184f,
+0.88551086187362670898f,0.46461868286132812500f,0.88479709625244140625f,
+0.46597650647163391113f,0.88408124446868896484f,0.46733319759368896484f,
+0.88336336612701416016f,0.46868881583213806152f,0.88264334201812744141f,
+0.47004333138465881348f,0.88192129135131835938f,0.47139674425125122070f,
+0.88119709491729736328f,0.47274902462959289551f,0.88047087192535400391f,
+0.47410020232200622559f,0.87974262237548828125f,0.47545027732849121094f,
+0.87901222705841064453f,0.47679921984672546387f,0.87827980518341064453f,
+0.47814705967903137207f,0.87754529714584350586f,0.47949376702308654785f,
+0.87680870294570922852f,0.48083934187889099121f,0.87607008218765258789f,
+0.48218378424644470215f,0.87532937526702880859f,0.48352706432342529297f,
+0.87458664178848266602f,0.48486924171447753906f,0.87384182214736938477f,
+0.48621028661727905273f,0.87309497594833374023f,0.48755016922950744629f,
+0.87234604358673095703f,0.48888888955116271973f,0.87159508466720581055f,
+0.49022647738456726074f,0.87084203958511352539f,0.49156290292739868164f,
+0.87008696794509887695f,0.49289819598197937012f,0.86932986974716186523f,
+0.49423229694366455078f,0.86857068538665771484f,0.49556526541709899902f,
+0.86780947446823120117f,0.49689704179763793945f,0.86704623699188232422f,
+0.49822765588760375977f,0.86628097295761108398f,0.49955710768699645996f,
+0.86551362276077270508f,0.50088536739349365234f,0.86474424600601196289f,
+0.50221246480941772461f,0.86397284269332885742f,0.50353837013244628906f,
+0.86319941282272338867f,0.50486308336257934570f,0.86242395639419555664f,
+0.50618666410446166992f,0.86164647340774536133f,0.50750899314880371094f,
+0.86086696386337280273f,0.50883013010025024414f,0.86008536815643310547f,
+0.51015007495880126953f,0.85930180549621582031f,0.51146882772445678711f,
+0.85851621627807617188f,0.51278638839721679688f,0.85772860050201416016f,
+0.51410275697708129883f,0.85693895816802978516f,0.51541787385940551758f,
+0.85614734888076782227f,0.51673179864883422852f,0.85535365343093872070f,
+0.51804453134536743164f,0.85455799102783203125f,0.51935601234436035156f,
+0.85376030206680297852f,0.52066624164581298828f,0.85296058654785156250f,
+0.52197527885437011719f,0.85215890407562255859f,0.52328312397003173828f,
+0.85135519504547119141f,0.52458965778350830078f,0.85054945945739746094f,
+0.52589499950408935547f,0.84974175691604614258f,0.52719914913177490234f,
+0.84893202781677246094f,0.52850198745727539062f,0.84812033176422119141f,
+0.52980363368988037109f,0.84730660915374755859f,0.53110402822494506836f,
+0.84649091958999633789f,0.53240311145782470703f,0.84567326307296752930f,
+0.53370100259780883789f,0.84485357999801635742f,0.53499764204025268555f,
+0.84403187036514282227f,0.53629297018051147461f,0.84320825338363647461f,
+0.53758704662322998047f,0.84238260984420776367f,0.53887993097305297852f,
+0.84155499935150146484f,0.54017144441604614258f,0.84072536230087280273f,
+0.54146176576614379883f,0.83989381790161132812f,0.54275077581405639648f,
+0.83906024694442749023f,0.54403853416442871094f,0.83822470903396606445f,
+0.54532498121261596680f,0.83738720417022705078f,0.54661017656326293945f,
+0.83654773235321044922f,0.54789406061172485352f,0.83570629358291625977f,
+0.54917663335800170898f,0.83486288785934448242f,0.55045795440673828125f,
+0.83401751518249511719f,0.55173796415328979492f,0.83317017555236816406f,
+0.55301672220230102539f,0.83232086896896362305f,0.55429410934448242188f,
+0.83146959543228149414f,0.55557024478912353516f,0.83061641454696655273f,
+0.55684500932693481445f,0.82976120710372924805f,0.55811852216720581055f,
+0.82890409231185913086f,0.55939072370529174805f,0.82804507017135620117f,
+0.56066155433654785156f,0.82718402147293090820f,0.56193113327026367188f,
+0.82632106542587280273f,0.56319934129714965820f,0.82545614242553710938f,
+0.56446623802185058594f,0.82458931207656860352f,0.56573182344436645508f,
+0.82372051477432250977f,0.56699603796005249023f,0.82284981012344360352f,
+0.56825894117355346680f,0.82197713851928710938f,0.56952053308486938477f,
+0.82110249996185302734f,0.57078075408935546875f,0.82022595405578613281f,
+0.57203960418701171875f,0.81934750080108642578f,0.57329714298248291016f,
+0.81846714019775390625f,0.57455337047576904297f,0.81758481264114379883f,
+0.57580816745758056641f,0.81670057773590087891f,0.57706165313720703125f,
+0.81581443548202514648f,0.57831376791000366211f,0.81492632627487182617f,
+0.57956457138061523438f,0.81403630971908569336f,0.58081394433975219727f,
+0.81314438581466674805f,0.58206200599670410156f,0.81225061416625976562f,
+0.58330863714218139648f,0.81135487556457519531f,0.58455395698547363281f,
+0.81045717000961303711f,0.58579784631729125977f,0.80955761671066284180f,
+0.58704036474227905273f,0.80865615606307983398f,0.58828157186508178711f,
+0.80775284767150878906f,0.58952128887176513672f,0.80684757232666015625f,
+0.59075969457626342773f,0.80594038963317871094f,0.59199666976928710938f,
+0.80503135919570922852f,0.59323227405548095703f,0.80412036180496215820f,
+0.59446650743484497070f,0.80320751667022705078f,0.59569931030273437500f,
+0.80229282379150390625f,0.59693068265914916992f,0.80137616395950317383f,
+0.59816068410873413086f,0.80045765638351440430f,0.59938931465148925781f,
+0.79953724145889282227f,0.60061645507812500000f,0.79861497879028320312f,
+0.60184222459793090820f,0.79769086837768554688f,0.60306662321090698242f,
+0.79676479101181030273f,0.60428953170776367188f,0.79583692550659179688f,
+0.60551106929779052734f,0.79490715265274047852f,0.60673111677169799805f,
+0.79397547245025634766f,0.60794979333877563477f,0.79304194450378417969f,
+0.60916703939437866211f,0.79210656881332397461f,0.61038279533386230469f,
+0.79116934537887573242f,0.61159718036651611328f,0.79023021459579467773f,
+0.61281007528305053711f,0.78928923606872558594f,0.61402153968811035156f,
+0.78834640979766845703f,0.61523157358169555664f,0.78740173578262329102f,
+0.61644017696380615234f,0.78645521402359008789f,0.61764729022979736328f,
+0.78550684452056884766f,0.61885297298431396484f,0.78455656766891479492f,
+0.62005722522735595703f,0.78360450267791748047f,0.62125998735427856445f,
+0.78265058994293212891f,0.62246125936508178711f,0.78169482946395874023f,
+0.62366110086441040039f,0.78073722124099731445f,0.62485951185226440430f,
+0.77977776527404785156f,0.62605637311935424805f,0.77881652116775512695f,
+0.62725180387496948242f,0.77785342931747436523f,0.62844574451446533203f,
+0.77688848972320556641f,0.62963825464248657227f,0.77592170238494873047f,
+0.63082921504974365234f,0.77495312690734863281f,0.63201874494552612305f,
+0.77398270368576049805f,0.63320678472518920898f,0.77301043272018432617f,
+0.63439327478408813477f,0.77203637361526489258f,0.63557833433151245117f,
+0.77106052637100219727f,0.63676184415817260742f,0.77008283138275146484f,
+0.63794392347335815430f,0.76910334825515747070f,0.63912445306777954102f,
+0.76812201738357543945f,0.64030349254608154297f,0.76713889837265014648f,
+0.64148104190826416016f,0.76615399122238159180f,0.64265704154968261719f,
+0.76516723632812500000f,0.64383155107498168945f,0.76417875289916992188f,
+0.64500451087951660156f,0.76318842172622680664f,0.64617604017257690430f,
+0.76219630241394042969f,0.64734596014022827148f,0.76120239496231079102f,
+0.64851438999176025391f,0.76020669937133789062f,0.64968132972717285156f,
+0.75920921564102172852f,0.65084666013717651367f,0.75820988416671752930f,
+0.65201056003570556641f,0.75720882415771484375f,0.65317285060882568359f,
+0.75620597600936889648f,0.65433359146118164062f,0.75520139932632446289f,
+0.65549284219741821289f,0.75419497489929199219f,0.65665054321289062500f,
+0.75318682193756103516f,0.65780669450759887695f,0.75217682123184204102f,
+0.65896129608154296875f,0.75116515159606933594f,0.66011434793472290039f,
+0.75015163421630859375f,0.66126585006713867188f,0.74913638830184936523f,
+0.66241580247879028320f,0.74811935424804687500f,0.66356414556503295898f,
+0.74710059165954589844f,0.66471099853515625000f,0.74608010053634643555f,
+0.66585624217987060547f,0.74505776166915893555f,0.66699993610382080078f,
+0.74403375387191772461f,0.66814202070236206055f,0.74300795793533325195f,
+0.66928261518478393555f,0.74198043346405029297f,0.67042154073715209961f,
+0.74095112085342407227f,0.67155897617340087891f,0.73992007970809936523f,
+0.67269474267959594727f,0.73888731002807617188f,0.67382901906967163086f,
+0.73785281181335449219f,0.67496162652969360352f,0.73681658506393432617f,
+0.67609268426895141602f,0.73577857017517089844f,0.67722219228744506836f,
+0.73473888635635375977f,0.67835003137588500977f,0.73369741439819335938f,
+0.67947632074356079102f,0.73265427350997924805f,0.68060100078582763672f,
+0.73160940408706665039f,0.68172407150268554688f,0.73056274652481079102f,
+0.68284553289413452148f,0.72951442003250122070f,0.68396538496017456055f,
+0.72846436500549316406f,0.68508368730545043945f,0.72741264104843139648f,
+0.68620032072067260742f,0.72635912895202636719f,0.68731534481048583984f,
+0.72530394792556762695f,0.68842875957489013672f,0.72424709796905517578f,
+0.68954056501388549805f,0.72318845987319946289f,0.69065070152282714844f,
+0.72212821245193481445f,0.69175922870635986328f,0.72106617689132690430f,
+0.69286614656448364258f,0.72000253200531005859f,0.69397145509719848633f,
+0.71893709897994995117f,0.69507509469985961914f,0.71787005662918090820f,
+0.69617712497711181641f,0.71680128574371337891f,0.69727748632431030273f,
+0.71573084592819213867f,0.69837623834609985352f,0.71465867757797241211f,
+0.69947332143783569336f,0.71358484029769897461f,0.70056879520416259766f,
+0.71250939369201660156f,0.70166260004043579102f,0.71143221855163574219f,
+0.70275473594665527344f,0.71035337448120117188f,0.70384526252746582031f,
+0.70927280187606811523f,0.70493406057357788086f,0.70819061994552612305f,
+0.70602124929428100586f,0.70710676908493041992f,0.70710676908493041992f,
+0.70602124929428100586f,0.70819061994552612305f,0.70493406057357788086f,
+0.70927280187606811523f,0.70384526252746582031f,0.71035337448120117188f,
+0.70275473594665527344f,0.71143221855163574219f,0.70166260004043579102f,
+0.71250939369201660156f,0.70056879520416259766f,0.71358484029769897461f,
+0.69947332143783569336f,0.71465867757797241211f,0.69837623834609985352f,
+0.71573084592819213867f,0.69727748632431030273f,0.71680128574371337891f,
+0.69617712497711181641f,0.71787005662918090820f,0.69507509469985961914f,
+0.71893709897994995117f,0.69397145509719848633f,0.72000253200531005859f,
+0.69286614656448364258f,0.72106617689132690430f,0.69175922870635986328f,
+0.72212821245193481445f,0.69065070152282714844f,0.72318845987319946289f,
+0.68954056501388549805f,0.72424709796905517578f,0.68842875957489013672f,
+0.72530394792556762695f,0.68731534481048583984f,0.72635912895202636719f,
+0.68620032072067260742f,0.72741264104843139648f,0.68508368730545043945f,
+0.72846436500549316406f,0.68396538496017456055f,0.72951442003250122070f,
+0.68284553289413452148f,0.73056274652481079102f,0.68172407150268554688f,
+0.73160940408706665039f,0.68060100078582763672f,0.73265427350997924805f,
+0.67947632074356079102f,0.73369741439819335938f,0.67835003137588500977f,
+0.73473888635635375977f,0.67722219228744506836f,0.73577857017517089844f,
+0.67609268426895141602f,0.73681658506393432617f,0.67496162652969360352f,
+0.73785281181335449219f,0.67382901906967163086f,0.73888731002807617188f,
+0.67269474267959594727f,0.73992007970809936523f,0.67155897617340087891f,
+0.74095112085342407227f,0.67042154073715209961f,0.74198043346405029297f,
+0.66928261518478393555f,0.74300795793533325195f,0.66814202070236206055f,
+0.74403375387191772461f,0.66699993610382080078f,0.74505776166915893555f,
+0.66585624217987060547f,0.74608010053634643555f,0.66471099853515625000f,
+0.74710059165954589844f,0.66356414556503295898f,0.74811935424804687500f,
+0.66241580247879028320f,0.74913638830184936523f,0.66126585006713867188f,
+0.75015163421630859375f,0.66011434793472290039f,0.75116515159606933594f,
+0.65896129608154296875f,0.75217682123184204102f,0.65780669450759887695f,
+0.75318682193756103516f,0.65665054321289062500f,0.75419497489929199219f,
+0.65549284219741821289f,0.75520139932632446289f,0.65433359146118164062f,
+0.75620597600936889648f,0.65317285060882568359f,0.75720882415771484375f,
+0.65201056003570556641f,0.75820988416671752930f,0.65084666013717651367f,
+0.75920921564102172852f,0.64968132972717285156f,0.76020669937133789062f,
+0.64851438999176025391f,0.76120239496231079102f,0.64734596014022827148f,
+0.76219630241394042969f,0.64617604017257690430f,0.76318842172622680664f,
+0.64500451087951660156f,0.76417875289916992188f,0.64383155107498168945f,
+0.76516723632812500000f,0.64265704154968261719f,0.76615399122238159180f,
+0.64148104190826416016f,0.76713889837265014648f,0.64030349254608154297f,
+0.76812201738357543945f,0.63912445306777954102f,0.76910334825515747070f,
+0.63794392347335815430f,0.77008283138275146484f,0.63676184415817260742f,
+0.77106052637100219727f,0.63557833433151245117f,0.77203637361526489258f,
+0.63439327478408813477f,0.77301043272018432617f,0.63320678472518920898f,
+0.77398270368576049805f,0.63201874494552612305f,0.77495312690734863281f,
+0.63082921504974365234f,0.77592170238494873047f,0.62963825464248657227f,
+0.77688848972320556641f,0.62844574451446533203f,0.77785342931747436523f,
+0.62725180387496948242f,0.77881652116775512695f,0.62605637311935424805f,
+0.77977776527404785156f,0.62485951185226440430f,0.78073722124099731445f,
+0.62366110086441040039f,0.78169482946395874023f,0.62246125936508178711f,
+0.78265058994293212891f,0.62125998735427856445f,0.78360450267791748047f,
+0.62005722522735595703f,0.78455656766891479492f,0.61885297298431396484f,
+0.78550684452056884766f,0.61764729022979736328f,0.78645521402359008789f,
+0.61644017696380615234f,0.78740173578262329102f,0.61523157358169555664f,
+0.78834640979766845703f,0.61402153968811035156f,0.78928923606872558594f,
+0.61281007528305053711f,0.79023021459579467773f,0.61159718036651611328f,
+0.79116934537887573242f,0.61038279533386230469f,0.79210656881332397461f,
+0.60916703939437866211f,0.79304194450378417969f,0.60794979333877563477f,
+0.79397547245025634766f,0.60673111677169799805f,0.79490715265274047852f,
+0.60551106929779052734f,0.79583692550659179688f,0.60428953170776367188f,
+0.79676479101181030273f,0.60306662321090698242f,0.79769086837768554688f,
+0.60184222459793090820f,0.79861497879028320312f,0.60061645507812500000f,
+0.79953724145889282227f,0.59938931465148925781f,0.80045765638351440430f,
+0.59816068410873413086f,0.80137616395950317383f,0.59693068265914916992f,
+0.80229282379150390625f,0.59569931030273437500f,0.80320751667022705078f,
+0.59446650743484497070f,0.80412036180496215820f,0.59323227405548095703f,
+0.80503135919570922852f,0.59199666976928710938f,0.80594038963317871094f,
+0.59075969457626342773f,0.80684757232666015625f,0.58952128887176513672f,
+0.80775284767150878906f,0.58828157186508178711f,0.80865615606307983398f,
+0.58704036474227905273f,0.80955761671066284180f,0.58579784631729125977f,
+0.81045717000961303711f,0.58455395698547363281f,0.81135487556457519531f,
+0.58330863714218139648f,0.81225061416625976562f,0.58206200599670410156f,
+0.81314438581466674805f,0.58081394433975219727f,0.81403630971908569336f,
+0.57956457138061523438f,0.81492632627487182617f,0.57831376791000366211f,
+0.81581443548202514648f,0.57706165313720703125f,0.81670057773590087891f,
+0.57580816745758056641f,0.81758481264114379883f,0.57455337047576904297f,
+0.81846714019775390625f,0.57329714298248291016f,0.81934750080108642578f,
+0.57203960418701171875f,0.82022595405578613281f,0.57078075408935546875f,
+0.82110249996185302734f,0.56952053308486938477f,0.82197713851928710938f,
+0.56825894117355346680f,0.82284981012344360352f,0.56699603796005249023f,
+0.82372051477432250977f,0.56573182344436645508f,0.82458931207656860352f,
+0.56446623802185058594f,0.82545614242553710938f,0.56319934129714965820f,
+0.82632106542587280273f,0.56193113327026367188f,0.82718402147293090820f,
+0.56066155433654785156f,0.82804507017135620117f,0.55939072370529174805f,
+0.82890409231185913086f,0.55811852216720581055f,0.82976120710372924805f,
+0.55684500932693481445f,0.83061641454696655273f,0.55557024478912353516f,
+0.83146959543228149414f,0.55429410934448242188f,0.83232086896896362305f,
+0.55301672220230102539f,0.83317017555236816406f,0.55173796415328979492f,
+0.83401751518249511719f,0.55045795440673828125f,0.83486288785934448242f,
+0.54917663335800170898f,0.83570629358291625977f,0.54789406061172485352f,
+0.83654773235321044922f,0.54661017656326293945f,0.83738720417022705078f,
+0.54532498121261596680f,0.83822470903396606445f,0.54403853416442871094f,
+0.83906024694442749023f,0.54275077581405639648f,0.83989381790161132812f,
+0.54146176576614379883f,0.84072536230087280273f,0.54017144441604614258f,
+0.84155499935150146484f,0.53887993097305297852f,0.84238260984420776367f,
+0.53758704662322998047f,0.84320825338363647461f,0.53629297018051147461f,
+0.84403187036514282227f,0.53499764204025268555f,0.84485357999801635742f,
+0.53370100259780883789f,0.84567326307296752930f,0.53240311145782470703f,
+0.84649091958999633789f,0.53110402822494506836f,0.84730660915374755859f,
+0.52980363368988037109f,0.84812033176422119141f,0.52850198745727539062f,
+0.84893202781677246094f,0.52719914913177490234f,0.84974175691604614258f,
+0.52589499950408935547f,0.85054945945739746094f,0.52458965778350830078f,
+0.85135519504547119141f,0.52328312397003173828f,0.85215890407562255859f,
+0.52197527885437011719f,0.85296058654785156250f,0.52066624164581298828f,
+0.85376030206680297852f,0.51935601234436035156f,0.85455799102783203125f,
+0.51804453134536743164f,0.85535365343093872070f,0.51673179864883422852f,
+0.85614734888076782227f,0.51541787385940551758f,0.85693895816802978516f,
+0.51410275697708129883f,0.85772860050201416016f,0.51278638839721679688f,
+0.85851621627807617188f,0.51146882772445678711f,0.85930180549621582031f,
+0.51015007495880126953f,0.86008536815643310547f,0.50883013010025024414f,
+0.86086696386337280273f,0.50750899314880371094f,0.86164647340774536133f,
+0.50618666410446166992f,0.86242395639419555664f,0.50486308336257934570f,
+0.86319941282272338867f,0.50353837013244628906f,0.86397284269332885742f,
+0.50221246480941772461f,0.86474424600601196289f,0.50088536739349365234f,
+0.86551362276077270508f,0.49955710768699645996f,0.86628097295761108398f,
+0.49822765588760375977f,0.86704623699188232422f,0.49689704179763793945f,
+0.86780947446823120117f,0.49556526541709899902f,0.86857068538665771484f,
+0.49423229694366455078f,0.86932986974716186523f,0.49289819598197937012f,
+0.87008696794509887695f,0.49156290292739868164f,0.87084203958511352539f,
+0.49022647738456726074f,0.87159508466720581055f,0.48888888955116271973f,
+0.87234604358673095703f,0.48755016922950744629f,0.87309497594833374023f,
+0.48621028661727905273f,0.87384182214736938477f,0.48486924171447753906f,
+0.87458664178848266602f,0.48352706432342529297f,0.87532937526702880859f,
+0.48218378424644470215f,0.87607008218765258789f,0.48083934187889099121f,
+0.87680870294570922852f,0.47949376702308654785f,0.87754529714584350586f,
+0.47814705967903137207f,0.87827980518341064453f,0.47679921984672546387f,
+0.87901222705841064453f,0.47545027732849121094f,0.87974262237548828125f,
+0.47410020232200622559f,0.88047087192535400391f,0.47274902462959289551f,
+0.88119709491729736328f,0.47139674425125122070f,0.88192129135131835938f,
+0.47004333138465881348f,0.88264334201812744141f,0.46868881583213806152f,
+0.88336336612701416016f,0.46733319759368896484f,0.88408124446868896484f,
+0.46597650647163391113f,0.88479709625244140625f,0.46461868286132812500f,
+0.88551086187362670898f,0.46325978636741638184f,0.88622254133224487305f,
+0.46189978718757629395f,0.88693213462829589844f,0.46053871512413024902f,
+0.88763964176177978516f,0.45917654037475585938f,0.88834506273269653320f,
+0.45781329274177551270f,0.88904833793640136719f,0.45644897222518920898f,
+0.88974958658218383789f,0.45508357882499694824f,0.89044874906539916992f,
+0.45371711254119873047f,0.89114576578140258789f,0.45234957337379455566f,
+0.89184069633483886719f,0.45098099112510681152f,0.89253354072570800781f,
+0.44961133599281311035f,0.89322429895401000977f,0.44824060797691345215f,
+0.89391297101974487305f,0.44686883687973022461f,0.89459949731826782227f,
+0.44549602270126342773f,0.89528393745422363281f,0.44412213563919067383f,
+0.89596623182296752930f,0.44274723529815673828f,0.89664649963378906250f,
+0.44137126207351684570f,0.89732456207275390625f,0.43999427556991577148f,
+0.89800059795379638672f,0.43861624598503112793f,0.89867448806762695312f,
+0.43723717331886291504f,0.89934623241424560547f,0.43585708737373352051f,
+0.90001589059829711914f,0.43447595834732055664f,0.90068340301513671875f,
+0.43309381604194641113f,0.90134882926940917969f,0.43171066045761108398f,
+0.90201216936111450195f,0.43032649159431457520f,0.90267330408096313477f,
+0.42894127964973449707f,0.90333235263824462891f,0.42755508422851562500f,
+0.90398931503295898438f,0.42616787552833557129f,0.90464407205581665039f,
+0.42477968335151672363f,0.90529674291610717773f,0.42339047789573669434f,
+0.90594726800918579102f,0.42200025916099548340f,0.90659570693969726562f,
+0.42060908675193786621f,0.90724200010299682617f,0.41921690106391906738f,
+0.90788608789443969727f,0.41782370209693908691f,0.90852808952331542969f,
+0.41642954945564270020f,0.90916800498962402344f,0.41503441333770751953f,
+0.90980571508407592773f,0.41363832354545593262f,0.91044127941131591797f,
+0.41224122047424316406f,0.91107475757598876953f,0.41084316372871398926f,
+0.91170603036880493164f,0.40944415330886840820f,0.91233515739440917969f,
+0.40804415941238403320f,0.91296219825744628906f,0.40664321184158325195f,
+0.91358703374862670898f,0.40524131059646606445f,0.91420978307723999023f,
+0.40383845567703247070f,0.91483032703399658203f,0.40243464708328247070f,
+0.91544872522354125977f,0.40102988481521606445f,0.91606497764587402344f,
+0.39962419867515563965f,0.91667908430099487305f,0.39821755886077880859f,
+0.91729098558425903320f,0.39680999517440795898f,0.91790080070495605469f,
+0.39540147781372070312f,0.91850841045379638672f,0.39399203658103942871f,
+0.91911387443542480469f,0.39258167147636413574f,0.91971713304519653320f,
+0.39117038249969482422f,0.92031830549240112305f,0.38975816965103149414f,
+0.92091721296310424805f,0.38834503293037414551f,0.92151403427124023438f,
+0.38693100214004516602f,0.92210865020751953125f,0.38551604747772216797f,
+0.92270112037658691406f,0.38410019874572753906f,0.92329144477844238281f,
+0.38268342614173889160f,0.92387950420379638672f,0.38126575946807861328f,
+0.92446547746658325195f,0.37984719872474670410f,0.92504924535751342773f,
+0.37842774391174316406f,0.92563080787658691406f,0.37700742483139038086f,
+0.92621022462844848633f,0.37558618187904357910f,0.92678749561309814453f,
+0.37416407465934753418f,0.92736250162124633789f,0.37274107336997985840f,
+0.92793542146682739258f,0.37131720781326293945f,0.92850607633590698242f,
+0.36989244818687438965f,0.92907458543777465820f,0.36846682429313659668f,
+0.92964088916778564453f,0.36704033613204956055f,0.93020504713058471680f,
+0.36561298370361328125f,0.93076694011688232422f,0.36418479681015014648f,
+0.93132668733596801758f,0.36275571584701538086f,0.93188428878784179688f,
+0.36132580041885375977f,0.93243962526321411133f,0.35989505052566528320f,
+0.93299281597137451172f,0.35846340656280517578f,0.93354380130767822266f,
+0.35703095793724060059f,0.93409252166748046875f,0.35559767484664916992f,
+0.93463915586471557617f,0.35416352748870849609f,0.93518352508544921875f,
+0.35272854566574096680f,0.93572568893432617188f,0.35129275918006896973f,
+0.93626564741134643555f,0.34985613822937011719f,0.93680346012115478516f,
+0.34841868281364440918f,0.93733900785446166992f,0.34698042273521423340f,
+0.93787235021591186523f,0.34554132819175720215f,0.93840354681015014648f,
+0.34410142898559570312f,0.93893247842788696289f,0.34266072511672973633f,
+0.93945920467376708984f,0.34121921658515930176f,0.93998372554779052734f,
+0.33977687358856201172f,0.94050604104995727539f,0.33833375573158264160f,
+0.94102615118026733398f,0.33688986301422119141f,0.94154405593872070312f,
+0.33544513583183288574f,0.94205975532531738281f,0.33399966359138488770f,
+0.94257318973541259766f,0.33255335688591003418f,0.94308441877365112305f,
+0.33110630512237548828f,0.94359344244003295898f,0.32965844869613647461f,
+0.94410026073455810547f,0.32820984721183776855f,0.94460481405258178711f,
+0.32676044106483459473f,0.94510722160339355469f,0.32531028985977172852f,
+0.94560730457305908203f,0.32385936379432678223f,0.94610524177551269531f,
+0.32240769267082214355f,0.94660091400146484375f,0.32095524668693542480f,
+0.94709438085556030273f,0.31950202584266662598f,0.94758558273315429688f,
+0.31804808974266052246f,0.94807457923889160156f,0.31659337878227233887f,
+0.94856137037277221680f,0.31513792276382446289f,0.94904589653015136719f,
+0.31368175148963928223f,0.94952815771102905273f,0.31222480535507202148f,
+0.95000827312469482422f,0.31076714396476745605f,0.95048606395721435547f,
+0.30930876731872558594f,0.95096164941787719727f,0.30784964561462402344f,
+0.95143502950668334961f,0.30638980865478515625f,0.95190614461898803711f,
+0.30492922663688659668f,0.95237499475479125977f,0.30346795916557312012f,
+0.95284163951873779297f,0.30200594663619995117f,0.95330601930618286133f,
+0.30054324865341186523f,0.95376819372177124023f,0.29907983541488647461f,
+0.95422810316085815430f,0.29761570692062377930f,0.95468574762344360352f,
+0.29615089297294616699f,0.95514118671417236328f,0.29468536376953125000f,
+0.95559436082839965820f,0.29321914911270141602f,0.95604526996612548828f,
+0.29175224900245666504f,0.95649391412734985352f,0.29028466343879699707f,
+0.95694035291671752930f,0.28881642222404479980f,0.95738452672958374023f,
+0.28734746575355529785f,0.95782643556594848633f,0.28587782382965087891f,
+0.95826607942581176758f,0.28440752625465393066f,0.95870345830917358398f,
+0.28293657302856445312f,0.95913863182067871094f,0.28146493434906005859f,
+0.95957154035568237305f,0.27999264001846313477f,0.96000212430953979492f,
+0.27851969003677368164f,0.96043050289154052734f,0.27704608440399169922f,
+0.96085661649703979492f,0.27557182312011718750f,0.96128046512603759766f,
+0.27409690618515014648f,0.96170204877853393555f,0.27262136340141296387f,
+0.96212142705917358398f,0.27114516496658325195f,0.96253848075866699219f,
+0.26966831088066101074f,0.96295326948165893555f,0.26819086074829101562f,
+0.96336579322814941406f,0.26671275496482849121f,0.96377605199813842773f,
+0.26523402333259582520f,0.96418404579162597656f,0.26375466585159301758f,
+0.96458977460861206055f,0.26227471232414245605f,0.96499323844909667969f,
+0.26079410314559936523f,0.96539443731307983398f,0.25931292772293090820f,
+0.96579337120056152344f,0.25783109664916992188f,0.96618998050689697266f,
+0.25634866952896118164f,0.96658438444137573242f,0.25486564636230468750f,
+0.96697646379470825195f,0.25338202714920043945f,0.96736627817153930664f,
+0.25189781188964843750f,0.96775382757186889648f,0.25041300058364868164f,
+0.96813911199569702148f,0.24892760813236236572f,0.96852207183837890625f,
+0.24744161963462829590f,0.96890282630920410156f,0.24595504999160766602f,
+0.96928125619888305664f,0.24446789920330047607f,0.96965736150741577148f,
+0.24298018217086791992f,0.97003126144409179688f,0.24149188399314880371f,
+0.97040283679962158203f,0.24000301957130432129f,0.97077214717864990234f,
+0.23851358890533447266f,0.97113913297653198242f,0.23702360689640045166f,
+0.97150391340255737305f,0.23553305864334106445f,0.97186630964279174805f,
+0.23404195904731750488f,0.97222650051116943359f,0.23255030810832977295f,
+0.97258436679840087891f,0.23105810582637786865f,0.97293996810913085938f,
+0.22956536710262298584f,0.97329324483871459961f,0.22807207703590393066f,
+0.97364425659179687500f,0.22657826542854309082f,0.97399294376373291016f,
+0.22508391737937927246f,0.97433936595916748047f,0.22358903288841247559f,
+0.97468352317810058594f,0.22209362685680389404f,0.97502535581588745117f,
+0.22059768438339233398f,0.97536486387252807617f,0.21910123527050018311f,
+0.97570210695266723633f,0.21760427951812744141f,0.97603708505630493164f,
+0.21610680222511291504f,0.97636973857879638672f,0.21460881829261779785f,
+0.97670006752014160156f,0.21311031281948089600f,0.97702813148498535156f,
+0.21161133050918579102f,0.97735387086868286133f,0.21011184155941009521f,
+0.97767734527587890625f,0.20861184597015380859f,0.97799849510192871094f,
+0.20711137354373931885f,0.97831737995147705078f,0.20561040937900543213f,
+0.97863394021987915039f,0.20410896837711334229f,0.97894817590713500977f,
+0.20260703563690185547f,0.97926014661788940430f,0.20110464096069335938f,
+0.97956979274749755859f,0.19960175454616546631f,0.97987711429595947266f,
+0.19809840619564056396f,0.98018211126327514648f,0.19659459590911865234f,
+0.98048484325408935547f,0.19509032368659973145f,0.98078525066375732422f,
+0.19358558952808380127f,0.98108339309692382812f,0.19208039343357086182f,
+0.98137921094894409180f,0.19057475030422210693f,0.98167270421981811523f,
+0.18906866014003753662f,0.98196387290954589844f,0.18756212294101715088f,
+0.98225271701812744141f,0.18605515360832214355f,0.98253929615020751953f,
+0.18454773724079132080f,0.98282355070114135742f,0.18303988873958587646f,
+0.98310548067092895508f,0.18153160810470581055f,0.98338508605957031250f,
+0.18002289533615112305f,0.98366242647171020508f,0.17851376533508300781f,
+0.98393744230270385742f,0.17700421810150146484f,0.98421007394790649414f,
+0.17549425363540649414f,0.98448044061660766602f,0.17398387193679809570f,
+0.98474848270416259766f,0.17247308790683746338f,0.98501425981521606445f,
+0.17096188664436340332f,0.98527765274047851562f,0.16945029795169830322f,
+0.98553872108459472656f,0.16793829202651977539f,0.98579752445220947266f,
+0.16642589867115020752f,0.98605394363403320312f,0.16491311788558959961f,
+0.98630809783935546875f,0.16339994966983795166f,0.98655992746353149414f,
+0.16188639402389526367f,0.98680937290191650391f,0.16037245094776153564f,
+0.98705655336380004883f,0.15885815024375915527f,0.98730140924453735352f,
+0.15734346210956573486f,0.98754394054412841797f,0.15582840144634246826f,
+0.98778414726257324219f,0.15431296825408935547f,0.98802202939987182617f,
+0.15279719233512878418f,0.98825758695602416992f,0.15128104388713836670f,
+0.98849081993103027344f,0.14976453781127929688f,0.98872166872024536133f,
+0.14824767410755157471f,0.98895025253295898438f,0.14673046767711639404f,
+0.98917651176452636719f,0.14521291851997375488f,0.98940044641494750977f,
+0.14369502663612365723f,0.98962199687957763672f,0.14217680692672729492f,
+0.98984128236770629883f,0.14065824449062347412f,0.99005818367004394531f,
+0.13913933932781219482f,0.99027281999588012695f,0.13762012124061584473f,
+0.99048507213592529297f,0.13610057532787322998f,0.99069499969482421875f,
+0.13458070158958435059f,0.99090266227722167969f,0.13306052982807159424f,
+0.99110794067382812500f,0.13154003024101257324f,0.99131083488464355469f,
+0.13001921772956848145f,0.99151146411895751953f,0.12849810719490051270f,
+0.99170976877212524414f,0.12697669863700866699f,0.99190568923950195312f,
+0.12545497715473175049f,0.99209928512573242188f,0.12393297255039215088f,
+0.99229061603546142578f,0.12241067737340927124f,0.99247956275939941406f,
+0.12088808417320251465f,0.99266612529754638672f,0.11936521530151367188f,
+0.99285042285919189453f,0.11784206330776214600f,0.99303233623504638672f,
+0.11631862819194793701f,0.99321192502975463867f,0.11479492485523223877f,
+0.99338918924331665039f,0.11327095329761505127f,0.99356412887573242188f,
+0.11174671351909637451f,0.99373674392700195312f,0.11022220551967620850f,
+0.99390697479248046875f,0.10869744420051574707f,0.99407488107681274414f,
+0.10717242211103439331f,0.99424046277999877930f,0.10564715415239334106f,
+0.99440366029739379883f,0.10412163287401199341f,0.99456459283828735352f,
+0.10259586572647094727f,0.99472314119338989258f,0.10106986016035079956f,
+0.99487930536270141602f,0.09954361617565155029f,0.99503320455551147461f,
+0.09801714122295379639f,0.99518471956253051758f,0.09649042785167694092f,
+0.99533390998840332031f,0.09496349841356277466f,0.99548077583312988281f,
+0.09343633800745010376f,0.99562525749206542969f,0.09190895408391952515f,
+0.99576741456985473633f,0.09038136154413223267f,0.99590724706649780273f,
+0.08885355293750762939f,0.99604469537734985352f,0.08732553571462631226f,
+0.99617981910705566406f,0.08579730987548828125f,0.99631261825561523438f,
+0.08426889032125473022f,0.99644303321838378906f,0.08274026215076446533f,
+0.99657112360000610352f,0.08121144771575927734f,0.99669688940048217773f,
+0.07968243956565856934f,0.99682027101516723633f,0.07815324515104293823f,
+0.99694132804870605469f,0.07662386447191238403f,0.99706006050109863281f,
+0.07509429752826690674f,0.99717640876770019531f,0.07356456667184829712f,
+0.99729043245315551758f,0.07203464955091476440f,0.99740213155746459961f,
+0.07050457596778869629f,0.99751144647598266602f,0.06897433102130889893f,
+0.99761843681335449219f,0.06744392216205596924f,0.99772304296493530273f,
+0.06591334939002990723f,0.99782532453536987305f,0.06438262760639190674f,
+0.99792528152465820312f,0.06285175681114196777f,0.99802285432815551758f,
+0.06132073700428009033f,0.99811810255050659180f,0.05978957191109657288f,
+0.99821102619171142578f,0.05825826525688171387f,0.99830156564712524414f,
+0.05672682076692581177f,0.99838972091674804688f,0.05519524589180946350f,
+0.99847555160522460938f,0.05366353690624237061f,0.99855905771255493164f,
+0.05213170498609542847f,0.99864023923873901367f,0.05059975013136863708f,
+0.99871903657913208008f,0.04906767606735229492f,0.99879544973373413086f,
+0.04753548279404640198f,0.99886953830718994141f,0.04600318148732185364f,
+0.99894130229949951172f,0.04447077214717864990f,0.99901068210601806641f,
+0.04293825849890708923f,0.99907773733139038086f,0.04140564054250717163f,
+0.99914240837097167969f,0.03987292572855949402f,0.99920475482940673828f,
+0.03834012150764465332f,0.99926477670669555664f,0.03680722415447235107f,
+0.99932235479354858398f,0.03527423739433288574f,0.99937766790390014648f,
+0.03374117240309715271f,0.99943059682846069336f,0.03220802545547485352f,
+0.99948120117187500000f,0.03067480400204658508f,0.99952942132949829102f,
+0.02914150804281234741f,0.99957531690597534180f,0.02760814502835273743f,
+0.99961882829666137695f,0.02607471868395805359f,0.99966001510620117188f,
+0.02454122900962829590f,0.99969881772994995117f,0.02300768159329891205f,
+0.99973529577255249023f,0.02147408016026020050f,0.99976938962936401367f,
+0.01994042843580245972f,0.99980115890502929688f,0.01840673014521598816f,
+0.99983060359954833984f,0.01687298715114593506f,0.99985766410827636719f,
+0.01533920597285032272f,0.99988234043121337891f,0.01380538847297430038f,
+0.99990469217300415039f,0.01227153837680816650f,0.99992471933364868164f,
+0.01073765940964221954f,0.99994236230850219727f,0.00920375436544418335f,
+0.99995762109756469727f,0.00766982883214950562f,0.99997061491012573242f,
+0.00613588467240333557f,0.99998116493225097656f,0.00460192607715725899f,
+0.99998939037322998047f,0.00306795677170157433f,0.99999529123306274414f,
+0.00153398013208061457f,0.99999880790710449219f,1.00000000000000000000f,
+0.00000000000000000000f,0.99998116493225097656f,0.00613588467240333557f,
+0.99992471933364868164f,0.01227153837680816650f,0.99983060359954833984f,
+0.01840673014521598816f,0.99969881772994995117f,0.02454122900962829590f,
+0.99952942132949829102f,0.03067480400204658508f,0.99932235479354858398f,
+0.03680722415447235107f,0.99907773733139038086f,0.04293825849890708923f,
+0.99879544973373413086f,0.04906767606735229492f,0.99847555160522460938f,
+0.05519524589180946350f,0.99811810255050659180f,0.06132073700428009033f,
+0.99772304296493530273f,0.06744392216205596924f,0.99729043245315551758f,
+0.07356456667184829712f,0.99682027101516723633f,0.07968243956565856934f,
+0.99631261825561523438f,0.08579730987548828125f,0.99576741456985473633f,
+0.09190895408391952515f,0.99518471956253051758f,0.09801714122295379639f,
+0.99456459283828735352f,0.10412163287401199341f,0.99390697479248046875f,
+0.11022220551967620850f,0.99321192502975463867f,0.11631862819194793701f,
+0.99247956275939941406f,0.12241067737340927124f,0.99170976877212524414f,
+0.12849810719490051270f,0.99090266227722167969f,0.13458070158958435059f,
+0.99005818367004394531f,0.14065824449062347412f,0.98917651176452636719f,
+0.14673046767711639404f,0.98825758695602416992f,0.15279719233512878418f,
+0.98730140924453735352f,0.15885815024375915527f,0.98630809783935546875f,
+0.16491311788558959961f,0.98527765274047851562f,0.17096188664436340332f,
+0.98421007394790649414f,0.17700421810150146484f,0.98310548067092895508f,
+0.18303988873958587646f,0.98196387290954589844f,0.18906866014003753662f,
+0.98078525066375732422f,0.19509032368659973145f,0.97956979274749755859f,
+0.20110464096069335938f,0.97831737995147705078f,0.20711137354373931885f,
+0.97702813148498535156f,0.21311031281948089600f,0.97570210695266723633f,
+0.21910123527050018311f,0.97433936595916748047f,0.22508391737937927246f,
+0.97293996810913085938f,0.23105810582637786865f,0.97150391340255737305f,
+0.23702360689640045166f,0.97003126144409179688f,0.24298018217086791992f,
+0.96852207183837890625f,0.24892760813236236572f,0.96697646379470825195f,
+0.25486564636230468750f,0.96539443731307983398f,0.26079410314559936523f,
+0.96377605199813842773f,0.26671275496482849121f,0.96212142705917358398f,
+0.27262136340141296387f,0.96043050289154052734f,0.27851969003677368164f,
+0.95870345830917358398f,0.28440752625465393066f,0.95694035291671752930f,
+0.29028466343879699707f,0.95514118671417236328f,0.29615089297294616699f,
+0.95330601930618286133f,0.30200594663619995117f,0.95143502950668334961f,
+0.30784964561462402344f,0.94952815771102905273f,0.31368175148963928223f,
+0.94758558273315429688f,0.31950202584266662598f,0.94560730457305908203f,
+0.32531028985977172852f,0.94359344244003295898f,0.33110630512237548828f,
+0.94154405593872070312f,0.33688986301422119141f,0.93945920467376708984f,
+0.34266072511672973633f,0.93733900785446166992f,0.34841868281364440918f,
+0.93518352508544921875f,0.35416352748870849609f,0.93299281597137451172f,
+0.35989505052566528320f,0.93076694011688232422f,0.36561298370361328125f,
+0.92850607633590698242f,0.37131720781326293945f,0.92621022462844848633f,
+0.37700742483139038086f,0.92387950420379638672f,0.38268342614173889160f,
+0.92151403427124023438f,0.38834503293037414551f,0.91911387443542480469f,
+0.39399203658103942871f,0.91667908430099487305f,0.39962419867515563965f,
+0.91420978307723999023f,0.40524131059646606445f,0.91170603036880493164f,
+0.41084316372871398926f,0.90916800498962402344f,0.41642954945564270020f,
+0.90659570693969726562f,0.42200025916099548340f,0.90398931503295898438f,
+0.42755508422851562500f,0.90134882926940917969f,0.43309381604194641113f,
+0.89867448806762695312f,0.43861624598503112793f,0.89596623182296752930f,
+0.44412213563919067383f,0.89322429895401000977f,0.44961133599281311035f,
+0.89044874906539916992f,0.45508357882499694824f,0.88763964176177978516f,
+0.46053871512413024902f,0.88479709625244140625f,0.46597650647163391113f,
+0.88192129135131835938f,0.47139674425125122070f,0.87901222705841064453f,
+0.47679921984672546387f,0.87607008218765258789f,0.48218378424644470215f,
+0.87309497594833374023f,0.48755016922950744629f,0.87008696794509887695f,
+0.49289819598197937012f,0.86704623699188232422f,0.49822765588760375977f,
+0.86397284269332885742f,0.50353837013244628906f,0.86086696386337280273f,
+0.50883013010025024414f,0.85772860050201416016f,0.51410275697708129883f,
+0.85455799102783203125f,0.51935601234436035156f,0.85135519504547119141f,
+0.52458965778350830078f,0.84812033176422119141f,0.52980363368988037109f,
+0.84485357999801635742f,0.53499764204025268555f,0.84155499935150146484f,
+0.54017144441604614258f,0.83822470903396606445f,0.54532498121261596680f,
+0.83486288785934448242f,0.55045795440673828125f,0.83146959543228149414f,
+0.55557024478912353516f,0.82804507017135620117f,0.56066155433654785156f,
+0.82458931207656860352f,0.56573182344436645508f,0.82110249996185302734f,
+0.57078075408935546875f,0.81758481264114379883f,0.57580816745758056641f,
+0.81403630971908569336f,0.58081394433975219727f,0.81045717000961303711f,
+0.58579784631729125977f,0.80684757232666015625f,0.59075969457626342773f,
+0.80320751667022705078f,0.59569931030273437500f,0.79953724145889282227f,
+0.60061645507812500000f,0.79583692550659179688f,0.60551106929779052734f,
+0.79210656881332397461f,0.61038279533386230469f,0.78834640979766845703f,
+0.61523157358169555664f,0.78455656766891479492f,0.62005722522735595703f,
+0.78073722124099731445f,0.62485951185226440430f,0.77688848972320556641f,
+0.62963825464248657227f,0.77301043272018432617f,0.63439327478408813477f,
+0.76910334825515747070f,0.63912445306777954102f,0.76516723632812500000f,
+0.64383155107498168945f,0.76120239496231079102f,0.64851438999176025391f,
+0.75720882415771484375f,0.65317285060882568359f,0.75318682193756103516f,
+0.65780669450759887695f,0.74913638830184936523f,0.66241580247879028320f,
+0.74505776166915893555f,0.66699993610382080078f,0.74095112085342407227f,
+0.67155897617340087891f,0.73681658506393432617f,0.67609268426895141602f,
+0.73265427350997924805f,0.68060100078582763672f,0.72846436500549316406f,
+0.68508368730545043945f,0.72424709796905517578f,0.68954056501388549805f,
+0.72000253200531005859f,0.69397145509719848633f,0.71573084592819213867f,
+0.69837623834609985352f,0.71143221855163574219f,0.70275473594665527344f,
+0.70710676908493041992f,0.70710676908493041992f,0.70275473594665527344f,
+0.71143221855163574219f,0.69837623834609985352f,0.71573084592819213867f,
+0.69397145509719848633f,0.72000253200531005859f,0.68954056501388549805f,
+0.72424709796905517578f,0.68508368730545043945f,0.72846436500549316406f,
+0.68060100078582763672f,0.73265427350997924805f,0.67609268426895141602f,
+0.73681658506393432617f,0.67155897617340087891f,0.74095112085342407227f,
+0.66699993610382080078f,0.74505776166915893555f,0.66241580247879028320f,
+0.74913638830184936523f,0.65780669450759887695f,0.75318682193756103516f,
+0.65317285060882568359f,0.75720882415771484375f,0.64851438999176025391f,
+0.76120239496231079102f,0.64383155107498168945f,0.76516723632812500000f,
+0.63912445306777954102f,0.76910334825515747070f,0.63439327478408813477f,
+0.77301043272018432617f,0.62963825464248657227f,0.77688848972320556641f,
+0.62485951185226440430f,0.78073722124099731445f,0.62005722522735595703f,
+0.78455656766891479492f,0.61523157358169555664f,0.78834640979766845703f,
+0.61038279533386230469f,0.79210656881332397461f,0.60551106929779052734f,
+0.79583692550659179688f,0.60061645507812500000f,0.79953724145889282227f,
+0.59569931030273437500f,0.80320751667022705078f,0.59075969457626342773f,
+0.80684757232666015625f,0.58579784631729125977f,0.81045717000961303711f,
+0.58081394433975219727f,0.81403630971908569336f,0.57580816745758056641f,
+0.81758481264114379883f,0.57078075408935546875f,0.82110249996185302734f,
+0.56573182344436645508f,0.82458931207656860352f,0.56066155433654785156f,
+0.82804507017135620117f,0.55557024478912353516f,0.83146959543228149414f,
+0.55045795440673828125f,0.83486288785934448242f,0.54532498121261596680f,
+0.83822470903396606445f,0.54017144441604614258f,0.84155499935150146484f,
+0.53499764204025268555f,0.84485357999801635742f,0.52980363368988037109f,
+0.84812033176422119141f,0.52458965778350830078f,0.85135519504547119141f,
+0.51935601234436035156f,0.85455799102783203125f,0.51410275697708129883f,
+0.85772860050201416016f,0.50883013010025024414f,0.86086696386337280273f,
+0.50353837013244628906f,0.86397284269332885742f,0.49822765588760375977f,
+0.86704623699188232422f,0.49289819598197937012f,0.87008696794509887695f,
+0.48755016922950744629f,0.87309497594833374023f,0.48218378424644470215f,
+0.87607008218765258789f,0.47679921984672546387f,0.87901222705841064453f,
+0.47139674425125122070f,0.88192129135131835938f,0.46597650647163391113f,
+0.88479709625244140625f,0.46053871512413024902f,0.88763964176177978516f,
+0.45508357882499694824f,0.89044874906539916992f,0.44961133599281311035f,
+0.89322429895401000977f,0.44412213563919067383f,0.89596623182296752930f,
+0.43861624598503112793f,0.89867448806762695312f,0.43309381604194641113f,
+0.90134882926940917969f,0.42755508422851562500f,0.90398931503295898438f,
+0.42200025916099548340f,0.90659570693969726562f,0.41642954945564270020f,
+0.90916800498962402344f,0.41084316372871398926f,0.91170603036880493164f,
+0.40524131059646606445f,0.91420978307723999023f,0.39962419867515563965f,
+0.91667908430099487305f,0.39399203658103942871f,0.91911387443542480469f,
+0.38834503293037414551f,0.92151403427124023438f,0.38268342614173889160f,
+0.92387950420379638672f,0.37700742483139038086f,0.92621022462844848633f,
+0.37131720781326293945f,0.92850607633590698242f,0.36561298370361328125f,
+0.93076694011688232422f,0.35989505052566528320f,0.93299281597137451172f,
+0.35416352748870849609f,0.93518352508544921875f,0.34841868281364440918f,
+0.93733900785446166992f,0.34266072511672973633f,0.93945920467376708984f,
+0.33688986301422119141f,0.94154405593872070312f,0.33110630512237548828f,
+0.94359344244003295898f,0.32531028985977172852f,0.94560730457305908203f,
+0.31950202584266662598f,0.94758558273315429688f,0.31368175148963928223f,
+0.94952815771102905273f,0.30784964561462402344f,0.95143502950668334961f,
+0.30200594663619995117f,0.95330601930618286133f,0.29615089297294616699f,
+0.95514118671417236328f,0.29028466343879699707f,0.95694035291671752930f,
+0.28440752625465393066f,0.95870345830917358398f,0.27851969003677368164f,
+0.96043050289154052734f,0.27262136340141296387f,0.96212142705917358398f,
+0.26671275496482849121f,0.96377605199813842773f,0.26079410314559936523f,
+0.96539443731307983398f,0.25486564636230468750f,0.96697646379470825195f,
+0.24892760813236236572f,0.96852207183837890625f,0.24298018217086791992f,
+0.97003126144409179688f,0.23702360689640045166f,0.97150391340255737305f,
+0.23105810582637786865f,0.97293996810913085938f,0.22508391737937927246f,
+0.97433936595916748047f,0.21910123527050018311f,0.97570210695266723633f,
+0.21311031281948089600f,0.97702813148498535156f,0.20711137354373931885f,
+0.97831737995147705078f,0.20110464096069335938f,0.97956979274749755859f,
+0.19509032368659973145f,0.98078525066375732422f,0.18906866014003753662f,
+0.98196387290954589844f,0.18303988873958587646f,0.98310548067092895508f,
+0.17700421810150146484f,0.98421007394790649414f,0.17096188664436340332f,
+0.98527765274047851562f,0.16491311788558959961f,0.98630809783935546875f,
+0.15885815024375915527f,0.98730140924453735352f,0.15279719233512878418f,
+0.98825758695602416992f,0.14673046767711639404f,0.98917651176452636719f,
+0.14065824449062347412f,0.99005818367004394531f,0.13458070158958435059f,
+0.99090266227722167969f,0.12849810719490051270f,0.99170976877212524414f,
+0.12241067737340927124f,0.99247956275939941406f,0.11631862819194793701f,
+0.99321192502975463867f,0.11022220551967620850f,0.99390697479248046875f,
+0.10412163287401199341f,0.99456459283828735352f,0.09801714122295379639f,
+0.99518471956253051758f,0.09190895408391952515f,0.99576741456985473633f,
+0.08579730987548828125f,0.99631261825561523438f,0.07968243956565856934f,
+0.99682027101516723633f,0.07356456667184829712f,0.99729043245315551758f,
+0.06744392216205596924f,0.99772304296493530273f,0.06132073700428009033f,
+0.99811810255050659180f,0.05519524589180946350f,0.99847555160522460938f,
+0.04906767606735229492f,0.99879544973373413086f,0.04293825849890708923f,
+0.99907773733139038086f,0.03680722415447235107f,0.99932235479354858398f,
+0.03067480400204658508f,0.99952942132949829102f,0.02454122900962829590f,
+0.99969881772994995117f,0.01840673014521598816f,0.99983060359954833984f,
+0.01227153837680816650f,0.99992471933364868164f,0.00613588467240333557f,
+0.99998116493225097656f,1.00000000000000000000f,0.00000000000000000000f,
+0.99969881772994995117f,0.02454122900962829590f,0.99879544973373413086f,
+0.04906767606735229492f,0.99729043245315551758f,0.07356456667184829712f,
+0.99518471956253051758f,0.09801714122295379639f,0.99247956275939941406f,
+0.12241067737340927124f,0.98917651176452636719f,0.14673046767711639404f,
+0.98527765274047851562f,0.17096188664436340332f,0.98078525066375732422f,
+0.19509032368659973145f,0.97570210695266723633f,0.21910123527050018311f,
+0.97003126144409179688f,0.24298018217086791992f,0.96377605199813842773f,
+0.26671275496482849121f,0.95694035291671752930f,0.29028466343879699707f,
+0.94952815771102905273f,0.31368175148963928223f,0.94154405593872070312f,
+0.33688986301422119141f,0.93299281597137451172f,0.35989505052566528320f,
+0.92387950420379638672f,0.38268342614173889160f,0.91420978307723999023f,
+0.40524131059646606445f,0.90398931503295898438f,0.42755508422851562500f,
+0.89322429895401000977f,0.44961133599281311035f,0.88192129135131835938f,
+0.47139674425125122070f,0.87008696794509887695f,0.49289819598197937012f,
+0.85772860050201416016f,0.51410275697708129883f,0.84485357999801635742f,
+0.53499764204025268555f,0.83146959543228149414f,0.55557024478912353516f,
+0.81758481264114379883f,0.57580816745758056641f,0.80320751667022705078f,
+0.59569931030273437500f,0.78834640979766845703f,0.61523157358169555664f,
+0.77301043272018432617f,0.63439327478408813477f,0.75720882415771484375f,
+0.65317285060882568359f,0.74095112085342407227f,0.67155897617340087891f,
+0.72424709796905517578f,0.68954056501388549805f,0.70710676908493041992f,
+0.70710676908493041992f,0.68954056501388549805f,0.72424709796905517578f,
+0.67155897617340087891f,0.74095112085342407227f,0.65317285060882568359f,
+0.75720882415771484375f,0.63439327478408813477f,0.77301043272018432617f,
+0.61523157358169555664f,0.78834640979766845703f,0.59569931030273437500f,
+0.80320751667022705078f,0.57580816745758056641f,0.81758481264114379883f,
+0.55557024478912353516f,0.83146959543228149414f,0.53499764204025268555f,
+0.84485357999801635742f,0.51410275697708129883f,0.85772860050201416016f,
+0.49289819598197937012f,0.87008696794509887695f,0.47139674425125122070f,
+0.88192129135131835938f,0.44961133599281311035f,0.89322429895401000977f,
+0.42755508422851562500f,0.90398931503295898438f,0.40524131059646606445f,
+0.91420978307723999023f,0.38268342614173889160f,0.92387950420379638672f,
+0.35989505052566528320f,0.93299281597137451172f,0.33688986301422119141f,
+0.94154405593872070312f,0.31368175148963928223f,0.94952815771102905273f,
+0.29028466343879699707f,0.95694035291671752930f,0.26671275496482849121f,
+0.96377605199813842773f,0.24298018217086791992f,0.97003126144409179688f,
+0.21910123527050018311f,0.97570210695266723633f,0.19509032368659973145f,
+0.98078525066375732422f,0.17096188664436340332f,0.98527765274047851562f,
+0.14673046767711639404f,0.98917651176452636719f,0.12241067737340927124f,
+0.99247956275939941406f,0.09801714122295379639f,0.99518471956253051758f,
+0.07356456667184829712f,0.99729043245315551758f,0.04906767606735229492f,
+0.99879544973373413086f,0.02454122900962829590f,0.99969881772994995117f,
+1.00000000000000000000f,0.00000000000000000000f,0.99518471956253051758f,
+0.09801714122295379639f,0.98078525066375732422f,0.19509032368659973145f,
+0.95694035291671752930f,0.29028466343879699707f,0.92387950420379638672f,
+0.38268342614173889160f,0.88192129135131835938f,0.47139674425125122070f,
+0.83146959543228149414f,0.55557024478912353516f,0.77301043272018432617f,
+0.63439327478408813477f,0.70710676908493041992f,0.70710676908493041992f,
+0.63439327478408813477f,0.77301043272018432617f,0.55557024478912353516f,
+0.83146959543228149414f,0.47139674425125122070f,0.88192129135131835938f,
+0.38268342614173889160f,0.92387950420379638672f,0.29028466343879699707f,
+0.95694035291671752930f,0.19509032368659973145f,0.98078525066375732422f,
+0.09801714122295379639f,0.99518471956253051758f,1.00000000000000000000f,
+0.00000000000000000000f,0.92387950420379638672f,0.38268342614173889160f,
+0.70710676908493041992f,0.70710676908493041992f,0.38268342614173889160f,
+0.92387950420379638672f,};
 
 float32_t rearranged_twiddle_stride2_4096_f32[2728]={
-1.00000000000000000000f,0.00000000000000000000f,0.99999529380957619118f,
-0.00306795676296597614f,0.99998117528260110909f,0.00613588464915447527f,
-0.99995764455196389786f,0.00920375478205981944f,0.99992470183914450299f,
-0.01227153828571992539f,0.99988234745421256111f,0.01533920628498810015f,
-0.99983058179582340319f,0.01840672990580482019f,0.99976940535121527898f,
-0.02147408027546950787f,0.99969881869620424997f,0.02454122852291228812f,
-0.99961882249517863830f,0.02760814577896573974f,0.99952941750109314256f,
-0.03067480317663662595f,0.99943060455546173237f,0.03374117185137757990f,
-0.99932238458834954375f,0.03680722294135883171f,0.99920475861836388631f,
-0.03987292758773981066f,0.99907772775264536147f,0.04293825693494082024f,
-0.99894129318685687124f,0.04600318213091462299f,0.99879545620517240501f,
-0.04906767432741801493f,0.99864021818026527111f,0.05213170468028332366f,
-0.99847558057329477421f,0.05519524434968993420f,0.99830154493389289261f,
-0.05825826450043575244f,0.99811811290014917919f,0.06132073630220857829f,
-0.99792528619859599548f,0.06438263092985746505f,0.99772306664419163624f,
-0.06744391956366405094f,0.99751145614030345410f,0.07050457338961385600f,
-0.99729045667869020697f,0.07356456359966742631f,0.99706007033948296225f,
-0.07662386139203149205f,0.99682029929116566791f,0.07968243797143012563f,
-0.99657114579055483539f,0.08274026454937569164f,0.99631261218277800129f,
-0.08579731234443989385f,0.99604470090125196702f,0.08885355258252460031f,
-0.99576741446765981713f,0.09190895649713272386f,0.99548075549192693856f,
-0.09496349532963899165f,0.99518472667219692873f,0.09801714032956060363f,
-0.99487933079480561638f,0.10106986275482782167f,0.99456457073425541537f,
-0.10412163387205458642f,0.99424044945318790223f,0.10717242495680884273f,
-0.99390697000235606051f,0.11022220729388305938f,0.99356413552059530403f,
-0.11327095217756434631f,0.99321194923479450001f,0.11631863091190475235f,
-0.99285041445986510489f,0.11936521481099135467f,0.99247953459870996706f,
-0.12241067519921619566f,0.99209931314219179654f,0.12545498341154623367f,
-0.99170975366909952520f,0.12849811079379316880f,0.99131085984611544415f,
-0.13154002870288311611f,0.99090263542778000971f,0.13458070850712616773f,
-0.99048508425645709341f,0.13762012158648603832f,0.99005821026229712256f,
-0.14065823933284921088f,0.98962201746320088702f,0.14369503315029447110f,
-0.98917650996478101444f,0.14673047445536174793f,0.98872169196032377858f,
-0.14976453467732150915f,0.98825756773074946437f,0.15279718525844343535f,
-0.98778414164457217783f,0.15582839765426523271f,0.98730141815785843473f,
-0.15885814333386144570f,0.98680940181418552726f,0.16188639378011182579f,
-0.98630809724459866938f,0.16491312048996989437f,0.98579750916756747614f,
-0.16793829497473117263f,0.98527764238894122162f,0.17096188876030121717f,
-0.98474850180190420801f,0.17398387338746382214f,0.98421009238692902521f,
-0.17700422041214874946f,0.98366241921173025453f,0.18002290140569951471f,
-0.98310548743121628501f,0.18303988795514095078f,0.98253930228744124076f,
-0.18605515166344663291f,0.98196386910955524296f,0.18906866414980619262f,
-0.98137919331375456089f,0.19208039704989243734f,0.98078528040323043058f,
-0.19509032201612824808f,0.98018213596811742949f,0.19809841071795356027f,
-0.97956976568544051887f,0.20110463484209190055f,0.97894817531906219710f,
-0.20410896609281686809f,0.97831737071962765473f,0.20711137619221856032f,
-0.97767735782450992943f,0.21011183688046961016f,0.97702814265775439484f,
-0.21311031991609136194f,0.97636973133002114000f,0.21610679707621952006f,
-0.97570213003852857003f,0.21910124015686979759f,0.97502534506699412020f,
-0.22209362097320350937f,0.97433938278557585821f,0.22508391135979283204f,
-0.97364424965081197705f,0.22807208317088573102f,0.97293995220556017678f,
-0.23105810828067110951f,0.97222649707893626925f,0.23404195858354343018f,
-0.97150389098625178352f,0.23702360599436719801f,0.97077214072895035013f,
-0.24000302244874149871f,0.97003125319454397424f,0.24298017990326387094f,
-0.96928123535654853171f,0.24595505033579459497f,0.96852209427441737777f,
-0.24892760574572014853f,0.96775383709347551076f,0.25189781815421696809f,
-0.96697647104485207059f,0.25486565960451457169f,0.96619000344541250413f,
-0.25783110216215898713f,0.96539444169768939830f,0.26079411791527551401f,
-0.96458979328981275803f,0.26375467897483134694f,0.96377606579543984022f,
-0.26671275747489836538f,0.96295326687368387741f,0.26966832557291509076f,
-0.96212140426904158019f,0.27262135544994897662f,0.96128048581132063966f,
-0.27557181931095814376f,0.96043051941556578655f,0.27851968938505305973f,
-0.95957151308198451733f,0.28146493792575794091f,0.95870347489587159906f,
-0.28440753721127187692f,0.95782641302753290802f,0.28734745954472951102f,
-0.95694033573220882438f,0.29028467725446233105f,0.95604525134999640557f,
-0.29321916269425862822f,0.95514116830577078243f,0.29615088824362378883f,
-0.95422809510910566733f,0.29907982630804047508f,0.95330604035419386211f,
-0.30200594931922808417f,0.95237501271976587880f,0.30492922973540237397f,
-0.95143502096900833820f,0.30784964004153486661f,0.95048607394948170235f,
-0.31076715274961147495f,0.94952818059303667475f,0.31368174039889151761f,
-0.94856134991573026749f,0.31659337555616584581f,0.94758559101774109124f,
-0.31950203081601569188f,0.94660091308328353499f,0.32240767880106985244f,
-0.94560732538052127971f,0.32531029216226292622f,0.94460483726148025685f,
-0.32820984357909249729f,0.94359345816196038559f,0.33110630575987642921f,
-0.94257319760144686605f,0.33399965144200938205f,0.94154406518302080631f,
-0.33688985339222005111f,0.94050607059326829518f,0.33977688440682685123f,
-0.93945922360218991898f,0.34266071731199437833f,0.93840353406310805795f,
-0.34554132496398909380f,0.93733901191257495977f,0.34841868024943456472f,
-0.93626566717027825959f,0.35129275608556709276f,0.93518350993894761025f,
-0.35416352542049034380f,0.93409255040425887007f,0.35703096123342997759f,
-0.93299279883473895669f,0.35989503653498811087f,0.93188426558166814750f,
-0.36275572436739722537f,0.93076696107898371224f,0.36561299780477385379f,
-0.92964089584318121418f,0.36846682995337232125f,0.92850608047321558924f,
-0.37131719395183754306f,0.92736252565040111495f,0.37416406297145793358f,
-0.92621024213831137928f,0.37700741021641825945f,0.92504924078267758425f,
-0.37984720892405116066f,0.92387953251128673848f,0.38268343236508978178f,
-0.92270112833387862850f,0.38551605384391884890f,0.92151403934204190183f,
-0.38834504669882624617f,0.92031827670911059425f,0.39117038430225387069f,
-0.91911385169005777040f,0.39399204006104809883f,0.91790077562139049672f,
-0.39680998741671030805f,0.91667905992104270485f,0.39962419984564678810f,
-0.91544871608826783316f,0.40243465085941843018f,0.91420975570353069095f,
-0.40524131400498986100f,0.91296219042839821256f,0.40804416286497868782f,
-0.91170603200542987832f,0.41084317105790391089f,0.91044129225806724737f,
-0.41363831223843450235f,0.90916798309052238025f,0.41642956009763715253f,
-0.90788611648766626150f,0.41921688836322390515f,0.90659570451491533483f,
-0.42200027079979968159f,0.90529675931811881551f,0.42477968120910880589f,
-0.90398929312344333820f,0.42755509343028208491f,0.90267331823725882600f,
-0.43032648134008261165f,0.90134884704602202810f,0.43309381885315195726f,
-0.90001589201616016833f,0.43585707992225547480f,0.89867446569395381673f,
-0.43861623853852765853f,0.89732458070541831763f,0.44137126873171667052f,
-0.89596624975618521791f,0.44412214457042920035f,0.89459948563138269595f,
-0.44686884016237415906f,0.89322430119551532446f,0.44961132965460653965f,
-0.89184070939234272313f,0.45234958723377088896f,0.89044872324475787817f,
-0.45508358712634383592f,0.88904835585466457371f,0.45781330359887717485f,
-0.88763962040285393496f,0.46053871095824000514f,0.88622253014888063838f,
-0.46325978355186014923f,0.88479709843093778954f,0.46597649576796618121f,
-0.88336333866573157891f,0.46868882203582790114f,0.88192126434835504956f,
-0.47139673682599764204f,0.88047088905216075450f,0.47410021465054996703f,
-0.87901222642863352519f,0.47679923006332208812f,0.87754529020726135258f,
-0.47949375766015295275f,0.87607009419540660122f,0.48218377207912271887f,
-0.87458665227817611321f,0.48486924800079106435f,0.87309497841829009079f,
-0.48755016014843599592f,0.87159508665595097909f,0.49022648328829115938f,
-0.87008699110871146054f,0.49289819222978403790f,0.86857070597134089507f,
-0.49556526182577254058f,0.86704624551569264845f,0.49822766697278181303f,
-0.86551362409056908920f,0.50088538261124071482f,0.86397285612158669643f,
-0.50353838372571757542f,0.86242395611104050168f,0.50618664534515522835f,
-0.86086693863776730939f,0.50883014254310698909f,0.85930181835700847337f,
-0.51146885043797030157f,0.85772861000027211809f,0.51410274419322166128f,
-0.85614732837519447184f,0.51673179901764987321f,0.85455798836540053376f,
-0.51935599016558964269f,0.85296060493036363059f,0.52197529293715438925f,
-0.85135519310526519554f,0.52458968267846894928f,0.84974176800085254868f,
-0.52719913478190127964f,0.84812034480329723252f,0.52980362468629460526f,
-0.84649093877405212627f,0.53240312787719790144f,0.84485356524970711689f,
-0.53499761988709715332f,0.84320823964184543620f,0.53758707629564539410f,
-0.84155497743689844370f,0.54017147272989285423f,0.83989379419599952126f,
-0.54275078486451588944f,0.83822470555483807875f,0.54532498842204646383f,
-0.83654772722351200542f,0.54789405917310018967f,0.83486287498638001026f,
-0.55045797293660481131f,0.83317016470191318511f,0.55301670558002746780f,
-0.83146961230254523567f,0.55557023301960217765f,0.82976123379452304540f,
-0.55811853122055610221f,0.82804504525775579626f,0.56066157619733603124f,
-0.82632106284566353427f,0.56319934401383409117f,0.82458930278502529099f,
-0.56573181078361312046f,0.82284978137582642788f,0.56825895267013148970f,
-0.82110251499110464835f,0.57078074588696725566f,0.81934752007679700903f,
-0.57329716669804220430f,0.81758481315158371139f,0.57580819141784533866f,
-0.81581441080673378075f,0.57831379641165558958f,0.81403632970594841378f,
-0.58081395809576452649f,0.81225058658520399302f,0.58330865293769829094f,
-0.81045719825259476821f,0.58579785745643886408f,0.80865618158817498262f,
-0.58828154822264522306f,0.80684755354379933401f,0.59075970185887416442f,
-0.80503133114296365758f,0.59323229503979979516f,0.80320753148064494287f,
-0.59569930449243335691f,0.80137617172314024039f,0.59816070699634238395f,
-0.79953726910790501314f,0.60061647938386897305f,0.79769084094339115509f,
-0.60306659854034816437f,0.79583690460888356633f,0.60551104140432554512f,
-0.79397547755433717231f,0.60794978496777363208f,0.79210657730021238887f,
-0.61038280627630947528f,0.79023022143731003197f,0.61281008242940970820f,
-0.78834642762660622761f,0.61523159058062681925f,0.78645521359908576731f,
-0.61764730793780386886f,0.78455659715557524159f,0.62005721176328909561f,
-0.78265059616657572938f,0.62246127937414996723f,0.78073722857209448822f,
-0.62485948814238634341f,0.77881651238147597827f,0.62725181549514408275f,
-0.77688846567323244230f,0.62963823891492698426f,0.77495310659487393057f,
-0.63201873593980906207f,0.77301045336273699338f,0.63439328416364548779f,
-0.77106052426181381776f,0.63676186123628419899f,0.76910333764557969882f,
-0.63912444486377573138f,0.76713891193582040007f,0.64148101280858305095f,
-0.76516726562245895860f,0.64383154288979138613f,0.76318841726338138010f,
-0.64617601298331628357f,0.76120238548426177871f,0.64851440102211244110f,
-0.75920918897838796102f,0.65084668499638087535f,0.75720884650648456748f,
-0.65317284295377675551f,0.75520137689653654700f,0.65549285299961534967f,
-0.75318679904361252042f,0.65780669329707863735f,0.75116513190968636771f,
-0.66011434206742047870f,0.74913639452345937020f,0.66241577759017178373f,
-0.74710060598018013245f,0.66471097820334479334f,0.74505778544146594733f,
-0.66699992230363747137f,0.74300795213512171866f,0.66928258834663600929f,
-0.74095112535495921691f,0.67155895484701833009f,0.73888732446061511361f,
-0.67382900037875603783f,0.73681656887736979300f,0.67609270357531592310f,
-0.73473887809596349907f,0.67835004312986146857f,0.73265427167241281570f,
-0.68060099779545302212f,0.73056276922782759087f,0.68284554638524808112f,
-0.72846439044822519637f,0.68508366777270035541f,0.72635915508434600873f,
-0.68731534089175905233f,0.72424708295146700276f,0.68954054473706682948f,
-0.72212819392921534511f,0.69175925836415774750f,0.72000250796138165477f,
-0.69397146088965389055f,0.71787004505573170920f,0.69617713149146298601f,
-0.71573082528381870571f,0.69837624940897280457f,0.71358486878079352422f,
-0.70056879394324833576f,0.71143219574521643356f,0.70275474445722529993f,
-0.70927282643886568891f,0.70493408037590488124f,0.70710678118654757274f,
-0.70710678118654757274f,0.70493408037590499227f,0.70927282643886568891f,
-0.70275474445722529993f,0.71143219574521643356f,0.70056879394324844679f,
-0.71358486878079352422f,0.69837624940897291559f,0.71573082528381859468f,
-0.69617713149146298601f,0.71787004505573170920f,0.69397146088965400157f,
-0.72000250796138165477f,0.69175925836415774750f,0.72212819392921534511f,
-0.68954054473706694051f,0.72424708295146689174f,0.68731534089175905233f,
-0.72635915508434600873f,0.68508366777270035541f,0.72846439044822519637f,
-0.68284554638524808112f,0.73056276922782759087f,0.68060099779545302212f,
-0.73265427167241281570f,0.67835004312986146857f,0.73473887809596349907f,
-0.67609270357531603413f,0.73681656887736979300f,0.67382900037875614885f,
-0.73888732446061511361f,0.67155895484701833009f,0.74095112535495910588f,
-0.66928258834663600929f,0.74300795213512171866f,0.66699992230363747137f,
-0.74505778544146594733f,0.66471097820334490436f,0.74710060598018013245f,
-0.66241577759017178373f,0.74913639452345925918f,0.66011434206742047870f,
-0.75116513190968636771f,0.65780669329707874837f,0.75318679904361252042f,
-0.65549285299961546070f,0.75520137689653654700f,0.65317284295377686654f,
-0.75720884650648456748f,0.65084668499638098638f,0.75920918897838796102f,
-0.64851440102211255212f,0.76120238548426177871f,0.64617601298331639459f,
-0.76318841726338126907f,0.64383154288979149715f,0.76516726562245895860f,
-0.64148101280858316198f,0.76713891193582040007f,0.63912444486377573138f,
-0.76910333764557958780f,0.63676186123628419899f,0.77106052426181381776f,
-0.63439328416364548779f,0.77301045336273688235f,0.63201873593980906207f,
-0.77495310659487381955f,0.62963823891492709528f,0.77688846567323244230f,
-0.62725181549514419377f,0.77881651238147586724f,0.62485948814238645443f,
-0.78073722857209448822f,0.62246127937415007825f,0.78265059616657572938f,
-0.62005721176328920663f,0.78455659715557524159f,0.61764730793780397988f,
-0.78645521359908576731f,0.61523159058062681925f,0.78834642762660622761f,
-0.61281008242940970820f,0.79023022143731003197f,0.61038280627630947528f,
-0.79210657730021227785f,0.60794978496777374311f,0.79397547755433717231f,
-0.60551104140432554512f,0.79583690460888345530f,0.60306659854034827539f,
-0.79769084094339104407f,0.60061647938386897305f,0.79953726910790501314f,
-0.59816070699634238395f,0.80137617172314012937f,0.59569930449243346793f,
-0.80320753148064483184f,0.59323229503979979516f,0.80503133114296365758f,
-0.59075970185887427544f,0.80684755354379922299f,0.58828154822264533408f,
-0.80865618158817498262f,0.58579785745643886408f,0.81045719825259476821f,
-0.58330865293769829094f,0.81225058658520388200f,0.58081395809576452649f,
-0.81403632970594830276f,0.57831379641165558958f,0.81581441080673378075f,
-0.57580819141784533866f,0.81758481315158371139f,0.57329716669804231532f,
-0.81934752007679689800f,0.57078074588696736669f,0.82110251499110464835f,
-0.56825895267013148970f,0.82284978137582631685f,0.56573181078361323149f,
-0.82458930278502529099f,0.56319934401383409117f,0.82632106284566353427f,
-0.56066157619733603124f,0.82804504525775579626f,0.55811853122055610221f,
-0.82976123379452304540f,0.55557023301960228867f,0.83146961230254523567f,
-0.55301670558002757883f,0.83317016470191318511f,0.55045797293660481131f,
-0.83486287498638001026f,0.54789405917310018967f,0.83654772722351189440f,
-0.54532498842204646383f,0.83822470555483796772f,0.54275078486451600046f,
-0.83989379419599941023f,0.54017147272989296525f,0.84155497743689833268f,
-0.53758707629564550512f,0.84320823964184543620f,0.53499761988709726435f,
-0.84485356524970700587f,0.53240312787719801246f,0.84649093877405212627f,
-0.52980362468629482731f,0.84812034480329712149f,0.52719913478190139067f,
-0.84974176800085243766f,0.52458968267846883826f,0.85135519310526519554f,
-0.52197529293715438925f,0.85296060493036363059f,0.51935599016558953167f,
-0.85455798836540053376f,0.51673179901764998423f,0.85614732837519447184f,
-0.51410274419322166128f,0.85772861000027211809f,0.51146885043797052361f,
-0.85930181835700836235f,0.50883014254310698909f,0.86086693863776730939f,
-0.50618664534515533937f,0.86242395611104050168f,0.50353838372571757542f,
-0.86397285612158669643f,0.50088538261124093687f,0.86551362409056897818f,
-0.49822766697278186854f,0.86704624551569264845f,0.49556526182577248507f,
-0.86857070597134089507f,0.49289819222978409341f,0.87008699110871134952f,
-0.49022648328829110387f,0.87159508665595109012f,0.48755016014843605143f,
-0.87309497841829009079f,0.48486924800079111986f,0.87458665227817611321f,
-0.48218377207912282989f,0.87607009419540660122f,0.47949375766015300826f,
-0.87754529020726124156f,0.47679923006332225466f,0.87901222642863341417f,
-0.47410021465055002254f,0.88047088905216075450f,0.47139673682599780857f,
-0.88192126434835493853f,0.46868882203582795665f,0.88336333866573157891f,
-0.46597649576796612569f,0.88479709843093778954f,0.46325978355186026025f,
-0.88622253014888063838f,0.46053871095824000514f,0.88763962040285393496f,
-0.45781330359887728587f,0.88904835585466457371f,0.45508358712634383592f,
-0.89044872324475787817f,0.45234958723377099998f,0.89184070939234272313f,
-0.44961132965460659516f,0.89322430119551532446f,0.44686884016237432560f,
-0.89459948563138258493f,0.44412214457042925586f,0.89596624975618510689f,
-0.44137126873171661501f,0.89732458070541831763f,0.43861623853852771404f,
-0.89867446569395381673f,0.43585707992225547480f,0.90001589201616027935f,
-0.43309381885315201277f,0.90134884704602202810f,0.43032648134008261165f,
-0.90267331823725882600f,0.42755509343028219593f,0.90398929312344333820f,
-0.42477968120910880589f,0.90529675931811881551f,0.42200027079979979261f,
-0.90659570451491533483f,0.41921688836322396066f,0.90788611648766626150f,
-0.41642956009763731906f,0.90916798309052226923f,0.41363831223843455787f,
-0.91044129225806713634f,0.41084317105790391089f,0.91170603200542987832f,
-0.40804416286497874333f,0.91296219042839810154f,0.40524131400498986100f,
-0.91420975570353069095f,0.40243465085941854120f,0.91544871608826783316f,
-0.39962419984564678810f,0.91667905992104270485f,0.39680998741671041907f,
-0.91790077562139038569f,0.39399204006104809883f,0.91911385169005777040f,
-0.39117038430225398171f,0.92031827670911048322f,0.38834504669882630168f,
-0.92151403934204190183f,0.38551605384391901543f,0.92270112833387851747f,
-0.38268343236508983729f,0.92387953251128673848f,0.37984720892405110515f,
-0.92504924078267758425f,0.37700741021641831496f,0.92621024213831126826f,
-0.37416406297145798909f,0.92736252565040111495f,0.37131719395183759858f,
-0.92850608047321558924f,0.36846682995337232125f,0.92964089584318121418f,
-0.36561299780477396482f,0.93076696107898371224f,0.36275572436739722537f,
-0.93188426558166814750f,0.35989503653498827740f,0.93299279883473884567f,
-0.35703096123343003310f,0.93409255040425887007f,0.35416352542049051033f,
-0.93518350993894749923f,0.35129275608556714827f,0.93626566717027825959f,
-0.34841868024943450921f,0.93733901191257495977f,0.34554132496398914931f,
-0.93840353406310805795f,0.34266071731199437833f,0.93945922360218991898f,
-0.33977688440682696225f,0.94050607059326829518f,0.33688985339222005111f,
-0.94154406518302080631f,0.33399965144200949307f,0.94257319760144686605f,
-0.33110630575987642921f,0.94359345816196038559f,0.32820984357909266382f,
-0.94460483726148025685f,0.32531029216226298173f,0.94560732538052127971f,
-0.32240767880107001897f,0.94660091308328353499f,0.31950203081601574739f,
-0.94758559101774109124f,0.31659337555616584581f,0.94856134991573026749f,
-0.31368174039889157312f,0.94952818059303667475f,0.31076715274961147495f,
-0.95048607394948170235f,0.30784964004153497763f,0.95143502096900833820f,
-0.30492922973540242948f,0.95237501271976587880f,0.30200594931922819519f,
-0.95330604035419375109f,0.29907982630804047508f,0.95422809510910566733f,
-0.29615088824362395536f,0.95514116830577067141f,0.29321916269425868373f,
-0.95604525134999640557f,0.29028467725446233105f,0.95694033573220893540f,
-0.28734745954472956653f,0.95782641302753290802f,0.28440753721127182141f,
-0.95870347489587159906f,0.28146493792575805193f,0.95957151308198451733f,
-0.27851968938505305973f,0.96043051941556578655f,0.27557181931095825478f,
-0.96128048581132063966f,0.27262135544994897662f,0.96212140426904158019f,
-0.26966832557291520178f,0.96295326687368387741f,0.26671275747489842090f,
-0.96377606579543984022f,0.26375467897483151347f,0.96458979328981264700f,
-0.26079411791527556952f,0.96539444169768939830f,0.25783110216215893162f,
-0.96619000344541261516f,0.25486565960451462720f,0.96697647104485207059f,
-0.25189781815421691258f,0.96775383709347551076f,0.24892760574572025956f,
-0.96852209427441726675f,0.24595505033579459497f,0.96928123535654853171f,
-0.24298017990326398197f,0.97003125319454397424f,0.24000302244874149871f,
-0.97077214072895035013f,0.23702360599436733679f,0.97150389098625178352f,
-0.23404195858354345794f,0.97222649707893626925f,0.23105810828067127605f,
-0.97293995220556006576f,0.22807208317088578653f,0.97364424965081186603f,
-0.22508391135979277653f,0.97433938278557585821f,0.22209362097320359264f,
-0.97502534506699412020f,0.21910124015686976984f,0.97570213003852857003f,
-0.21610679707621960333f,0.97636973133002114000f,0.21311031991609136194f,
-0.97702814265775439484f,0.21011183688046972118f,0.97767735782450992943f,
-0.20711137619221856032f,0.97831737071962765473f,0.20410896609281700687f,
-0.97894817531906219710f,0.20110463484209195606f,0.97956976568544051887f,
-0.19809841071795372680f,0.98018213596811731847f,0.19509032201612833135f,
-0.98078528040323043058f,0.19208039704989238183f,0.98137919331375456089f,
-0.18906866414980627589f,0.98196386910955524296f,0.18605515166344663291f,
-0.98253930228744124076f,0.18303988795514106180f,0.98310548743121628501f,
-0.18002290140569951471f,0.98366241921173025453f,0.17700422041214886049f,
-0.98421009238692902521f,0.17398387338746384989f,0.98474850180190420801f,
-0.17096188876030135595f,0.98527764238894122162f,0.16793829497473122814f,
-0.98579750916756736512f,0.16491312048997008866f,0.98630809724459866938f,
-0.16188639378011188130f,0.98680940181418541624f,0.15885814333386139019f,
-0.98730141815785843473f,0.15582839765426531597f,0.98778414164457217783f,
-0.15279718525844340760f,0.98825756773074946437f,0.14976453467732162017f,
-0.98872169196032377858f,0.14673047445536174793f,0.98917650996478101444f,
-0.14369503315029458212f,0.98962201746320077600f,0.14065823933284923863f,
-0.99005821026229712256f,0.13762012158648617710f,0.99048508425645698239f,
-0.13458070850712622324f,0.99090263542778000971f,0.13154002870288328264f,
-0.99131085984611544415f,0.12849811079379322432f,0.99170975366909952520f,
-0.12545498341154620592f,0.99209931314219179654f,0.12241067519921627893f,
-0.99247953459870996706f,0.11936521481099135467f,0.99285041445986510489f,
-0.11631863091190487725f,0.99321194923479450001f,0.11327095217756436019f,
-0.99356413552059530403f,0.11022220729388318428f,0.99390697000235606051f,
-0.10717242495680887049f,0.99424044945318790223f,0.10412163387205472520f,
-0.99456457073425541537f,0.10106986275482787718f,0.99487933079480561638f,
-0.09801714032956077016f,0.99518472667219681771f,0.09496349532963906104f,
-0.99548075549192693856f,0.09190895649713269611f,0.99576741446765981713f,
-0.08885355258252468358f,0.99604470090125196702f,0.08579731234443987997f,
-0.99631261218277800129f,0.08274026454937580266f,0.99657114579055483539f,
-0.07968243797143012563f,0.99682029929116566791f,0.07662386139203161695f,
-0.99706007033948296225f,0.07356456359966745406f,0.99729045667869020697f,
-0.07050457338961400866f,0.99751145614030345410f,0.06744391956366410645f,
-0.99772306664419163624f,0.06438263092985740954f,0.99792528619859599548f,
-0.06132073630220864768f,0.99811811290014917919f,0.05825826450043573163f,
-0.99830154493389289261f,0.05519524434969003135f,0.99847558057329477421f,
-0.05213170468028331672f,0.99864021818026527111f,0.04906767432741812596f,
-0.99879545620517240501f,0.04600318213091464381f,0.99894129318685687124f,
-0.04293825693494095902f,0.99907772775264536147f,0.03987292758773984536f,
-0.99920475861836388631f,0.03680722294135899131f,0.99932238458834954375f,
-0.03374117185137764235f,0.99943060455546173237f,0.03067480317663658085f,
-0.99952941750109314256f,0.02760814577896581953f,0.99961882249517863830f,
-0.02454122852291226384f,0.99969881869620424997f,0.02147408027546960502f,
-0.99976940535121527898f,0.01840672990580482019f,0.99983058179582340319f,
-0.01533920628498821985f,0.99988234745421256111f,0.01227153828571994447f,
-0.99992470183914450299f,0.00920375478205995995f,0.99995764455196389786f,
-0.00613588464915451517f,0.99998117528260110909f,0.00306795676296613791f,
-0.99999529380957619118f,0.00000000000000006123f,1.00000000000000000000f,
--0.00306795676296601561f,0.99999529380957619118f,-0.00613588464915439287f,
-0.99998117528260110909f,-0.00920375478205983678f,0.99995764455196389786f,
--0.01227153828571982304f,0.99992470183914450299f,-0.01533920628498809842f,
-0.99988234745421256111f,-0.01840672990580469529f,0.99983058179582340319f,
--0.02147408027546948359f,0.99976940535121527898f,-0.02454122852291214241f,
-0.99969881869620424997f,-0.02760814577896569810f,0.99961882249517863830f,
--0.03067480317663645942f,0.99952941750109314256f,-0.03374117185137751745f,
-0.99943060455546173237f,-0.03680722294135886641f,0.99932238458834954375f,
--0.03987292758773972740f,0.99920475861836388631f,-0.04293825693494083412f,
-0.99907772775264536147f,-0.04600318213091451891f,0.99894129318685687124f,
--0.04906767432741800800f,0.99879545620517240501f,-0.05213170468028319182f,
-0.99864021818026527111f,-0.05519524434968991339f,0.99847558057329477421f,
--0.05825826450043560673f,0.99830154493389289261f,-0.06132073630220852972f,
-0.99811811290014917919f,-0.06438263092985728464f,0.99792528619859599548f,
--0.06744391956366398155f,0.99772306664419163624f,-0.07050457338961389764f,
-0.99751145614030345410f,-0.07356456359966732916f,0.99729045667869020697f,
--0.07662386139203150592f,0.99706007033948296225f,-0.07968243797143001461f,
-0.99682029929116577893f,-0.08274026454937567776f,0.99657114579055483539f,
--0.08579731234443975507f,0.99631261218277800129f,-0.08885355258252455868f,
-0.99604470090125196702f,-0.09190895649713257121f,0.99576741446765981713f,
--0.09496349532963895002f,0.99548075549192693856f,-0.09801714032956064526f,
-0.99518472667219692873f,-0.10106986275482775228f,0.99487933079480561638f,
--0.10412163387205460030f,0.99456457073425541537f,-0.10717242495680875947f,
-0.99424044945318790223f,-0.11022220729388305938f,0.99390697000235606051f,
--0.11327095217756423529f,0.99356413552059530403f,-0.11631863091190475235f,
-0.99321194923479450001f,-0.11936521481099122977f,0.99285041445986510489f,
--0.12241067519921615403f,0.99247953459870996706f,-0.12545498341154606714f,
-0.99209931314219179654f,-0.12849811079379311329f,0.99170975366909952520f,
--0.13154002870288314386f,0.99131085984611544415f,-0.13458070850712611222f,
-0.99090263542778000971f,-0.13762012158648606608f,0.99048508425645698239f,
--0.14065823933284912761f,0.99005821026229712256f,-0.14369503315029444335f,
-0.98962201746320088702f,-0.14673047445536163691f,0.98917650996478101444f,
--0.14976453467732150915f,0.98872169196032377858f,-0.15279718525844329657f,
-0.98825756773074946437f,-0.15582839765426520495f,0.98778414164457217783f,
--0.15885814333386127917f,0.98730141815785843473f,-0.16188639378011177028f,
-0.98680940181418552726f,-0.16491312048996994988f,0.98630809724459866938f,
--0.16793829497473108936f,0.98579750916756747614f,-0.17096188876030124493f,
-0.98527764238894122162f,-0.17398387338746371111f,0.98474850180190420801f,
--0.17700422041214874946f,0.98421009238692902521f,-0.18002290140569940369f,
-0.98366241921173025453f,-0.18303988795514092303f,0.98310548743121628501f,
--0.18605515166344649414f,0.98253930228744124076f,-0.18906866414980616486f,
-0.98196386910955524296f,-0.19208039704989227081f,0.98137919331375456089f,
--0.19509032201612819257f,0.98078528040323043058f,-0.19809841071795361578f,
-0.98018213596811742949f,-0.20110463484209181728f,0.97956976568544051887f,
--0.20410896609281689584f,0.97894817531906219710f,-0.20711137619221844930f,
-0.97831737071962765473f,-0.21011183688046961016f,0.97767735782450992943f,
--0.21311031991609125091f,0.97702814265775439484f,-0.21610679707621949230f,
-0.97636973133002114000f,-0.21910124015686965881f,0.97570213003852857003f,
--0.22209362097320348162f,0.97502534506699412020f,-0.22508391135979266551f,
-0.97433938278557585821f,-0.22807208317088567551f,0.97364424965081197705f,
--0.23105810828067113727f,0.97293995220556017678f,-0.23404195858354331916f,
-0.97222649707893638027f,-0.23702360599436722577f,0.97150389098625178352f,
--0.24000302244874138768f,0.97077214072895035013f,-0.24298017990326387094f,
-0.97003125319454397424f,-0.24595505033579448395f,0.96928123535654853171f,
--0.24892760574572012078f,0.96852209427441737777f,-0.25189781815421680156f,
-0.96775383709347551076f,-0.25486565960451451618f,0.96697647104485207059f,
--0.25783110216215882060f,0.96619000344541261516f,-0.26079411791527545850f,
-0.96539444169768939830f,-0.26375467897483140245f,0.96458979328981275803f,
--0.26671275747489830987f,0.96377606579543984022f,-0.26966832557291509076f,
-0.96295326687368387741f,-0.27262135544994886560f,0.96212140426904158019f,
--0.27557181931095814376f,0.96128048581132063966f,-0.27851968938505294870f,
-0.96043051941556589757f,-0.28146493792575794091f,0.95957151308198451733f,
--0.28440753721127171039f,0.95870347489587159906f,-0.28734745954472945551f,
-0.95782641302753290802f,-0.29028467725446216452f,0.95694033573220893540f,
--0.29321916269425857271f,0.95604525134999651659f,-0.29615088824362384434f,
-0.95514116830577067141f,-0.29907982630804036406f,0.95422809510910566733f,
--0.30200594931922808417f,0.95330604035419386211f,-0.30492922973540226295f,
-0.95237501271976587880f,-0.30784964004153486661f,0.95143502096900833820f,
--0.31076715274961136393f,0.95048607394948181337f,-0.31368174039889140658f,
-0.94952818059303667475f,-0.31659337555616573479f,0.94856134991573037851f,
--0.31950203081601563637f,0.94758559101774120226f,-0.32240767880106985244f,
-0.94660091308328353499f,-0.32531029216226287071f,0.94560732538052139073f,
--0.32820984357909255280f,0.94460483726148025685f,-0.33110630575987631818f,
-0.94359345816196038559f,-0.33399965144200938205f,0.94257319760144686605f,
--0.33688985339221994009f,0.94154406518302080631f,-0.33977688440682685123f,
-0.94050607059326829518f,-0.34266071731199426731f,0.93945922360218991898f,
--0.34554132496398903829f,0.93840353406310816897f,-0.34841868024943439819f,
-0.93733901191257495977f,-0.35129275608556703725f,0.93626566717027825959f,
--0.35416352542049039931f,0.93518350993894761025f,-0.35703096123342992207f,
-0.93409255040425898109f,-0.35989503653498816638f,0.93299279883473884567f,
--0.36275572436739711435f,0.93188426558166814750f,-0.36561299780477385379f,
-0.93076696107898371224f,-0.36846682995337221023f,0.92964089584318132520f,
--0.37131719395183748755f,0.92850608047321558924f,-0.37416406297145787807f,
-0.92736252565040111495f,-0.37700741021641820394f,0.92621024213831137928f,
--0.37984720892405099413f,0.92504924078267769527f,-0.38268343236508972627f,
-0.92387953251128673848f,-0.38551605384391890441f,0.92270112833387851747f,
--0.38834504669882619066f,0.92151403934204201285f,-0.39117038430225387069f,
-0.92031827670911059425f,-0.39399204006104798781f,0.91911385169005777040f,
--0.39680998741671030805f,0.91790077562139049672f,-0.39962419984564667708f,
-0.91667905992104270485f,-0.40243465085941843018f,0.91544871608826783316f,
--0.40524131400498974998f,0.91420975570353069095f,-0.40804416286497863231f,
-0.91296219042839821256f,-0.41084317105790379987f,0.91170603200542987832f,
--0.41363831223843450235f,0.91044129225806724737f,-0.41642956009763698599f,
-0.90916798309052249127f,-0.41921688836322407168f,0.90788611648766615048f,
--0.42200027079979968159f,0.90659570451491533483f,-0.42477968120910869487f,
-0.90529675931811881551f,-0.42755509343028186287f,0.90398929312344344922f,
--0.43032648134008272267f,0.90267331823725871498f,-0.43309381885315190175f,
-0.90134884704602202810f,-0.43585707992225536378f,0.90001589201616027935f,
--0.43861623853852738097f,0.89867446569395392775f,-0.44137126873171672603f,
-0.89732458070541831763f,-0.44412214457042914484f,0.89596624975618521791f,
--0.44686884016237399253f,0.89459948563138280697f,-0.44961132965460670619f,
-0.89322430119551521344f,-0.45234958723377088896f,0.89184070939234272313f,
--0.45508358712634372489f,0.89044872324475798919f,-0.45781330359887700832f,
-0.88904835585466468473f,-0.46053871095824006066f,0.88763962040285393496f,
--0.46325978355186014923f,0.88622253014888063838f,-0.46597649576796601467f,
-0.88479709843093790056f,-0.46868882203582767909f,0.88336333866573168994f,
--0.47139673682599769755f,0.88192126434835504956f,-0.47410021465054991152f,
-0.88047088905216086552f,-0.47679923006332192159f,0.87901222642863352519f,
--0.47949375766015311928f,0.87754529020726124156f,-0.48218377207912271887f,
-0.87607009419540660122f,-0.48486924800079100883f,0.87458665227817622423f,
--0.48755016014843571837f,0.87309497841829020182f,-0.49022648328829121489f,
-0.87159508665595097909f,-0.49289819222978398239f,0.87008699110871146054f,
--0.49556526182577237405f,0.86857070597134100609f,-0.49822766697278159098f,
-0.86704624551569275948f,-0.50088538261124082585f,0.86551362409056908920f,
--0.50353838372571746440f,0.86397285612158680745f,-0.50618664534515511733f,
-0.86242395611104061270f,-0.50883014254310710012f,0.86086693863776719837f,
--0.51146885043797041259f,0.85930181835700847337f,-0.51410274419322155026f,
-0.85772861000027211809f,-0.51673179901764965116f,0.85614732837519458286f,
--0.51935599016558964269f,0.85455798836540053376f,-0.52197529293715427823f,
-0.85296060493036374162f,-0.52458968267846872724f,0.85135519310526519554f,
--0.52719913478190105760f,0.84974176800085265970f,-0.52980362468629471628f,
-0.84812034480329723252f,-0.53240312787719790144f,0.84649093877405212627f,
--0.53499761988709704230f,0.84485356524970722791f,-0.53758707629564561614f,
-0.84320823964184532517f,-0.54017147272989285423f,0.84155497743689844370f,
--0.54275078486451577842f,0.83989379419599952126f,-0.54532498842204624179f,
-0.83822470555483818977f,-0.54789405917310018967f,0.83654772722351200542f,
--0.55045797293660470029f,0.83486287498638012128f,-0.55301670558002735678f,
-0.83317016470191329613f,-0.55557023301960195560f,0.83146961230254534669f,
--0.55811853122055610221f,0.82976123379452304540f,-0.56066157619733592021f,
-0.82804504525775579626f,-0.56319934401383386913f,0.82632106284566364529f,
--0.56573181078361323149f,0.82458930278502517996f,-0.56825895267013148970f,
-0.82284978137582631685f,-0.57078074588696714464f,0.82110251499110475937f,
--0.57329716669804198226f,0.81934752007679712005f,-0.57580819141784533866f,
-0.81758481315158371139f,-0.57831379641165547856f,0.81581441080673378075f,
--0.58081395809576441547f,0.81403632970594852480f,-0.58330865293769840196f,
-0.81225058658520388200f,-0.58579785745643886408f,0.81045719825259476821f,
--0.58828154822264522306f,0.80865618158817509364f,-0.59075970185887405339f,
-0.80684755354379944503f,-0.59323229503979990618f,0.80503133114296354655f,
--0.59569930449243335691f,0.80320753148064494287f,-0.59816070699634216190f,
-0.80137617172314024039f,-0.60061647938386875101f,0.79953726910790523519f,
--0.60306659854034827539f,0.79769084094339104407f,-0.60551104140432543410f,
-0.79583690460888356633f,-0.60794978496777352106f,0.79397547755433728334f,
--0.61038280627630958630f,0.79210657730021227785f,-0.61281008242940970820f,
-0.79023022143731003197f,-0.61523159058062670823f,0.78834642762660633863f,
--0.61764730793780375784f,0.78645521359908587833f,-0.62005721176328920663f,
-0.78455659715557513056f,-0.62246127937414996723f,0.78265059616657572938f,
--0.62485948814238623239f,0.78073722857209459924f,-0.62725181549514386070f,
-0.77881651238147608929f,-0.62963823891492709528f,0.77688846567323244230f,
--0.63201873593980895105f,0.77495310659487393057f,-0.63439328416364537677f,
-0.77301045336273710440f,-0.63676186123628431002f,0.77106052426181370674f,
--0.63912444486377573138f,0.76910333764557958780f,-0.64148101280858305095f,
-0.76713891193582040007f,-0.64383154288979127511f,0.76516726562245906962f,
--0.64617601298331639459f,0.76318841726338115805f,-0.64851440102211244110f,
-0.76120238548426188974f,-0.65084668499638076433f,0.75920918897838807204f,
--0.65317284295377653347f,0.75720884650648467851f,-0.65549285299961546070f,
-0.75520137689653643598f,-0.65780669329707852633f,0.75318679904361252042f,
--0.66011434206742036768f,0.75116513190968658975f,-0.66241577759017189475f,
-0.74913639452345925918f,-0.66471097820334490436f,0.74710060598018013245f,
--0.66699992230363736034f,0.74505778544146605835f,-0.66928258834663589827f,
-0.74300795213512182968f,-0.67155895484701844111f,0.74095112535495899486f,
--0.67382900037875603783f,0.73888732446061522463f,-0.67609270357531581208f,
-0.73681656887737001504f,-0.67835004312986124653f,0.73473887809596372112f,
--0.68060099779545302212f,0.73265427167241281570f,-0.68284554638524797010f,
-0.73056276922782759087f,-0.68508366777270024439f,0.72846439044822530740f,
--0.68731534089175916336f,0.72635915508434589771f,-0.68954054473706694051f,
-0.72424708295146689174f,-0.69175925836415763648f,0.72212819392921545614f,
--0.69397146088965377952f,0.72000250796138176579f,-0.69617713149146298601f,
-0.71787004505573170920f,-0.69837624940897280457f,0.71573082528381870571f,
--0.70056879394324822474f,0.71358486878079363525f,-0.70275474445722507788f,
-0.71143219574521665560f,-0.70493408037590488124f,0.70927282643886557789f,
--0.70710678118654746172f,0.70710678118654757274f,-0.70927282643886546687f,
-0.70493408037590510329f,-0.71143219574521654458f,0.70275474445722518890f,
--0.71358486878079352422f,0.70056879394324833576f,-0.71573082528381859468f,
-0.69837624940897291559f,-0.71787004505573159818f,0.69617713149146309703f,
--0.72000250796138165477f,0.69397146088965389055f,-0.72212819392921523409f,
-0.69175925836415785852f,-0.72424708295146678072f,0.68954054473706705153f,
--0.72635915508434578669f,0.68731534089175927438f,-0.72846439044822519637f,
-0.68508366777270035541f,-0.73056276922782747985f,0.68284554638524808112f,
--0.73265427167241270467f,0.68060099779545324417f,-0.73473887809596349907f,
-0.67835004312986135755f,-0.73681656887736979300f,0.67609270357531592310f,
--0.73888732446061511361f,0.67382900037875614885f,-0.74095112535495888384f,
-0.67155895484701855214f,-0.74300795213512171866f,0.66928258834663600929f,
--0.74505778544146594733f,0.66699992230363758239f,-0.74710060598018002143f,
-0.66471097820334501538f,-0.74913639452345914815f,0.66241577759017200577f,
--0.75116513190968636771f,0.66011434206742047870f,-0.75318679904361240940f,
-0.65780669329707874837f,-0.75520137689653643598f,0.65549285299961557172f,
--0.75720884650648467851f,0.65317284295377664449f,-0.75920918897838796102f,
-0.65084668499638098638f,-0.76120238548426166769f,0.64851440102211255212f,
--0.76318841726338115805f,0.64617601298331661663f,-0.76516726562245895860f,
-0.64383154288979138613f,-0.76713891193582040007f,0.64148101280858316198f,
--0.76910333764557947678f,0.63912444486377584241f,-0.77106052426181359571f,
-0.63676186123628442104f,-0.77301045336273699338f,0.63439328416364548779f,
--0.77495310659487381955f,0.63201873593980906207f,-0.77688846567323233128f,
-0.62963823891492720630f,-0.77881651238147597827f,0.62725181549514408275f,
--0.78073722857209448822f,0.62485948814238634341f,-0.78265059616657561836f,
-0.62246127937415007825f,-0.78455659715557501954f,0.62005721176328942867f,
--0.78645521359908576731f,0.61764730793780386886f,-0.78834642762660622761f,
-0.61523159058062693028f,-0.79023022143730992095f,0.61281008242940981923f,
--0.79210657730021216683f,0.61038280627630969732f,-0.79397547755433717231f,
-0.60794978496777363208f,-0.79583690460888345530f,0.60551104140432565615f,
--0.79769084094339093305f,0.60306659854034838641f,-0.79953726910790512417f,
-0.60061647938386886203f,-0.80137617172314024039f,0.59816070699634238395f,
--0.80320753148064483184f,0.59569930449243346793f,-0.80503133114296343553f,
-0.59323229503980001720f,-0.80684755354379933401f,0.59075970185887416442f,
--0.80865618158817498262f,0.58828154822264533408f,-0.81045719825259465718f,
-0.58579785745643897510f,-0.81225058658520377097f,0.58330865293769851299f,
--0.81403632970594841378f,0.58081395809576452649f,-0.81581441080673378075f,
-0.57831379641165570060f,-0.81758481315158360037f,0.57580819141784544968f,
--0.81934752007679700903f,0.57329716669804209328f,-0.82110251499110464835f,
-0.57078074588696725566f,-0.82284978137582620583f,0.56825895267013171175f,
--0.82458930278502506894f,0.56573181078361345353f,-0.82632106284566353427f,
-0.56319934401383409117f,-0.82804504525775568524f,0.56066157619733614226f,
--0.82976123379452293438f,0.55811853122055632426f,-0.83146961230254534669f,
-0.55557023301960217765f,-0.83317016470191318511f,0.55301670558002746780f,
--0.83486287498638001026f,0.55045797293660492233f,-0.83654772722351189440f,
-0.54789405917310041172f,-0.83822470555483807875f,0.54532498842204635281f,
--0.83989379419599952126f,0.54275078486451588944f,-0.84155497743689833268f,
-0.54017147272989296525f,-0.84320823964184532517f,0.53758707629564572716f,
--0.84485356524970711689f,0.53499761988709715332f,-0.84649093877405201525f,
-0.53240312787719801246f,-0.84812034480329712149f,0.52980362468629482731f,
--0.84974176800085254868f,0.52719913478190127964f,-0.85135519310526519554f,
-0.52458968267846894928f,-0.85296060493036363059f,0.52197529293715438925f,
--0.85455798836540042274f,0.51935599016558975372f,-0.85614732837519447184f,
-0.51673179901764976218f,-0.85772861000027200706f,0.51410274419322177231f,
--0.85930181835700836235f,0.51146885043797052361f,-0.86086693863776719837f,
-0.50883014254310732216f,-0.86242395611104050168f,0.50618664534515522835f,
--0.86397285612158669643f,0.50353838372571757542f,-0.86551362409056897818f,
-0.50088538261124093687f,-0.86704624551569264845f,0.49822766697278175752f,
--0.86857070597134089507f,0.49556526182577254058f,-0.87008699110871134952f,
-0.49289819222978414892f,-0.87159508665595086807f,0.49022648328829138142f,
--0.87309497841829009079f,0.48755016014843588490f,-0.87458665227817611321f,
-0.48486924800079111986f,-0.87607009419540649020f,0.48218377207912288540f,
--0.87754529020726113053f,0.47949375766015328582f,-0.87901222642863352519f,
-0.47679923006332208812f,-0.88047088905216075450f,0.47410021465055007805f,
--0.88192126434835493853f,0.47139673682599780857f,-0.88336333866573168994f,
-0.46868882203582784562f,-0.88479709843093778954f,0.46597649576796618121f,
--0.88622253014888052736f,0.46325978355186031576f,-0.88763962040285382393f,
-0.46053871095824022719f,-0.88904835585466457371f,0.45781330359887717485f,
--0.89044872324475787817f,0.45508358712634389143f,-0.89184070939234261211f,
-0.45234958723377105549f,-0.89322430119551521344f,0.44961132965460687272f,
--0.89459948563138269595f,0.44686884016237415906f,-0.89596624975618510689f,
-0.44412214457042931137f,-0.89732458070541820661f,0.44137126873171689256f,
--0.89867446569395392775f,0.43861623853852754751f,-0.90001589201616016833f,
-0.43585707992225553031f,-0.90134884704602191707f,0.43309381885315206828f,
--0.90267331823725871498f,0.43032648134008288920f,-0.90398929312344333820f,
-0.42755509343028202940f,-0.90529675931811870448f,0.42477968120910886141f,
--0.90659570451491533483f,0.42200027079979984812f,-0.90788611648766603945f,
-0.41921688836322423821f,-0.90916798309052238025f,0.41642956009763715253f,
--0.91044129225806713634f,0.41363831223843466889f,-0.91170603200542976730f,
-0.41084317105790413294f,-0.91296219042839821256f,0.40804416286497857680f,
--0.91420975570353069095f,0.40524131400498991651f,-0.91544871608826772214f,
-0.40243465085941859671f,-0.91667905992104259383f,0.39962419984564706565f,
--0.91790077562139049672f,0.39680998741671025254f,-0.91911385169005777040f,
-0.39399204006104815434f,-0.92031827670911048322f,0.39117038430225403722f,
--0.92151403934204179080f,0.38834504669882657923f,-0.92270112833387862850f,
-0.38551605384391884890f,-0.92387953251128673848f,0.38268343236508989280f,
--0.92504924078267747323f,0.37984720892405138271f,-0.92621024213831137928f,
-0.37700741021641814843f,-0.92736252565040111495f,0.37416406297145804460f,
--0.92850608047321547822f,0.37131719395183770960f,-0.92964089584318121418f,
-0.36846682995337259880f,-0.93076696107898371224f,0.36561299780477379828f,
--0.93188426558166803648f,0.36275572436739728088f,-0.93299279883473884567f,
-0.35989503653498833291f,-0.93409255040425875904f,0.35703096123343031065f,
--0.93518350993894761025f,0.35416352542049039931f,-0.93626566717027825959f,
-0.35129275608556720378f,-0.93733901191257484875f,0.34841868024943478677f,
--0.93840353406310816897f,0.34554132496398898278f,-0.93945922360218991898f,
-0.34266071731199443384f,-0.94050607059326829518f,0.33977688440682701776f,
--0.94154406518302069529f,0.33688985339222032867f,-0.94257319760144686605f,
-0.33399965144200938205f,-0.94359345816196038559f,0.33110630575987648472f,
--0.94460483726148014583f,0.32820984357909271933f,-0.94560732538052116869f,
-0.32531029216226325929f,-0.94660091308328353499f,0.32240767880106985244f,
--0.94758559101774109124f,0.31950203081601580291f,-0.94856134991573026749f,
-0.31659337555616606785f,-0.94952818059303667475f,0.31368174039889140658f,
--0.95048607394948170235f,0.31076715274961153046f,-0.95143502096900833820f,
-0.30784964004153503314f,-0.95237501271976576778f,0.30492922973540265152f,
--0.95330604035419386211f,0.30200594931922802866f,-0.95422809510910555630f,
-0.29907982630804053059f,-0.95514116830577067141f,0.29615088824362401088f,
--0.95604525134999629454f,0.29321916269425896129f,-0.95694033573220882438f,
-0.29028467725446238656f,-0.95782641302753290802f,0.28734745954472962204f,
--0.95870347489587148804f,0.28440753721127209896f,-0.95957151308198451733f,
-0.28146493792575788540f,-0.96043051941556578655f,0.27851968938505317075f,
--0.96128048581132063966f,0.27557181931095831029f,-0.96212140426904146917f,
-0.27262135544994925418f,-0.96295326687368387741f,0.26966832557291509076f,
--0.96377606579543984022f,0.26671275747489847641f,-0.96458979328981264700f,
-0.26375467897483156898f,-0.96539444169768928727f,0.26079411791527584707f,
--0.96619000344541250413f,0.25783110216215898713f,-0.96697647104485207059f,
-0.25486565960451468271f,-0.96775383709347539973f,0.25189781815421719013f,
--0.96852209427441737777f,0.24892760574572009302f,-0.96928123535654842069f,
-0.24595505033579465048f,-0.97003125319454397424f,0.24298017990326406523f,
--0.97077214072895023911f,0.24000302244874177626f,-0.97150389098625178352f,
-0.23702360599436717026f,-0.97222649707893626925f,0.23404195858354351345f,
--0.97293995220556006576f,0.23105810828067133156f,-0.97364424965081186603f,
-0.22807208317088606409f,-0.97433938278557585821f,0.22508391135979283204f,
--0.97502534506699412020f,0.22209362097320364815f,-0.97570213003852845901f,
-0.21910124015687004739f,-0.97636973133002114000f,0.21610679707621943679f,
--0.97702814265775439484f,0.21311031991609141745f,-0.97767735782450992943f,
-0.21011183688046980444f,-0.97831737071962754371f,0.20711137619221883788f,
--0.97894817531906219710f,0.20410896609281684033f,-0.97956976568544051887f,
-0.20110463484209201157f,-0.98018213596811731847f,0.19809841071795381007f,
--0.98078528040323043058f,0.19509032201612860891f,-0.98137919331375456089f,
-0.19208039704989246510f,-0.98196386910955524296f,0.18906866414980635915f,
--0.98253930228744124076f,0.18605515166344691047f,-0.98310548743121628501f,
-0.18303988795514089527f,-0.98366241921173025453f,0.18002290140569957022f,
--0.98421009238692902521f,0.17700422041214894375f,-0.98474850180190420801f,
-0.17398387338746412745f,-0.98527764238894122162f,0.17096188876030121717f,
--0.98579750916756736512f,0.16793829497473128365f,-0.98630809724459855836f,
-0.16491312048997014417f,-0.98680940181418552726f,0.16188639378011174252f,
--0.98730141815785843473f,0.15885814333386147346f,-0.98778414164457217783f,
-0.15582839765426537149f,-0.98825756773074946437f,0.15279718525844368515f,
--0.98872169196032377858f,0.14976453467732145364f,-0.98917650996478101444f,
-0.14673047445536180344f,-0.98962201746320077600f,0.14369503315029463764f,
--0.99005821026229701154f,0.14065823933284954395f,-0.99048508425645709341f,
-0.13762012158648603832f,-0.99090263542778000971f,0.13458070850712627875f,
--0.99131085984611544415f,0.13154002870288333815f,-0.99170975366909952520f,
-0.12849811079379308554f,-0.99209931314219179654f,0.12545498341154626143f,
--0.99247953459870996706f,0.12241067519921634832f,-0.99285041445986510489f,
-0.11936521481099163222f,-0.99321194923479450001f,0.11631863091190471071f,
--0.99356413552059530403f,0.11327095217756441570f,-0.99390697000235606051f,
-0.11022220729388323979f,-0.99424044945318790223f,0.10717242495680916192f,
--0.99456457073425541537f,0.10412163387205457254f,-0.99487933079480561638f,
-0.10106986275482793269f,-0.99518472667219681771f,0.09801714032956082567f,
--0.99548075549192693856f,0.09496349532963890838f,-0.99576741446765981713f,
-0.09190895649713275162f,-0.99604470090125196702f,0.08885355258252475297f,
--0.99631261218277800129f,0.08579731234444015753f,-0.99657114579055483539f,
-0.08274026454937563613f,-0.99682029929116566791f,0.07968243797143019502f,
--0.99706007033948296225f,0.07662386139203168633f,-0.99729045667869020697f,
-0.07356456359966773162f,-0.99751145614030345410f,0.07050457338961385600f,
--0.99772306664419163624f,0.06744391956366417584f,-0.99792528619859599548f,
-0.06438263092985770097f,-0.99811811290014917919f,0.06132073630220848809f,
--0.99830154493389289261f,0.05825826450043579408f,-0.99847558057329477421f,
-0.05519524434969009380f,-0.99864021818026516009f,0.05213170468028359428f,
--0.99879545620517240501f,0.04906767432741796636f,-0.99894129318685687124f,
-0.04600318213091470626f,-0.99907772775264536147f,0.04293825693494102147f,
--0.99920475861836388631f,0.03987292758774012985f,-0.99932238458834954375f,
-0.03680722294135883171f,-0.99943060455546173237f,0.03374117185137770480f,
--0.99952941750109314256f,0.03067480317663686534f,-0.99961882249517863830f,
-0.02760814577896565994f,-0.99969881869620424997f,0.02454122852291232629f,
--0.99976940535121527898f,0.02147408027546966747f,-0.99983058179582340319f,
-0.01840672990580510121f,-0.99988234745421256111f,0.01533920628498806026f,
--0.99992470183914450299f,0.01227153828572000692f,-0.99995764455196389786f,
-0.00920375478206002066f,-0.99998117528260110909f,0.00613588464915479880f,
--0.99999529380957619118f,0.00306795676296597701f,1.00000000000000000000f,
-0.00000000000000000000f,0.99992470183914450299f,0.01227153828571992539f,
-0.99969881869620424997f,0.02454122852291228812f,0.99932238458834954375f,
-0.03680722294135883171f,0.99879545620517240501f,0.04906767432741801493f,
-0.99811811290014917919f,0.06132073630220857829f,0.99729045667869020697f,
-0.07356456359966742631f,0.99631261218277800129f,0.08579731234443989385f,
-0.99518472667219692873f,0.09801714032956060363f,0.99390697000235606051f,
-0.11022220729388305938f,0.99247953459870996706f,0.12241067519921619566f,
-0.99090263542778000971f,0.13458070850712616773f,0.98917650996478101444f,
-0.14673047445536174793f,0.98730141815785843473f,0.15885814333386144570f,
-0.98527764238894122162f,0.17096188876030121717f,0.98310548743121628501f,
-0.18303988795514095078f,0.98078528040323043058f,0.19509032201612824808f,
-0.97831737071962765473f,0.20711137619221856032f,0.97570213003852857003f,
-0.21910124015686979759f,0.97293995220556017678f,0.23105810828067110951f,
-0.97003125319454397424f,0.24298017990326387094f,0.96697647104485207059f,
-0.25486565960451457169f,0.96377606579543984022f,0.26671275747489836538f,
-0.96043051941556578655f,0.27851968938505305973f,0.95694033573220882438f,
-0.29028467725446233105f,0.95330604035419386211f,0.30200594931922808417f,
-0.94952818059303667475f,0.31368174039889151761f,0.94560732538052127971f,
-0.32531029216226292622f,0.94154406518302080631f,0.33688985339222005111f,
-0.93733901191257495977f,0.34841868024943456472f,0.93299279883473895669f,
-0.35989503653498811087f,0.92850608047321558924f,0.37131719395183754306f,
-0.92387953251128673848f,0.38268343236508978178f,0.91911385169005777040f,
-0.39399204006104809883f,0.91420975570353069095f,0.40524131400498986100f,
-0.90916798309052238025f,0.41642956009763715253f,0.90398929312344333820f,
-0.42755509343028208491f,0.89867446569395381673f,0.43861623853852765853f,
-0.89322430119551532446f,0.44961132965460653965f,0.88763962040285393496f,
-0.46053871095824000514f,0.88192126434835504956f,0.47139673682599764204f,
-0.87607009419540660122f,0.48218377207912271887f,0.87008699110871146054f,
-0.49289819222978403790f,0.86397285612158669643f,0.50353838372571757542f,
-0.85772861000027211809f,0.51410274419322166128f,0.85135519310526519554f,
-0.52458968267846894928f,0.84485356524970711689f,0.53499761988709715332f,
-0.83822470555483807875f,0.54532498842204646383f,0.83146961230254523567f,
-0.55557023301960217765f,0.82458930278502529099f,0.56573181078361312046f,
-0.81758481315158371139f,0.57580819141784533866f,0.81045719825259476821f,
-0.58579785745643886408f,0.80320753148064494287f,0.59569930449243335691f,
-0.79583690460888356633f,0.60551104140432554512f,0.78834642762660622761f,
-0.61523159058062681925f,0.78073722857209448822f,0.62485948814238634341f,
-0.77301045336273699338f,0.63439328416364548779f,0.76516726562245895860f,
-0.64383154288979138613f,0.75720884650648456748f,0.65317284295377675551f,
-0.74913639452345937020f,0.66241577759017178373f,0.74095112535495921691f,
-0.67155895484701833009f,0.73265427167241281570f,0.68060099779545302212f,
-0.72424708295146700276f,0.68954054473706682948f,0.71573082528381870571f,
-0.69837624940897280457f,0.70710678118654757274f,0.70710678118654757274f,
-0.69837624940897291559f,0.71573082528381859468f,0.68954054473706694051f,
-0.72424708295146689174f,0.68060099779545302212f,0.73265427167241281570f,
-0.67155895484701833009f,0.74095112535495910588f,0.66241577759017178373f,
-0.74913639452345925918f,0.65317284295377686654f,0.75720884650648456748f,
-0.64383154288979149715f,0.76516726562245895860f,0.63439328416364548779f,
-0.77301045336273688235f,0.62485948814238645443f,0.78073722857209448822f,
-0.61523159058062681925f,0.78834642762660622761f,0.60551104140432554512f,
-0.79583690460888345530f,0.59569930449243346793f,0.80320753148064483184f,
-0.58579785745643886408f,0.81045719825259476821f,0.57580819141784533866f,
-0.81758481315158371139f,0.56573181078361323149f,0.82458930278502529099f,
-0.55557023301960228867f,0.83146961230254523567f,0.54532498842204646383f,
-0.83822470555483796772f,0.53499761988709726435f,0.84485356524970700587f,
-0.52458968267846883826f,0.85135519310526519554f,0.51410274419322166128f,
-0.85772861000027211809f,0.50353838372571757542f,0.86397285612158669643f,
-0.49289819222978409341f,0.87008699110871134952f,0.48218377207912282989f,
-0.87607009419540660122f,0.47139673682599780857f,0.88192126434835493853f,
-0.46053871095824000514f,0.88763962040285393496f,0.44961132965460659516f,
-0.89322430119551532446f,0.43861623853852771404f,0.89867446569395381673f,
-0.42755509343028219593f,0.90398929312344333820f,0.41642956009763731906f,
-0.90916798309052226923f,0.40524131400498986100f,0.91420975570353069095f,
-0.39399204006104809883f,0.91911385169005777040f,0.38268343236508983729f,
-0.92387953251128673848f,0.37131719395183759858f,0.92850608047321558924f,
-0.35989503653498827740f,0.93299279883473884567f,0.34841868024943450921f,
-0.93733901191257495977f,0.33688985339222005111f,0.94154406518302080631f,
-0.32531029216226298173f,0.94560732538052127971f,0.31368174039889157312f,
-0.94952818059303667475f,0.30200594931922819519f,0.95330604035419375109f,
-0.29028467725446233105f,0.95694033573220893540f,0.27851968938505305973f,
-0.96043051941556578655f,0.26671275747489842090f,0.96377606579543984022f,
-0.25486565960451462720f,0.96697647104485207059f,0.24298017990326398197f,
-0.97003125319454397424f,0.23105810828067127605f,0.97293995220556006576f,
-0.21910124015686976984f,0.97570213003852857003f,0.20711137619221856032f,
-0.97831737071962765473f,0.19509032201612833135f,0.98078528040323043058f,
-0.18303988795514106180f,0.98310548743121628501f,0.17096188876030135595f,
-0.98527764238894122162f,0.15885814333386139019f,0.98730141815785843473f,
-0.14673047445536174793f,0.98917650996478101444f,0.13458070850712622324f,
-0.99090263542778000971f,0.12241067519921627893f,0.99247953459870996706f,
-0.11022220729388318428f,0.99390697000235606051f,0.09801714032956077016f,
-0.99518472667219681771f,0.08579731234443987997f,0.99631261218277800129f,
-0.07356456359966745406f,0.99729045667869020697f,0.06132073630220864768f,
-0.99811811290014917919f,0.04906767432741812596f,0.99879545620517240501f,
-0.03680722294135899131f,0.99932238458834954375f,0.02454122852291226384f,
-0.99969881869620424997f,0.01227153828571994447f,0.99992470183914450299f,
-0.00000000000000006123f,1.00000000000000000000f,-0.01227153828571982304f,
-0.99992470183914450299f,-0.02454122852291214241f,0.99969881869620424997f,
--0.03680722294135886641f,0.99932238458834954375f,-0.04906767432741800800f,
-0.99879545620517240501f,-0.06132073630220852972f,0.99811811290014917919f,
--0.07356456359966732916f,0.99729045667869020697f,-0.08579731234443975507f,
-0.99631261218277800129f,-0.09801714032956064526f,0.99518472667219692873f,
--0.11022220729388305938f,0.99390697000235606051f,-0.12241067519921615403f,
-0.99247953459870996706f,-0.13458070850712611222f,0.99090263542778000971f,
--0.14673047445536163691f,0.98917650996478101444f,-0.15885814333386127917f,
-0.98730141815785843473f,-0.17096188876030124493f,0.98527764238894122162f,
--0.18303988795514092303f,0.98310548743121628501f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.20711137619221844930f,0.97831737071962765473f,
--0.21910124015686965881f,0.97570213003852857003f,-0.23105810828067113727f,
-0.97293995220556017678f,-0.24298017990326387094f,0.97003125319454397424f,
--0.25486565960451451618f,0.96697647104485207059f,-0.26671275747489830987f,
-0.96377606579543984022f,-0.27851968938505294870f,0.96043051941556589757f,
--0.29028467725446216452f,0.95694033573220893540f,-0.30200594931922808417f,
-0.95330604035419386211f,-0.31368174039889140658f,0.94952818059303667475f,
--0.32531029216226287071f,0.94560732538052139073f,-0.33688985339221994009f,
-0.94154406518302080631f,-0.34841868024943439819f,0.93733901191257495977f,
--0.35989503653498816638f,0.93299279883473884567f,-0.37131719395183748755f,
-0.92850608047321558924f,-0.38268343236508972627f,0.92387953251128673848f,
--0.39399204006104798781f,0.91911385169005777040f,-0.40524131400498974998f,
-0.91420975570353069095f,-0.41642956009763698599f,0.90916798309052249127f,
--0.42755509343028186287f,0.90398929312344344922f,-0.43861623853852738097f,
-0.89867446569395392775f,-0.44961132965460670619f,0.89322430119551521344f,
--0.46053871095824006066f,0.88763962040285393496f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.48218377207912271887f,0.87607009419540660122f,
--0.49289819222978398239f,0.87008699110871146054f,-0.50353838372571746440f,
-0.86397285612158680745f,-0.51410274419322155026f,0.85772861000027211809f,
--0.52458968267846872724f,0.85135519310526519554f,-0.53499761988709704230f,
-0.84485356524970722791f,-0.54532498842204624179f,0.83822470555483818977f,
--0.55557023301960195560f,0.83146961230254534669f,-0.56573181078361323149f,
-0.82458930278502517996f,-0.57580819141784533866f,0.81758481315158371139f,
--0.58579785745643886408f,0.81045719825259476821f,-0.59569930449243335691f,
-0.80320753148064494287f,-0.60551104140432543410f,0.79583690460888356633f,
--0.61523159058062670823f,0.78834642762660633863f,-0.62485948814238623239f,
-0.78073722857209459924f,-0.63439328416364537677f,0.77301045336273710440f,
--0.64383154288979127511f,0.76516726562245906962f,-0.65317284295377653347f,
-0.75720884650648467851f,-0.66241577759017189475f,0.74913639452345925918f,
--0.67155895484701844111f,0.74095112535495899486f,-0.68060099779545302212f,
-0.73265427167241281570f,-0.68954054473706694051f,0.72424708295146689174f,
--0.69837624940897280457f,0.71573082528381870571f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.71573082528381859468f,0.69837624940897291559f,
--0.72424708295146678072f,0.68954054473706705153f,-0.73265427167241270467f,
-0.68060099779545324417f,-0.74095112535495888384f,0.67155895484701855214f,
--0.74913639452345914815f,0.66241577759017200577f,-0.75720884650648467851f,
-0.65317284295377664449f,-0.76516726562245895860f,0.64383154288979138613f,
--0.77301045336273699338f,0.63439328416364548779f,-0.78073722857209448822f,
-0.62485948814238634341f,-0.78834642762660622761f,0.61523159058062693028f,
--0.79583690460888345530f,0.60551104140432565615f,-0.80320753148064483184f,
-0.59569930449243346793f,-0.81045719825259465718f,0.58579785745643897510f,
--0.81758481315158360037f,0.57580819141784544968f,-0.82458930278502506894f,
-0.56573181078361345353f,-0.83146961230254534669f,0.55557023301960217765f,
--0.83822470555483807875f,0.54532498842204635281f,-0.84485356524970711689f,
-0.53499761988709715332f,-0.85135519310526519554f,0.52458968267846894928f,
--0.85772861000027200706f,0.51410274419322177231f,-0.86397285612158669643f,
-0.50353838372571757542f,-0.87008699110871134952f,0.49289819222978414892f,
--0.87607009419540649020f,0.48218377207912288540f,-0.88192126434835493853f,
-0.47139673682599780857f,-0.88763962040285382393f,0.46053871095824022719f,
--0.89322430119551521344f,0.44961132965460687272f,-0.89867446569395392775f,
-0.43861623853852754751f,-0.90398929312344333820f,0.42755509343028202940f,
--0.90916798309052238025f,0.41642956009763715253f,-0.91420975570353069095f,
-0.40524131400498991651f,-0.91911385169005777040f,0.39399204006104815434f,
--0.92387953251128673848f,0.38268343236508989280f,-0.92850608047321547822f,
-0.37131719395183770960f,-0.93299279883473884567f,0.35989503653498833291f,
--0.93733901191257484875f,0.34841868024943478677f,-0.94154406518302069529f,
-0.33688985339222032867f,-0.94560732538052116869f,0.32531029216226325929f,
--0.94952818059303667475f,0.31368174039889140658f,-0.95330604035419386211f,
-0.30200594931922802866f,-0.95694033573220882438f,0.29028467725446238656f,
--0.96043051941556578655f,0.27851968938505317075f,-0.96377606579543984022f,
-0.26671275747489847641f,-0.96697647104485207059f,0.25486565960451468271f,
--0.97003125319454397424f,0.24298017990326406523f,-0.97293995220556006576f,
-0.23105810828067133156f,-0.97570213003852845901f,0.21910124015687004739f,
--0.97831737071962754371f,0.20711137619221883788f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.98310548743121628501f,0.18303988795514089527f,
--0.98527764238894122162f,0.17096188876030121717f,-0.98730141815785843473f,
-0.15885814333386147346f,-0.98917650996478101444f,0.14673047445536180344f,
--0.99090263542778000971f,0.13458070850712627875f,-0.99247953459870996706f,
-0.12241067519921634832f,-0.99390697000235606051f,0.11022220729388323979f,
--0.99518472667219681771f,0.09801714032956082567f,-0.99631261218277800129f,
-0.08579731234444015753f,-0.99729045667869020697f,0.07356456359966773162f,
--0.99811811290014917919f,0.06132073630220848809f,-0.99879545620517240501f,
-0.04906767432741796636f,-0.99932238458834954375f,0.03680722294135883171f,
--0.99969881869620424997f,0.02454122852291232629f,-0.99992470183914450299f,
-0.01227153828572000692f,1.00000000000000000000f,0.00000000000000000000f,
-0.99879545620517240501f,0.04906767432741801493f,0.99518472667219692873f,
-0.09801714032956060363f,0.98917650996478101444f,0.14673047445536174793f,
-0.98078528040323043058f,0.19509032201612824808f,0.97003125319454397424f,
-0.24298017990326387094f,0.95694033573220882438f,0.29028467725446233105f,
-0.94154406518302080631f,0.33688985339222005111f,0.92387953251128673848f,
-0.38268343236508978178f,0.90398929312344333820f,0.42755509343028208491f,
-0.88192126434835504956f,0.47139673682599764204f,0.85772861000027211809f,
-0.51410274419322166128f,0.83146961230254523567f,0.55557023301960217765f,
-0.80320753148064494287f,0.59569930449243335691f,0.77301045336273699338f,
-0.63439328416364548779f,0.74095112535495921691f,0.67155895484701833009f,
-0.70710678118654757274f,0.70710678118654757274f,0.67155895484701833009f,
-0.74095112535495910588f,0.63439328416364548779f,0.77301045336273688235f,
-0.59569930449243346793f,0.80320753148064483184f,0.55557023301960228867f,
-0.83146961230254523567f,0.51410274419322166128f,0.85772861000027211809f,
-0.47139673682599780857f,0.88192126434835493853f,0.42755509343028219593f,
-0.90398929312344333820f,0.38268343236508983729f,0.92387953251128673848f,
-0.33688985339222005111f,0.94154406518302080631f,0.29028467725446233105f,
-0.95694033573220893540f,0.24298017990326398197f,0.97003125319454397424f,
-0.19509032201612833135f,0.98078528040323043058f,0.14673047445536174793f,
-0.98917650996478101444f,0.09801714032956077016f,0.99518472667219681771f,
-0.04906767432741812596f,0.99879545620517240501f,0.00000000000000006123f,
-1.00000000000000000000f,-0.04906767432741800800f,0.99879545620517240501f,
--0.09801714032956064526f,0.99518472667219692873f,-0.14673047445536163691f,
-0.98917650996478101444f,-0.19509032201612819257f,0.98078528040323043058f,
--0.24298017990326387094f,0.97003125319454397424f,-0.29028467725446216452f,
-0.95694033573220893540f,-0.33688985339221994009f,0.94154406518302080631f,
--0.38268343236508972627f,0.92387953251128673848f,-0.42755509343028186287f,
-0.90398929312344344922f,-0.47139673682599769755f,0.88192126434835504956f,
--0.51410274419322155026f,0.85772861000027211809f,-0.55557023301960195560f,
-0.83146961230254534669f,-0.59569930449243335691f,0.80320753148064494287f,
--0.63439328416364537677f,0.77301045336273710440f,-0.67155895484701844111f,
-0.74095112535495899486f,-0.70710678118654746172f,0.70710678118654757274f,
--0.74095112535495888384f,0.67155895484701855214f,-0.77301045336273699338f,
-0.63439328416364548779f,-0.80320753148064483184f,0.59569930449243346793f,
--0.83146961230254534669f,0.55557023301960217765f,-0.85772861000027200706f,
-0.51410274419322177231f,-0.88192126434835493853f,0.47139673682599780857f,
--0.90398929312344333820f,0.42755509343028202940f,-0.92387953251128673848f,
-0.38268343236508989280f,-0.94154406518302069529f,0.33688985339222032867f,
--0.95694033573220882438f,0.29028467725446238656f,-0.97003125319454397424f,
-0.24298017990326406523f,-0.98078528040323043058f,0.19509032201612860891f,
--0.98917650996478101444f,0.14673047445536180344f,-0.99518472667219681771f,
-0.09801714032956082567f,-0.99879545620517240501f,0.04906767432741796636f,
-1.00000000000000000000f,0.00000000000000000000f,0.98078528040323043058f,
-0.19509032201612824808f,0.92387953251128673848f,0.38268343236508978178f,
-0.83146961230254523567f,0.55557023301960217765f,0.70710678118654757274f,
-0.70710678118654757274f,0.55557023301960228867f,0.83146961230254523567f,
-0.38268343236508983729f,0.92387953251128673848f,0.19509032201612833135f,
-0.98078528040323043058f,0.00000000000000006123f,1.00000000000000000000f,
--0.19509032201612819257f,0.98078528040323043058f,-0.38268343236508972627f,
-0.92387953251128673848f,-0.55557023301960195560f,0.83146961230254534669f,
--0.70710678118654746172f,0.70710678118654757274f,-0.83146961230254534669f,
-0.55557023301960217765f,-0.92387953251128673848f,0.38268343236508989280f,
--0.98078528040323043058f,0.19509032201612860891f,1.00000000000000000000f,
-0.00000000000000000000f,0.70710678118654757274f,0.70710678118654757274f,
-0.00000000000000006123f,1.00000000000000000000f,-0.70710678118654746172f,
-0.70710678118654757274f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99999529123306274414f,
+0.00306795677170157433f,0.99998116493225097656f,0.00613588467240333557f,
+0.99995762109756469727f,0.00920375436544418335f,0.99992471933364868164f,
+0.01227153837680816650f,0.99988234043121337891f,0.01533920597285032272f,
+0.99983060359954833984f,0.01840673014521598816f,0.99976938962936401367f,
+0.02147408016026020050f,0.99969881772994995117f,0.02454122900962829590f,
+0.99961882829666137695f,0.02760814502835273743f,0.99952942132949829102f,
+0.03067480400204658508f,0.99943059682846069336f,0.03374117240309715271f,
+0.99932235479354858398f,0.03680722415447235107f,0.99920475482940673828f,
+0.03987292572855949402f,0.99907773733139038086f,0.04293825849890708923f,
+0.99894130229949951172f,0.04600318148732185364f,0.99879544973373413086f,
+0.04906767606735229492f,0.99864023923873901367f,0.05213170498609542847f,
+0.99847555160522460938f,0.05519524589180946350f,0.99830156564712524414f,
+0.05825826525688171387f,0.99811810255050659180f,0.06132073700428009033f,
+0.99792528152465820312f,0.06438262760639190674f,0.99772304296493530273f,
+0.06744392216205596924f,0.99751144647598266602f,0.07050457596778869629f,
+0.99729043245315551758f,0.07356456667184829712f,0.99706006050109863281f,
+0.07662386447191238403f,0.99682027101516723633f,0.07968243956565856934f,
+0.99657112360000610352f,0.08274026215076446533f,0.99631261825561523438f,
+0.08579730987548828125f,0.99604469537734985352f,0.08885355293750762939f,
+0.99576741456985473633f,0.09190895408391952515f,0.99548077583312988281f,
+0.09496349841356277466f,0.99518471956253051758f,0.09801714122295379639f,
+0.99487930536270141602f,0.10106986016035079956f,0.99456459283828735352f,
+0.10412163287401199341f,0.99424046277999877930f,0.10717242211103439331f,
+0.99390697479248046875f,0.11022220551967620850f,0.99356412887573242188f,
+0.11327095329761505127f,0.99321192502975463867f,0.11631862819194793701f,
+0.99285042285919189453f,0.11936521530151367188f,0.99247956275939941406f,
+0.12241067737340927124f,0.99209928512573242188f,0.12545497715473175049f,
+0.99170976877212524414f,0.12849810719490051270f,0.99131083488464355469f,
+0.13154003024101257324f,0.99090266227722167969f,0.13458070158958435059f,
+0.99048507213592529297f,0.13762012124061584473f,0.99005818367004394531f,
+0.14065824449062347412f,0.98962199687957763672f,0.14369502663612365723f,
+0.98917651176452636719f,0.14673046767711639404f,0.98872166872024536133f,
+0.14976453781127929688f,0.98825758695602416992f,0.15279719233512878418f,
+0.98778414726257324219f,0.15582840144634246826f,0.98730140924453735352f,
+0.15885815024375915527f,0.98680937290191650391f,0.16188639402389526367f,
+0.98630809783935546875f,0.16491311788558959961f,0.98579752445220947266f,
+0.16793829202651977539f,0.98527765274047851562f,0.17096188664436340332f,
+0.98474848270416259766f,0.17398387193679809570f,0.98421007394790649414f,
+0.17700421810150146484f,0.98366242647171020508f,0.18002289533615112305f,
+0.98310548067092895508f,0.18303988873958587646f,0.98253929615020751953f,
+0.18605515360832214355f,0.98196387290954589844f,0.18906866014003753662f,
+0.98137921094894409180f,0.19208039343357086182f,0.98078525066375732422f,
+0.19509032368659973145f,0.98018211126327514648f,0.19809840619564056396f,
+0.97956979274749755859f,0.20110464096069335938f,0.97894817590713500977f,
+0.20410896837711334229f,0.97831737995147705078f,0.20711137354373931885f,
+0.97767734527587890625f,0.21011184155941009521f,0.97702813148498535156f,
+0.21311031281948089600f,0.97636973857879638672f,0.21610680222511291504f,
+0.97570210695266723633f,0.21910123527050018311f,0.97502535581588745117f,
+0.22209362685680389404f,0.97433936595916748047f,0.22508391737937927246f,
+0.97364425659179687500f,0.22807207703590393066f,0.97293996810913085938f,
+0.23105810582637786865f,0.97222650051116943359f,0.23404195904731750488f,
+0.97150391340255737305f,0.23702360689640045166f,0.97077214717864990234f,
+0.24000301957130432129f,0.97003126144409179688f,0.24298018217086791992f,
+0.96928125619888305664f,0.24595504999160766602f,0.96852207183837890625f,
+0.24892760813236236572f,0.96775382757186889648f,0.25189781188964843750f,
+0.96697646379470825195f,0.25486564636230468750f,0.96618998050689697266f,
+0.25783109664916992188f,0.96539443731307983398f,0.26079410314559936523f,
+0.96458977460861206055f,0.26375466585159301758f,0.96377605199813842773f,
+0.26671275496482849121f,0.96295326948165893555f,0.26966831088066101074f,
+0.96212142705917358398f,0.27262136340141296387f,0.96128046512603759766f,
+0.27557182312011718750f,0.96043050289154052734f,0.27851969003677368164f,
+0.95957154035568237305f,0.28146493434906005859f,0.95870345830917358398f,
+0.28440752625465393066f,0.95782643556594848633f,0.28734746575355529785f,
+0.95694035291671752930f,0.29028466343879699707f,0.95604526996612548828f,
+0.29321914911270141602f,0.95514118671417236328f,0.29615089297294616699f,
+0.95422810316085815430f,0.29907983541488647461f,0.95330601930618286133f,
+0.30200594663619995117f,0.95237499475479125977f,0.30492922663688659668f,
+0.95143502950668334961f,0.30784964561462402344f,0.95048606395721435547f,
+0.31076714396476745605f,0.94952815771102905273f,0.31368175148963928223f,
+0.94856137037277221680f,0.31659337878227233887f,0.94758558273315429688f,
+0.31950202584266662598f,0.94660091400146484375f,0.32240769267082214355f,
+0.94560730457305908203f,0.32531028985977172852f,0.94460481405258178711f,
+0.32820984721183776855f,0.94359344244003295898f,0.33110630512237548828f,
+0.94257318973541259766f,0.33399966359138488770f,0.94154405593872070312f,
+0.33688986301422119141f,0.94050604104995727539f,0.33977687358856201172f,
+0.93945920467376708984f,0.34266072511672973633f,0.93840354681015014648f,
+0.34554132819175720215f,0.93733900785446166992f,0.34841868281364440918f,
+0.93626564741134643555f,0.35129275918006896973f,0.93518352508544921875f,
+0.35416352748870849609f,0.93409252166748046875f,0.35703095793724060059f,
+0.93299281597137451172f,0.35989505052566528320f,0.93188428878784179688f,
+0.36275571584701538086f,0.93076694011688232422f,0.36561298370361328125f,
+0.92964088916778564453f,0.36846682429313659668f,0.92850607633590698242f,
+0.37131720781326293945f,0.92736250162124633789f,0.37416407465934753418f,
+0.92621022462844848633f,0.37700742483139038086f,0.92504924535751342773f,
+0.37984719872474670410f,0.92387950420379638672f,0.38268342614173889160f,
+0.92270112037658691406f,0.38551604747772216797f,0.92151403427124023438f,
+0.38834503293037414551f,0.92031830549240112305f,0.39117038249969482422f,
+0.91911387443542480469f,0.39399203658103942871f,0.91790080070495605469f,
+0.39680999517440795898f,0.91667908430099487305f,0.39962419867515563965f,
+0.91544872522354125977f,0.40243464708328247070f,0.91420978307723999023f,
+0.40524131059646606445f,0.91296219825744628906f,0.40804415941238403320f,
+0.91170603036880493164f,0.41084316372871398926f,0.91044127941131591797f,
+0.41363832354545593262f,0.90916800498962402344f,0.41642954945564270020f,
+0.90788608789443969727f,0.41921690106391906738f,0.90659570693969726562f,
+0.42200025916099548340f,0.90529674291610717773f,0.42477968335151672363f,
+0.90398931503295898438f,0.42755508422851562500f,0.90267330408096313477f,
+0.43032649159431457520f,0.90134882926940917969f,0.43309381604194641113f,
+0.90001589059829711914f,0.43585708737373352051f,0.89867448806762695312f,
+0.43861624598503112793f,0.89732456207275390625f,0.44137126207351684570f,
+0.89596623182296752930f,0.44412213563919067383f,0.89459949731826782227f,
+0.44686883687973022461f,0.89322429895401000977f,0.44961133599281311035f,
+0.89184069633483886719f,0.45234957337379455566f,0.89044874906539916992f,
+0.45508357882499694824f,0.88904833793640136719f,0.45781329274177551270f,
+0.88763964176177978516f,0.46053871512413024902f,0.88622254133224487305f,
+0.46325978636741638184f,0.88479709625244140625f,0.46597650647163391113f,
+0.88336336612701416016f,0.46868881583213806152f,0.88192129135131835938f,
+0.47139674425125122070f,0.88047087192535400391f,0.47410020232200622559f,
+0.87901222705841064453f,0.47679921984672546387f,0.87754529714584350586f,
+0.47949376702308654785f,0.87607008218765258789f,0.48218378424644470215f,
+0.87458664178848266602f,0.48486924171447753906f,0.87309497594833374023f,
+0.48755016922950744629f,0.87159508466720581055f,0.49022647738456726074f,
+0.87008696794509887695f,0.49289819598197937012f,0.86857068538665771484f,
+0.49556526541709899902f,0.86704623699188232422f,0.49822765588760375977f,
+0.86551362276077270508f,0.50088536739349365234f,0.86397284269332885742f,
+0.50353837013244628906f,0.86242395639419555664f,0.50618666410446166992f,
+0.86086696386337280273f,0.50883013010025024414f,0.85930180549621582031f,
+0.51146882772445678711f,0.85772860050201416016f,0.51410275697708129883f,
+0.85614734888076782227f,0.51673179864883422852f,0.85455799102783203125f,
+0.51935601234436035156f,0.85296058654785156250f,0.52197527885437011719f,
+0.85135519504547119141f,0.52458965778350830078f,0.84974175691604614258f,
+0.52719914913177490234f,0.84812033176422119141f,0.52980363368988037109f,
+0.84649091958999633789f,0.53240311145782470703f,0.84485357999801635742f,
+0.53499764204025268555f,0.84320825338363647461f,0.53758704662322998047f,
+0.84155499935150146484f,0.54017144441604614258f,0.83989381790161132812f,
+0.54275077581405639648f,0.83822470903396606445f,0.54532498121261596680f,
+0.83654773235321044922f,0.54789406061172485352f,0.83486288785934448242f,
+0.55045795440673828125f,0.83317017555236816406f,0.55301672220230102539f,
+0.83146959543228149414f,0.55557024478912353516f,0.82976120710372924805f,
+0.55811852216720581055f,0.82804507017135620117f,0.56066155433654785156f,
+0.82632106542587280273f,0.56319934129714965820f,0.82458931207656860352f,
+0.56573182344436645508f,0.82284981012344360352f,0.56825894117355346680f,
+0.82110249996185302734f,0.57078075408935546875f,0.81934750080108642578f,
+0.57329714298248291016f,0.81758481264114379883f,0.57580816745758056641f,
+0.81581443548202514648f,0.57831376791000366211f,0.81403630971908569336f,
+0.58081394433975219727f,0.81225061416625976562f,0.58330863714218139648f,
+0.81045717000961303711f,0.58579784631729125977f,0.80865615606307983398f,
+0.58828157186508178711f,0.80684757232666015625f,0.59075969457626342773f,
+0.80503135919570922852f,0.59323227405548095703f,0.80320751667022705078f,
+0.59569931030273437500f,0.80137616395950317383f,0.59816068410873413086f,
+0.79953724145889282227f,0.60061645507812500000f,0.79769086837768554688f,
+0.60306662321090698242f,0.79583692550659179688f,0.60551106929779052734f,
+0.79397547245025634766f,0.60794979333877563477f,0.79210656881332397461f,
+0.61038279533386230469f,0.79023021459579467773f,0.61281007528305053711f,
+0.78834640979766845703f,0.61523157358169555664f,0.78645521402359008789f,
+0.61764729022979736328f,0.78455656766891479492f,0.62005722522735595703f,
+0.78265058994293212891f,0.62246125936508178711f,0.78073722124099731445f,
+0.62485951185226440430f,0.77881652116775512695f,0.62725180387496948242f,
+0.77688848972320556641f,0.62963825464248657227f,0.77495312690734863281f,
+0.63201874494552612305f,0.77301043272018432617f,0.63439327478408813477f,
+0.77106052637100219727f,0.63676184415817260742f,0.76910334825515747070f,
+0.63912445306777954102f,0.76713889837265014648f,0.64148104190826416016f,
+0.76516723632812500000f,0.64383155107498168945f,0.76318842172622680664f,
+0.64617604017257690430f,0.76120239496231079102f,0.64851438999176025391f,
+0.75920921564102172852f,0.65084666013717651367f,0.75720882415771484375f,
+0.65317285060882568359f,0.75520139932632446289f,0.65549284219741821289f,
+0.75318682193756103516f,0.65780669450759887695f,0.75116515159606933594f,
+0.66011434793472290039f,0.74913638830184936523f,0.66241580247879028320f,
+0.74710059165954589844f,0.66471099853515625000f,0.74505776166915893555f,
+0.66699993610382080078f,0.74300795793533325195f,0.66928261518478393555f,
+0.74095112085342407227f,0.67155897617340087891f,0.73888731002807617188f,
+0.67382901906967163086f,0.73681658506393432617f,0.67609268426895141602f,
+0.73473888635635375977f,0.67835003137588500977f,0.73265427350997924805f,
+0.68060100078582763672f,0.73056274652481079102f,0.68284553289413452148f,
+0.72846436500549316406f,0.68508368730545043945f,0.72635912895202636719f,
+0.68731534481048583984f,0.72424709796905517578f,0.68954056501388549805f,
+0.72212821245193481445f,0.69175922870635986328f,0.72000253200531005859f,
+0.69397145509719848633f,0.71787005662918090820f,0.69617712497711181641f,
+0.71573084592819213867f,0.69837623834609985352f,0.71358484029769897461f,
+0.70056879520416259766f,0.71143221855163574219f,0.70275473594665527344f,
+0.70927280187606811523f,0.70493406057357788086f,0.70710676908493041992f,
+0.70710676908493041992f,0.70493406057357788086f,0.70927280187606811523f,
+0.70275473594665527344f,0.71143221855163574219f,0.70056879520416259766f,
+0.71358484029769897461f,0.69837623834609985352f,0.71573084592819213867f,
+0.69617712497711181641f,0.71787005662918090820f,0.69397145509719848633f,
+0.72000253200531005859f,0.69175922870635986328f,0.72212821245193481445f,
+0.68954056501388549805f,0.72424709796905517578f,0.68731534481048583984f,
+0.72635912895202636719f,0.68508368730545043945f,0.72846436500549316406f,
+0.68284553289413452148f,0.73056274652481079102f,0.68060100078582763672f,
+0.73265427350997924805f,0.67835003137588500977f,0.73473888635635375977f,
+0.67609268426895141602f,0.73681658506393432617f,0.67382901906967163086f,
+0.73888731002807617188f,0.67155897617340087891f,0.74095112085342407227f,
+0.66928261518478393555f,0.74300795793533325195f,0.66699993610382080078f,
+0.74505776166915893555f,0.66471099853515625000f,0.74710059165954589844f,
+0.66241580247879028320f,0.74913638830184936523f,0.66011434793472290039f,
+0.75116515159606933594f,0.65780669450759887695f,0.75318682193756103516f,
+0.65549284219741821289f,0.75520139932632446289f,0.65317285060882568359f,
+0.75720882415771484375f,0.65084666013717651367f,0.75920921564102172852f,
+0.64851438999176025391f,0.76120239496231079102f,0.64617604017257690430f,
+0.76318842172622680664f,0.64383155107498168945f,0.76516723632812500000f,
+0.64148104190826416016f,0.76713889837265014648f,0.63912445306777954102f,
+0.76910334825515747070f,0.63676184415817260742f,0.77106052637100219727f,
+0.63439327478408813477f,0.77301043272018432617f,0.63201874494552612305f,
+0.77495312690734863281f,0.62963825464248657227f,0.77688848972320556641f,
+0.62725180387496948242f,0.77881652116775512695f,0.62485951185226440430f,
+0.78073722124099731445f,0.62246125936508178711f,0.78265058994293212891f,
+0.62005722522735595703f,0.78455656766891479492f,0.61764729022979736328f,
+0.78645521402359008789f,0.61523157358169555664f,0.78834640979766845703f,
+0.61281007528305053711f,0.79023021459579467773f,0.61038279533386230469f,
+0.79210656881332397461f,0.60794979333877563477f,0.79397547245025634766f,
+0.60551106929779052734f,0.79583692550659179688f,0.60306662321090698242f,
+0.79769086837768554688f,0.60061645507812500000f,0.79953724145889282227f,
+0.59816068410873413086f,0.80137616395950317383f,0.59569931030273437500f,
+0.80320751667022705078f,0.59323227405548095703f,0.80503135919570922852f,
+0.59075969457626342773f,0.80684757232666015625f,0.58828157186508178711f,
+0.80865615606307983398f,0.58579784631729125977f,0.81045717000961303711f,
+0.58330863714218139648f,0.81225061416625976562f,0.58081394433975219727f,
+0.81403630971908569336f,0.57831376791000366211f,0.81581443548202514648f,
+0.57580816745758056641f,0.81758481264114379883f,0.57329714298248291016f,
+0.81934750080108642578f,0.57078075408935546875f,0.82110249996185302734f,
+0.56825894117355346680f,0.82284981012344360352f,0.56573182344436645508f,
+0.82458931207656860352f,0.56319934129714965820f,0.82632106542587280273f,
+0.56066155433654785156f,0.82804507017135620117f,0.55811852216720581055f,
+0.82976120710372924805f,0.55557024478912353516f,0.83146959543228149414f,
+0.55301672220230102539f,0.83317017555236816406f,0.55045795440673828125f,
+0.83486288785934448242f,0.54789406061172485352f,0.83654773235321044922f,
+0.54532498121261596680f,0.83822470903396606445f,0.54275077581405639648f,
+0.83989381790161132812f,0.54017144441604614258f,0.84155499935150146484f,
+0.53758704662322998047f,0.84320825338363647461f,0.53499764204025268555f,
+0.84485357999801635742f,0.53240311145782470703f,0.84649091958999633789f,
+0.52980363368988037109f,0.84812033176422119141f,0.52719914913177490234f,
+0.84974175691604614258f,0.52458965778350830078f,0.85135519504547119141f,
+0.52197527885437011719f,0.85296058654785156250f,0.51935601234436035156f,
+0.85455799102783203125f,0.51673179864883422852f,0.85614734888076782227f,
+0.51410275697708129883f,0.85772860050201416016f,0.51146882772445678711f,
+0.85930180549621582031f,0.50883013010025024414f,0.86086696386337280273f,
+0.50618666410446166992f,0.86242395639419555664f,0.50353837013244628906f,
+0.86397284269332885742f,0.50088536739349365234f,0.86551362276077270508f,
+0.49822765588760375977f,0.86704623699188232422f,0.49556526541709899902f,
+0.86857068538665771484f,0.49289819598197937012f,0.87008696794509887695f,
+0.49022647738456726074f,0.87159508466720581055f,0.48755016922950744629f,
+0.87309497594833374023f,0.48486924171447753906f,0.87458664178848266602f,
+0.48218378424644470215f,0.87607008218765258789f,0.47949376702308654785f,
+0.87754529714584350586f,0.47679921984672546387f,0.87901222705841064453f,
+0.47410020232200622559f,0.88047087192535400391f,0.47139674425125122070f,
+0.88192129135131835938f,0.46868881583213806152f,0.88336336612701416016f,
+0.46597650647163391113f,0.88479709625244140625f,0.46325978636741638184f,
+0.88622254133224487305f,0.46053871512413024902f,0.88763964176177978516f,
+0.45781329274177551270f,0.88904833793640136719f,0.45508357882499694824f,
+0.89044874906539916992f,0.45234957337379455566f,0.89184069633483886719f,
+0.44961133599281311035f,0.89322429895401000977f,0.44686883687973022461f,
+0.89459949731826782227f,0.44412213563919067383f,0.89596623182296752930f,
+0.44137126207351684570f,0.89732456207275390625f,0.43861624598503112793f,
+0.89867448806762695312f,0.43585708737373352051f,0.90001589059829711914f,
+0.43309381604194641113f,0.90134882926940917969f,0.43032649159431457520f,
+0.90267330408096313477f,0.42755508422851562500f,0.90398931503295898438f,
+0.42477968335151672363f,0.90529674291610717773f,0.42200025916099548340f,
+0.90659570693969726562f,0.41921690106391906738f,0.90788608789443969727f,
+0.41642954945564270020f,0.90916800498962402344f,0.41363832354545593262f,
+0.91044127941131591797f,0.41084316372871398926f,0.91170603036880493164f,
+0.40804415941238403320f,0.91296219825744628906f,0.40524131059646606445f,
+0.91420978307723999023f,0.40243464708328247070f,0.91544872522354125977f,
+0.39962419867515563965f,0.91667908430099487305f,0.39680999517440795898f,
+0.91790080070495605469f,0.39399203658103942871f,0.91911387443542480469f,
+0.39117038249969482422f,0.92031830549240112305f,0.38834503293037414551f,
+0.92151403427124023438f,0.38551604747772216797f,0.92270112037658691406f,
+0.38268342614173889160f,0.92387950420379638672f,0.37984719872474670410f,
+0.92504924535751342773f,0.37700742483139038086f,0.92621022462844848633f,
+0.37416407465934753418f,0.92736250162124633789f,0.37131720781326293945f,
+0.92850607633590698242f,0.36846682429313659668f,0.92964088916778564453f,
+0.36561298370361328125f,0.93076694011688232422f,0.36275571584701538086f,
+0.93188428878784179688f,0.35989505052566528320f,0.93299281597137451172f,
+0.35703095793724060059f,0.93409252166748046875f,0.35416352748870849609f,
+0.93518352508544921875f,0.35129275918006896973f,0.93626564741134643555f,
+0.34841868281364440918f,0.93733900785446166992f,0.34554132819175720215f,
+0.93840354681015014648f,0.34266072511672973633f,0.93945920467376708984f,
+0.33977687358856201172f,0.94050604104995727539f,0.33688986301422119141f,
+0.94154405593872070312f,0.33399966359138488770f,0.94257318973541259766f,
+0.33110630512237548828f,0.94359344244003295898f,0.32820984721183776855f,
+0.94460481405258178711f,0.32531028985977172852f,0.94560730457305908203f,
+0.32240769267082214355f,0.94660091400146484375f,0.31950202584266662598f,
+0.94758558273315429688f,0.31659337878227233887f,0.94856137037277221680f,
+0.31368175148963928223f,0.94952815771102905273f,0.31076714396476745605f,
+0.95048606395721435547f,0.30784964561462402344f,0.95143502950668334961f,
+0.30492922663688659668f,0.95237499475479125977f,0.30200594663619995117f,
+0.95330601930618286133f,0.29907983541488647461f,0.95422810316085815430f,
+0.29615089297294616699f,0.95514118671417236328f,0.29321914911270141602f,
+0.95604526996612548828f,0.29028466343879699707f,0.95694035291671752930f,
+0.28734746575355529785f,0.95782643556594848633f,0.28440752625465393066f,
+0.95870345830917358398f,0.28146493434906005859f,0.95957154035568237305f,
+0.27851969003677368164f,0.96043050289154052734f,0.27557182312011718750f,
+0.96128046512603759766f,0.27262136340141296387f,0.96212142705917358398f,
+0.26966831088066101074f,0.96295326948165893555f,0.26671275496482849121f,
+0.96377605199813842773f,0.26375466585159301758f,0.96458977460861206055f,
+0.26079410314559936523f,0.96539443731307983398f,0.25783109664916992188f,
+0.96618998050689697266f,0.25486564636230468750f,0.96697646379470825195f,
+0.25189781188964843750f,0.96775382757186889648f,0.24892760813236236572f,
+0.96852207183837890625f,0.24595504999160766602f,0.96928125619888305664f,
+0.24298018217086791992f,0.97003126144409179688f,0.24000301957130432129f,
+0.97077214717864990234f,0.23702360689640045166f,0.97150391340255737305f,
+0.23404195904731750488f,0.97222650051116943359f,0.23105810582637786865f,
+0.97293996810913085938f,0.22807207703590393066f,0.97364425659179687500f,
+0.22508391737937927246f,0.97433936595916748047f,0.22209362685680389404f,
+0.97502535581588745117f,0.21910123527050018311f,0.97570210695266723633f,
+0.21610680222511291504f,0.97636973857879638672f,0.21311031281948089600f,
+0.97702813148498535156f,0.21011184155941009521f,0.97767734527587890625f,
+0.20711137354373931885f,0.97831737995147705078f,0.20410896837711334229f,
+0.97894817590713500977f,0.20110464096069335938f,0.97956979274749755859f,
+0.19809840619564056396f,0.98018211126327514648f,0.19509032368659973145f,
+0.98078525066375732422f,0.19208039343357086182f,0.98137921094894409180f,
+0.18906866014003753662f,0.98196387290954589844f,0.18605515360832214355f,
+0.98253929615020751953f,0.18303988873958587646f,0.98310548067092895508f,
+0.18002289533615112305f,0.98366242647171020508f,0.17700421810150146484f,
+0.98421007394790649414f,0.17398387193679809570f,0.98474848270416259766f,
+0.17096188664436340332f,0.98527765274047851562f,0.16793829202651977539f,
+0.98579752445220947266f,0.16491311788558959961f,0.98630809783935546875f,
+0.16188639402389526367f,0.98680937290191650391f,0.15885815024375915527f,
+0.98730140924453735352f,0.15582840144634246826f,0.98778414726257324219f,
+0.15279719233512878418f,0.98825758695602416992f,0.14976453781127929688f,
+0.98872166872024536133f,0.14673046767711639404f,0.98917651176452636719f,
+0.14369502663612365723f,0.98962199687957763672f,0.14065824449062347412f,
+0.99005818367004394531f,0.13762012124061584473f,0.99048507213592529297f,
+0.13458070158958435059f,0.99090266227722167969f,0.13154003024101257324f,
+0.99131083488464355469f,0.12849810719490051270f,0.99170976877212524414f,
+0.12545497715473175049f,0.99209928512573242188f,0.12241067737340927124f,
+0.99247956275939941406f,0.11936521530151367188f,0.99285042285919189453f,
+0.11631862819194793701f,0.99321192502975463867f,0.11327095329761505127f,
+0.99356412887573242188f,0.11022220551967620850f,0.99390697479248046875f,
+0.10717242211103439331f,0.99424046277999877930f,0.10412163287401199341f,
+0.99456459283828735352f,0.10106986016035079956f,0.99487930536270141602f,
+0.09801714122295379639f,0.99518471956253051758f,0.09496349841356277466f,
+0.99548077583312988281f,0.09190895408391952515f,0.99576741456985473633f,
+0.08885355293750762939f,0.99604469537734985352f,0.08579730987548828125f,
+0.99631261825561523438f,0.08274026215076446533f,0.99657112360000610352f,
+0.07968243956565856934f,0.99682027101516723633f,0.07662386447191238403f,
+0.99706006050109863281f,0.07356456667184829712f,0.99729043245315551758f,
+0.07050457596778869629f,0.99751144647598266602f,0.06744392216205596924f,
+0.99772304296493530273f,0.06438262760639190674f,0.99792528152465820312f,
+0.06132073700428009033f,0.99811810255050659180f,0.05825826525688171387f,
+0.99830156564712524414f,0.05519524589180946350f,0.99847555160522460938f,
+0.05213170498609542847f,0.99864023923873901367f,0.04906767606735229492f,
+0.99879544973373413086f,0.04600318148732185364f,0.99894130229949951172f,
+0.04293825849890708923f,0.99907773733139038086f,0.03987292572855949402f,
+0.99920475482940673828f,0.03680722415447235107f,0.99932235479354858398f,
+0.03374117240309715271f,0.99943059682846069336f,0.03067480400204658508f,
+0.99952942132949829102f,0.02760814502835273743f,0.99961882829666137695f,
+0.02454122900962829590f,0.99969881772994995117f,0.02147408016026020050f,
+0.99976938962936401367f,0.01840673014521598816f,0.99983060359954833984f,
+0.01533920597285032272f,0.99988234043121337891f,0.01227153837680816650f,
+0.99992471933364868164f,0.00920375436544418335f,0.99995762109756469727f,
+0.00613588467240333557f,0.99998116493225097656f,0.00306795677170157433f,
+0.99999529123306274414f,0.00000000000000006123f,1.00000000000000000000f,
+-0.00306795677170157433f,0.99999529123306274414f,-0.00613588467240333557f,
+0.99998116493225097656f,-0.00920375436544418335f,0.99995762109756469727f,
+-0.01227153837680816650f,0.99992471933364868164f,-0.01533920597285032272f,
+0.99988234043121337891f,-0.01840673014521598816f,0.99983060359954833984f,
+-0.02147408016026020050f,0.99976938962936401367f,-0.02454122900962829590f,
+0.99969881772994995117f,-0.02760814502835273743f,0.99961882829666137695f,
+-0.03067480400204658508f,0.99952942132949829102f,-0.03374117240309715271f,
+0.99943059682846069336f,-0.03680722415447235107f,0.99932235479354858398f,
+-0.03987292572855949402f,0.99920475482940673828f,-0.04293825849890708923f,
+0.99907773733139038086f,-0.04600318148732185364f,0.99894130229949951172f,
+-0.04906767606735229492f,0.99879544973373413086f,-0.05213170498609542847f,
+0.99864023923873901367f,-0.05519524589180946350f,0.99847555160522460938f,
+-0.05825826525688171387f,0.99830156564712524414f,-0.06132073700428009033f,
+0.99811810255050659180f,-0.06438262760639190674f,0.99792528152465820312f,
+-0.06744392216205596924f,0.99772304296493530273f,-0.07050457596778869629f,
+0.99751144647598266602f,-0.07356456667184829712f,0.99729043245315551758f,
+-0.07662386447191238403f,0.99706006050109863281f,-0.07968243956565856934f,
+0.99682027101516723633f,-0.08274026215076446533f,0.99657112360000610352f,
+-0.08579730987548828125f,0.99631261825561523438f,-0.08885355293750762939f,
+0.99604469537734985352f,-0.09190895408391952515f,0.99576741456985473633f,
+-0.09496349841356277466f,0.99548077583312988281f,-0.09801714122295379639f,
+0.99518471956253051758f,-0.10106986016035079956f,0.99487930536270141602f,
+-0.10412163287401199341f,0.99456459283828735352f,-0.10717242211103439331f,
+0.99424046277999877930f,-0.11022220551967620850f,0.99390697479248046875f,
+-0.11327095329761505127f,0.99356412887573242188f,-0.11631862819194793701f,
+0.99321192502975463867f,-0.11936521530151367188f,0.99285042285919189453f,
+-0.12241067737340927124f,0.99247956275939941406f,-0.12545497715473175049f,
+0.99209928512573242188f,-0.12849810719490051270f,0.99170976877212524414f,
+-0.13154003024101257324f,0.99131083488464355469f,-0.13458070158958435059f,
+0.99090266227722167969f,-0.13762012124061584473f,0.99048507213592529297f,
+-0.14065824449062347412f,0.99005818367004394531f,-0.14369502663612365723f,
+0.98962199687957763672f,-0.14673046767711639404f,0.98917651176452636719f,
+-0.14976453781127929688f,0.98872166872024536133f,-0.15279719233512878418f,
+0.98825758695602416992f,-0.15582840144634246826f,0.98778414726257324219f,
+-0.15885815024375915527f,0.98730140924453735352f,-0.16188639402389526367f,
+0.98680937290191650391f,-0.16491311788558959961f,0.98630809783935546875f,
+-0.16793829202651977539f,0.98579752445220947266f,-0.17096188664436340332f,
+0.98527765274047851562f,-0.17398387193679809570f,0.98474848270416259766f,
+-0.17700421810150146484f,0.98421007394790649414f,-0.18002289533615112305f,
+0.98366242647171020508f,-0.18303988873958587646f,0.98310548067092895508f,
+-0.18605515360832214355f,0.98253929615020751953f,-0.18906866014003753662f,
+0.98196387290954589844f,-0.19208039343357086182f,0.98137921094894409180f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.19809840619564056396f,
+0.98018211126327514648f,-0.20110464096069335938f,0.97956979274749755859f,
+-0.20410896837711334229f,0.97894817590713500977f,-0.20711137354373931885f,
+0.97831737995147705078f,-0.21011184155941009521f,0.97767734527587890625f,
+-0.21311031281948089600f,0.97702813148498535156f,-0.21610680222511291504f,
+0.97636973857879638672f,-0.21910123527050018311f,0.97570210695266723633f,
+-0.22209362685680389404f,0.97502535581588745117f,-0.22508391737937927246f,
+0.97433936595916748047f,-0.22807207703590393066f,0.97364425659179687500f,
+-0.23105810582637786865f,0.97293996810913085938f,-0.23404195904731750488f,
+0.97222650051116943359f,-0.23702360689640045166f,0.97150391340255737305f,
+-0.24000301957130432129f,0.97077214717864990234f,-0.24298018217086791992f,
+0.97003126144409179688f,-0.24595504999160766602f,0.96928125619888305664f,
+-0.24892760813236236572f,0.96852207183837890625f,-0.25189781188964843750f,
+0.96775382757186889648f,-0.25486564636230468750f,0.96697646379470825195f,
+-0.25783109664916992188f,0.96618998050689697266f,-0.26079410314559936523f,
+0.96539443731307983398f,-0.26375466585159301758f,0.96458977460861206055f,
+-0.26671275496482849121f,0.96377605199813842773f,-0.26966831088066101074f,
+0.96295326948165893555f,-0.27262136340141296387f,0.96212142705917358398f,
+-0.27557182312011718750f,0.96128046512603759766f,-0.27851969003677368164f,
+0.96043050289154052734f,-0.28146493434906005859f,0.95957154035568237305f,
+-0.28440752625465393066f,0.95870345830917358398f,-0.28734746575355529785f,
+0.95782643556594848633f,-0.29028466343879699707f,0.95694035291671752930f,
+-0.29321914911270141602f,0.95604526996612548828f,-0.29615089297294616699f,
+0.95514118671417236328f,-0.29907983541488647461f,0.95422810316085815430f,
+-0.30200594663619995117f,0.95330601930618286133f,-0.30492922663688659668f,
+0.95237499475479125977f,-0.30784964561462402344f,0.95143502950668334961f,
+-0.31076714396476745605f,0.95048606395721435547f,-0.31368175148963928223f,
+0.94952815771102905273f,-0.31659337878227233887f,0.94856137037277221680f,
+-0.31950202584266662598f,0.94758558273315429688f,-0.32240769267082214355f,
+0.94660091400146484375f,-0.32531028985977172852f,0.94560730457305908203f,
+-0.32820984721183776855f,0.94460481405258178711f,-0.33110630512237548828f,
+0.94359344244003295898f,-0.33399966359138488770f,0.94257318973541259766f,
+-0.33688986301422119141f,0.94154405593872070312f,-0.33977687358856201172f,
+0.94050604104995727539f,-0.34266072511672973633f,0.93945920467376708984f,
+-0.34554132819175720215f,0.93840354681015014648f,-0.34841868281364440918f,
+0.93733900785446166992f,-0.35129275918006896973f,0.93626564741134643555f,
+-0.35416352748870849609f,0.93518352508544921875f,-0.35703095793724060059f,
+0.93409252166748046875f,-0.35989505052566528320f,0.93299281597137451172f,
+-0.36275571584701538086f,0.93188428878784179688f,-0.36561298370361328125f,
+0.93076694011688232422f,-0.36846682429313659668f,0.92964088916778564453f,
+-0.37131720781326293945f,0.92850607633590698242f,-0.37416407465934753418f,
+0.92736250162124633789f,-0.37700742483139038086f,0.92621022462844848633f,
+-0.37984719872474670410f,0.92504924535751342773f,-0.38268342614173889160f,
+0.92387950420379638672f,-0.38551604747772216797f,0.92270112037658691406f,
+-0.38834503293037414551f,0.92151403427124023438f,-0.39117038249969482422f,
+0.92031830549240112305f,-0.39399203658103942871f,0.91911387443542480469f,
+-0.39680999517440795898f,0.91790080070495605469f,-0.39962419867515563965f,
+0.91667908430099487305f,-0.40243464708328247070f,0.91544872522354125977f,
+-0.40524131059646606445f,0.91420978307723999023f,-0.40804415941238403320f,
+0.91296219825744628906f,-0.41084316372871398926f,0.91170603036880493164f,
+-0.41363832354545593262f,0.91044127941131591797f,-0.41642954945564270020f,
+0.90916800498962402344f,-0.41921690106391906738f,0.90788608789443969727f,
+-0.42200025916099548340f,0.90659570693969726562f,-0.42477968335151672363f,
+0.90529674291610717773f,-0.42755508422851562500f,0.90398931503295898438f,
+-0.43032649159431457520f,0.90267330408096313477f,-0.43309381604194641113f,
+0.90134882926940917969f,-0.43585708737373352051f,0.90001589059829711914f,
+-0.43861624598503112793f,0.89867448806762695312f,-0.44137126207351684570f,
+0.89732456207275390625f,-0.44412213563919067383f,0.89596623182296752930f,
+-0.44686883687973022461f,0.89459949731826782227f,-0.44961133599281311035f,
+0.89322429895401000977f,-0.45234957337379455566f,0.89184069633483886719f,
+-0.45508357882499694824f,0.89044874906539916992f,-0.45781329274177551270f,
+0.88904833793640136719f,-0.46053871512413024902f,0.88763964176177978516f,
+-0.46325978636741638184f,0.88622254133224487305f,-0.46597650647163391113f,
+0.88479709625244140625f,-0.46868881583213806152f,0.88336336612701416016f,
+-0.47139674425125122070f,0.88192129135131835938f,-0.47410020232200622559f,
+0.88047087192535400391f,-0.47679921984672546387f,0.87901222705841064453f,
+-0.47949376702308654785f,0.87754529714584350586f,-0.48218378424644470215f,
+0.87607008218765258789f,-0.48486924171447753906f,0.87458664178848266602f,
+-0.48755016922950744629f,0.87309497594833374023f,-0.49022647738456726074f,
+0.87159508466720581055f,-0.49289819598197937012f,0.87008696794509887695f,
+-0.49556526541709899902f,0.86857068538665771484f,-0.49822765588760375977f,
+0.86704623699188232422f,-0.50088536739349365234f,0.86551362276077270508f,
+-0.50353837013244628906f,0.86397284269332885742f,-0.50618666410446166992f,
+0.86242395639419555664f,-0.50883013010025024414f,0.86086696386337280273f,
+-0.51146882772445678711f,0.85930180549621582031f,-0.51410275697708129883f,
+0.85772860050201416016f,-0.51673179864883422852f,0.85614734888076782227f,
+-0.51935601234436035156f,0.85455799102783203125f,-0.52197527885437011719f,
+0.85296058654785156250f,-0.52458965778350830078f,0.85135519504547119141f,
+-0.52719914913177490234f,0.84974175691604614258f,-0.52980363368988037109f,
+0.84812033176422119141f,-0.53240311145782470703f,0.84649091958999633789f,
+-0.53499764204025268555f,0.84485357999801635742f,-0.53758704662322998047f,
+0.84320825338363647461f,-0.54017144441604614258f,0.84155499935150146484f,
+-0.54275077581405639648f,0.83989381790161132812f,-0.54532498121261596680f,
+0.83822470903396606445f,-0.54789406061172485352f,0.83654773235321044922f,
+-0.55045795440673828125f,0.83486288785934448242f,-0.55301672220230102539f,
+0.83317017555236816406f,-0.55557024478912353516f,0.83146959543228149414f,
+-0.55811852216720581055f,0.82976120710372924805f,-0.56066155433654785156f,
+0.82804507017135620117f,-0.56319934129714965820f,0.82632106542587280273f,
+-0.56573182344436645508f,0.82458931207656860352f,-0.56825894117355346680f,
+0.82284981012344360352f,-0.57078075408935546875f,0.82110249996185302734f,
+-0.57329714298248291016f,0.81934750080108642578f,-0.57580816745758056641f,
+0.81758481264114379883f,-0.57831376791000366211f,0.81581443548202514648f,
+-0.58081394433975219727f,0.81403630971908569336f,-0.58330863714218139648f,
+0.81225061416625976562f,-0.58579784631729125977f,0.81045717000961303711f,
+-0.58828157186508178711f,0.80865615606307983398f,-0.59075969457626342773f,
+0.80684757232666015625f,-0.59323227405548095703f,0.80503135919570922852f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.59816068410873413086f,
+0.80137616395950317383f,-0.60061645507812500000f,0.79953724145889282227f,
+-0.60306662321090698242f,0.79769086837768554688f,-0.60551106929779052734f,
+0.79583692550659179688f,-0.60794979333877563477f,0.79397547245025634766f,
+-0.61038279533386230469f,0.79210656881332397461f,-0.61281007528305053711f,
+0.79023021459579467773f,-0.61523157358169555664f,0.78834640979766845703f,
+-0.61764729022979736328f,0.78645521402359008789f,-0.62005722522735595703f,
+0.78455656766891479492f,-0.62246125936508178711f,0.78265058994293212891f,
+-0.62485951185226440430f,0.78073722124099731445f,-0.62725180387496948242f,
+0.77881652116775512695f,-0.62963825464248657227f,0.77688848972320556641f,
+-0.63201874494552612305f,0.77495312690734863281f,-0.63439327478408813477f,
+0.77301043272018432617f,-0.63676184415817260742f,0.77106052637100219727f,
+-0.63912445306777954102f,0.76910334825515747070f,-0.64148104190826416016f,
+0.76713889837265014648f,-0.64383155107498168945f,0.76516723632812500000f,
+-0.64617604017257690430f,0.76318842172622680664f,-0.64851438999176025391f,
+0.76120239496231079102f,-0.65084666013717651367f,0.75920921564102172852f,
+-0.65317285060882568359f,0.75720882415771484375f,-0.65549284219741821289f,
+0.75520139932632446289f,-0.65780669450759887695f,0.75318682193756103516f,
+-0.66011434793472290039f,0.75116515159606933594f,-0.66241580247879028320f,
+0.74913638830184936523f,-0.66471099853515625000f,0.74710059165954589844f,
+-0.66699993610382080078f,0.74505776166915893555f,-0.66928261518478393555f,
+0.74300795793533325195f,-0.67155897617340087891f,0.74095112085342407227f,
+-0.67382901906967163086f,0.73888731002807617188f,-0.67609268426895141602f,
+0.73681658506393432617f,-0.67835003137588500977f,0.73473888635635375977f,
+-0.68060100078582763672f,0.73265427350997924805f,-0.68284553289413452148f,
+0.73056274652481079102f,-0.68508368730545043945f,0.72846436500549316406f,
+-0.68731534481048583984f,0.72635912895202636719f,-0.68954056501388549805f,
+0.72424709796905517578f,-0.69175922870635986328f,0.72212821245193481445f,
+-0.69397145509719848633f,0.72000253200531005859f,-0.69617712497711181641f,
+0.71787005662918090820f,-0.69837623834609985352f,0.71573084592819213867f,
+-0.70056879520416259766f,0.71358484029769897461f,-0.70275473594665527344f,
+0.71143221855163574219f,-0.70493406057357788086f,0.70927280187606811523f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.70927280187606811523f,
+0.70493406057357788086f,-0.71143221855163574219f,0.70275473594665527344f,
+-0.71358484029769897461f,0.70056879520416259766f,-0.71573084592819213867f,
+0.69837623834609985352f,-0.71787005662918090820f,0.69617712497711181641f,
+-0.72000253200531005859f,0.69397145509719848633f,-0.72212821245193481445f,
+0.69175922870635986328f,-0.72424709796905517578f,0.68954056501388549805f,
+-0.72635912895202636719f,0.68731534481048583984f,-0.72846436500549316406f,
+0.68508368730545043945f,-0.73056274652481079102f,0.68284553289413452148f,
+-0.73265427350997924805f,0.68060100078582763672f,-0.73473888635635375977f,
+0.67835003137588500977f,-0.73681658506393432617f,0.67609268426895141602f,
+-0.73888731002807617188f,0.67382901906967163086f,-0.74095112085342407227f,
+0.67155897617340087891f,-0.74300795793533325195f,0.66928261518478393555f,
+-0.74505776166915893555f,0.66699993610382080078f,-0.74710059165954589844f,
+0.66471099853515625000f,-0.74913638830184936523f,0.66241580247879028320f,
+-0.75116515159606933594f,0.66011434793472290039f,-0.75318682193756103516f,
+0.65780669450759887695f,-0.75520139932632446289f,0.65549284219741821289f,
+-0.75720882415771484375f,0.65317285060882568359f,-0.75920921564102172852f,
+0.65084666013717651367f,-0.76120239496231079102f,0.64851438999176025391f,
+-0.76318842172622680664f,0.64617604017257690430f,-0.76516723632812500000f,
+0.64383155107498168945f,-0.76713889837265014648f,0.64148104190826416016f,
+-0.76910334825515747070f,0.63912445306777954102f,-0.77106052637100219727f,
+0.63676184415817260742f,-0.77301043272018432617f,0.63439327478408813477f,
+-0.77495312690734863281f,0.63201874494552612305f,-0.77688848972320556641f,
+0.62963825464248657227f,-0.77881652116775512695f,0.62725180387496948242f,
+-0.78073722124099731445f,0.62485951185226440430f,-0.78265058994293212891f,
+0.62246125936508178711f,-0.78455656766891479492f,0.62005722522735595703f,
+-0.78645521402359008789f,0.61764729022979736328f,-0.78834640979766845703f,
+0.61523157358169555664f,-0.79023021459579467773f,0.61281007528305053711f,
+-0.79210656881332397461f,0.61038279533386230469f,-0.79397547245025634766f,
+0.60794979333877563477f,-0.79583692550659179688f,0.60551106929779052734f,
+-0.79769086837768554688f,0.60306662321090698242f,-0.79953724145889282227f,
+0.60061645507812500000f,-0.80137616395950317383f,0.59816068410873413086f,
+-0.80320751667022705078f,0.59569931030273437500f,-0.80503135919570922852f,
+0.59323227405548095703f,-0.80684757232666015625f,0.59075969457626342773f,
+-0.80865615606307983398f,0.58828157186508178711f,-0.81045717000961303711f,
+0.58579784631729125977f,-0.81225061416625976562f,0.58330863714218139648f,
+-0.81403630971908569336f,0.58081394433975219727f,-0.81581443548202514648f,
+0.57831376791000366211f,-0.81758481264114379883f,0.57580816745758056641f,
+-0.81934750080108642578f,0.57329714298248291016f,-0.82110249996185302734f,
+0.57078075408935546875f,-0.82284981012344360352f,0.56825894117355346680f,
+-0.82458931207656860352f,0.56573182344436645508f,-0.82632106542587280273f,
+0.56319934129714965820f,-0.82804507017135620117f,0.56066155433654785156f,
+-0.82976120710372924805f,0.55811852216720581055f,-0.83146959543228149414f,
+0.55557024478912353516f,-0.83317017555236816406f,0.55301672220230102539f,
+-0.83486288785934448242f,0.55045795440673828125f,-0.83654773235321044922f,
+0.54789406061172485352f,-0.83822470903396606445f,0.54532498121261596680f,
+-0.83989381790161132812f,0.54275077581405639648f,-0.84155499935150146484f,
+0.54017144441604614258f,-0.84320825338363647461f,0.53758704662322998047f,
+-0.84485357999801635742f,0.53499764204025268555f,-0.84649091958999633789f,
+0.53240311145782470703f,-0.84812033176422119141f,0.52980363368988037109f,
+-0.84974175691604614258f,0.52719914913177490234f,-0.85135519504547119141f,
+0.52458965778350830078f,-0.85296058654785156250f,0.52197527885437011719f,
+-0.85455799102783203125f,0.51935601234436035156f,-0.85614734888076782227f,
+0.51673179864883422852f,-0.85772860050201416016f,0.51410275697708129883f,
+-0.85930180549621582031f,0.51146882772445678711f,-0.86086696386337280273f,
+0.50883013010025024414f,-0.86242395639419555664f,0.50618666410446166992f,
+-0.86397284269332885742f,0.50353837013244628906f,-0.86551362276077270508f,
+0.50088536739349365234f,-0.86704623699188232422f,0.49822765588760375977f,
+-0.86857068538665771484f,0.49556526541709899902f,-0.87008696794509887695f,
+0.49289819598197937012f,-0.87159508466720581055f,0.49022647738456726074f,
+-0.87309497594833374023f,0.48755016922950744629f,-0.87458664178848266602f,
+0.48486924171447753906f,-0.87607008218765258789f,0.48218378424644470215f,
+-0.87754529714584350586f,0.47949376702308654785f,-0.87901222705841064453f,
+0.47679921984672546387f,-0.88047087192535400391f,0.47410020232200622559f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.88336336612701416016f,
+0.46868881583213806152f,-0.88479709625244140625f,0.46597650647163391113f,
+-0.88622254133224487305f,0.46325978636741638184f,-0.88763964176177978516f,
+0.46053871512413024902f,-0.88904833793640136719f,0.45781329274177551270f,
+-0.89044874906539916992f,0.45508357882499694824f,-0.89184069633483886719f,
+0.45234957337379455566f,-0.89322429895401000977f,0.44961133599281311035f,
+-0.89459949731826782227f,0.44686883687973022461f,-0.89596623182296752930f,
+0.44412213563919067383f,-0.89732456207275390625f,0.44137126207351684570f,
+-0.89867448806762695312f,0.43861624598503112793f,-0.90001589059829711914f,
+0.43585708737373352051f,-0.90134882926940917969f,0.43309381604194641113f,
+-0.90267330408096313477f,0.43032649159431457520f,-0.90398931503295898438f,
+0.42755508422851562500f,-0.90529674291610717773f,0.42477968335151672363f,
+-0.90659570693969726562f,0.42200025916099548340f,-0.90788608789443969727f,
+0.41921690106391906738f,-0.90916800498962402344f,0.41642954945564270020f,
+-0.91044127941131591797f,0.41363832354545593262f,-0.91170603036880493164f,
+0.41084316372871398926f,-0.91296219825744628906f,0.40804415941238403320f,
+-0.91420978307723999023f,0.40524131059646606445f,-0.91544872522354125977f,
+0.40243464708328247070f,-0.91667908430099487305f,0.39962419867515563965f,
+-0.91790080070495605469f,0.39680999517440795898f,-0.91911387443542480469f,
+0.39399203658103942871f,-0.92031830549240112305f,0.39117038249969482422f,
+-0.92151403427124023438f,0.38834503293037414551f,-0.92270112037658691406f,
+0.38551604747772216797f,-0.92387950420379638672f,0.38268342614173889160f,
+-0.92504924535751342773f,0.37984719872474670410f,-0.92621022462844848633f,
+0.37700742483139038086f,-0.92736250162124633789f,0.37416407465934753418f,
+-0.92850607633590698242f,0.37131720781326293945f,-0.92964088916778564453f,
+0.36846682429313659668f,-0.93076694011688232422f,0.36561298370361328125f,
+-0.93188428878784179688f,0.36275571584701538086f,-0.93299281597137451172f,
+0.35989505052566528320f,-0.93409252166748046875f,0.35703095793724060059f,
+-0.93518352508544921875f,0.35416352748870849609f,-0.93626564741134643555f,
+0.35129275918006896973f,-0.93733900785446166992f,0.34841868281364440918f,
+-0.93840354681015014648f,0.34554132819175720215f,-0.93945920467376708984f,
+0.34266072511672973633f,-0.94050604104995727539f,0.33977687358856201172f,
+-0.94154405593872070312f,0.33688986301422119141f,-0.94257318973541259766f,
+0.33399966359138488770f,-0.94359344244003295898f,0.33110630512237548828f,
+-0.94460481405258178711f,0.32820984721183776855f,-0.94560730457305908203f,
+0.32531028985977172852f,-0.94660091400146484375f,0.32240769267082214355f,
+-0.94758558273315429688f,0.31950202584266662598f,-0.94856137037277221680f,
+0.31659337878227233887f,-0.94952815771102905273f,0.31368175148963928223f,
+-0.95048606395721435547f,0.31076714396476745605f,-0.95143502950668334961f,
+0.30784964561462402344f,-0.95237499475479125977f,0.30492922663688659668f,
+-0.95330601930618286133f,0.30200594663619995117f,-0.95422810316085815430f,
+0.29907983541488647461f,-0.95514118671417236328f,0.29615089297294616699f,
+-0.95604526996612548828f,0.29321914911270141602f,-0.95694035291671752930f,
+0.29028466343879699707f,-0.95782643556594848633f,0.28734746575355529785f,
+-0.95870345830917358398f,0.28440752625465393066f,-0.95957154035568237305f,
+0.28146493434906005859f,-0.96043050289154052734f,0.27851969003677368164f,
+-0.96128046512603759766f,0.27557182312011718750f,-0.96212142705917358398f,
+0.27262136340141296387f,-0.96295326948165893555f,0.26966831088066101074f,
+-0.96377605199813842773f,0.26671275496482849121f,-0.96458977460861206055f,
+0.26375466585159301758f,-0.96539443731307983398f,0.26079410314559936523f,
+-0.96618998050689697266f,0.25783109664916992188f,-0.96697646379470825195f,
+0.25486564636230468750f,-0.96775382757186889648f,0.25189781188964843750f,
+-0.96852207183837890625f,0.24892760813236236572f,-0.96928125619888305664f,
+0.24595504999160766602f,-0.97003126144409179688f,0.24298018217086791992f,
+-0.97077214717864990234f,0.24000301957130432129f,-0.97150391340255737305f,
+0.23702360689640045166f,-0.97222650051116943359f,0.23404195904731750488f,
+-0.97293996810913085938f,0.23105810582637786865f,-0.97364425659179687500f,
+0.22807207703590393066f,-0.97433936595916748047f,0.22508391737937927246f,
+-0.97502535581588745117f,0.22209362685680389404f,-0.97570210695266723633f,
+0.21910123527050018311f,-0.97636973857879638672f,0.21610680222511291504f,
+-0.97702813148498535156f,0.21311031281948089600f,-0.97767734527587890625f,
+0.21011184155941009521f,-0.97831737995147705078f,0.20711137354373931885f,
+-0.97894817590713500977f,0.20410896837711334229f,-0.97956979274749755859f,
+0.20110464096069335938f,-0.98018211126327514648f,0.19809840619564056396f,
+-0.98078525066375732422f,0.19509032368659973145f,-0.98137921094894409180f,
+0.19208039343357086182f,-0.98196387290954589844f,0.18906866014003753662f,
+-0.98253929615020751953f,0.18605515360832214355f,-0.98310548067092895508f,
+0.18303988873958587646f,-0.98366242647171020508f,0.18002289533615112305f,
+-0.98421007394790649414f,0.17700421810150146484f,-0.98474848270416259766f,
+0.17398387193679809570f,-0.98527765274047851562f,0.17096188664436340332f,
+-0.98579752445220947266f,0.16793829202651977539f,-0.98630809783935546875f,
+0.16491311788558959961f,-0.98680937290191650391f,0.16188639402389526367f,
+-0.98730140924453735352f,0.15885815024375915527f,-0.98778414726257324219f,
+0.15582840144634246826f,-0.98825758695602416992f,0.15279719233512878418f,
+-0.98872166872024536133f,0.14976453781127929688f,-0.98917651176452636719f,
+0.14673046767711639404f,-0.98962199687957763672f,0.14369502663612365723f,
+-0.99005818367004394531f,0.14065824449062347412f,-0.99048507213592529297f,
+0.13762012124061584473f,-0.99090266227722167969f,0.13458070158958435059f,
+-0.99131083488464355469f,0.13154003024101257324f,-0.99170976877212524414f,
+0.12849810719490051270f,-0.99209928512573242188f,0.12545497715473175049f,
+-0.99247956275939941406f,0.12241067737340927124f,-0.99285042285919189453f,
+0.11936521530151367188f,-0.99321192502975463867f,0.11631862819194793701f,
+-0.99356412887573242188f,0.11327095329761505127f,-0.99390697479248046875f,
+0.11022220551967620850f,-0.99424046277999877930f,0.10717242211103439331f,
+-0.99456459283828735352f,0.10412163287401199341f,-0.99487930536270141602f,
+0.10106986016035079956f,-0.99518471956253051758f,0.09801714122295379639f,
+-0.99548077583312988281f,0.09496349841356277466f,-0.99576741456985473633f,
+0.09190895408391952515f,-0.99604469537734985352f,0.08885355293750762939f,
+-0.99631261825561523438f,0.08579730987548828125f,-0.99657112360000610352f,
+0.08274026215076446533f,-0.99682027101516723633f,0.07968243956565856934f,
+-0.99706006050109863281f,0.07662386447191238403f,-0.99729043245315551758f,
+0.07356456667184829712f,-0.99751144647598266602f,0.07050457596778869629f,
+-0.99772304296493530273f,0.06744392216205596924f,-0.99792528152465820312f,
+0.06438262760639190674f,-0.99811810255050659180f,0.06132073700428009033f,
+-0.99830156564712524414f,0.05825826525688171387f,-0.99847555160522460938f,
+0.05519524589180946350f,-0.99864023923873901367f,0.05213170498609542847f,
+-0.99879544973373413086f,0.04906767606735229492f,-0.99894130229949951172f,
+0.04600318148732185364f,-0.99907773733139038086f,0.04293825849890708923f,
+-0.99920475482940673828f,0.03987292572855949402f,-0.99932235479354858398f,
+0.03680722415447235107f,-0.99943059682846069336f,0.03374117240309715271f,
+-0.99952942132949829102f,0.03067480400204658508f,-0.99961882829666137695f,
+0.02760814502835273743f,-0.99969881772994995117f,0.02454122900962829590f,
+-0.99976938962936401367f,0.02147408016026020050f,-0.99983060359954833984f,
+0.01840673014521598816f,-0.99988234043121337891f,0.01533920597285032272f,
+-0.99992471933364868164f,0.01227153837680816650f,-0.99995762109756469727f,
+0.00920375436544418335f,-0.99998116493225097656f,0.00613588467240333557f,
+-0.99999529123306274414f,0.00306795677170157433f,1.00000000000000000000f,
+0.00000000000000000000f,0.99992471933364868164f,0.01227153837680816650f,
+0.99969881772994995117f,0.02454122900962829590f,0.99932235479354858398f,
+0.03680722415447235107f,0.99879544973373413086f,0.04906767606735229492f,
+0.99811810255050659180f,0.06132073700428009033f,0.99729043245315551758f,
+0.07356456667184829712f,0.99631261825561523438f,0.08579730987548828125f,
+0.99518471956253051758f,0.09801714122295379639f,0.99390697479248046875f,
+0.11022220551967620850f,0.99247956275939941406f,0.12241067737340927124f,
+0.99090266227722167969f,0.13458070158958435059f,0.98917651176452636719f,
+0.14673046767711639404f,0.98730140924453735352f,0.15885815024375915527f,
+0.98527765274047851562f,0.17096188664436340332f,0.98310548067092895508f,
+0.18303988873958587646f,0.98078525066375732422f,0.19509032368659973145f,
+0.97831737995147705078f,0.20711137354373931885f,0.97570210695266723633f,
+0.21910123527050018311f,0.97293996810913085938f,0.23105810582637786865f,
+0.97003126144409179688f,0.24298018217086791992f,0.96697646379470825195f,
+0.25486564636230468750f,0.96377605199813842773f,0.26671275496482849121f,
+0.96043050289154052734f,0.27851969003677368164f,0.95694035291671752930f,
+0.29028466343879699707f,0.95330601930618286133f,0.30200594663619995117f,
+0.94952815771102905273f,0.31368175148963928223f,0.94560730457305908203f,
+0.32531028985977172852f,0.94154405593872070312f,0.33688986301422119141f,
+0.93733900785446166992f,0.34841868281364440918f,0.93299281597137451172f,
+0.35989505052566528320f,0.92850607633590698242f,0.37131720781326293945f,
+0.92387950420379638672f,0.38268342614173889160f,0.91911387443542480469f,
+0.39399203658103942871f,0.91420978307723999023f,0.40524131059646606445f,
+0.90916800498962402344f,0.41642954945564270020f,0.90398931503295898438f,
+0.42755508422851562500f,0.89867448806762695312f,0.43861624598503112793f,
+0.89322429895401000977f,0.44961133599281311035f,0.88763964176177978516f,
+0.46053871512413024902f,0.88192129135131835938f,0.47139674425125122070f,
+0.87607008218765258789f,0.48218378424644470215f,0.87008696794509887695f,
+0.49289819598197937012f,0.86397284269332885742f,0.50353837013244628906f,
+0.85772860050201416016f,0.51410275697708129883f,0.85135519504547119141f,
+0.52458965778350830078f,0.84485357999801635742f,0.53499764204025268555f,
+0.83822470903396606445f,0.54532498121261596680f,0.83146959543228149414f,
+0.55557024478912353516f,0.82458931207656860352f,0.56573182344436645508f,
+0.81758481264114379883f,0.57580816745758056641f,0.81045717000961303711f,
+0.58579784631729125977f,0.80320751667022705078f,0.59569931030273437500f,
+0.79583692550659179688f,0.60551106929779052734f,0.78834640979766845703f,
+0.61523157358169555664f,0.78073722124099731445f,0.62485951185226440430f,
+0.77301043272018432617f,0.63439327478408813477f,0.76516723632812500000f,
+0.64383155107498168945f,0.75720882415771484375f,0.65317285060882568359f,
+0.74913638830184936523f,0.66241580247879028320f,0.74095112085342407227f,
+0.67155897617340087891f,0.73265427350997924805f,0.68060100078582763672f,
+0.72424709796905517578f,0.68954056501388549805f,0.71573084592819213867f,
+0.69837623834609985352f,0.70710676908493041992f,0.70710676908493041992f,
+0.69837623834609985352f,0.71573084592819213867f,0.68954056501388549805f,
+0.72424709796905517578f,0.68060100078582763672f,0.73265427350997924805f,
+0.67155897617340087891f,0.74095112085342407227f,0.66241580247879028320f,
+0.74913638830184936523f,0.65317285060882568359f,0.75720882415771484375f,
+0.64383155107498168945f,0.76516723632812500000f,0.63439327478408813477f,
+0.77301043272018432617f,0.62485951185226440430f,0.78073722124099731445f,
+0.61523157358169555664f,0.78834640979766845703f,0.60551106929779052734f,
+0.79583692550659179688f,0.59569931030273437500f,0.80320751667022705078f,
+0.58579784631729125977f,0.81045717000961303711f,0.57580816745758056641f,
+0.81758481264114379883f,0.56573182344436645508f,0.82458931207656860352f,
+0.55557024478912353516f,0.83146959543228149414f,0.54532498121261596680f,
+0.83822470903396606445f,0.53499764204025268555f,0.84485357999801635742f,
+0.52458965778350830078f,0.85135519504547119141f,0.51410275697708129883f,
+0.85772860050201416016f,0.50353837013244628906f,0.86397284269332885742f,
+0.49289819598197937012f,0.87008696794509887695f,0.48218378424644470215f,
+0.87607008218765258789f,0.47139674425125122070f,0.88192129135131835938f,
+0.46053871512413024902f,0.88763964176177978516f,0.44961133599281311035f,
+0.89322429895401000977f,0.43861624598503112793f,0.89867448806762695312f,
+0.42755508422851562500f,0.90398931503295898438f,0.41642954945564270020f,
+0.90916800498962402344f,0.40524131059646606445f,0.91420978307723999023f,
+0.39399203658103942871f,0.91911387443542480469f,0.38268342614173889160f,
+0.92387950420379638672f,0.37131720781326293945f,0.92850607633590698242f,
+0.35989505052566528320f,0.93299281597137451172f,0.34841868281364440918f,
+0.93733900785446166992f,0.33688986301422119141f,0.94154405593872070312f,
+0.32531028985977172852f,0.94560730457305908203f,0.31368175148963928223f,
+0.94952815771102905273f,0.30200594663619995117f,0.95330601930618286133f,
+0.29028466343879699707f,0.95694035291671752930f,0.27851969003677368164f,
+0.96043050289154052734f,0.26671275496482849121f,0.96377605199813842773f,
+0.25486564636230468750f,0.96697646379470825195f,0.24298018217086791992f,
+0.97003126144409179688f,0.23105810582637786865f,0.97293996810913085938f,
+0.21910123527050018311f,0.97570210695266723633f,0.20711137354373931885f,
+0.97831737995147705078f,0.19509032368659973145f,0.98078525066375732422f,
+0.18303988873958587646f,0.98310548067092895508f,0.17096188664436340332f,
+0.98527765274047851562f,0.15885815024375915527f,0.98730140924453735352f,
+0.14673046767711639404f,0.98917651176452636719f,0.13458070158958435059f,
+0.99090266227722167969f,0.12241067737340927124f,0.99247956275939941406f,
+0.11022220551967620850f,0.99390697479248046875f,0.09801714122295379639f,
+0.99518471956253051758f,0.08579730987548828125f,0.99631261825561523438f,
+0.07356456667184829712f,0.99729043245315551758f,0.06132073700428009033f,
+0.99811810255050659180f,0.04906767606735229492f,0.99879544973373413086f,
+0.03680722415447235107f,0.99932235479354858398f,0.02454122900962829590f,
+0.99969881772994995117f,0.01227153837680816650f,0.99992471933364868164f,
+0.00000000000000006123f,1.00000000000000000000f,-0.01227153837680816650f,
+0.99992471933364868164f,-0.02454122900962829590f,0.99969881772994995117f,
+-0.03680722415447235107f,0.99932235479354858398f,-0.04906767606735229492f,
+0.99879544973373413086f,-0.06132073700428009033f,0.99811810255050659180f,
+-0.07356456667184829712f,0.99729043245315551758f,-0.08579730987548828125f,
+0.99631261825561523438f,-0.09801714122295379639f,0.99518471956253051758f,
+-0.11022220551967620850f,0.99390697479248046875f,-0.12241067737340927124f,
+0.99247956275939941406f,-0.13458070158958435059f,0.99090266227722167969f,
+-0.14673046767711639404f,0.98917651176452636719f,-0.15885815024375915527f,
+0.98730140924453735352f,-0.17096188664436340332f,0.98527765274047851562f,
+-0.18303988873958587646f,0.98310548067092895508f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.20711137354373931885f,0.97831737995147705078f,
+-0.21910123527050018311f,0.97570210695266723633f,-0.23105810582637786865f,
+0.97293996810913085938f,-0.24298018217086791992f,0.97003126144409179688f,
+-0.25486564636230468750f,0.96697646379470825195f,-0.26671275496482849121f,
+0.96377605199813842773f,-0.27851969003677368164f,0.96043050289154052734f,
+-0.29028466343879699707f,0.95694035291671752930f,-0.30200594663619995117f,
+0.95330601930618286133f,-0.31368175148963928223f,0.94952815771102905273f,
+-0.32531028985977172852f,0.94560730457305908203f,-0.33688986301422119141f,
+0.94154405593872070312f,-0.34841868281364440918f,0.93733900785446166992f,
+-0.35989505052566528320f,0.93299281597137451172f,-0.37131720781326293945f,
+0.92850607633590698242f,-0.38268342614173889160f,0.92387950420379638672f,
+-0.39399203658103942871f,0.91911387443542480469f,-0.40524131059646606445f,
+0.91420978307723999023f,-0.41642954945564270020f,0.90916800498962402344f,
+-0.42755508422851562500f,0.90398931503295898438f,-0.43861624598503112793f,
+0.89867448806762695312f,-0.44961133599281311035f,0.89322429895401000977f,
+-0.46053871512413024902f,0.88763964176177978516f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.48218378424644470215f,0.87607008218765258789f,
+-0.49289819598197937012f,0.87008696794509887695f,-0.50353837013244628906f,
+0.86397284269332885742f,-0.51410275697708129883f,0.85772860050201416016f,
+-0.52458965778350830078f,0.85135519504547119141f,-0.53499764204025268555f,
+0.84485357999801635742f,-0.54532498121261596680f,0.83822470903396606445f,
+-0.55557024478912353516f,0.83146959543228149414f,-0.56573182344436645508f,
+0.82458931207656860352f,-0.57580816745758056641f,0.81758481264114379883f,
+-0.58579784631729125977f,0.81045717000961303711f,-0.59569931030273437500f,
+0.80320751667022705078f,-0.60551106929779052734f,0.79583692550659179688f,
+-0.61523157358169555664f,0.78834640979766845703f,-0.62485951185226440430f,
+0.78073722124099731445f,-0.63439327478408813477f,0.77301043272018432617f,
+-0.64383155107498168945f,0.76516723632812500000f,-0.65317285060882568359f,
+0.75720882415771484375f,-0.66241580247879028320f,0.74913638830184936523f,
+-0.67155897617340087891f,0.74095112085342407227f,-0.68060100078582763672f,
+0.73265427350997924805f,-0.68954056501388549805f,0.72424709796905517578f,
+-0.69837623834609985352f,0.71573084592819213867f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.71573084592819213867f,0.69837623834609985352f,
+-0.72424709796905517578f,0.68954056501388549805f,-0.73265427350997924805f,
+0.68060100078582763672f,-0.74095112085342407227f,0.67155897617340087891f,
+-0.74913638830184936523f,0.66241580247879028320f,-0.75720882415771484375f,
+0.65317285060882568359f,-0.76516723632812500000f,0.64383155107498168945f,
+-0.77301043272018432617f,0.63439327478408813477f,-0.78073722124099731445f,
+0.62485951185226440430f,-0.78834640979766845703f,0.61523157358169555664f,
+-0.79583692550659179688f,0.60551106929779052734f,-0.80320751667022705078f,
+0.59569931030273437500f,-0.81045717000961303711f,0.58579784631729125977f,
+-0.81758481264114379883f,0.57580816745758056641f,-0.82458931207656860352f,
+0.56573182344436645508f,-0.83146959543228149414f,0.55557024478912353516f,
+-0.83822470903396606445f,0.54532498121261596680f,-0.84485357999801635742f,
+0.53499764204025268555f,-0.85135519504547119141f,0.52458965778350830078f,
+-0.85772860050201416016f,0.51410275697708129883f,-0.86397284269332885742f,
+0.50353837013244628906f,-0.87008696794509887695f,0.49289819598197937012f,
+-0.87607008218765258789f,0.48218378424644470215f,-0.88192129135131835938f,
+0.47139674425125122070f,-0.88763964176177978516f,0.46053871512413024902f,
+-0.89322429895401000977f,0.44961133599281311035f,-0.89867448806762695312f,
+0.43861624598503112793f,-0.90398931503295898438f,0.42755508422851562500f,
+-0.90916800498962402344f,0.41642954945564270020f,-0.91420978307723999023f,
+0.40524131059646606445f,-0.91911387443542480469f,0.39399203658103942871f,
+-0.92387950420379638672f,0.38268342614173889160f,-0.92850607633590698242f,
+0.37131720781326293945f,-0.93299281597137451172f,0.35989505052566528320f,
+-0.93733900785446166992f,0.34841868281364440918f,-0.94154405593872070312f,
+0.33688986301422119141f,-0.94560730457305908203f,0.32531028985977172852f,
+-0.94952815771102905273f,0.31368175148963928223f,-0.95330601930618286133f,
+0.30200594663619995117f,-0.95694035291671752930f,0.29028466343879699707f,
+-0.96043050289154052734f,0.27851969003677368164f,-0.96377605199813842773f,
+0.26671275496482849121f,-0.96697646379470825195f,0.25486564636230468750f,
+-0.97003126144409179688f,0.24298018217086791992f,-0.97293996810913085938f,
+0.23105810582637786865f,-0.97570210695266723633f,0.21910123527050018311f,
+-0.97831737995147705078f,0.20711137354373931885f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.98310548067092895508f,0.18303988873958587646f,
+-0.98527765274047851562f,0.17096188664436340332f,-0.98730140924453735352f,
+0.15885815024375915527f,-0.98917651176452636719f,0.14673046767711639404f,
+-0.99090266227722167969f,0.13458070158958435059f,-0.99247956275939941406f,
+0.12241067737340927124f,-0.99390697479248046875f,0.11022220551967620850f,
+-0.99518471956253051758f,0.09801714122295379639f,-0.99631261825561523438f,
+0.08579730987548828125f,-0.99729043245315551758f,0.07356456667184829712f,
+-0.99811810255050659180f,0.06132073700428009033f,-0.99879544973373413086f,
+0.04906767606735229492f,-0.99932235479354858398f,0.03680722415447235107f,
+-0.99969881772994995117f,0.02454122900962829590f,-0.99992471933364868164f,
+0.01227153837680816650f,1.00000000000000000000f,0.00000000000000000000f,
+0.99879544973373413086f,0.04906767606735229492f,0.99518471956253051758f,
+0.09801714122295379639f,0.98917651176452636719f,0.14673046767711639404f,
+0.98078525066375732422f,0.19509032368659973145f,0.97003126144409179688f,
+0.24298018217086791992f,0.95694035291671752930f,0.29028466343879699707f,
+0.94154405593872070312f,0.33688986301422119141f,0.92387950420379638672f,
+0.38268342614173889160f,0.90398931503295898438f,0.42755508422851562500f,
+0.88192129135131835938f,0.47139674425125122070f,0.85772860050201416016f,
+0.51410275697708129883f,0.83146959543228149414f,0.55557024478912353516f,
+0.80320751667022705078f,0.59569931030273437500f,0.77301043272018432617f,
+0.63439327478408813477f,0.74095112085342407227f,0.67155897617340087891f,
+0.70710676908493041992f,0.70710676908493041992f,0.67155897617340087891f,
+0.74095112085342407227f,0.63439327478408813477f,0.77301043272018432617f,
+0.59569931030273437500f,0.80320751667022705078f,0.55557024478912353516f,
+0.83146959543228149414f,0.51410275697708129883f,0.85772860050201416016f,
+0.47139674425125122070f,0.88192129135131835938f,0.42755508422851562500f,
+0.90398931503295898438f,0.38268342614173889160f,0.92387950420379638672f,
+0.33688986301422119141f,0.94154405593872070312f,0.29028466343879699707f,
+0.95694035291671752930f,0.24298018217086791992f,0.97003126144409179688f,
+0.19509032368659973145f,0.98078525066375732422f,0.14673046767711639404f,
+0.98917651176452636719f,0.09801714122295379639f,0.99518471956253051758f,
+0.04906767606735229492f,0.99879544973373413086f,0.00000000000000006123f,
+1.00000000000000000000f,-0.04906767606735229492f,0.99879544973373413086f,
+-0.09801714122295379639f,0.99518471956253051758f,-0.14673046767711639404f,
+0.98917651176452636719f,-0.19509032368659973145f,0.98078525066375732422f,
+-0.24298018217086791992f,0.97003126144409179688f,-0.29028466343879699707f,
+0.95694035291671752930f,-0.33688986301422119141f,0.94154405593872070312f,
+-0.38268342614173889160f,0.92387950420379638672f,-0.42755508422851562500f,
+0.90398931503295898438f,-0.47139674425125122070f,0.88192129135131835938f,
+-0.51410275697708129883f,0.85772860050201416016f,-0.55557024478912353516f,
+0.83146959543228149414f,-0.59569931030273437500f,0.80320751667022705078f,
+-0.63439327478408813477f,0.77301043272018432617f,-0.67155897617340087891f,
+0.74095112085342407227f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.74095112085342407227f,0.67155897617340087891f,-0.77301043272018432617f,
+0.63439327478408813477f,-0.80320751667022705078f,0.59569931030273437500f,
+-0.83146959543228149414f,0.55557024478912353516f,-0.85772860050201416016f,
+0.51410275697708129883f,-0.88192129135131835938f,0.47139674425125122070f,
+-0.90398931503295898438f,0.42755508422851562500f,-0.92387950420379638672f,
+0.38268342614173889160f,-0.94154405593872070312f,0.33688986301422119141f,
+-0.95694035291671752930f,0.29028466343879699707f,-0.97003126144409179688f,
+0.24298018217086791992f,-0.98078525066375732422f,0.19509032368659973145f,
+-0.98917651176452636719f,0.14673046767711639404f,-0.99518471956253051758f,
+0.09801714122295379639f,-0.99879544973373413086f,0.04906767606735229492f,
+1.00000000000000000000f,0.00000000000000000000f,0.98078525066375732422f,
+0.19509032368659973145f,0.92387950420379638672f,0.38268342614173889160f,
+0.83146959543228149414f,0.55557024478912353516f,0.70710676908493041992f,
+0.70710676908493041992f,0.55557024478912353516f,0.83146959543228149414f,
+0.38268342614173889160f,0.92387950420379638672f,0.19509032368659973145f,
+0.98078525066375732422f,0.00000000000000006123f,1.00000000000000000000f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.38268342614173889160f,
+0.92387950420379638672f,-0.55557024478912353516f,0.83146959543228149414f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.83146959543228149414f,
+0.55557024478912353516f,-0.92387950420379638672f,0.38268342614173889160f,
+-0.98078525066375732422f,0.19509032368659973145f,1.00000000000000000000f,
+0.00000000000000000000f,0.70710676908493041992f,0.70710676908493041992f,
+0.00000000000000006123f,1.00000000000000000000f,-0.70710676908493041992f,
+0.70710676908493041992f,};
 
 float32_t rearranged_twiddle_stride3_4096_f32[2728]={
-1.00000000000000000000f,0.00000000000000000000f,0.99998941108192840321f,
-0.00460192612044857050f,0.99995764455196389786f,0.00920375478205981944f,
-0.99990470108285289808f,0.01380538852806039059f,0.99983058179582340319f,
-0.01840672990580482019f,0.99973528826056168306f,0.02300768146883936868f,
-0.99961882249517863830f,0.02760814577896573974f,0.99948118696616694567f,
-0.03220802540830458582f,0.99932238458834954375f,0.03680722294135883171f,
-0.99914241872481690532f,0.04140564097707673946f,0.99894129318685687124f,
-0.04600318213091462299f,0.99871901223387293811f,0.05059974903689928166f,
-0.99847558057329477421f,0.05519524434968993420f,0.99821100336047818846f,
-0.05978957074663986820f,0.99792528619859599548f,0.06438263092985746505f,
-0.99761843513851955478f,0.06897432762826674613f,0.99729045667869020697f,
-0.07356456359966742631f,0.99694135776498216117f,0.07815324163279423197f,
-0.99657114579055483539f,0.08274026454937569164f,0.99617982859569698117f,
-0.08732553520619205922f,0.99576741446765981713f,0.09190895649713272386f,
-0.99533391214048227980f,0.09649043135525259274f,0.99487933079480561638f,
-0.10106986275482782167f,0.99440368005767909576f,0.10564715371341061589f,
-0.99390697000235606051f,0.11022220729388305938f,0.99338921114808065305f,
-0.11479492660651008373f,0.99285041445986510489f,0.11936521481099135467f,
-0.99229059134825736699f,0.12393297511851215920f,0.99170975366909952520f,
-0.12849811079379316880f,0.99110791372327688986f,0.13306052515713906459f,
-0.99048508425645709341f,0.13762012158648603832f,0.98984127845882052821f,
-0.14217680351944803063f,0.98917650996478101444f,0.14673047445536174793f,
-0.98849079285269658701f,0.15128103795733022219f,0.98778414164457217783f,
-0.15582839765426523271f,0.98705657130575097380f,0.16037245724292828464f,
-0.98630809724459866938f,0.16491312048996989437f,0.98553873531217606185f,
-0.16945029123396795900f,0.98474850180190420801f,0.17398387338746382214f,
-0.98393741344921892278f,0.17851377093899750692f,0.98310548743121628501f,
-0.18303988795514095078f,0.98225274136628937249f,0.18756212858252960252f,
-0.98137919331375456089f,0.19208039704989243734f,0.98048486177346938497f,
-0.19659459767008022335f,0.97956976568544051887f,0.20110463484209190055f,
-0.97863392442942320759f,0.20561041305309923910f,0.97767735782450992943f,
-0.21011183688046961016f,0.97670008612871184184f,0.21460881099378675829f,
-0.97570213003852857003f,0.21910124015686979759f,0.97468351068851066810f,
-0.22358902922978998729f,0.97364424965081197705f,0.22807208317088573102f,
-0.97258436893473221296f,0.23255030703877524467f,0.97150389098625178352f,
-0.23702360599436719801f,0.97040283868755550234f,0.24149188530286933019f,
-0.96928123535654853171f,0.24595505033579459497f,0.96813910474636244441f,
-0.25041300657296522436f,0.96697647104485207059f,0.25486565960451457169f,
-0.96579335887408368500f,0.25931291513288623474f,0.96458979328981275803f,
-0.26375467897483134694f,0.96336579978095404631f,0.26819085706340317632f,
-0.96212140426904158019f,0.27262135544994897662f,0.96085663310767965850f,
-0.27704608030609989555f,0.95957151308198451733f,0.28146493792575794091f,
-0.95826607140801767226f,0.28587783472708061527f,0.95694033573220882438f,
-0.29028467725446233105f,0.95559433413077110586f,0.29468537218051432669f,
-0.95422809510910566733f,0.29907982630804047508f,0.95284164760119871573f,
-0.30346794657201131562f,0.95143502096900833820f,0.30784964004153486661f,
-0.95000824500184299914f,0.31222481392182488413f,0.94856134991573026749f,
-0.31659337555616584581f,0.94709436635277721717f,0.32095523242787521445f,
-0.94560732538052127971f,0.32531029216226292622f,0.94410025849127265918f,
-0.32965846252858749255f,0.94257319760144686605f,0.33399965144200938205f,
-0.94102617505088925753f,0.33833376696554112728f,0.93945922360218991898f,
-0.34266071731199437833f,0.93787237643998988545f,0.34698041084592368133f,
-0.93626566717027825959f,0.35129275608556709276f,0.93463912981968078064f,
-0.35559766170478385172f,0.93299279883473895669f,0.35989503653498811087f,
-0.93132670908118042608f,0.36418478956707989180f,0.92964089584318121418f,
-0.36846682995337232125f,0.92793539482261788720f,0.37274106700951575855f,
-0.92621024213831137928f,0.37700741021641825945f,0.92446547432526260391f,
-0.38126576922216237620f,0.92270112833387862850f,0.38551605384391884890f,
-0.92091724152918941204f,0.38975817406985641123f,0.91911385169005777040f,
-0.39399204006104809883f,0.91729099700837790632f,0.39821756215337356100f,
-0.91544871608826783316f,0.40243465085941843018f,0.91358704794525080750f,
-0.40664321687036902864f,0.91170603200542987832f,0.41084317105790391089f,
-0.90980570810465222209f,0.41503442447608163146f,0.90788611648766626150f,
-0.41921688836322390515f,0.90594729780726845902f,0.42339047414379604728f,
-0.90398929312344333820f,0.42755509343028208491f,0.90201214390249317976f,
-0.43171065802505725895f,0.90001589201616016833f,0.43585707992225547480f,
-0.89800057974073987932f,0.43999427130963325583f,0.89596624975618521791f,
-0.44412214457042920035f,0.89391294514520325265f,0.44824061228521988598f,
-0.89184070939234272313f,0.45234958723377088896f,0.88974958638307277692f,
-0.45644898239688391772f,0.88763962040285393496f,0.46053871095824000514f,
-0.88551085613619995307f,0.46461868630623781584f,0.88336333866573157891f,
-0.46868882203582790114f,0.88119711347122209322f,0.47274903195034279069f,
-0.87901222642863352519f,0.47679923006332208812f,0.87680872380914565145f,
-0.48083933060033395845f,0.87458665227817611321f,0.48486924800079106435f,
-0.87234605889439154058f,0.48888889691976317176f,0.87008699110871146054f,
-0.49289819222978403790f,0.86780949676330332299f,0.49689704902265446895f,
-0.86551362409056908920f,0.50088538261124071482f,0.86319942171212415971f,
-0.50486310853126759035f,0.86086693863776730939f,0.50883014254310698909f,
-0.85851622426444273994f,0.51278640063356295542f,0.85614732837519447184f,
-0.51673179901764987321f,0.85376030113811141042f,0.52066625414036715735f,
-0.85135519310526519554f,0.52458968267846894928f,0.84893205521163961347f,
-0.52850200154222848337f,0.84649093877405212627f,0.53240312787719790144f,
-0.84403189549006640835f,0.53629297906596318235f,0.84155497743689844370f,
-0.54017147272989285423f,0.83906023707031274217f,0.54403852673088382019f,
-0.83654772722351200542f,0.54789405917310018967f,0.83401750110601813315f,
-0.55173798840470733573f,0.83146961230254523567f,0.55557023301960217765f,
-0.82890411477186487499f,0.55939071185913613604f,0.82632106284566353427f,
-0.56319934401383409117f,0.82372051122739142759f,0.56699604882510867832f,
-0.82110251499110464835f,0.57078074588696725566f,0.81846712958029865792f,
-0.57455335504771576360f,0.81581441080673378075f,0.57831379641165558958f,
-0.81314441484925359394f,0.58206199034077543697f,0.81045719825259476821f,
-0.58579785745643886408f,0.80775281792619035848f,0.58952131864106394055f,
-0.80503133114296365758f,0.59323229503979979516f,0.80229279553811572168f,
-0.59693070806219639124f,0.79953726910790501314f,0.60061647938386897305f,
-0.79676481020841882774f,0.60428953094815596181f,0.79397547755433717231f,
-0.60794978496777363208f,0.79116933021769020318f,0.61159716392646190641f,
-0.78834642762660622761f,0.61523159058062681925f,0.78550682956405393220f,
-0.61885298796097631957f,0.78265059616657572938f,0.62246127937414996723f,
-0.77977778792301455368f,0.62605638840434352232f,0.77688846567323244230f,
-0.62963823891492698426f,0.77398269060682289844f,0.63320675505005719064f,
-0.77106052426181381776f,0.63676186123628419899f,0.76812202852336541881f,
-0.64030348218415167327f,0.76516726562245895860f,0.64383154288979138613f,
-0.76219629813457900891f,0.64734596863651205911f,0.75920918897838796102f,
-0.65084668499638087535f,0.75620600141439453523f,0.65433361783180044036f,
-0.75318679904361252042f,0.65780669329707863735f,0.75015164580621507273f,
-0.66126583783999226540f,0.74710060598018013245f,0.66471097820334479334f,
-0.74403374417992929057f,0.66814204142651845153f,0.74095112535495921691f,
-0.67155895484701833009f,0.73785281478846598269f,0.67496164610201192513f,
-0.73473887809596349907f,0.67835004312986146857f,0.73160938122389262972f,
-0.68172407417164970767f,0.72846439044822519637f,0.68508366777270035541f,
-0.72530397237306076796f,0.68842875278409043638f,0.72212819392921534511f,
-0.69175925836415774750f,0.71893712237280449351f,0.69507511398000088043f,
-0.71573082528381870571f,0.69837624940897280457f,0.71250937056469243469f,
-0.70166259474016845488f,0.70927282643886568891f,0.70493408037590488124f,
-0.70602126144933974317f,0.70819063703319540259f,0.70275474445722529993f,
-0.71143219574521643356f,0.69947334464028376733f,0.71465868786276909308f,
-0.69617713149146298601f,0.71787004505573170920f,0.69286617481742474034f,
-0.72106619931450810501f,0.68954054473706694051f,0.72424708295146689174f,
-0.68620031168003858824f,0.72741262860237576593f,0.68284554638524808112f,
-0.73056276922782759087f,0.67947631989936496666f,0.73369743811466026084f,
-0.67609270357531603413f,0.73681656887736979300f,0.67269476907077296879f,
-0.73992009545951609173f,0.66928258834663600929f,0.74300795213512171866f,
-0.66585623366550972246f,0.74608007351006366825f,0.66241577759017178373f,
-0.74913639452345925918f,0.65896129298203731661f,0.75217685044904269986f,
-0.65549285299961546070f,0.75520137689653654700f,0.65201053109695950027f,
-0.75820990981301528144f,0.64851440102211255212f,0.76120238548426177871f,
-0.64500453681554403840f,0.76417874053611667406f,0.64148101280858316198f,
-0.76713891193582040007f,0.63794390362184416610f,0.77008283699334789674f,
-0.63439328416364548779f,0.77301045336273688235f,0.63082922962842458148f,
-0.77592169904340757558f,0.62725181549514419377f,0.77881651238147586724f,
-0.62366111752569464155f,0.78169483207105938671f,0.62005721176328920663f,
-0.78455659715557524159f,0.61644017453085364622f,0.78740174702903131809f,
-0.61281008242940970820f,0.79023022143731003197f,0.60916701233645320634f,
-0.79304196047944364167f,0.60551104140432554512f,0.79583690460888345530f,
-0.60184224705858002658f,0.79861499463476082195f,0.59816070699634238395f,
-0.80137617172314012937f,0.59446649918466454299f,0.80412037739826569549f,
-0.59075970185887427544f,0.80684755354379922299f,0.58704039352091808013f,
-0.80955764240405125864f,0.58330865293769829094f,0.81225058658520388200f,
-0.57956455913940574387f,0.81492632905652662156f,0.57580819141784533866f,
-0.81758481315158371139f,0.57203962932475704850f,0.82022598256943468620f,
-0.56825895267013148970f,0.82284978137582631685f,0.56446624152051949608f,
-0.82545615400437744036f,0.56066157619733603124f,0.82804504525775579626f,
-0.55684503727516010407f,0.83061640030884620334f,0.55301670558002757883f,
-0.83317016470191318511f,0.54917666218771976627f,0.83570628435375260423f,
-0.54532498842204646383f,0.83822470555483796772f,0.54146176585312355556f,
-0.84072537497045796151f,0.53758707629564550512f,0.84320823964184543620f,
-0.53370100180715296379f,0.84567324698729906540f,0.52980362468629482731f,
-0.84812034480329712149f,0.52589502747108474168f,0.85054948126560336874f,
-0.52197529293715438925f,0.85296060493036363059f,0.51804450409599933636f,
-0.85535366473519602870f,0.51410274419322166128f,0.85772861000027211809f,
-0.51015009670676669806f,0.86008539042939025077f,0.50618664534515533937f,
-0.86242395611104050168f,0.50221247404571089934f,0.86474425751946237817f,
-0.49822766697278186854f,0.86704624551569264845f,0.49423230851595972846f,
-0.86932987134860673084f,0.49022648328829110387f,0.87159508665595109012f,
-0.48621027612448652899f,0.87384184346536675214f,0.48218377207912282989f,
-0.87607009419540660122f,0.47814705642484311987f,0.87827979165654146421f,
-0.47410021465055002254f,0.88047088905216075450f,0.47004333245959561971f,
-0.88264333997956279099f,0.46597649576796612569f,0.88479709843093778954f,
-0.46189979070246284243f,0.88693211879434208367f,0.45781330359887728587f,
-0.88904835585466457371f,0.45371712100016392544f,0.89114576479458318392f,
-0.44961132965460659516f,0.89322430119551532446f,0.44549601651398174074f,
-0.89528392103855758410f,0.44137126873171661501f,0.89732458070541831763f,
-0.43723717366104419835f,0.89934623697934146236f,0.43309381885315201277f,
-0.90134884704602202810f,0.42894129205532954829f,0.90333236849451181705f,
-0.42477968120910880589f,0.90529675931811881551f,0.42060907444840250902f,
-0.90724197791529592738f,0.41642956009763731906f,0.90916798309052226923f,
-0.41224122666988299857f,0.91107473405517624965f,0.40804416286497874333f,
-0.91296219042839810154f,0.40383845756765412993f,0.91483031223794608611f,
-0.39962419984564678810f,0.91667905992104270485f,0.39540147894781629834f,
-0.91850839432521225181f,0.39117038430225398171f,0.92031827670911048322f,
-0.38693100551438869283f,0.92210866874334507237f,0.38268343236508983729f,
-0.92387953251128673848f,0.37842775480876561511f,0.92563083050987271516f,
-0.37416406297145798909f,0.92736252565040111495f,0.36989244714893426691f,
-0.92907458125931574600f,0.36561299780477396482f,0.93076696107898371224f,
-0.36132580556845433906f,0.93243962926846235550f,0.35703096123343003310f,
-0.93409255040425887007f,0.35272855575521072646f,0.93572568948108036935f,
-0.34841868024943450921f,0.93733901191257495977f,0.34410142598993898044f,
-0.93893248353206448797f,0.33977688440682696225f,0.94050607059326829518f,
-0.33544514708453165852f,0.94205973977101731265f,0.33110630575987642921f,
-0.94359345816196038559f,0.32676045232013178898f,0.94510719328526060501f,
-0.32240767880107001897f,0.94660091308328353499f,0.31804807738501505998f,
-0.94807458592227622507f,0.31368174039889157312f,0.94952818059303667475f,
-0.30930876031226878231f,0.95096166631157508231f,0.30492922973540242948f,
-0.95237501271976587880f,0.30054324141727339903f,0.95376818988599032512f,
-0.29615088824362395536f,0.95514116830577067141f,0.29175226323498937298f,
-0.95649391890239499059f,0.28734745954472956653f,0.95782641302753290802f,
-0.28293657045705539188f,0.95913862246184189431f,0.27851968938505305973f,
-0.96043051941556578655f,0.27409690986870632878f,0.96170207652912254037f,
-0.26966832557291520178f,0.96295326687368387741f,0.26523403028551190141f,
-0.96418406395174571788f,0.26079411791527556952f,0.96539444169768939830f,
-0.25634868248994291395f,0.96658437447833311928f,0.25189781815421691258f,
-0.96775383709347551076f,0.24744161916777343557f,0.96890280477642887202f,
-0.24298017990326398197f,0.97003125319454397424f,0.23851359484431849944f,
-0.97113915844972509284f,0.23404195858354345794f,0.97222649707893626925f,
-0.22956536582051886852f,0.97329324605469824672f,0.22508391135979277653f,
-0.97433938278557585821f,0.22059769010887364526f,0.97536488511665686563f,
-0.21610679707621960333f,0.97636973133002114000f,0.21161132736922760866f,
-0.97735390014519996082f,0.20711137619221856032f,0.97831737071962765473f,
-0.20260703884442110567f,0.97926012264908202098f,0.19809841071795372680f,
-0.98018213596811731847f,0.19358558729580374602f,0.98108339115048659451f,
-0.18906866414980627589f,0.98196386910955524296f,0.18454773693861964423f,
-0.98282355119870523641f,0.18002290140569951471f,0.98366241921173025453f,
-0.17549425337727139751f,0.98448045538322093151f,0.17096188876030135595f,
-0.98527764238894122162f,0.16642590354046421508f,0.98605396334619543897f,
-0.16188639378011188130f,0.98680940181418541624f,0.15734345561623827581f,
-0.98754394179435922574f,0.15279718525844340760f,0.98825756773074946437f,
-0.14824767898689619749f,0.98895026451030298986f,0.14369503315029458212f,
-0.98962201746320077600f,0.13913934416382628401f,0.99027281236316910817f,
-0.13458070850712622324f,0.99090263542778000971f,0.13001922272223334631f,
-0.99151147331874389668f,0.12545498341154620592f,0.99209931314219179654f,
-0.12088808723577722237f,0.99266614244894801899f,0.11631863091190487725f,
-0.99321194923479450001f,0.11174671121112665639f,0.99373672194072459884f,
-0.10717242495680887049f,0.99424044945318790223f,0.10259586902243628126f,
-0.99472312110432570265f,0.09801714032956077016f,0.99518472667219681771f,
-0.09343633584574791151f,0.99562525638099430569f,0.08885355258252468358f,
-0.99604470090125196702f,0.08426888759332412659f,0.99644305135004263008f,
-0.07968243797143012563f,0.99682029929116566791f,0.07509430084792129145f,
-0.99717643673532618820f,0.07050457338961400866f,0.99751145614030345410f,
-0.06591335279700392957f,0.99782535041111164453f,0.06132073630220864768f,
-0.99811811290014917919f,0.05672682116690778292f,0.99838973740734016094f,
-0.05213170468028331672f,0.99864021818026527111f,0.04753548415695926094f,
-0.99886954991428356099f,0.04293825693494095902f,0.99907772775264536147f,
-0.03834012037355279123f,0.99926474728659442359f,0.03374117185137764235f,
-0.99943060455546173237f,0.02914150876419373953f,0.99957529604674921764f,
-0.02454122852291226384f,0.99969881869620424997f,0.01994042855151459750f,
-0.99980116988788425569f,0.01533920628498821985f,0.99988234745421256111f,
-0.01073765916726457208f,0.99994234967602391162f,0.00613588464915451517f,
-0.99998117528260110909f,0.00153398018628476615f,0.99999882345170187925f,
--0.00306795676296601561f,0.99999529380957619118f,-0.00766982873953095477f,
-0.99997058643097413988f,-0.01227153828571982304f,0.99992470183914450299f,
--0.01687298794728165144f,0.99985764100582386060f,-0.02147408027546948359f,
-0.99976940535121527898f,-0.02607471782910391472f,0.99965999674395922270f,
--0.03067480317663645942f,0.99952941750109314256f,-0.03527423889821382219f,
-0.99937767038800284780f,-0.03987292758773972740f,0.99920475861836388631f,
--0.04447077185493861912f,0.99901068585407337697f,-0.04906767432741800800f,
-0.99879545620517240501f,-0.05366353765273055437f,0.99855907422975931365f,
--0.05825826450043560673f,0.99830154493389289261f,-0.06285175756416130910f,
-0.99802287377148624081f,-0.06744391956366398155f,0.99772306664419163624f,
--0.07203465324688929083f,0.99740212990127530279f,-0.07662386139203150592f,
-0.99706007033948296225f,-0.08121144680959226092f,0.99669689520289606044f,
--0.08579731234443975507f,0.99631261218277800129f,-0.09038136087786488582f,
-0.99590722941741172125f,-0.09496349532963895002f,0.99548075549192693856f,
--0.09954361866006931903f,0.99503319943811863180f,-0.10412163387205460030f,
-0.99456457073425541537f,-0.10869744401313856386f,0.99407487930487947736f,
--0.11327095217756423529f,0.99356413552059530403f,-0.11784206150832489401f,
-0.99303235019785141002f,-0.12241067519921615403f,0.99247953459870996706f,
--0.12697669649688586579f,0.99190570043060932726f,-0.13154002870288314386f,
-0.99131085984611544415f,-0.13610057517570606223f,0.99069502544266463406f,
--0.14065823933284912761f,0.99005821026229712256f,-0.14521292465284740825f,
-0.98940042779138037687f,-0.14976453467732150915f,0.98872169196032377858f,
--0.15431297301302013270f,0.98802201714328352633f,-0.15885814333386127917f,
-0.98730141815785843473f,-0.16339994938297311422f,0.98655991026477551920f,
--0.16793829497473108936f,0.98579750916756747614f,-0.17247308399679592283f,
-0.98501423101223983814f,-0.17700422041214874946f,0.98421009238692902521f,
--0.18153160826112502146f,0.98338511032155118130f,-0.18605515166344649414f,
-0.98253930228744124076f,-0.19057475482025265645f,0.98167268619698311305f,
--0.19509032201612819257f,0.98078528040323043058f,-0.19960175762113094300f,
-0.97987710369951763756f,-0.20410896609281689584f,0.97894817531906219710f,
--0.20861185197826331850f,0.97799851493455713936f,-0.21311031991609125091f,
-0.97702814265775439484f,-0.21760427463848355800f,0.97603707903903913490f,
--0.22209362097320348162f,0.97502534506699412020f,-0.22657826384560997290f,
-0.97399296216795583359f,-0.23105810828067113727f,0.97293995220556017678f,
--0.23553305940497534787f,0.97186633748027939639f,-0.24000302244874138768f,
-0.97077214072895035013f,-0.24446790274782409513f,0.96965738512429244800f,
--0.24892760574572012078f,0.96852209427441737777f,-0.25338203699557015902f,
-0.96736629222232850545f,-0.25783110216215882060f,0.96619000344541261516f,
--0.26227470702391347812f,0.96499325285492043580f,-0.26671275747489830987f,
-0.96377606579543984022f,-0.27114515952680795507f,0.96253846804435916340f,
--0.27557181931095814376f,0.96128048581132063966f,-0.27999264308027327353f,
-0.96000214573766584625f,-0.28440753721127171039f,0.95870347489587159906f,
--0.28881640820604936870f,0.95738450078897596729f,-0.29321916269425857271f,
-0.95604525134999651659f,-0.29761570743508619641f,0.95468575494133833814f,
--0.30200594931922808417f,0.95330604035419386211f,-0.30638979537086097338f,
-0.95190613680793234597f,-0.31076715274961136393f,0.95048607394948181337f,
--0.31513792875252233383f,0.94904588185270055689f,-0.31950203081601563637f,
-0.94758559101774120226f,-0.32385936651785285356f,0.94610523237040344835f,
--0.32820984357909255280f,0.94460483726148025685f,-0.33255336986604405736f,
-0.94308443746609349478f,-0.33688985339221994009f,0.94154406518302080631f,
--0.34121920232028229991f,0.93998375303401404679f,-0.34554132496398903829f,
-0.93840353406310816897f,-0.34985612979013491763f,0.93680344173592156043f,
--0.35416352542049039931f,0.93518350993894761025f,-0.35846342063373642928f,
-0.93354377297883628373f,-0.36275572436739711435f,0.93188426558166814750f,
--0.36704034571976712487f,0.93020502289221906889f,-0.37131719395183748755f,
-0.92850608047321558924f,-0.37558617848921721505f,0.92678747430458174872f,
--0.37984720892405099413f,0.92504924078267769527f,-0.38410019501693493105f,
-0.92329141671952774661f,-0.38834504669882619066f,0.92151403934204201285f,
--0.39258167407295141427f,0.91971714629122736095f,-0.39680998741671030805f,
-0.91790077562139049672f,-0.40102989718357567872f,0.91606496579933172075f,
--0.40524131400498974998f,0.91420975570353069095f,-0.40944414869225753684f,
-0.91233518462332285903f,-0.41363831223843450235f,0.91044129225806724737f,
--0.41782371582021227141f,0.90852811871630612117f,-0.42200027079979968159f,
-0.90659570451491533483f,-0.42616788872679967071f,0.90464409057824612947f,
--0.43032648134008272267f,0.90267331823725871498f,-0.43447596056965581690f,
-0.90068342922864685907f,-0.43861623853852738097f,0.89867446569395392775f,
--0.44274722756456980077f,0.89664647017868026602f,-0.44686884016237399253f,
-0.89459948563138280697f,-0.45098098904510369733f,0.89253355540276468894f,
--0.45508358712634372489f,0.89044872324475798919f,-0.45917654752194403400f,
-0.88834503330959635470f,-0.46325978355186014923f,0.88622253014888063838f,
--0.46733320874198841510f,0.88408125871263498752f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.47545028174715592284f,0.87974259280004740713f,
--0.47949375766015311928f,0.87754529020726124156f,-0.48352707893291846375f,
-0.87532940310411100349f,-0.48755016014843571837f,0.87309497841829020182f,
--0.49156291610654972990f,0.87084206347007897531f,-0.49556526182577237405f,
-0.86857070597134100609f,-0.49955711254508178287f,0.86628095402451310569f,
--0.50353838372571746440f,0.86397285612158680745f,-0.50750899105297075931f,
-0.86164646114308141023f,-0.51146885043797041259f,0.85930181835700847337f,
--0.51541787801946303826f,0.85693897741782865118f,-0.51935599016558964269f,
-0.85455798836540053376f,-0.52328310347565654137f,0.85215890162391971785f,
--0.52719913478190105760f,0.84974176800085265970f,-0.53110400115125477871f,
-0.84730663868585853749f,-0.53499761988709704230f,0.84485356524970722791f,
--0.53887990853100831146f,0.84238259964318595863f,-0.54275078486451577842f,
-0.83989379419599952126f,-0.54661016691083474939f,0.83738720161566193578f,
--0.55045797293660470029f,0.83486287498638012128f,-0.55429412145362011444f,
-0.83232086776792968408f,-0.55811853122055610221f,0.82976123379452304540f,
--0.56193112124468946877f,0.82718402727366902027f,-0.56573181078361323149f,
-0.82458930278502517996f,-0.56952051934694725155f,0.82197711527924144370f,
--0.57329716669804198226f,0.81934752007679712005f,-0.57706167285567933067f,
-0.81670057286682795628f,-0.58081395809576441547f,0.81403632970594852480f,
--0.58455394295301521534f,0.81135484701706384048f,-0.58828154822264522306f,
-0.80865618158817509364f,-0.59199669496204088137f,0.80594039057117639047f,
--0.59569930449243335691f,0.80320753148064494287f,-0.59938929840056454079f,
-0.80045766219262282082f,-0.60306659854034827539f,0.79769084094339104407f,
--0.60673112703452458661f,0.79490712632823690154f,-0.61038280627630958630f,
-0.79210657730021227785f,-0.61402155893103815831f,0.78928925316888587371f,
--0.61764730793780375784f,0.78645521359908587833f,-0.62125997651108744169f,
-0.78360451860963831194f,-0.62485948814238623239f,0.78073722857209459924f,
--0.62844576660183260053f,0.77785340420945314754f,-0.63201873593980895105f,
-0.77495310659487393057f,-0.63557832048855611440f,0.77203639715038452351f,
--0.63912444486377573138f,0.76910333764557958780f,-0.64265703396622686494f,
-0.76615399019631280630f,-0.64617601298331639459f,0.76318841726338115805f,
--0.64968130739068330470f,0.76020668165120230952f,-0.65317284295377653347f,
-0.75720884650648467851f,-0.65665054572942882505f,0.75419497531688928227f,
--0.66011434206742036768f,0.75116513190968658975f,-0.66356415861203965623f,
-0.74811938045040371481f,-0.66699992230363736034f,0.74505778544146605835f,
--0.67042156038017308717f,0.74198041172083106787f,-0.67382900037875603783f,
-0.73888732446061522463f,-0.67722217013718044587f,0.73577858916571359238f,
--0.68060099779545302212f,0.73265427167241281570f,-0.68396541179731551452f,
-0.72951443814699701296f,-0.68731534089175916336f,0.72635915508434589771f,
--0.69065071413453438254f,0.72318848930652757101f,-0.69397146088965377952f,
-0.72000250796138176579f,-0.69727751083088640449f,0.71680127852109964959f,
--0.70056879394324822474f,0.71358486878079363525f,-0.70384524052448482756f,
-0.71035334685706241764f,-0.70710678118654746172f,0.70710678118654757274f,
--0.71035334685706230662f,0.70384524052448504960f,-0.71358486878079352422f,
-0.70056879394324833576f,-0.71680127852109953857f,0.69727751083088651551f,
--0.72000250796138165477f,0.69397146088965389055f,-0.72318848930652745999f,
-0.69065071413453460458f,-0.72635915508434578669f,0.68731534089175927438f,
--0.72951443814699679091f,0.68396541179731562554f,-0.73265427167241270467f,
-0.68060099779545324417f,-0.73577858916571337033f,0.67722217013718055689f,
--0.73888732446061511361f,0.67382900037875614885f,-0.74198041172083095685f,
-0.67042156038017319819f,-0.74505778544146594733f,0.66699992230363758239f,
--0.74811938045040360379f,0.66356415861203976725f,-0.75116513190968636771f,
-0.66011434206742047870f,-0.75419497531688917125f,0.65665054572942904709f,
--0.75720884650648467851f,0.65317284295377664449f,-0.76020668165120219850f,
-0.64968130739068341573f,-0.76318841726338115805f,0.64617601298331661663f,
--0.76615399019631280630f,0.64265703396622708699f,-0.76910333764557947678f,
-0.63912444486377584241f,-0.77203639715038441249f,0.63557832048855622542f,
--0.77495310659487381955f,0.63201873593980906207f,-0.77785340420945303652f,
-0.62844576660183271155f,-0.78073722857209448822f,0.62485948814238634341f,
--0.78360451860963820092f,0.62125997651108755271f,-0.78645521359908576731f,
-0.61764730793780386886f,-0.78928925316888576269f,0.61402155893103838036f,
--0.79210657730021216683f,0.61038280627630969732f,-0.79490712632823679051f,
-0.60673112703452469763f,-0.79769084094339093305f,0.60306659854034838641f,
--0.80045766219262259877f,0.59938929840056465181f,-0.80320753148064483184f,
-0.59569930449243346793f,-0.80594039057117627944f,0.59199669496204099239f,
--0.80865618158817498262f,0.58828154822264533408f,-0.81135484701706372945f,
-0.58455394295301532637f,-0.81403632970594841378f,0.58081395809576452649f,
--0.81670057286682784525f,0.57706167285567944170f,-0.81934752007679700903f,
-0.57329716669804209328f,-0.82197711527924133268f,0.56952051934694747359f,
--0.82458930278502506894f,0.56573181078361345353f,-0.82718402727366902027f,
-0.56193112124468957980f,-0.82976123379452293438f,0.55811853122055632426f,
--0.83232086776792957306f,0.55429412145362022546f,-0.83486287498638001026f,
-0.55045797293660492233f,-0.83738720161566182476f,0.54661016691083497143f,
--0.83989379419599952126f,0.54275078486451588944f,-0.84238259964318584760f,
-0.53887990853100842248f,-0.84485356524970711689f,0.53499761988709715332f,
--0.84730663868585842646f,0.53110400115125488973f,-0.84974176800085254868f,
-0.52719913478190127964f,-0.85215890162391960683f,0.52328310347565665239f,
--0.85455798836540042274f,0.51935599016558975372f,-0.85693897741782865118f,
-0.51541787801946314929f,-0.85930181835700836235f,0.51146885043797052361f,
--0.86164646114308129921f,0.50750899105297098135f,-0.86397285612158669643f,
-0.50353838372571757542f,-0.86628095402451299467f,0.49955711254508189390f,
--0.86857070597134089507f,0.49556526182577254058f,-0.87084206347007886428f,
-0.49156291610654989643f,-0.87309497841829009079f,0.48755016014843588490f,
--0.87532940310411089246f,0.48352707893291863028f,-0.87754529020726113053f,
-0.47949375766015328582f,-0.87974259280004729611f,0.47545028174715608937f,
--0.88192126434835493853f,0.47139673682599780857f,-0.88408125871263487650f,
-0.46733320874198858164f,-0.88622253014888052736f,0.46325978355186031576f,
--0.88834503330959624368f,0.45917654752194420054f,-0.89044872324475787817f,
-0.45508358712634389143f,-0.89253355540276457791f,0.45098098904510386387f,
--0.89459948563138269595f,0.44686884016237415906f,-0.89664647017868026602f,
-0.44274722756456996731f,-0.89867446569395392775f,0.43861623853852754751f,
--0.90068342922864674804f,0.43447596056965598343f,-0.90267331823725871498f,
-0.43032648134008288920f,-0.90464409057824612947f,0.42616788872679983724f,
--0.90659570451491533483f,0.42200027079979984812f,-0.90852811871630612117f,
-0.41782371582021243794f,-0.91044129225806713634f,0.41363831223843466889f,
--0.91233518462332274801f,0.40944414869225770337f,-0.91420975570353069095f,
-0.40524131400498991651f,-0.91606496579933172075f,0.40102989718357562321f,
--0.91790077562139049672f,0.39680998741671025254f,-0.91971714629122736095f,
-0.39258167407295141427f,-0.92151403934204179080f,0.38834504669882657923f,
--0.92329141671952752457f,0.38410019501693531963f,-0.92504924078267747323f,
-0.37984720892405138271f,-0.92678747430458174872f,0.37558617848921738158f,
--0.92850608047321547822f,0.37131719395183770960f,-0.93020502289221906889f,
-0.36704034571976729140f,-0.93188426558166803648f,0.36275572436739728088f,
--0.93354377297883617270f,0.35846342063373659581f,-0.93518350993894761025f,
-0.35416352542049039931f,-0.93680344173592167145f,0.34985612979013486212f,
--0.93840353406310816897f,0.34554132496398898278f,-0.93998375303401382475f,
-0.34121920232028268849f,-0.94154406518302069529f,0.33688985339222032867f,
--0.94308443746609338376f,0.33255336986604444593f,-0.94460483726148014583f,
-0.32820984357909271933f,-0.94610523237040333733f,0.32385936651785302010f,
--0.94758559101774109124f,0.31950203081601580291f,-0.94904588185270055689f,
-0.31513792875252250036f,-0.95048607394948170235f,0.31076715274961153046f,
--0.95190613680793234597f,0.30638979537086091787f,-0.95330604035419386211f,
-0.30200594931922802866f,-0.95468575494133833814f,0.29761570743508614090f,
--0.95604525134999629454f,0.29321916269425896129f,-0.95738450078897585627f,
-0.28881640820604975728f,-0.95870347489587148804f,0.28440753721127209896f,
--0.96000214573766584625f,0.27999264308027344006f,-0.96128048581132063966f,
-0.27557181931095831029f,-0.96253846804435916340f,0.27114515952680812161f,
--0.96377606579543984022f,0.26671275747489847641f,-0.96499325285492032478f,
-0.26227470702391370017f,-0.96619000344541250413f,0.25783110216215898713f,
--0.96736629222232850545f,0.25338203699557010351f,-0.96852209427441737777f,
-0.24892760574572009302f,-0.96965738512429233698f,0.24446790274782448371f,
--0.97077214072895023911f,0.24000302244874177626f,-0.97186633748027928537f,
-0.23553305940497573645f,-0.97293995220556006576f,0.23105810828067133156f,
--0.97399296216795583359f,0.22657826384561016719f,-0.97502534506699412020f,
-0.22209362097320364815f,-0.97603707903903902388f,0.21760427463848372454f,
--0.97702814265775439484f,0.21311031991609141745f,-0.97799851493455713936f,
-0.20861185197826351279f,-0.97894817531906219710f,0.20410896609281684033f,
--0.97987710369951763756f,0.19960175762113091524f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.98167268619698311305f,0.19057475482025307278f,
--0.98253930228744124076f,0.18605515166344691047f,-0.98338511032155118130f,
-0.18153160826112521575f,-0.98421009238692902521f,0.17700422041214894375f,
--0.98501423101223983814f,0.17247308399679611712f,-0.98579750916756736512f,
-0.16793829497473128365f,-0.98655991026477540817f,0.16339994938297328075f,
--0.98730141815785843473f,0.15885814333386147346f,-0.98802201714328352633f,
-0.15431297301302007718f,-0.98872169196032377858f,0.14976453467732145364f,
--0.98940042779138037687f,0.14521292465284735274f,-0.99005821026229701154f,
-0.14065823933284954395f,-0.99069502544266463406f,0.13610057517570647856f,
--0.99131085984611544415f,0.13154002870288333815f,-0.99190570043060932726f,
-0.12697669649688606008f,-0.99247953459870996706f,0.12241067519921634832f,
--0.99303235019785141002f,0.11784206150832508830f,-0.99356413552059530403f,
-0.11327095217756441570f,-0.99407487930487936634f,0.10869744401313874427f,
--0.99456457073425541537f,0.10412163387205457254f,-0.99503319943811863180f,
-0.09954361866006927739f,-0.99548075549192693856f,0.09496349532963890838f,
--0.99590722941741172125f,0.09038136087786528827f,-0.99631261218277800129f,
-0.08579731234444015753f,-0.99669689520289606044f,0.08121144680959266338f,
--0.99706007033948296225f,0.07662386139203168633f,-0.99740212990127530279f,
-0.07203465324688947125f,-0.99772306664419163624f,0.06744391956366417584f,
--0.99802287377148624081f,0.06285175756416148951f,-0.99830154493389289261f,
-0.05825826450043579408f,-0.99855907422975931365f,0.05366353765273051968f,
--0.99879545620517240501f,0.04906767432741796636f,-0.99901068585407337697f,
-0.04447077185493858442f,-0.99920475861836388631f,0.03987292758774012985f,
--0.99937767038800284780f,0.03527423889821423159f,-0.99952941750109314256f,
-0.03067480317663686534f,-0.99965999674395922270f,0.02607471782910409860f,
--0.99976940535121527898f,0.02147408027546966747f,-0.99985764100582386060f,
-0.01687298794728183532f,-0.99992470183914450299f,0.01227153828572000692f,
--0.99997058643097413988f,0.00766982873953113778f,-0.99999529380957619118f,
-0.00306795676296597701f,-0.99999882345170187925f,-0.00153398018628480431f,
--0.99998117528260110909f,-0.00613588464915455420f,-0.99994234967602391162f,
--0.01073765916726416615f,-0.99988234745421256111f,-0.01533920628498781566f,
--0.99980116988788425569f,-0.01994042855151419158f,-0.99969881869620424997f,
--0.02454122852291207996f,-0.99957529604674921764f,-0.02914150876419355565f,
--0.99943060455546173237f,-0.03374117185137745500f,-0.99926474728659442359f,
--0.03834012037355261082f,-0.99907772775264536147f,-0.04293825693494077861f,
--0.99886954991428356099f,-0.04753548415695929563f,-0.99864021818026527111f,
--0.05213170468028335142f,-0.99838973740734016094f,-0.05672682116690781762f,
--0.99811811290014917919f,-0.06132073630220824523f,-0.99782535041111164453f,
--0.06591335279700352712f,-0.99751145614030345410f,-0.07050457338961360620f,
--0.99717643673532618820f,-0.07509430084792109716f,-0.99682029929116577893f,
--0.07968243797142994522f,-0.99644305135004263008f,-0.08426888759332393231f,
--0.99604470090125196702f,-0.08885355258252450317f,-0.99562525638099430569f,
--0.09343633584574773110f,-0.99518472667219692873f,-0.09801714032956058975f,
--0.99472312110432570265f,-0.10259586902243630901f,-0.99424044945318790223f,
--0.10717242495680891212f,-0.99373672194072470987f,-0.11174671121112625394f,
--0.99321194923479461103f,-0.11631863091190447479f,-0.99266614244894801899f,
--0.12088808723577681992f,-0.99209931314219179654f,-0.12545498341154601163f,
--0.99151147331874400770f,-0.13001922272223317978f,-0.99090263542778000971f,
--0.13458070850712605671f,-0.99027281236316910817f,-0.13913934416382611747f,
--0.98962201746320088702f,-0.14369503315029438784f,-0.98895026451030298986f,
--0.14824767898689603096f,-0.98825756773074946437f,-0.15279718525844343535f,
--0.98754394179435922574f,-0.15734345561623830356f,-0.98680940181418552726f,
--0.16188639378011149272f,-0.98605396334619543897f,-0.16642590354046382650f,
--0.98527764238894133264f,-0.17096188876030096737f,-0.98448045538322093151f,
--0.17549425337727120322f,-0.98366241921173025453f,-0.18002290140569934818f,
--0.98282355119870534743f,-0.18454773693861947770f,-0.98196386910955524296f,
--0.18906866414980610935f,-0.98108339115048670553f,-0.19358558729580355173f,
--0.98018213596811742949f,-0.19809841071795356027f,-0.97926012264908202098f,
--0.20260703884442113343f,-0.97831737071962765473f,-0.20711137619221858808f,
--0.97735390014519996082f,-0.21161132736922766417f,-0.97636973133002125103f,
--0.21610679707621921475f,-0.97536488511665697665f,-0.22059769010887325669f,
--0.97433938278557585821f,-0.22508391135979261000f,-0.97329324605469824672f,
--0.22956536582051870199f,-0.97222649707893638027f,-0.23404195858354326365f,
--0.97113915844972520386f,-0.23851359484431830515f,-0.97003125319454397424f,
--0.24298017990326381543f,-0.96890280477642887202f,-0.24744161916777326904f,
--0.96775383709347551076f,-0.25189781815421696809f,-0.96658437447833311928f,
--0.25634868248994291395f,-0.96539444169768939830f,-0.26079411791527562503f,
--0.96418406395174582890f,-0.26523403028551151284f,-0.96295326687368398844f,
--0.26966832557291481320f,-0.96170207652912265139f,-0.27409690986870616225f,
--0.96043051941556589757f,-0.27851968938505289319f,-0.95913862246184200533f,
--0.28293657045705516984f,-0.95782641302753290802f,-0.28734745954472939999f,
--0.95649391890239510161f,-0.29175226323498920644f,-0.95514116830577078243f,
--0.29615088824362378883f,-0.95376818988599032512f,-0.30054324141727345454f,
--0.95237501271976587880f,-0.30492922973540242948f,-0.95096166631157508231f,
--0.30930876031226878231f,-0.94952818059303678577f,-0.31368174039889118454f,
--0.94807458592227633609f,-0.31804807738501467140f,-0.94660091308328364601f,
--0.32240767880106963039f,-0.94510719328526060501f,-0.32676045232013156694f,
--0.94359345816196038559f,-0.33110630575987626267f,-0.94205973977101742367f,
--0.33544514708453149199f,-0.94050607059326840620f,-0.33977688440682679571f,
--0.93893248353206459900f,-0.34410142598993881391f,-0.93733901191257495977f,
--0.34841868024943456472f,-0.93572568948108036935f,-0.35272855575521072646f,
--0.93409255040425887007f,-0.35703096123343008861f,-0.93243962926846246653f,
--0.36132580556845395048f,-0.93076696107898382326f,-0.36561299780477357624f,
--0.92907458125931585702f,-0.36989244714893387833f,-0.92736252565040111495f,
--0.37416406297145782256f,-0.92563083050987282618f,-0.37842775480876539307f,
--0.92387953251128684951f,-0.38268343236508967076f,-0.92210866874334518339f,
--0.38693100551438852630f,-0.92031827670911059425f,-0.39117038430225381518f,
--0.91850839432521225181f,-0.39540147894781629834f,-0.91667905992104270485f,
--0.39962419984564684361f,-0.91483031223794608611f,-0.40383845756765418544f,
--0.91296219042839832358f,-0.40804416286497835475f,-0.91107473405517647169f,
--0.41224122666988260999f,-0.90916798309052249127f,-0.41642956009763693048f,
--0.90724197791529592738f,-0.42060907444840234248f,-0.90529675931811881551f,
--0.42477968120910863936f,-0.90333236849451192807f,-0.42894129205532938176f,
--0.90134884704602202810f,-0.43309381885315184624f,-0.89934623697934157338f,
--0.43723717366104403181f,-0.89732458070541831763f,-0.44137126873171667052f,
--0.89528392103855747308f,-0.44549601651398174074f,-0.89322430119551532446f,
--0.44961132965460665067f,-0.89114576479458340597f,-0.45371712100016353686f,
--0.88904835585466468473f,-0.45781330359887695280f,-0.88693211879434230571f,
--0.46189979070246250936f,-0.88479709843093790056f,-0.46597649576796595916f,
--0.88264333997956290201f,-0.47004333245959545318f,-0.88047088905216086552f,
--0.47410021465054985601f,-0.87827979165654157523f,-0.47814705642484295334f,
--0.87607009419540660122f,-0.48218377207912266336f,-0.87384184346536686316f,
--0.48621027612448636246f,-0.87159508665595109012f,-0.49022648328829115938f,
--0.86932987134860673084f,-0.49423230851595978397f,-0.86704624551569287050f,
--0.49822766697278153547f,-0.86474425751946248919f,-0.50221247404571056627f,
--0.86242395611104072373f,-0.50618664534515500630f,-0.86008539042939025077f,
--0.51015009670676658704f,-0.85772861000027211809f,-0.51410274419322155026f,
--0.85535366473519613972f,-0.51804450409599922533f,-0.85296060493036374162f,
--0.52197529293715427823f,-0.85054948126560347976f,-0.52589502747108463065f,
--0.84812034480329723252f,-0.52980362468629460526f,-0.84567324698729906540f,
--0.53370100180715296379f,-0.84320823964184543620f,-0.53758707629564550512f,
--0.84072537497045818355f,-0.54146176585312322249f,-0.83822470555483818977f,
--0.54532498842204613076f,-0.83570628435375271525f,-0.54917666218771943321f,
--0.83317016470191329613f,-0.55301670558002735678f,-0.83061640030884642538f,
--0.55684503727515988203f,-0.82804504525775590729f,-0.56066157619733592021f,
--0.82545615400437755138f,-0.56446624152051938506f,-0.82284978137582642788f,
--0.56825895267013148970f,-0.82022598256943468620f,-0.57203962932475704850f,
--0.81758481315158371139f,-0.57580819141784533866f,-0.81492632905652662156f,
--0.57956455913940574387f,-0.81225058658520388200f,-0.58330865293769829094f,
--0.80955764240405148069f,-0.58704039352091774706f,-0.80684755354379944503f,
--0.59075970185887394237f,-0.80412037739826591753f,-0.59446649918466420992f,
--0.80137617172314035141f,-0.59816070699634216190f,-0.79861499463476093297f,
--0.60184224705857991555f,-0.79583690460888356633f,-0.60551104140432543410f,
--0.79304196047944375270f,-0.60916701233645309532f,-0.79023022143731003197f,
--0.61281008242940970820f,-0.78740174702903142911f,-0.61644017453085364622f,
--0.78455659715557524159f,-0.62005721176328920663f,-0.78169483207105938671f,
--0.62366111752569464155f,-0.77881651238147620031f,-0.62725181549514386070f,
--0.77592169904340779762f,-0.63082922962842424841f,-0.77301045336273710440f,
--0.63439328416364526575f,-0.77008283699334811878f,-0.63794390362184394405f,
--0.76713891193582051109f,-0.64148101280858305095f,-0.76417874053611678509f,
--0.64500453681554381635f,-0.76120238548426188974f,-0.64851440102211233008f,
--0.75820990981301539247f,-0.65201053109695950027f,-0.75520137689653654700f,
--0.65549285299961534967f,-0.75217685044904269986f,-0.65896129298203731661f,
--0.74913639452345925918f,-0.66241577759017178373f,-0.74608007351006400132f,
--0.66585623366550938940f,-0.74300795213512194071f,-0.66928258834663578725f,
--0.73992009545951631377f,-0.67269476907077274674f,-0.73681656887737001504f,
--0.67609270357531581208f,-0.73369743811466037187f,-0.67947631989936485564f,
--0.73056276922782770189f,-0.68284554638524797010f,-0.72741262860237587695f,
--0.68620031168003847721f,-0.72424708295146700276f,-0.68954054473706682948f,
--0.72106619931450810501f,-0.69286617481742462932f,-0.71787004505573170920f,
--0.69617713149146298601f,-0.71465868786276898206f,-0.69947334464028387835f,
--0.71143219574521665560f,-0.70275474445722507788f,-0.70819063703319551362f,
--0.70602126144933952112f,-0.70493408037590510329f,-0.70927282643886546687f,
--0.70166259474016867692f,-0.71250937056469221265f,-0.69837624940897302661f,
--0.71573082528381848366f,-0.69507511398000099145f,-0.71893712237280438249f,
--0.69175925836415785852f,-0.72212819392921523409f,-0.68842875278409054740f,
--0.72530397237306065694f,-0.68508366777270035541f,-0.72846439044822519637f,
--0.68172407417164981869f,-0.73160938122389251870f,-0.67835004312986146857f,
--0.73473887809596349907f,-0.67496164610201225820f,-0.73785281478846576064f,
--0.67155895484701866316f,-0.74095112535495888384f,-0.66814204142651867357f,
--0.74403374417992906853f,-0.66471097820334501538f,-0.74710060598017991040f,
--0.66126583783999237642f,-0.75015164580621496171f,-0.65780669329707874837f,
--0.75318679904361240940f,-0.65433361783180066240f,-0.75620600141439442421f,
--0.65084668499638098638f,-0.75920918897838796102f,-0.64734596863651250320f,
--0.76219629813457856482f,-0.64383154288979149715f,-0.76516726562245895860f,
--0.64030348218415200634f,-0.76812202852336519676f,-0.63676186123628419899f,
--0.77106052426181381776f,-0.63320675505005752370f,-0.77398269060682256537f,
--0.62963823891492687324f,-0.77688846567323255332f,-0.62605638840434374437f,
--0.77977778792301433164f,-0.62246127937414974518f,-0.78265059616657584041f,
--0.61885298796097643059f,-0.78550682956405382118f,-0.61523159058062726334f,
--0.78834642762660589455f,-0.61159716392646201744f,-0.79116933021769009216f,
--0.60794978496777407617f,-0.79397547755433683925f,-0.60428953094815607283f,
--0.79676481020841871672f,-0.60061647938386930612f,-0.79953726910790479110f,
--0.59693070806219639124f,-0.80229279553811572168f,-0.59323229503980012822f,
--0.80503133114296343553f,-0.58952131864106382952f,-0.80775281792619046950f,
--0.58579785745643908612f,-0.81045719825259465718f,-0.58206199034077532595f,
--0.81314441484925370496f,-0.57831379641165570060f,-0.81581441080673366972f,
--0.57455335504771631872f,-0.81846712958029832485f,-0.57078074588696736669f,
--0.82110251499110464835f,-0.56699604882510901138f,-0.82372051122739109452f,
--0.56319934401383409117f,-0.82632106284566342325f,-0.55939071185913646911f,
--0.82890411477186465294f,-0.55557023301960217765f,-0.83146961230254523567f,
--0.55173798840470766880f,-0.83401750110601791111f,-0.54789405917310007865f,
--0.83654772722351211645f,-0.54403852673088415326f,-0.83906023707031252012f,
--0.54017147272989274320f,-0.84155497743689855472f,-0.53629297906596329337f,
--0.84403189549006629733f,-0.53240312787719845655f,-0.84649093877405179320f,
--0.52850200154222859439f,-0.84893205521163961347f,-0.52458968267846928235f,
--0.85135519310526486247f,-0.52066625414036715735f,-0.85376030113811141042f,
--0.51673179901765020627f,-0.85614732837519424979f,-0.51278640063356295542f,
--0.85851622426444285097f,-0.50883014254310732216f,-0.86086693863776708735f,
--0.50486310853126736831f,-0.86319942171212427073f,-0.50088538261124104789f,
--0.86551362409056897818f,-0.49689704902265435793f,-0.86780949676330332299f,
--0.49289819222978420443f,-0.87008699110871134952f,-0.48888889691976367136f,
--0.87234605889439120752f,-0.48486924800079117537f,-0.87458665227817611321f,
--0.48083933060033440254f,-0.87680872380914542941f,-0.47679923006332214364f,
--0.87901222642863341417f,-0.47274903195034317926f,-0.88119711347122187117f,
--0.46868882203582790114f,-0.88336333866573157891f,-0.46461868630623814891f,
--0.88551085613619973103f,-0.46053871095823989412f,-0.88763962040285404598f,
--0.45644898239688419528f,-0.88974958638307266590f,-0.45234958723377066692f,
--0.89184070939234283415f,-0.44824061228522010802f,-0.89391294514520314163f,
--0.44412214457042975546f,-0.89596624975618488484f,-0.43999427130963336685f,
--0.89800057974073976830f,-0.43585707992225597440f,-0.90001589201615994629f,
--0.43171065802505731446f,-0.90201214390249317976f,-0.42755509343028247349f,
--0.90398929312344311615f,-0.42339047414379599177f,-0.90594729780726845902f,
--0.41921688836322429372f,-0.90788611648766603945f,-0.41503442447608152044f,
--0.90980570810465233311f,-0.41084317105790418845f,-0.91170603200542976730f,
--0.40664321687036886210f,-0.91358704794525091852f,-0.40243465085941865222f,
--0.91544871608826772214f,-0.39821756215337417162f,-0.91729099700837768427f,
--0.39399204006104820985f,-0.91911385169005765938f,-0.38975817406985696634f,
--0.92091724152918930102f,-0.38551605384391890441f,-0.92270112833387851747f,
--0.38126576922216276477f,-0.92446547432526249288f,-0.37700741021641820394f,
--0.92621024213831137928f,-0.37274106700951614712f,-0.92793539482261766516f,
--0.36846682995337221023f,-0.92964089584318132520f,-0.36418478956708016936f,
--0.93132670908118031505f,-0.35989503653498794433f,-0.93299279883473895669f,
--0.35559766170478407377f,-0.93463912981968066962f,-0.35129275608556687072f,
--0.93626566717027837061f,-0.34698041084592379235f,-0.93787237643998977443f,
--0.34266071731199487793f,-0.93945922360218969693f,-0.33833376696554123830f,
--0.94102617505088925753f,-0.33399965144200982614f,-0.94257319760144675502f,
--0.32965846252858749255f,-0.94410025849127265918f,-0.32531029216226331480f,
--0.94560732538052116869f,-0.32095523242787515894f,-0.94709436635277721717f,
--0.31659337555616617887f,-0.94856134991573015647f,-0.31222481392182477311f,
--0.95000824500184311017f,-0.30784964004153508865f,-0.95143502096900833820f,
--0.30346794657201103806f,-0.95284164760119871573f,-0.29907982630804058610f,
--0.95422809510910555630f,-0.29468537218051488180f,-0.95559433413077088382f,
--0.29028467725446244208f,-0.95694033573220882438f,-0.28587783472708105936f,
--0.95826607140801756124f,-0.28146493792575794091f,-0.95957151308198451733f,
--0.27704608030610028413f,-0.96085663310767954748f,-0.27262135544994886560f,
--0.96212140426904158019f,-0.26819085706340350939f,-0.96336579978095393528f,
--0.26375467897483123592f,-0.96458979328981275803f,-0.25931291513288645678f,
--0.96579335887408357397f,-0.25486565960451434965f,-0.96697647104485218161f,
--0.25041300657296539089f,-0.96813910474636233339f,-0.24595505033579515008f,
--0.96928123535654830967f,-0.24149188530286941345f,-0.97040283868755550234f,
--0.23702360599436766986f,-0.97150389098625167250f,-0.23255030703877521692f,
--0.97258436893473221296f,-0.22807208317088611960f,-0.97364424965081186603f,
--0.22358902922978990402f,-0.97468351068851066810f,-0.21910124015687010290f,
--0.97570213003852845901f,-0.21460881099378659176f,-0.97670008612871184184f,
--0.21011183688046985996f,-0.97767735782450992943f,-0.20561041305309901706f,
--0.97863392442942320759f,-0.20110463484209206708f,-0.97956976568544051887f,
--0.19659459767008077846f,-0.98048486177346927395f,-0.19208039704989252061f,
--0.98137919331375456089f,-0.18756212858253007436f,-0.98225274136628937249f,
--0.18303988795514095078f,-0.98310548743121628501f,-0.17851377093899792325f,
--0.98393741344921881176f,-0.17398387338746373887f,-0.98474850180190420801f,
--0.16945029123396829207f,-0.98553873531217606185f,-0.16491312048996975559f,
--0.98630809724459866938f,-0.16037245724292850668f,-0.98705657130575097380f,
--0.15582839765426498291f,-0.98778414164457217783f,-0.15128103795733036097f,
--0.98849079285269658701f,-0.14673047445536230304f,-0.98917650996478090342f,
--0.14217680351944814165f,-0.98984127845882052821f,-0.13762012158648653792f,
--0.99048508425645698239f,-0.13306052515713906459f,-0.99110791372327688986f,
--0.12849811079379358514f,-0.99170975366909952520f,-0.12393297511851208981f,
--0.99229059134825736699f,-0.11936521481099168773f,-0.99285041445986510489f,
--0.11479492660650993108f,-0.99338921114808065305f,-0.11022220729388330918f,
--0.99390697000235606051f,-0.10564715371341037997f,-0.99440368005767909576f,
--0.10106986275482798820f,-0.99487933079480561638f,-0.09649043135525316173f,
--0.99533391214048216877f,-0.09190895649713282101f,-0.99576741446765981713f,
--0.08732553520619255882f,-0.99617982859569687015f,-0.08274026454937570552f,
--0.99657114579055483539f,-0.07815324163279464831f,-0.99694135776498205015f,
--0.07356456359966735692f,-0.99729045667869020697f,-0.06897432762826707919f,
--0.99761843513851955478f,-0.06438263092985731240f,-0.99792528619859599548f,
--0.05978957074664013188f,-0.99821100336047818846f,-0.05519524434968971216f,
--0.99847558057329477421f,-0.05059974903689945513f,-0.99871901223387293811f,
--0.04600318213091520586f,-0.99894129318685687124f,-0.04140564097707683661f,
--0.99914241872481690532f,-0.03680722294135933131f,-0.99932238458834943273f,
--0.03220802540830459970f,-0.99948118696616694567f,-0.02760814577896616301f,
--0.99961882249517863830f,-0.02300768146883930970f,-0.99973528826056168306f,
--0.01840672990580516366f,-0.99983058179582340319f,-0.01380538852806025008f,
--0.99990470108285289808f,-0.00920375478206008311f,-0.99995764455196389786f,
--0.00460192612044835019f,-0.99998941108192840321f,1.00000000000000000000f,
-0.00000000000000000000f,0.99983058179582340319f,0.01840672990580482019f,
-0.99932238458834954375f,0.03680722294135883171f,0.99847558057329477421f,
-0.05519524434968993420f,0.99729045667869020697f,0.07356456359966742631f,
-0.99576741446765981713f,0.09190895649713272386f,0.99390697000235606051f,
-0.11022220729388305938f,0.99170975366909952520f,0.12849811079379316880f,
-0.98917650996478101444f,0.14673047445536174793f,0.98630809724459866938f,
-0.16491312048996989437f,0.98310548743121628501f,0.18303988795514095078f,
-0.97956976568544051887f,0.20110463484209190055f,0.97570213003852857003f,
-0.21910124015686979759f,0.97150389098625178352f,0.23702360599436719801f,
-0.96697647104485207059f,0.25486565960451457169f,0.96212140426904158019f,
-0.27262135544994897662f,0.95694033573220882438f,0.29028467725446233105f,
-0.95143502096900833820f,0.30784964004153486661f,0.94560732538052127971f,
-0.32531029216226292622f,0.93945922360218991898f,0.34266071731199437833f,
-0.93299279883473895669f,0.35989503653498811087f,0.92621024213831137928f,
-0.37700741021641825945f,0.91911385169005777040f,0.39399204006104809883f,
-0.91170603200542987832f,0.41084317105790391089f,0.90398929312344333820f,
-0.42755509343028208491f,0.89596624975618521791f,0.44412214457042920035f,
-0.88763962040285393496f,0.46053871095824000514f,0.87901222642863352519f,
-0.47679923006332208812f,0.87008699110871146054f,0.49289819222978403790f,
-0.86086693863776730939f,0.50883014254310698909f,0.85135519310526519554f,
-0.52458968267846894928f,0.84155497743689844370f,0.54017147272989285423f,
-0.83146961230254523567f,0.55557023301960217765f,0.82110251499110464835f,
-0.57078074588696725566f,0.81045719825259476821f,0.58579785745643886408f,
-0.79953726910790501314f,0.60061647938386897305f,0.78834642762660622761f,
-0.61523159058062681925f,0.77688846567323244230f,0.62963823891492698426f,
-0.76516726562245895860f,0.64383154288979138613f,0.75318679904361252042f,
-0.65780669329707863735f,0.74095112535495921691f,0.67155895484701833009f,
-0.72846439044822519637f,0.68508366777270035541f,0.71573082528381870571f,
-0.69837624940897280457f,0.70275474445722529993f,0.71143219574521643356f,
-0.68954054473706694051f,0.72424708295146689174f,0.67609270357531603413f,
-0.73681656887736979300f,0.66241577759017178373f,0.74913639452345925918f,
-0.64851440102211255212f,0.76120238548426177871f,0.63439328416364548779f,
-0.77301045336273688235f,0.62005721176328920663f,0.78455659715557524159f,
-0.60551104140432554512f,0.79583690460888345530f,0.59075970185887427544f,
-0.80684755354379922299f,0.57580819141784533866f,0.81758481315158371139f,
-0.56066157619733603124f,0.82804504525775579626f,0.54532498842204646383f,
-0.83822470555483796772f,0.52980362468629482731f,0.84812034480329712149f,
-0.51410274419322166128f,0.85772861000027211809f,0.49822766697278186854f,
-0.86704624551569264845f,0.48218377207912282989f,0.87607009419540660122f,
-0.46597649576796612569f,0.88479709843093778954f,0.44961132965460659516f,
-0.89322430119551532446f,0.43309381885315201277f,0.90134884704602202810f,
-0.41642956009763731906f,0.90916798309052226923f,0.39962419984564678810f,
-0.91667905992104270485f,0.38268343236508983729f,0.92387953251128673848f,
-0.36561299780477396482f,0.93076696107898371224f,0.34841868024943450921f,
-0.93733901191257495977f,0.33110630575987642921f,0.94359345816196038559f,
-0.31368174039889157312f,0.94952818059303667475f,0.29615088824362395536f,
-0.95514116830577067141f,0.27851968938505305973f,0.96043051941556578655f,
-0.26079411791527556952f,0.96539444169768939830f,0.24298017990326398197f,
-0.97003125319454397424f,0.22508391135979277653f,0.97433938278557585821f,
-0.20711137619221856032f,0.97831737071962765473f,0.18906866414980627589f,
-0.98196386910955524296f,0.17096188876030135595f,0.98527764238894122162f,
-0.15279718525844340760f,0.98825756773074946437f,0.13458070850712622324f,
-0.99090263542778000971f,0.11631863091190487725f,0.99321194923479450001f,
-0.09801714032956077016f,0.99518472667219681771f,0.07968243797143012563f,
-0.99682029929116566791f,0.06132073630220864768f,0.99811811290014917919f,
-0.04293825693494095902f,0.99907772775264536147f,0.02454122852291226384f,
-0.99969881869620424997f,0.00613588464915451517f,0.99998117528260110909f,
--0.01227153828571982304f,0.99992470183914450299f,-0.03067480317663645942f,
-0.99952941750109314256f,-0.04906767432741800800f,0.99879545620517240501f,
--0.06744391956366398155f,0.99772306664419163624f,-0.08579731234443975507f,
-0.99631261218277800129f,-0.10412163387205460030f,0.99456457073425541537f,
--0.12241067519921615403f,0.99247953459870996706f,-0.14065823933284912761f,
-0.99005821026229712256f,-0.15885814333386127917f,0.98730141815785843473f,
--0.17700422041214874946f,0.98421009238692902521f,-0.19509032201612819257f,
-0.98078528040323043058f,-0.21311031991609125091f,0.97702814265775439484f,
--0.23105810828067113727f,0.97293995220556017678f,-0.24892760574572012078f,
-0.96852209427441737777f,-0.26671275747489830987f,0.96377606579543984022f,
--0.28440753721127171039f,0.95870347489587159906f,-0.30200594931922808417f,
-0.95330604035419386211f,-0.31950203081601563637f,0.94758559101774120226f,
--0.33688985339221994009f,0.94154406518302080631f,-0.35416352542049039931f,
-0.93518350993894761025f,-0.37131719395183748755f,0.92850608047321558924f,
--0.38834504669882619066f,0.92151403934204201285f,-0.40524131400498974998f,
-0.91420975570353069095f,-0.42200027079979968159f,0.90659570451491533483f,
--0.43861623853852738097f,0.89867446569395392775f,-0.45508358712634372489f,
-0.89044872324475798919f,-0.47139673682599769755f,0.88192126434835504956f,
--0.48755016014843571837f,0.87309497841829020182f,-0.50353838372571746440f,
-0.86397285612158680745f,-0.51935599016558964269f,0.85455798836540053376f,
--0.53499761988709704230f,0.84485356524970722791f,-0.55045797293660470029f,
-0.83486287498638012128f,-0.56573181078361323149f,0.82458930278502517996f,
--0.58081395809576441547f,0.81403632970594852480f,-0.59569930449243335691f,
-0.80320753148064494287f,-0.61038280627630958630f,0.79210657730021227785f,
--0.62485948814238623239f,0.78073722857209459924f,-0.63912444486377573138f,
-0.76910333764557958780f,-0.65317284295377653347f,0.75720884650648467851f,
--0.66699992230363736034f,0.74505778544146605835f,-0.68060099779545302212f,
-0.73265427167241281570f,-0.69397146088965377952f,0.72000250796138176579f,
--0.70710678118654746172f,0.70710678118654757274f,-0.72000250796138165477f,
-0.69397146088965389055f,-0.73265427167241270467f,0.68060099779545324417f,
--0.74505778544146594733f,0.66699992230363758239f,-0.75720884650648467851f,
-0.65317284295377664449f,-0.76910333764557947678f,0.63912444486377584241f,
--0.78073722857209448822f,0.62485948814238634341f,-0.79210657730021216683f,
-0.61038280627630969732f,-0.80320753148064483184f,0.59569930449243346793f,
--0.81403632970594841378f,0.58081395809576452649f,-0.82458930278502506894f,
-0.56573181078361345353f,-0.83486287498638001026f,0.55045797293660492233f,
--0.84485356524970711689f,0.53499761988709715332f,-0.85455798836540042274f,
-0.51935599016558975372f,-0.86397285612158669643f,0.50353838372571757542f,
--0.87309497841829009079f,0.48755016014843588490f,-0.88192126434835493853f,
-0.47139673682599780857f,-0.89044872324475787817f,0.45508358712634389143f,
--0.89867446569395392775f,0.43861623853852754751f,-0.90659570451491533483f,
-0.42200027079979984812f,-0.91420975570353069095f,0.40524131400498991651f,
--0.92151403934204179080f,0.38834504669882657923f,-0.92850608047321547822f,
-0.37131719395183770960f,-0.93518350993894761025f,0.35416352542049039931f,
--0.94154406518302069529f,0.33688985339222032867f,-0.94758559101774109124f,
-0.31950203081601580291f,-0.95330604035419386211f,0.30200594931922802866f,
--0.95870347489587148804f,0.28440753721127209896f,-0.96377606579543984022f,
-0.26671275747489847641f,-0.96852209427441737777f,0.24892760574572009302f,
--0.97293995220556006576f,0.23105810828067133156f,-0.97702814265775439484f,
-0.21311031991609141745f,-0.98078528040323043058f,0.19509032201612860891f,
--0.98421009238692902521f,0.17700422041214894375f,-0.98730141815785843473f,
-0.15885814333386147346f,-0.99005821026229701154f,0.14065823933284954395f,
--0.99247953459870996706f,0.12241067519921634832f,-0.99456457073425541537f,
-0.10412163387205457254f,-0.99631261218277800129f,0.08579731234444015753f,
--0.99772306664419163624f,0.06744391956366417584f,-0.99879545620517240501f,
-0.04906767432741796636f,-0.99952941750109314256f,0.03067480317663686534f,
--0.99992470183914450299f,0.01227153828572000692f,-0.99998117528260110909f,
--0.00613588464915455420f,-0.99969881869620424997f,-0.02454122852291207996f,
--0.99907772775264536147f,-0.04293825693494077861f,-0.99811811290014917919f,
--0.06132073630220824523f,-0.99682029929116577893f,-0.07968243797142994522f,
--0.99518472667219692873f,-0.09801714032956058975f,-0.99321194923479461103f,
--0.11631863091190447479f,-0.99090263542778000971f,-0.13458070850712605671f,
--0.98825756773074946437f,-0.15279718525844343535f,-0.98527764238894133264f,
--0.17096188876030096737f,-0.98196386910955524296f,-0.18906866414980610935f,
--0.97831737071962765473f,-0.20711137619221858808f,-0.97433938278557585821f,
--0.22508391135979261000f,-0.97003125319454397424f,-0.24298017990326381543f,
--0.96539444169768939830f,-0.26079411791527562503f,-0.96043051941556589757f,
--0.27851968938505289319f,-0.95514116830577078243f,-0.29615088824362378883f,
--0.94952818059303678577f,-0.31368174039889118454f,-0.94359345816196038559f,
--0.33110630575987626267f,-0.93733901191257495977f,-0.34841868024943456472f,
--0.93076696107898382326f,-0.36561299780477357624f,-0.92387953251128684951f,
--0.38268343236508967076f,-0.91667905992104270485f,-0.39962419984564684361f,
--0.90916798309052249127f,-0.41642956009763693048f,-0.90134884704602202810f,
--0.43309381885315184624f,-0.89322430119551532446f,-0.44961132965460665067f,
--0.88479709843093790056f,-0.46597649576796595916f,-0.87607009419540660122f,
--0.48218377207912266336f,-0.86704624551569287050f,-0.49822766697278153547f,
--0.85772861000027211809f,-0.51410274419322155026f,-0.84812034480329723252f,
--0.52980362468629460526f,-0.83822470555483818977f,-0.54532498842204613076f,
--0.82804504525775590729f,-0.56066157619733592021f,-0.81758481315158371139f,
--0.57580819141784533866f,-0.80684755354379944503f,-0.59075970185887394237f,
--0.79583690460888356633f,-0.60551104140432543410f,-0.78455659715557524159f,
--0.62005721176328920663f,-0.77301045336273710440f,-0.63439328416364526575f,
--0.76120238548426188974f,-0.64851440102211233008f,-0.74913639452345925918f,
--0.66241577759017178373f,-0.73681656887737001504f,-0.67609270357531581208f,
--0.72424708295146700276f,-0.68954054473706682948f,-0.71143219574521665560f,
--0.70275474445722507788f,-0.69837624940897302661f,-0.71573082528381848366f,
--0.68508366777270035541f,-0.72846439044822519637f,-0.67155895484701866316f,
--0.74095112535495888384f,-0.65780669329707874837f,-0.75318679904361240940f,
--0.64383154288979149715f,-0.76516726562245895860f,-0.62963823891492687324f,
--0.77688846567323255332f,-0.61523159058062726334f,-0.78834642762660589455f,
--0.60061647938386930612f,-0.79953726910790479110f,-0.58579785745643908612f,
--0.81045719825259465718f,-0.57078074588696736669f,-0.82110251499110464835f,
--0.55557023301960217765f,-0.83146961230254523567f,-0.54017147272989274320f,
--0.84155497743689855472f,-0.52458968267846928235f,-0.85135519310526486247f,
--0.50883014254310732216f,-0.86086693863776708735f,-0.49289819222978420443f,
--0.87008699110871134952f,-0.47679923006332214364f,-0.87901222642863341417f,
--0.46053871095823989412f,-0.88763962040285404598f,-0.44412214457042975546f,
--0.89596624975618488484f,-0.42755509343028247349f,-0.90398929312344311615f,
--0.41084317105790418845f,-0.91170603200542976730f,-0.39399204006104820985f,
--0.91911385169005765938f,-0.37700741021641820394f,-0.92621024213831137928f,
--0.35989503653498794433f,-0.93299279883473895669f,-0.34266071731199487793f,
--0.93945922360218969693f,-0.32531029216226331480f,-0.94560732538052116869f,
--0.30784964004153508865f,-0.95143502096900833820f,-0.29028467725446244208f,
--0.95694033573220882438f,-0.27262135544994886560f,-0.96212140426904158019f,
--0.25486565960451434965f,-0.96697647104485218161f,-0.23702360599436766986f,
--0.97150389098625167250f,-0.21910124015687010290f,-0.97570213003852845901f,
--0.20110463484209206708f,-0.97956976568544051887f,-0.18303988795514095078f,
--0.98310548743121628501f,-0.16491312048996975559f,-0.98630809724459866938f,
--0.14673047445536230304f,-0.98917650996478090342f,-0.12849811079379358514f,
--0.99170975366909952520f,-0.11022220729388330918f,-0.99390697000235606051f,
--0.09190895649713282101f,-0.99576741446765981713f,-0.07356456359966735692f,
--0.99729045667869020697f,-0.05519524434968971216f,-0.99847558057329477421f,
--0.03680722294135933131f,-0.99932238458834943273f,-0.01840672990580516366f,
--0.99983058179582340319f,1.00000000000000000000f,0.00000000000000000000f,
-0.99729045667869020697f,0.07356456359966742631f,0.98917650996478101444f,
-0.14673047445536174793f,0.97570213003852857003f,0.21910124015686979759f,
-0.95694033573220882438f,0.29028467725446233105f,0.93299279883473895669f,
-0.35989503653498811087f,0.90398929312344333820f,0.42755509343028208491f,
-0.87008699110871146054f,0.49289819222978403790f,0.83146961230254523567f,
-0.55557023301960217765f,0.78834642762660622761f,0.61523159058062681925f,
-0.74095112535495921691f,0.67155895484701833009f,0.68954054473706694051f,
-0.72424708295146689174f,0.63439328416364548779f,0.77301045336273688235f,
-0.57580819141784533866f,0.81758481315158371139f,0.51410274419322166128f,
-0.85772861000027211809f,0.44961132965460659516f,0.89322430119551532446f,
-0.38268343236508983729f,0.92387953251128673848f,0.31368174039889157312f,
-0.94952818059303667475f,0.24298017990326398197f,0.97003125319454397424f,
-0.17096188876030135595f,0.98527764238894122162f,0.09801714032956077016f,
-0.99518472667219681771f,0.02454122852291226384f,0.99969881869620424997f,
--0.04906767432741800800f,0.99879545620517240501f,-0.12241067519921615403f,
-0.99247953459870996706f,-0.19509032201612819257f,0.98078528040323043058f,
--0.26671275747489830987f,0.96377606579543984022f,-0.33688985339221994009f,
-0.94154406518302080631f,-0.40524131400498974998f,0.91420975570353069095f,
--0.47139673682599769755f,0.88192126434835504956f,-0.53499761988709704230f,
-0.84485356524970722791f,-0.59569930449243335691f,0.80320753148064494287f,
--0.65317284295377653347f,0.75720884650648467851f,-0.70710678118654746172f,
-0.70710678118654757274f,-0.75720884650648467851f,0.65317284295377664449f,
--0.80320753148064483184f,0.59569930449243346793f,-0.84485356524970711689f,
-0.53499761988709715332f,-0.88192126434835493853f,0.47139673682599780857f,
--0.91420975570353069095f,0.40524131400498991651f,-0.94154406518302069529f,
-0.33688985339222032867f,-0.96377606579543984022f,0.26671275747489847641f,
--0.98078528040323043058f,0.19509032201612860891f,-0.99247953459870996706f,
-0.12241067519921634832f,-0.99879545620517240501f,0.04906767432741796636f,
--0.99969881869620424997f,-0.02454122852291207996f,-0.99518472667219692873f,
--0.09801714032956058975f,-0.98527764238894133264f,-0.17096188876030096737f,
--0.97003125319454397424f,-0.24298017990326381543f,-0.94952818059303678577f,
--0.31368174039889118454f,-0.92387953251128684951f,-0.38268343236508967076f,
--0.89322430119551532446f,-0.44961132965460665067f,-0.85772861000027211809f,
--0.51410274419322155026f,-0.81758481315158371139f,-0.57580819141784533866f,
--0.77301045336273710440f,-0.63439328416364526575f,-0.72424708295146700276f,
--0.68954054473706682948f,-0.67155895484701866316f,-0.74095112535495888384f,
--0.61523159058062726334f,-0.78834642762660589455f,-0.55557023301960217765f,
--0.83146961230254523567f,-0.49289819222978420443f,-0.87008699110871134952f,
--0.42755509343028247349f,-0.90398929312344311615f,-0.35989503653498794433f,
--0.93299279883473895669f,-0.29028467725446244208f,-0.95694033573220882438f,
--0.21910124015687010290f,-0.97570213003852845901f,-0.14673047445536230304f,
--0.98917650996478090342f,-0.07356456359966735692f,-0.99729045667869020697f,
-1.00000000000000000000f,0.00000000000000000000f,0.95694033573220882438f,
-0.29028467725446233105f,0.83146961230254523567f,0.55557023301960217765f,
-0.63439328416364548779f,0.77301045336273688235f,0.38268343236508983729f,
-0.92387953251128673848f,0.09801714032956077016f,0.99518472667219681771f,
--0.19509032201612819257f,0.98078528040323043058f,-0.47139673682599769755f,
-0.88192126434835504956f,-0.70710678118654746172f,0.70710678118654757274f,
--0.88192126434835493853f,0.47139673682599780857f,-0.98078528040323043058f,
-0.19509032201612860891f,-0.99518472667219692873f,-0.09801714032956058975f,
--0.92387953251128684951f,-0.38268343236508967076f,-0.77301045336273710440f,
--0.63439328416364526575f,-0.55557023301960217765f,-0.83146961230254523567f,
--0.29028467725446244208f,-0.95694033573220882438f,1.00000000000000000000f,
-0.00000000000000000000f,0.38268343236508983729f,0.92387953251128673848f,
--0.70710678118654746172f,0.70710678118654757274f,-0.92387953251128684951f,
--0.38268343236508967076f,};
+1.00000000000000000000f,0.00000000000000000000f,0.99998939037322998047f,
+0.00460192607715725899f,0.99995762109756469727f,0.00920375436544418335f,
+0.99990469217300415039f,0.01380538847297430038f,0.99983060359954833984f,
+0.01840673014521598816f,0.99973529577255249023f,0.02300768159329891205f,
+0.99961882829666137695f,0.02760814502835273743f,0.99948120117187500000f,
+0.03220802545547485352f,0.99932235479354858398f,0.03680722415447235107f,
+0.99914240837097167969f,0.04140564054250717163f,0.99894130229949951172f,
+0.04600318148732185364f,0.99871903657913208008f,0.05059975013136863708f,
+0.99847555160522460938f,0.05519524589180946350f,0.99821102619171142578f,
+0.05978957191109657288f,0.99792528152465820312f,0.06438262760639190674f,
+0.99761843681335449219f,0.06897433102130889893f,0.99729043245315551758f,
+0.07356456667184829712f,0.99694132804870605469f,0.07815324515104293823f,
+0.99657112360000610352f,0.08274026215076446533f,0.99617981910705566406f,
+0.08732553571462631226f,0.99576741456985473633f,0.09190895408391952515f,
+0.99533390998840332031f,0.09649042785167694092f,0.99487930536270141602f,
+0.10106986016035079956f,0.99440366029739379883f,0.10564715415239334106f,
+0.99390697479248046875f,0.11022220551967620850f,0.99338918924331665039f,
+0.11479492485523223877f,0.99285042285919189453f,0.11936521530151367188f,
+0.99229061603546142578f,0.12393297255039215088f,0.99170976877212524414f,
+0.12849810719490051270f,0.99110794067382812500f,0.13306052982807159424f,
+0.99048507213592529297f,0.13762012124061584473f,0.98984128236770629883f,
+0.14217680692672729492f,0.98917651176452636719f,0.14673046767711639404f,
+0.98849081993103027344f,0.15128104388713836670f,0.98778414726257324219f,
+0.15582840144634246826f,0.98705655336380004883f,0.16037245094776153564f,
+0.98630809783935546875f,0.16491311788558959961f,0.98553872108459472656f,
+0.16945029795169830322f,0.98474848270416259766f,0.17398387193679809570f,
+0.98393744230270385742f,0.17851376533508300781f,0.98310548067092895508f,
+0.18303988873958587646f,0.98225271701812744141f,0.18756212294101715088f,
+0.98137921094894409180f,0.19208039343357086182f,0.98048484325408935547f,
+0.19659459590911865234f,0.97956979274749755859f,0.20110464096069335938f,
+0.97863394021987915039f,0.20561040937900543213f,0.97767734527587890625f,
+0.21011184155941009521f,0.97670006752014160156f,0.21460881829261779785f,
+0.97570210695266723633f,0.21910123527050018311f,0.97468352317810058594f,
+0.22358903288841247559f,0.97364425659179687500f,0.22807207703590393066f,
+0.97258436679840087891f,0.23255030810832977295f,0.97150391340255737305f,
+0.23702360689640045166f,0.97040283679962158203f,0.24149188399314880371f,
+0.96928125619888305664f,0.24595504999160766602f,0.96813911199569702148f,
+0.25041300058364868164f,0.96697646379470825195f,0.25486564636230468750f,
+0.96579337120056152344f,0.25931292772293090820f,0.96458977460861206055f,
+0.26375466585159301758f,0.96336579322814941406f,0.26819086074829101562f,
+0.96212142705917358398f,0.27262136340141296387f,0.96085661649703979492f,
+0.27704608440399169922f,0.95957154035568237305f,0.28146493434906005859f,
+0.95826607942581176758f,0.28587782382965087891f,0.95694035291671752930f,
+0.29028466343879699707f,0.95559436082839965820f,0.29468536376953125000f,
+0.95422810316085815430f,0.29907983541488647461f,0.95284163951873779297f,
+0.30346795916557312012f,0.95143502950668334961f,0.30784964561462402344f,
+0.95000827312469482422f,0.31222480535507202148f,0.94856137037277221680f,
+0.31659337878227233887f,0.94709438085556030273f,0.32095524668693542480f,
+0.94560730457305908203f,0.32531028985977172852f,0.94410026073455810547f,
+0.32965844869613647461f,0.94257318973541259766f,0.33399966359138488770f,
+0.94102615118026733398f,0.33833375573158264160f,0.93945920467376708984f,
+0.34266072511672973633f,0.93787235021591186523f,0.34698042273521423340f,
+0.93626564741134643555f,0.35129275918006896973f,0.93463915586471557617f,
+0.35559767484664916992f,0.93299281597137451172f,0.35989505052566528320f,
+0.93132668733596801758f,0.36418479681015014648f,0.92964088916778564453f,
+0.36846682429313659668f,0.92793542146682739258f,0.37274107336997985840f,
+0.92621022462844848633f,0.37700742483139038086f,0.92446547746658325195f,
+0.38126575946807861328f,0.92270112037658691406f,0.38551604747772216797f,
+0.92091721296310424805f,0.38975816965103149414f,0.91911387443542480469f,
+0.39399203658103942871f,0.91729098558425903320f,0.39821755886077880859f,
+0.91544872522354125977f,0.40243464708328247070f,0.91358703374862670898f,
+0.40664321184158325195f,0.91170603036880493164f,0.41084316372871398926f,
+0.90980571508407592773f,0.41503441333770751953f,0.90788608789443969727f,
+0.41921690106391906738f,0.90594726800918579102f,0.42339047789573669434f,
+0.90398931503295898438f,0.42755508422851562500f,0.90201216936111450195f,
+0.43171066045761108398f,0.90001589059829711914f,0.43585708737373352051f,
+0.89800059795379638672f,0.43999427556991577148f,0.89596623182296752930f,
+0.44412213563919067383f,0.89391297101974487305f,0.44824060797691345215f,
+0.89184069633483886719f,0.45234957337379455566f,0.88974958658218383789f,
+0.45644897222518920898f,0.88763964176177978516f,0.46053871512413024902f,
+0.88551086187362670898f,0.46461868286132812500f,0.88336336612701416016f,
+0.46868881583213806152f,0.88119709491729736328f,0.47274902462959289551f,
+0.87901222705841064453f,0.47679921984672546387f,0.87680870294570922852f,
+0.48083934187889099121f,0.87458664178848266602f,0.48486924171447753906f,
+0.87234604358673095703f,0.48888888955116271973f,0.87008696794509887695f,
+0.49289819598197937012f,0.86780947446823120117f,0.49689704179763793945f,
+0.86551362276077270508f,0.50088536739349365234f,0.86319941282272338867f,
+0.50486308336257934570f,0.86086696386337280273f,0.50883013010025024414f,
+0.85851621627807617188f,0.51278638839721679688f,0.85614734888076782227f,
+0.51673179864883422852f,0.85376030206680297852f,0.52066624164581298828f,
+0.85135519504547119141f,0.52458965778350830078f,0.84893202781677246094f,
+0.52850198745727539062f,0.84649091958999633789f,0.53240311145782470703f,
+0.84403187036514282227f,0.53629297018051147461f,0.84155499935150146484f,
+0.54017144441604614258f,0.83906024694442749023f,0.54403853416442871094f,
+0.83654773235321044922f,0.54789406061172485352f,0.83401751518249511719f,
+0.55173796415328979492f,0.83146959543228149414f,0.55557024478912353516f,
+0.82890409231185913086f,0.55939072370529174805f,0.82632106542587280273f,
+0.56319934129714965820f,0.82372051477432250977f,0.56699603796005249023f,
+0.82110249996185302734f,0.57078075408935546875f,0.81846714019775390625f,
+0.57455337047576904297f,0.81581443548202514648f,0.57831376791000366211f,
+0.81314438581466674805f,0.58206200599670410156f,0.81045717000961303711f,
+0.58579784631729125977f,0.80775284767150878906f,0.58952128887176513672f,
+0.80503135919570922852f,0.59323227405548095703f,0.80229282379150390625f,
+0.59693068265914916992f,0.79953724145889282227f,0.60061645507812500000f,
+0.79676479101181030273f,0.60428953170776367188f,0.79397547245025634766f,
+0.60794979333877563477f,0.79116934537887573242f,0.61159718036651611328f,
+0.78834640979766845703f,0.61523157358169555664f,0.78550684452056884766f,
+0.61885297298431396484f,0.78265058994293212891f,0.62246125936508178711f,
+0.77977776527404785156f,0.62605637311935424805f,0.77688848972320556641f,
+0.62963825464248657227f,0.77398270368576049805f,0.63320678472518920898f,
+0.77106052637100219727f,0.63676184415817260742f,0.76812201738357543945f,
+0.64030349254608154297f,0.76516723632812500000f,0.64383155107498168945f,
+0.76219630241394042969f,0.64734596014022827148f,0.75920921564102172852f,
+0.65084666013717651367f,0.75620597600936889648f,0.65433359146118164062f,
+0.75318682193756103516f,0.65780669450759887695f,0.75015163421630859375f,
+0.66126585006713867188f,0.74710059165954589844f,0.66471099853515625000f,
+0.74403375387191772461f,0.66814202070236206055f,0.74095112085342407227f,
+0.67155897617340087891f,0.73785281181335449219f,0.67496162652969360352f,
+0.73473888635635375977f,0.67835003137588500977f,0.73160940408706665039f,
+0.68172407150268554688f,0.72846436500549316406f,0.68508368730545043945f,
+0.72530394792556762695f,0.68842875957489013672f,0.72212821245193481445f,
+0.69175922870635986328f,0.71893709897994995117f,0.69507509469985961914f,
+0.71573084592819213867f,0.69837623834609985352f,0.71250939369201660156f,
+0.70166260004043579102f,0.70927280187606811523f,0.70493406057357788086f,
+0.70602124929428100586f,0.70819061994552612305f,0.70275473594665527344f,
+0.71143221855163574219f,0.69947332143783569336f,0.71465867757797241211f,
+0.69617712497711181641f,0.71787005662918090820f,0.69286614656448364258f,
+0.72106617689132690430f,0.68954056501388549805f,0.72424709796905517578f,
+0.68620032072067260742f,0.72741264104843139648f,0.68284553289413452148f,
+0.73056274652481079102f,0.67947632074356079102f,0.73369741439819335938f,
+0.67609268426895141602f,0.73681658506393432617f,0.67269474267959594727f,
+0.73992007970809936523f,0.66928261518478393555f,0.74300795793533325195f,
+0.66585624217987060547f,0.74608010053634643555f,0.66241580247879028320f,
+0.74913638830184936523f,0.65896129608154296875f,0.75217682123184204102f,
+0.65549284219741821289f,0.75520139932632446289f,0.65201056003570556641f,
+0.75820988416671752930f,0.64851438999176025391f,0.76120239496231079102f,
+0.64500451087951660156f,0.76417875289916992188f,0.64148104190826416016f,
+0.76713889837265014648f,0.63794392347335815430f,0.77008283138275146484f,
+0.63439327478408813477f,0.77301043272018432617f,0.63082921504974365234f,
+0.77592170238494873047f,0.62725180387496948242f,0.77881652116775512695f,
+0.62366110086441040039f,0.78169482946395874023f,0.62005722522735595703f,
+0.78455656766891479492f,0.61644017696380615234f,0.78740173578262329102f,
+0.61281007528305053711f,0.79023021459579467773f,0.60916703939437866211f,
+0.79304194450378417969f,0.60551106929779052734f,0.79583692550659179688f,
+0.60184222459793090820f,0.79861497879028320312f,0.59816068410873413086f,
+0.80137616395950317383f,0.59446650743484497070f,0.80412036180496215820f,
+0.59075969457626342773f,0.80684757232666015625f,0.58704036474227905273f,
+0.80955761671066284180f,0.58330863714218139648f,0.81225061416625976562f,
+0.57956457138061523438f,0.81492632627487182617f,0.57580816745758056641f,
+0.81758481264114379883f,0.57203960418701171875f,0.82022595405578613281f,
+0.56825894117355346680f,0.82284981012344360352f,0.56446623802185058594f,
+0.82545614242553710938f,0.56066155433654785156f,0.82804507017135620117f,
+0.55684500932693481445f,0.83061641454696655273f,0.55301672220230102539f,
+0.83317017555236816406f,0.54917663335800170898f,0.83570629358291625977f,
+0.54532498121261596680f,0.83822470903396606445f,0.54146176576614379883f,
+0.84072536230087280273f,0.53758704662322998047f,0.84320825338363647461f,
+0.53370100259780883789f,0.84567326307296752930f,0.52980363368988037109f,
+0.84812033176422119141f,0.52589499950408935547f,0.85054945945739746094f,
+0.52197527885437011719f,0.85296058654785156250f,0.51804453134536743164f,
+0.85535365343093872070f,0.51410275697708129883f,0.85772860050201416016f,
+0.51015007495880126953f,0.86008536815643310547f,0.50618666410446166992f,
+0.86242395639419555664f,0.50221246480941772461f,0.86474424600601196289f,
+0.49822765588760375977f,0.86704623699188232422f,0.49423229694366455078f,
+0.86932986974716186523f,0.49022647738456726074f,0.87159508466720581055f,
+0.48621028661727905273f,0.87384182214736938477f,0.48218378424644470215f,
+0.87607008218765258789f,0.47814705967903137207f,0.87827980518341064453f,
+0.47410020232200622559f,0.88047087192535400391f,0.47004333138465881348f,
+0.88264334201812744141f,0.46597650647163391113f,0.88479709625244140625f,
+0.46189978718757629395f,0.88693213462829589844f,0.45781329274177551270f,
+0.88904833793640136719f,0.45371711254119873047f,0.89114576578140258789f,
+0.44961133599281311035f,0.89322429895401000977f,0.44549602270126342773f,
+0.89528393745422363281f,0.44137126207351684570f,0.89732456207275390625f,
+0.43723717331886291504f,0.89934623241424560547f,0.43309381604194641113f,
+0.90134882926940917969f,0.42894127964973449707f,0.90333235263824462891f,
+0.42477968335151672363f,0.90529674291610717773f,0.42060908675193786621f,
+0.90724200010299682617f,0.41642954945564270020f,0.90916800498962402344f,
+0.41224122047424316406f,0.91107475757598876953f,0.40804415941238403320f,
+0.91296219825744628906f,0.40383845567703247070f,0.91483032703399658203f,
+0.39962419867515563965f,0.91667908430099487305f,0.39540147781372070312f,
+0.91850841045379638672f,0.39117038249969482422f,0.92031830549240112305f,
+0.38693100214004516602f,0.92210865020751953125f,0.38268342614173889160f,
+0.92387950420379638672f,0.37842774391174316406f,0.92563080787658691406f,
+0.37416407465934753418f,0.92736250162124633789f,0.36989244818687438965f,
+0.92907458543777465820f,0.36561298370361328125f,0.93076694011688232422f,
+0.36132580041885375977f,0.93243962526321411133f,0.35703095793724060059f,
+0.93409252166748046875f,0.35272854566574096680f,0.93572568893432617188f,
+0.34841868281364440918f,0.93733900785446166992f,0.34410142898559570312f,
+0.93893247842788696289f,0.33977687358856201172f,0.94050604104995727539f,
+0.33544513583183288574f,0.94205975532531738281f,0.33110630512237548828f,
+0.94359344244003295898f,0.32676044106483459473f,0.94510722160339355469f,
+0.32240769267082214355f,0.94660091400146484375f,0.31804808974266052246f,
+0.94807457923889160156f,0.31368175148963928223f,0.94952815771102905273f,
+0.30930876731872558594f,0.95096164941787719727f,0.30492922663688659668f,
+0.95237499475479125977f,0.30054324865341186523f,0.95376819372177124023f,
+0.29615089297294616699f,0.95514118671417236328f,0.29175224900245666504f,
+0.95649391412734985352f,0.28734746575355529785f,0.95782643556594848633f,
+0.28293657302856445312f,0.95913863182067871094f,0.27851969003677368164f,
+0.96043050289154052734f,0.27409690618515014648f,0.96170204877853393555f,
+0.26966831088066101074f,0.96295326948165893555f,0.26523402333259582520f,
+0.96418404579162597656f,0.26079410314559936523f,0.96539443731307983398f,
+0.25634866952896118164f,0.96658438444137573242f,0.25189781188964843750f,
+0.96775382757186889648f,0.24744161963462829590f,0.96890282630920410156f,
+0.24298018217086791992f,0.97003126144409179688f,0.23851358890533447266f,
+0.97113913297653198242f,0.23404195904731750488f,0.97222650051116943359f,
+0.22956536710262298584f,0.97329324483871459961f,0.22508391737937927246f,
+0.97433936595916748047f,0.22059768438339233398f,0.97536486387252807617f,
+0.21610680222511291504f,0.97636973857879638672f,0.21161133050918579102f,
+0.97735387086868286133f,0.20711137354373931885f,0.97831737995147705078f,
+0.20260703563690185547f,0.97926014661788940430f,0.19809840619564056396f,
+0.98018211126327514648f,0.19358558952808380127f,0.98108339309692382812f,
+0.18906866014003753662f,0.98196387290954589844f,0.18454773724079132080f,
+0.98282355070114135742f,0.18002289533615112305f,0.98366242647171020508f,
+0.17549425363540649414f,0.98448044061660766602f,0.17096188664436340332f,
+0.98527765274047851562f,0.16642589867115020752f,0.98605394363403320312f,
+0.16188639402389526367f,0.98680937290191650391f,0.15734346210956573486f,
+0.98754394054412841797f,0.15279719233512878418f,0.98825758695602416992f,
+0.14824767410755157471f,0.98895025253295898438f,0.14369502663612365723f,
+0.98962199687957763672f,0.13913933932781219482f,0.99027281999588012695f,
+0.13458070158958435059f,0.99090266227722167969f,0.13001921772956848145f,
+0.99151146411895751953f,0.12545497715473175049f,0.99209928512573242188f,
+0.12088808417320251465f,0.99266612529754638672f,0.11631862819194793701f,
+0.99321192502975463867f,0.11174671351909637451f,0.99373674392700195312f,
+0.10717242211103439331f,0.99424046277999877930f,0.10259586572647094727f,
+0.99472314119338989258f,0.09801714122295379639f,0.99518471956253051758f,
+0.09343633800745010376f,0.99562525749206542969f,0.08885355293750762939f,
+0.99604469537734985352f,0.08426889032125473022f,0.99644303321838378906f,
+0.07968243956565856934f,0.99682027101516723633f,0.07509429752826690674f,
+0.99717640876770019531f,0.07050457596778869629f,0.99751144647598266602f,
+0.06591334939002990723f,0.99782532453536987305f,0.06132073700428009033f,
+0.99811810255050659180f,0.05672682076692581177f,0.99838972091674804688f,
+0.05213170498609542847f,0.99864023923873901367f,0.04753548279404640198f,
+0.99886953830718994141f,0.04293825849890708923f,0.99907773733139038086f,
+0.03834012150764465332f,0.99926477670669555664f,0.03374117240309715271f,
+0.99943059682846069336f,0.02914150804281234741f,0.99957531690597534180f,
+0.02454122900962829590f,0.99969881772994995117f,0.01994042843580245972f,
+0.99980115890502929688f,0.01533920597285032272f,0.99988234043121337891f,
+0.01073765940964221954f,0.99994236230850219727f,0.00613588467240333557f,
+0.99998116493225097656f,0.00153398013208061457f,0.99999880790710449219f,
+-0.00306795677170157433f,0.99999529123306274414f,-0.00766982883214950562f,
+0.99997061491012573242f,-0.01227153837680816650f,0.99992471933364868164f,
+-0.01687298715114593506f,0.99985766410827636719f,-0.02147408016026020050f,
+0.99976938962936401367f,-0.02607471868395805359f,0.99966001510620117188f,
+-0.03067480400204658508f,0.99952942132949829102f,-0.03527423739433288574f,
+0.99937766790390014648f,-0.03987292572855949402f,0.99920475482940673828f,
+-0.04447077214717864990f,0.99901068210601806641f,-0.04906767606735229492f,
+0.99879544973373413086f,-0.05366353690624237061f,0.99855905771255493164f,
+-0.05825826525688171387f,0.99830156564712524414f,-0.06285175681114196777f,
+0.99802285432815551758f,-0.06744392216205596924f,0.99772304296493530273f,
+-0.07203464955091476440f,0.99740213155746459961f,-0.07662386447191238403f,
+0.99706006050109863281f,-0.08121144771575927734f,0.99669688940048217773f,
+-0.08579730987548828125f,0.99631261825561523438f,-0.09038136154413223267f,
+0.99590724706649780273f,-0.09496349841356277466f,0.99548077583312988281f,
+-0.09954361617565155029f,0.99503320455551147461f,-0.10412163287401199341f,
+0.99456459283828735352f,-0.10869744420051574707f,0.99407488107681274414f,
+-0.11327095329761505127f,0.99356412887573242188f,-0.11784206330776214600f,
+0.99303233623504638672f,-0.12241067737340927124f,0.99247956275939941406f,
+-0.12697669863700866699f,0.99190568923950195312f,-0.13154003024101257324f,
+0.99131083488464355469f,-0.13610057532787322998f,0.99069499969482421875f,
+-0.14065824449062347412f,0.99005818367004394531f,-0.14521291851997375488f,
+0.98940044641494750977f,-0.14976453781127929688f,0.98872166872024536133f,
+-0.15431296825408935547f,0.98802202939987182617f,-0.15885815024375915527f,
+0.98730140924453735352f,-0.16339994966983795166f,0.98655992746353149414f,
+-0.16793829202651977539f,0.98579752445220947266f,-0.17247308790683746338f,
+0.98501425981521606445f,-0.17700421810150146484f,0.98421007394790649414f,
+-0.18153160810470581055f,0.98338508605957031250f,-0.18605515360832214355f,
+0.98253929615020751953f,-0.19057475030422210693f,0.98167270421981811523f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.19960175454616546631f,
+0.97987711429595947266f,-0.20410896837711334229f,0.97894817590713500977f,
+-0.20861184597015380859f,0.97799849510192871094f,-0.21311031281948089600f,
+0.97702813148498535156f,-0.21760427951812744141f,0.97603708505630493164f,
+-0.22209362685680389404f,0.97502535581588745117f,-0.22657826542854309082f,
+0.97399294376373291016f,-0.23105810582637786865f,0.97293996810913085938f,
+-0.23553305864334106445f,0.97186630964279174805f,-0.24000301957130432129f,
+0.97077214717864990234f,-0.24446789920330047607f,0.96965736150741577148f,
+-0.24892760813236236572f,0.96852207183837890625f,-0.25338202714920043945f,
+0.96736627817153930664f,-0.25783109664916992188f,0.96618998050689697266f,
+-0.26227471232414245605f,0.96499323844909667969f,-0.26671275496482849121f,
+0.96377605199813842773f,-0.27114516496658325195f,0.96253848075866699219f,
+-0.27557182312011718750f,0.96128046512603759766f,-0.27999264001846313477f,
+0.96000212430953979492f,-0.28440752625465393066f,0.95870345830917358398f,
+-0.28881642222404479980f,0.95738452672958374023f,-0.29321914911270141602f,
+0.95604526996612548828f,-0.29761570692062377930f,0.95468574762344360352f,
+-0.30200594663619995117f,0.95330601930618286133f,-0.30638980865478515625f,
+0.95190614461898803711f,-0.31076714396476745605f,0.95048606395721435547f,
+-0.31513792276382446289f,0.94904589653015136719f,-0.31950202584266662598f,
+0.94758558273315429688f,-0.32385936379432678223f,0.94610524177551269531f,
+-0.32820984721183776855f,0.94460481405258178711f,-0.33255335688591003418f,
+0.94308441877365112305f,-0.33688986301422119141f,0.94154405593872070312f,
+-0.34121921658515930176f,0.93998372554779052734f,-0.34554132819175720215f,
+0.93840354681015014648f,-0.34985613822937011719f,0.93680346012115478516f,
+-0.35416352748870849609f,0.93518352508544921875f,-0.35846340656280517578f,
+0.93354380130767822266f,-0.36275571584701538086f,0.93188428878784179688f,
+-0.36704033613204956055f,0.93020504713058471680f,-0.37131720781326293945f,
+0.92850607633590698242f,-0.37558618187904357910f,0.92678749561309814453f,
+-0.37984719872474670410f,0.92504924535751342773f,-0.38410019874572753906f,
+0.92329144477844238281f,-0.38834503293037414551f,0.92151403427124023438f,
+-0.39258167147636413574f,0.91971713304519653320f,-0.39680999517440795898f,
+0.91790080070495605469f,-0.40102988481521606445f,0.91606497764587402344f,
+-0.40524131059646606445f,0.91420978307723999023f,-0.40944415330886840820f,
+0.91233515739440917969f,-0.41363832354545593262f,0.91044127941131591797f,
+-0.41782370209693908691f,0.90852808952331542969f,-0.42200025916099548340f,
+0.90659570693969726562f,-0.42616787552833557129f,0.90464407205581665039f,
+-0.43032649159431457520f,0.90267330408096313477f,-0.43447595834732055664f,
+0.90068340301513671875f,-0.43861624598503112793f,0.89867448806762695312f,
+-0.44274723529815673828f,0.89664649963378906250f,-0.44686883687973022461f,
+0.89459949731826782227f,-0.45098099112510681152f,0.89253354072570800781f,
+-0.45508357882499694824f,0.89044874906539916992f,-0.45917654037475585938f,
+0.88834506273269653320f,-0.46325978636741638184f,0.88622254133224487305f,
+-0.46733319759368896484f,0.88408124446868896484f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.47545027732849121094f,0.87974262237548828125f,
+-0.47949376702308654785f,0.87754529714584350586f,-0.48352706432342529297f,
+0.87532937526702880859f,-0.48755016922950744629f,0.87309497594833374023f,
+-0.49156290292739868164f,0.87084203958511352539f,-0.49556526541709899902f,
+0.86857068538665771484f,-0.49955710768699645996f,0.86628097295761108398f,
+-0.50353837013244628906f,0.86397284269332885742f,-0.50750899314880371094f,
+0.86164647340774536133f,-0.51146882772445678711f,0.85930180549621582031f,
+-0.51541787385940551758f,0.85693895816802978516f,-0.51935601234436035156f,
+0.85455799102783203125f,-0.52328312397003173828f,0.85215890407562255859f,
+-0.52719914913177490234f,0.84974175691604614258f,-0.53110402822494506836f,
+0.84730660915374755859f,-0.53499764204025268555f,0.84485357999801635742f,
+-0.53887993097305297852f,0.84238260984420776367f,-0.54275077581405639648f,
+0.83989381790161132812f,-0.54661017656326293945f,0.83738720417022705078f,
+-0.55045795440673828125f,0.83486288785934448242f,-0.55429410934448242188f,
+0.83232086896896362305f,-0.55811852216720581055f,0.82976120710372924805f,
+-0.56193113327026367188f,0.82718402147293090820f,-0.56573182344436645508f,
+0.82458931207656860352f,-0.56952053308486938477f,0.82197713851928710938f,
+-0.57329714298248291016f,0.81934750080108642578f,-0.57706165313720703125f,
+0.81670057773590087891f,-0.58081394433975219727f,0.81403630971908569336f,
+-0.58455395698547363281f,0.81135487556457519531f,-0.58828157186508178711f,
+0.80865615606307983398f,-0.59199666976928710938f,0.80594038963317871094f,
+-0.59569931030273437500f,0.80320751667022705078f,-0.59938931465148925781f,
+0.80045765638351440430f,-0.60306662321090698242f,0.79769086837768554688f,
+-0.60673111677169799805f,0.79490715265274047852f,-0.61038279533386230469f,
+0.79210656881332397461f,-0.61402153968811035156f,0.78928923606872558594f,
+-0.61764729022979736328f,0.78645521402359008789f,-0.62125998735427856445f,
+0.78360450267791748047f,-0.62485951185226440430f,0.78073722124099731445f,
+-0.62844574451446533203f,0.77785342931747436523f,-0.63201874494552612305f,
+0.77495312690734863281f,-0.63557833433151245117f,0.77203637361526489258f,
+-0.63912445306777954102f,0.76910334825515747070f,-0.64265704154968261719f,
+0.76615399122238159180f,-0.64617604017257690430f,0.76318842172622680664f,
+-0.64968132972717285156f,0.76020669937133789062f,-0.65317285060882568359f,
+0.75720882415771484375f,-0.65665054321289062500f,0.75419497489929199219f,
+-0.66011434793472290039f,0.75116515159606933594f,-0.66356414556503295898f,
+0.74811935424804687500f,-0.66699993610382080078f,0.74505776166915893555f,
+-0.67042154073715209961f,0.74198043346405029297f,-0.67382901906967163086f,
+0.73888731002807617188f,-0.67722219228744506836f,0.73577857017517089844f,
+-0.68060100078582763672f,0.73265427350997924805f,-0.68396538496017456055f,
+0.72951442003250122070f,-0.68731534481048583984f,0.72635912895202636719f,
+-0.69065070152282714844f,0.72318845987319946289f,-0.69397145509719848633f,
+0.72000253200531005859f,-0.69727748632431030273f,0.71680128574371337891f,
+-0.70056879520416259766f,0.71358484029769897461f,-0.70384526252746582031f,
+0.71035337448120117188f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.71035337448120117188f,0.70384526252746582031f,-0.71358484029769897461f,
+0.70056879520416259766f,-0.71680128574371337891f,0.69727748632431030273f,
+-0.72000253200531005859f,0.69397145509719848633f,-0.72318845987319946289f,
+0.69065070152282714844f,-0.72635912895202636719f,0.68731534481048583984f,
+-0.72951442003250122070f,0.68396538496017456055f,-0.73265427350997924805f,
+0.68060100078582763672f,-0.73577857017517089844f,0.67722219228744506836f,
+-0.73888731002807617188f,0.67382901906967163086f,-0.74198043346405029297f,
+0.67042154073715209961f,-0.74505776166915893555f,0.66699993610382080078f,
+-0.74811935424804687500f,0.66356414556503295898f,-0.75116515159606933594f,
+0.66011434793472290039f,-0.75419497489929199219f,0.65665054321289062500f,
+-0.75720882415771484375f,0.65317285060882568359f,-0.76020669937133789062f,
+0.64968132972717285156f,-0.76318842172622680664f,0.64617604017257690430f,
+-0.76615399122238159180f,0.64265704154968261719f,-0.76910334825515747070f,
+0.63912445306777954102f,-0.77203637361526489258f,0.63557833433151245117f,
+-0.77495312690734863281f,0.63201874494552612305f,-0.77785342931747436523f,
+0.62844574451446533203f,-0.78073722124099731445f,0.62485951185226440430f,
+-0.78360450267791748047f,0.62125998735427856445f,-0.78645521402359008789f,
+0.61764729022979736328f,-0.78928923606872558594f,0.61402153968811035156f,
+-0.79210656881332397461f,0.61038279533386230469f,-0.79490715265274047852f,
+0.60673111677169799805f,-0.79769086837768554688f,0.60306662321090698242f,
+-0.80045765638351440430f,0.59938931465148925781f,-0.80320751667022705078f,
+0.59569931030273437500f,-0.80594038963317871094f,0.59199666976928710938f,
+-0.80865615606307983398f,0.58828157186508178711f,-0.81135487556457519531f,
+0.58455395698547363281f,-0.81403630971908569336f,0.58081394433975219727f,
+-0.81670057773590087891f,0.57706165313720703125f,-0.81934750080108642578f,
+0.57329714298248291016f,-0.82197713851928710938f,0.56952053308486938477f,
+-0.82458931207656860352f,0.56573182344436645508f,-0.82718402147293090820f,
+0.56193113327026367188f,-0.82976120710372924805f,0.55811852216720581055f,
+-0.83232086896896362305f,0.55429410934448242188f,-0.83486288785934448242f,
+0.55045795440673828125f,-0.83738720417022705078f,0.54661017656326293945f,
+-0.83989381790161132812f,0.54275077581405639648f,-0.84238260984420776367f,
+0.53887993097305297852f,-0.84485357999801635742f,0.53499764204025268555f,
+-0.84730660915374755859f,0.53110402822494506836f,-0.84974175691604614258f,
+0.52719914913177490234f,-0.85215890407562255859f,0.52328312397003173828f,
+-0.85455799102783203125f,0.51935601234436035156f,-0.85693895816802978516f,
+0.51541787385940551758f,-0.85930180549621582031f,0.51146882772445678711f,
+-0.86164647340774536133f,0.50750899314880371094f,-0.86397284269332885742f,
+0.50353837013244628906f,-0.86628097295761108398f,0.49955710768699645996f,
+-0.86857068538665771484f,0.49556526541709899902f,-0.87084203958511352539f,
+0.49156290292739868164f,-0.87309497594833374023f,0.48755016922950744629f,
+-0.87532937526702880859f,0.48352706432342529297f,-0.87754529714584350586f,
+0.47949376702308654785f,-0.87974262237548828125f,0.47545027732849121094f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.88408124446868896484f,
+0.46733319759368896484f,-0.88622254133224487305f,0.46325978636741638184f,
+-0.88834506273269653320f,0.45917654037475585938f,-0.89044874906539916992f,
+0.45508357882499694824f,-0.89253354072570800781f,0.45098099112510681152f,
+-0.89459949731826782227f,0.44686883687973022461f,-0.89664649963378906250f,
+0.44274723529815673828f,-0.89867448806762695312f,0.43861624598503112793f,
+-0.90068340301513671875f,0.43447595834732055664f,-0.90267330408096313477f,
+0.43032649159431457520f,-0.90464407205581665039f,0.42616787552833557129f,
+-0.90659570693969726562f,0.42200025916099548340f,-0.90852808952331542969f,
+0.41782370209693908691f,-0.91044127941131591797f,0.41363832354545593262f,
+-0.91233515739440917969f,0.40944415330886840820f,-0.91420978307723999023f,
+0.40524131059646606445f,-0.91606497764587402344f,0.40102988481521606445f,
+-0.91790080070495605469f,0.39680999517440795898f,-0.91971713304519653320f,
+0.39258167147636413574f,-0.92151403427124023438f,0.38834503293037414551f,
+-0.92329144477844238281f,0.38410019874572753906f,-0.92504924535751342773f,
+0.37984719872474670410f,-0.92678749561309814453f,0.37558618187904357910f,
+-0.92850607633590698242f,0.37131720781326293945f,-0.93020504713058471680f,
+0.36704033613204956055f,-0.93188428878784179688f,0.36275571584701538086f,
+-0.93354380130767822266f,0.35846340656280517578f,-0.93518352508544921875f,
+0.35416352748870849609f,-0.93680346012115478516f,0.34985613822937011719f,
+-0.93840354681015014648f,0.34554132819175720215f,-0.93998372554779052734f,
+0.34121921658515930176f,-0.94154405593872070312f,0.33688986301422119141f,
+-0.94308441877365112305f,0.33255335688591003418f,-0.94460481405258178711f,
+0.32820984721183776855f,-0.94610524177551269531f,0.32385936379432678223f,
+-0.94758558273315429688f,0.31950202584266662598f,-0.94904589653015136719f,
+0.31513792276382446289f,-0.95048606395721435547f,0.31076714396476745605f,
+-0.95190614461898803711f,0.30638980865478515625f,-0.95330601930618286133f,
+0.30200594663619995117f,-0.95468574762344360352f,0.29761570692062377930f,
+-0.95604526996612548828f,0.29321914911270141602f,-0.95738452672958374023f,
+0.28881642222404479980f,-0.95870345830917358398f,0.28440752625465393066f,
+-0.96000212430953979492f,0.27999264001846313477f,-0.96128046512603759766f,
+0.27557182312011718750f,-0.96253848075866699219f,0.27114516496658325195f,
+-0.96377605199813842773f,0.26671275496482849121f,-0.96499323844909667969f,
+0.26227471232414245605f,-0.96618998050689697266f,0.25783109664916992188f,
+-0.96736627817153930664f,0.25338202714920043945f,-0.96852207183837890625f,
+0.24892760813236236572f,-0.96965736150741577148f,0.24446789920330047607f,
+-0.97077214717864990234f,0.24000301957130432129f,-0.97186630964279174805f,
+0.23553305864334106445f,-0.97293996810913085938f,0.23105810582637786865f,
+-0.97399294376373291016f,0.22657826542854309082f,-0.97502535581588745117f,
+0.22209362685680389404f,-0.97603708505630493164f,0.21760427951812744141f,
+-0.97702813148498535156f,0.21311031281948089600f,-0.97799849510192871094f,
+0.20861184597015380859f,-0.97894817590713500977f,0.20410896837711334229f,
+-0.97987711429595947266f,0.19960175454616546631f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.98167270421981811523f,0.19057475030422210693f,
+-0.98253929615020751953f,0.18605515360832214355f,-0.98338508605957031250f,
+0.18153160810470581055f,-0.98421007394790649414f,0.17700421810150146484f,
+-0.98501425981521606445f,0.17247308790683746338f,-0.98579752445220947266f,
+0.16793829202651977539f,-0.98655992746353149414f,0.16339994966983795166f,
+-0.98730140924453735352f,0.15885815024375915527f,-0.98802202939987182617f,
+0.15431296825408935547f,-0.98872166872024536133f,0.14976453781127929688f,
+-0.98940044641494750977f,0.14521291851997375488f,-0.99005818367004394531f,
+0.14065824449062347412f,-0.99069499969482421875f,0.13610057532787322998f,
+-0.99131083488464355469f,0.13154003024101257324f,-0.99190568923950195312f,
+0.12697669863700866699f,-0.99247956275939941406f,0.12241067737340927124f,
+-0.99303233623504638672f,0.11784206330776214600f,-0.99356412887573242188f,
+0.11327095329761505127f,-0.99407488107681274414f,0.10869744420051574707f,
+-0.99456459283828735352f,0.10412163287401199341f,-0.99503320455551147461f,
+0.09954361617565155029f,-0.99548077583312988281f,0.09496349841356277466f,
+-0.99590724706649780273f,0.09038136154413223267f,-0.99631261825561523438f,
+0.08579730987548828125f,-0.99669688940048217773f,0.08121144771575927734f,
+-0.99706006050109863281f,0.07662386447191238403f,-0.99740213155746459961f,
+0.07203464955091476440f,-0.99772304296493530273f,0.06744392216205596924f,
+-0.99802285432815551758f,0.06285175681114196777f,-0.99830156564712524414f,
+0.05825826525688171387f,-0.99855905771255493164f,0.05366353690624237061f,
+-0.99879544973373413086f,0.04906767606735229492f,-0.99901068210601806641f,
+0.04447077214717864990f,-0.99920475482940673828f,0.03987292572855949402f,
+-0.99937766790390014648f,0.03527423739433288574f,-0.99952942132949829102f,
+0.03067480400204658508f,-0.99966001510620117188f,0.02607471868395805359f,
+-0.99976938962936401367f,0.02147408016026020050f,-0.99985766410827636719f,
+0.01687298715114593506f,-0.99992471933364868164f,0.01227153837680816650f,
+-0.99997061491012573242f,0.00766982883214950562f,-0.99999529123306274414f,
+0.00306795677170157433f,-0.99999880790710449219f,-0.00153398013208061457f,
+-0.99998116493225097656f,-0.00613588467240333557f,-0.99994236230850219727f,
+-0.01073765940964221954f,-0.99988234043121337891f,-0.01533920597285032272f,
+-0.99980115890502929688f,-0.01994042843580245972f,-0.99969881772994995117f,
+-0.02454122900962829590f,-0.99957531690597534180f,-0.02914150804281234741f,
+-0.99943059682846069336f,-0.03374117240309715271f,-0.99926477670669555664f,
+-0.03834012150764465332f,-0.99907773733139038086f,-0.04293825849890708923f,
+-0.99886953830718994141f,-0.04753548279404640198f,-0.99864023923873901367f,
+-0.05213170498609542847f,-0.99838972091674804688f,-0.05672682076692581177f,
+-0.99811810255050659180f,-0.06132073700428009033f,-0.99782532453536987305f,
+-0.06591334939002990723f,-0.99751144647598266602f,-0.07050457596778869629f,
+-0.99717640876770019531f,-0.07509429752826690674f,-0.99682027101516723633f,
+-0.07968243956565856934f,-0.99644303321838378906f,-0.08426889032125473022f,
+-0.99604469537734985352f,-0.08885355293750762939f,-0.99562525749206542969f,
+-0.09343633800745010376f,-0.99518471956253051758f,-0.09801714122295379639f,
+-0.99472314119338989258f,-0.10259586572647094727f,-0.99424046277999877930f,
+-0.10717242211103439331f,-0.99373674392700195312f,-0.11174671351909637451f,
+-0.99321192502975463867f,-0.11631862819194793701f,-0.99266612529754638672f,
+-0.12088808417320251465f,-0.99209928512573242188f,-0.12545497715473175049f,
+-0.99151146411895751953f,-0.13001921772956848145f,-0.99090266227722167969f,
+-0.13458070158958435059f,-0.99027281999588012695f,-0.13913933932781219482f,
+-0.98962199687957763672f,-0.14369502663612365723f,-0.98895025253295898438f,
+-0.14824767410755157471f,-0.98825758695602416992f,-0.15279719233512878418f,
+-0.98754394054412841797f,-0.15734346210956573486f,-0.98680937290191650391f,
+-0.16188639402389526367f,-0.98605394363403320312f,-0.16642589867115020752f,
+-0.98527765274047851562f,-0.17096188664436340332f,-0.98448044061660766602f,
+-0.17549425363540649414f,-0.98366242647171020508f,-0.18002289533615112305f,
+-0.98282355070114135742f,-0.18454773724079132080f,-0.98196387290954589844f,
+-0.18906866014003753662f,-0.98108339309692382812f,-0.19358558952808380127f,
+-0.98018211126327514648f,-0.19809840619564056396f,-0.97926014661788940430f,
+-0.20260703563690185547f,-0.97831737995147705078f,-0.20711137354373931885f,
+-0.97735387086868286133f,-0.21161133050918579102f,-0.97636973857879638672f,
+-0.21610680222511291504f,-0.97536486387252807617f,-0.22059768438339233398f,
+-0.97433936595916748047f,-0.22508391737937927246f,-0.97329324483871459961f,
+-0.22956536710262298584f,-0.97222650051116943359f,-0.23404195904731750488f,
+-0.97113913297653198242f,-0.23851358890533447266f,-0.97003126144409179688f,
+-0.24298018217086791992f,-0.96890282630920410156f,-0.24744161963462829590f,
+-0.96775382757186889648f,-0.25189781188964843750f,-0.96658438444137573242f,
+-0.25634866952896118164f,-0.96539443731307983398f,-0.26079410314559936523f,
+-0.96418404579162597656f,-0.26523402333259582520f,-0.96295326948165893555f,
+-0.26966831088066101074f,-0.96170204877853393555f,-0.27409690618515014648f,
+-0.96043050289154052734f,-0.27851969003677368164f,-0.95913863182067871094f,
+-0.28293657302856445312f,-0.95782643556594848633f,-0.28734746575355529785f,
+-0.95649391412734985352f,-0.29175224900245666504f,-0.95514118671417236328f,
+-0.29615089297294616699f,-0.95376819372177124023f,-0.30054324865341186523f,
+-0.95237499475479125977f,-0.30492922663688659668f,-0.95096164941787719727f,
+-0.30930876731872558594f,-0.94952815771102905273f,-0.31368175148963928223f,
+-0.94807457923889160156f,-0.31804808974266052246f,-0.94660091400146484375f,
+-0.32240769267082214355f,-0.94510722160339355469f,-0.32676044106483459473f,
+-0.94359344244003295898f,-0.33110630512237548828f,-0.94205975532531738281f,
+-0.33544513583183288574f,-0.94050604104995727539f,-0.33977687358856201172f,
+-0.93893247842788696289f,-0.34410142898559570312f,-0.93733900785446166992f,
+-0.34841868281364440918f,-0.93572568893432617188f,-0.35272854566574096680f,
+-0.93409252166748046875f,-0.35703095793724060059f,-0.93243962526321411133f,
+-0.36132580041885375977f,-0.93076694011688232422f,-0.36561298370361328125f,
+-0.92907458543777465820f,-0.36989244818687438965f,-0.92736250162124633789f,
+-0.37416407465934753418f,-0.92563080787658691406f,-0.37842774391174316406f,
+-0.92387950420379638672f,-0.38268342614173889160f,-0.92210865020751953125f,
+-0.38693100214004516602f,-0.92031830549240112305f,-0.39117038249969482422f,
+-0.91850841045379638672f,-0.39540147781372070312f,-0.91667908430099487305f,
+-0.39962419867515563965f,-0.91483032703399658203f,-0.40383845567703247070f,
+-0.91296219825744628906f,-0.40804415941238403320f,-0.91107475757598876953f,
+-0.41224122047424316406f,-0.90916800498962402344f,-0.41642954945564270020f,
+-0.90724200010299682617f,-0.42060908675193786621f,-0.90529674291610717773f,
+-0.42477968335151672363f,-0.90333235263824462891f,-0.42894127964973449707f,
+-0.90134882926940917969f,-0.43309381604194641113f,-0.89934623241424560547f,
+-0.43723717331886291504f,-0.89732456207275390625f,-0.44137126207351684570f,
+-0.89528393745422363281f,-0.44549602270126342773f,-0.89322429895401000977f,
+-0.44961133599281311035f,-0.89114576578140258789f,-0.45371711254119873047f,
+-0.88904833793640136719f,-0.45781329274177551270f,-0.88693213462829589844f,
+-0.46189978718757629395f,-0.88479709625244140625f,-0.46597650647163391113f,
+-0.88264334201812744141f,-0.47004333138465881348f,-0.88047087192535400391f,
+-0.47410020232200622559f,-0.87827980518341064453f,-0.47814705967903137207f,
+-0.87607008218765258789f,-0.48218378424644470215f,-0.87384182214736938477f,
+-0.48621028661727905273f,-0.87159508466720581055f,-0.49022647738456726074f,
+-0.86932986974716186523f,-0.49423229694366455078f,-0.86704623699188232422f,
+-0.49822765588760375977f,-0.86474424600601196289f,-0.50221246480941772461f,
+-0.86242395639419555664f,-0.50618666410446166992f,-0.86008536815643310547f,
+-0.51015007495880126953f,-0.85772860050201416016f,-0.51410275697708129883f,
+-0.85535365343093872070f,-0.51804453134536743164f,-0.85296058654785156250f,
+-0.52197527885437011719f,-0.85054945945739746094f,-0.52589499950408935547f,
+-0.84812033176422119141f,-0.52980363368988037109f,-0.84567326307296752930f,
+-0.53370100259780883789f,-0.84320825338363647461f,-0.53758704662322998047f,
+-0.84072536230087280273f,-0.54146176576614379883f,-0.83822470903396606445f,
+-0.54532498121261596680f,-0.83570629358291625977f,-0.54917663335800170898f,
+-0.83317017555236816406f,-0.55301672220230102539f,-0.83061641454696655273f,
+-0.55684500932693481445f,-0.82804507017135620117f,-0.56066155433654785156f,
+-0.82545614242553710938f,-0.56446623802185058594f,-0.82284981012344360352f,
+-0.56825894117355346680f,-0.82022595405578613281f,-0.57203960418701171875f,
+-0.81758481264114379883f,-0.57580816745758056641f,-0.81492632627487182617f,
+-0.57956457138061523438f,-0.81225061416625976562f,-0.58330863714218139648f,
+-0.80955761671066284180f,-0.58704036474227905273f,-0.80684757232666015625f,
+-0.59075969457626342773f,-0.80412036180496215820f,-0.59446650743484497070f,
+-0.80137616395950317383f,-0.59816068410873413086f,-0.79861497879028320312f,
+-0.60184222459793090820f,-0.79583692550659179688f,-0.60551106929779052734f,
+-0.79304194450378417969f,-0.60916703939437866211f,-0.79023021459579467773f,
+-0.61281007528305053711f,-0.78740173578262329102f,-0.61644017696380615234f,
+-0.78455656766891479492f,-0.62005722522735595703f,-0.78169482946395874023f,
+-0.62366110086441040039f,-0.77881652116775512695f,-0.62725180387496948242f,
+-0.77592170238494873047f,-0.63082921504974365234f,-0.77301043272018432617f,
+-0.63439327478408813477f,-0.77008283138275146484f,-0.63794392347335815430f,
+-0.76713889837265014648f,-0.64148104190826416016f,-0.76417875289916992188f,
+-0.64500451087951660156f,-0.76120239496231079102f,-0.64851438999176025391f,
+-0.75820988416671752930f,-0.65201056003570556641f,-0.75520139932632446289f,
+-0.65549284219741821289f,-0.75217682123184204102f,-0.65896129608154296875f,
+-0.74913638830184936523f,-0.66241580247879028320f,-0.74608010053634643555f,
+-0.66585624217987060547f,-0.74300795793533325195f,-0.66928261518478393555f,
+-0.73992007970809936523f,-0.67269474267959594727f,-0.73681658506393432617f,
+-0.67609268426895141602f,-0.73369741439819335938f,-0.67947632074356079102f,
+-0.73056274652481079102f,-0.68284553289413452148f,-0.72741264104843139648f,
+-0.68620032072067260742f,-0.72424709796905517578f,-0.68954056501388549805f,
+-0.72106617689132690430f,-0.69286614656448364258f,-0.71787005662918090820f,
+-0.69617712497711181641f,-0.71465867757797241211f,-0.69947332143783569336f,
+-0.71143221855163574219f,-0.70275473594665527344f,-0.70819061994552612305f,
+-0.70602124929428100586f,-0.70493406057357788086f,-0.70927280187606811523f,
+-0.70166260004043579102f,-0.71250939369201660156f,-0.69837623834609985352f,
+-0.71573084592819213867f,-0.69507509469985961914f,-0.71893709897994995117f,
+-0.69175922870635986328f,-0.72212821245193481445f,-0.68842875957489013672f,
+-0.72530394792556762695f,-0.68508368730545043945f,-0.72846436500549316406f,
+-0.68172407150268554688f,-0.73160940408706665039f,-0.67835003137588500977f,
+-0.73473888635635375977f,-0.67496162652969360352f,-0.73785281181335449219f,
+-0.67155897617340087891f,-0.74095112085342407227f,-0.66814202070236206055f,
+-0.74403375387191772461f,-0.66471099853515625000f,-0.74710059165954589844f,
+-0.66126585006713867188f,-0.75015163421630859375f,-0.65780669450759887695f,
+-0.75318682193756103516f,-0.65433359146118164062f,-0.75620597600936889648f,
+-0.65084666013717651367f,-0.75920921564102172852f,-0.64734596014022827148f,
+-0.76219630241394042969f,-0.64383155107498168945f,-0.76516723632812500000f,
+-0.64030349254608154297f,-0.76812201738357543945f,-0.63676184415817260742f,
+-0.77106052637100219727f,-0.63320678472518920898f,-0.77398270368576049805f,
+-0.62963825464248657227f,-0.77688848972320556641f,-0.62605637311935424805f,
+-0.77977776527404785156f,-0.62246125936508178711f,-0.78265058994293212891f,
+-0.61885297298431396484f,-0.78550684452056884766f,-0.61523157358169555664f,
+-0.78834640979766845703f,-0.61159718036651611328f,-0.79116934537887573242f,
+-0.60794979333877563477f,-0.79397547245025634766f,-0.60428953170776367188f,
+-0.79676479101181030273f,-0.60061645507812500000f,-0.79953724145889282227f,
+-0.59693068265914916992f,-0.80229282379150390625f,-0.59323227405548095703f,
+-0.80503135919570922852f,-0.58952128887176513672f,-0.80775284767150878906f,
+-0.58579784631729125977f,-0.81045717000961303711f,-0.58206200599670410156f,
+-0.81314438581466674805f,-0.57831376791000366211f,-0.81581443548202514648f,
+-0.57455337047576904297f,-0.81846714019775390625f,-0.57078075408935546875f,
+-0.82110249996185302734f,-0.56699603796005249023f,-0.82372051477432250977f,
+-0.56319934129714965820f,-0.82632106542587280273f,-0.55939072370529174805f,
+-0.82890409231185913086f,-0.55557024478912353516f,-0.83146959543228149414f,
+-0.55173796415328979492f,-0.83401751518249511719f,-0.54789406061172485352f,
+-0.83654773235321044922f,-0.54403853416442871094f,-0.83906024694442749023f,
+-0.54017144441604614258f,-0.84155499935150146484f,-0.53629297018051147461f,
+-0.84403187036514282227f,-0.53240311145782470703f,-0.84649091958999633789f,
+-0.52850198745727539062f,-0.84893202781677246094f,-0.52458965778350830078f,
+-0.85135519504547119141f,-0.52066624164581298828f,-0.85376030206680297852f,
+-0.51673179864883422852f,-0.85614734888076782227f,-0.51278638839721679688f,
+-0.85851621627807617188f,-0.50883013010025024414f,-0.86086696386337280273f,
+-0.50486308336257934570f,-0.86319941282272338867f,-0.50088536739349365234f,
+-0.86551362276077270508f,-0.49689704179763793945f,-0.86780947446823120117f,
+-0.49289819598197937012f,-0.87008696794509887695f,-0.48888888955116271973f,
+-0.87234604358673095703f,-0.48486924171447753906f,-0.87458664178848266602f,
+-0.48083934187889099121f,-0.87680870294570922852f,-0.47679921984672546387f,
+-0.87901222705841064453f,-0.47274902462959289551f,-0.88119709491729736328f,
+-0.46868881583213806152f,-0.88336336612701416016f,-0.46461868286132812500f,
+-0.88551086187362670898f,-0.46053871512413024902f,-0.88763964176177978516f,
+-0.45644897222518920898f,-0.88974958658218383789f,-0.45234957337379455566f,
+-0.89184069633483886719f,-0.44824060797691345215f,-0.89391297101974487305f,
+-0.44412213563919067383f,-0.89596623182296752930f,-0.43999427556991577148f,
+-0.89800059795379638672f,-0.43585708737373352051f,-0.90001589059829711914f,
+-0.43171066045761108398f,-0.90201216936111450195f,-0.42755508422851562500f,
+-0.90398931503295898438f,-0.42339047789573669434f,-0.90594726800918579102f,
+-0.41921690106391906738f,-0.90788608789443969727f,-0.41503441333770751953f,
+-0.90980571508407592773f,-0.41084316372871398926f,-0.91170603036880493164f,
+-0.40664321184158325195f,-0.91358703374862670898f,-0.40243464708328247070f,
+-0.91544872522354125977f,-0.39821755886077880859f,-0.91729098558425903320f,
+-0.39399203658103942871f,-0.91911387443542480469f,-0.38975816965103149414f,
+-0.92091721296310424805f,-0.38551604747772216797f,-0.92270112037658691406f,
+-0.38126575946807861328f,-0.92446547746658325195f,-0.37700742483139038086f,
+-0.92621022462844848633f,-0.37274107336997985840f,-0.92793542146682739258f,
+-0.36846682429313659668f,-0.92964088916778564453f,-0.36418479681015014648f,
+-0.93132668733596801758f,-0.35989505052566528320f,-0.93299281597137451172f,
+-0.35559767484664916992f,-0.93463915586471557617f,-0.35129275918006896973f,
+-0.93626564741134643555f,-0.34698042273521423340f,-0.93787235021591186523f,
+-0.34266072511672973633f,-0.93945920467376708984f,-0.33833375573158264160f,
+-0.94102615118026733398f,-0.33399966359138488770f,-0.94257318973541259766f,
+-0.32965844869613647461f,-0.94410026073455810547f,-0.32531028985977172852f,
+-0.94560730457305908203f,-0.32095524668693542480f,-0.94709438085556030273f,
+-0.31659337878227233887f,-0.94856137037277221680f,-0.31222480535507202148f,
+-0.95000827312469482422f,-0.30784964561462402344f,-0.95143502950668334961f,
+-0.30346795916557312012f,-0.95284163951873779297f,-0.29907983541488647461f,
+-0.95422810316085815430f,-0.29468536376953125000f,-0.95559436082839965820f,
+-0.29028466343879699707f,-0.95694035291671752930f,-0.28587782382965087891f,
+-0.95826607942581176758f,-0.28146493434906005859f,-0.95957154035568237305f,
+-0.27704608440399169922f,-0.96085661649703979492f,-0.27262136340141296387f,
+-0.96212142705917358398f,-0.26819086074829101562f,-0.96336579322814941406f,
+-0.26375466585159301758f,-0.96458977460861206055f,-0.25931292772293090820f,
+-0.96579337120056152344f,-0.25486564636230468750f,-0.96697646379470825195f,
+-0.25041300058364868164f,-0.96813911199569702148f,-0.24595504999160766602f,
+-0.96928125619888305664f,-0.24149188399314880371f,-0.97040283679962158203f,
+-0.23702360689640045166f,-0.97150391340255737305f,-0.23255030810832977295f,
+-0.97258436679840087891f,-0.22807207703590393066f,-0.97364425659179687500f,
+-0.22358903288841247559f,-0.97468352317810058594f,-0.21910123527050018311f,
+-0.97570210695266723633f,-0.21460881829261779785f,-0.97670006752014160156f,
+-0.21011184155941009521f,-0.97767734527587890625f,-0.20561040937900543213f,
+-0.97863394021987915039f,-0.20110464096069335938f,-0.97956979274749755859f,
+-0.19659459590911865234f,-0.98048484325408935547f,-0.19208039343357086182f,
+-0.98137921094894409180f,-0.18756212294101715088f,-0.98225271701812744141f,
+-0.18303988873958587646f,-0.98310548067092895508f,-0.17851376533508300781f,
+-0.98393744230270385742f,-0.17398387193679809570f,-0.98474848270416259766f,
+-0.16945029795169830322f,-0.98553872108459472656f,-0.16491311788558959961f,
+-0.98630809783935546875f,-0.16037245094776153564f,-0.98705655336380004883f,
+-0.15582840144634246826f,-0.98778414726257324219f,-0.15128104388713836670f,
+-0.98849081993103027344f,-0.14673046767711639404f,-0.98917651176452636719f,
+-0.14217680692672729492f,-0.98984128236770629883f,-0.13762012124061584473f,
+-0.99048507213592529297f,-0.13306052982807159424f,-0.99110794067382812500f,
+-0.12849810719490051270f,-0.99170976877212524414f,-0.12393297255039215088f,
+-0.99229061603546142578f,-0.11936521530151367188f,-0.99285042285919189453f,
+-0.11479492485523223877f,-0.99338918924331665039f,-0.11022220551967620850f,
+-0.99390697479248046875f,-0.10564715415239334106f,-0.99440366029739379883f,
+-0.10106986016035079956f,-0.99487930536270141602f,-0.09649042785167694092f,
+-0.99533390998840332031f,-0.09190895408391952515f,-0.99576741456985473633f,
+-0.08732553571462631226f,-0.99617981910705566406f,-0.08274026215076446533f,
+-0.99657112360000610352f,-0.07815324515104293823f,-0.99694132804870605469f,
+-0.07356456667184829712f,-0.99729043245315551758f,-0.06897433102130889893f,
+-0.99761843681335449219f,-0.06438262760639190674f,-0.99792528152465820312f,
+-0.05978957191109657288f,-0.99821102619171142578f,-0.05519524589180946350f,
+-0.99847555160522460938f,-0.05059975013136863708f,-0.99871903657913208008f,
+-0.04600318148732185364f,-0.99894130229949951172f,-0.04140564054250717163f,
+-0.99914240837097167969f,-0.03680722415447235107f,-0.99932235479354858398f,
+-0.03220802545547485352f,-0.99948120117187500000f,-0.02760814502835273743f,
+-0.99961882829666137695f,-0.02300768159329891205f,-0.99973529577255249023f,
+-0.01840673014521598816f,-0.99983060359954833984f,-0.01380538847297430038f,
+-0.99990469217300415039f,-0.00920375436544418335f,-0.99995762109756469727f,
+-0.00460192607715725899f,-0.99998939037322998047f,1.00000000000000000000f,
+0.00000000000000000000f,0.99983060359954833984f,0.01840673014521598816f,
+0.99932235479354858398f,0.03680722415447235107f,0.99847555160522460938f,
+0.05519524589180946350f,0.99729043245315551758f,0.07356456667184829712f,
+0.99576741456985473633f,0.09190895408391952515f,0.99390697479248046875f,
+0.11022220551967620850f,0.99170976877212524414f,0.12849810719490051270f,
+0.98917651176452636719f,0.14673046767711639404f,0.98630809783935546875f,
+0.16491311788558959961f,0.98310548067092895508f,0.18303988873958587646f,
+0.97956979274749755859f,0.20110464096069335938f,0.97570210695266723633f,
+0.21910123527050018311f,0.97150391340255737305f,0.23702360689640045166f,
+0.96697646379470825195f,0.25486564636230468750f,0.96212142705917358398f,
+0.27262136340141296387f,0.95694035291671752930f,0.29028466343879699707f,
+0.95143502950668334961f,0.30784964561462402344f,0.94560730457305908203f,
+0.32531028985977172852f,0.93945920467376708984f,0.34266072511672973633f,
+0.93299281597137451172f,0.35989505052566528320f,0.92621022462844848633f,
+0.37700742483139038086f,0.91911387443542480469f,0.39399203658103942871f,
+0.91170603036880493164f,0.41084316372871398926f,0.90398931503295898438f,
+0.42755508422851562500f,0.89596623182296752930f,0.44412213563919067383f,
+0.88763964176177978516f,0.46053871512413024902f,0.87901222705841064453f,
+0.47679921984672546387f,0.87008696794509887695f,0.49289819598197937012f,
+0.86086696386337280273f,0.50883013010025024414f,0.85135519504547119141f,
+0.52458965778350830078f,0.84155499935150146484f,0.54017144441604614258f,
+0.83146959543228149414f,0.55557024478912353516f,0.82110249996185302734f,
+0.57078075408935546875f,0.81045717000961303711f,0.58579784631729125977f,
+0.79953724145889282227f,0.60061645507812500000f,0.78834640979766845703f,
+0.61523157358169555664f,0.77688848972320556641f,0.62963825464248657227f,
+0.76516723632812500000f,0.64383155107498168945f,0.75318682193756103516f,
+0.65780669450759887695f,0.74095112085342407227f,0.67155897617340087891f,
+0.72846436500549316406f,0.68508368730545043945f,0.71573084592819213867f,
+0.69837623834609985352f,0.70275473594665527344f,0.71143221855163574219f,
+0.68954056501388549805f,0.72424709796905517578f,0.67609268426895141602f,
+0.73681658506393432617f,0.66241580247879028320f,0.74913638830184936523f,
+0.64851438999176025391f,0.76120239496231079102f,0.63439327478408813477f,
+0.77301043272018432617f,0.62005722522735595703f,0.78455656766891479492f,
+0.60551106929779052734f,0.79583692550659179688f,0.59075969457626342773f,
+0.80684757232666015625f,0.57580816745758056641f,0.81758481264114379883f,
+0.56066155433654785156f,0.82804507017135620117f,0.54532498121261596680f,
+0.83822470903396606445f,0.52980363368988037109f,0.84812033176422119141f,
+0.51410275697708129883f,0.85772860050201416016f,0.49822765588760375977f,
+0.86704623699188232422f,0.48218378424644470215f,0.87607008218765258789f,
+0.46597650647163391113f,0.88479709625244140625f,0.44961133599281311035f,
+0.89322429895401000977f,0.43309381604194641113f,0.90134882926940917969f,
+0.41642954945564270020f,0.90916800498962402344f,0.39962419867515563965f,
+0.91667908430099487305f,0.38268342614173889160f,0.92387950420379638672f,
+0.36561298370361328125f,0.93076694011688232422f,0.34841868281364440918f,
+0.93733900785446166992f,0.33110630512237548828f,0.94359344244003295898f,
+0.31368175148963928223f,0.94952815771102905273f,0.29615089297294616699f,
+0.95514118671417236328f,0.27851969003677368164f,0.96043050289154052734f,
+0.26079410314559936523f,0.96539443731307983398f,0.24298018217086791992f,
+0.97003126144409179688f,0.22508391737937927246f,0.97433936595916748047f,
+0.20711137354373931885f,0.97831737995147705078f,0.18906866014003753662f,
+0.98196387290954589844f,0.17096188664436340332f,0.98527765274047851562f,
+0.15279719233512878418f,0.98825758695602416992f,0.13458070158958435059f,
+0.99090266227722167969f,0.11631862819194793701f,0.99321192502975463867f,
+0.09801714122295379639f,0.99518471956253051758f,0.07968243956565856934f,
+0.99682027101516723633f,0.06132073700428009033f,0.99811810255050659180f,
+0.04293825849890708923f,0.99907773733139038086f,0.02454122900962829590f,
+0.99969881772994995117f,0.00613588467240333557f,0.99998116493225097656f,
+-0.01227153837680816650f,0.99992471933364868164f,-0.03067480400204658508f,
+0.99952942132949829102f,-0.04906767606735229492f,0.99879544973373413086f,
+-0.06744392216205596924f,0.99772304296493530273f,-0.08579730987548828125f,
+0.99631261825561523438f,-0.10412163287401199341f,0.99456459283828735352f,
+-0.12241067737340927124f,0.99247956275939941406f,-0.14065824449062347412f,
+0.99005818367004394531f,-0.15885815024375915527f,0.98730140924453735352f,
+-0.17700421810150146484f,0.98421007394790649414f,-0.19509032368659973145f,
+0.98078525066375732422f,-0.21311031281948089600f,0.97702813148498535156f,
+-0.23105810582637786865f,0.97293996810913085938f,-0.24892760813236236572f,
+0.96852207183837890625f,-0.26671275496482849121f,0.96377605199813842773f,
+-0.28440752625465393066f,0.95870345830917358398f,-0.30200594663619995117f,
+0.95330601930618286133f,-0.31950202584266662598f,0.94758558273315429688f,
+-0.33688986301422119141f,0.94154405593872070312f,-0.35416352748870849609f,
+0.93518352508544921875f,-0.37131720781326293945f,0.92850607633590698242f,
+-0.38834503293037414551f,0.92151403427124023438f,-0.40524131059646606445f,
+0.91420978307723999023f,-0.42200025916099548340f,0.90659570693969726562f,
+-0.43861624598503112793f,0.89867448806762695312f,-0.45508357882499694824f,
+0.89044874906539916992f,-0.47139674425125122070f,0.88192129135131835938f,
+-0.48755016922950744629f,0.87309497594833374023f,-0.50353837013244628906f,
+0.86397284269332885742f,-0.51935601234436035156f,0.85455799102783203125f,
+-0.53499764204025268555f,0.84485357999801635742f,-0.55045795440673828125f,
+0.83486288785934448242f,-0.56573182344436645508f,0.82458931207656860352f,
+-0.58081394433975219727f,0.81403630971908569336f,-0.59569931030273437500f,
+0.80320751667022705078f,-0.61038279533386230469f,0.79210656881332397461f,
+-0.62485951185226440430f,0.78073722124099731445f,-0.63912445306777954102f,
+0.76910334825515747070f,-0.65317285060882568359f,0.75720882415771484375f,
+-0.66699993610382080078f,0.74505776166915893555f,-0.68060100078582763672f,
+0.73265427350997924805f,-0.69397145509719848633f,0.72000253200531005859f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.72000253200531005859f,
+0.69397145509719848633f,-0.73265427350997924805f,0.68060100078582763672f,
+-0.74505776166915893555f,0.66699993610382080078f,-0.75720882415771484375f,
+0.65317285060882568359f,-0.76910334825515747070f,0.63912445306777954102f,
+-0.78073722124099731445f,0.62485951185226440430f,-0.79210656881332397461f,
+0.61038279533386230469f,-0.80320751667022705078f,0.59569931030273437500f,
+-0.81403630971908569336f,0.58081394433975219727f,-0.82458931207656860352f,
+0.56573182344436645508f,-0.83486288785934448242f,0.55045795440673828125f,
+-0.84485357999801635742f,0.53499764204025268555f,-0.85455799102783203125f,
+0.51935601234436035156f,-0.86397284269332885742f,0.50353837013244628906f,
+-0.87309497594833374023f,0.48755016922950744629f,-0.88192129135131835938f,
+0.47139674425125122070f,-0.89044874906539916992f,0.45508357882499694824f,
+-0.89867448806762695312f,0.43861624598503112793f,-0.90659570693969726562f,
+0.42200025916099548340f,-0.91420978307723999023f,0.40524131059646606445f,
+-0.92151403427124023438f,0.38834503293037414551f,-0.92850607633590698242f,
+0.37131720781326293945f,-0.93518352508544921875f,0.35416352748870849609f,
+-0.94154405593872070312f,0.33688986301422119141f,-0.94758558273315429688f,
+0.31950202584266662598f,-0.95330601930618286133f,0.30200594663619995117f,
+-0.95870345830917358398f,0.28440752625465393066f,-0.96377605199813842773f,
+0.26671275496482849121f,-0.96852207183837890625f,0.24892760813236236572f,
+-0.97293996810913085938f,0.23105810582637786865f,-0.97702813148498535156f,
+0.21311031281948089600f,-0.98078525066375732422f,0.19509032368659973145f,
+-0.98421007394790649414f,0.17700421810150146484f,-0.98730140924453735352f,
+0.15885815024375915527f,-0.99005818367004394531f,0.14065824449062347412f,
+-0.99247956275939941406f,0.12241067737340927124f,-0.99456459283828735352f,
+0.10412163287401199341f,-0.99631261825561523438f,0.08579730987548828125f,
+-0.99772304296493530273f,0.06744392216205596924f,-0.99879544973373413086f,
+0.04906767606735229492f,-0.99952942132949829102f,0.03067480400204658508f,
+-0.99992471933364868164f,0.01227153837680816650f,-0.99998116493225097656f,
+-0.00613588467240333557f,-0.99969881772994995117f,-0.02454122900962829590f,
+-0.99907773733139038086f,-0.04293825849890708923f,-0.99811810255050659180f,
+-0.06132073700428009033f,-0.99682027101516723633f,-0.07968243956565856934f,
+-0.99518471956253051758f,-0.09801714122295379639f,-0.99321192502975463867f,
+-0.11631862819194793701f,-0.99090266227722167969f,-0.13458070158958435059f,
+-0.98825758695602416992f,-0.15279719233512878418f,-0.98527765274047851562f,
+-0.17096188664436340332f,-0.98196387290954589844f,-0.18906866014003753662f,
+-0.97831737995147705078f,-0.20711137354373931885f,-0.97433936595916748047f,
+-0.22508391737937927246f,-0.97003126144409179688f,-0.24298018217086791992f,
+-0.96539443731307983398f,-0.26079410314559936523f,-0.96043050289154052734f,
+-0.27851969003677368164f,-0.95514118671417236328f,-0.29615089297294616699f,
+-0.94952815771102905273f,-0.31368175148963928223f,-0.94359344244003295898f,
+-0.33110630512237548828f,-0.93733900785446166992f,-0.34841868281364440918f,
+-0.93076694011688232422f,-0.36561298370361328125f,-0.92387950420379638672f,
+-0.38268342614173889160f,-0.91667908430099487305f,-0.39962419867515563965f,
+-0.90916800498962402344f,-0.41642954945564270020f,-0.90134882926940917969f,
+-0.43309381604194641113f,-0.89322429895401000977f,-0.44961133599281311035f,
+-0.88479709625244140625f,-0.46597650647163391113f,-0.87607008218765258789f,
+-0.48218378424644470215f,-0.86704623699188232422f,-0.49822765588760375977f,
+-0.85772860050201416016f,-0.51410275697708129883f,-0.84812033176422119141f,
+-0.52980363368988037109f,-0.83822470903396606445f,-0.54532498121261596680f,
+-0.82804507017135620117f,-0.56066155433654785156f,-0.81758481264114379883f,
+-0.57580816745758056641f,-0.80684757232666015625f,-0.59075969457626342773f,
+-0.79583692550659179688f,-0.60551106929779052734f,-0.78455656766891479492f,
+-0.62005722522735595703f,-0.77301043272018432617f,-0.63439327478408813477f,
+-0.76120239496231079102f,-0.64851438999176025391f,-0.74913638830184936523f,
+-0.66241580247879028320f,-0.73681658506393432617f,-0.67609268426895141602f,
+-0.72424709796905517578f,-0.68954056501388549805f,-0.71143221855163574219f,
+-0.70275473594665527344f,-0.69837623834609985352f,-0.71573084592819213867f,
+-0.68508368730545043945f,-0.72846436500549316406f,-0.67155897617340087891f,
+-0.74095112085342407227f,-0.65780669450759887695f,-0.75318682193756103516f,
+-0.64383155107498168945f,-0.76516723632812500000f,-0.62963825464248657227f,
+-0.77688848972320556641f,-0.61523157358169555664f,-0.78834640979766845703f,
+-0.60061645507812500000f,-0.79953724145889282227f,-0.58579784631729125977f,
+-0.81045717000961303711f,-0.57078075408935546875f,-0.82110249996185302734f,
+-0.55557024478912353516f,-0.83146959543228149414f,-0.54017144441604614258f,
+-0.84155499935150146484f,-0.52458965778350830078f,-0.85135519504547119141f,
+-0.50883013010025024414f,-0.86086696386337280273f,-0.49289819598197937012f,
+-0.87008696794509887695f,-0.47679921984672546387f,-0.87901222705841064453f,
+-0.46053871512413024902f,-0.88763964176177978516f,-0.44412213563919067383f,
+-0.89596623182296752930f,-0.42755508422851562500f,-0.90398931503295898438f,
+-0.41084316372871398926f,-0.91170603036880493164f,-0.39399203658103942871f,
+-0.91911387443542480469f,-0.37700742483139038086f,-0.92621022462844848633f,
+-0.35989505052566528320f,-0.93299281597137451172f,-0.34266072511672973633f,
+-0.93945920467376708984f,-0.32531028985977172852f,-0.94560730457305908203f,
+-0.30784964561462402344f,-0.95143502950668334961f,-0.29028466343879699707f,
+-0.95694035291671752930f,-0.27262136340141296387f,-0.96212142705917358398f,
+-0.25486564636230468750f,-0.96697646379470825195f,-0.23702360689640045166f,
+-0.97150391340255737305f,-0.21910123527050018311f,-0.97570210695266723633f,
+-0.20110464096069335938f,-0.97956979274749755859f,-0.18303988873958587646f,
+-0.98310548067092895508f,-0.16491311788558959961f,-0.98630809783935546875f,
+-0.14673046767711639404f,-0.98917651176452636719f,-0.12849810719490051270f,
+-0.99170976877212524414f,-0.11022220551967620850f,-0.99390697479248046875f,
+-0.09190895408391952515f,-0.99576741456985473633f,-0.07356456667184829712f,
+-0.99729043245315551758f,-0.05519524589180946350f,-0.99847555160522460938f,
+-0.03680722415447235107f,-0.99932235479354858398f,-0.01840673014521598816f,
+-0.99983060359954833984f,1.00000000000000000000f,0.00000000000000000000f,
+0.99729043245315551758f,0.07356456667184829712f,0.98917651176452636719f,
+0.14673046767711639404f,0.97570210695266723633f,0.21910123527050018311f,
+0.95694035291671752930f,0.29028466343879699707f,0.93299281597137451172f,
+0.35989505052566528320f,0.90398931503295898438f,0.42755508422851562500f,
+0.87008696794509887695f,0.49289819598197937012f,0.83146959543228149414f,
+0.55557024478912353516f,0.78834640979766845703f,0.61523157358169555664f,
+0.74095112085342407227f,0.67155897617340087891f,0.68954056501388549805f,
+0.72424709796905517578f,0.63439327478408813477f,0.77301043272018432617f,
+0.57580816745758056641f,0.81758481264114379883f,0.51410275697708129883f,
+0.85772860050201416016f,0.44961133599281311035f,0.89322429895401000977f,
+0.38268342614173889160f,0.92387950420379638672f,0.31368175148963928223f,
+0.94952815771102905273f,0.24298018217086791992f,0.97003126144409179688f,
+0.17096188664436340332f,0.98527765274047851562f,0.09801714122295379639f,
+0.99518471956253051758f,0.02454122900962829590f,0.99969881772994995117f,
+-0.04906767606735229492f,0.99879544973373413086f,-0.12241067737340927124f,
+0.99247956275939941406f,-0.19509032368659973145f,0.98078525066375732422f,
+-0.26671275496482849121f,0.96377605199813842773f,-0.33688986301422119141f,
+0.94154405593872070312f,-0.40524131059646606445f,0.91420978307723999023f,
+-0.47139674425125122070f,0.88192129135131835938f,-0.53499764204025268555f,
+0.84485357999801635742f,-0.59569931030273437500f,0.80320751667022705078f,
+-0.65317285060882568359f,0.75720882415771484375f,-0.70710676908493041992f,
+0.70710676908493041992f,-0.75720882415771484375f,0.65317285060882568359f,
+-0.80320751667022705078f,0.59569931030273437500f,-0.84485357999801635742f,
+0.53499764204025268555f,-0.88192129135131835938f,0.47139674425125122070f,
+-0.91420978307723999023f,0.40524131059646606445f,-0.94154405593872070312f,
+0.33688986301422119141f,-0.96377605199813842773f,0.26671275496482849121f,
+-0.98078525066375732422f,0.19509032368659973145f,-0.99247956275939941406f,
+0.12241067737340927124f,-0.99879544973373413086f,0.04906767606735229492f,
+-0.99969881772994995117f,-0.02454122900962829590f,-0.99518471956253051758f,
+-0.09801714122295379639f,-0.98527765274047851562f,-0.17096188664436340332f,
+-0.97003126144409179688f,-0.24298018217086791992f,-0.94952815771102905273f,
+-0.31368175148963928223f,-0.92387950420379638672f,-0.38268342614173889160f,
+-0.89322429895401000977f,-0.44961133599281311035f,-0.85772860050201416016f,
+-0.51410275697708129883f,-0.81758481264114379883f,-0.57580816745758056641f,
+-0.77301043272018432617f,-0.63439327478408813477f,-0.72424709796905517578f,
+-0.68954056501388549805f,-0.67155897617340087891f,-0.74095112085342407227f,
+-0.61523157358169555664f,-0.78834640979766845703f,-0.55557024478912353516f,
+-0.83146959543228149414f,-0.49289819598197937012f,-0.87008696794509887695f,
+-0.42755508422851562500f,-0.90398931503295898438f,-0.35989505052566528320f,
+-0.93299281597137451172f,-0.29028466343879699707f,-0.95694035291671752930f,
+-0.21910123527050018311f,-0.97570210695266723633f,-0.14673046767711639404f,
+-0.98917651176452636719f,-0.07356456667184829712f,-0.99729043245315551758f,
+1.00000000000000000000f,0.00000000000000000000f,0.95694035291671752930f,
+0.29028466343879699707f,0.83146959543228149414f,0.55557024478912353516f,
+0.63439327478408813477f,0.77301043272018432617f,0.38268342614173889160f,
+0.92387950420379638672f,0.09801714122295379639f,0.99518471956253051758f,
+-0.19509032368659973145f,0.98078525066375732422f,-0.47139674425125122070f,
+0.88192129135131835938f,-0.70710676908493041992f,0.70710676908493041992f,
+-0.88192129135131835938f,0.47139674425125122070f,-0.98078525066375732422f,
+0.19509032368659973145f,-0.99518471956253051758f,-0.09801714122295379639f,
+-0.92387950420379638672f,-0.38268342614173889160f,-0.77301043272018432617f,
+-0.63439327478408813477f,-0.55557024478912353516f,-0.83146959543228149414f,
+-0.29028466343879699707f,-0.95694035291671752930f,1.00000000000000000000f,
+0.00000000000000000000f,0.38268342614173889160f,0.92387950420379638672f,
+-0.70710676908493041992f,0.70710676908493041992f,-0.92387950420379638672f,
+-0.38268342614173889160f,};
 
 #endif
 
@@ -3766,7 +3769,8 @@ float32_t rearranged_twiddle_stride3_4096_f32[2728]={
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
 
-#if defined(ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
 
@@ -5431,7 +5435,8 @@ q31_t rearranged_twiddle_stride3_4096_q31[2728]={
 #endif /* defined(ARM_MATH_MVEI)  */
 
 
-#if defined(ARM_MATH_MVEI)  && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables_f16.c
index d3f2d34..56e3acd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/CommonTables/arm_mve_tables_f16.c
@@ -6,12 +6,13 @@
  * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
  *               used for MVE implementation only
  *
- * $Date:        14. April 2020
+ * @version  V1.10.0
+ * @date     04 October 2021
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -28,10 +29,12 @@
  * limitations under the License.
  */
 
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h"
+ #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types_f16.h"
 
+ 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
+
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
@@ -48,22 +51,22 @@ uint32_t rearranged_twiddle_tab_stride3_arr_16_f16[2]={
 0,0,};
 
 float16_t rearranged_twiddle_stride1_16_f16[8]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,};
 
 float16_t rearranged_twiddle_stride2_16_f16[8]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,};
 
 float16_t rearranged_twiddle_stride3_16_f16[8]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,};
 
 #endif
 
@@ -79,70 +82,70 @@ uint32_t rearranged_twiddle_tab_stride3_arr_64_f16[3]={
 0,32,0,};
 
 float16_t rearranged_twiddle_stride1_64_f16[40]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,};
 
 float16_t rearranged_twiddle_stride2_64_f16[40]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,};
 
 float16_t rearranged_twiddle_stride3_64_f16[40]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,};
 
 #endif
 
@@ -158,262 +161,262 @@ uint32_t rearranged_twiddle_tab_stride3_arr_256_f16[4]={
 0,128,160,0,};
 
 float16_t rearranged_twiddle_stride1_256_f16[168]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,};
 
 float16_t rearranged_twiddle_stride2_256_f16[168]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,};
 
 float16_t rearranged_twiddle_stride3_256_f16[168]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,};
 
 #endif
 
@@ -429,1030 +432,1030 @@ uint32_t rearranged_twiddle_tab_stride3_arr_1024_f16[5]={
 0,512,640,672,0,};
 
 float16_t rearranged_twiddle_stride1_1024_f16[680]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99998117528260110909f,(float16_t)0.00613588464915447527f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99952941750109314256f,(float16_t)0.03067480317663662595f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99907772775264536147f,(float16_t)0.04293825693494082024f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99772306664419163624f,(float16_t)0.06744391956366405094f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99682029929116566791f,(float16_t)0.07968243797143012563f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99456457073425541537f,(float16_t)0.10412163387205458642f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99321194923479450001f,(float16_t)0.11631863091190475235f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.99005821026229712256f,(float16_t)0.14065823933284921088f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98825756773074946437f,(float16_t)0.15279718525844343535f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98421009238692902521f,(float16_t)0.17700422041214874946f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98196386910955524296f,(float16_t)0.18906866414980619262f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97702814265775439484f,(float16_t)0.21311031991609136194f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97433938278557585821f,(float16_t)0.22508391135979283204f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96852209427441737777f,(float16_t)0.24892760574572014853f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96539444169768939830f,(float16_t)0.26079411791527551401f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.95870347489587159906f,(float16_t)0.28440753721127187692f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95514116830577078243f,(float16_t)0.29615088824362378883f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94758559101774109124f,(float16_t)0.31950203081601569188f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94359345816196038559f,(float16_t)0.33110630575987642921f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93518350993894761025f,(float16_t)0.35416352542049034380f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.93076696107898371224f,(float16_t)0.36561299780477385379f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.92151403934204190183f,(float16_t)0.38834504669882624617f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91667905992104270485f,(float16_t)0.39962419984564678810f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90659570451491533483f,(float16_t)0.42200027079979968159f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.90134884704602202810f,(float16_t)0.43309381885315195726f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.89044872324475787817f,(float16_t)0.45508358712634383592f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88479709843093778954f,(float16_t)0.46597649576796618121f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87309497841829009079f,(float16_t)0.48755016014843599592f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86704624551569264845f,(float16_t)0.49822766697278181303f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85455798836540053376f,(float16_t)0.51935599016558964269f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84812034480329723252f,(float16_t)0.52980362468629460526f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83486287498638001026f,(float16_t)0.55045797293660481131f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82804504525775579626f,(float16_t)0.56066157619733603124f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80684755354379933401f,(float16_t)0.59075970185887416442f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.79210657730021238887f,(float16_t)0.61038280627630947528f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78455659715557524159f,(float16_t)0.62005721176328909561f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.76910333764557969882f,(float16_t)0.63912444486377573138f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.76120238548426177871f,(float16_t)0.64851440102211244110f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74505778544146594733f,(float16_t)0.66699992230363747137f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73681656887736979300f,(float16_t)0.67609270357531592310f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.71143219574521643356f,(float16_t)0.70275474445722529993f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.69397146088965400157f,(float16_t)0.72000250796138165477f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68508366777270035541f,(float16_t)0.72846439044822519637f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.66699992230363747137f,(float16_t)0.74505778544146594733f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.65780669329707874837f,(float16_t)0.75318679904361252042f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62963823891492709528f,(float16_t)0.77688846567323244230f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.61038280627630947528f,(float16_t)0.79210657730021227785f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.60061647938386897305f,(float16_t)0.79953726910790501314f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.58081395809576452649f,(float16_t)0.81403632970594830276f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.57078074588696736669f,(float16_t)0.82110251499110464835f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.55045797293660481131f,(float16_t)0.83486287498638001026f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.54017147272989296525f,(float16_t)0.84155497743689833268f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.51935599016558953167f,(float16_t)0.85455798836540053376f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.50883014254310698909f,(float16_t)0.86086693863776730939f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.48755016014843605143f,(float16_t)0.87309497841829009079f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47679923006332225466f,(float16_t)0.87901222642863341417f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.45508358712634383592f,(float16_t)0.89044872324475787817f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.44412214457042925586f,(float16_t)0.89596624975618510689f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.42200027079979979261f,(float16_t)0.90659570451491533483f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.41084317105790391089f,(float16_t)0.91170603200542987832f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.38834504669882630168f,(float16_t)0.92151403934204190183f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37700741021641831496f,(float16_t)0.92621024213831126826f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.35416352542049051033f,(float16_t)0.93518350993894749923f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.34266071731199437833f,(float16_t)0.93945922360218991898f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.31950203081601574739f,(float16_t)0.94758559101774109124f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.30784964004153497763f,(float16_t)0.95143502096900833820f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.28440753721127182141f,(float16_t)0.95870347489587159906f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.27262135544994897662f,(float16_t)0.96212140426904158019f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.24892760574572025956f,(float16_t)0.96852209427441726675f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.23702360599436733679f,(float16_t)0.97150389098625178352f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.21311031991609136194f,(float16_t)0.97702814265775439484f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.20110463484209195606f,(float16_t)0.97956976568544051887f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.17700422041214886049f,(float16_t)0.98421009238692902521f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.16491312048997008866f,(float16_t)0.98630809724459866938f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.14065823933284923863f,(float16_t)0.99005821026229712256f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.12849811079379322432f,(float16_t)0.99170975366909952520f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.10412163387205472520f,(float16_t)0.99456457073425541537f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.09190895649713269611f,(float16_t)0.99576741446765981713f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.06744391956366410645f,(float16_t)0.99772306664419163624f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.05519524434969003135f,(float16_t)0.99847558057329477421f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.03067480317663658085f,(float16_t)0.99952941750109314256f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.01840672990580482019f,(float16_t)0.99983058179582340319f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0061340332031f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0429382324219f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9965820312500f,(float16_t)0.0797119140625f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9931640625000f,(float16_t)0.1163330078125f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9882812500000f,(float16_t)0.1528320312500f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9819335937500f,(float16_t)0.1890869140625f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9741210937500f,(float16_t)0.2250976562500f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9653320312500f,(float16_t)0.2607421875000f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9550781250000f,(float16_t)0.2961425781250f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9433593750000f,(float16_t)0.3310546875000f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9306640625000f,(float16_t)0.3657226562500f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9165039062500f,(float16_t)0.3996582031250f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.9013671875000f,(float16_t)0.4331054687500f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8847656250000f,(float16_t)0.4660644531250f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8671875000000f,(float16_t)0.4982910156250f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8481445312500f,(float16_t)0.5297851562500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8281250000000f,(float16_t)0.5605468750000f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8066406250000f,(float16_t)0.5908203125000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7846679687500f,(float16_t)0.6201171875000f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7612304687500f,(float16_t)0.6484375000000f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7368164062500f,(float16_t)0.6762695312500f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7114257812500f,(float16_t)0.7026367187500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6850585937500f,(float16_t)0.7285156250000f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6577148437500f,(float16_t)0.7534179687500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6293945312500f,(float16_t)0.7768554687500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.6005859375000f,(float16_t)0.7993164062500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5708007812500f,(float16_t)0.8212890625000f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5400390625000f,(float16_t)0.8417968750000f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5087890625000f,(float16_t)0.8608398437500f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4768066406250f,(float16_t)0.8789062500000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4440917968750f,(float16_t)0.8959960937500f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4108886718750f,(float16_t)0.9116210937500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3769531250000f,(float16_t)0.9262695312500f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3427734375000f,(float16_t)0.9394531250000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3078613281250f,(float16_t)0.9516601562500f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2727050781250f,(float16_t)0.9619140625000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2370605468750f,(float16_t)0.9716796875000f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.2010498046875f,(float16_t)0.9794921875000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1649169921875f,(float16_t)0.9863281250000f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1285400390625f,(float16_t)0.9916992187500f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0919189453125f,(float16_t)0.9956054687500f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0552062988281f,(float16_t)0.9985351562500f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0184020996094f,(float16_t)1.0000000000000f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,};
 
 float16_t rearranged_twiddle_stride2_1024_f16[680]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.02454122852291214241f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.03680722294135886641f,(float16_t)0.99932238458834954375f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.06132073630220852972f,(float16_t)0.99811811290014917919f,
-(float16_t)-0.07356456359966732916f,(float16_t)0.99729045667869020697f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.11022220729388305938f,(float16_t)0.99390697000235606051f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.13458070850712611222f,(float16_t)0.99090263542778000971f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.17096188876030124493f,(float16_t)0.98527764238894122162f,
-(float16_t)-0.18303988795514092303f,(float16_t)0.98310548743121628501f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.20711137619221844930f,(float16_t)0.97831737071962765473f,
-(float16_t)-0.21910124015686965881f,(float16_t)0.97570213003852857003f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.25486565960451451618f,(float16_t)0.96697647104485207059f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.27851968938505294870f,(float16_t)0.96043051941556589757f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.31368174039889140658f,(float16_t)0.94952818059303667475f,
-(float16_t)-0.32531029216226287071f,(float16_t)0.94560732538052139073f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.34841868024943439819f,(float16_t)0.93733901191257495977f,
-(float16_t)-0.35989503653498816638f,(float16_t)0.93299279883473884567f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.39399204006104798781f,(float16_t)0.91911385169005777040f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.41642956009763698599f,(float16_t)0.90916798309052249127f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.44961132965460670619f,(float16_t)0.89322430119551521344f,
-(float16_t)-0.46053871095824006066f,(float16_t)0.88763962040285393496f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.48218377207912271887f,(float16_t)0.87607009419540660122f,
-(float16_t)-0.49289819222978398239f,(float16_t)0.87008699110871146054f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.52458968267846872724f,(float16_t)0.85135519310526519554f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.54532498842204624179f,(float16_t)0.83822470555483818977f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)-0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.60551104140432543410f,(float16_t)0.79583690460888356633f,
-(float16_t)-0.61523159058062670823f,(float16_t)0.78834642762660633863f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.64383154288979127511f,(float16_t)0.76516726562245906962f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.66241577759017189475f,(float16_t)0.74913639452345925918f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)-0.69837624940897280457f,(float16_t)0.71573082528381870571f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.71573082528381859468f,(float16_t)0.69837624940897291559f,
-(float16_t)-0.72424708295146678072f,(float16_t)0.68954054473706705153f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.74913639452345914815f,(float16_t)0.66241577759017200577f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.78834642762660622761f,(float16_t)0.61523159058062693028f,
-(float16_t)-0.79583690460888345530f,(float16_t)0.60551104140432565615f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.81045719825259465718f,(float16_t)0.58579785745643897510f,
-(float16_t)-0.81758481315158360037f,(float16_t)0.57580819141784544968f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.83822470555483807875f,(float16_t)0.54532498842204635281f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.87008699110871134952f,(float16_t)0.49289819222978414892f,
-(float16_t)-0.87607009419540649020f,(float16_t)0.48218377207912288540f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.88763962040285382393f,(float16_t)0.46053871095824022719f,
-(float16_t)-0.89322430119551521344f,(float16_t)0.44961132965460687272f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.91911385169005777040f,(float16_t)0.39399204006104815434f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.93299279883473884567f,(float16_t)0.35989503653498833291f,
-(float16_t)-0.93733901191257484875f,(float16_t)0.34841868024943478677f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94560732538052116869f,(float16_t)0.32531029216226325929f,
-(float16_t)-0.94952818059303667475f,(float16_t)0.31368174039889140658f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.96043051941556578655f,(float16_t)0.27851968938505317075f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96697647104485207059f,(float16_t)0.25486565960451468271f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97570213003852845901f,(float16_t)0.21910124015687004739f,
-(float16_t)-0.97831737071962754371f,(float16_t)0.20711137619221883788f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98310548743121628501f,(float16_t)0.18303988795514089527f,
-(float16_t)-0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.99090263542778000971f,(float16_t)0.13458070850712627875f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99390697000235606051f,(float16_t)0.11022220729388323979f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99729045667869020697f,(float16_t)0.07356456359966773162f,
-(float16_t)-0.99811811290014917919f,(float16_t)0.06132073630220848809f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)-0.99969881869620424997f,(float16_t)0.02454122852291232629f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)-0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)-0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)-0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)-0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)-0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)-0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)-0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)-0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)-0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)-0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)-0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)-0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)-0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)-0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)-0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)-0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)-0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)-0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)-0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)-0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)-0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,};
 
 float16_t rearranged_twiddle_stride3_1024_f16[680]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.03067480317663645942f,(float16_t)0.99952941750109314256f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.06744391956366398155f,(float16_t)0.99772306664419163624f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.10412163387205460030f,(float16_t)0.99456457073425541537f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.14065823933284912761f,(float16_t)0.99005821026229712256f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.17700422041214874946f,(float16_t)0.98421009238692902521f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.21311031991609125091f,(float16_t)0.97702814265775439484f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.24892760574572012078f,(float16_t)0.96852209427441737777f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.28440753721127171039f,(float16_t)0.95870347489587159906f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.31950203081601563637f,(float16_t)0.94758559101774120226f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.35416352542049039931f,(float16_t)0.93518350993894761025f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.38834504669882619066f,(float16_t)0.92151403934204201285f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.42200027079979968159f,(float16_t)0.90659570451491533483f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.45508358712634372489f,(float16_t)0.89044872324475798919f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.48755016014843571837f,(float16_t)0.87309497841829020182f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.51935599016558964269f,(float16_t)0.85455798836540053376f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.55045797293660470029f,(float16_t)0.83486287498638012128f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.58081395809576441547f,(float16_t)0.81403632970594852480f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.61038280627630958630f,(float16_t)0.79210657730021227785f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.66699992230363736034f,(float16_t)0.74505778544146605835f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.69397146088965377952f,(float16_t)0.72000250796138176579f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.74505778544146594733f,(float16_t)0.66699992230363758239f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.76910333764557947678f,(float16_t)0.63912444486377584241f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.79210657730021216683f,(float16_t)0.61038280627630969732f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.83486287498638001026f,(float16_t)0.55045797293660492233f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.85455798836540042274f,(float16_t)0.51935599016558975372f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.87309497841829009079f,(float16_t)0.48755016014843588490f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.89044872324475787817f,(float16_t)0.45508358712634389143f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90659570451491533483f,(float16_t)0.42200027079979984812f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.92151403934204179080f,(float16_t)0.38834504669882657923f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.93518350993894761025f,(float16_t)0.35416352542049039931f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94758559101774109124f,(float16_t)0.31950203081601580291f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95870347489587148804f,(float16_t)0.28440753721127209896f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96852209427441737777f,(float16_t)0.24892760574572009302f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97702814265775439484f,(float16_t)0.21311031991609141745f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98421009238692902521f,(float16_t)0.17700422041214894375f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.99005821026229701154f,(float16_t)0.14065823933284954395f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99456457073425541537f,(float16_t)0.10412163387205457254f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99772306664419163624f,(float16_t)0.06744391956366417584f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99952941750109314256f,(float16_t)0.03067480317663686534f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)-0.99998117528260110909f,(float16_t)-0.00613588464915455420f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99907772775264536147f,(float16_t)-0.04293825693494077861f,
-(float16_t)-0.99811811290014917919f,(float16_t)-0.06132073630220824523f,
-(float16_t)-0.99682029929116577893f,(float16_t)-0.07968243797142994522f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.99321194923479461103f,(float16_t)-0.11631863091190447479f,
-(float16_t)-0.99090263542778000971f,(float16_t)-0.13458070850712605671f,
-(float16_t)-0.98825756773074946437f,(float16_t)-0.15279718525844343535f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.98196386910955524296f,(float16_t)-0.18906866414980610935f,
-(float16_t)-0.97831737071962765473f,(float16_t)-0.20711137619221858808f,
-(float16_t)-0.97433938278557585821f,(float16_t)-0.22508391135979261000f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.96539444169768939830f,(float16_t)-0.26079411791527562503f,
-(float16_t)-0.96043051941556589757f,(float16_t)-0.27851968938505289319f,
-(float16_t)-0.95514116830577078243f,(float16_t)-0.29615088824362378883f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.94359345816196038559f,(float16_t)-0.33110630575987626267f,
-(float16_t)-0.93733901191257495977f,(float16_t)-0.34841868024943456472f,
-(float16_t)-0.93076696107898382326f,(float16_t)-0.36561299780477357624f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.91667905992104270485f,(float16_t)-0.39962419984564684361f,
-(float16_t)-0.90916798309052249127f,(float16_t)-0.41642956009763693048f,
-(float16_t)-0.90134884704602202810f,(float16_t)-0.43309381885315184624f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.88479709843093790056f,(float16_t)-0.46597649576796595916f,
-(float16_t)-0.87607009419540660122f,(float16_t)-0.48218377207912266336f,
-(float16_t)-0.86704624551569287050f,(float16_t)-0.49822766697278153547f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.84812034480329723252f,(float16_t)-0.52980362468629460526f,
-(float16_t)-0.83822470555483818977f,(float16_t)-0.54532498842204613076f,
-(float16_t)-0.82804504525775590729f,(float16_t)-0.56066157619733592021f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.80684755354379944503f,(float16_t)-0.59075970185887394237f,
-(float16_t)-0.79583690460888356633f,(float16_t)-0.60551104140432543410f,
-(float16_t)-0.78455659715557524159f,(float16_t)-0.62005721176328920663f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.76120238548426188974f,(float16_t)-0.64851440102211233008f,
-(float16_t)-0.74913639452345925918f,(float16_t)-0.66241577759017178373f,
-(float16_t)-0.73681656887737001504f,(float16_t)-0.67609270357531581208f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.71143219574521665560f,(float16_t)-0.70275474445722507788f,
-(float16_t)-0.69837624940897302661f,(float16_t)-0.71573082528381848366f,
-(float16_t)-0.68508366777270035541f,(float16_t)-0.72846439044822519637f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.65780669329707874837f,(float16_t)-0.75318679904361240940f,
-(float16_t)-0.64383154288979149715f,(float16_t)-0.76516726562245895860f,
-(float16_t)-0.62963823891492687324f,(float16_t)-0.77688846567323255332f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.60061647938386930612f,(float16_t)-0.79953726910790479110f,
-(float16_t)-0.58579785745643908612f,(float16_t)-0.81045719825259465718f,
-(float16_t)-0.57078074588696736669f,(float16_t)-0.82110251499110464835f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.54017147272989274320f,(float16_t)-0.84155497743689855472f,
-(float16_t)-0.52458968267846928235f,(float16_t)-0.85135519310526486247f,
-(float16_t)-0.50883014254310732216f,(float16_t)-0.86086693863776708735f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.47679923006332214364f,(float16_t)-0.87901222642863341417f,
-(float16_t)-0.46053871095823989412f,(float16_t)-0.88763962040285404598f,
-(float16_t)-0.44412214457042975546f,(float16_t)-0.89596624975618488484f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.41084317105790418845f,(float16_t)-0.91170603200542976730f,
-(float16_t)-0.39399204006104820985f,(float16_t)-0.91911385169005765938f,
-(float16_t)-0.37700741021641820394f,(float16_t)-0.92621024213831137928f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.34266071731199487793f,(float16_t)-0.93945922360218969693f,
-(float16_t)-0.32531029216226331480f,(float16_t)-0.94560732538052116869f,
-(float16_t)-0.30784964004153508865f,(float16_t)-0.95143502096900833820f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.27262135544994886560f,(float16_t)-0.96212140426904158019f,
-(float16_t)-0.25486565960451434965f,(float16_t)-0.96697647104485218161f,
-(float16_t)-0.23702360599436766986f,(float16_t)-0.97150389098625167250f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.20110463484209206708f,(float16_t)-0.97956976568544051887f,
-(float16_t)-0.18303988795514095078f,(float16_t)-0.98310548743121628501f,
-(float16_t)-0.16491312048996975559f,(float16_t)-0.98630809724459866938f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.12849811079379358514f,(float16_t)-0.99170975366909952520f,
-(float16_t)-0.11022220729388330918f,(float16_t)-0.99390697000235606051f,
-(float16_t)-0.09190895649713282101f,(float16_t)-0.99576741446765981713f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)-0.05519524434968971216f,(float16_t)-0.99847558057329477421f,
-(float16_t)-0.03680722294135933131f,(float16_t)-0.99932238458834943273f,
-(float16_t)-0.01840672990580516366f,(float16_t)-0.99983058179582340319f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0061340332031f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9990234375000f,(float16_t)-0.0429382324219f,
+(float16_t)-0.9980468750000f,(float16_t)-0.0613098144531f,
+(float16_t)-0.9965820312500f,(float16_t)-0.0797119140625f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9931640625000f,(float16_t)-0.1163330078125f,
+(float16_t)-0.9907226562500f,(float16_t)-0.1345214843750f,
+(float16_t)-0.9882812500000f,(float16_t)-0.1528320312500f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9819335937500f,(float16_t)-0.1890869140625f,
+(float16_t)-0.9785156250000f,(float16_t)-0.2071533203125f,
+(float16_t)-0.9741210937500f,(float16_t)-0.2250976562500f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9653320312500f,(float16_t)-0.2607421875000f,
+(float16_t)-0.9604492187500f,(float16_t)-0.2785644531250f,
+(float16_t)-0.9550781250000f,(float16_t)-0.2961425781250f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9433593750000f,(float16_t)-0.3310546875000f,
+(float16_t)-0.9375000000000f,(float16_t)-0.3483886718750f,
+(float16_t)-0.9306640625000f,(float16_t)-0.3657226562500f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.9165039062500f,(float16_t)-0.3996582031250f,
+(float16_t)-0.9091796875000f,(float16_t)-0.4165039062500f,
+(float16_t)-0.9013671875000f,(float16_t)-0.4331054687500f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8847656250000f,(float16_t)-0.4660644531250f,
+(float16_t)-0.8759765625000f,(float16_t)-0.4821777343750f,
+(float16_t)-0.8671875000000f,(float16_t)-0.4982910156250f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8481445312500f,(float16_t)-0.5297851562500f,
+(float16_t)-0.8383789062500f,(float16_t)-0.5454101562500f,
+(float16_t)-0.8281250000000f,(float16_t)-0.5605468750000f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.8066406250000f,(float16_t)-0.5908203125000f,
+(float16_t)-0.7958984375000f,(float16_t)-0.6054687500000f,
+(float16_t)-0.7846679687500f,(float16_t)-0.6201171875000f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7612304687500f,(float16_t)-0.6484375000000f,
+(float16_t)-0.7490234375000f,(float16_t)-0.6625976562500f,
+(float16_t)-0.7368164062500f,(float16_t)-0.6762695312500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.7114257812500f,(float16_t)-0.7026367187500f,
+(float16_t)-0.6982421875000f,(float16_t)-0.7158203125000f,
+(float16_t)-0.6850585937500f,(float16_t)-0.7285156250000f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6577148437500f,(float16_t)-0.7534179687500f,
+(float16_t)-0.6440429687500f,(float16_t)-0.7651367187500f,
+(float16_t)-0.6293945312500f,(float16_t)-0.7768554687500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.6005859375000f,(float16_t)-0.7993164062500f,
+(float16_t)-0.5859375000000f,(float16_t)-0.8105468750000f,
+(float16_t)-0.5708007812500f,(float16_t)-0.8212890625000f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.5400390625000f,(float16_t)-0.8417968750000f,
+(float16_t)-0.5244140625000f,(float16_t)-0.8515625000000f,
+(float16_t)-0.5087890625000f,(float16_t)-0.8608398437500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4768066406250f,(float16_t)-0.8789062500000f,
+(float16_t)-0.4604492187500f,(float16_t)-0.8876953125000f,
+(float16_t)-0.4440917968750f,(float16_t)-0.8959960937500f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.4108886718750f,(float16_t)-0.9116210937500f,
+(float16_t)-0.3940429687500f,(float16_t)-0.9189453125000f,
+(float16_t)-0.3769531250000f,(float16_t)-0.9262695312500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.3427734375000f,(float16_t)-0.9394531250000f,
+(float16_t)-0.3251953125000f,(float16_t)-0.9458007812500f,
+(float16_t)-0.3078613281250f,(float16_t)-0.9516601562500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2727050781250f,(float16_t)-0.9619140625000f,
+(float16_t)-0.2548828125000f,(float16_t)-0.9667968750000f,
+(float16_t)-0.2370605468750f,(float16_t)-0.9716796875000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.2010498046875f,(float16_t)-0.9794921875000f,
+(float16_t)-0.1829833984375f,(float16_t)-0.9829101562500f,
+(float16_t)-0.1649169921875f,(float16_t)-0.9863281250000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.1285400390625f,(float16_t)-0.9916992187500f,
+(float16_t)-0.1102294921875f,(float16_t)-0.9941406250000f,
+(float16_t)-0.0919189453125f,(float16_t)-0.9956054687500f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)-0.0552062988281f,(float16_t)-0.9985351562500f,
+(float16_t)-0.0368041992188f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0184020996094f,(float16_t)-1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,};
 
 #endif
 
@@ -1468,4102 +1471,4102 @@ uint32_t rearranged_twiddle_tab_stride3_arr_4096_f16[6]={
 0,2048,2560,2688,2720,0,};
 
 float16_t rearranged_twiddle_stride1_4096_f16[2728]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99999882345170187925f,(float16_t)0.00153398018628476550f,
-(float16_t)0.99999529380957619118f,(float16_t)0.00306795676296597614f,
-(float16_t)0.99998941108192840321f,(float16_t)0.00460192612044857050f,
-(float16_t)0.99998117528260110909f,(float16_t)0.00613588464915447527f,
-(float16_t)0.99997058643097413988f,(float16_t)0.00766982873953109701f,
-(float16_t)0.99995764455196389786f,(float16_t)0.00920375478205981944f,
-(float16_t)0.99994234967602391162f,(float16_t)0.01073765916726449055f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99990470108285289808f,(float16_t)0.01380538852806039059f,
-(float16_t)0.99988234745421256111f,(float16_t)0.01533920628498810015f,
-(float16_t)0.99985764100582386060f,(float16_t)0.01687298794728171042f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99980116988788425569f,(float16_t)0.01994042855151444138f,
-(float16_t)0.99976940535121527898f,(float16_t)0.02147408027546950787f,
-(float16_t)0.99973528826056168306f,(float16_t)0.02300768146883936868f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99965999674395922270f,(float16_t)0.02607471782910390085f,
-(float16_t)0.99961882249517863830f,(float16_t)0.02760814577896573974f,
-(float16_t)0.99957529604674921764f,(float16_t)0.02914150876419372219f,
-(float16_t)0.99952941750109314256f,(float16_t)0.03067480317663662595f,
-(float16_t)0.99948118696616694567f,(float16_t)0.03220802540830458582f,
-(float16_t)0.99943060455546173237f,(float16_t)0.03374117185137757990f,
-(float16_t)0.99937767038800284780f,(float16_t)0.03527423889821394709f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99926474728659442359f,(float16_t)0.03834012037355269409f,
-(float16_t)0.99920475861836388631f,(float16_t)0.03987292758773981066f,
-(float16_t)0.99914241872481690532f,(float16_t)0.04140564097707673946f,
-(float16_t)0.99907772775264536147f,(float16_t)0.04293825693494082024f,
-(float16_t)0.99901068585407337697f,(float16_t)0.04447077185493866769f,
-(float16_t)0.99894129318685687124f,(float16_t)0.04600318213091462299f,
-(float16_t)0.99886954991428356099f,(float16_t)0.04753548415695930257f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99871901223387293811f,(float16_t)0.05059974903689928166f,
-(float16_t)0.99864021818026527111f,(float16_t)0.05213170468028332366f,
-(float16_t)0.99855907422975931365f,(float16_t)0.05366353765273051968f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99838973740734016094f,(float16_t)0.05672682116690774823f,
-(float16_t)0.99830154493389289261f,(float16_t)0.05825826450043575244f,
-(float16_t)0.99821100336047818846f,(float16_t)0.05978957074663986820f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99802287377148624081f,(float16_t)0.06285175756416140624f,
-(float16_t)0.99792528619859599548f,(float16_t)0.06438263092985746505f,
-(float16_t)0.99782535041111164453f,(float16_t)0.06591335279700380467f,
-(float16_t)0.99772306664419163624f,(float16_t)0.06744391956366405094f,
-(float16_t)0.99761843513851955478f,(float16_t)0.06897432762826674613f,
-(float16_t)0.99751145614030345410f,(float16_t)0.07050457338961385600f,
-(float16_t)0.99740212990127530279f,(float16_t)0.07203465324688933247f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99717643673532618820f,(float16_t)0.07509430084792130533f,
-(float16_t)0.99706007033948296225f,(float16_t)0.07662386139203149205f,
-(float16_t)0.99694135776498216117f,(float16_t)0.07815324163279423197f,
-(float16_t)0.99682029929116566791f,(float16_t)0.07968243797143012563f,
-(float16_t)0.99669689520289606044f,(float16_t)0.08121144680959244133f,
-(float16_t)0.99657114579055483539f,(float16_t)0.08274026454937569164f,
-(float16_t)0.99644305135004263008f,(float16_t)0.08426888759332407108f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99617982859569698117f,(float16_t)0.08732553520619205922f,
-(float16_t)0.99604470090125196702f,(float16_t)0.08885355258252460031f,
-(float16_t)0.99590722941741172125f,(float16_t)0.09038136087786498296f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99562525638099430569f,(float16_t)0.09343633584574778661f,
-(float16_t)0.99548075549192693856f,(float16_t)0.09496349532963899165f,
-(float16_t)0.99533391214048227980f,(float16_t)0.09649043135525259274f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99503319943811863180f,(float16_t)0.09954361866006931903f,
-(float16_t)0.99487933079480561638f,(float16_t)0.10106986275482782167f,
-(float16_t)0.99472312110432570265f,(float16_t)0.10259586902243628126f,
-(float16_t)0.99456457073425541537f,(float16_t)0.10412163387205458642f,
-(float16_t)0.99440368005767909576f,(float16_t)0.10564715371341061589f,
-(float16_t)0.99424044945318790223f,(float16_t)0.10717242495680884273f,
-(float16_t)0.99407487930487936634f,(float16_t)0.10869744401313871651f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99373672194072459884f,(float16_t)0.11174671121112658700f,
-(float16_t)0.99356413552059530403f,(float16_t)0.11327095217756434631f,
-(float16_t)0.99338921114808065305f,(float16_t)0.11479492660651008373f,
-(float16_t)0.99321194923479450001f,(float16_t)0.11631863091190475235f,
-(float16_t)0.99303235019785141002f,(float16_t)0.11784206150832497728f,
-(float16_t)0.99285041445986510489f,(float16_t)0.11936521481099135467f,
-(float16_t)0.99266614244894801899f,(float16_t)0.12088808723577708359f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99229059134825736699f,(float16_t)0.12393297511851215920f,
-(float16_t)0.99209931314219179654f,(float16_t)0.12545498341154623367f,
-(float16_t)0.99190570043060932726f,(float16_t)0.12697669649688586579f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.99151147331874389668f,(float16_t)0.13001922272223334631f,
-(float16_t)0.99131085984611544415f,(float16_t)0.13154002870288311611f,
-(float16_t)0.99110791372327688986f,(float16_t)0.13306052515713906459f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.99069502544266463406f,(float16_t)0.13610057517570620100f,
-(float16_t)0.99048508425645709341f,(float16_t)0.13762012158648603832f,
-(float16_t)0.99027281236316910817f,(float16_t)0.13913934416382620074f,
-(float16_t)0.99005821026229712256f,(float16_t)0.14065823933284921088f,
-(float16_t)0.98984127845882052821f,(float16_t)0.14217680351944803063f,
-(float16_t)0.98962201746320088702f,(float16_t)0.14369503315029447110f,
-(float16_t)0.98940042779138037687f,(float16_t)0.14521292465284746376f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98895026451030298986f,(float16_t)0.14824767898689603096f,
-(float16_t)0.98872169196032377858f,(float16_t)0.14976453467732150915f,
-(float16_t)0.98849079285269658701f,(float16_t)0.15128103795733022219f,
-(float16_t)0.98825756773074946437f,(float16_t)0.15279718525844343535f,
-(float16_t)0.98802201714328352633f,(float16_t)0.15431297301302010494f,
-(float16_t)0.98778414164457217783f,(float16_t)0.15582839765426523271f,
-(float16_t)0.98754394179435922574f,(float16_t)0.15734345561623824805f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98705657130575097380f,(float16_t)0.16037245724292828464f,
-(float16_t)0.98680940181418552726f,(float16_t)0.16188639378011182579f,
-(float16_t)0.98655991026477540817f,(float16_t)0.16339994938297322524f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98605396334619543897f,(float16_t)0.16642590354046410406f,
-(float16_t)0.98579750916756747614f,(float16_t)0.16793829497473117263f,
-(float16_t)0.98553873531217606185f,(float16_t)0.16945029123396795900f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98501423101223983814f,(float16_t)0.17247308399679595059f,
-(float16_t)0.98474850180190420801f,(float16_t)0.17398387338746382214f,
-(float16_t)0.98448045538322093151f,(float16_t)0.17549425337727142526f,
-(float16_t)0.98421009238692902521f,(float16_t)0.17700422041214874946f,
-(float16_t)0.98393741344921892278f,(float16_t)0.17851377093899750692f,
-(float16_t)0.98366241921173025453f,(float16_t)0.18002290140569951471f,
-(float16_t)0.98338511032155118130f,(float16_t)0.18153160826112496595f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98282355119870523641f,(float16_t)0.18454773693861961648f,
-(float16_t)0.98253930228744124076f,(float16_t)0.18605515166344663291f,
-(float16_t)0.98225274136628937249f,(float16_t)0.18756212858252960252f,
-(float16_t)0.98196386910955524296f,(float16_t)0.18906866414980619262f,
-(float16_t)0.98167268619698311305f,(float16_t)0.19057475482025273972f,
-(float16_t)0.98137919331375456089f,(float16_t)0.19208039704989243734f,
-(float16_t)0.98108339115048670553f,(float16_t)0.19358558729580360724f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.98048486177346938497f,(float16_t)0.19659459767008022335f,
-(float16_t)0.98018213596811742949f,(float16_t)0.19809841071795356027f,
-(float16_t)0.97987710369951763756f,(float16_t)0.19960175762113097075f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97926012264908202098f,(float16_t)0.20260703884442113343f,
-(float16_t)0.97894817531906219710f,(float16_t)0.20410896609281686809f,
-(float16_t)0.97863392442942320759f,(float16_t)0.20561041305309923910f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97799851493455713936f,(float16_t)0.20861185197826348503f,
-(float16_t)0.97767735782450992943f,(float16_t)0.21011183688046961016f,
-(float16_t)0.97735390014519996082f,(float16_t)0.21161132736922755315f,
-(float16_t)0.97702814265775439484f,(float16_t)0.21311031991609136194f,
-(float16_t)0.97670008612871184184f,(float16_t)0.21460881099378675829f,
-(float16_t)0.97636973133002114000f,(float16_t)0.21610679707621952006f,
-(float16_t)0.97603707903903902388f,(float16_t)0.21760427463848364127f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97536488511665697665f,(float16_t)0.22059769010887350649f,
-(float16_t)0.97502534506699412020f,(float16_t)0.22209362097320350937f,
-(float16_t)0.97468351068851066810f,(float16_t)0.22358902922978998729f,
-(float16_t)0.97433938278557585821f,(float16_t)0.22508391135979283204f,
-(float16_t)0.97399296216795583359f,(float16_t)0.22657826384561000066f,
-(float16_t)0.97364424965081197705f,(float16_t)0.22807208317088573102f,
-(float16_t)0.97329324605469824672f,(float16_t)0.22956536582051886852f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97258436893473221296f,(float16_t)0.23255030703877524467f,
-(float16_t)0.97222649707893626925f,(float16_t)0.23404195858354343018f,
-(float16_t)0.97186633748027939639f,(float16_t)0.23553305940497548665f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.97113915844972509284f,(float16_t)0.23851359484431841618f,
-(float16_t)0.97077214072895035013f,(float16_t)0.24000302244874149871f,
-(float16_t)0.97040283868755550234f,(float16_t)0.24149188530286933019f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96965738512429244800f,(float16_t)0.24446790274782415064f,
-(float16_t)0.96928123535654853171f,(float16_t)0.24595505033579459497f,
-(float16_t)0.96890280477642887202f,(float16_t)0.24744161916777326904f,
-(float16_t)0.96852209427441737777f,(float16_t)0.24892760574572014853f,
-(float16_t)0.96813910474636244441f,(float16_t)0.25041300657296522436f,
-(float16_t)0.96775383709347551076f,(float16_t)0.25189781815421696809f,
-(float16_t)0.96736629222232850545f,(float16_t)0.25338203699557015902f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96658437447833311928f,(float16_t)0.25634868248994291395f,
-(float16_t)0.96619000344541250413f,(float16_t)0.25783110216215898713f,
-(float16_t)0.96579335887408368500f,(float16_t)0.25931291513288623474f,
-(float16_t)0.96539444169768939830f,(float16_t)0.26079411791527551401f,
-(float16_t)0.96499325285492032478f,(float16_t)0.26227470702391358914f,
-(float16_t)0.96458979328981275803f,(float16_t)0.26375467897483134694f,
-(float16_t)0.96418406395174582890f,(float16_t)0.26523403028551179039f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96336579978095404631f,(float16_t)0.26819085706340317632f,
-(float16_t)0.96295326687368387741f,(float16_t)0.26966832557291509076f,
-(float16_t)0.96253846804435916340f,(float16_t)0.27114515952680801059f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.96170207652912254037f,(float16_t)0.27409690986870638429f,
-(float16_t)0.96128048581132063966f,(float16_t)0.27557181931095814376f,
-(float16_t)0.96085663310767965850f,(float16_t)0.27704608030609989555f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.96000214573766595727f,(float16_t)0.27999264308027321801f,
-(float16_t)0.95957151308198451733f,(float16_t)0.28146493792575794091f,
-(float16_t)0.95913862246184189431f,(float16_t)0.28293657045705539188f,
-(float16_t)0.95870347489587159906f,(float16_t)0.28440753721127187692f,
-(float16_t)0.95826607140801767226f,(float16_t)0.28587783472708061527f,
-(float16_t)0.95782641302753290802f,(float16_t)0.28734745954472951102f,
-(float16_t)0.95738450078897585627f,(float16_t)0.28881640820604947972f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95649391890239510161f,(float16_t)0.29175226323498926195f,
-(float16_t)0.95604525134999640557f,(float16_t)0.29321916269425862822f,
-(float16_t)0.95559433413077110586f,(float16_t)0.29468537218051432669f,
-(float16_t)0.95514116830577078243f,(float16_t)0.29615088824362378883f,
-(float16_t)0.95468575494133833814f,(float16_t)0.29761570743508619641f,
-(float16_t)0.95422809510910566733f,(float16_t)0.29907982630804047508f,
-(float16_t)0.95376818988599032512f,(float16_t)0.30054324141727345454f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.95284164760119871573f,(float16_t)0.30346794657201131562f,
-(float16_t)0.95237501271976587880f,(float16_t)0.30492922973540237397f,
-(float16_t)0.95190613680793234597f,(float16_t)0.30638979537086091787f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.95096166631157508231f,(float16_t)0.30930876031226872680f,
-(float16_t)0.95048607394948170235f,(float16_t)0.31076715274961147495f,
-(float16_t)0.95000824500184299914f,(float16_t)0.31222481392182488413f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94904588185270055689f,(float16_t)0.31513792875252244485f,
-(float16_t)0.94856134991573026749f,(float16_t)0.31659337555616584581f,
-(float16_t)0.94807458592227622507f,(float16_t)0.31804807738501494896f,
-(float16_t)0.94758559101774109124f,(float16_t)0.31950203081601569188f,
-(float16_t)0.94709436635277721717f,(float16_t)0.32095523242787521445f,
-(float16_t)0.94660091308328353499f,(float16_t)0.32240767880106985244f,
-(float16_t)0.94610523237040344835f,(float16_t)0.32385936651785285356f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94510719328526060501f,(float16_t)0.32676045232013173347f,
-(float16_t)0.94460483726148025685f,(float16_t)0.32820984357909249729f,
-(float16_t)0.94410025849127265918f,(float16_t)0.32965846252858749255f,
-(float16_t)0.94359345816196038559f,(float16_t)0.33110630575987642921f,
-(float16_t)0.94308443746609349478f,(float16_t)0.33255336986604422389f,
-(float16_t)0.94257319760144686605f,(float16_t)0.33399965144200938205f,
-(float16_t)0.94205973977101731265f,(float16_t)0.33544514708453160301f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.94102617505088925753f,(float16_t)0.33833376696554112728f,
-(float16_t)0.94050607059326829518f,(float16_t)0.33977688440682685123f,
-(float16_t)0.93998375303401404679f,(float16_t)0.34121920232028235542f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93893248353206459900f,(float16_t)0.34410142598993881391f,
-(float16_t)0.93840353406310805795f,(float16_t)0.34554132496398909380f,
-(float16_t)0.93787237643998988545f,(float16_t)0.34698041084592368133f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93680344173592156043f,(float16_t)0.34985612979013491763f,
-(float16_t)0.93626566717027825959f,(float16_t)0.35129275608556709276f,
-(float16_t)0.93572568948108036935f,(float16_t)0.35272855575521072646f,
-(float16_t)0.93518350993894761025f,(float16_t)0.35416352542049034380f,
-(float16_t)0.93463912981968078064f,(float16_t)0.35559766170478385172f,
-(float16_t)0.93409255040425887007f,(float16_t)0.35703096123342997759f,
-(float16_t)0.93354377297883617270f,(float16_t)0.35846342063373654030f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.93243962926846235550f,(float16_t)0.36132580556845428355f,
-(float16_t)0.93188426558166814750f,(float16_t)0.36275572436739722537f,
-(float16_t)0.93132670908118042608f,(float16_t)0.36418478956707989180f,
-(float16_t)0.93076696107898371224f,(float16_t)0.36561299780477385379f,
-(float16_t)0.93020502289221906889f,(float16_t)0.36704034571976718038f,
-(float16_t)0.92964089584318121418f,(float16_t)0.36846682995337232125f,
-(float16_t)0.92907458125931585702f,(float16_t)0.36989244714893410038f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92793539482261788720f,(float16_t)0.37274106700951575855f,
-(float16_t)0.92736252565040111495f,(float16_t)0.37416406297145793358f,
-(float16_t)0.92678747430458174872f,(float16_t)0.37558617848921721505f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.92563083050987271516f,(float16_t)0.37842775480876555960f,
-(float16_t)0.92504924078267758425f,(float16_t)0.37984720892405116066f,
-(float16_t)0.92446547432526260391f,(float16_t)0.38126576922216237620f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.92329141671952763559f,(float16_t)0.38410019501693504207f,
-(float16_t)0.92270112833387862850f,(float16_t)0.38551605384391884890f,
-(float16_t)0.92210866874334518339f,(float16_t)0.38693100551438858181f,
-(float16_t)0.92151403934204190183f,(float16_t)0.38834504669882624617f,
-(float16_t)0.92091724152918941204f,(float16_t)0.38975817406985641123f,
-(float16_t)0.92031827670911059425f,(float16_t)0.39117038430225387069f,
-(float16_t)0.91971714629122736095f,(float16_t)0.39258167407295146978f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91850839432521225181f,(float16_t)0.39540147894781635385f,
-(float16_t)0.91790077562139049672f,(float16_t)0.39680998741671030805f,
-(float16_t)0.91729099700837790632f,(float16_t)0.39821756215337356100f,
-(float16_t)0.91667905992104270485f,(float16_t)0.39962419984564678810f,
-(float16_t)0.91606496579933172075f,(float16_t)0.40102989718357562321f,
-(float16_t)0.91544871608826783316f,(float16_t)0.40243465085941843018f,
-(float16_t)0.91483031223794619713f,(float16_t)0.40383845756765407442f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.91358704794525080750f,(float16_t)0.40664321687036902864f,
-(float16_t)0.91296219042839821256f,(float16_t)0.40804416286497868782f,
-(float16_t)0.91233518462332274801f,(float16_t)0.40944414869225759235f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.91107473405517636067f,(float16_t)0.41224122666988288755f,
-(float16_t)0.91044129225806724737f,(float16_t)0.41363831223843450235f,
-(float16_t)0.90980570810465222209f,(float16_t)0.41503442447608163146f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90852811871630612117f,(float16_t)0.41782371582021227141f,
-(float16_t)0.90788611648766626150f,(float16_t)0.41921688836322390515f,
-(float16_t)0.90724197791529581636f,(float16_t)0.42060907444840250902f,
-(float16_t)0.90659570451491533483f,(float16_t)0.42200027079979968159f,
-(float16_t)0.90594729780726845902f,(float16_t)0.42339047414379604728f,
-(float16_t)0.90529675931811881551f,(float16_t)0.42477968120910880589f,
-(float16_t)0.90464409057824624050f,(float16_t)0.42616788872679961520f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.90333236849451181705f,(float16_t)0.42894129205532949278f,
-(float16_t)0.90267331823725882600f,(float16_t)0.43032648134008261165f,
-(float16_t)0.90201214390249317976f,(float16_t)0.43171065802505725895f,
-(float16_t)0.90134884704602202810f,(float16_t)0.43309381885315195726f,
-(float16_t)0.90068342922864685907f,(float16_t)0.43447596056965565037f,
-(float16_t)0.90001589201616016833f,(float16_t)0.43585707992225547480f,
-(float16_t)0.89934623697934157338f,(float16_t)0.43723717366104408732f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89800057974073987932f,(float16_t)0.43999427130963325583f,
-(float16_t)0.89732458070541831763f,(float16_t)0.44137126873171667052f,
-(float16_t)0.89664647017868015499f,(float16_t)0.44274722756457002282f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.89528392103855758410f,(float16_t)0.44549601651398174074f,
-(float16_t)0.89459948563138269595f,(float16_t)0.44686884016237415906f,
-(float16_t)0.89391294514520325265f,(float16_t)0.44824061228521988598f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.89253355540276457791f,(float16_t)0.45098098904510386387f,
-(float16_t)0.89184070939234272313f,(float16_t)0.45234958723377088896f,
-(float16_t)0.89114576479458318392f,(float16_t)0.45371712100016386993f,
-(float16_t)0.89044872324475787817f,(float16_t)0.45508358712634383592f,
-(float16_t)0.88974958638307277692f,(float16_t)0.45644898239688391772f,
-(float16_t)0.88904835585466457371f,(float16_t)0.45781330359887717485f,
-(float16_t)0.88834503330959635470f,(float16_t)0.45917654752194408951f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88693211879434219469f,(float16_t)0.46189979070246273141f,
-(float16_t)0.88622253014888063838f,(float16_t)0.46325978355186014923f,
-(float16_t)0.88551085613619995307f,(float16_t)0.46461868630623781584f,
-(float16_t)0.88479709843093778954f,(float16_t)0.46597649576796618121f,
-(float16_t)0.88408125871263498752f,(float16_t)0.46733320874198841510f,
-(float16_t)0.88336333866573157891f,(float16_t)0.46868882203582790114f,
-(float16_t)0.88264333997956279099f,(float16_t)0.47004333245959561971f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.88119711347122209322f,(float16_t)0.47274903195034279069f,
-(float16_t)0.88047088905216075450f,(float16_t)0.47410021465054996703f,
-(float16_t)0.87974259280004740713f,(float16_t)0.47545028174715586733f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87827979165654157523f,(float16_t)0.47814705642484300885f,
-(float16_t)0.87754529020726135258f,(float16_t)0.47949375766015295275f,
-(float16_t)0.87680872380914565145f,(float16_t)0.48083933060033395845f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87532940310411089246f,(float16_t)0.48352707893291868579f,
-(float16_t)0.87458665227817611321f,(float16_t)0.48486924800079106435f,
-(float16_t)0.87384184346536686316f,(float16_t)0.48621027612448641797f,
-(float16_t)0.87309497841829009079f,(float16_t)0.48755016014843599592f,
-(float16_t)0.87234605889439154058f,(float16_t)0.48888889691976317176f,
-(float16_t)0.87159508665595097909f,(float16_t)0.49022648328829115938f,
-(float16_t)0.87084206347007897531f,(float16_t)0.49156291610654989643f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86932987134860684186f,(float16_t)0.49423230851595967295f,
-(float16_t)0.86857070597134089507f,(float16_t)0.49556526182577254058f,
-(float16_t)0.86780949676330332299f,(float16_t)0.49689704902265446895f,
-(float16_t)0.86704624551569264845f,(float16_t)0.49822766697278181303f,
-(float16_t)0.86628095402451299467f,(float16_t)0.49955711254508183838f,
-(float16_t)0.86551362409056908920f,(float16_t)0.50088538261124071482f,
-(float16_t)0.86474425751946237817f,(float16_t)0.50221247404571078832f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.86319942171212415971f,(float16_t)0.50486310853126759035f,
-(float16_t)0.86242395611104050168f,(float16_t)0.50618664534515522835f,
-(float16_t)0.86164646114308129921f,(float16_t)0.50750899105297087033f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.86008539042939013974f,(float16_t)0.51015009670676680908f,
-(float16_t)0.85930181835700847337f,(float16_t)0.51146885043797030157f,
-(float16_t)0.85851622426444273994f,(float16_t)0.51278640063356295542f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85693897741782876221f,(float16_t)0.51541787801946292724f,
-(float16_t)0.85614732837519447184f,(float16_t)0.51673179901764987321f,
-(float16_t)0.85535366473519602870f,(float16_t)0.51804450409599933636f,
-(float16_t)0.85455798836540053376f,(float16_t)0.51935599016558964269f,
-(float16_t)0.85376030113811141042f,(float16_t)0.52066625414036715735f,
-(float16_t)0.85296060493036363059f,(float16_t)0.52197529293715438925f,
-(float16_t)0.85215890162391982887f,(float16_t)0.52328310347565643035f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.85054948126560347976f,(float16_t)0.52589502747108463065f,
-(float16_t)0.84974176800085254868f,(float16_t)0.52719913478190127964f,
-(float16_t)0.84893205521163961347f,(float16_t)0.52850200154222848337f,
-(float16_t)0.84812034480329723252f,(float16_t)0.52980362468629460526f,
-(float16_t)0.84730663868585831544f,(float16_t)0.53110400115125500076f,
-(float16_t)0.84649093877405212627f,(float16_t)0.53240312787719790144f,
-(float16_t)0.84567324698729906540f,(float16_t)0.53370100180715296379f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.84403189549006640835f,(float16_t)0.53629297906596318235f,
-(float16_t)0.84320823964184543620f,(float16_t)0.53758707629564539410f,
-(float16_t)0.84238259964318584760f,(float16_t)0.53887990853100842248f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.84072537497045807253f,(float16_t)0.54146176585312344454f,
-(float16_t)0.83989379419599952126f,(float16_t)0.54275078486451588944f,
-(float16_t)0.83906023707031274217f,(float16_t)0.54403852673088382019f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83738720161566193578f,(float16_t)0.54661016691083486041f,
-(float16_t)0.83654772722351200542f,(float16_t)0.54789405917310018967f,
-(float16_t)0.83570628435375260423f,(float16_t)0.54917666218771965525f,
-(float16_t)0.83486287498638001026f,(float16_t)0.55045797293660481131f,
-(float16_t)0.83401750110601813315f,(float16_t)0.55173798840470733573f,
-(float16_t)0.83317016470191318511f,(float16_t)0.55301670558002746780f,
-(float16_t)0.83232086776792968408f,(float16_t)0.55429412145362000341f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.83061640030884631436f,(float16_t)0.55684503727516010407f,
-(float16_t)0.82976123379452304540f,(float16_t)0.55811853122055610221f,
-(float16_t)0.82890411477186487499f,(float16_t)0.55939071185913613604f,
-(float16_t)0.82804504525775579626f,(float16_t)0.56066157619733603124f,
-(float16_t)0.82718402727366913130f,(float16_t)0.56193112124468935775f,
-(float16_t)0.82632106284566353427f,(float16_t)0.56319934401383409117f,
-(float16_t)0.82545615400437755138f,(float16_t)0.56446624152051938506f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.82372051122739142759f,(float16_t)0.56699604882510867832f,
-(float16_t)0.82284978137582642788f,(float16_t)0.56825895267013148970f,
-(float16_t)0.82197711527924155472f,(float16_t)0.56952051934694714053f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.82022598256943468620f,(float16_t)0.57203962932475704850f,
-(float16_t)0.81934752007679700903f,(float16_t)0.57329716669804220430f,
-(float16_t)0.81846712958029865792f,(float16_t)0.57455335504771576360f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81670057286682784525f,(float16_t)0.57706167285567944170f,
-(float16_t)0.81581441080673378075f,(float16_t)0.57831379641165558958f,
-(float16_t)0.81492632905652662156f,(float16_t)0.57956455913940563285f,
-(float16_t)0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)0.81314441484925359394f,(float16_t)0.58206199034077543697f,
-(float16_t)0.81225058658520399302f,(float16_t)0.58330865293769829094f,
-(float16_t)0.81135484701706372945f,(float16_t)0.58455394295301532637f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80955764240405125864f,(float16_t)0.58704039352091796911f,
-(float16_t)0.80865618158817498262f,(float16_t)0.58828154822264522306f,
-(float16_t)0.80775281792619035848f,(float16_t)0.58952131864106394055f,
-(float16_t)0.80684755354379933401f,(float16_t)0.59075970185887416442f,
-(float16_t)0.80594039057117627944f,(float16_t)0.59199669496204099239f,
-(float16_t)0.80503133114296365758f,(float16_t)0.59323229503979979516f,
-(float16_t)0.80412037739826569549f,(float16_t)0.59446649918466443197f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.80229279553811572168f,(float16_t)0.59693070806219639124f,
-(float16_t)0.80137617172314024039f,(float16_t)0.59816070699634238395f,
-(float16_t)0.80045766219262282082f,(float16_t)0.59938929840056454079f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.79861499463476093297f,(float16_t)0.60184224705858002658f,
-(float16_t)0.79769084094339115509f,(float16_t)0.60306659854034816437f,
-(float16_t)0.79676481020841882774f,(float16_t)0.60428953094815596181f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.79490712632823701256f,(float16_t)0.60673112703452447558f,
-(float16_t)0.79397547755433717231f,(float16_t)0.60794978496777363208f,
-(float16_t)0.79304196047944364167f,(float16_t)0.60916701233645320634f,
-(float16_t)0.79210657730021238887f,(float16_t)0.61038280627630947528f,
-(float16_t)0.79116933021769020318f,(float16_t)0.61159716392646190641f,
-(float16_t)0.79023022143731003197f,(float16_t)0.61281008242940970820f,
-(float16_t)0.78928925316888565167f,(float16_t)0.61402155893103849138f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78740174702903142911f,(float16_t)0.61644017453085364622f,
-(float16_t)0.78645521359908576731f,(float16_t)0.61764730793780386886f,
-(float16_t)0.78550682956405393220f,(float16_t)0.61885298796097631957f,
-(float16_t)0.78455659715557524159f,(float16_t)0.62005721176328909561f,
-(float16_t)0.78360451860963820092f,(float16_t)0.62125997651108755271f,
-(float16_t)0.78265059616657572938f,(float16_t)0.62246127937414996723f,
-(float16_t)0.78169483207105938671f,(float16_t)0.62366111752569453053f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77977778792301455368f,(float16_t)0.62605638840434352232f,
-(float16_t)0.77881651238147597827f,(float16_t)0.62725181549514408275f,
-(float16_t)0.77785340420945314754f,(float16_t)0.62844576660183271155f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.77592169904340768660f,(float16_t)0.63082922962842447046f,
-(float16_t)0.77495310659487393057f,(float16_t)0.63201873593980906207f,
-(float16_t)0.77398269060682289844f,(float16_t)0.63320675505005719064f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.77203639715038452351f,(float16_t)0.63557832048855611440f,
-(float16_t)0.77106052426181381776f,(float16_t)0.63676186123628419899f,
-(float16_t)0.77008283699334789674f,(float16_t)0.63794390362184405507f,
-(float16_t)0.76910333764557969882f,(float16_t)0.63912444486377573138f,
-(float16_t)0.76812202852336541881f,(float16_t)0.64030348218415167327f,
-(float16_t)0.76713891193582040007f,(float16_t)0.64148101280858305095f,
-(float16_t)0.76615399019631291733f,(float16_t)0.64265703396622686494f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.76417874053611667406f,(float16_t)0.64500453681554392737f,
-(float16_t)0.76318841726338138010f,(float16_t)0.64617601298331628357f,
-(float16_t)0.76219629813457900891f,(float16_t)0.64734596863651205911f,
-(float16_t)0.76120238548426177871f,(float16_t)0.64851440102211244110f,
-(float16_t)0.76020668165120242055f,(float16_t)0.64968130739068319368f,
-(float16_t)0.75920918897838796102f,(float16_t)0.65084668499638087535f,
-(float16_t)0.75820990981301528144f,(float16_t)0.65201053109695950027f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.75620600141439453523f,(float16_t)0.65433361783180044036f,
-(float16_t)0.75520137689653654700f,(float16_t)0.65549285299961534967f,
-(float16_t)0.75419497531688917125f,(float16_t)0.65665054572942893607f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.75217685044904269986f,(float16_t)0.65896129298203731661f,
-(float16_t)0.75116513190968636771f,(float16_t)0.66011434206742047870f,
-(float16_t)0.75015164580621507273f,(float16_t)0.66126583783999226540f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74811938045040360379f,(float16_t)0.66356415861203976725f,
-(float16_t)0.74710060598018013245f,(float16_t)0.66471097820334479334f,
-(float16_t)0.74608007351006377927f,(float16_t)0.66585623366550972246f,
-(float16_t)0.74505778544146594733f,(float16_t)0.66699992230363747137f,
-(float16_t)0.74403374417992929057f,(float16_t)0.66814204142651845153f,
-(float16_t)0.74300795213512171866f,(float16_t)0.66928258834663600929f,
-(float16_t)0.74198041172083106787f,(float16_t)0.67042156038017308717f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73992009545951620275f,(float16_t)0.67269476907077285777f,
-(float16_t)0.73888732446061511361f,(float16_t)0.67382900037875603783f,
-(float16_t)0.73785281478846598269f,(float16_t)0.67496164610201192513f,
-(float16_t)0.73681656887736979300f,(float16_t)0.67609270357531592310f,
-(float16_t)0.73577858916571359238f,(float16_t)0.67722217013718033485f,
-(float16_t)0.73473887809596349907f,(float16_t)0.67835004312986146857f,
-(float16_t)0.73369743811466026084f,(float16_t)0.67947631989936496666f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.73160938122389262972f,(float16_t)0.68172407417164970767f,
-(float16_t)0.73056276922782759087f,(float16_t)0.68284554638524808112f,
-(float16_t)0.72951443814699701296f,(float16_t)0.68396541179731540350f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.72741262860237576593f,(float16_t)0.68620031168003858824f,
-(float16_t)0.72635915508434600873f,(float16_t)0.68731534089175905233f,
-(float16_t)0.72530397237306076796f,(float16_t)0.68842875278409043638f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.72318848930652745999f,(float16_t)0.69065071413453460458f,
-(float16_t)0.72212819392921534511f,(float16_t)0.69175925836415774750f,
-(float16_t)0.72106619931450810501f,(float16_t)0.69286617481742462932f,
-(float16_t)0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)0.71893712237280449351f,(float16_t)0.69507511398000088043f,
-(float16_t)0.71787004505573170920f,(float16_t)0.69617713149146298601f,
-(float16_t)0.71680127852109953857f,(float16_t)0.69727751083088651551f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.71465868786276909308f,(float16_t)0.69947334464028376733f,
-(float16_t)0.71358486878079352422f,(float16_t)0.70056879394324833576f,
-(float16_t)0.71250937056469243469f,(float16_t)0.70166259474016845488f,
-(float16_t)0.71143219574521643356f,(float16_t)0.70275474445722529993f,
-(float16_t)0.71035334685706241764f,(float16_t)0.70384524052448493858f,
-(float16_t)0.70927282643886568891f,(float16_t)0.70493408037590488124f,
-(float16_t)0.70819063703319540259f,(float16_t)0.70602126144933974317f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.70602126144933974317f,(float16_t)0.70819063703319540259f,
-(float16_t)0.70493408037590499227f,(float16_t)0.70927282643886568891f,
-(float16_t)0.70384524052448493858f,(float16_t)0.71035334685706241764f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.70166259474016845488f,(float16_t)0.71250937056469232367f,
-(float16_t)0.70056879394324844679f,(float16_t)0.71358486878079352422f,
-(float16_t)0.69947334464028376733f,(float16_t)0.71465868786276909308f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.69727751083088662654f,(float16_t)0.71680127852109942754f,
-(float16_t)0.69617713149146298601f,(float16_t)0.71787004505573170920f,
-(float16_t)0.69507511398000088043f,(float16_t)0.71893712237280438249f,
-(float16_t)0.69397146088965400157f,(float16_t)0.72000250796138165477f,
-(float16_t)0.69286617481742474034f,(float16_t)0.72106619931450810501f,
-(float16_t)0.69175925836415774750f,(float16_t)0.72212819392921534511f,
-(float16_t)0.69065071413453460458f,(float16_t)0.72318848930652734897f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68842875278409043638f,(float16_t)0.72530397237306076796f,
-(float16_t)0.68731534089175905233f,(float16_t)0.72635915508434600873f,
-(float16_t)0.68620031168003858824f,(float16_t)0.72741262860237576593f,
-(float16_t)0.68508366777270035541f,(float16_t)0.72846439044822519637f,
-(float16_t)0.68396541179731551452f,(float16_t)0.72951443814699690193f,
-(float16_t)0.68284554638524808112f,(float16_t)0.73056276922782759087f,
-(float16_t)0.68172407417164981869f,(float16_t)0.73160938122389262972f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67947631989936496666f,(float16_t)0.73369743811466026084f,
-(float16_t)0.67835004312986146857f,(float16_t)0.73473887809596349907f,
-(float16_t)0.67722217013718044587f,(float16_t)0.73577858916571348136f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.67496164610201203615f,(float16_t)0.73785281478846598269f,
-(float16_t)0.67382900037875614885f,(float16_t)0.73888732446061511361f,
-(float16_t)0.67269476907077296879f,(float16_t)0.73992009545951609173f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.67042156038017308717f,(float16_t)0.74198041172083095685f,
-(float16_t)0.66928258834663600929f,(float16_t)0.74300795213512171866f,
-(float16_t)0.66814204142651856255f,(float16_t)0.74403374417992929057f,
-(float16_t)0.66699992230363747137f,(float16_t)0.74505778544146594733f,
-(float16_t)0.66585623366550972246f,(float16_t)0.74608007351006366825f,
-(float16_t)0.66471097820334490436f,(float16_t)0.74710060598018013245f,
-(float16_t)0.66356415861203987827f,(float16_t)0.74811938045040349277f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.66126583783999226540f,(float16_t)0.75015164580621496171f,
-(float16_t)0.66011434206742047870f,(float16_t)0.75116513190968636771f,
-(float16_t)0.65896129298203731661f,(float16_t)0.75217685044904269986f,
-(float16_t)0.65780669329707874837f,(float16_t)0.75318679904361252042f,
-(float16_t)0.65665054572942904709f,(float16_t)0.75419497531688917125f,
-(float16_t)0.65549285299961546070f,(float16_t)0.75520137689653654700f,
-(float16_t)0.65433361783180055138f,(float16_t)0.75620600141439453523f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.65201053109695950027f,(float16_t)0.75820990981301528144f,
-(float16_t)0.65084668499638098638f,(float16_t)0.75920918897838796102f,
-(float16_t)0.64968130739068319368f,(float16_t)0.76020668165120242055f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.64734596863651205911f,(float16_t)0.76219629813457889789f,
-(float16_t)0.64617601298331639459f,(float16_t)0.76318841726338126907f,
-(float16_t)0.64500453681554403840f,(float16_t)0.76417874053611667406f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.64265703396622686494f,(float16_t)0.76615399019631280630f,
-(float16_t)0.64148101280858316198f,(float16_t)0.76713891193582040007f,
-(float16_t)0.64030348218415167327f,(float16_t)0.76812202852336530778f,
-(float16_t)0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)0.63794390362184416610f,(float16_t)0.77008283699334789674f,
-(float16_t)0.63676186123628419899f,(float16_t)0.77106052426181381776f,
-(float16_t)0.63557832048855622542f,(float16_t)0.77203639715038441249f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.63320675505005719064f,(float16_t)0.77398269060682278742f,
-(float16_t)0.63201873593980906207f,(float16_t)0.77495310659487381955f,
-(float16_t)0.63082922962842458148f,(float16_t)0.77592169904340757558f,
-(float16_t)0.62963823891492709528f,(float16_t)0.77688846567323244230f,
-(float16_t)0.62844576660183271155f,(float16_t)0.77785340420945303652f,
-(float16_t)0.62725181549514419377f,(float16_t)0.77881651238147586724f,
-(float16_t)0.62605638840434352232f,(float16_t)0.77977778792301444266f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.62366111752569464155f,(float16_t)0.78169483207105938671f,
-(float16_t)0.62246127937415007825f,(float16_t)0.78265059616657572938f,
-(float16_t)0.62125997651108766373f,(float16_t)0.78360451860963820092f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.61885298796097631957f,(float16_t)0.78550682956405393220f,
-(float16_t)0.61764730793780397988f,(float16_t)0.78645521359908576731f,
-(float16_t)0.61644017453085364622f,(float16_t)0.78740174702903131809f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.61402155893103849138f,(float16_t)0.78928925316888565167f,
-(float16_t)0.61281008242940970820f,(float16_t)0.79023022143731003197f,
-(float16_t)0.61159716392646201744f,(float16_t)0.79116933021769009216f,
-(float16_t)0.61038280627630947528f,(float16_t)0.79210657730021227785f,
-(float16_t)0.60916701233645320634f,(float16_t)0.79304196047944364167f,
-(float16_t)0.60794978496777374311f,(float16_t)0.79397547755433717231f,
-(float16_t)0.60673112703452447558f,(float16_t)0.79490712632823701256f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.60428953094815607283f,(float16_t)0.79676481020841871672f,
-(float16_t)0.60306659854034827539f,(float16_t)0.79769084094339104407f,
-(float16_t)0.60184224705858002658f,(float16_t)0.79861499463476082195f,
-(float16_t)0.60061647938386897305f,(float16_t)0.79953726910790501314f,
-(float16_t)0.59938929840056454079f,(float16_t)0.80045766219262270980f,
-(float16_t)0.59816070699634238395f,(float16_t)0.80137617172314012937f,
-(float16_t)0.59693070806219650226f,(float16_t)0.80229279553811572168f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.59446649918466454299f,(float16_t)0.80412037739826569549f,
-(float16_t)0.59323229503979979516f,(float16_t)0.80503133114296365758f,
-(float16_t)0.59199669496204099239f,(float16_t)0.80594039057117627944f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.58952131864106394055f,(float16_t)0.80775281792619024746f,
-(float16_t)0.58828154822264533408f,(float16_t)0.80865618158817498262f,
-(float16_t)0.58704039352091808013f,(float16_t)0.80955764240405125864f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.58455394295301532637f,(float16_t)0.81135484701706372945f,
-(float16_t)0.58330865293769829094f,(float16_t)0.81225058658520388200f,
-(float16_t)0.58206199034077554799f,(float16_t)0.81314441484925359394f,
-(float16_t)0.58081395809576452649f,(float16_t)0.81403632970594830276f,
-(float16_t)0.57956455913940574387f,(float16_t)0.81492632905652662156f,
-(float16_t)0.57831379641165558958f,(float16_t)0.81581441080673378075f,
-(float16_t)0.57706167285567955272f,(float16_t)0.81670057286682784525f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.57455335504771576360f,(float16_t)0.81846712958029865792f,
-(float16_t)0.57329716669804231532f,(float16_t)0.81934752007679689800f,
-(float16_t)0.57203962932475704850f,(float16_t)0.82022598256943468620f,
-(float16_t)0.57078074588696736669f,(float16_t)0.82110251499110464835f,
-(float16_t)0.56952051934694725155f,(float16_t)0.82197711527924155472f,
-(float16_t)0.56825895267013148970f,(float16_t)0.82284978137582631685f,
-(float16_t)0.56699604882510867832f,(float16_t)0.82372051122739131657f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.56446624152051949608f,(float16_t)0.82545615400437744036f,
-(float16_t)0.56319934401383409117f,(float16_t)0.82632106284566353427f,
-(float16_t)0.56193112124468946877f,(float16_t)0.82718402727366913130f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.55939071185913613604f,(float16_t)0.82890411477186487499f,
-(float16_t)0.55811853122055610221f,(float16_t)0.82976123379452304540f,
-(float16_t)0.55684503727516010407f,(float16_t)0.83061640030884620334f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.55429412145362011444f,(float16_t)0.83232086776792968408f,
-(float16_t)0.55301670558002757883f,(float16_t)0.83317016470191318511f,
-(float16_t)0.55173798840470744675f,(float16_t)0.83401750110601813315f,
-(float16_t)0.55045797293660481131f,(float16_t)0.83486287498638001026f,
-(float16_t)0.54917666218771976627f,(float16_t)0.83570628435375260423f,
-(float16_t)0.54789405917310018967f,(float16_t)0.83654772722351189440f,
-(float16_t)0.54661016691083486041f,(float16_t)0.83738720161566193578f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.54403852673088393122f,(float16_t)0.83906023707031263115f,
-(float16_t)0.54275078486451600046f,(float16_t)0.83989379419599941023f,
-(float16_t)0.54146176585312355556f,(float16_t)0.84072537497045796151f,
-(float16_t)0.54017147272989296525f,(float16_t)0.84155497743689833268f,
-(float16_t)0.53887990853100842248f,(float16_t)0.84238259964318584760f,
-(float16_t)0.53758707629564550512f,(float16_t)0.84320823964184543620f,
-(float16_t)0.53629297906596318235f,(float16_t)0.84403189549006640835f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.53370100180715296379f,(float16_t)0.84567324698729906540f,
-(float16_t)0.53240312787719801246f,(float16_t)0.84649093877405212627f,
-(float16_t)0.53110400115125500076f,(float16_t)0.84730663868585831544f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.52850200154222848337f,(float16_t)0.84893205521163961347f,
-(float16_t)0.52719913478190139067f,(float16_t)0.84974176800085243766f,
-(float16_t)0.52589502747108474168f,(float16_t)0.85054948126560336874f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.52328310347565643035f,(float16_t)0.85215890162391982887f,
-(float16_t)0.52197529293715438925f,(float16_t)0.85296060493036363059f,
-(float16_t)0.52066625414036726838f,(float16_t)0.85376030113811129940f,
-(float16_t)0.51935599016558953167f,(float16_t)0.85455798836540053376f,
-(float16_t)0.51804450409599933636f,(float16_t)0.85535366473519602870f,
-(float16_t)0.51673179901764998423f,(float16_t)0.85614732837519447184f,
-(float16_t)0.51541787801946314929f,(float16_t)0.85693897741782865118f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.51278640063356306644f,(float16_t)0.85851622426444273994f,
-(float16_t)0.51146885043797052361f,(float16_t)0.85930181835700836235f,
-(float16_t)0.51015009670676669806f,(float16_t)0.86008539042939025077f,
-(float16_t)0.50883014254310698909f,(float16_t)0.86086693863776730939f,
-(float16_t)0.50750899105297087033f,(float16_t)0.86164646114308129921f,
-(float16_t)0.50618664534515533937f,(float16_t)0.86242395611104050168f,
-(float16_t)0.50486310853126747933f,(float16_t)0.86319942171212415971f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.50221247404571089934f,(float16_t)0.86474425751946237817f,
-(float16_t)0.50088538261124093687f,(float16_t)0.86551362409056897818f,
-(float16_t)0.49955711254508183838f,(float16_t)0.86628095402451299467f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.49689704902265463549f,(float16_t)0.86780949676330321196f,
-(float16_t)0.49556526182577248507f,(float16_t)0.86857070597134089507f,
-(float16_t)0.49423230851595972846f,(float16_t)0.86932987134860673084f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.49156291610655006297f,(float16_t)0.87084206347007886428f,
-(float16_t)0.49022648328829110387f,(float16_t)0.87159508665595109012f,
-(float16_t)0.48888889691976322727f,(float16_t)0.87234605889439142956f,
-(float16_t)0.48755016014843605143f,(float16_t)0.87309497841829009079f,
-(float16_t)0.48621027612448652899f,(float16_t)0.87384184346536675214f,
-(float16_t)0.48486924800079111986f,(float16_t)0.87458665227817611321f,
-(float16_t)0.48352707893291874131f,(float16_t)0.87532940310411078144f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.48083933060033390294f,(float16_t)0.87680872380914576247f,
-(float16_t)0.47949375766015300826f,(float16_t)0.87754529020726124156f,
-(float16_t)0.47814705642484311987f,(float16_t)0.87827979165654146421f,
-(float16_t)0.47679923006332225466f,(float16_t)0.87901222642863341417f,
-(float16_t)0.47545028174715586733f,(float16_t)0.87974259280004740713f,
-(float16_t)0.47410021465055002254f,(float16_t)0.88047088905216075450f,
-(float16_t)0.47274903195034290171f,(float16_t)0.88119711347122198219f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.47004333245959561971f,(float16_t)0.88264333997956279099f,
-(float16_t)0.46868882203582795665f,(float16_t)0.88336333866573157891f,
-(float16_t)0.46733320874198852612f,(float16_t)0.88408125871263498752f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.46461868630623781584f,(float16_t)0.88551085613619995307f,
-(float16_t)0.46325978355186026025f,(float16_t)0.88622253014888063838f,
-(float16_t)0.46189979070246284243f,(float16_t)0.88693211879434208367f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.45917654752194414502f,(float16_t)0.88834503330959635470f,
-(float16_t)0.45781330359887728587f,(float16_t)0.88904835585466457371f,
-(float16_t)0.45644898239688386221f,(float16_t)0.88974958638307288794f,
-(float16_t)0.45508358712634383592f,(float16_t)0.89044872324475787817f,
-(float16_t)0.45371712100016392544f,(float16_t)0.89114576479458318392f,
-(float16_t)0.45234958723377099998f,(float16_t)0.89184070939234272313f,
-(float16_t)0.45098098904510380835f,(float16_t)0.89253355540276468894f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.44824061228521999700f,(float16_t)0.89391294514520325265f,
-(float16_t)0.44686884016237432560f,(float16_t)0.89459948563138258493f,
-(float16_t)0.44549601651398174074f,(float16_t)0.89528392103855758410f,
-(float16_t)0.44412214457042925586f,(float16_t)0.89596624975618510689f,
-(float16_t)0.44274722756457013384f,(float16_t)0.89664647017868015499f,
-(float16_t)0.44137126873171661501f,(float16_t)0.89732458070541831763f,
-(float16_t)0.43999427130963325583f,(float16_t)0.89800057974073987932f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.43723717366104419835f,(float16_t)0.89934623697934146236f,
-(float16_t)0.43585707992225547480f,(float16_t)0.90001589201616027935f,
-(float16_t)0.43447596056965570588f,(float16_t)0.90068342922864685907f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.43171065802505736997f,(float16_t)0.90201214390249306874f,
-(float16_t)0.43032648134008261165f,(float16_t)0.90267331823725882600f,
-(float16_t)0.42894129205532954829f,(float16_t)0.90333236849451181705f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.42616788872679961520f,(float16_t)0.90464409057824624050f,
-(float16_t)0.42477968120910880589f,(float16_t)0.90529675931811881551f,
-(float16_t)0.42339047414379610279f,(float16_t)0.90594729780726845902f,
-(float16_t)0.42200027079979979261f,(float16_t)0.90659570451491533483f,
-(float16_t)0.42060907444840250902f,(float16_t)0.90724197791529592738f,
-(float16_t)0.41921688836322396066f,(float16_t)0.90788611648766626150f,
-(float16_t)0.41782371582021238243f,(float16_t)0.90852811871630612117f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.41503442447608163146f,(float16_t)0.90980570810465222209f,
-(float16_t)0.41363831223843455787f,(float16_t)0.91044129225806713634f,
-(float16_t)0.41224122666988299857f,(float16_t)0.91107473405517624965f,
-(float16_t)0.41084317105790391089f,(float16_t)0.91170603200542987832f,
-(float16_t)0.40944414869225764786f,(float16_t)0.91233518462332274801f,
-(float16_t)0.40804416286497874333f,(float16_t)0.91296219042839810154f,
-(float16_t)0.40664321687036913966f,(float16_t)0.91358704794525080750f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.40383845756765412993f,(float16_t)0.91483031223794608611f,
-(float16_t)0.40243465085941854120f,(float16_t)0.91544871608826783316f,
-(float16_t)0.40102989718357578974f,(float16_t)0.91606496579933160973f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.39821756215337361651f,(float16_t)0.91729099700837790632f,
-(float16_t)0.39680998741671041907f,(float16_t)0.91790077562139038569f,
-(float16_t)0.39540147894781629834f,(float16_t)0.91850839432521225181f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.39258167407295152529f,(float16_t)0.91971714629122736095f,
-(float16_t)0.39117038430225398171f,(float16_t)0.92031827670911048322f,
-(float16_t)0.38975817406985641123f,(float16_t)0.92091724152918941204f,
-(float16_t)0.38834504669882630168f,(float16_t)0.92151403934204190183f,
-(float16_t)0.38693100551438869283f,(float16_t)0.92210866874334507237f,
-(float16_t)0.38551605384391901543f,(float16_t)0.92270112833387851747f,
-(float16_t)0.38410019501693504207f,(float16_t)0.92329141671952763559f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.38126576922216248722f,(float16_t)0.92446547432526260391f,
-(float16_t)0.37984720892405110515f,(float16_t)0.92504924078267758425f,
-(float16_t)0.37842775480876561511f,(float16_t)0.92563083050987271516f,
-(float16_t)0.37700741021641831496f,(float16_t)0.92621024213831126826f,
-(float16_t)0.37558617848921732607f,(float16_t)0.92678747430458174872f,
-(float16_t)0.37416406297145798909f,(float16_t)0.92736252565040111495f,
-(float16_t)0.37274106700951581406f,(float16_t)0.92793539482261788720f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.36989244714893426691f,(float16_t)0.92907458125931574600f,
-(float16_t)0.36846682995337232125f,(float16_t)0.92964089584318121418f,
-(float16_t)0.36704034571976723589f,(float16_t)0.93020502289221906889f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.36418478956707983629f,(float16_t)0.93132670908118042608f,
-(float16_t)0.36275572436739722537f,(float16_t)0.93188426558166814750f,
-(float16_t)0.36132580556845433906f,(float16_t)0.93243962926846235550f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.35846342063373654030f,(float16_t)0.93354377297883617270f,
-(float16_t)0.35703096123343003310f,(float16_t)0.93409255040425887007f,
-(float16_t)0.35559766170478396274f,(float16_t)0.93463912981968078064f,
-(float16_t)0.35416352542049051033f,(float16_t)0.93518350993894749923f,
-(float16_t)0.35272855575521072646f,(float16_t)0.93572568948108036935f,
-(float16_t)0.35129275608556714827f,(float16_t)0.93626566717027825959f,
-(float16_t)0.34985612979013502866f,(float16_t)0.93680344173592156043f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.34698041084592368133f,(float16_t)0.93787237643998988545f,
-(float16_t)0.34554132496398914931f,(float16_t)0.93840353406310805795f,
-(float16_t)0.34410142598993898044f,(float16_t)0.93893248353206448797f,
-(float16_t)0.34266071731199437833f,(float16_t)0.93945922360218991898f,
-(float16_t)0.34121920232028241093f,(float16_t)0.93998375303401393577f,
-(float16_t)0.33977688440682696225f,(float16_t)0.94050607059326829518f,
-(float16_t)0.33833376696554129381f,(float16_t)0.94102617505088925753f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.33544514708453165852f,(float16_t)0.94205973977101731265f,
-(float16_t)0.33399965144200949307f,(float16_t)0.94257319760144686605f,
-(float16_t)0.33255336986604422389f,(float16_t)0.94308443746609349478f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.32965846252858754806f,(float16_t)0.94410025849127265918f,
-(float16_t)0.32820984357909266382f,(float16_t)0.94460483726148025685f,
-(float16_t)0.32676045232013178898f,(float16_t)0.94510719328526060501f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.32385936651785296458f,(float16_t)0.94610523237040333733f,
-(float16_t)0.32240767880107001897f,(float16_t)0.94660091308328353499f,
-(float16_t)0.32095523242787521445f,(float16_t)0.94709436635277721717f,
-(float16_t)0.31950203081601574739f,(float16_t)0.94758559101774109124f,
-(float16_t)0.31804807738501505998f,(float16_t)0.94807458592227622507f,
-(float16_t)0.31659337555616584581f,(float16_t)0.94856134991573026749f,
-(float16_t)0.31513792875252244485f,(float16_t)0.94904588185270055689f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.31222481392182505067f,(float16_t)0.95000824500184299914f,
-(float16_t)0.31076715274961147495f,(float16_t)0.95048607394948170235f,
-(float16_t)0.30930876031226878231f,(float16_t)0.95096166631157508231f,
-(float16_t)0.30784964004153497763f,(float16_t)0.95143502096900833820f,
-(float16_t)0.30638979537086108440f,(float16_t)0.95190613680793223494f,
-(float16_t)0.30492922973540242948f,(float16_t)0.95237501271976587880f,
-(float16_t)0.30346794657201137113f,(float16_t)0.95284164760119871573f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.30054324141727339903f,(float16_t)0.95376818988599032512f,
-(float16_t)0.29907982630804047508f,(float16_t)0.95422809510910566733f,
-(float16_t)0.29761570743508630743f,(float16_t)0.95468575494133833814f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.29468537218051432669f,(float16_t)0.95559433413077110586f,
-(float16_t)0.29321916269425868373f,(float16_t)0.95604525134999640557f,
-(float16_t)0.29175226323498937298f,(float16_t)0.95649391890239499059f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.28881640820604947972f,(float16_t)0.95738450078897585627f,
-(float16_t)0.28734745954472956653f,(float16_t)0.95782641302753290802f,
-(float16_t)0.28587783472708072630f,(float16_t)0.95826607140801767226f,
-(float16_t)0.28440753721127182141f,(float16_t)0.95870347489587159906f,
-(float16_t)0.28293657045705539188f,(float16_t)0.95913862246184189431f,
-(float16_t)0.28146493792575805193f,(float16_t)0.95957151308198451733f,
-(float16_t)0.27999264308027338455f,(float16_t)0.96000214573766584625f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.27704608030609995106f,(float16_t)0.96085663310767965850f,
-(float16_t)0.27557181931095825478f,(float16_t)0.96128048581132063966f,
-(float16_t)0.27409690986870632878f,(float16_t)0.96170207652912254037f,
-(float16_t)0.27262135544994897662f,(float16_t)0.96212140426904158019f,
-(float16_t)0.27114515952680806610f,(float16_t)0.96253846804435916340f,
-(float16_t)0.26966832557291520178f,(float16_t)0.96295326687368387741f,
-(float16_t)0.26819085706340317632f,(float16_t)0.96336579978095404631f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.26523403028551190141f,(float16_t)0.96418406395174571788f,
-(float16_t)0.26375467897483151347f,(float16_t)0.96458979328981264700f,
-(float16_t)0.26227470702391358914f,(float16_t)0.96499325285492032478f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.25931291513288634576f,(float16_t)0.96579335887408357397f,
-(float16_t)0.25783110216215893162f,(float16_t)0.96619000344541261516f,
-(float16_t)0.25634868248994291395f,(float16_t)0.96658437447833311928f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.25338203699557027004f,(float16_t)0.96736629222232850545f,
-(float16_t)0.25189781815421691258f,(float16_t)0.96775383709347551076f,
-(float16_t)0.25041300657296527987f,(float16_t)0.96813910474636244441f,
-(float16_t)0.24892760574572025956f,(float16_t)0.96852209427441726675f,
-(float16_t)0.24744161916777343557f,(float16_t)0.96890280477642887202f,
-(float16_t)0.24595505033579459497f,(float16_t)0.96928123535654853171f,
-(float16_t)0.24446790274782420616f,(float16_t)0.96965738512429244800f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.24149188530286930243f,(float16_t)0.97040283868755550234f,
-(float16_t)0.24000302244874149871f,(float16_t)0.97077214072895035013f,
-(float16_t)0.23851359484431849944f,(float16_t)0.97113915844972509284f,
-(float16_t)0.23702360599436733679f,(float16_t)0.97150389098625178352f,
-(float16_t)0.23553305940497545889f,(float16_t)0.97186633748027939639f,
-(float16_t)0.23404195858354345794f,(float16_t)0.97222649707893626925f,
-(float16_t)0.23255030703877532794f,(float16_t)0.97258436893473221296f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.22956536582051886852f,(float16_t)0.97329324605469824672f,
-(float16_t)0.22807208317088578653f,(float16_t)0.97364424965081186603f,
-(float16_t)0.22657826384561011168f,(float16_t)0.97399296216795583359f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.22358902922979001504f,(float16_t)0.97468351068851066810f,
-(float16_t)0.22209362097320359264f,(float16_t)0.97502534506699412020f,
-(float16_t)0.22059769010887364526f,(float16_t)0.97536488511665686563f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.21760427463848366902f,(float16_t)0.97603707903903902388f,
-(float16_t)0.21610679707621960333f,(float16_t)0.97636973133002114000f,
-(float16_t)0.21460881099378692483f,(float16_t)0.97670008612871184184f,
-(float16_t)0.21311031991609136194f,(float16_t)0.97702814265775439484f,
-(float16_t)0.21161132736922760866f,(float16_t)0.97735390014519996082f,
-(float16_t)0.21011183688046972118f,(float16_t)0.97767735782450992943f,
-(float16_t)0.20861185197826345727f,(float16_t)0.97799851493455713936f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.20561041305309932237f,(float16_t)0.97863392442942309657f,
-(float16_t)0.20410896609281700687f,(float16_t)0.97894817531906219710f,
-(float16_t)0.20260703884442110567f,(float16_t)0.97926012264908202098f,
-(float16_t)0.20110463484209195606f,(float16_t)0.97956976568544051887f,
-(float16_t)0.19960175762113105402f,(float16_t)0.97987710369951763756f,
-(float16_t)0.19809841071795372680f,(float16_t)0.98018213596811731847f,
-(float16_t)0.19659459767008022335f,(float16_t)0.98048486177346938497f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.19358558729580374602f,(float16_t)0.98108339115048659451f,
-(float16_t)0.19208039704989238183f,(float16_t)0.98137919331375456089f,
-(float16_t)0.19057475482025279523f,(float16_t)0.98167268619698311305f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.18756212858252974129f,(float16_t)0.98225274136628937249f,
-(float16_t)0.18605515166344663291f,(float16_t)0.98253930228744124076f,
-(float16_t)0.18454773693861964423f,(float16_t)0.98282355119870523641f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.18153160826112513249f,(float16_t)0.98338511032155118130f,
-(float16_t)0.18002290140569951471f,(float16_t)0.98366241921173025453f,
-(float16_t)0.17851377093899759019f,(float16_t)0.98393741344921892278f,
-(float16_t)0.17700422041214886049f,(float16_t)0.98421009238692902521f,
-(float16_t)0.17549425337727139751f,(float16_t)0.98448045538322093151f,
-(float16_t)0.17398387338746384989f,(float16_t)0.98474850180190420801f,
-(float16_t)0.17247308399679603386f,(float16_t)0.98501423101223983814f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.16945029123396793125f,(float16_t)0.98553873531217606185f,
-(float16_t)0.16793829497473122814f,(float16_t)0.98579750916756736512f,
-(float16_t)0.16642590354046421508f,(float16_t)0.98605396334619543897f,
-(float16_t)0.16491312048997008866f,(float16_t)0.98630809724459866938f,
-(float16_t)0.16339994938297322524f,(float16_t)0.98655991026477540817f,
-(float16_t)0.16188639378011188130f,(float16_t)0.98680940181418541624f,
-(float16_t)0.16037245724292839566f,(float16_t)0.98705657130575097380f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.15734345561623827581f,(float16_t)0.98754394179435922574f,
-(float16_t)0.15582839765426531597f,(float16_t)0.98778414164457217783f,
-(float16_t)0.15431297301302024372f,(float16_t)0.98802201714328352633f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.15128103795733024994f,(float16_t)0.98849079285269658701f,
-(float16_t)0.14976453467732162017f,(float16_t)0.98872169196032377858f,
-(float16_t)0.14824767898689619749f,(float16_t)0.98895026451030298986f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.14521292465284751927f,(float16_t)0.98940042779138037687f,
-(float16_t)0.14369503315029458212f,(float16_t)0.98962201746320077600f,
-(float16_t)0.14217680351944800288f,(float16_t)0.98984127845882052821f,
-(float16_t)0.14065823933284923863f,(float16_t)0.99005821026229712256f,
-(float16_t)0.13913934416382628401f,(float16_t)0.99027281236316910817f,
-(float16_t)0.13762012158648617710f,(float16_t)0.99048508425645698239f,
-(float16_t)0.13610057517570620100f,(float16_t)0.99069502544266463406f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.13306052515713917561f,(float16_t)0.99110791372327677884f,
-(float16_t)0.13154002870288328264f,(float16_t)0.99131085984611544415f,
-(float16_t)0.13001922272223334631f,(float16_t)0.99151147331874389668f,
-(float16_t)0.12849811079379322432f,(float16_t)0.99170975366909952520f,
-(float16_t)0.12697669649688597682f,(float16_t)0.99190570043060932726f,
-(float16_t)0.12545498341154620592f,(float16_t)0.99209931314219179654f,
-(float16_t)0.12393297511851220083f,(float16_t)0.99229059134825736699f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.12088808723577722237f,(float16_t)0.99266614244894801899f,
-(float16_t)0.11936521481099135467f,(float16_t)0.99285041445986510489f,
-(float16_t)0.11784206150832501891f,(float16_t)0.99303235019785141002f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.11479492660651025027f,(float16_t)0.99338921114808065305f,
-(float16_t)0.11327095217756436019f,(float16_t)0.99356413552059530403f,
-(float16_t)0.11174671121112665639f,(float16_t)0.99373672194072459884f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.10869744401313867488f,(float16_t)0.99407487930487936634f,
-(float16_t)0.10717242495680887049f,(float16_t)0.99424044945318790223f,
-(float16_t)0.10564715371341069916f,(float16_t)0.99440368005767909576f,
-(float16_t)0.10412163387205472520f,(float16_t)0.99456457073425541537f,
-(float16_t)0.10259586902243628126f,(float16_t)0.99472312110432570265f,
-(float16_t)0.10106986275482787718f,(float16_t)0.99487933079480561638f,
-(float16_t)0.09954361866006944393f,(float16_t)0.99503319943811863180f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.09649043135525260662f,(float16_t)0.99533391214048227980f,
-(float16_t)0.09496349532963906104f,(float16_t)0.99548075549192693856f,
-(float16_t)0.09343633584574791151f,(float16_t)0.99562525638099430569f,
-(float16_t)0.09190895649713269611f,(float16_t)0.99576741446765981713f,
-(float16_t)0.09038136087786501072f,(float16_t)0.99590722941741172125f,
-(float16_t)0.08885355258252468358f,(float16_t)0.99604470090125196702f,
-(float16_t)0.08732553520619222576f,(float16_t)0.99617982859569687015f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.08426888759332412659f,(float16_t)0.99644305135004263008f,
-(float16_t)0.08274026454937580266f,(float16_t)0.99657114579055483539f,
-(float16_t)0.08121144680959238582f,(float16_t)0.99669689520289606044f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.07815324163279431524f,(float16_t)0.99694135776498216117f,
-(float16_t)0.07662386139203161695f,(float16_t)0.99706007033948296225f,
-(float16_t)0.07509430084792129145f,(float16_t)0.99717643673532618820f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.07203465324688941573f,(float16_t)0.99740212990127530279f,
-(float16_t)0.07050457338961400866f,(float16_t)0.99751145614030345410f,
-(float16_t)0.06897432762826673225f,(float16_t)0.99761843513851955478f,
-(float16_t)0.06744391956366410645f,(float16_t)0.99772306664419163624f,
-(float16_t)0.06591335279700392957f,(float16_t)0.99782535041111164453f,
-(float16_t)0.06438263092985740954f,(float16_t)0.99792528619859599548f,
-(float16_t)0.06285175756416142012f,(float16_t)0.99802287377148624081f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.05978957074664000698f,(float16_t)0.99821100336047818846f,
-(float16_t)0.05825826450043573163f,(float16_t)0.99830154493389289261f,
-(float16_t)0.05672682116690778292f,(float16_t)0.99838973740734016094f,
-(float16_t)0.05519524434969003135f,(float16_t)0.99847558057329477421f,
-(float16_t)0.05366353765273067927f,(float16_t)0.99855907422975931365f,
-(float16_t)0.05213170468028331672f,(float16_t)0.99864021818026527111f,
-(float16_t)0.05059974903689933717f,(float16_t)0.99871901223387293811f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.04753548415695926094f,(float16_t)0.99886954991428356099f,
-(float16_t)0.04600318213091464381f,(float16_t)0.99894129318685687124f,
-(float16_t)0.04447077185493874402f,(float16_t)0.99901068585407337697f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.04140564097707671171f,(float16_t)0.99914241872481690532f,
-(float16_t)0.03987292758773984536f,(float16_t)0.99920475861836388631f,
-(float16_t)0.03834012037355279123f,(float16_t)0.99926474728659442359f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.03527423889821394709f,(float16_t)0.99937767038800284780f,
-(float16_t)0.03374117185137764235f,(float16_t)0.99943060455546173237f,
-(float16_t)0.03220802540830470378f,(float16_t)0.99948118696616694567f,
-(float16_t)0.03067480317663658085f,(float16_t)0.99952941750109314256f,
-(float16_t)0.02914150876419373953f,(float16_t)0.99957529604674921764f,
-(float16_t)0.02760814577896581953f,(float16_t)0.99961882249517863830f,
-(float16_t)0.02607471782910403962f,(float16_t)0.99965999674395922270f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.02300768146883941032f,(float16_t)0.99973528826056168306f,
-(float16_t)0.02147408027546960502f,(float16_t)0.99976940535121527898f,
-(float16_t)0.01994042855151459750f,(float16_t)0.99980116988788425569f,
-(float16_t)0.01840672990580482019f,(float16_t)0.99983058179582340319f,
-(float16_t)0.01687298794728177287f,(float16_t)0.99985764100582386060f,
-(float16_t)0.01533920628498821985f,(float16_t)0.99988234745421256111f,
-(float16_t)0.01380538852806034895f,(float16_t)0.99990470108285289808f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.01073765916726457208f,(float16_t)0.99994234967602391162f,
-(float16_t)0.00920375478205995995f,(float16_t)0.99995764455196389786f,
-(float16_t)0.00766982873953107706f,(float16_t)0.99997058643097413988f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)0.00460192612044867198f,(float16_t)0.99998941108192840321f,
-(float16_t)0.00306795676296613791f,(float16_t)0.99999529380957619118f,
-(float16_t)0.00153398018628476615f,(float16_t)0.99999882345170187925f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99998117528260110909f,(float16_t)0.00613588464915447527f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99952941750109314256f,(float16_t)0.03067480317663662595f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99907772775264536147f,(float16_t)0.04293825693494082024f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99772306664419163624f,(float16_t)0.06744391956366405094f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99682029929116566791f,(float16_t)0.07968243797143012563f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99456457073425541537f,(float16_t)0.10412163387205458642f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99321194923479450001f,(float16_t)0.11631863091190475235f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.99005821026229712256f,(float16_t)0.14065823933284921088f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98825756773074946437f,(float16_t)0.15279718525844343535f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98421009238692902521f,(float16_t)0.17700422041214874946f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98196386910955524296f,(float16_t)0.18906866414980619262f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97702814265775439484f,(float16_t)0.21311031991609136194f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97433938278557585821f,(float16_t)0.22508391135979283204f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96852209427441737777f,(float16_t)0.24892760574572014853f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96539444169768939830f,(float16_t)0.26079411791527551401f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.95870347489587159906f,(float16_t)0.28440753721127187692f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95514116830577078243f,(float16_t)0.29615088824362378883f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94758559101774109124f,(float16_t)0.31950203081601569188f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94359345816196038559f,(float16_t)0.33110630575987642921f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93518350993894761025f,(float16_t)0.35416352542049034380f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.93076696107898371224f,(float16_t)0.36561299780477385379f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.92151403934204190183f,(float16_t)0.38834504669882624617f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91667905992104270485f,(float16_t)0.39962419984564678810f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90659570451491533483f,(float16_t)0.42200027079979968159f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.90134884704602202810f,(float16_t)0.43309381885315195726f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.89044872324475787817f,(float16_t)0.45508358712634383592f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88479709843093778954f,(float16_t)0.46597649576796618121f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87309497841829009079f,(float16_t)0.48755016014843599592f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86704624551569264845f,(float16_t)0.49822766697278181303f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85455798836540053376f,(float16_t)0.51935599016558964269f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84812034480329723252f,(float16_t)0.52980362468629460526f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83486287498638001026f,(float16_t)0.55045797293660481131f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82804504525775579626f,(float16_t)0.56066157619733603124f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80684755354379933401f,(float16_t)0.59075970185887416442f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.79210657730021238887f,(float16_t)0.61038280627630947528f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78455659715557524159f,(float16_t)0.62005721176328909561f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.76910333764557969882f,(float16_t)0.63912444486377573138f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.76120238548426177871f,(float16_t)0.64851440102211244110f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74505778544146594733f,(float16_t)0.66699992230363747137f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73681656887736979300f,(float16_t)0.67609270357531592310f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.71143219574521643356f,(float16_t)0.70275474445722529993f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.69397146088965400157f,(float16_t)0.72000250796138165477f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68508366777270035541f,(float16_t)0.72846439044822519637f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.66699992230363747137f,(float16_t)0.74505778544146594733f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.65780669329707874837f,(float16_t)0.75318679904361252042f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62963823891492709528f,(float16_t)0.77688846567323244230f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.61038280627630947528f,(float16_t)0.79210657730021227785f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.60061647938386897305f,(float16_t)0.79953726910790501314f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.58081395809576452649f,(float16_t)0.81403632970594830276f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.57078074588696736669f,(float16_t)0.82110251499110464835f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.55045797293660481131f,(float16_t)0.83486287498638001026f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.54017147272989296525f,(float16_t)0.84155497743689833268f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.51935599016558953167f,(float16_t)0.85455798836540053376f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.50883014254310698909f,(float16_t)0.86086693863776730939f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.48755016014843605143f,(float16_t)0.87309497841829009079f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47679923006332225466f,(float16_t)0.87901222642863341417f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.45508358712634383592f,(float16_t)0.89044872324475787817f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.44412214457042925586f,(float16_t)0.89596624975618510689f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.42200027079979979261f,(float16_t)0.90659570451491533483f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.41084317105790391089f,(float16_t)0.91170603200542987832f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.38834504669882630168f,(float16_t)0.92151403934204190183f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37700741021641831496f,(float16_t)0.92621024213831126826f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.35416352542049051033f,(float16_t)0.93518350993894749923f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.34266071731199437833f,(float16_t)0.93945922360218991898f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.31950203081601574739f,(float16_t)0.94758559101774109124f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.30784964004153497763f,(float16_t)0.95143502096900833820f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.28440753721127182141f,(float16_t)0.95870347489587159906f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.27262135544994897662f,(float16_t)0.96212140426904158019f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.24892760574572025956f,(float16_t)0.96852209427441726675f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.23702360599436733679f,(float16_t)0.97150389098625178352f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.21311031991609136194f,(float16_t)0.97702814265775439484f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.20110463484209195606f,(float16_t)0.97956976568544051887f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.17700422041214886049f,(float16_t)0.98421009238692902521f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.16491312048997008866f,(float16_t)0.98630809724459866938f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.14065823933284923863f,(float16_t)0.99005821026229712256f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.12849811079379322432f,(float16_t)0.99170975366909952520f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.10412163387205472520f,(float16_t)0.99456457073425541537f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.09190895649713269611f,(float16_t)0.99576741446765981713f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.06744391956366410645f,(float16_t)0.99772306664419163624f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.05519524434969003135f,(float16_t)0.99847558057329477421f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.03067480317663658085f,(float16_t)0.99952941750109314256f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.01840672990580482019f,(float16_t)0.99983058179582340319f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0015335083008f,
+(float16_t)1.0000000000000f,(float16_t)0.0030670166016f,
+(float16_t)1.0000000000000f,(float16_t)0.0046005249023f,
+(float16_t)1.0000000000000f,(float16_t)0.0061340332031f,
+(float16_t)1.0000000000000f,(float16_t)0.0076713562012f,
+(float16_t)1.0000000000000f,(float16_t)0.0092010498047f,
+(float16_t)1.0000000000000f,(float16_t)0.0107345581055f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0138015747070f,
+(float16_t)1.0000000000000f,(float16_t)0.0153427124023f,
+(float16_t)1.0000000000000f,(float16_t)0.0168762207031f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)1.0000000000000f,(float16_t)0.0199432373047f,
+(float16_t)1.0000000000000f,(float16_t)0.0214691162109f,
+(float16_t)0.9995117187500f,(float16_t)0.0230102539062f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0260772705078f,
+(float16_t)0.9995117187500f,(float16_t)0.0276031494141f,
+(float16_t)0.9995117187500f,(float16_t)0.0291442871094f,
+(float16_t)0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)0.9995117187500f,(float16_t)0.0321960449219f,
+(float16_t)0.9995117187500f,(float16_t)0.0337524414062f,
+(float16_t)0.9995117187500f,(float16_t)0.0352783203125f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0383300781250f,
+(float16_t)0.9990234375000f,(float16_t)0.0398864746094f,
+(float16_t)0.9990234375000f,(float16_t)0.0414123535156f,
+(float16_t)0.9990234375000f,(float16_t)0.0429382324219f,
+(float16_t)0.9990234375000f,(float16_t)0.0444641113281f,
+(float16_t)0.9990234375000f,(float16_t)0.0459899902344f,
+(float16_t)0.9990234375000f,(float16_t)0.0475463867188f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9985351562500f,(float16_t)0.0505981445312f,
+(float16_t)0.9985351562500f,(float16_t)0.0521240234375f,
+(float16_t)0.9985351562500f,(float16_t)0.0536499023438f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9985351562500f,(float16_t)0.0567321777344f,
+(float16_t)0.9985351562500f,(float16_t)0.0582580566406f,
+(float16_t)0.9980468750000f,(float16_t)0.0597839355469f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9980468750000f,(float16_t)0.0628662109375f,
+(float16_t)0.9980468750000f,(float16_t)0.0643920898438f,
+(float16_t)0.9980468750000f,(float16_t)0.0659179687500f,
+(float16_t)0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)0.9975585937500f,(float16_t)0.0689697265625f,
+(float16_t)0.9975585937500f,(float16_t)0.0704956054688f,
+(float16_t)0.9975585937500f,(float16_t)0.0720214843750f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9970703125000f,(float16_t)0.0750732421875f,
+(float16_t)0.9970703125000f,(float16_t)0.0765991210938f,
+(float16_t)0.9970703125000f,(float16_t)0.0781250000000f,
+(float16_t)0.9965820312500f,(float16_t)0.0797119140625f,
+(float16_t)0.9965820312500f,(float16_t)0.0812377929688f,
+(float16_t)0.9965820312500f,(float16_t)0.0827636718750f,
+(float16_t)0.9965820312500f,(float16_t)0.0842895507812f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9960937500000f,(float16_t)0.0873413085938f,
+(float16_t)0.9960937500000f,(float16_t)0.0888671875000f,
+(float16_t)0.9960937500000f,(float16_t)0.0903930664062f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9956054687500f,(float16_t)0.0934448242188f,
+(float16_t)0.9956054687500f,(float16_t)0.0949707031250f,
+(float16_t)0.9951171875000f,(float16_t)0.0964965820312f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9951171875000f,(float16_t)0.0995483398438f,
+(float16_t)0.9951171875000f,(float16_t)0.1010742187500f,
+(float16_t)0.9946289062500f,(float16_t)0.1026000976562f,
+(float16_t)0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)0.9946289062500f,(float16_t)0.1056518554688f,
+(float16_t)0.9941406250000f,(float16_t)0.1071777343750f,
+(float16_t)0.9941406250000f,(float16_t)0.1087036132812f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9936523437500f,(float16_t)0.1117553710938f,
+(float16_t)0.9936523437500f,(float16_t)0.1132812500000f,
+(float16_t)0.9931640625000f,(float16_t)0.1148071289062f,
+(float16_t)0.9931640625000f,(float16_t)0.1163330078125f,
+(float16_t)0.9931640625000f,(float16_t)0.1178588867188f,
+(float16_t)0.9926757812500f,(float16_t)0.1193847656250f,
+(float16_t)0.9926757812500f,(float16_t)0.1209106445312f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9921875000000f,(float16_t)0.1239624023438f,
+(float16_t)0.9921875000000f,(float16_t)0.1254882812500f,
+(float16_t)0.9916992187500f,(float16_t)0.1269531250000f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9916992187500f,(float16_t)0.1300048828125f,
+(float16_t)0.9912109375000f,(float16_t)0.1315917968750f,
+(float16_t)0.9912109375000f,(float16_t)0.1330566406250f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9907226562500f,(float16_t)0.1361083984375f,
+(float16_t)0.9907226562500f,(float16_t)0.1375732421875f,
+(float16_t)0.9902343750000f,(float16_t)0.1391601562500f,
+(float16_t)0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)0.9897460937500f,(float16_t)0.1422119140625f,
+(float16_t)0.9897460937500f,(float16_t)0.1436767578125f,
+(float16_t)0.9892578125000f,(float16_t)0.1452636718750f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9887695312500f,(float16_t)0.1481933593750f,
+(float16_t)0.9887695312500f,(float16_t)0.1497802734375f,
+(float16_t)0.9882812500000f,(float16_t)0.1512451171875f,
+(float16_t)0.9882812500000f,(float16_t)0.1528320312500f,
+(float16_t)0.9877929687500f,(float16_t)0.1542968750000f,
+(float16_t)0.9877929687500f,(float16_t)0.1558837890625f,
+(float16_t)0.9873046875000f,(float16_t)0.1573486328125f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9868164062500f,(float16_t)0.1604003906250f,
+(float16_t)0.9868164062500f,(float16_t)0.1618652343750f,
+(float16_t)0.9863281250000f,(float16_t)0.1634521484375f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9858398437500f,(float16_t)0.1663818359375f,
+(float16_t)0.9858398437500f,(float16_t)0.1679687500000f,
+(float16_t)0.9853515625000f,(float16_t)0.1694335937500f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9848632812500f,(float16_t)0.1724853515625f,
+(float16_t)0.9848632812500f,(float16_t)0.1739501953125f,
+(float16_t)0.9843750000000f,(float16_t)0.1755371093750f,
+(float16_t)0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)0.9838867187500f,(float16_t)0.1784667968750f,
+(float16_t)0.9838867187500f,(float16_t)0.1800537109375f,
+(float16_t)0.9833984375000f,(float16_t)0.1815185546875f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9829101562500f,(float16_t)0.1845703125000f,
+(float16_t)0.9824218750000f,(float16_t)0.1860351562500f,
+(float16_t)0.9824218750000f,(float16_t)0.1876220703125f,
+(float16_t)0.9819335937500f,(float16_t)0.1890869140625f,
+(float16_t)0.9814453125000f,(float16_t)0.1905517578125f,
+(float16_t)0.9814453125000f,(float16_t)0.1921386718750f,
+(float16_t)0.9809570312500f,(float16_t)0.1936035156250f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9804687500000f,(float16_t)0.1966552734375f,
+(float16_t)0.9799804687500f,(float16_t)0.1981201171875f,
+(float16_t)0.9799804687500f,(float16_t)0.1995849609375f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9794921875000f,(float16_t)0.2026367187500f,
+(float16_t)0.9790039062500f,(float16_t)0.2041015625000f,
+(float16_t)0.9785156250000f,(float16_t)0.2055664062500f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9780273437500f,(float16_t)0.2086181640625f,
+(float16_t)0.9775390625000f,(float16_t)0.2100830078125f,
+(float16_t)0.9775390625000f,(float16_t)0.2116699218750f,
+(float16_t)0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)0.9765625000000f,(float16_t)0.2145996093750f,
+(float16_t)0.9765625000000f,(float16_t)0.2160644531250f,
+(float16_t)0.9760742187500f,(float16_t)0.2176513671875f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9755859375000f,(float16_t)0.2205810546875f,
+(float16_t)0.9750976562500f,(float16_t)0.2220458984375f,
+(float16_t)0.9746093750000f,(float16_t)0.2236328125000f,
+(float16_t)0.9741210937500f,(float16_t)0.2250976562500f,
+(float16_t)0.9741210937500f,(float16_t)0.2265625000000f,
+(float16_t)0.9736328125000f,(float16_t)0.2280273437500f,
+(float16_t)0.9731445312500f,(float16_t)0.2296142578125f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9726562500000f,(float16_t)0.2325439453125f,
+(float16_t)0.9721679687500f,(float16_t)0.2340087890625f,
+(float16_t)0.9716796875000f,(float16_t)0.2354736328125f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9711914062500f,(float16_t)0.2385253906250f,
+(float16_t)0.9707031250000f,(float16_t)0.2399902343750f,
+(float16_t)0.9702148437500f,(float16_t)0.2414550781250f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9697265625000f,(float16_t)0.2445068359375f,
+(float16_t)0.9692382812500f,(float16_t)0.2459716796875f,
+(float16_t)0.9687500000000f,(float16_t)0.2474365234375f,
+(float16_t)0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)0.9682617187500f,(float16_t)0.2504882812500f,
+(float16_t)0.9677734375000f,(float16_t)0.2519531250000f,
+(float16_t)0.9672851562500f,(float16_t)0.2534179687500f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9667968750000f,(float16_t)0.2563476562500f,
+(float16_t)0.9663085937500f,(float16_t)0.2578125000000f,
+(float16_t)0.9658203125000f,(float16_t)0.2592773437500f,
+(float16_t)0.9653320312500f,(float16_t)0.2607421875000f,
+(float16_t)0.9648437500000f,(float16_t)0.2622070312500f,
+(float16_t)0.9643554687500f,(float16_t)0.2636718750000f,
+(float16_t)0.9643554687500f,(float16_t)0.2651367187500f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9633789062500f,(float16_t)0.2683105468750f,
+(float16_t)0.9628906250000f,(float16_t)0.2697753906250f,
+(float16_t)0.9624023437500f,(float16_t)0.2712402343750f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9619140625000f,(float16_t)0.2741699218750f,
+(float16_t)0.9614257812500f,(float16_t)0.2756347656250f,
+(float16_t)0.9609375000000f,(float16_t)0.2770996093750f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9599609375000f,(float16_t)0.2800292968750f,
+(float16_t)0.9594726562500f,(float16_t)0.2814941406250f,
+(float16_t)0.9589843750000f,(float16_t)0.2829589843750f,
+(float16_t)0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)0.9584960937500f,(float16_t)0.2858886718750f,
+(float16_t)0.9580078125000f,(float16_t)0.2873535156250f,
+(float16_t)0.9575195312500f,(float16_t)0.2888183593750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9565429687500f,(float16_t)0.2917480468750f,
+(float16_t)0.9560546875000f,(float16_t)0.2932128906250f,
+(float16_t)0.9555664062500f,(float16_t)0.2946777343750f,
+(float16_t)0.9550781250000f,(float16_t)0.2961425781250f,
+(float16_t)0.9545898437500f,(float16_t)0.2976074218750f,
+(float16_t)0.9541015625000f,(float16_t)0.2990722656250f,
+(float16_t)0.9536132812500f,(float16_t)0.3005371093750f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9526367187500f,(float16_t)0.3034667968750f,
+(float16_t)0.9521484375000f,(float16_t)0.3049316406250f,
+(float16_t)0.9521484375000f,(float16_t)0.3063964843750f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9511718750000f,(float16_t)0.3093261718750f,
+(float16_t)0.9506835937500f,(float16_t)0.3107910156250f,
+(float16_t)0.9501953125000f,(float16_t)0.3122558593750f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9492187500000f,(float16_t)0.3151855468750f,
+(float16_t)0.9487304687500f,(float16_t)0.3166503906250f,
+(float16_t)0.9482421875000f,(float16_t)0.3181152343750f,
+(float16_t)0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)0.9472656250000f,(float16_t)0.3210449218750f,
+(float16_t)0.9467773437500f,(float16_t)0.3225097656250f,
+(float16_t)0.9462890625000f,(float16_t)0.3239746093750f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9453125000000f,(float16_t)0.3266601562500f,
+(float16_t)0.9448242187500f,(float16_t)0.3281250000000f,
+(float16_t)0.9443359375000f,(float16_t)0.3295898437500f,
+(float16_t)0.9433593750000f,(float16_t)0.3310546875000f,
+(float16_t)0.9428710937500f,(float16_t)0.3325195312500f,
+(float16_t)0.9423828125000f,(float16_t)0.3339843750000f,
+(float16_t)0.9418945312500f,(float16_t)0.3354492187500f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9409179687500f,(float16_t)0.3383789062500f,
+(float16_t)0.9404296875000f,(float16_t)0.3398437500000f,
+(float16_t)0.9399414062500f,(float16_t)0.3413085937500f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9389648437500f,(float16_t)0.3439941406250f,
+(float16_t)0.9384765625000f,(float16_t)0.3454589843750f,
+(float16_t)0.9379882812500f,(float16_t)0.3469238281250f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9370117187500f,(float16_t)0.3498535156250f,
+(float16_t)0.9360351562500f,(float16_t)0.3513183593750f,
+(float16_t)0.9355468750000f,(float16_t)0.3527832031250f,
+(float16_t)0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)0.9345703125000f,(float16_t)0.3557128906250f,
+(float16_t)0.9340820312500f,(float16_t)0.3569335937500f,
+(float16_t)0.9335937500000f,(float16_t)0.3583984375000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9326171875000f,(float16_t)0.3613281250000f,
+(float16_t)0.9316406250000f,(float16_t)0.3627929687500f,
+(float16_t)0.9311523437500f,(float16_t)0.3642578125000f,
+(float16_t)0.9306640625000f,(float16_t)0.3657226562500f,
+(float16_t)0.9301757812500f,(float16_t)0.3669433593750f,
+(float16_t)0.9296875000000f,(float16_t)0.3684082031250f,
+(float16_t)0.9291992187500f,(float16_t)0.3698730468750f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9277343750000f,(float16_t)0.3728027343750f,
+(float16_t)0.9272460937500f,(float16_t)0.3742675781250f,
+(float16_t)0.9267578125000f,(float16_t)0.3754882812500f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9257812500000f,(float16_t)0.3784179687500f,
+(float16_t)0.9252929687500f,(float16_t)0.3798828125000f,
+(float16_t)0.9243164062500f,(float16_t)0.3813476562500f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9233398437500f,(float16_t)0.3840332031250f,
+(float16_t)0.9228515625000f,(float16_t)0.3854980468750f,
+(float16_t)0.9218750000000f,(float16_t)0.3869628906250f,
+(float16_t)0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)0.9208984375000f,(float16_t)0.3896484375000f,
+(float16_t)0.9204101562500f,(float16_t)0.3911132812500f,
+(float16_t)0.9199218750000f,(float16_t)0.3925781250000f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9184570312500f,(float16_t)0.3955078125000f,
+(float16_t)0.9179687500000f,(float16_t)0.3967285156250f,
+(float16_t)0.9174804687500f,(float16_t)0.3981933593750f,
+(float16_t)0.9165039062500f,(float16_t)0.3996582031250f,
+(float16_t)0.9160156250000f,(float16_t)0.4011230468750f,
+(float16_t)0.9155273437500f,(float16_t)0.4023437500000f,
+(float16_t)0.9150390625000f,(float16_t)0.4038085937500f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9135742187500f,(float16_t)0.4067382812500f,
+(float16_t)0.9130859375000f,(float16_t)0.4079589843750f,
+(float16_t)0.9121093750000f,(float16_t)0.4094238281250f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9111328125000f,(float16_t)0.4123535156250f,
+(float16_t)0.9106445312500f,(float16_t)0.4135742187500f,
+(float16_t)0.9096679687500f,(float16_t)0.4150390625000f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9086914062500f,(float16_t)0.4177246093750f,
+(float16_t)0.9077148437500f,(float16_t)0.4191894531250f,
+(float16_t)0.9072265625000f,(float16_t)0.4206542968750f,
+(float16_t)0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)0.9057617187500f,(float16_t)0.4233398437500f,
+(float16_t)0.9052734375000f,(float16_t)0.4248046875000f,
+(float16_t)0.9047851562500f,(float16_t)0.4262695312500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.9033203125000f,(float16_t)0.4289550781250f,
+(float16_t)0.9028320312500f,(float16_t)0.4304199218750f,
+(float16_t)0.9018554687500f,(float16_t)0.4316406250000f,
+(float16_t)0.9013671875000f,(float16_t)0.4331054687500f,
+(float16_t)0.9008789062500f,(float16_t)0.4345703125000f,
+(float16_t)0.8999023437500f,(float16_t)0.4357910156250f,
+(float16_t)0.8994140625000f,(float16_t)0.4372558593750f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8979492187500f,(float16_t)0.4399414062500f,
+(float16_t)0.8974609375000f,(float16_t)0.4414062500000f,
+(float16_t)0.8964843750000f,(float16_t)0.4426269531250f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8955078125000f,(float16_t)0.4455566406250f,
+(float16_t)0.8945312500000f,(float16_t)0.4467773437500f,
+(float16_t)0.8940429687500f,(float16_t)0.4482421875000f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8925781250000f,(float16_t)0.4509277343750f,
+(float16_t)0.8916015625000f,(float16_t)0.4523925781250f,
+(float16_t)0.8911132812500f,(float16_t)0.4536132812500f,
+(float16_t)0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)0.8896484375000f,(float16_t)0.4565429687500f,
+(float16_t)0.8891601562500f,(float16_t)0.4577636718750f,
+(float16_t)0.8881835937500f,(float16_t)0.4592285156250f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8867187500000f,(float16_t)0.4619140625000f,
+(float16_t)0.8862304687500f,(float16_t)0.4633789062500f,
+(float16_t)0.8857421875000f,(float16_t)0.4645996093750f,
+(float16_t)0.8847656250000f,(float16_t)0.4660644531250f,
+(float16_t)0.8842773437500f,(float16_t)0.4672851562500f,
+(float16_t)0.8833007812500f,(float16_t)0.4687500000000f,
+(float16_t)0.8828125000000f,(float16_t)0.4699707031250f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8813476562500f,(float16_t)0.4726562500000f,
+(float16_t)0.8803710937500f,(float16_t)0.4741210937500f,
+(float16_t)0.8798828125000f,(float16_t)0.4753417968750f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8784179687500f,(float16_t)0.4780273437500f,
+(float16_t)0.8774414062500f,(float16_t)0.4794921875000f,
+(float16_t)0.8769531250000f,(float16_t)0.4809570312500f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8754882812500f,(float16_t)0.4836425781250f,
+(float16_t)0.8745117187500f,(float16_t)0.4848632812500f,
+(float16_t)0.8740234375000f,(float16_t)0.4863281250000f,
+(float16_t)0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)0.8725585937500f,(float16_t)0.4887695312500f,
+(float16_t)0.8715820312500f,(float16_t)0.4902343750000f,
+(float16_t)0.8706054687500f,(float16_t)0.4914550781250f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8691406250000f,(float16_t)0.4941406250000f,
+(float16_t)0.8686523437500f,(float16_t)0.4956054687500f,
+(float16_t)0.8676757812500f,(float16_t)0.4968261718750f,
+(float16_t)0.8671875000000f,(float16_t)0.4982910156250f,
+(float16_t)0.8662109375000f,(float16_t)0.4995117187500f,
+(float16_t)0.8657226562500f,(float16_t)0.5009765625000f,
+(float16_t)0.8647460937500f,(float16_t)0.5024414062500f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8632812500000f,(float16_t)0.5048828125000f,
+(float16_t)0.8623046875000f,(float16_t)0.5063476562500f,
+(float16_t)0.8618164062500f,(float16_t)0.5073242187500f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8598632812500f,(float16_t)0.5102539062500f,
+(float16_t)0.8593750000000f,(float16_t)0.5112304687500f,
+(float16_t)0.8583984375000f,(float16_t)0.5126953125000f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8569335937500f,(float16_t)0.5156250000000f,
+(float16_t)0.8559570312500f,(float16_t)0.5166015625000f,
+(float16_t)0.8554687500000f,(float16_t)0.5180664062500f,
+(float16_t)0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)0.8540039062500f,(float16_t)0.5205078125000f,
+(float16_t)0.8530273437500f,(float16_t)0.5219726562500f,
+(float16_t)0.8520507812500f,(float16_t)0.5234375000000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8505859375000f,(float16_t)0.5258789062500f,
+(float16_t)0.8496093750000f,(float16_t)0.5273437500000f,
+(float16_t)0.8491210937500f,(float16_t)0.5283203125000f,
+(float16_t)0.8481445312500f,(float16_t)0.5297851562500f,
+(float16_t)0.8471679687500f,(float16_t)0.5312500000000f,
+(float16_t)0.8466796875000f,(float16_t)0.5322265625000f,
+(float16_t)0.8457031250000f,(float16_t)0.5336914062500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8442382812500f,(float16_t)0.5361328125000f,
+(float16_t)0.8432617187500f,(float16_t)0.5375976562500f,
+(float16_t)0.8422851562500f,(float16_t)0.5390625000000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8408203125000f,(float16_t)0.5415039062500f,
+(float16_t)0.8398437500000f,(float16_t)0.5429687500000f,
+(float16_t)0.8388671875000f,(float16_t)0.5439453125000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8374023437500f,(float16_t)0.5463867187500f,
+(float16_t)0.8364257812500f,(float16_t)0.5478515625000f,
+(float16_t)0.8359375000000f,(float16_t)0.5493164062500f,
+(float16_t)0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)0.8339843750000f,(float16_t)0.5517578125000f,
+(float16_t)0.8330078125000f,(float16_t)0.5532226562500f,
+(float16_t)0.8325195312500f,(float16_t)0.5541992187500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8305664062500f,(float16_t)0.5566406250000f,
+(float16_t)0.8295898437500f,(float16_t)0.5581054687500f,
+(float16_t)0.8291015625000f,(float16_t)0.5595703125000f,
+(float16_t)0.8281250000000f,(float16_t)0.5605468750000f,
+(float16_t)0.8271484375000f,(float16_t)0.5620117187500f,
+(float16_t)0.8261718750000f,(float16_t)0.5629882812500f,
+(float16_t)0.8256835937500f,(float16_t)0.5644531250000f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8237304687500f,(float16_t)0.5668945312500f,
+(float16_t)0.8227539062500f,(float16_t)0.5683593750000f,
+(float16_t)0.8217773437500f,(float16_t)0.5693359375000f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8203125000000f,(float16_t)0.5722656250000f,
+(float16_t)0.8193359375000f,(float16_t)0.5732421875000f,
+(float16_t)0.8183593750000f,(float16_t)0.5747070312500f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8168945312500f,(float16_t)0.5771484375000f,
+(float16_t)0.8159179687500f,(float16_t)0.5781250000000f,
+(float16_t)0.8149414062500f,(float16_t)0.5795898437500f,
+(float16_t)0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)0.8129882812500f,(float16_t)0.5820312500000f,
+(float16_t)0.8120117187500f,(float16_t)0.5834960937500f,
+(float16_t)0.8115234375000f,(float16_t)0.5844726562500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8095703125000f,(float16_t)0.5869140625000f,
+(float16_t)0.8085937500000f,(float16_t)0.5883789062500f,
+(float16_t)0.8076171875000f,(float16_t)0.5893554687500f,
+(float16_t)0.8066406250000f,(float16_t)0.5908203125000f,
+(float16_t)0.8061523437500f,(float16_t)0.5917968750000f,
+(float16_t)0.8051757812500f,(float16_t)0.5932617187500f,
+(float16_t)0.8041992187500f,(float16_t)0.5942382812500f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.8022460937500f,(float16_t)0.5971679687500f,
+(float16_t)0.8012695312500f,(float16_t)0.5981445312500f,
+(float16_t)0.8002929687500f,(float16_t)0.5996093750000f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7988281250000f,(float16_t)0.6020507812500f,
+(float16_t)0.7978515625000f,(float16_t)0.6030273437500f,
+(float16_t)0.7968750000000f,(float16_t)0.6044921875000f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7949218750000f,(float16_t)0.6069335937500f,
+(float16_t)0.7939453125000f,(float16_t)0.6079101562500f,
+(float16_t)0.7929687500000f,(float16_t)0.6093750000000f,
+(float16_t)0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)0.7910156250000f,(float16_t)0.6118164062500f,
+(float16_t)0.7900390625000f,(float16_t)0.6127929687500f,
+(float16_t)0.7890625000000f,(float16_t)0.6142578125000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7875976562500f,(float16_t)0.6162109375000f,
+(float16_t)0.7866210937500f,(float16_t)0.6176757812500f,
+(float16_t)0.7856445312500f,(float16_t)0.6186523437500f,
+(float16_t)0.7846679687500f,(float16_t)0.6201171875000f,
+(float16_t)0.7836914062500f,(float16_t)0.6210937500000f,
+(float16_t)0.7827148437500f,(float16_t)0.6225585937500f,
+(float16_t)0.7817382812500f,(float16_t)0.6235351562500f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7797851562500f,(float16_t)0.6259765625000f,
+(float16_t)0.7788085937500f,(float16_t)0.6274414062500f,
+(float16_t)0.7778320312500f,(float16_t)0.6284179687500f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7758789062500f,(float16_t)0.6308593750000f,
+(float16_t)0.7749023437500f,(float16_t)0.6318359375000f,
+(float16_t)0.7739257812500f,(float16_t)0.6333007812500f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7719726562500f,(float16_t)0.6357421875000f,
+(float16_t)0.7709960937500f,(float16_t)0.6367187500000f,
+(float16_t)0.7700195312500f,(float16_t)0.6381835937500f,
+(float16_t)0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)0.7680664062500f,(float16_t)0.6401367187500f,
+(float16_t)0.7670898437500f,(float16_t)0.6416015625000f,
+(float16_t)0.7661132812500f,(float16_t)0.6425781250000f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7641601562500f,(float16_t)0.6450195312500f,
+(float16_t)0.7631835937500f,(float16_t)0.6459960937500f,
+(float16_t)0.7622070312500f,(float16_t)0.6474609375000f,
+(float16_t)0.7612304687500f,(float16_t)0.6484375000000f,
+(float16_t)0.7602539062500f,(float16_t)0.6499023437500f,
+(float16_t)0.7592773437500f,(float16_t)0.6508789062500f,
+(float16_t)0.7583007812500f,(float16_t)0.6518554687500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7563476562500f,(float16_t)0.6542968750000f,
+(float16_t)0.7553710937500f,(float16_t)0.6552734375000f,
+(float16_t)0.7543945312500f,(float16_t)0.6567382812500f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7519531250000f,(float16_t)0.6591796875000f,
+(float16_t)0.7509765625000f,(float16_t)0.6601562500000f,
+(float16_t)0.7500000000000f,(float16_t)0.6611328125000f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7480468750000f,(float16_t)0.6635742187500f,
+(float16_t)0.7470703125000f,(float16_t)0.6645507812500f,
+(float16_t)0.7460937500000f,(float16_t)0.6660156250000f,
+(float16_t)0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)0.7441406250000f,(float16_t)0.6679687500000f,
+(float16_t)0.7431640625000f,(float16_t)0.6694335937500f,
+(float16_t)0.7421875000000f,(float16_t)0.6704101562500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7397460937500f,(float16_t)0.6728515625000f,
+(float16_t)0.7387695312500f,(float16_t)0.6738281250000f,
+(float16_t)0.7377929687500f,(float16_t)0.6748046875000f,
+(float16_t)0.7368164062500f,(float16_t)0.6762695312500f,
+(float16_t)0.7358398437500f,(float16_t)0.6772460937500f,
+(float16_t)0.7348632812500f,(float16_t)0.6782226562500f,
+(float16_t)0.7338867187500f,(float16_t)0.6796875000000f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7314453125000f,(float16_t)0.6816406250000f,
+(float16_t)0.7304687500000f,(float16_t)0.6826171875000f,
+(float16_t)0.7294921875000f,(float16_t)0.6840820312500f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7275390625000f,(float16_t)0.6860351562500f,
+(float16_t)0.7265625000000f,(float16_t)0.6875000000000f,
+(float16_t)0.7250976562500f,(float16_t)0.6884765625000f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7231445312500f,(float16_t)0.6904296875000f,
+(float16_t)0.7221679687500f,(float16_t)0.6918945312500f,
+(float16_t)0.7211914062500f,(float16_t)0.6928710937500f,
+(float16_t)0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)0.7187500000000f,(float16_t)0.6953125000000f,
+(float16_t)0.7177734375000f,(float16_t)0.6962890625000f,
+(float16_t)0.7167968750000f,(float16_t)0.6972656250000f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7148437500000f,(float16_t)0.6997070312500f,
+(float16_t)0.7133789062500f,(float16_t)0.7006835937500f,
+(float16_t)0.7124023437500f,(float16_t)0.7016601562500f,
+(float16_t)0.7114257812500f,(float16_t)0.7026367187500f,
+(float16_t)0.7104492187500f,(float16_t)0.7036132812500f,
+(float16_t)0.7094726562500f,(float16_t)0.7050781250000f,
+(float16_t)0.7080078125000f,(float16_t)0.7060546875000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.7060546875000f,(float16_t)0.7080078125000f,
+(float16_t)0.7050781250000f,(float16_t)0.7094726562500f,
+(float16_t)0.7036132812500f,(float16_t)0.7104492187500f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.7016601562500f,(float16_t)0.7124023437500f,
+(float16_t)0.7006835937500f,(float16_t)0.7133789062500f,
+(float16_t)0.6997070312500f,(float16_t)0.7148437500000f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6972656250000f,(float16_t)0.7167968750000f,
+(float16_t)0.6962890625000f,(float16_t)0.7177734375000f,
+(float16_t)0.6953125000000f,(float16_t)0.7187500000000f,
+(float16_t)0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)0.6928710937500f,(float16_t)0.7211914062500f,
+(float16_t)0.6918945312500f,(float16_t)0.7221679687500f,
+(float16_t)0.6904296875000f,(float16_t)0.7231445312500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6884765625000f,(float16_t)0.7250976562500f,
+(float16_t)0.6875000000000f,(float16_t)0.7265625000000f,
+(float16_t)0.6860351562500f,(float16_t)0.7275390625000f,
+(float16_t)0.6850585937500f,(float16_t)0.7285156250000f,
+(float16_t)0.6840820312500f,(float16_t)0.7294921875000f,
+(float16_t)0.6826171875000f,(float16_t)0.7304687500000f,
+(float16_t)0.6816406250000f,(float16_t)0.7314453125000f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6796875000000f,(float16_t)0.7338867187500f,
+(float16_t)0.6782226562500f,(float16_t)0.7348632812500f,
+(float16_t)0.6772460937500f,(float16_t)0.7358398437500f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6748046875000f,(float16_t)0.7377929687500f,
+(float16_t)0.6738281250000f,(float16_t)0.7387695312500f,
+(float16_t)0.6728515625000f,(float16_t)0.7397460937500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6704101562500f,(float16_t)0.7421875000000f,
+(float16_t)0.6694335937500f,(float16_t)0.7431640625000f,
+(float16_t)0.6679687500000f,(float16_t)0.7441406250000f,
+(float16_t)0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)0.6660156250000f,(float16_t)0.7460937500000f,
+(float16_t)0.6645507812500f,(float16_t)0.7470703125000f,
+(float16_t)0.6635742187500f,(float16_t)0.7480468750000f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6611328125000f,(float16_t)0.7500000000000f,
+(float16_t)0.6601562500000f,(float16_t)0.7509765625000f,
+(float16_t)0.6591796875000f,(float16_t)0.7519531250000f,
+(float16_t)0.6577148437500f,(float16_t)0.7534179687500f,
+(float16_t)0.6567382812500f,(float16_t)0.7543945312500f,
+(float16_t)0.6552734375000f,(float16_t)0.7553710937500f,
+(float16_t)0.6542968750000f,(float16_t)0.7563476562500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6518554687500f,(float16_t)0.7583007812500f,
+(float16_t)0.6508789062500f,(float16_t)0.7592773437500f,
+(float16_t)0.6499023437500f,(float16_t)0.7602539062500f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6474609375000f,(float16_t)0.7622070312500f,
+(float16_t)0.6459960937500f,(float16_t)0.7631835937500f,
+(float16_t)0.6450195312500f,(float16_t)0.7641601562500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6425781250000f,(float16_t)0.7661132812500f,
+(float16_t)0.6416015625000f,(float16_t)0.7670898437500f,
+(float16_t)0.6401367187500f,(float16_t)0.7680664062500f,
+(float16_t)0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)0.6381835937500f,(float16_t)0.7700195312500f,
+(float16_t)0.6367187500000f,(float16_t)0.7709960937500f,
+(float16_t)0.6357421875000f,(float16_t)0.7719726562500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6333007812500f,(float16_t)0.7739257812500f,
+(float16_t)0.6318359375000f,(float16_t)0.7749023437500f,
+(float16_t)0.6308593750000f,(float16_t)0.7758789062500f,
+(float16_t)0.6293945312500f,(float16_t)0.7768554687500f,
+(float16_t)0.6284179687500f,(float16_t)0.7778320312500f,
+(float16_t)0.6274414062500f,(float16_t)0.7788085937500f,
+(float16_t)0.6259765625000f,(float16_t)0.7797851562500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6235351562500f,(float16_t)0.7817382812500f,
+(float16_t)0.6225585937500f,(float16_t)0.7827148437500f,
+(float16_t)0.6210937500000f,(float16_t)0.7836914062500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6186523437500f,(float16_t)0.7856445312500f,
+(float16_t)0.6176757812500f,(float16_t)0.7866210937500f,
+(float16_t)0.6162109375000f,(float16_t)0.7875976562500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6142578125000f,(float16_t)0.7890625000000f,
+(float16_t)0.6127929687500f,(float16_t)0.7900390625000f,
+(float16_t)0.6118164062500f,(float16_t)0.7910156250000f,
+(float16_t)0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)0.6093750000000f,(float16_t)0.7929687500000f,
+(float16_t)0.6079101562500f,(float16_t)0.7939453125000f,
+(float16_t)0.6069335937500f,(float16_t)0.7949218750000f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.6044921875000f,(float16_t)0.7968750000000f,
+(float16_t)0.6030273437500f,(float16_t)0.7978515625000f,
+(float16_t)0.6020507812500f,(float16_t)0.7988281250000f,
+(float16_t)0.6005859375000f,(float16_t)0.7993164062500f,
+(float16_t)0.5996093750000f,(float16_t)0.8002929687500f,
+(float16_t)0.5981445312500f,(float16_t)0.8012695312500f,
+(float16_t)0.5971679687500f,(float16_t)0.8022460937500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5942382812500f,(float16_t)0.8041992187500f,
+(float16_t)0.5932617187500f,(float16_t)0.8051757812500f,
+(float16_t)0.5917968750000f,(float16_t)0.8061523437500f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5893554687500f,(float16_t)0.8076171875000f,
+(float16_t)0.5883789062500f,(float16_t)0.8085937500000f,
+(float16_t)0.5869140625000f,(float16_t)0.8095703125000f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5844726562500f,(float16_t)0.8115234375000f,
+(float16_t)0.5834960937500f,(float16_t)0.8120117187500f,
+(float16_t)0.5820312500000f,(float16_t)0.8129882812500f,
+(float16_t)0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)0.5795898437500f,(float16_t)0.8149414062500f,
+(float16_t)0.5781250000000f,(float16_t)0.8159179687500f,
+(float16_t)0.5771484375000f,(float16_t)0.8168945312500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5747070312500f,(float16_t)0.8183593750000f,
+(float16_t)0.5732421875000f,(float16_t)0.8193359375000f,
+(float16_t)0.5722656250000f,(float16_t)0.8203125000000f,
+(float16_t)0.5708007812500f,(float16_t)0.8212890625000f,
+(float16_t)0.5693359375000f,(float16_t)0.8217773437500f,
+(float16_t)0.5683593750000f,(float16_t)0.8227539062500f,
+(float16_t)0.5668945312500f,(float16_t)0.8237304687500f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5644531250000f,(float16_t)0.8256835937500f,
+(float16_t)0.5629882812500f,(float16_t)0.8261718750000f,
+(float16_t)0.5620117187500f,(float16_t)0.8271484375000f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5595703125000f,(float16_t)0.8291015625000f,
+(float16_t)0.5581054687500f,(float16_t)0.8295898437500f,
+(float16_t)0.5566406250000f,(float16_t)0.8305664062500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5541992187500f,(float16_t)0.8325195312500f,
+(float16_t)0.5532226562500f,(float16_t)0.8330078125000f,
+(float16_t)0.5517578125000f,(float16_t)0.8339843750000f,
+(float16_t)0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)0.5493164062500f,(float16_t)0.8359375000000f,
+(float16_t)0.5478515625000f,(float16_t)0.8364257812500f,
+(float16_t)0.5463867187500f,(float16_t)0.8374023437500f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5439453125000f,(float16_t)0.8388671875000f,
+(float16_t)0.5429687500000f,(float16_t)0.8398437500000f,
+(float16_t)0.5415039062500f,(float16_t)0.8408203125000f,
+(float16_t)0.5400390625000f,(float16_t)0.8417968750000f,
+(float16_t)0.5390625000000f,(float16_t)0.8422851562500f,
+(float16_t)0.5375976562500f,(float16_t)0.8432617187500f,
+(float16_t)0.5361328125000f,(float16_t)0.8442382812500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5336914062500f,(float16_t)0.8457031250000f,
+(float16_t)0.5322265625000f,(float16_t)0.8466796875000f,
+(float16_t)0.5312500000000f,(float16_t)0.8471679687500f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5283203125000f,(float16_t)0.8491210937500f,
+(float16_t)0.5273437500000f,(float16_t)0.8496093750000f,
+(float16_t)0.5258789062500f,(float16_t)0.8505859375000f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5234375000000f,(float16_t)0.8520507812500f,
+(float16_t)0.5219726562500f,(float16_t)0.8530273437500f,
+(float16_t)0.5205078125000f,(float16_t)0.8540039062500f,
+(float16_t)0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)0.5180664062500f,(float16_t)0.8554687500000f,
+(float16_t)0.5166015625000f,(float16_t)0.8559570312500f,
+(float16_t)0.5156250000000f,(float16_t)0.8569335937500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5126953125000f,(float16_t)0.8583984375000f,
+(float16_t)0.5112304687500f,(float16_t)0.8593750000000f,
+(float16_t)0.5102539062500f,(float16_t)0.8598632812500f,
+(float16_t)0.5087890625000f,(float16_t)0.8608398437500f,
+(float16_t)0.5073242187500f,(float16_t)0.8618164062500f,
+(float16_t)0.5063476562500f,(float16_t)0.8623046875000f,
+(float16_t)0.5048828125000f,(float16_t)0.8632812500000f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.5024414062500f,(float16_t)0.8647460937500f,
+(float16_t)0.5009765625000f,(float16_t)0.8657226562500f,
+(float16_t)0.4995117187500f,(float16_t)0.8662109375000f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4968261718750f,(float16_t)0.8676757812500f,
+(float16_t)0.4956054687500f,(float16_t)0.8686523437500f,
+(float16_t)0.4941406250000f,(float16_t)0.8691406250000f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4914550781250f,(float16_t)0.8706054687500f,
+(float16_t)0.4902343750000f,(float16_t)0.8715820312500f,
+(float16_t)0.4887695312500f,(float16_t)0.8725585937500f,
+(float16_t)0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)0.4863281250000f,(float16_t)0.8740234375000f,
+(float16_t)0.4848632812500f,(float16_t)0.8745117187500f,
+(float16_t)0.4836425781250f,(float16_t)0.8754882812500f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4809570312500f,(float16_t)0.8769531250000f,
+(float16_t)0.4794921875000f,(float16_t)0.8774414062500f,
+(float16_t)0.4780273437500f,(float16_t)0.8784179687500f,
+(float16_t)0.4768066406250f,(float16_t)0.8789062500000f,
+(float16_t)0.4753417968750f,(float16_t)0.8798828125000f,
+(float16_t)0.4741210937500f,(float16_t)0.8803710937500f,
+(float16_t)0.4726562500000f,(float16_t)0.8813476562500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4699707031250f,(float16_t)0.8828125000000f,
+(float16_t)0.4687500000000f,(float16_t)0.8833007812500f,
+(float16_t)0.4672851562500f,(float16_t)0.8842773437500f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4645996093750f,(float16_t)0.8857421875000f,
+(float16_t)0.4633789062500f,(float16_t)0.8862304687500f,
+(float16_t)0.4619140625000f,(float16_t)0.8867187500000f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4592285156250f,(float16_t)0.8881835937500f,
+(float16_t)0.4577636718750f,(float16_t)0.8891601562500f,
+(float16_t)0.4565429687500f,(float16_t)0.8896484375000f,
+(float16_t)0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)0.4536132812500f,(float16_t)0.8911132812500f,
+(float16_t)0.4523925781250f,(float16_t)0.8916015625000f,
+(float16_t)0.4509277343750f,(float16_t)0.8925781250000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4482421875000f,(float16_t)0.8940429687500f,
+(float16_t)0.4467773437500f,(float16_t)0.8945312500000f,
+(float16_t)0.4455566406250f,(float16_t)0.8955078125000f,
+(float16_t)0.4440917968750f,(float16_t)0.8959960937500f,
+(float16_t)0.4426269531250f,(float16_t)0.8964843750000f,
+(float16_t)0.4414062500000f,(float16_t)0.8974609375000f,
+(float16_t)0.4399414062500f,(float16_t)0.8979492187500f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4372558593750f,(float16_t)0.8994140625000f,
+(float16_t)0.4357910156250f,(float16_t)0.8999023437500f,
+(float16_t)0.4345703125000f,(float16_t)0.9008789062500f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4316406250000f,(float16_t)0.9018554687500f,
+(float16_t)0.4304199218750f,(float16_t)0.9028320312500f,
+(float16_t)0.4289550781250f,(float16_t)0.9033203125000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4262695312500f,(float16_t)0.9047851562500f,
+(float16_t)0.4248046875000f,(float16_t)0.9052734375000f,
+(float16_t)0.4233398437500f,(float16_t)0.9057617187500f,
+(float16_t)0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)0.4206542968750f,(float16_t)0.9072265625000f,
+(float16_t)0.4191894531250f,(float16_t)0.9077148437500f,
+(float16_t)0.4177246093750f,(float16_t)0.9086914062500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4150390625000f,(float16_t)0.9096679687500f,
+(float16_t)0.4135742187500f,(float16_t)0.9106445312500f,
+(float16_t)0.4123535156250f,(float16_t)0.9111328125000f,
+(float16_t)0.4108886718750f,(float16_t)0.9116210937500f,
+(float16_t)0.4094238281250f,(float16_t)0.9121093750000f,
+(float16_t)0.4079589843750f,(float16_t)0.9130859375000f,
+(float16_t)0.4067382812500f,(float16_t)0.9135742187500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.4038085937500f,(float16_t)0.9150390625000f,
+(float16_t)0.4023437500000f,(float16_t)0.9155273437500f,
+(float16_t)0.4011230468750f,(float16_t)0.9160156250000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3981933593750f,(float16_t)0.9174804687500f,
+(float16_t)0.3967285156250f,(float16_t)0.9179687500000f,
+(float16_t)0.3955078125000f,(float16_t)0.9184570312500f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3925781250000f,(float16_t)0.9199218750000f,
+(float16_t)0.3911132812500f,(float16_t)0.9204101562500f,
+(float16_t)0.3896484375000f,(float16_t)0.9208984375000f,
+(float16_t)0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)0.3869628906250f,(float16_t)0.9218750000000f,
+(float16_t)0.3854980468750f,(float16_t)0.9228515625000f,
+(float16_t)0.3840332031250f,(float16_t)0.9233398437500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3813476562500f,(float16_t)0.9243164062500f,
+(float16_t)0.3798828125000f,(float16_t)0.9252929687500f,
+(float16_t)0.3784179687500f,(float16_t)0.9257812500000f,
+(float16_t)0.3769531250000f,(float16_t)0.9262695312500f,
+(float16_t)0.3754882812500f,(float16_t)0.9267578125000f,
+(float16_t)0.3742675781250f,(float16_t)0.9272460937500f,
+(float16_t)0.3728027343750f,(float16_t)0.9277343750000f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3698730468750f,(float16_t)0.9291992187500f,
+(float16_t)0.3684082031250f,(float16_t)0.9296875000000f,
+(float16_t)0.3669433593750f,(float16_t)0.9301757812500f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3642578125000f,(float16_t)0.9311523437500f,
+(float16_t)0.3627929687500f,(float16_t)0.9316406250000f,
+(float16_t)0.3613281250000f,(float16_t)0.9326171875000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3583984375000f,(float16_t)0.9335937500000f,
+(float16_t)0.3569335937500f,(float16_t)0.9340820312500f,
+(float16_t)0.3557128906250f,(float16_t)0.9345703125000f,
+(float16_t)0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)0.3527832031250f,(float16_t)0.9355468750000f,
+(float16_t)0.3513183593750f,(float16_t)0.9360351562500f,
+(float16_t)0.3498535156250f,(float16_t)0.9370117187500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3469238281250f,(float16_t)0.9379882812500f,
+(float16_t)0.3454589843750f,(float16_t)0.9384765625000f,
+(float16_t)0.3439941406250f,(float16_t)0.9389648437500f,
+(float16_t)0.3427734375000f,(float16_t)0.9394531250000f,
+(float16_t)0.3413085937500f,(float16_t)0.9399414062500f,
+(float16_t)0.3398437500000f,(float16_t)0.9404296875000f,
+(float16_t)0.3383789062500f,(float16_t)0.9409179687500f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3354492187500f,(float16_t)0.9418945312500f,
+(float16_t)0.3339843750000f,(float16_t)0.9423828125000f,
+(float16_t)0.3325195312500f,(float16_t)0.9428710937500f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3295898437500f,(float16_t)0.9443359375000f,
+(float16_t)0.3281250000000f,(float16_t)0.9448242187500f,
+(float16_t)0.3266601562500f,(float16_t)0.9453125000000f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3239746093750f,(float16_t)0.9462890625000f,
+(float16_t)0.3225097656250f,(float16_t)0.9467773437500f,
+(float16_t)0.3210449218750f,(float16_t)0.9472656250000f,
+(float16_t)0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)0.3181152343750f,(float16_t)0.9482421875000f,
+(float16_t)0.3166503906250f,(float16_t)0.9487304687500f,
+(float16_t)0.3151855468750f,(float16_t)0.9492187500000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3122558593750f,(float16_t)0.9501953125000f,
+(float16_t)0.3107910156250f,(float16_t)0.9506835937500f,
+(float16_t)0.3093261718750f,(float16_t)0.9511718750000f,
+(float16_t)0.3078613281250f,(float16_t)0.9516601562500f,
+(float16_t)0.3063964843750f,(float16_t)0.9521484375000f,
+(float16_t)0.3049316406250f,(float16_t)0.9521484375000f,
+(float16_t)0.3034667968750f,(float16_t)0.9526367187500f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.3005371093750f,(float16_t)0.9536132812500f,
+(float16_t)0.2990722656250f,(float16_t)0.9541015625000f,
+(float16_t)0.2976074218750f,(float16_t)0.9545898437500f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2946777343750f,(float16_t)0.9555664062500f,
+(float16_t)0.2932128906250f,(float16_t)0.9560546875000f,
+(float16_t)0.2917480468750f,(float16_t)0.9565429687500f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2888183593750f,(float16_t)0.9575195312500f,
+(float16_t)0.2873535156250f,(float16_t)0.9580078125000f,
+(float16_t)0.2858886718750f,(float16_t)0.9584960937500f,
+(float16_t)0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)0.2829589843750f,(float16_t)0.9589843750000f,
+(float16_t)0.2814941406250f,(float16_t)0.9594726562500f,
+(float16_t)0.2800292968750f,(float16_t)0.9599609375000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2770996093750f,(float16_t)0.9609375000000f,
+(float16_t)0.2756347656250f,(float16_t)0.9614257812500f,
+(float16_t)0.2741699218750f,(float16_t)0.9619140625000f,
+(float16_t)0.2727050781250f,(float16_t)0.9619140625000f,
+(float16_t)0.2712402343750f,(float16_t)0.9624023437500f,
+(float16_t)0.2697753906250f,(float16_t)0.9628906250000f,
+(float16_t)0.2683105468750f,(float16_t)0.9633789062500f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2651367187500f,(float16_t)0.9643554687500f,
+(float16_t)0.2636718750000f,(float16_t)0.9643554687500f,
+(float16_t)0.2622070312500f,(float16_t)0.9648437500000f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2592773437500f,(float16_t)0.9658203125000f,
+(float16_t)0.2578125000000f,(float16_t)0.9663085937500f,
+(float16_t)0.2563476562500f,(float16_t)0.9667968750000f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2534179687500f,(float16_t)0.9672851562500f,
+(float16_t)0.2519531250000f,(float16_t)0.9677734375000f,
+(float16_t)0.2504882812500f,(float16_t)0.9682617187500f,
+(float16_t)0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)0.2474365234375f,(float16_t)0.9687500000000f,
+(float16_t)0.2459716796875f,(float16_t)0.9692382812500f,
+(float16_t)0.2445068359375f,(float16_t)0.9697265625000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2414550781250f,(float16_t)0.9702148437500f,
+(float16_t)0.2399902343750f,(float16_t)0.9707031250000f,
+(float16_t)0.2385253906250f,(float16_t)0.9711914062500f,
+(float16_t)0.2370605468750f,(float16_t)0.9716796875000f,
+(float16_t)0.2354736328125f,(float16_t)0.9716796875000f,
+(float16_t)0.2340087890625f,(float16_t)0.9721679687500f,
+(float16_t)0.2325439453125f,(float16_t)0.9726562500000f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2296142578125f,(float16_t)0.9731445312500f,
+(float16_t)0.2280273437500f,(float16_t)0.9736328125000f,
+(float16_t)0.2265625000000f,(float16_t)0.9741210937500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2236328125000f,(float16_t)0.9746093750000f,
+(float16_t)0.2220458984375f,(float16_t)0.9750976562500f,
+(float16_t)0.2205810546875f,(float16_t)0.9755859375000f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2176513671875f,(float16_t)0.9760742187500f,
+(float16_t)0.2160644531250f,(float16_t)0.9765625000000f,
+(float16_t)0.2145996093750f,(float16_t)0.9765625000000f,
+(float16_t)0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)0.2116699218750f,(float16_t)0.9775390625000f,
+(float16_t)0.2100830078125f,(float16_t)0.9775390625000f,
+(float16_t)0.2086181640625f,(float16_t)0.9780273437500f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.2055664062500f,(float16_t)0.9785156250000f,
+(float16_t)0.2041015625000f,(float16_t)0.9790039062500f,
+(float16_t)0.2026367187500f,(float16_t)0.9794921875000f,
+(float16_t)0.2010498046875f,(float16_t)0.9794921875000f,
+(float16_t)0.1995849609375f,(float16_t)0.9799804687500f,
+(float16_t)0.1981201171875f,(float16_t)0.9799804687500f,
+(float16_t)0.1966552734375f,(float16_t)0.9804687500000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1936035156250f,(float16_t)0.9809570312500f,
+(float16_t)0.1921386718750f,(float16_t)0.9814453125000f,
+(float16_t)0.1905517578125f,(float16_t)0.9814453125000f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1876220703125f,(float16_t)0.9824218750000f,
+(float16_t)0.1860351562500f,(float16_t)0.9824218750000f,
+(float16_t)0.1845703125000f,(float16_t)0.9829101562500f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1815185546875f,(float16_t)0.9833984375000f,
+(float16_t)0.1800537109375f,(float16_t)0.9838867187500f,
+(float16_t)0.1784667968750f,(float16_t)0.9838867187500f,
+(float16_t)0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)0.1755371093750f,(float16_t)0.9843750000000f,
+(float16_t)0.1739501953125f,(float16_t)0.9848632812500f,
+(float16_t)0.1724853515625f,(float16_t)0.9848632812500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1694335937500f,(float16_t)0.9853515625000f,
+(float16_t)0.1679687500000f,(float16_t)0.9858398437500f,
+(float16_t)0.1663818359375f,(float16_t)0.9858398437500f,
+(float16_t)0.1649169921875f,(float16_t)0.9863281250000f,
+(float16_t)0.1634521484375f,(float16_t)0.9863281250000f,
+(float16_t)0.1618652343750f,(float16_t)0.9868164062500f,
+(float16_t)0.1604003906250f,(float16_t)0.9868164062500f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1573486328125f,(float16_t)0.9873046875000f,
+(float16_t)0.1558837890625f,(float16_t)0.9877929687500f,
+(float16_t)0.1542968750000f,(float16_t)0.9877929687500f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1512451171875f,(float16_t)0.9882812500000f,
+(float16_t)0.1497802734375f,(float16_t)0.9887695312500f,
+(float16_t)0.1481933593750f,(float16_t)0.9887695312500f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1452636718750f,(float16_t)0.9892578125000f,
+(float16_t)0.1436767578125f,(float16_t)0.9897460937500f,
+(float16_t)0.1422119140625f,(float16_t)0.9897460937500f,
+(float16_t)0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)0.1391601562500f,(float16_t)0.9902343750000f,
+(float16_t)0.1375732421875f,(float16_t)0.9907226562500f,
+(float16_t)0.1361083984375f,(float16_t)0.9907226562500f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1330566406250f,(float16_t)0.9912109375000f,
+(float16_t)0.1315917968750f,(float16_t)0.9912109375000f,
+(float16_t)0.1300048828125f,(float16_t)0.9916992187500f,
+(float16_t)0.1285400390625f,(float16_t)0.9916992187500f,
+(float16_t)0.1269531250000f,(float16_t)0.9916992187500f,
+(float16_t)0.1254882812500f,(float16_t)0.9921875000000f,
+(float16_t)0.1239624023438f,(float16_t)0.9921875000000f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1209106445312f,(float16_t)0.9926757812500f,
+(float16_t)0.1193847656250f,(float16_t)0.9926757812500f,
+(float16_t)0.1178588867188f,(float16_t)0.9931640625000f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.1148071289062f,(float16_t)0.9931640625000f,
+(float16_t)0.1132812500000f,(float16_t)0.9936523437500f,
+(float16_t)0.1117553710938f,(float16_t)0.9936523437500f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.1087036132812f,(float16_t)0.9941406250000f,
+(float16_t)0.1071777343750f,(float16_t)0.9941406250000f,
+(float16_t)0.1056518554688f,(float16_t)0.9946289062500f,
+(float16_t)0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)0.1026000976562f,(float16_t)0.9946289062500f,
+(float16_t)0.1010742187500f,(float16_t)0.9951171875000f,
+(float16_t)0.0995483398438f,(float16_t)0.9951171875000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0964965820312f,(float16_t)0.9951171875000f,
+(float16_t)0.0949707031250f,(float16_t)0.9956054687500f,
+(float16_t)0.0934448242188f,(float16_t)0.9956054687500f,
+(float16_t)0.0919189453125f,(float16_t)0.9956054687500f,
+(float16_t)0.0903930664062f,(float16_t)0.9960937500000f,
+(float16_t)0.0888671875000f,(float16_t)0.9960937500000f,
+(float16_t)0.0873413085938f,(float16_t)0.9960937500000f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0842895507812f,(float16_t)0.9965820312500f,
+(float16_t)0.0827636718750f,(float16_t)0.9965820312500f,
+(float16_t)0.0812377929688f,(float16_t)0.9965820312500f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0781250000000f,(float16_t)0.9970703125000f,
+(float16_t)0.0765991210938f,(float16_t)0.9970703125000f,
+(float16_t)0.0750732421875f,(float16_t)0.9970703125000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0720214843750f,(float16_t)0.9975585937500f,
+(float16_t)0.0704956054688f,(float16_t)0.9975585937500f,
+(float16_t)0.0689697265625f,(float16_t)0.9975585937500f,
+(float16_t)0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)0.0659179687500f,(float16_t)0.9980468750000f,
+(float16_t)0.0643920898438f,(float16_t)0.9980468750000f,
+(float16_t)0.0628662109375f,(float16_t)0.9980468750000f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0597839355469f,(float16_t)0.9980468750000f,
+(float16_t)0.0582580566406f,(float16_t)0.9985351562500f,
+(float16_t)0.0567321777344f,(float16_t)0.9985351562500f,
+(float16_t)0.0552062988281f,(float16_t)0.9985351562500f,
+(float16_t)0.0536499023438f,(float16_t)0.9985351562500f,
+(float16_t)0.0521240234375f,(float16_t)0.9985351562500f,
+(float16_t)0.0505981445312f,(float16_t)0.9985351562500f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0475463867188f,(float16_t)0.9990234375000f,
+(float16_t)0.0459899902344f,(float16_t)0.9990234375000f,
+(float16_t)0.0444641113281f,(float16_t)0.9990234375000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0414123535156f,(float16_t)0.9990234375000f,
+(float16_t)0.0398864746094f,(float16_t)0.9990234375000f,
+(float16_t)0.0383300781250f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0352783203125f,(float16_t)0.9995117187500f,
+(float16_t)0.0337524414062f,(float16_t)0.9995117187500f,
+(float16_t)0.0321960449219f,(float16_t)0.9995117187500f,
+(float16_t)0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)0.0291442871094f,(float16_t)0.9995117187500f,
+(float16_t)0.0276031494141f,(float16_t)0.9995117187500f,
+(float16_t)0.0260772705078f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0230102539062f,(float16_t)0.9995117187500f,
+(float16_t)0.0214691162109f,(float16_t)1.0000000000000f,
+(float16_t)0.0199432373047f,(float16_t)1.0000000000000f,
+(float16_t)0.0184020996094f,(float16_t)1.0000000000000f,
+(float16_t)0.0168762207031f,(float16_t)1.0000000000000f,
+(float16_t)0.0153427124023f,(float16_t)1.0000000000000f,
+(float16_t)0.0138015747070f,(float16_t)1.0000000000000f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0107345581055f,(float16_t)1.0000000000000f,
+(float16_t)0.0092010498047f,(float16_t)1.0000000000000f,
+(float16_t)0.0076713562012f,(float16_t)1.0000000000000f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)0.0046005249023f,(float16_t)1.0000000000000f,
+(float16_t)0.0030670166016f,(float16_t)1.0000000000000f,
+(float16_t)0.0015335083008f,(float16_t)1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0061340332031f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0429382324219f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9965820312500f,(float16_t)0.0797119140625f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9931640625000f,(float16_t)0.1163330078125f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9882812500000f,(float16_t)0.1528320312500f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9819335937500f,(float16_t)0.1890869140625f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9741210937500f,(float16_t)0.2250976562500f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9653320312500f,(float16_t)0.2607421875000f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9550781250000f,(float16_t)0.2961425781250f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9433593750000f,(float16_t)0.3310546875000f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9306640625000f,(float16_t)0.3657226562500f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9165039062500f,(float16_t)0.3996582031250f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.9013671875000f,(float16_t)0.4331054687500f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8847656250000f,(float16_t)0.4660644531250f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8671875000000f,(float16_t)0.4982910156250f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8481445312500f,(float16_t)0.5297851562500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8281250000000f,(float16_t)0.5605468750000f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8066406250000f,(float16_t)0.5908203125000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7846679687500f,(float16_t)0.6201171875000f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7612304687500f,(float16_t)0.6484375000000f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7368164062500f,(float16_t)0.6762695312500f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7114257812500f,(float16_t)0.7026367187500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6850585937500f,(float16_t)0.7285156250000f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6577148437500f,(float16_t)0.7534179687500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6293945312500f,(float16_t)0.7768554687500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.6005859375000f,(float16_t)0.7993164062500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5708007812500f,(float16_t)0.8212890625000f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5400390625000f,(float16_t)0.8417968750000f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5087890625000f,(float16_t)0.8608398437500f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4768066406250f,(float16_t)0.8789062500000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4440917968750f,(float16_t)0.8959960937500f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4108886718750f,(float16_t)0.9116210937500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3769531250000f,(float16_t)0.9262695312500f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3427734375000f,(float16_t)0.9394531250000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3078613281250f,(float16_t)0.9516601562500f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2727050781250f,(float16_t)0.9619140625000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2370605468750f,(float16_t)0.9716796875000f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.2010498046875f,(float16_t)0.9794921875000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1649169921875f,(float16_t)0.9863281250000f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1285400390625f,(float16_t)0.9916992187500f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0919189453125f,(float16_t)0.9956054687500f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0552062988281f,(float16_t)0.9985351562500f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0184020996094f,(float16_t)1.0000000000000f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,};
 
 float16_t rearranged_twiddle_stride2_4096_f16[2728]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99999529380957619118f,(float16_t)0.00306795676296597614f,
-(float16_t)0.99998117528260110909f,(float16_t)0.00613588464915447527f,
-(float16_t)0.99995764455196389786f,(float16_t)0.00920375478205981944f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99988234745421256111f,(float16_t)0.01533920628498810015f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99976940535121527898f,(float16_t)0.02147408027546950787f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99961882249517863830f,(float16_t)0.02760814577896573974f,
-(float16_t)0.99952941750109314256f,(float16_t)0.03067480317663662595f,
-(float16_t)0.99943060455546173237f,(float16_t)0.03374117185137757990f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99920475861836388631f,(float16_t)0.03987292758773981066f,
-(float16_t)0.99907772775264536147f,(float16_t)0.04293825693494082024f,
-(float16_t)0.99894129318685687124f,(float16_t)0.04600318213091462299f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99864021818026527111f,(float16_t)0.05213170468028332366f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99830154493389289261f,(float16_t)0.05825826450043575244f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99792528619859599548f,(float16_t)0.06438263092985746505f,
-(float16_t)0.99772306664419163624f,(float16_t)0.06744391956366405094f,
-(float16_t)0.99751145614030345410f,(float16_t)0.07050457338961385600f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99706007033948296225f,(float16_t)0.07662386139203149205f,
-(float16_t)0.99682029929116566791f,(float16_t)0.07968243797143012563f,
-(float16_t)0.99657114579055483539f,(float16_t)0.08274026454937569164f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99604470090125196702f,(float16_t)0.08885355258252460031f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99548075549192693856f,(float16_t)0.09496349532963899165f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99487933079480561638f,(float16_t)0.10106986275482782167f,
-(float16_t)0.99456457073425541537f,(float16_t)0.10412163387205458642f,
-(float16_t)0.99424044945318790223f,(float16_t)0.10717242495680884273f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99356413552059530403f,(float16_t)0.11327095217756434631f,
-(float16_t)0.99321194923479450001f,(float16_t)0.11631863091190475235f,
-(float16_t)0.99285041445986510489f,(float16_t)0.11936521481099135467f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99209931314219179654f,(float16_t)0.12545498341154623367f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.99131085984611544415f,(float16_t)0.13154002870288311611f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.99048508425645709341f,(float16_t)0.13762012158648603832f,
-(float16_t)0.99005821026229712256f,(float16_t)0.14065823933284921088f,
-(float16_t)0.98962201746320088702f,(float16_t)0.14369503315029447110f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98872169196032377858f,(float16_t)0.14976453467732150915f,
-(float16_t)0.98825756773074946437f,(float16_t)0.15279718525844343535f,
-(float16_t)0.98778414164457217783f,(float16_t)0.15582839765426523271f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98680940181418552726f,(float16_t)0.16188639378011182579f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98579750916756747614f,(float16_t)0.16793829497473117263f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98474850180190420801f,(float16_t)0.17398387338746382214f,
-(float16_t)0.98421009238692902521f,(float16_t)0.17700422041214874946f,
-(float16_t)0.98366241921173025453f,(float16_t)0.18002290140569951471f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98253930228744124076f,(float16_t)0.18605515166344663291f,
-(float16_t)0.98196386910955524296f,(float16_t)0.18906866414980619262f,
-(float16_t)0.98137919331375456089f,(float16_t)0.19208039704989243734f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.98018213596811742949f,(float16_t)0.19809841071795356027f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97894817531906219710f,(float16_t)0.20410896609281686809f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97767735782450992943f,(float16_t)0.21011183688046961016f,
-(float16_t)0.97702814265775439484f,(float16_t)0.21311031991609136194f,
-(float16_t)0.97636973133002114000f,(float16_t)0.21610679707621952006f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97502534506699412020f,(float16_t)0.22209362097320350937f,
-(float16_t)0.97433938278557585821f,(float16_t)0.22508391135979283204f,
-(float16_t)0.97364424965081197705f,(float16_t)0.22807208317088573102f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97222649707893626925f,(float16_t)0.23404195858354343018f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.97077214072895035013f,(float16_t)0.24000302244874149871f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96928123535654853171f,(float16_t)0.24595505033579459497f,
-(float16_t)0.96852209427441737777f,(float16_t)0.24892760574572014853f,
-(float16_t)0.96775383709347551076f,(float16_t)0.25189781815421696809f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96619000344541250413f,(float16_t)0.25783110216215898713f,
-(float16_t)0.96539444169768939830f,(float16_t)0.26079411791527551401f,
-(float16_t)0.96458979328981275803f,(float16_t)0.26375467897483134694f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96295326687368387741f,(float16_t)0.26966832557291509076f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.96128048581132063966f,(float16_t)0.27557181931095814376f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.95957151308198451733f,(float16_t)0.28146493792575794091f,
-(float16_t)0.95870347489587159906f,(float16_t)0.28440753721127187692f,
-(float16_t)0.95782641302753290802f,(float16_t)0.28734745954472951102f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95604525134999640557f,(float16_t)0.29321916269425862822f,
-(float16_t)0.95514116830577078243f,(float16_t)0.29615088824362378883f,
-(float16_t)0.95422809510910566733f,(float16_t)0.29907982630804047508f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.95237501271976587880f,(float16_t)0.30492922973540237397f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.95048607394948170235f,(float16_t)0.31076715274961147495f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94856134991573026749f,(float16_t)0.31659337555616584581f,
-(float16_t)0.94758559101774109124f,(float16_t)0.31950203081601569188f,
-(float16_t)0.94660091308328353499f,(float16_t)0.32240767880106985244f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94460483726148025685f,(float16_t)0.32820984357909249729f,
-(float16_t)0.94359345816196038559f,(float16_t)0.33110630575987642921f,
-(float16_t)0.94257319760144686605f,(float16_t)0.33399965144200938205f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.94050607059326829518f,(float16_t)0.33977688440682685123f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93840353406310805795f,(float16_t)0.34554132496398909380f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93626566717027825959f,(float16_t)0.35129275608556709276f,
-(float16_t)0.93518350993894761025f,(float16_t)0.35416352542049034380f,
-(float16_t)0.93409255040425887007f,(float16_t)0.35703096123342997759f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.93188426558166814750f,(float16_t)0.36275572436739722537f,
-(float16_t)0.93076696107898371224f,(float16_t)0.36561299780477385379f,
-(float16_t)0.92964089584318121418f,(float16_t)0.36846682995337232125f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92736252565040111495f,(float16_t)0.37416406297145793358f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.92504924078267758425f,(float16_t)0.37984720892405116066f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.92270112833387862850f,(float16_t)0.38551605384391884890f,
-(float16_t)0.92151403934204190183f,(float16_t)0.38834504669882624617f,
-(float16_t)0.92031827670911059425f,(float16_t)0.39117038430225387069f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91790077562139049672f,(float16_t)0.39680998741671030805f,
-(float16_t)0.91667905992104270485f,(float16_t)0.39962419984564678810f,
-(float16_t)0.91544871608826783316f,(float16_t)0.40243465085941843018f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.91296219042839821256f,(float16_t)0.40804416286497868782f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.91044129225806724737f,(float16_t)0.41363831223843450235f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90788611648766626150f,(float16_t)0.41921688836322390515f,
-(float16_t)0.90659570451491533483f,(float16_t)0.42200027079979968159f,
-(float16_t)0.90529675931811881551f,(float16_t)0.42477968120910880589f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.90267331823725882600f,(float16_t)0.43032648134008261165f,
-(float16_t)0.90134884704602202810f,(float16_t)0.43309381885315195726f,
-(float16_t)0.90001589201616016833f,(float16_t)0.43585707992225547480f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89732458070541831763f,(float16_t)0.44137126873171667052f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.89459948563138269595f,(float16_t)0.44686884016237415906f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.89184070939234272313f,(float16_t)0.45234958723377088896f,
-(float16_t)0.89044872324475787817f,(float16_t)0.45508358712634383592f,
-(float16_t)0.88904835585466457371f,(float16_t)0.45781330359887717485f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88622253014888063838f,(float16_t)0.46325978355186014923f,
-(float16_t)0.88479709843093778954f,(float16_t)0.46597649576796618121f,
-(float16_t)0.88336333866573157891f,(float16_t)0.46868882203582790114f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.88047088905216075450f,(float16_t)0.47410021465054996703f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87754529020726135258f,(float16_t)0.47949375766015295275f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87458665227817611321f,(float16_t)0.48486924800079106435f,
-(float16_t)0.87309497841829009079f,(float16_t)0.48755016014843599592f,
-(float16_t)0.87159508665595097909f,(float16_t)0.49022648328829115938f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86857070597134089507f,(float16_t)0.49556526182577254058f,
-(float16_t)0.86704624551569264845f,(float16_t)0.49822766697278181303f,
-(float16_t)0.86551362409056908920f,(float16_t)0.50088538261124071482f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.86242395611104050168f,(float16_t)0.50618664534515522835f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85930181835700847337f,(float16_t)0.51146885043797030157f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85614732837519447184f,(float16_t)0.51673179901764987321f,
-(float16_t)0.85455798836540053376f,(float16_t)0.51935599016558964269f,
-(float16_t)0.85296060493036363059f,(float16_t)0.52197529293715438925f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84974176800085254868f,(float16_t)0.52719913478190127964f,
-(float16_t)0.84812034480329723252f,(float16_t)0.52980362468629460526f,
-(float16_t)0.84649093877405212627f,(float16_t)0.53240312787719790144f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.84320823964184543620f,(float16_t)0.53758707629564539410f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83989379419599952126f,(float16_t)0.54275078486451588944f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83654772722351200542f,(float16_t)0.54789405917310018967f,
-(float16_t)0.83486287498638001026f,(float16_t)0.55045797293660481131f,
-(float16_t)0.83317016470191318511f,(float16_t)0.55301670558002746780f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82976123379452304540f,(float16_t)0.55811853122055610221f,
-(float16_t)0.82804504525775579626f,(float16_t)0.56066157619733603124f,
-(float16_t)0.82632106284566353427f,(float16_t)0.56319934401383409117f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.82284978137582642788f,(float16_t)0.56825895267013148970f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81934752007679700903f,(float16_t)0.57329716669804220430f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81581441080673378075f,(float16_t)0.57831379641165558958f,
-(float16_t)0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)0.81225058658520399302f,(float16_t)0.58330865293769829094f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80865618158817498262f,(float16_t)0.58828154822264522306f,
-(float16_t)0.80684755354379933401f,(float16_t)0.59075970185887416442f,
-(float16_t)0.80503133114296365758f,(float16_t)0.59323229503979979516f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.80137617172314024039f,(float16_t)0.59816070699634238395f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.79769084094339115509f,(float16_t)0.60306659854034816437f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.79397547755433717231f,(float16_t)0.60794978496777363208f,
-(float16_t)0.79210657730021238887f,(float16_t)0.61038280627630947528f,
-(float16_t)0.79023022143731003197f,(float16_t)0.61281008242940970820f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78645521359908576731f,(float16_t)0.61764730793780386886f,
-(float16_t)0.78455659715557524159f,(float16_t)0.62005721176328909561f,
-(float16_t)0.78265059616657572938f,(float16_t)0.62246127937414996723f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77881651238147597827f,(float16_t)0.62725181549514408275f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.77495310659487393057f,(float16_t)0.63201873593980906207f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.77106052426181381776f,(float16_t)0.63676186123628419899f,
-(float16_t)0.76910333764557969882f,(float16_t)0.63912444486377573138f,
-(float16_t)0.76713891193582040007f,(float16_t)0.64148101280858305095f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.76318841726338138010f,(float16_t)0.64617601298331628357f,
-(float16_t)0.76120238548426177871f,(float16_t)0.64851440102211244110f,
-(float16_t)0.75920918897838796102f,(float16_t)0.65084668499638087535f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.75520137689653654700f,(float16_t)0.65549285299961534967f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.75116513190968636771f,(float16_t)0.66011434206742047870f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74710060598018013245f,(float16_t)0.66471097820334479334f,
-(float16_t)0.74505778544146594733f,(float16_t)0.66699992230363747137f,
-(float16_t)0.74300795213512171866f,(float16_t)0.66928258834663600929f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73888732446061511361f,(float16_t)0.67382900037875603783f,
-(float16_t)0.73681656887736979300f,(float16_t)0.67609270357531592310f,
-(float16_t)0.73473887809596349907f,(float16_t)0.67835004312986146857f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.73056276922782759087f,(float16_t)0.68284554638524808112f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.72635915508434600873f,(float16_t)0.68731534089175905233f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.72212819392921534511f,(float16_t)0.69175925836415774750f,
-(float16_t)0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)0.71787004505573170920f,(float16_t)0.69617713149146298601f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.71358486878079352422f,(float16_t)0.70056879394324833576f,
-(float16_t)0.71143219574521643356f,(float16_t)0.70275474445722529993f,
-(float16_t)0.70927282643886568891f,(float16_t)0.70493408037590488124f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.70493408037590499227f,(float16_t)0.70927282643886568891f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.70056879394324844679f,(float16_t)0.71358486878079352422f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.69617713149146298601f,(float16_t)0.71787004505573170920f,
-(float16_t)0.69397146088965400157f,(float16_t)0.72000250796138165477f,
-(float16_t)0.69175925836415774750f,(float16_t)0.72212819392921534511f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68731534089175905233f,(float16_t)0.72635915508434600873f,
-(float16_t)0.68508366777270035541f,(float16_t)0.72846439044822519637f,
-(float16_t)0.68284554638524808112f,(float16_t)0.73056276922782759087f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67835004312986146857f,(float16_t)0.73473887809596349907f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.67382900037875614885f,(float16_t)0.73888732446061511361f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.66928258834663600929f,(float16_t)0.74300795213512171866f,
-(float16_t)0.66699992230363747137f,(float16_t)0.74505778544146594733f,
-(float16_t)0.66471097820334490436f,(float16_t)0.74710060598018013245f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.66011434206742047870f,(float16_t)0.75116513190968636771f,
-(float16_t)0.65780669329707874837f,(float16_t)0.75318679904361252042f,
-(float16_t)0.65549285299961546070f,(float16_t)0.75520137689653654700f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.65084668499638098638f,(float16_t)0.75920918897838796102f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.64617601298331639459f,(float16_t)0.76318841726338126907f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.64148101280858316198f,(float16_t)0.76713891193582040007f,
-(float16_t)0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)0.63676186123628419899f,(float16_t)0.77106052426181381776f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.63201873593980906207f,(float16_t)0.77495310659487381955f,
-(float16_t)0.62963823891492709528f,(float16_t)0.77688846567323244230f,
-(float16_t)0.62725181549514419377f,(float16_t)0.77881651238147586724f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.62246127937415007825f,(float16_t)0.78265059616657572938f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.61764730793780397988f,(float16_t)0.78645521359908576731f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.61281008242940970820f,(float16_t)0.79023022143731003197f,
-(float16_t)0.61038280627630947528f,(float16_t)0.79210657730021227785f,
-(float16_t)0.60794978496777374311f,(float16_t)0.79397547755433717231f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.60306659854034827539f,(float16_t)0.79769084094339104407f,
-(float16_t)0.60061647938386897305f,(float16_t)0.79953726910790501314f,
-(float16_t)0.59816070699634238395f,(float16_t)0.80137617172314012937f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.59323229503979979516f,(float16_t)0.80503133114296365758f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.58828154822264533408f,(float16_t)0.80865618158817498262f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.58330865293769829094f,(float16_t)0.81225058658520388200f,
-(float16_t)0.58081395809576452649f,(float16_t)0.81403632970594830276f,
-(float16_t)0.57831379641165558958f,(float16_t)0.81581441080673378075f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.57329716669804231532f,(float16_t)0.81934752007679689800f,
-(float16_t)0.57078074588696736669f,(float16_t)0.82110251499110464835f,
-(float16_t)0.56825895267013148970f,(float16_t)0.82284978137582631685f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.56319934401383409117f,(float16_t)0.82632106284566353427f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.55811853122055610221f,(float16_t)0.82976123379452304540f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.55301670558002757883f,(float16_t)0.83317016470191318511f,
-(float16_t)0.55045797293660481131f,(float16_t)0.83486287498638001026f,
-(float16_t)0.54789405917310018967f,(float16_t)0.83654772722351189440f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.54275078486451600046f,(float16_t)0.83989379419599941023f,
-(float16_t)0.54017147272989296525f,(float16_t)0.84155497743689833268f,
-(float16_t)0.53758707629564550512f,(float16_t)0.84320823964184543620f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.53240312787719801246f,(float16_t)0.84649093877405212627f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.52719913478190139067f,(float16_t)0.84974176800085243766f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.52197529293715438925f,(float16_t)0.85296060493036363059f,
-(float16_t)0.51935599016558953167f,(float16_t)0.85455798836540053376f,
-(float16_t)0.51673179901764998423f,(float16_t)0.85614732837519447184f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.51146885043797052361f,(float16_t)0.85930181835700836235f,
-(float16_t)0.50883014254310698909f,(float16_t)0.86086693863776730939f,
-(float16_t)0.50618664534515533937f,(float16_t)0.86242395611104050168f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.50088538261124093687f,(float16_t)0.86551362409056897818f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.49556526182577248507f,(float16_t)0.86857070597134089507f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.49022648328829110387f,(float16_t)0.87159508665595109012f,
-(float16_t)0.48755016014843605143f,(float16_t)0.87309497841829009079f,
-(float16_t)0.48486924800079111986f,(float16_t)0.87458665227817611321f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47949375766015300826f,(float16_t)0.87754529020726124156f,
-(float16_t)0.47679923006332225466f,(float16_t)0.87901222642863341417f,
-(float16_t)0.47410021465055002254f,(float16_t)0.88047088905216075450f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.46868882203582795665f,(float16_t)0.88336333866573157891f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.46325978355186026025f,(float16_t)0.88622253014888063838f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.45781330359887728587f,(float16_t)0.88904835585466457371f,
-(float16_t)0.45508358712634383592f,(float16_t)0.89044872324475787817f,
-(float16_t)0.45234958723377099998f,(float16_t)0.89184070939234272313f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.44686884016237432560f,(float16_t)0.89459948563138258493f,
-(float16_t)0.44412214457042925586f,(float16_t)0.89596624975618510689f,
-(float16_t)0.44137126873171661501f,(float16_t)0.89732458070541831763f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.43585707992225547480f,(float16_t)0.90001589201616027935f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.43032648134008261165f,(float16_t)0.90267331823725882600f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.42477968120910880589f,(float16_t)0.90529675931811881551f,
-(float16_t)0.42200027079979979261f,(float16_t)0.90659570451491533483f,
-(float16_t)0.41921688836322396066f,(float16_t)0.90788611648766626150f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.41363831223843455787f,(float16_t)0.91044129225806713634f,
-(float16_t)0.41084317105790391089f,(float16_t)0.91170603200542987832f,
-(float16_t)0.40804416286497874333f,(float16_t)0.91296219042839810154f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.40243465085941854120f,(float16_t)0.91544871608826783316f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.39680998741671041907f,(float16_t)0.91790077562139038569f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.39117038430225398171f,(float16_t)0.92031827670911048322f,
-(float16_t)0.38834504669882630168f,(float16_t)0.92151403934204190183f,
-(float16_t)0.38551605384391901543f,(float16_t)0.92270112833387851747f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37984720892405110515f,(float16_t)0.92504924078267758425f,
-(float16_t)0.37700741021641831496f,(float16_t)0.92621024213831126826f,
-(float16_t)0.37416406297145798909f,(float16_t)0.92736252565040111495f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.36846682995337232125f,(float16_t)0.92964089584318121418f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.36275572436739722537f,(float16_t)0.93188426558166814750f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.35703096123343003310f,(float16_t)0.93409255040425887007f,
-(float16_t)0.35416352542049051033f,(float16_t)0.93518350993894749923f,
-(float16_t)0.35129275608556714827f,(float16_t)0.93626566717027825959f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.34554132496398914931f,(float16_t)0.93840353406310805795f,
-(float16_t)0.34266071731199437833f,(float16_t)0.93945922360218991898f,
-(float16_t)0.33977688440682696225f,(float16_t)0.94050607059326829518f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.33399965144200949307f,(float16_t)0.94257319760144686605f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.32820984357909266382f,(float16_t)0.94460483726148025685f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.32240767880107001897f,(float16_t)0.94660091308328353499f,
-(float16_t)0.31950203081601574739f,(float16_t)0.94758559101774109124f,
-(float16_t)0.31659337555616584581f,(float16_t)0.94856134991573026749f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.31076715274961147495f,(float16_t)0.95048607394948170235f,
-(float16_t)0.30784964004153497763f,(float16_t)0.95143502096900833820f,
-(float16_t)0.30492922973540242948f,(float16_t)0.95237501271976587880f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.29907982630804047508f,(float16_t)0.95422809510910566733f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.29321916269425868373f,(float16_t)0.95604525134999640557f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.28734745954472956653f,(float16_t)0.95782641302753290802f,
-(float16_t)0.28440753721127182141f,(float16_t)0.95870347489587159906f,
-(float16_t)0.28146493792575805193f,(float16_t)0.95957151308198451733f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.27557181931095825478f,(float16_t)0.96128048581132063966f,
-(float16_t)0.27262135544994897662f,(float16_t)0.96212140426904158019f,
-(float16_t)0.26966832557291520178f,(float16_t)0.96295326687368387741f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.26375467897483151347f,(float16_t)0.96458979328981264700f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.25783110216215893162f,(float16_t)0.96619000344541261516f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.25189781815421691258f,(float16_t)0.96775383709347551076f,
-(float16_t)0.24892760574572025956f,(float16_t)0.96852209427441726675f,
-(float16_t)0.24595505033579459497f,(float16_t)0.96928123535654853171f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.24000302244874149871f,(float16_t)0.97077214072895035013f,
-(float16_t)0.23702360599436733679f,(float16_t)0.97150389098625178352f,
-(float16_t)0.23404195858354345794f,(float16_t)0.97222649707893626925f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.22807208317088578653f,(float16_t)0.97364424965081186603f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.22209362097320359264f,(float16_t)0.97502534506699412020f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.21610679707621960333f,(float16_t)0.97636973133002114000f,
-(float16_t)0.21311031991609136194f,(float16_t)0.97702814265775439484f,
-(float16_t)0.21011183688046972118f,(float16_t)0.97767735782450992943f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.20410896609281700687f,(float16_t)0.97894817531906219710f,
-(float16_t)0.20110463484209195606f,(float16_t)0.97956976568544051887f,
-(float16_t)0.19809841071795372680f,(float16_t)0.98018213596811731847f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.19208039704989238183f,(float16_t)0.98137919331375456089f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.18605515166344663291f,(float16_t)0.98253930228744124076f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.18002290140569951471f,(float16_t)0.98366241921173025453f,
-(float16_t)0.17700422041214886049f,(float16_t)0.98421009238692902521f,
-(float16_t)0.17398387338746384989f,(float16_t)0.98474850180190420801f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.16793829497473122814f,(float16_t)0.98579750916756736512f,
-(float16_t)0.16491312048997008866f,(float16_t)0.98630809724459866938f,
-(float16_t)0.16188639378011188130f,(float16_t)0.98680940181418541624f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.15582839765426531597f,(float16_t)0.98778414164457217783f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.14976453467732162017f,(float16_t)0.98872169196032377858f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.14369503315029458212f,(float16_t)0.98962201746320077600f,
-(float16_t)0.14065823933284923863f,(float16_t)0.99005821026229712256f,
-(float16_t)0.13762012158648617710f,(float16_t)0.99048508425645698239f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.13154002870288328264f,(float16_t)0.99131085984611544415f,
-(float16_t)0.12849811079379322432f,(float16_t)0.99170975366909952520f,
-(float16_t)0.12545498341154620592f,(float16_t)0.99209931314219179654f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.11936521481099135467f,(float16_t)0.99285041445986510489f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.11327095217756436019f,(float16_t)0.99356413552059530403f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.10717242495680887049f,(float16_t)0.99424044945318790223f,
-(float16_t)0.10412163387205472520f,(float16_t)0.99456457073425541537f,
-(float16_t)0.10106986275482787718f,(float16_t)0.99487933079480561638f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.09496349532963906104f,(float16_t)0.99548075549192693856f,
-(float16_t)0.09190895649713269611f,(float16_t)0.99576741446765981713f,
-(float16_t)0.08885355258252468358f,(float16_t)0.99604470090125196702f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.08274026454937580266f,(float16_t)0.99657114579055483539f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.07662386139203161695f,(float16_t)0.99706007033948296225f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.07050457338961400866f,(float16_t)0.99751145614030345410f,
-(float16_t)0.06744391956366410645f,(float16_t)0.99772306664419163624f,
-(float16_t)0.06438263092985740954f,(float16_t)0.99792528619859599548f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.05825826450043573163f,(float16_t)0.99830154493389289261f,
-(float16_t)0.05519524434969003135f,(float16_t)0.99847558057329477421f,
-(float16_t)0.05213170468028331672f,(float16_t)0.99864021818026527111f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.04600318213091464381f,(float16_t)0.99894129318685687124f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.03987292758773984536f,(float16_t)0.99920475861836388631f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.03374117185137764235f,(float16_t)0.99943060455546173237f,
-(float16_t)0.03067480317663658085f,(float16_t)0.99952941750109314256f,
-(float16_t)0.02760814577896581953f,(float16_t)0.99961882249517863830f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.02147408027546960502f,(float16_t)0.99976940535121527898f,
-(float16_t)0.01840672990580482019f,(float16_t)0.99983058179582340319f,
-(float16_t)0.01533920628498821985f,(float16_t)0.99988234745421256111f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.00920375478205995995f,(float16_t)0.99995764455196389786f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)0.00306795676296613791f,(float16_t)0.99999529380957619118f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.00306795676296601561f,(float16_t)0.99999529380957619118f,
-(float16_t)-0.00613588464915439287f,(float16_t)0.99998117528260110909f,
-(float16_t)-0.00920375478205983678f,(float16_t)0.99995764455196389786f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.01533920628498809842f,(float16_t)0.99988234745421256111f,
-(float16_t)-0.01840672990580469529f,(float16_t)0.99983058179582340319f,
-(float16_t)-0.02147408027546948359f,(float16_t)0.99976940535121527898f,
-(float16_t)-0.02454122852291214241f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.02760814577896569810f,(float16_t)0.99961882249517863830f,
-(float16_t)-0.03067480317663645942f,(float16_t)0.99952941750109314256f,
-(float16_t)-0.03374117185137751745f,(float16_t)0.99943060455546173237f,
-(float16_t)-0.03680722294135886641f,(float16_t)0.99932238458834954375f,
-(float16_t)-0.03987292758773972740f,(float16_t)0.99920475861836388631f,
-(float16_t)-0.04293825693494083412f,(float16_t)0.99907772775264536147f,
-(float16_t)-0.04600318213091451891f,(float16_t)0.99894129318685687124f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.05213170468028319182f,(float16_t)0.99864021818026527111f,
-(float16_t)-0.05519524434968991339f,(float16_t)0.99847558057329477421f,
-(float16_t)-0.05825826450043560673f,(float16_t)0.99830154493389289261f,
-(float16_t)-0.06132073630220852972f,(float16_t)0.99811811290014917919f,
-(float16_t)-0.06438263092985728464f,(float16_t)0.99792528619859599548f,
-(float16_t)-0.06744391956366398155f,(float16_t)0.99772306664419163624f,
-(float16_t)-0.07050457338961389764f,(float16_t)0.99751145614030345410f,
-(float16_t)-0.07356456359966732916f,(float16_t)0.99729045667869020697f,
-(float16_t)-0.07662386139203150592f,(float16_t)0.99706007033948296225f,
-(float16_t)-0.07968243797143001461f,(float16_t)0.99682029929116577893f,
-(float16_t)-0.08274026454937567776f,(float16_t)0.99657114579055483539f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.08885355258252455868f,(float16_t)0.99604470090125196702f,
-(float16_t)-0.09190895649713257121f,(float16_t)0.99576741446765981713f,
-(float16_t)-0.09496349532963895002f,(float16_t)0.99548075549192693856f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.10106986275482775228f,(float16_t)0.99487933079480561638f,
-(float16_t)-0.10412163387205460030f,(float16_t)0.99456457073425541537f,
-(float16_t)-0.10717242495680875947f,(float16_t)0.99424044945318790223f,
-(float16_t)-0.11022220729388305938f,(float16_t)0.99390697000235606051f,
-(float16_t)-0.11327095217756423529f,(float16_t)0.99356413552059530403f,
-(float16_t)-0.11631863091190475235f,(float16_t)0.99321194923479450001f,
-(float16_t)-0.11936521481099122977f,(float16_t)0.99285041445986510489f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.12545498341154606714f,(float16_t)0.99209931314219179654f,
-(float16_t)-0.12849811079379311329f,(float16_t)0.99170975366909952520f,
-(float16_t)-0.13154002870288314386f,(float16_t)0.99131085984611544415f,
-(float16_t)-0.13458070850712611222f,(float16_t)0.99090263542778000971f,
-(float16_t)-0.13762012158648606608f,(float16_t)0.99048508425645698239f,
-(float16_t)-0.14065823933284912761f,(float16_t)0.99005821026229712256f,
-(float16_t)-0.14369503315029444335f,(float16_t)0.98962201746320088702f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.14976453467732150915f,(float16_t)0.98872169196032377858f,
-(float16_t)-0.15279718525844329657f,(float16_t)0.98825756773074946437f,
-(float16_t)-0.15582839765426520495f,(float16_t)0.98778414164457217783f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.16188639378011177028f,(float16_t)0.98680940181418552726f,
-(float16_t)-0.16491312048996994988f,(float16_t)0.98630809724459866938f,
-(float16_t)-0.16793829497473108936f,(float16_t)0.98579750916756747614f,
-(float16_t)-0.17096188876030124493f,(float16_t)0.98527764238894122162f,
-(float16_t)-0.17398387338746371111f,(float16_t)0.98474850180190420801f,
-(float16_t)-0.17700422041214874946f,(float16_t)0.98421009238692902521f,
-(float16_t)-0.18002290140569940369f,(float16_t)0.98366241921173025453f,
-(float16_t)-0.18303988795514092303f,(float16_t)0.98310548743121628501f,
-(float16_t)-0.18605515166344649414f,(float16_t)0.98253930228744124076f,
-(float16_t)-0.18906866414980616486f,(float16_t)0.98196386910955524296f,
-(float16_t)-0.19208039704989227081f,(float16_t)0.98137919331375456089f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.19809841071795361578f,(float16_t)0.98018213596811742949f,
-(float16_t)-0.20110463484209181728f,(float16_t)0.97956976568544051887f,
-(float16_t)-0.20410896609281689584f,(float16_t)0.97894817531906219710f,
-(float16_t)-0.20711137619221844930f,(float16_t)0.97831737071962765473f,
-(float16_t)-0.21011183688046961016f,(float16_t)0.97767735782450992943f,
-(float16_t)-0.21311031991609125091f,(float16_t)0.97702814265775439484f,
-(float16_t)-0.21610679707621949230f,(float16_t)0.97636973133002114000f,
-(float16_t)-0.21910124015686965881f,(float16_t)0.97570213003852857003f,
-(float16_t)-0.22209362097320348162f,(float16_t)0.97502534506699412020f,
-(float16_t)-0.22508391135979266551f,(float16_t)0.97433938278557585821f,
-(float16_t)-0.22807208317088567551f,(float16_t)0.97364424965081197705f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.23404195858354331916f,(float16_t)0.97222649707893638027f,
-(float16_t)-0.23702360599436722577f,(float16_t)0.97150389098625178352f,
-(float16_t)-0.24000302244874138768f,(float16_t)0.97077214072895035013f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.24595505033579448395f,(float16_t)0.96928123535654853171f,
-(float16_t)-0.24892760574572012078f,(float16_t)0.96852209427441737777f,
-(float16_t)-0.25189781815421680156f,(float16_t)0.96775383709347551076f,
-(float16_t)-0.25486565960451451618f,(float16_t)0.96697647104485207059f,
-(float16_t)-0.25783110216215882060f,(float16_t)0.96619000344541261516f,
-(float16_t)-0.26079411791527545850f,(float16_t)0.96539444169768939830f,
-(float16_t)-0.26375467897483140245f,(float16_t)0.96458979328981275803f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.26966832557291509076f,(float16_t)0.96295326687368387741f,
-(float16_t)-0.27262135544994886560f,(float16_t)0.96212140426904158019f,
-(float16_t)-0.27557181931095814376f,(float16_t)0.96128048581132063966f,
-(float16_t)-0.27851968938505294870f,(float16_t)0.96043051941556589757f,
-(float16_t)-0.28146493792575794091f,(float16_t)0.95957151308198451733f,
-(float16_t)-0.28440753721127171039f,(float16_t)0.95870347489587159906f,
-(float16_t)-0.28734745954472945551f,(float16_t)0.95782641302753290802f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.29321916269425857271f,(float16_t)0.95604525134999651659f,
-(float16_t)-0.29615088824362384434f,(float16_t)0.95514116830577067141f,
-(float16_t)-0.29907982630804036406f,(float16_t)0.95422809510910566733f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.30492922973540226295f,(float16_t)0.95237501271976587880f,
-(float16_t)-0.30784964004153486661f,(float16_t)0.95143502096900833820f,
-(float16_t)-0.31076715274961136393f,(float16_t)0.95048607394948181337f,
-(float16_t)-0.31368174039889140658f,(float16_t)0.94952818059303667475f,
-(float16_t)-0.31659337555616573479f,(float16_t)0.94856134991573037851f,
-(float16_t)-0.31950203081601563637f,(float16_t)0.94758559101774120226f,
-(float16_t)-0.32240767880106985244f,(float16_t)0.94660091308328353499f,
-(float16_t)-0.32531029216226287071f,(float16_t)0.94560732538052139073f,
-(float16_t)-0.32820984357909255280f,(float16_t)0.94460483726148025685f,
-(float16_t)-0.33110630575987631818f,(float16_t)0.94359345816196038559f,
-(float16_t)-0.33399965144200938205f,(float16_t)0.94257319760144686605f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.33977688440682685123f,(float16_t)0.94050607059326829518f,
-(float16_t)-0.34266071731199426731f,(float16_t)0.93945922360218991898f,
-(float16_t)-0.34554132496398903829f,(float16_t)0.93840353406310816897f,
-(float16_t)-0.34841868024943439819f,(float16_t)0.93733901191257495977f,
-(float16_t)-0.35129275608556703725f,(float16_t)0.93626566717027825959f,
-(float16_t)-0.35416352542049039931f,(float16_t)0.93518350993894761025f,
-(float16_t)-0.35703096123342992207f,(float16_t)0.93409255040425898109f,
-(float16_t)-0.35989503653498816638f,(float16_t)0.93299279883473884567f,
-(float16_t)-0.36275572436739711435f,(float16_t)0.93188426558166814750f,
-(float16_t)-0.36561299780477385379f,(float16_t)0.93076696107898371224f,
-(float16_t)-0.36846682995337221023f,(float16_t)0.92964089584318132520f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.37416406297145787807f,(float16_t)0.92736252565040111495f,
-(float16_t)-0.37700741021641820394f,(float16_t)0.92621024213831137928f,
-(float16_t)-0.37984720892405099413f,(float16_t)0.92504924078267769527f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.38551605384391890441f,(float16_t)0.92270112833387851747f,
-(float16_t)-0.38834504669882619066f,(float16_t)0.92151403934204201285f,
-(float16_t)-0.39117038430225387069f,(float16_t)0.92031827670911059425f,
-(float16_t)-0.39399204006104798781f,(float16_t)0.91911385169005777040f,
-(float16_t)-0.39680998741671030805f,(float16_t)0.91790077562139049672f,
-(float16_t)-0.39962419984564667708f,(float16_t)0.91667905992104270485f,
-(float16_t)-0.40243465085941843018f,(float16_t)0.91544871608826783316f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.40804416286497863231f,(float16_t)0.91296219042839821256f,
-(float16_t)-0.41084317105790379987f,(float16_t)0.91170603200542987832f,
-(float16_t)-0.41363831223843450235f,(float16_t)0.91044129225806724737f,
-(float16_t)-0.41642956009763698599f,(float16_t)0.90916798309052249127f,
-(float16_t)-0.41921688836322407168f,(float16_t)0.90788611648766615048f,
-(float16_t)-0.42200027079979968159f,(float16_t)0.90659570451491533483f,
-(float16_t)-0.42477968120910869487f,(float16_t)0.90529675931811881551f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.43032648134008272267f,(float16_t)0.90267331823725871498f,
-(float16_t)-0.43309381885315190175f,(float16_t)0.90134884704602202810f,
-(float16_t)-0.43585707992225536378f,(float16_t)0.90001589201616027935f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.44137126873171672603f,(float16_t)0.89732458070541831763f,
-(float16_t)-0.44412214457042914484f,(float16_t)0.89596624975618521791f,
-(float16_t)-0.44686884016237399253f,(float16_t)0.89459948563138280697f,
-(float16_t)-0.44961132965460670619f,(float16_t)0.89322430119551521344f,
-(float16_t)-0.45234958723377088896f,(float16_t)0.89184070939234272313f,
-(float16_t)-0.45508358712634372489f,(float16_t)0.89044872324475798919f,
-(float16_t)-0.45781330359887700832f,(float16_t)0.88904835585466468473f,
-(float16_t)-0.46053871095824006066f,(float16_t)0.88763962040285393496f,
-(float16_t)-0.46325978355186014923f,(float16_t)0.88622253014888063838f,
-(float16_t)-0.46597649576796601467f,(float16_t)0.88479709843093790056f,
-(float16_t)-0.46868882203582767909f,(float16_t)0.88336333866573168994f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.47410021465054991152f,(float16_t)0.88047088905216086552f,
-(float16_t)-0.47679923006332192159f,(float16_t)0.87901222642863352519f,
-(float16_t)-0.47949375766015311928f,(float16_t)0.87754529020726124156f,
-(float16_t)-0.48218377207912271887f,(float16_t)0.87607009419540660122f,
-(float16_t)-0.48486924800079100883f,(float16_t)0.87458665227817622423f,
-(float16_t)-0.48755016014843571837f,(float16_t)0.87309497841829020182f,
-(float16_t)-0.49022648328829121489f,(float16_t)0.87159508665595097909f,
-(float16_t)-0.49289819222978398239f,(float16_t)0.87008699110871146054f,
-(float16_t)-0.49556526182577237405f,(float16_t)0.86857070597134100609f,
-(float16_t)-0.49822766697278159098f,(float16_t)0.86704624551569275948f,
-(float16_t)-0.50088538261124082585f,(float16_t)0.86551362409056908920f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.50618664534515511733f,(float16_t)0.86242395611104061270f,
-(float16_t)-0.50883014254310710012f,(float16_t)0.86086693863776719837f,
-(float16_t)-0.51146885043797041259f,(float16_t)0.85930181835700847337f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.51673179901764965116f,(float16_t)0.85614732837519458286f,
-(float16_t)-0.51935599016558964269f,(float16_t)0.85455798836540053376f,
-(float16_t)-0.52197529293715427823f,(float16_t)0.85296060493036374162f,
-(float16_t)-0.52458968267846872724f,(float16_t)0.85135519310526519554f,
-(float16_t)-0.52719913478190105760f,(float16_t)0.84974176800085265970f,
-(float16_t)-0.52980362468629471628f,(float16_t)0.84812034480329723252f,
-(float16_t)-0.53240312787719790144f,(float16_t)0.84649093877405212627f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.53758707629564561614f,(float16_t)0.84320823964184532517f,
-(float16_t)-0.54017147272989285423f,(float16_t)0.84155497743689844370f,
-(float16_t)-0.54275078486451577842f,(float16_t)0.83989379419599952126f,
-(float16_t)-0.54532498842204624179f,(float16_t)0.83822470555483818977f,
-(float16_t)-0.54789405917310018967f,(float16_t)0.83654772722351200542f,
-(float16_t)-0.55045797293660470029f,(float16_t)0.83486287498638012128f,
-(float16_t)-0.55301670558002735678f,(float16_t)0.83317016470191329613f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.55811853122055610221f,(float16_t)0.82976123379452304540f,
-(float16_t)-0.56066157619733592021f,(float16_t)0.82804504525775579626f,
-(float16_t)-0.56319934401383386913f,(float16_t)0.82632106284566364529f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.56825895267013148970f,(float16_t)0.82284978137582631685f,
-(float16_t)-0.57078074588696714464f,(float16_t)0.82110251499110475937f,
-(float16_t)-0.57329716669804198226f,(float16_t)0.81934752007679712005f,
-(float16_t)-0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)-0.57831379641165547856f,(float16_t)0.81581441080673378075f,
-(float16_t)-0.58081395809576441547f,(float16_t)0.81403632970594852480f,
-(float16_t)-0.58330865293769840196f,(float16_t)0.81225058658520388200f,
-(float16_t)-0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)-0.58828154822264522306f,(float16_t)0.80865618158817509364f,
-(float16_t)-0.59075970185887405339f,(float16_t)0.80684755354379944503f,
-(float16_t)-0.59323229503979990618f,(float16_t)0.80503133114296354655f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.59816070699634216190f,(float16_t)0.80137617172314024039f,
-(float16_t)-0.60061647938386875101f,(float16_t)0.79953726910790523519f,
-(float16_t)-0.60306659854034827539f,(float16_t)0.79769084094339104407f,
-(float16_t)-0.60551104140432543410f,(float16_t)0.79583690460888356633f,
-(float16_t)-0.60794978496777352106f,(float16_t)0.79397547755433728334f,
-(float16_t)-0.61038280627630958630f,(float16_t)0.79210657730021227785f,
-(float16_t)-0.61281008242940970820f,(float16_t)0.79023022143731003197f,
-(float16_t)-0.61523159058062670823f,(float16_t)0.78834642762660633863f,
-(float16_t)-0.61764730793780375784f,(float16_t)0.78645521359908587833f,
-(float16_t)-0.62005721176328920663f,(float16_t)0.78455659715557513056f,
-(float16_t)-0.62246127937414996723f,(float16_t)0.78265059616657572938f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.62725181549514386070f,(float16_t)0.77881651238147608929f,
-(float16_t)-0.62963823891492709528f,(float16_t)0.77688846567323244230f,
-(float16_t)-0.63201873593980895105f,(float16_t)0.77495310659487393057f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.63676186123628431002f,(float16_t)0.77106052426181370674f,
-(float16_t)-0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)-0.64148101280858305095f,(float16_t)0.76713891193582040007f,
-(float16_t)-0.64383154288979127511f,(float16_t)0.76516726562245906962f,
-(float16_t)-0.64617601298331639459f,(float16_t)0.76318841726338115805f,
-(float16_t)-0.64851440102211244110f,(float16_t)0.76120238548426188974f,
-(float16_t)-0.65084668499638076433f,(float16_t)0.75920918897838807204f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.65549285299961546070f,(float16_t)0.75520137689653643598f,
-(float16_t)-0.65780669329707852633f,(float16_t)0.75318679904361252042f,
-(float16_t)-0.66011434206742036768f,(float16_t)0.75116513190968658975f,
-(float16_t)-0.66241577759017189475f,(float16_t)0.74913639452345925918f,
-(float16_t)-0.66471097820334490436f,(float16_t)0.74710060598018013245f,
-(float16_t)-0.66699992230363736034f,(float16_t)0.74505778544146605835f,
-(float16_t)-0.66928258834663589827f,(float16_t)0.74300795213512182968f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.67382900037875603783f,(float16_t)0.73888732446061522463f,
-(float16_t)-0.67609270357531581208f,(float16_t)0.73681656887737001504f,
-(float16_t)-0.67835004312986124653f,(float16_t)0.73473887809596372112f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.68284554638524797010f,(float16_t)0.73056276922782759087f,
-(float16_t)-0.68508366777270024439f,(float16_t)0.72846439044822530740f,
-(float16_t)-0.68731534089175916336f,(float16_t)0.72635915508434589771f,
-(float16_t)-0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)-0.69175925836415763648f,(float16_t)0.72212819392921545614f,
-(float16_t)-0.69397146088965377952f,(float16_t)0.72000250796138176579f,
-(float16_t)-0.69617713149146298601f,(float16_t)0.71787004505573170920f,
-(float16_t)-0.69837624940897280457f,(float16_t)0.71573082528381870571f,
-(float16_t)-0.70056879394324822474f,(float16_t)0.71358486878079363525f,
-(float16_t)-0.70275474445722507788f,(float16_t)0.71143219574521665560f,
-(float16_t)-0.70493408037590488124f,(float16_t)0.70927282643886557789f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.70927282643886546687f,(float16_t)0.70493408037590510329f,
-(float16_t)-0.71143219574521654458f,(float16_t)0.70275474445722518890f,
-(float16_t)-0.71358486878079352422f,(float16_t)0.70056879394324833576f,
-(float16_t)-0.71573082528381859468f,(float16_t)0.69837624940897291559f,
-(float16_t)-0.71787004505573159818f,(float16_t)0.69617713149146309703f,
-(float16_t)-0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)-0.72212819392921523409f,(float16_t)0.69175925836415785852f,
-(float16_t)-0.72424708295146678072f,(float16_t)0.68954054473706705153f,
-(float16_t)-0.72635915508434578669f,(float16_t)0.68731534089175927438f,
-(float16_t)-0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)-0.73056276922782747985f,(float16_t)0.68284554638524808112f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.73473887809596349907f,(float16_t)0.67835004312986135755f,
-(float16_t)-0.73681656887736979300f,(float16_t)0.67609270357531592310f,
-(float16_t)-0.73888732446061511361f,(float16_t)0.67382900037875614885f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.74300795213512171866f,(float16_t)0.66928258834663600929f,
-(float16_t)-0.74505778544146594733f,(float16_t)0.66699992230363758239f,
-(float16_t)-0.74710060598018002143f,(float16_t)0.66471097820334501538f,
-(float16_t)-0.74913639452345914815f,(float16_t)0.66241577759017200577f,
-(float16_t)-0.75116513190968636771f,(float16_t)0.66011434206742047870f,
-(float16_t)-0.75318679904361240940f,(float16_t)0.65780669329707874837f,
-(float16_t)-0.75520137689653643598f,(float16_t)0.65549285299961557172f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.75920918897838796102f,(float16_t)0.65084668499638098638f,
-(float16_t)-0.76120238548426166769f,(float16_t)0.64851440102211255212f,
-(float16_t)-0.76318841726338115805f,(float16_t)0.64617601298331661663f,
-(float16_t)-0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)-0.76713891193582040007f,(float16_t)0.64148101280858316198f,
-(float16_t)-0.76910333764557947678f,(float16_t)0.63912444486377584241f,
-(float16_t)-0.77106052426181359571f,(float16_t)0.63676186123628442104f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.77495310659487381955f,(float16_t)0.63201873593980906207f,
-(float16_t)-0.77688846567323233128f,(float16_t)0.62963823891492720630f,
-(float16_t)-0.77881651238147597827f,(float16_t)0.62725181549514408275f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.78265059616657561836f,(float16_t)0.62246127937415007825f,
-(float16_t)-0.78455659715557501954f,(float16_t)0.62005721176328942867f,
-(float16_t)-0.78645521359908576731f,(float16_t)0.61764730793780386886f,
-(float16_t)-0.78834642762660622761f,(float16_t)0.61523159058062693028f,
-(float16_t)-0.79023022143730992095f,(float16_t)0.61281008242940981923f,
-(float16_t)-0.79210657730021216683f,(float16_t)0.61038280627630969732f,
-(float16_t)-0.79397547755433717231f,(float16_t)0.60794978496777363208f,
-(float16_t)-0.79583690460888345530f,(float16_t)0.60551104140432565615f,
-(float16_t)-0.79769084094339093305f,(float16_t)0.60306659854034838641f,
-(float16_t)-0.79953726910790512417f,(float16_t)0.60061647938386886203f,
-(float16_t)-0.80137617172314024039f,(float16_t)0.59816070699634238395f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.80503133114296343553f,(float16_t)0.59323229503980001720f,
-(float16_t)-0.80684755354379933401f,(float16_t)0.59075970185887416442f,
-(float16_t)-0.80865618158817498262f,(float16_t)0.58828154822264533408f,
-(float16_t)-0.81045719825259465718f,(float16_t)0.58579785745643897510f,
-(float16_t)-0.81225058658520377097f,(float16_t)0.58330865293769851299f,
-(float16_t)-0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)-0.81581441080673378075f,(float16_t)0.57831379641165570060f,
-(float16_t)-0.81758481315158360037f,(float16_t)0.57580819141784544968f,
-(float16_t)-0.81934752007679700903f,(float16_t)0.57329716669804209328f,
-(float16_t)-0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)-0.82284978137582620583f,(float16_t)0.56825895267013171175f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.82632106284566353427f,(float16_t)0.56319934401383409117f,
-(float16_t)-0.82804504525775568524f,(float16_t)0.56066157619733614226f,
-(float16_t)-0.82976123379452293438f,(float16_t)0.55811853122055632426f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.83317016470191318511f,(float16_t)0.55301670558002746780f,
-(float16_t)-0.83486287498638001026f,(float16_t)0.55045797293660492233f,
-(float16_t)-0.83654772722351189440f,(float16_t)0.54789405917310041172f,
-(float16_t)-0.83822470555483807875f,(float16_t)0.54532498842204635281f,
-(float16_t)-0.83989379419599952126f,(float16_t)0.54275078486451588944f,
-(float16_t)-0.84155497743689833268f,(float16_t)0.54017147272989296525f,
-(float16_t)-0.84320823964184532517f,(float16_t)0.53758707629564572716f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.84649093877405201525f,(float16_t)0.53240312787719801246f,
-(float16_t)-0.84812034480329712149f,(float16_t)0.52980362468629482731f,
-(float16_t)-0.84974176800085254868f,(float16_t)0.52719913478190127964f,
-(float16_t)-0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)-0.85296060493036363059f,(float16_t)0.52197529293715438925f,
-(float16_t)-0.85455798836540042274f,(float16_t)0.51935599016558975372f,
-(float16_t)-0.85614732837519447184f,(float16_t)0.51673179901764976218f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.85930181835700836235f,(float16_t)0.51146885043797052361f,
-(float16_t)-0.86086693863776719837f,(float16_t)0.50883014254310732216f,
-(float16_t)-0.86242395611104050168f,(float16_t)0.50618664534515522835f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.86551362409056897818f,(float16_t)0.50088538261124093687f,
-(float16_t)-0.86704624551569264845f,(float16_t)0.49822766697278175752f,
-(float16_t)-0.86857070597134089507f,(float16_t)0.49556526182577254058f,
-(float16_t)-0.87008699110871134952f,(float16_t)0.49289819222978414892f,
-(float16_t)-0.87159508665595086807f,(float16_t)0.49022648328829138142f,
-(float16_t)-0.87309497841829009079f,(float16_t)0.48755016014843588490f,
-(float16_t)-0.87458665227817611321f,(float16_t)0.48486924800079111986f,
-(float16_t)-0.87607009419540649020f,(float16_t)0.48218377207912288540f,
-(float16_t)-0.87754529020726113053f,(float16_t)0.47949375766015328582f,
-(float16_t)-0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)-0.88047088905216075450f,(float16_t)0.47410021465055007805f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.88336333866573168994f,(float16_t)0.46868882203582784562f,
-(float16_t)-0.88479709843093778954f,(float16_t)0.46597649576796618121f,
-(float16_t)-0.88622253014888052736f,(float16_t)0.46325978355186031576f,
-(float16_t)-0.88763962040285382393f,(float16_t)0.46053871095824022719f,
-(float16_t)-0.88904835585466457371f,(float16_t)0.45781330359887717485f,
-(float16_t)-0.89044872324475787817f,(float16_t)0.45508358712634389143f,
-(float16_t)-0.89184070939234261211f,(float16_t)0.45234958723377105549f,
-(float16_t)-0.89322430119551521344f,(float16_t)0.44961132965460687272f,
-(float16_t)-0.89459948563138269595f,(float16_t)0.44686884016237415906f,
-(float16_t)-0.89596624975618510689f,(float16_t)0.44412214457042931137f,
-(float16_t)-0.89732458070541820661f,(float16_t)0.44137126873171689256f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90001589201616016833f,(float16_t)0.43585707992225553031f,
-(float16_t)-0.90134884704602191707f,(float16_t)0.43309381885315206828f,
-(float16_t)-0.90267331823725871498f,(float16_t)0.43032648134008288920f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.90529675931811870448f,(float16_t)0.42477968120910886141f,
-(float16_t)-0.90659570451491533483f,(float16_t)0.42200027079979984812f,
-(float16_t)-0.90788611648766603945f,(float16_t)0.41921688836322423821f,
-(float16_t)-0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)-0.91044129225806713634f,(float16_t)0.41363831223843466889f,
-(float16_t)-0.91170603200542976730f,(float16_t)0.41084317105790413294f,
-(float16_t)-0.91296219042839821256f,(float16_t)0.40804416286497857680f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.91544871608826772214f,(float16_t)0.40243465085941859671f,
-(float16_t)-0.91667905992104259383f,(float16_t)0.39962419984564706565f,
-(float16_t)-0.91790077562139049672f,(float16_t)0.39680998741671025254f,
-(float16_t)-0.91911385169005777040f,(float16_t)0.39399204006104815434f,
-(float16_t)-0.92031827670911048322f,(float16_t)0.39117038430225403722f,
-(float16_t)-0.92151403934204179080f,(float16_t)0.38834504669882657923f,
-(float16_t)-0.92270112833387862850f,(float16_t)0.38551605384391884890f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.92504924078267747323f,(float16_t)0.37984720892405138271f,
-(float16_t)-0.92621024213831137928f,(float16_t)0.37700741021641814843f,
-(float16_t)-0.92736252565040111495f,(float16_t)0.37416406297145804460f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.92964089584318121418f,(float16_t)0.36846682995337259880f,
-(float16_t)-0.93076696107898371224f,(float16_t)0.36561299780477379828f,
-(float16_t)-0.93188426558166803648f,(float16_t)0.36275572436739728088f,
-(float16_t)-0.93299279883473884567f,(float16_t)0.35989503653498833291f,
-(float16_t)-0.93409255040425875904f,(float16_t)0.35703096123343031065f,
-(float16_t)-0.93518350993894761025f,(float16_t)0.35416352542049039931f,
-(float16_t)-0.93626566717027825959f,(float16_t)0.35129275608556720378f,
-(float16_t)-0.93733901191257484875f,(float16_t)0.34841868024943478677f,
-(float16_t)-0.93840353406310816897f,(float16_t)0.34554132496398898278f,
-(float16_t)-0.93945922360218991898f,(float16_t)0.34266071731199443384f,
-(float16_t)-0.94050607059326829518f,(float16_t)0.33977688440682701776f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94257319760144686605f,(float16_t)0.33399965144200938205f,
-(float16_t)-0.94359345816196038559f,(float16_t)0.33110630575987648472f,
-(float16_t)-0.94460483726148014583f,(float16_t)0.32820984357909271933f,
-(float16_t)-0.94560732538052116869f,(float16_t)0.32531029216226325929f,
-(float16_t)-0.94660091308328353499f,(float16_t)0.32240767880106985244f,
-(float16_t)-0.94758559101774109124f,(float16_t)0.31950203081601580291f,
-(float16_t)-0.94856134991573026749f,(float16_t)0.31659337555616606785f,
-(float16_t)-0.94952818059303667475f,(float16_t)0.31368174039889140658f,
-(float16_t)-0.95048607394948170235f,(float16_t)0.31076715274961153046f,
-(float16_t)-0.95143502096900833820f,(float16_t)0.30784964004153503314f,
-(float16_t)-0.95237501271976576778f,(float16_t)0.30492922973540265152f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95422809510910555630f,(float16_t)0.29907982630804053059f,
-(float16_t)-0.95514116830577067141f,(float16_t)0.29615088824362401088f,
-(float16_t)-0.95604525134999629454f,(float16_t)0.29321916269425896129f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.95782641302753290802f,(float16_t)0.28734745954472962204f,
-(float16_t)-0.95870347489587148804f,(float16_t)0.28440753721127209896f,
-(float16_t)-0.95957151308198451733f,(float16_t)0.28146493792575788540f,
-(float16_t)-0.96043051941556578655f,(float16_t)0.27851968938505317075f,
-(float16_t)-0.96128048581132063966f,(float16_t)0.27557181931095831029f,
-(float16_t)-0.96212140426904146917f,(float16_t)0.27262135544994925418f,
-(float16_t)-0.96295326687368387741f,(float16_t)0.26966832557291509076f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96458979328981264700f,(float16_t)0.26375467897483156898f,
-(float16_t)-0.96539444169768928727f,(float16_t)0.26079411791527584707f,
-(float16_t)-0.96619000344541250413f,(float16_t)0.25783110216215898713f,
-(float16_t)-0.96697647104485207059f,(float16_t)0.25486565960451468271f,
-(float16_t)-0.96775383709347539973f,(float16_t)0.25189781815421719013f,
-(float16_t)-0.96852209427441737777f,(float16_t)0.24892760574572009302f,
-(float16_t)-0.96928123535654842069f,(float16_t)0.24595505033579465048f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.97077214072895023911f,(float16_t)0.24000302244874177626f,
-(float16_t)-0.97150389098625178352f,(float16_t)0.23702360599436717026f,
-(float16_t)-0.97222649707893626925f,(float16_t)0.23404195858354351345f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97364424965081186603f,(float16_t)0.22807208317088606409f,
-(float16_t)-0.97433938278557585821f,(float16_t)0.22508391135979283204f,
-(float16_t)-0.97502534506699412020f,(float16_t)0.22209362097320364815f,
-(float16_t)-0.97570213003852845901f,(float16_t)0.21910124015687004739f,
-(float16_t)-0.97636973133002114000f,(float16_t)0.21610679707621943679f,
-(float16_t)-0.97702814265775439484f,(float16_t)0.21311031991609141745f,
-(float16_t)-0.97767735782450992943f,(float16_t)0.21011183688046980444f,
-(float16_t)-0.97831737071962754371f,(float16_t)0.20711137619221883788f,
-(float16_t)-0.97894817531906219710f,(float16_t)0.20410896609281684033f,
-(float16_t)-0.97956976568544051887f,(float16_t)0.20110463484209201157f,
-(float16_t)-0.98018213596811731847f,(float16_t)0.19809841071795381007f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98137919331375456089f,(float16_t)0.19208039704989246510f,
-(float16_t)-0.98196386910955524296f,(float16_t)0.18906866414980635915f,
-(float16_t)-0.98253930228744124076f,(float16_t)0.18605515166344691047f,
-(float16_t)-0.98310548743121628501f,(float16_t)0.18303988795514089527f,
-(float16_t)-0.98366241921173025453f,(float16_t)0.18002290140569957022f,
-(float16_t)-0.98421009238692902521f,(float16_t)0.17700422041214894375f,
-(float16_t)-0.98474850180190420801f,(float16_t)0.17398387338746412745f,
-(float16_t)-0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)-0.98579750916756736512f,(float16_t)0.16793829497473128365f,
-(float16_t)-0.98630809724459855836f,(float16_t)0.16491312048997014417f,
-(float16_t)-0.98680940181418552726f,(float16_t)0.16188639378011174252f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.98778414164457217783f,(float16_t)0.15582839765426537149f,
-(float16_t)-0.98825756773074946437f,(float16_t)0.15279718525844368515f,
-(float16_t)-0.98872169196032377858f,(float16_t)0.14976453467732145364f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.98962201746320077600f,(float16_t)0.14369503315029463764f,
-(float16_t)-0.99005821026229701154f,(float16_t)0.14065823933284954395f,
-(float16_t)-0.99048508425645709341f,(float16_t)0.13762012158648603832f,
-(float16_t)-0.99090263542778000971f,(float16_t)0.13458070850712627875f,
-(float16_t)-0.99131085984611544415f,(float16_t)0.13154002870288333815f,
-(float16_t)-0.99170975366909952520f,(float16_t)0.12849811079379308554f,
-(float16_t)-0.99209931314219179654f,(float16_t)0.12545498341154626143f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99285041445986510489f,(float16_t)0.11936521481099163222f,
-(float16_t)-0.99321194923479450001f,(float16_t)0.11631863091190471071f,
-(float16_t)-0.99356413552059530403f,(float16_t)0.11327095217756441570f,
-(float16_t)-0.99390697000235606051f,(float16_t)0.11022220729388323979f,
-(float16_t)-0.99424044945318790223f,(float16_t)0.10717242495680916192f,
-(float16_t)-0.99456457073425541537f,(float16_t)0.10412163387205457254f,
-(float16_t)-0.99487933079480561638f,(float16_t)0.10106986275482793269f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99548075549192693856f,(float16_t)0.09496349532963890838f,
-(float16_t)-0.99576741446765981713f,(float16_t)0.09190895649713275162f,
-(float16_t)-0.99604470090125196702f,(float16_t)0.08885355258252475297f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99657114579055483539f,(float16_t)0.08274026454937563613f,
-(float16_t)-0.99682029929116566791f,(float16_t)0.07968243797143019502f,
-(float16_t)-0.99706007033948296225f,(float16_t)0.07662386139203168633f,
-(float16_t)-0.99729045667869020697f,(float16_t)0.07356456359966773162f,
-(float16_t)-0.99751145614030345410f,(float16_t)0.07050457338961385600f,
-(float16_t)-0.99772306664419163624f,(float16_t)0.06744391956366417584f,
-(float16_t)-0.99792528619859599548f,(float16_t)0.06438263092985770097f,
-(float16_t)-0.99811811290014917919f,(float16_t)0.06132073630220848809f,
-(float16_t)-0.99830154493389289261f,(float16_t)0.05825826450043579408f,
-(float16_t)-0.99847558057329477421f,(float16_t)0.05519524434969009380f,
-(float16_t)-0.99864021818026516009f,(float16_t)0.05213170468028359428f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99894129318685687124f,(float16_t)0.04600318213091470626f,
-(float16_t)-0.99907772775264536147f,(float16_t)0.04293825693494102147f,
-(float16_t)-0.99920475861836388631f,(float16_t)0.03987292758774012985f,
-(float16_t)-0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)-0.99943060455546173237f,(float16_t)0.03374117185137770480f,
-(float16_t)-0.99952941750109314256f,(float16_t)0.03067480317663686534f,
-(float16_t)-0.99961882249517863830f,(float16_t)0.02760814577896565994f,
-(float16_t)-0.99969881869620424997f,(float16_t)0.02454122852291232629f,
-(float16_t)-0.99976940535121527898f,(float16_t)0.02147408027546966747f,
-(float16_t)-0.99983058179582340319f,(float16_t)0.01840672990580510121f,
-(float16_t)-0.99988234745421256111f,(float16_t)0.01533920628498806026f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)-0.99995764455196389786f,(float16_t)0.00920375478206002066f,
-(float16_t)-0.99998117528260110909f,(float16_t)0.00613588464915479880f,
-(float16_t)-0.99999529380957619118f,(float16_t)0.00306795676296597701f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99992470183914450299f,(float16_t)0.01227153828571992539f,
-(float16_t)0.99969881869620424997f,(float16_t)0.02454122852291228812f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99811811290014917919f,(float16_t)0.06132073630220857829f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99631261218277800129f,(float16_t)0.08579731234443989385f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99247953459870996706f,(float16_t)0.12241067519921619566f,
-(float16_t)0.99090263542778000971f,(float16_t)0.13458070850712616773f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98730141815785843473f,(float16_t)0.15885814333386144570f,
-(float16_t)0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97831737071962765473f,(float16_t)0.20711137619221856032f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97293995220556017678f,(float16_t)0.23105810828067110951f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96377606579543984022f,(float16_t)0.26671275747489836538f,
-(float16_t)0.96043051941556578655f,(float16_t)0.27851968938505305973f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95330604035419386211f,(float16_t)0.30200594931922808417f,
-(float16_t)0.94952818059303667475f,(float16_t)0.31368174039889151761f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.93733901191257495977f,(float16_t)0.34841868024943456472f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92850608047321558924f,(float16_t)0.37131719395183754306f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91420975570353069095f,(float16_t)0.40524131400498986100f,
-(float16_t)0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89867446569395381673f,(float16_t)0.43861623853852765853f,
-(float16_t)0.89322430119551532446f,(float16_t)0.44961132965460653965f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.87607009419540660122f,(float16_t)0.48218377207912271887f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)0.83822470555483807875f,(float16_t)0.54532498842204646383f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82458930278502529099f,(float16_t)0.56573181078361312046f,
-(float16_t)0.81758481315158371139f,(float16_t)0.57580819141784533866f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.79583690460888356633f,(float16_t)0.60551104140432554512f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.75720884650648456748f,(float16_t)0.65317284295377675551f,
-(float16_t)0.74913639452345937020f,(float16_t)0.66241577759017178373f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73265427167241281570f,(float16_t)0.68060099779545302212f,
-(float16_t)0.72424708295146700276f,(float16_t)0.68954054473706682948f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.69837624940897291559f,(float16_t)0.71573082528381859468f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.65317284295377686654f,(float16_t)0.75720884650648456748f,
-(float16_t)0.64383154288979149715f,(float16_t)0.76516726562245895860f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62485948814238645443f,(float16_t)0.78073722857209448822f,
-(float16_t)0.61523159058062681925f,(float16_t)0.78834642762660622761f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.56573181078361323149f,(float16_t)0.82458930278502529099f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.53499761988709726435f,(float16_t)0.84485356524970700587f,
-(float16_t)0.52458968267846883826f,(float16_t)0.85135519310526519554f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.50353838372571757542f,(float16_t)0.86397285612158669643f,
-(float16_t)0.49289819222978409341f,(float16_t)0.87008699110871134952f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.46053871095824000514f,(float16_t)0.88763962040285393496f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.43861623853852771404f,(float16_t)0.89867446569395381673f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.40524131400498986100f,(float16_t)0.91420975570353069095f,
-(float16_t)0.39399204006104809883f,(float16_t)0.91911385169005777040f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37131719395183759858f,(float16_t)0.92850608047321558924f,
-(float16_t)0.35989503653498827740f,(float16_t)0.93299279883473884567f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.32531029216226298173f,(float16_t)0.94560732538052127971f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.30200594931922819519f,(float16_t)0.95330604035419375109f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.26671275747489842090f,(float16_t)0.96377606579543984022f,
-(float16_t)0.25486565960451462720f,(float16_t)0.96697647104485207059f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.23105810828067127605f,(float16_t)0.97293995220556006576f,
-(float16_t)0.21910124015686976984f,(float16_t)0.97570213003852857003f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.18303988795514106180f,(float16_t)0.98310548743121628501f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.15885814333386139019f,(float16_t)0.98730141815785843473f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.12241067519921627893f,(float16_t)0.99247953459870996706f,
-(float16_t)0.11022220729388318428f,(float16_t)0.99390697000235606051f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.08579731234443987997f,(float16_t)0.99631261218277800129f,
-(float16_t)0.07356456359966745406f,(float16_t)0.99729045667869020697f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.03680722294135899131f,(float16_t)0.99932238458834954375f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.01227153828571994447f,(float16_t)0.99992470183914450299f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.02454122852291214241f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.03680722294135886641f,(float16_t)0.99932238458834954375f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.06132073630220852972f,(float16_t)0.99811811290014917919f,
-(float16_t)-0.07356456359966732916f,(float16_t)0.99729045667869020697f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.11022220729388305938f,(float16_t)0.99390697000235606051f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.13458070850712611222f,(float16_t)0.99090263542778000971f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.17096188876030124493f,(float16_t)0.98527764238894122162f,
-(float16_t)-0.18303988795514092303f,(float16_t)0.98310548743121628501f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.20711137619221844930f,(float16_t)0.97831737071962765473f,
-(float16_t)-0.21910124015686965881f,(float16_t)0.97570213003852857003f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.25486565960451451618f,(float16_t)0.96697647104485207059f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.27851968938505294870f,(float16_t)0.96043051941556589757f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.31368174039889140658f,(float16_t)0.94952818059303667475f,
-(float16_t)-0.32531029216226287071f,(float16_t)0.94560732538052139073f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.34841868024943439819f,(float16_t)0.93733901191257495977f,
-(float16_t)-0.35989503653498816638f,(float16_t)0.93299279883473884567f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.39399204006104798781f,(float16_t)0.91911385169005777040f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.41642956009763698599f,(float16_t)0.90916798309052249127f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.44961132965460670619f,(float16_t)0.89322430119551521344f,
-(float16_t)-0.46053871095824006066f,(float16_t)0.88763962040285393496f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.48218377207912271887f,(float16_t)0.87607009419540660122f,
-(float16_t)-0.49289819222978398239f,(float16_t)0.87008699110871146054f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.52458968267846872724f,(float16_t)0.85135519310526519554f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.54532498842204624179f,(float16_t)0.83822470555483818977f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)-0.58579785745643886408f,(float16_t)0.81045719825259476821f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.60551104140432543410f,(float16_t)0.79583690460888356633f,
-(float16_t)-0.61523159058062670823f,(float16_t)0.78834642762660633863f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.64383154288979127511f,(float16_t)0.76516726562245906962f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.66241577759017189475f,(float16_t)0.74913639452345925918f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)-0.69837624940897280457f,(float16_t)0.71573082528381870571f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.71573082528381859468f,(float16_t)0.69837624940897291559f,
-(float16_t)-0.72424708295146678072f,(float16_t)0.68954054473706705153f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.74913639452345914815f,(float16_t)0.66241577759017200577f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.78834642762660622761f,(float16_t)0.61523159058062693028f,
-(float16_t)-0.79583690460888345530f,(float16_t)0.60551104140432565615f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.81045719825259465718f,(float16_t)0.58579785745643897510f,
-(float16_t)-0.81758481315158360037f,(float16_t)0.57580819141784544968f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.83822470555483807875f,(float16_t)0.54532498842204635281f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.87008699110871134952f,(float16_t)0.49289819222978414892f,
-(float16_t)-0.87607009419540649020f,(float16_t)0.48218377207912288540f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.88763962040285382393f,(float16_t)0.46053871095824022719f,
-(float16_t)-0.89322430119551521344f,(float16_t)0.44961132965460687272f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.90916798309052238025f,(float16_t)0.41642956009763715253f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.91911385169005777040f,(float16_t)0.39399204006104815434f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.93299279883473884567f,(float16_t)0.35989503653498833291f,
-(float16_t)-0.93733901191257484875f,(float16_t)0.34841868024943478677f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94560732538052116869f,(float16_t)0.32531029216226325929f,
-(float16_t)-0.94952818059303667475f,(float16_t)0.31368174039889140658f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.96043051941556578655f,(float16_t)0.27851968938505317075f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96697647104485207059f,(float16_t)0.25486565960451468271f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97570213003852845901f,(float16_t)0.21910124015687004739f,
-(float16_t)-0.97831737071962754371f,(float16_t)0.20711137619221883788f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98310548743121628501f,(float16_t)0.18303988795514089527f,
-(float16_t)-0.98527764238894122162f,(float16_t)0.17096188876030121717f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.99090263542778000971f,(float16_t)0.13458070850712627875f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99390697000235606051f,(float16_t)0.11022220729388323979f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99729045667869020697f,(float16_t)0.07356456359966773162f,
-(float16_t)-0.99811811290014917919f,(float16_t)0.06132073630220848809f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)-0.99969881869620424997f,(float16_t)0.02454122852291232629f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99879545620517240501f,(float16_t)0.04906767432741801493f,
-(float16_t)0.99518472667219692873f,(float16_t)0.09801714032956060363f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.97003125319454397424f,(float16_t)0.24298017990326387094f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.94154406518302080631f,(float16_t)0.33688985339222005111f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.88192126434835504956f,(float16_t)0.47139673682599764204f,
-(float16_t)0.85772861000027211809f,(float16_t)0.51410274419322166128f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.80320753148064494287f,(float16_t)0.59569930449243335691f,
-(float16_t)0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.67155895484701833009f,(float16_t)0.74095112535495910588f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.59569930449243346793f,(float16_t)0.80320753148064483184f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.47139673682599780857f,(float16_t)0.88192126434835493853f,
-(float16_t)0.42755509343028219593f,(float16_t)0.90398929312344333820f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.33688985339222005111f,(float16_t)0.94154406518302080631f,
-(float16_t)0.29028467725446233105f,(float16_t)0.95694033573220893540f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.14673047445536174793f,(float16_t)0.98917650996478101444f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.04906767432741812596f,(float16_t)0.99879545620517240501f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.09801714032956064526f,(float16_t)0.99518472667219692873f,
-(float16_t)-0.14673047445536163691f,(float16_t)0.98917650996478101444f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.24298017990326387094f,(float16_t)0.97003125319454397424f,
-(float16_t)-0.29028467725446216452f,(float16_t)0.95694033573220893540f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.42755509343028186287f,(float16_t)0.90398929312344344922f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.51410274419322155026f,(float16_t)0.85772861000027211809f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.63439328416364537677f,(float16_t)0.77301045336273710440f,
-(float16_t)-0.67155895484701844111f,(float16_t)0.74095112535495899486f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.74095112535495888384f,(float16_t)0.67155895484701855214f,
-(float16_t)-0.77301045336273699338f,(float16_t)0.63439328416364548779f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.85772861000027200706f,(float16_t)0.51410274419322177231f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.90398929312344333820f,(float16_t)0.42755509343028202940f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.95694033573220882438f,(float16_t)0.29028467725446238656f,
-(float16_t)-0.97003125319454397424f,(float16_t)0.24298017990326406523f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98917650996478101444f,(float16_t)0.14673047445536180344f,
-(float16_t)-0.99518472667219681771f,(float16_t)0.09801714032956082567f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.98078528040323043058f,(float16_t)0.19509032201612824808f,
-(float16_t)0.92387953251128673848f,(float16_t)0.38268343236508978178f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.55557023301960228867f,(float16_t)0.83146961230254523567f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.19509032201612833135f,(float16_t)0.98078528040323043058f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.38268343236508972627f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.55557023301960195560f,(float16_t)0.83146961230254534669f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.83146961230254534669f,(float16_t)0.55557023301960217765f,
-(float16_t)-0.92387953251128673848f,(float16_t)0.38268343236508989280f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.70710678118654757274f,(float16_t)0.70710678118654757274f,
-(float16_t)0.00000000000000006123f,(float16_t)1.00000000000000000000f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0030670166016f,
+(float16_t)1.0000000000000f,(float16_t)0.0061340332031f,
+(float16_t)1.0000000000000f,(float16_t)0.0092010498047f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0153427124023f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)1.0000000000000f,(float16_t)0.0214691162109f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0276031494141f,
+(float16_t)0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)0.9995117187500f,(float16_t)0.0337524414062f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0398864746094f,
+(float16_t)0.9990234375000f,(float16_t)0.0429382324219f,
+(float16_t)0.9990234375000f,(float16_t)0.0459899902344f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9985351562500f,(float16_t)0.0521240234375f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9985351562500f,(float16_t)0.0582580566406f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9980468750000f,(float16_t)0.0643920898438f,
+(float16_t)0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)0.9975585937500f,(float16_t)0.0704956054688f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9970703125000f,(float16_t)0.0765991210938f,
+(float16_t)0.9965820312500f,(float16_t)0.0797119140625f,
+(float16_t)0.9965820312500f,(float16_t)0.0827636718750f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9960937500000f,(float16_t)0.0888671875000f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9956054687500f,(float16_t)0.0949707031250f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9951171875000f,(float16_t)0.1010742187500f,
+(float16_t)0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)0.9941406250000f,(float16_t)0.1071777343750f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9936523437500f,(float16_t)0.1132812500000f,
+(float16_t)0.9931640625000f,(float16_t)0.1163330078125f,
+(float16_t)0.9926757812500f,(float16_t)0.1193847656250f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9921875000000f,(float16_t)0.1254882812500f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9912109375000f,(float16_t)0.1315917968750f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9907226562500f,(float16_t)0.1375732421875f,
+(float16_t)0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)0.9897460937500f,(float16_t)0.1436767578125f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9887695312500f,(float16_t)0.1497802734375f,
+(float16_t)0.9882812500000f,(float16_t)0.1528320312500f,
+(float16_t)0.9877929687500f,(float16_t)0.1558837890625f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9868164062500f,(float16_t)0.1618652343750f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9858398437500f,(float16_t)0.1679687500000f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9848632812500f,(float16_t)0.1739501953125f,
+(float16_t)0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)0.9838867187500f,(float16_t)0.1800537109375f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9824218750000f,(float16_t)0.1860351562500f,
+(float16_t)0.9819335937500f,(float16_t)0.1890869140625f,
+(float16_t)0.9814453125000f,(float16_t)0.1921386718750f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9799804687500f,(float16_t)0.1981201171875f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9790039062500f,(float16_t)0.2041015625000f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9775390625000f,(float16_t)0.2100830078125f,
+(float16_t)0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)0.9765625000000f,(float16_t)0.2160644531250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9750976562500f,(float16_t)0.2220458984375f,
+(float16_t)0.9741210937500f,(float16_t)0.2250976562500f,
+(float16_t)0.9736328125000f,(float16_t)0.2280273437500f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9721679687500f,(float16_t)0.2340087890625f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9707031250000f,(float16_t)0.2399902343750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9692382812500f,(float16_t)0.2459716796875f,
+(float16_t)0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)0.9677734375000f,(float16_t)0.2519531250000f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9663085937500f,(float16_t)0.2578125000000f,
+(float16_t)0.9653320312500f,(float16_t)0.2607421875000f,
+(float16_t)0.9643554687500f,(float16_t)0.2636718750000f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9628906250000f,(float16_t)0.2697753906250f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9614257812500f,(float16_t)0.2756347656250f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9594726562500f,(float16_t)0.2814941406250f,
+(float16_t)0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)0.9580078125000f,(float16_t)0.2873535156250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9560546875000f,(float16_t)0.2932128906250f,
+(float16_t)0.9550781250000f,(float16_t)0.2961425781250f,
+(float16_t)0.9541015625000f,(float16_t)0.2990722656250f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9521484375000f,(float16_t)0.3049316406250f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9506835937500f,(float16_t)0.3107910156250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9487304687500f,(float16_t)0.3166503906250f,
+(float16_t)0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)0.9467773437500f,(float16_t)0.3225097656250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9448242187500f,(float16_t)0.3281250000000f,
+(float16_t)0.9433593750000f,(float16_t)0.3310546875000f,
+(float16_t)0.9423828125000f,(float16_t)0.3339843750000f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9404296875000f,(float16_t)0.3398437500000f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9384765625000f,(float16_t)0.3454589843750f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9360351562500f,(float16_t)0.3513183593750f,
+(float16_t)0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)0.9340820312500f,(float16_t)0.3569335937500f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9316406250000f,(float16_t)0.3627929687500f,
+(float16_t)0.9306640625000f,(float16_t)0.3657226562500f,
+(float16_t)0.9296875000000f,(float16_t)0.3684082031250f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9272460937500f,(float16_t)0.3742675781250f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9252929687500f,(float16_t)0.3798828125000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9228515625000f,(float16_t)0.3854980468750f,
+(float16_t)0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)0.9204101562500f,(float16_t)0.3911132812500f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9179687500000f,(float16_t)0.3967285156250f,
+(float16_t)0.9165039062500f,(float16_t)0.3996582031250f,
+(float16_t)0.9155273437500f,(float16_t)0.4023437500000f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9130859375000f,(float16_t)0.4079589843750f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9106445312500f,(float16_t)0.4135742187500f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9077148437500f,(float16_t)0.4191894531250f,
+(float16_t)0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)0.9052734375000f,(float16_t)0.4248046875000f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.9028320312500f,(float16_t)0.4304199218750f,
+(float16_t)0.9013671875000f,(float16_t)0.4331054687500f,
+(float16_t)0.8999023437500f,(float16_t)0.4357910156250f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8974609375000f,(float16_t)0.4414062500000f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8945312500000f,(float16_t)0.4467773437500f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8916015625000f,(float16_t)0.4523925781250f,
+(float16_t)0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)0.8891601562500f,(float16_t)0.4577636718750f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8862304687500f,(float16_t)0.4633789062500f,
+(float16_t)0.8847656250000f,(float16_t)0.4660644531250f,
+(float16_t)0.8833007812500f,(float16_t)0.4687500000000f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8803710937500f,(float16_t)0.4741210937500f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8774414062500f,(float16_t)0.4794921875000f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8745117187500f,(float16_t)0.4848632812500f,
+(float16_t)0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)0.8715820312500f,(float16_t)0.4902343750000f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8686523437500f,(float16_t)0.4956054687500f,
+(float16_t)0.8671875000000f,(float16_t)0.4982910156250f,
+(float16_t)0.8657226562500f,(float16_t)0.5009765625000f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8623046875000f,(float16_t)0.5063476562500f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8593750000000f,(float16_t)0.5112304687500f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8559570312500f,(float16_t)0.5166015625000f,
+(float16_t)0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)0.8530273437500f,(float16_t)0.5219726562500f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8496093750000f,(float16_t)0.5273437500000f,
+(float16_t)0.8481445312500f,(float16_t)0.5297851562500f,
+(float16_t)0.8466796875000f,(float16_t)0.5322265625000f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8432617187500f,(float16_t)0.5375976562500f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8398437500000f,(float16_t)0.5429687500000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8364257812500f,(float16_t)0.5478515625000f,
+(float16_t)0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)0.8330078125000f,(float16_t)0.5532226562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8295898437500f,(float16_t)0.5581054687500f,
+(float16_t)0.8281250000000f,(float16_t)0.5605468750000f,
+(float16_t)0.8261718750000f,(float16_t)0.5629882812500f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8227539062500f,(float16_t)0.5683593750000f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8193359375000f,(float16_t)0.5732421875000f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8159179687500f,(float16_t)0.5781250000000f,
+(float16_t)0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)0.8120117187500f,(float16_t)0.5834960937500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8085937500000f,(float16_t)0.5883789062500f,
+(float16_t)0.8066406250000f,(float16_t)0.5908203125000f,
+(float16_t)0.8051757812500f,(float16_t)0.5932617187500f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.8012695312500f,(float16_t)0.5981445312500f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7978515625000f,(float16_t)0.6030273437500f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7939453125000f,(float16_t)0.6079101562500f,
+(float16_t)0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)0.7900390625000f,(float16_t)0.6127929687500f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7866210937500f,(float16_t)0.6176757812500f,
+(float16_t)0.7846679687500f,(float16_t)0.6201171875000f,
+(float16_t)0.7827148437500f,(float16_t)0.6225585937500f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7788085937500f,(float16_t)0.6274414062500f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7749023437500f,(float16_t)0.6318359375000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7709960937500f,(float16_t)0.6367187500000f,
+(float16_t)0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)0.7670898437500f,(float16_t)0.6416015625000f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7631835937500f,(float16_t)0.6459960937500f,
+(float16_t)0.7612304687500f,(float16_t)0.6484375000000f,
+(float16_t)0.7592773437500f,(float16_t)0.6508789062500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7553710937500f,(float16_t)0.6552734375000f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7509765625000f,(float16_t)0.6601562500000f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7470703125000f,(float16_t)0.6645507812500f,
+(float16_t)0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)0.7431640625000f,(float16_t)0.6694335937500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7387695312500f,(float16_t)0.6738281250000f,
+(float16_t)0.7368164062500f,(float16_t)0.6762695312500f,
+(float16_t)0.7348632812500f,(float16_t)0.6782226562500f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7304687500000f,(float16_t)0.6826171875000f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7265625000000f,(float16_t)0.6875000000000f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7221679687500f,(float16_t)0.6918945312500f,
+(float16_t)0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)0.7177734375000f,(float16_t)0.6962890625000f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7133789062500f,(float16_t)0.7006835937500f,
+(float16_t)0.7114257812500f,(float16_t)0.7026367187500f,
+(float16_t)0.7094726562500f,(float16_t)0.7050781250000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.7050781250000f,(float16_t)0.7094726562500f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.7006835937500f,(float16_t)0.7133789062500f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6962890625000f,(float16_t)0.7177734375000f,
+(float16_t)0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)0.6918945312500f,(float16_t)0.7221679687500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6875000000000f,(float16_t)0.7265625000000f,
+(float16_t)0.6850585937500f,(float16_t)0.7285156250000f,
+(float16_t)0.6826171875000f,(float16_t)0.7304687500000f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6782226562500f,(float16_t)0.7348632812500f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6738281250000f,(float16_t)0.7387695312500f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6694335937500f,(float16_t)0.7431640625000f,
+(float16_t)0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)0.6645507812500f,(float16_t)0.7470703125000f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6601562500000f,(float16_t)0.7509765625000f,
+(float16_t)0.6577148437500f,(float16_t)0.7534179687500f,
+(float16_t)0.6552734375000f,(float16_t)0.7553710937500f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6508789062500f,(float16_t)0.7592773437500f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6459960937500f,(float16_t)0.7631835937500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6416015625000f,(float16_t)0.7670898437500f,
+(float16_t)0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)0.6367187500000f,(float16_t)0.7709960937500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6318359375000f,(float16_t)0.7749023437500f,
+(float16_t)0.6293945312500f,(float16_t)0.7768554687500f,
+(float16_t)0.6274414062500f,(float16_t)0.7788085937500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6225585937500f,(float16_t)0.7827148437500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6176757812500f,(float16_t)0.7866210937500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6127929687500f,(float16_t)0.7900390625000f,
+(float16_t)0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)0.6079101562500f,(float16_t)0.7939453125000f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.6030273437500f,(float16_t)0.7978515625000f,
+(float16_t)0.6005859375000f,(float16_t)0.7993164062500f,
+(float16_t)0.5981445312500f,(float16_t)0.8012695312500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5932617187500f,(float16_t)0.8051757812500f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5883789062500f,(float16_t)0.8085937500000f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5834960937500f,(float16_t)0.8120117187500f,
+(float16_t)0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)0.5781250000000f,(float16_t)0.8159179687500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5732421875000f,(float16_t)0.8193359375000f,
+(float16_t)0.5708007812500f,(float16_t)0.8212890625000f,
+(float16_t)0.5683593750000f,(float16_t)0.8227539062500f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5629882812500f,(float16_t)0.8261718750000f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5581054687500f,(float16_t)0.8295898437500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5532226562500f,(float16_t)0.8330078125000f,
+(float16_t)0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)0.5478515625000f,(float16_t)0.8364257812500f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5429687500000f,(float16_t)0.8398437500000f,
+(float16_t)0.5400390625000f,(float16_t)0.8417968750000f,
+(float16_t)0.5375976562500f,(float16_t)0.8432617187500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5322265625000f,(float16_t)0.8466796875000f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5273437500000f,(float16_t)0.8496093750000f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5219726562500f,(float16_t)0.8530273437500f,
+(float16_t)0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)0.5166015625000f,(float16_t)0.8559570312500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5112304687500f,(float16_t)0.8593750000000f,
+(float16_t)0.5087890625000f,(float16_t)0.8608398437500f,
+(float16_t)0.5063476562500f,(float16_t)0.8623046875000f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.5009765625000f,(float16_t)0.8657226562500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4956054687500f,(float16_t)0.8686523437500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4902343750000f,(float16_t)0.8715820312500f,
+(float16_t)0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)0.4848632812500f,(float16_t)0.8745117187500f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4794921875000f,(float16_t)0.8774414062500f,
+(float16_t)0.4768066406250f,(float16_t)0.8789062500000f,
+(float16_t)0.4741210937500f,(float16_t)0.8803710937500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4687500000000f,(float16_t)0.8833007812500f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4633789062500f,(float16_t)0.8862304687500f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4577636718750f,(float16_t)0.8891601562500f,
+(float16_t)0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)0.4523925781250f,(float16_t)0.8916015625000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4467773437500f,(float16_t)0.8945312500000f,
+(float16_t)0.4440917968750f,(float16_t)0.8959960937500f,
+(float16_t)0.4414062500000f,(float16_t)0.8974609375000f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4357910156250f,(float16_t)0.8999023437500f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4304199218750f,(float16_t)0.9028320312500f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4248046875000f,(float16_t)0.9052734375000f,
+(float16_t)0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)0.4191894531250f,(float16_t)0.9077148437500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4135742187500f,(float16_t)0.9106445312500f,
+(float16_t)0.4108886718750f,(float16_t)0.9116210937500f,
+(float16_t)0.4079589843750f,(float16_t)0.9130859375000f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.4023437500000f,(float16_t)0.9155273437500f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3967285156250f,(float16_t)0.9179687500000f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3911132812500f,(float16_t)0.9204101562500f,
+(float16_t)0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)0.3854980468750f,(float16_t)0.9228515625000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3798828125000f,(float16_t)0.9252929687500f,
+(float16_t)0.3769531250000f,(float16_t)0.9262695312500f,
+(float16_t)0.3742675781250f,(float16_t)0.9272460937500f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3684082031250f,(float16_t)0.9296875000000f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3627929687500f,(float16_t)0.9316406250000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3569335937500f,(float16_t)0.9340820312500f,
+(float16_t)0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)0.3513183593750f,(float16_t)0.9360351562500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3454589843750f,(float16_t)0.9384765625000f,
+(float16_t)0.3427734375000f,(float16_t)0.9394531250000f,
+(float16_t)0.3398437500000f,(float16_t)0.9404296875000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3339843750000f,(float16_t)0.9423828125000f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3281250000000f,(float16_t)0.9448242187500f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3225097656250f,(float16_t)0.9467773437500f,
+(float16_t)0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)0.3166503906250f,(float16_t)0.9487304687500f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3107910156250f,(float16_t)0.9506835937500f,
+(float16_t)0.3078613281250f,(float16_t)0.9516601562500f,
+(float16_t)0.3049316406250f,(float16_t)0.9521484375000f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.2990722656250f,(float16_t)0.9541015625000f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2932128906250f,(float16_t)0.9560546875000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2873535156250f,(float16_t)0.9580078125000f,
+(float16_t)0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)0.2814941406250f,(float16_t)0.9594726562500f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2756347656250f,(float16_t)0.9614257812500f,
+(float16_t)0.2727050781250f,(float16_t)0.9619140625000f,
+(float16_t)0.2697753906250f,(float16_t)0.9628906250000f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2636718750000f,(float16_t)0.9643554687500f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2578125000000f,(float16_t)0.9663085937500f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2519531250000f,(float16_t)0.9677734375000f,
+(float16_t)0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)0.2459716796875f,(float16_t)0.9692382812500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2399902343750f,(float16_t)0.9707031250000f,
+(float16_t)0.2370605468750f,(float16_t)0.9716796875000f,
+(float16_t)0.2340087890625f,(float16_t)0.9721679687500f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2280273437500f,(float16_t)0.9736328125000f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2220458984375f,(float16_t)0.9750976562500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2160644531250f,(float16_t)0.9765625000000f,
+(float16_t)0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)0.2100830078125f,(float16_t)0.9775390625000f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.2041015625000f,(float16_t)0.9790039062500f,
+(float16_t)0.2010498046875f,(float16_t)0.9794921875000f,
+(float16_t)0.1981201171875f,(float16_t)0.9799804687500f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1921386718750f,(float16_t)0.9814453125000f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1860351562500f,(float16_t)0.9824218750000f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1800537109375f,(float16_t)0.9838867187500f,
+(float16_t)0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)0.1739501953125f,(float16_t)0.9848632812500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1679687500000f,(float16_t)0.9858398437500f,
+(float16_t)0.1649169921875f,(float16_t)0.9863281250000f,
+(float16_t)0.1618652343750f,(float16_t)0.9868164062500f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1558837890625f,(float16_t)0.9877929687500f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1497802734375f,(float16_t)0.9887695312500f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1436767578125f,(float16_t)0.9897460937500f,
+(float16_t)0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)0.1375732421875f,(float16_t)0.9907226562500f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1315917968750f,(float16_t)0.9912109375000f,
+(float16_t)0.1285400390625f,(float16_t)0.9916992187500f,
+(float16_t)0.1254882812500f,(float16_t)0.9921875000000f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1193847656250f,(float16_t)0.9926757812500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.1132812500000f,(float16_t)0.9936523437500f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.1071777343750f,(float16_t)0.9941406250000f,
+(float16_t)0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)0.1010742187500f,(float16_t)0.9951171875000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0949707031250f,(float16_t)0.9956054687500f,
+(float16_t)0.0919189453125f,(float16_t)0.9956054687500f,
+(float16_t)0.0888671875000f,(float16_t)0.9960937500000f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0827636718750f,(float16_t)0.9965820312500f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0765991210938f,(float16_t)0.9970703125000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0704956054688f,(float16_t)0.9975585937500f,
+(float16_t)0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)0.0643920898438f,(float16_t)0.9980468750000f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0582580566406f,(float16_t)0.9985351562500f,
+(float16_t)0.0552062988281f,(float16_t)0.9985351562500f,
+(float16_t)0.0521240234375f,(float16_t)0.9985351562500f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0459899902344f,(float16_t)0.9990234375000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0398864746094f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0337524414062f,(float16_t)0.9995117187500f,
+(float16_t)0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)0.0276031494141f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0214691162109f,(float16_t)1.0000000000000f,
+(float16_t)0.0184020996094f,(float16_t)1.0000000000000f,
+(float16_t)0.0153427124023f,(float16_t)1.0000000000000f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0092010498047f,(float16_t)1.0000000000000f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)0.0030670166016f,(float16_t)1.0000000000000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0030670166016f,(float16_t)1.0000000000000f,
+(float16_t)-0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)-0.0092010498047f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0153427124023f,(float16_t)1.0000000000000f,
+(float16_t)-0.0184020996094f,(float16_t)1.0000000000000f,
+(float16_t)-0.0214691162109f,(float16_t)1.0000000000000f,
+(float16_t)-0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0276031494141f,(float16_t)0.9995117187500f,
+(float16_t)-0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)-0.0337524414062f,(float16_t)0.9995117187500f,
+(float16_t)-0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)-0.0398864746094f,(float16_t)0.9990234375000f,
+(float16_t)-0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)-0.0459899902344f,(float16_t)0.9990234375000f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0521240234375f,(float16_t)0.9985351562500f,
+(float16_t)-0.0552062988281f,(float16_t)0.9985351562500f,
+(float16_t)-0.0582580566406f,(float16_t)0.9985351562500f,
+(float16_t)-0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)-0.0643920898438f,(float16_t)0.9980468750000f,
+(float16_t)-0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)-0.0704956054688f,(float16_t)0.9975585937500f,
+(float16_t)-0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)-0.0765991210938f,(float16_t)0.9970703125000f,
+(float16_t)-0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)-0.0827636718750f,(float16_t)0.9965820312500f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.0888671875000f,(float16_t)0.9960937500000f,
+(float16_t)-0.0919189453125f,(float16_t)0.9956054687500f,
+(float16_t)-0.0949707031250f,(float16_t)0.9956054687500f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1010742187500f,(float16_t)0.9951171875000f,
+(float16_t)-0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)-0.1071777343750f,(float16_t)0.9941406250000f,
+(float16_t)-0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)-0.1132812500000f,(float16_t)0.9936523437500f,
+(float16_t)-0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)-0.1193847656250f,(float16_t)0.9926757812500f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1254882812500f,(float16_t)0.9921875000000f,
+(float16_t)-0.1285400390625f,(float16_t)0.9916992187500f,
+(float16_t)-0.1315917968750f,(float16_t)0.9912109375000f,
+(float16_t)-0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)-0.1375732421875f,(float16_t)0.9907226562500f,
+(float16_t)-0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)-0.1436767578125f,(float16_t)0.9897460937500f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1497802734375f,(float16_t)0.9887695312500f,
+(float16_t)-0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)-0.1558837890625f,(float16_t)0.9877929687500f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1618652343750f,(float16_t)0.9868164062500f,
+(float16_t)-0.1649169921875f,(float16_t)0.9863281250000f,
+(float16_t)-0.1679687500000f,(float16_t)0.9858398437500f,
+(float16_t)-0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)-0.1739501953125f,(float16_t)0.9848632812500f,
+(float16_t)-0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)-0.1800537109375f,(float16_t)0.9838867187500f,
+(float16_t)-0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)-0.1860351562500f,(float16_t)0.9824218750000f,
+(float16_t)-0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)-0.1921386718750f,(float16_t)0.9814453125000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.1981201171875f,(float16_t)0.9799804687500f,
+(float16_t)-0.2010498046875f,(float16_t)0.9794921875000f,
+(float16_t)-0.2041015625000f,(float16_t)0.9790039062500f,
+(float16_t)-0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)-0.2100830078125f,(float16_t)0.9775390625000f,
+(float16_t)-0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)-0.2160644531250f,(float16_t)0.9765625000000f,
+(float16_t)-0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)-0.2220458984375f,(float16_t)0.9750976562500f,
+(float16_t)-0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)-0.2280273437500f,(float16_t)0.9736328125000f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2340087890625f,(float16_t)0.9721679687500f,
+(float16_t)-0.2370605468750f,(float16_t)0.9716796875000f,
+(float16_t)-0.2399902343750f,(float16_t)0.9707031250000f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2459716796875f,(float16_t)0.9692382812500f,
+(float16_t)-0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)-0.2519531250000f,(float16_t)0.9677734375000f,
+(float16_t)-0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)-0.2578125000000f,(float16_t)0.9663085937500f,
+(float16_t)-0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)-0.2636718750000f,(float16_t)0.9643554687500f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2697753906250f,(float16_t)0.9628906250000f,
+(float16_t)-0.2727050781250f,(float16_t)0.9619140625000f,
+(float16_t)-0.2756347656250f,(float16_t)0.9614257812500f,
+(float16_t)-0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)-0.2814941406250f,(float16_t)0.9594726562500f,
+(float16_t)-0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)-0.2873535156250f,(float16_t)0.9580078125000f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.2932128906250f,(float16_t)0.9560546875000f,
+(float16_t)-0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)-0.2990722656250f,(float16_t)0.9541015625000f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3049316406250f,(float16_t)0.9521484375000f,
+(float16_t)-0.3078613281250f,(float16_t)0.9516601562500f,
+(float16_t)-0.3107910156250f,(float16_t)0.9506835937500f,
+(float16_t)-0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)-0.3166503906250f,(float16_t)0.9487304687500f,
+(float16_t)-0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)-0.3225097656250f,(float16_t)0.9467773437500f,
+(float16_t)-0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)-0.3281250000000f,(float16_t)0.9448242187500f,
+(float16_t)-0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)-0.3339843750000f,(float16_t)0.9423828125000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3398437500000f,(float16_t)0.9404296875000f,
+(float16_t)-0.3427734375000f,(float16_t)0.9394531250000f,
+(float16_t)-0.3454589843750f,(float16_t)0.9384765625000f,
+(float16_t)-0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)-0.3513183593750f,(float16_t)0.9360351562500f,
+(float16_t)-0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)-0.3569335937500f,(float16_t)0.9340820312500f,
+(float16_t)-0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)-0.3627929687500f,(float16_t)0.9316406250000f,
+(float16_t)-0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)-0.3684082031250f,(float16_t)0.9296875000000f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3742675781250f,(float16_t)0.9272460937500f,
+(float16_t)-0.3769531250000f,(float16_t)0.9262695312500f,
+(float16_t)-0.3798828125000f,(float16_t)0.9252929687500f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.3854980468750f,(float16_t)0.9228515625000f,
+(float16_t)-0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)-0.3911132812500f,(float16_t)0.9204101562500f,
+(float16_t)-0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)-0.3967285156250f,(float16_t)0.9179687500000f,
+(float16_t)-0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)-0.4023437500000f,(float16_t)0.9155273437500f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4079589843750f,(float16_t)0.9130859375000f,
+(float16_t)-0.4108886718750f,(float16_t)0.9116210937500f,
+(float16_t)-0.4135742187500f,(float16_t)0.9106445312500f,
+(float16_t)-0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)-0.4191894531250f,(float16_t)0.9077148437500f,
+(float16_t)-0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)-0.4248046875000f,(float16_t)0.9052734375000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4304199218750f,(float16_t)0.9028320312500f,
+(float16_t)-0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)-0.4357910156250f,(float16_t)0.8999023437500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4414062500000f,(float16_t)0.8974609375000f,
+(float16_t)-0.4440917968750f,(float16_t)0.8959960937500f,
+(float16_t)-0.4467773437500f,(float16_t)0.8945312500000f,
+(float16_t)-0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)-0.4523925781250f,(float16_t)0.8916015625000f,
+(float16_t)-0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)-0.4577636718750f,(float16_t)0.8891601562500f,
+(float16_t)-0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)-0.4633789062500f,(float16_t)0.8862304687500f,
+(float16_t)-0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)-0.4687500000000f,(float16_t)0.8833007812500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4741210937500f,(float16_t)0.8803710937500f,
+(float16_t)-0.4768066406250f,(float16_t)0.8789062500000f,
+(float16_t)-0.4794921875000f,(float16_t)0.8774414062500f,
+(float16_t)-0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)-0.4848632812500f,(float16_t)0.8745117187500f,
+(float16_t)-0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)-0.4902343750000f,(float16_t)0.8715820312500f,
+(float16_t)-0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)-0.4956054687500f,(float16_t)0.8686523437500f,
+(float16_t)-0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)-0.5009765625000f,(float16_t)0.8657226562500f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5063476562500f,(float16_t)0.8623046875000f,
+(float16_t)-0.5087890625000f,(float16_t)0.8608398437500f,
+(float16_t)-0.5112304687500f,(float16_t)0.8593750000000f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5166015625000f,(float16_t)0.8559570312500f,
+(float16_t)-0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)-0.5219726562500f,(float16_t)0.8530273437500f,
+(float16_t)-0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)-0.5273437500000f,(float16_t)0.8496093750000f,
+(float16_t)-0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)-0.5322265625000f,(float16_t)0.8466796875000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5375976562500f,(float16_t)0.8432617187500f,
+(float16_t)-0.5400390625000f,(float16_t)0.8417968750000f,
+(float16_t)-0.5429687500000f,(float16_t)0.8398437500000f,
+(float16_t)-0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)-0.5478515625000f,(float16_t)0.8364257812500f,
+(float16_t)-0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)-0.5532226562500f,(float16_t)0.8330078125000f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5581054687500f,(float16_t)0.8295898437500f,
+(float16_t)-0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)-0.5629882812500f,(float16_t)0.8261718750000f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5683593750000f,(float16_t)0.8227539062500f,
+(float16_t)-0.5708007812500f,(float16_t)0.8212890625000f,
+(float16_t)-0.5732421875000f,(float16_t)0.8193359375000f,
+(float16_t)-0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)-0.5781250000000f,(float16_t)0.8159179687500f,
+(float16_t)-0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)-0.5834960937500f,(float16_t)0.8120117187500f,
+(float16_t)-0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)-0.5883789062500f,(float16_t)0.8085937500000f,
+(float16_t)-0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)-0.5932617187500f,(float16_t)0.8051757812500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.5981445312500f,(float16_t)0.8012695312500f,
+(float16_t)-0.6005859375000f,(float16_t)0.7993164062500f,
+(float16_t)-0.6030273437500f,(float16_t)0.7978515625000f,
+(float16_t)-0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)-0.6079101562500f,(float16_t)0.7939453125000f,
+(float16_t)-0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)-0.6127929687500f,(float16_t)0.7900390625000f,
+(float16_t)-0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)-0.6176757812500f,(float16_t)0.7866210937500f,
+(float16_t)-0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)-0.6225585937500f,(float16_t)0.7827148437500f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6274414062500f,(float16_t)0.7788085937500f,
+(float16_t)-0.6293945312500f,(float16_t)0.7768554687500f,
+(float16_t)-0.6318359375000f,(float16_t)0.7749023437500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6367187500000f,(float16_t)0.7709960937500f,
+(float16_t)-0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)-0.6416015625000f,(float16_t)0.7670898437500f,
+(float16_t)-0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)-0.6459960937500f,(float16_t)0.7631835937500f,
+(float16_t)-0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)-0.6508789062500f,(float16_t)0.7592773437500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6552734375000f,(float16_t)0.7553710937500f,
+(float16_t)-0.6577148437500f,(float16_t)0.7534179687500f,
+(float16_t)-0.6601562500000f,(float16_t)0.7509765625000f,
+(float16_t)-0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)-0.6645507812500f,(float16_t)0.7470703125000f,
+(float16_t)-0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)-0.6694335937500f,(float16_t)0.7431640625000f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.6738281250000f,(float16_t)0.7387695312500f,
+(float16_t)-0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)-0.6782226562500f,(float16_t)0.7348632812500f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6826171875000f,(float16_t)0.7304687500000f,
+(float16_t)-0.6850585937500f,(float16_t)0.7285156250000f,
+(float16_t)-0.6875000000000f,(float16_t)0.7265625000000f,
+(float16_t)-0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)-0.6918945312500f,(float16_t)0.7221679687500f,
+(float16_t)-0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)-0.6962890625000f,(float16_t)0.7177734375000f,
+(float16_t)-0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)-0.7006835937500f,(float16_t)0.7133789062500f,
+(float16_t)-0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)-0.7050781250000f,(float16_t)0.7094726562500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7094726562500f,(float16_t)0.7050781250000f,
+(float16_t)-0.7114257812500f,(float16_t)0.7026367187500f,
+(float16_t)-0.7133789062500f,(float16_t)0.7006835937500f,
+(float16_t)-0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)-0.7177734375000f,(float16_t)0.6962890625000f,
+(float16_t)-0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)-0.7221679687500f,(float16_t)0.6918945312500f,
+(float16_t)-0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)-0.7265625000000f,(float16_t)0.6875000000000f,
+(float16_t)-0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)-0.7304687500000f,(float16_t)0.6826171875000f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7348632812500f,(float16_t)0.6782226562500f,
+(float16_t)-0.7368164062500f,(float16_t)0.6762695312500f,
+(float16_t)-0.7387695312500f,(float16_t)0.6738281250000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7431640625000f,(float16_t)0.6694335937500f,
+(float16_t)-0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)-0.7470703125000f,(float16_t)0.6645507812500f,
+(float16_t)-0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)-0.7509765625000f,(float16_t)0.6601562500000f,
+(float16_t)-0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)-0.7553710937500f,(float16_t)0.6552734375000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7592773437500f,(float16_t)0.6508789062500f,
+(float16_t)-0.7612304687500f,(float16_t)0.6484375000000f,
+(float16_t)-0.7631835937500f,(float16_t)0.6459960937500f,
+(float16_t)-0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)-0.7670898437500f,(float16_t)0.6416015625000f,
+(float16_t)-0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)-0.7709960937500f,(float16_t)0.6367187500000f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.7749023437500f,(float16_t)0.6318359375000f,
+(float16_t)-0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)-0.7788085937500f,(float16_t)0.6274414062500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7827148437500f,(float16_t)0.6225585937500f,
+(float16_t)-0.7846679687500f,(float16_t)0.6201171875000f,
+(float16_t)-0.7866210937500f,(float16_t)0.6176757812500f,
+(float16_t)-0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)-0.7900390625000f,(float16_t)0.6127929687500f,
+(float16_t)-0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)-0.7939453125000f,(float16_t)0.6079101562500f,
+(float16_t)-0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)-0.7978515625000f,(float16_t)0.6030273437500f,
+(float16_t)-0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)-0.8012695312500f,(float16_t)0.5981445312500f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8051757812500f,(float16_t)0.5932617187500f,
+(float16_t)-0.8066406250000f,(float16_t)0.5908203125000f,
+(float16_t)-0.8085937500000f,(float16_t)0.5883789062500f,
+(float16_t)-0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)-0.8120117187500f,(float16_t)0.5834960937500f,
+(float16_t)-0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)-0.8159179687500f,(float16_t)0.5781250000000f,
+(float16_t)-0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)-0.8193359375000f,(float16_t)0.5732421875000f,
+(float16_t)-0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)-0.8227539062500f,(float16_t)0.5683593750000f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8261718750000f,(float16_t)0.5629882812500f,
+(float16_t)-0.8281250000000f,(float16_t)0.5605468750000f,
+(float16_t)-0.8295898437500f,(float16_t)0.5581054687500f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8330078125000f,(float16_t)0.5532226562500f,
+(float16_t)-0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)-0.8364257812500f,(float16_t)0.5478515625000f,
+(float16_t)-0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)-0.8398437500000f,(float16_t)0.5429687500000f,
+(float16_t)-0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)-0.8432617187500f,(float16_t)0.5375976562500f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8466796875000f,(float16_t)0.5322265625000f,
+(float16_t)-0.8481445312500f,(float16_t)0.5297851562500f,
+(float16_t)-0.8496093750000f,(float16_t)0.5273437500000f,
+(float16_t)-0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)-0.8530273437500f,(float16_t)0.5219726562500f,
+(float16_t)-0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)-0.8559570312500f,(float16_t)0.5166015625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8593750000000f,(float16_t)0.5112304687500f,
+(float16_t)-0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)-0.8623046875000f,(float16_t)0.5063476562500f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8657226562500f,(float16_t)0.5009765625000f,
+(float16_t)-0.8671875000000f,(float16_t)0.4982910156250f,
+(float16_t)-0.8686523437500f,(float16_t)0.4956054687500f,
+(float16_t)-0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)-0.8715820312500f,(float16_t)0.4902343750000f,
+(float16_t)-0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)-0.8745117187500f,(float16_t)0.4848632812500f,
+(float16_t)-0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)-0.8774414062500f,(float16_t)0.4794921875000f,
+(float16_t)-0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)-0.8803710937500f,(float16_t)0.4741210937500f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8833007812500f,(float16_t)0.4687500000000f,
+(float16_t)-0.8847656250000f,(float16_t)0.4660644531250f,
+(float16_t)-0.8862304687500f,(float16_t)0.4633789062500f,
+(float16_t)-0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)-0.8891601562500f,(float16_t)0.4577636718750f,
+(float16_t)-0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)-0.8916015625000f,(float16_t)0.4523925781250f,
+(float16_t)-0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)-0.8945312500000f,(float16_t)0.4467773437500f,
+(float16_t)-0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)-0.8974609375000f,(float16_t)0.4414062500000f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.8999023437500f,(float16_t)0.4357910156250f,
+(float16_t)-0.9013671875000f,(float16_t)0.4331054687500f,
+(float16_t)-0.9028320312500f,(float16_t)0.4304199218750f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9052734375000f,(float16_t)0.4248046875000f,
+(float16_t)-0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)-0.9077148437500f,(float16_t)0.4191894531250f,
+(float16_t)-0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)-0.9106445312500f,(float16_t)0.4135742187500f,
+(float16_t)-0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)-0.9130859375000f,(float16_t)0.4079589843750f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9155273437500f,(float16_t)0.4023437500000f,
+(float16_t)-0.9165039062500f,(float16_t)0.3996582031250f,
+(float16_t)-0.9179687500000f,(float16_t)0.3967285156250f,
+(float16_t)-0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)-0.9204101562500f,(float16_t)0.3911132812500f,
+(float16_t)-0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)-0.9228515625000f,(float16_t)0.3854980468750f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9252929687500f,(float16_t)0.3798828125000f,
+(float16_t)-0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)-0.9272460937500f,(float16_t)0.3742675781250f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9296875000000f,(float16_t)0.3684082031250f,
+(float16_t)-0.9306640625000f,(float16_t)0.3657226562500f,
+(float16_t)-0.9316406250000f,(float16_t)0.3627929687500f,
+(float16_t)-0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)-0.9340820312500f,(float16_t)0.3569335937500f,
+(float16_t)-0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)-0.9360351562500f,(float16_t)0.3513183593750f,
+(float16_t)-0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)-0.9384765625000f,(float16_t)0.3454589843750f,
+(float16_t)-0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)-0.9404296875000f,(float16_t)0.3398437500000f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9423828125000f,(float16_t)0.3339843750000f,
+(float16_t)-0.9433593750000f,(float16_t)0.3310546875000f,
+(float16_t)-0.9448242187500f,(float16_t)0.3281250000000f,
+(float16_t)-0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)-0.9467773437500f,(float16_t)0.3225097656250f,
+(float16_t)-0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)-0.9487304687500f,(float16_t)0.3166503906250f,
+(float16_t)-0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)-0.9506835937500f,(float16_t)0.3107910156250f,
+(float16_t)-0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)-0.9521484375000f,(float16_t)0.3049316406250f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9541015625000f,(float16_t)0.2990722656250f,
+(float16_t)-0.9550781250000f,(float16_t)0.2961425781250f,
+(float16_t)-0.9560546875000f,(float16_t)0.2932128906250f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9580078125000f,(float16_t)0.2873535156250f,
+(float16_t)-0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)-0.9594726562500f,(float16_t)0.2814941406250f,
+(float16_t)-0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)-0.9614257812500f,(float16_t)0.2756347656250f,
+(float16_t)-0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)-0.9628906250000f,(float16_t)0.2697753906250f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9643554687500f,(float16_t)0.2636718750000f,
+(float16_t)-0.9653320312500f,(float16_t)0.2607421875000f,
+(float16_t)-0.9663085937500f,(float16_t)0.2578125000000f,
+(float16_t)-0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)-0.9677734375000f,(float16_t)0.2519531250000f,
+(float16_t)-0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)-0.9692382812500f,(float16_t)0.2459716796875f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9707031250000f,(float16_t)0.2399902343750f,
+(float16_t)-0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)-0.9721679687500f,(float16_t)0.2340087890625f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9736328125000f,(float16_t)0.2280273437500f,
+(float16_t)-0.9741210937500f,(float16_t)0.2250976562500f,
+(float16_t)-0.9750976562500f,(float16_t)0.2220458984375f,
+(float16_t)-0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)-0.9765625000000f,(float16_t)0.2160644531250f,
+(float16_t)-0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)-0.9775390625000f,(float16_t)0.2100830078125f,
+(float16_t)-0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)-0.9790039062500f,(float16_t)0.2041015625000f,
+(float16_t)-0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)-0.9799804687500f,(float16_t)0.1981201171875f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9814453125000f,(float16_t)0.1921386718750f,
+(float16_t)-0.9819335937500f,(float16_t)0.1890869140625f,
+(float16_t)-0.9824218750000f,(float16_t)0.1860351562500f,
+(float16_t)-0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)-0.9838867187500f,(float16_t)0.1800537109375f,
+(float16_t)-0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)-0.9848632812500f,(float16_t)0.1739501953125f,
+(float16_t)-0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)-0.9858398437500f,(float16_t)0.1679687500000f,
+(float16_t)-0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)-0.9868164062500f,(float16_t)0.1618652343750f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9877929687500f,(float16_t)0.1558837890625f,
+(float16_t)-0.9882812500000f,(float16_t)0.1528320312500f,
+(float16_t)-0.9887695312500f,(float16_t)0.1497802734375f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9897460937500f,(float16_t)0.1436767578125f,
+(float16_t)-0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)-0.9907226562500f,(float16_t)0.1375732421875f,
+(float16_t)-0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)-0.9912109375000f,(float16_t)0.1315917968750f,
+(float16_t)-0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)-0.9921875000000f,(float16_t)0.1254882812500f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9926757812500f,(float16_t)0.1193847656250f,
+(float16_t)-0.9931640625000f,(float16_t)0.1163330078125f,
+(float16_t)-0.9936523437500f,(float16_t)0.1132812500000f,
+(float16_t)-0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)-0.9941406250000f,(float16_t)0.1071777343750f,
+(float16_t)-0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)-0.9951171875000f,(float16_t)0.1010742187500f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9956054687500f,(float16_t)0.0949707031250f,
+(float16_t)-0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)-0.9960937500000f,(float16_t)0.0888671875000f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9965820312500f,(float16_t)0.0827636718750f,
+(float16_t)-0.9965820312500f,(float16_t)0.0797119140625f,
+(float16_t)-0.9970703125000f,(float16_t)0.0765991210938f,
+(float16_t)-0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)-0.9975585937500f,(float16_t)0.0704956054688f,
+(float16_t)-0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)-0.9980468750000f,(float16_t)0.0643920898438f,
+(float16_t)-0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)-0.9985351562500f,(float16_t)0.0582580566406f,
+(float16_t)-0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)-0.9985351562500f,(float16_t)0.0521240234375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9990234375000f,(float16_t)0.0459899902344f,
+(float16_t)-0.9990234375000f,(float16_t)0.0429382324219f,
+(float16_t)-0.9990234375000f,(float16_t)0.0398864746094f,
+(float16_t)-0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)-0.9995117187500f,(float16_t)0.0337524414062f,
+(float16_t)-0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)-0.9995117187500f,(float16_t)0.0276031494141f,
+(float16_t)-0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)-1.0000000000000f,(float16_t)0.0214691162109f,
+(float16_t)-1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)-1.0000000000000f,(float16_t)0.0153427124023f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)-1.0000000000000f,(float16_t)0.0092010498047f,
+(float16_t)-1.0000000000000f,(float16_t)0.0061340332031f,
+(float16_t)-1.0000000000000f,(float16_t)0.0030670166016f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0368041992188f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)-0.0735473632812f,(float16_t)0.9970703125000f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1102294921875f,(float16_t)0.9941406250000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)-0.1829833984375f,(float16_t)0.9829101562500f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)-0.2191162109375f,(float16_t)0.9755859375000f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2548828125000f,(float16_t)0.9667968750000f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)-0.3251953125000f,(float16_t)0.9458007812500f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)-0.3598632812500f,(float16_t)0.9331054687500f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.3940429687500f,(float16_t)0.9189453125000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)-0.4604492187500f,(float16_t)0.8876953125000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)-0.4929199218750f,(float16_t)0.8701171875000f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5244140625000f,(float16_t)0.8515625000000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)-0.5859375000000f,(float16_t)0.8105468750000f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)-0.6152343750000f,(float16_t)0.7885742187500f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6440429687500f,(float16_t)0.7651367187500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)-0.6982421875000f,(float16_t)0.7158203125000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)-0.7241210937500f,(float16_t)0.6894531250000f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7490234375000f,(float16_t)0.6625976562500f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)-0.7958984375000f,(float16_t)0.6054687500000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)-0.8173828125000f,(float16_t)0.5756835937500f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8383789062500f,(float16_t)0.5454101562500f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)-0.8759765625000f,(float16_t)0.4821777343750f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)-0.8930664062500f,(float16_t)0.4497070312500f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9091796875000f,(float16_t)0.4165039062500f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)-0.9375000000000f,(float16_t)0.3483886718750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)-0.9497070312500f,(float16_t)0.3137207031250f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9604492187500f,(float16_t)0.2785644531250f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)-0.9785156250000f,(float16_t)0.2071533203125f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)-0.9853515625000f,(float16_t)0.1710205078125f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9907226562500f,(float16_t)0.1345214843750f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)-0.9980468750000f,(float16_t)0.0613098144531f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)-0.9995117187500f,(float16_t)0.0245361328125f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1467285156250f,(float16_t)0.9892578125000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)-0.2902832031250f,(float16_t)0.9570312500000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.4274902343750f,(float16_t)0.9038085937500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)-0.6713867187500f,(float16_t)0.7407226562500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)-0.7729492187500f,(float16_t)0.6342773437500f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.8579101562500f,(float16_t)0.5141601562500f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)-0.9702148437500f,(float16_t)0.2429199218750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)-0.9951171875000f,(float16_t)0.0980224609375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.5556640625000f,(float16_t)0.8315429687500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)-0.9238281250000f,(float16_t)0.3825683593750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)0.0000000000000f,(float16_t)1.0000000000000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,};
 
 float16_t rearranged_twiddle_stride3_4096_f16[2728]={
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99998941108192840321f,(float16_t)0.00460192612044857050f,
-(float16_t)0.99995764455196389786f,(float16_t)0.00920375478205981944f,
-(float16_t)0.99990470108285289808f,(float16_t)0.01380538852806039059f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99973528826056168306f,(float16_t)0.02300768146883936868f,
-(float16_t)0.99961882249517863830f,(float16_t)0.02760814577896573974f,
-(float16_t)0.99948118696616694567f,(float16_t)0.03220802540830458582f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99914241872481690532f,(float16_t)0.04140564097707673946f,
-(float16_t)0.99894129318685687124f,(float16_t)0.04600318213091462299f,
-(float16_t)0.99871901223387293811f,(float16_t)0.05059974903689928166f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99821100336047818846f,(float16_t)0.05978957074663986820f,
-(float16_t)0.99792528619859599548f,(float16_t)0.06438263092985746505f,
-(float16_t)0.99761843513851955478f,(float16_t)0.06897432762826674613f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99694135776498216117f,(float16_t)0.07815324163279423197f,
-(float16_t)0.99657114579055483539f,(float16_t)0.08274026454937569164f,
-(float16_t)0.99617982859569698117f,(float16_t)0.08732553520619205922f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99533391214048227980f,(float16_t)0.09649043135525259274f,
-(float16_t)0.99487933079480561638f,(float16_t)0.10106986275482782167f,
-(float16_t)0.99440368005767909576f,(float16_t)0.10564715371341061589f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99338921114808065305f,(float16_t)0.11479492660651008373f,
-(float16_t)0.99285041445986510489f,(float16_t)0.11936521481099135467f,
-(float16_t)0.99229059134825736699f,(float16_t)0.12393297511851215920f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.99110791372327688986f,(float16_t)0.13306052515713906459f,
-(float16_t)0.99048508425645709341f,(float16_t)0.13762012158648603832f,
-(float16_t)0.98984127845882052821f,(float16_t)0.14217680351944803063f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98849079285269658701f,(float16_t)0.15128103795733022219f,
-(float16_t)0.98778414164457217783f,(float16_t)0.15582839765426523271f,
-(float16_t)0.98705657130575097380f,(float16_t)0.16037245724292828464f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98553873531217606185f,(float16_t)0.16945029123396795900f,
-(float16_t)0.98474850180190420801f,(float16_t)0.17398387338746382214f,
-(float16_t)0.98393741344921892278f,(float16_t)0.17851377093899750692f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.98225274136628937249f,(float16_t)0.18756212858252960252f,
-(float16_t)0.98137919331375456089f,(float16_t)0.19208039704989243734f,
-(float16_t)0.98048486177346938497f,(float16_t)0.19659459767008022335f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97863392442942320759f,(float16_t)0.20561041305309923910f,
-(float16_t)0.97767735782450992943f,(float16_t)0.21011183688046961016f,
-(float16_t)0.97670008612871184184f,(float16_t)0.21460881099378675829f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97468351068851066810f,(float16_t)0.22358902922978998729f,
-(float16_t)0.97364424965081197705f,(float16_t)0.22807208317088573102f,
-(float16_t)0.97258436893473221296f,(float16_t)0.23255030703877524467f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.97040283868755550234f,(float16_t)0.24149188530286933019f,
-(float16_t)0.96928123535654853171f,(float16_t)0.24595505033579459497f,
-(float16_t)0.96813910474636244441f,(float16_t)0.25041300657296522436f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96579335887408368500f,(float16_t)0.25931291513288623474f,
-(float16_t)0.96458979328981275803f,(float16_t)0.26375467897483134694f,
-(float16_t)0.96336579978095404631f,(float16_t)0.26819085706340317632f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.96085663310767965850f,(float16_t)0.27704608030609989555f,
-(float16_t)0.95957151308198451733f,(float16_t)0.28146493792575794091f,
-(float16_t)0.95826607140801767226f,(float16_t)0.28587783472708061527f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95559433413077110586f,(float16_t)0.29468537218051432669f,
-(float16_t)0.95422809510910566733f,(float16_t)0.29907982630804047508f,
-(float16_t)0.95284164760119871573f,(float16_t)0.30346794657201131562f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.95000824500184299914f,(float16_t)0.31222481392182488413f,
-(float16_t)0.94856134991573026749f,(float16_t)0.31659337555616584581f,
-(float16_t)0.94709436635277721717f,(float16_t)0.32095523242787521445f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.94410025849127265918f,(float16_t)0.32965846252858749255f,
-(float16_t)0.94257319760144686605f,(float16_t)0.33399965144200938205f,
-(float16_t)0.94102617505088925753f,(float16_t)0.33833376696554112728f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93787237643998988545f,(float16_t)0.34698041084592368133f,
-(float16_t)0.93626566717027825959f,(float16_t)0.35129275608556709276f,
-(float16_t)0.93463912981968078064f,(float16_t)0.35559766170478385172f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.93132670908118042608f,(float16_t)0.36418478956707989180f,
-(float16_t)0.92964089584318121418f,(float16_t)0.36846682995337232125f,
-(float16_t)0.92793539482261788720f,(float16_t)0.37274106700951575855f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.92446547432526260391f,(float16_t)0.38126576922216237620f,
-(float16_t)0.92270112833387862850f,(float16_t)0.38551605384391884890f,
-(float16_t)0.92091724152918941204f,(float16_t)0.38975817406985641123f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91729099700837790632f,(float16_t)0.39821756215337356100f,
-(float16_t)0.91544871608826783316f,(float16_t)0.40243465085941843018f,
-(float16_t)0.91358704794525080750f,(float16_t)0.40664321687036902864f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.90980570810465222209f,(float16_t)0.41503442447608163146f,
-(float16_t)0.90788611648766626150f,(float16_t)0.41921688836322390515f,
-(float16_t)0.90594729780726845902f,(float16_t)0.42339047414379604728f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.90201214390249317976f,(float16_t)0.43171065802505725895f,
-(float16_t)0.90001589201616016833f,(float16_t)0.43585707992225547480f,
-(float16_t)0.89800057974073987932f,(float16_t)0.43999427130963325583f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.89391294514520325265f,(float16_t)0.44824061228521988598f,
-(float16_t)0.89184070939234272313f,(float16_t)0.45234958723377088896f,
-(float16_t)0.88974958638307277692f,(float16_t)0.45644898239688391772f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.88551085613619995307f,(float16_t)0.46461868630623781584f,
-(float16_t)0.88336333866573157891f,(float16_t)0.46868882203582790114f,
-(float16_t)0.88119711347122209322f,(float16_t)0.47274903195034279069f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87680872380914565145f,(float16_t)0.48083933060033395845f,
-(float16_t)0.87458665227817611321f,(float16_t)0.48486924800079106435f,
-(float16_t)0.87234605889439154058f,(float16_t)0.48888889691976317176f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86780949676330332299f,(float16_t)0.49689704902265446895f,
-(float16_t)0.86551362409056908920f,(float16_t)0.50088538261124071482f,
-(float16_t)0.86319942171212415971f,(float16_t)0.50486310853126759035f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85851622426444273994f,(float16_t)0.51278640063356295542f,
-(float16_t)0.85614732837519447184f,(float16_t)0.51673179901764987321f,
-(float16_t)0.85376030113811141042f,(float16_t)0.52066625414036715735f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84893205521163961347f,(float16_t)0.52850200154222848337f,
-(float16_t)0.84649093877405212627f,(float16_t)0.53240312787719790144f,
-(float16_t)0.84403189549006640835f,(float16_t)0.53629297906596318235f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83906023707031274217f,(float16_t)0.54403852673088382019f,
-(float16_t)0.83654772722351200542f,(float16_t)0.54789405917310018967f,
-(float16_t)0.83401750110601813315f,(float16_t)0.55173798840470733573f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82890411477186487499f,(float16_t)0.55939071185913613604f,
-(float16_t)0.82632106284566353427f,(float16_t)0.56319934401383409117f,
-(float16_t)0.82372051122739142759f,(float16_t)0.56699604882510867832f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81846712958029865792f,(float16_t)0.57455335504771576360f,
-(float16_t)0.81581441080673378075f,(float16_t)0.57831379641165558958f,
-(float16_t)0.81314441484925359394f,(float16_t)0.58206199034077543697f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.80775281792619035848f,(float16_t)0.58952131864106394055f,
-(float16_t)0.80503133114296365758f,(float16_t)0.59323229503979979516f,
-(float16_t)0.80229279553811572168f,(float16_t)0.59693070806219639124f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.79676481020841882774f,(float16_t)0.60428953094815596181f,
-(float16_t)0.79397547755433717231f,(float16_t)0.60794978496777363208f,
-(float16_t)0.79116933021769020318f,(float16_t)0.61159716392646190641f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.78550682956405393220f,(float16_t)0.61885298796097631957f,
-(float16_t)0.78265059616657572938f,(float16_t)0.62246127937414996723f,
-(float16_t)0.77977778792301455368f,(float16_t)0.62605638840434352232f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.77398269060682289844f,(float16_t)0.63320675505005719064f,
-(float16_t)0.77106052426181381776f,(float16_t)0.63676186123628419899f,
-(float16_t)0.76812202852336541881f,(float16_t)0.64030348218415167327f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.76219629813457900891f,(float16_t)0.64734596863651205911f,
-(float16_t)0.75920918897838796102f,(float16_t)0.65084668499638087535f,
-(float16_t)0.75620600141439453523f,(float16_t)0.65433361783180044036f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.75015164580621507273f,(float16_t)0.66126583783999226540f,
-(float16_t)0.74710060598018013245f,(float16_t)0.66471097820334479334f,
-(float16_t)0.74403374417992929057f,(float16_t)0.66814204142651845153f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.73785281478846598269f,(float16_t)0.67496164610201192513f,
-(float16_t)0.73473887809596349907f,(float16_t)0.67835004312986146857f,
-(float16_t)0.73160938122389262972f,(float16_t)0.68172407417164970767f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.72530397237306076796f,(float16_t)0.68842875278409043638f,
-(float16_t)0.72212819392921534511f,(float16_t)0.69175925836415774750f,
-(float16_t)0.71893712237280449351f,(float16_t)0.69507511398000088043f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.71250937056469243469f,(float16_t)0.70166259474016845488f,
-(float16_t)0.70927282643886568891f,(float16_t)0.70493408037590488124f,
-(float16_t)0.70602126144933974317f,(float16_t)0.70819063703319540259f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.69947334464028376733f,(float16_t)0.71465868786276909308f,
-(float16_t)0.69617713149146298601f,(float16_t)0.71787004505573170920f,
-(float16_t)0.69286617481742474034f,(float16_t)0.72106619931450810501f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.68620031168003858824f,(float16_t)0.72741262860237576593f,
-(float16_t)0.68284554638524808112f,(float16_t)0.73056276922782759087f,
-(float16_t)0.67947631989936496666f,(float16_t)0.73369743811466026084f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.67269476907077296879f,(float16_t)0.73992009545951609173f,
-(float16_t)0.66928258834663600929f,(float16_t)0.74300795213512171866f,
-(float16_t)0.66585623366550972246f,(float16_t)0.74608007351006366825f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.65896129298203731661f,(float16_t)0.75217685044904269986f,
-(float16_t)0.65549285299961546070f,(float16_t)0.75520137689653654700f,
-(float16_t)0.65201053109695950027f,(float16_t)0.75820990981301528144f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.64500453681554403840f,(float16_t)0.76417874053611667406f,
-(float16_t)0.64148101280858316198f,(float16_t)0.76713891193582040007f,
-(float16_t)0.63794390362184416610f,(float16_t)0.77008283699334789674f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.63082922962842458148f,(float16_t)0.77592169904340757558f,
-(float16_t)0.62725181549514419377f,(float16_t)0.77881651238147586724f,
-(float16_t)0.62366111752569464155f,(float16_t)0.78169483207105938671f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.61644017453085364622f,(float16_t)0.78740174702903131809f,
-(float16_t)0.61281008242940970820f,(float16_t)0.79023022143731003197f,
-(float16_t)0.60916701233645320634f,(float16_t)0.79304196047944364167f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.60184224705858002658f,(float16_t)0.79861499463476082195f,
-(float16_t)0.59816070699634238395f,(float16_t)0.80137617172314012937f,
-(float16_t)0.59446649918466454299f,(float16_t)0.80412037739826569549f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.58704039352091808013f,(float16_t)0.80955764240405125864f,
-(float16_t)0.58330865293769829094f,(float16_t)0.81225058658520388200f,
-(float16_t)0.57956455913940574387f,(float16_t)0.81492632905652662156f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.57203962932475704850f,(float16_t)0.82022598256943468620f,
-(float16_t)0.56825895267013148970f,(float16_t)0.82284978137582631685f,
-(float16_t)0.56446624152051949608f,(float16_t)0.82545615400437744036f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.55684503727516010407f,(float16_t)0.83061640030884620334f,
-(float16_t)0.55301670558002757883f,(float16_t)0.83317016470191318511f,
-(float16_t)0.54917666218771976627f,(float16_t)0.83570628435375260423f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.54146176585312355556f,(float16_t)0.84072537497045796151f,
-(float16_t)0.53758707629564550512f,(float16_t)0.84320823964184543620f,
-(float16_t)0.53370100180715296379f,(float16_t)0.84567324698729906540f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.52589502747108474168f,(float16_t)0.85054948126560336874f,
-(float16_t)0.52197529293715438925f,(float16_t)0.85296060493036363059f,
-(float16_t)0.51804450409599933636f,(float16_t)0.85535366473519602870f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.51015009670676669806f,(float16_t)0.86008539042939025077f,
-(float16_t)0.50618664534515533937f,(float16_t)0.86242395611104050168f,
-(float16_t)0.50221247404571089934f,(float16_t)0.86474425751946237817f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.49423230851595972846f,(float16_t)0.86932987134860673084f,
-(float16_t)0.49022648328829110387f,(float16_t)0.87159508665595109012f,
-(float16_t)0.48621027612448652899f,(float16_t)0.87384184346536675214f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.47814705642484311987f,(float16_t)0.87827979165654146421f,
-(float16_t)0.47410021465055002254f,(float16_t)0.88047088905216075450f,
-(float16_t)0.47004333245959561971f,(float16_t)0.88264333997956279099f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.46189979070246284243f,(float16_t)0.88693211879434208367f,
-(float16_t)0.45781330359887728587f,(float16_t)0.88904835585466457371f,
-(float16_t)0.45371712100016392544f,(float16_t)0.89114576479458318392f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.44549601651398174074f,(float16_t)0.89528392103855758410f,
-(float16_t)0.44137126873171661501f,(float16_t)0.89732458070541831763f,
-(float16_t)0.43723717366104419835f,(float16_t)0.89934623697934146236f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.42894129205532954829f,(float16_t)0.90333236849451181705f,
-(float16_t)0.42477968120910880589f,(float16_t)0.90529675931811881551f,
-(float16_t)0.42060907444840250902f,(float16_t)0.90724197791529592738f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.41224122666988299857f,(float16_t)0.91107473405517624965f,
-(float16_t)0.40804416286497874333f,(float16_t)0.91296219042839810154f,
-(float16_t)0.40383845756765412993f,(float16_t)0.91483031223794608611f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.39540147894781629834f,(float16_t)0.91850839432521225181f,
-(float16_t)0.39117038430225398171f,(float16_t)0.92031827670911048322f,
-(float16_t)0.38693100551438869283f,(float16_t)0.92210866874334507237f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.37842775480876561511f,(float16_t)0.92563083050987271516f,
-(float16_t)0.37416406297145798909f,(float16_t)0.92736252565040111495f,
-(float16_t)0.36989244714893426691f,(float16_t)0.92907458125931574600f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.36132580556845433906f,(float16_t)0.93243962926846235550f,
-(float16_t)0.35703096123343003310f,(float16_t)0.93409255040425887007f,
-(float16_t)0.35272855575521072646f,(float16_t)0.93572568948108036935f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.34410142598993898044f,(float16_t)0.93893248353206448797f,
-(float16_t)0.33977688440682696225f,(float16_t)0.94050607059326829518f,
-(float16_t)0.33544514708453165852f,(float16_t)0.94205973977101731265f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.32676045232013178898f,(float16_t)0.94510719328526060501f,
-(float16_t)0.32240767880107001897f,(float16_t)0.94660091308328353499f,
-(float16_t)0.31804807738501505998f,(float16_t)0.94807458592227622507f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.30930876031226878231f,(float16_t)0.95096166631157508231f,
-(float16_t)0.30492922973540242948f,(float16_t)0.95237501271976587880f,
-(float16_t)0.30054324141727339903f,(float16_t)0.95376818988599032512f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.29175226323498937298f,(float16_t)0.95649391890239499059f,
-(float16_t)0.28734745954472956653f,(float16_t)0.95782641302753290802f,
-(float16_t)0.28293657045705539188f,(float16_t)0.95913862246184189431f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.27409690986870632878f,(float16_t)0.96170207652912254037f,
-(float16_t)0.26966832557291520178f,(float16_t)0.96295326687368387741f,
-(float16_t)0.26523403028551190141f,(float16_t)0.96418406395174571788f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.25634868248994291395f,(float16_t)0.96658437447833311928f,
-(float16_t)0.25189781815421691258f,(float16_t)0.96775383709347551076f,
-(float16_t)0.24744161916777343557f,(float16_t)0.96890280477642887202f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.23851359484431849944f,(float16_t)0.97113915844972509284f,
-(float16_t)0.23404195858354345794f,(float16_t)0.97222649707893626925f,
-(float16_t)0.22956536582051886852f,(float16_t)0.97329324605469824672f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.22059769010887364526f,(float16_t)0.97536488511665686563f,
-(float16_t)0.21610679707621960333f,(float16_t)0.97636973133002114000f,
-(float16_t)0.21161132736922760866f,(float16_t)0.97735390014519996082f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.20260703884442110567f,(float16_t)0.97926012264908202098f,
-(float16_t)0.19809841071795372680f,(float16_t)0.98018213596811731847f,
-(float16_t)0.19358558729580374602f,(float16_t)0.98108339115048659451f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.18454773693861964423f,(float16_t)0.98282355119870523641f,
-(float16_t)0.18002290140569951471f,(float16_t)0.98366241921173025453f,
-(float16_t)0.17549425337727139751f,(float16_t)0.98448045538322093151f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.16642590354046421508f,(float16_t)0.98605396334619543897f,
-(float16_t)0.16188639378011188130f,(float16_t)0.98680940181418541624f,
-(float16_t)0.15734345561623827581f,(float16_t)0.98754394179435922574f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.14824767898689619749f,(float16_t)0.98895026451030298986f,
-(float16_t)0.14369503315029458212f,(float16_t)0.98962201746320077600f,
-(float16_t)0.13913934416382628401f,(float16_t)0.99027281236316910817f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.13001922272223334631f,(float16_t)0.99151147331874389668f,
-(float16_t)0.12545498341154620592f,(float16_t)0.99209931314219179654f,
-(float16_t)0.12088808723577722237f,(float16_t)0.99266614244894801899f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.11174671121112665639f,(float16_t)0.99373672194072459884f,
-(float16_t)0.10717242495680887049f,(float16_t)0.99424044945318790223f,
-(float16_t)0.10259586902243628126f,(float16_t)0.99472312110432570265f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.09343633584574791151f,(float16_t)0.99562525638099430569f,
-(float16_t)0.08885355258252468358f,(float16_t)0.99604470090125196702f,
-(float16_t)0.08426888759332412659f,(float16_t)0.99644305135004263008f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.07509430084792129145f,(float16_t)0.99717643673532618820f,
-(float16_t)0.07050457338961400866f,(float16_t)0.99751145614030345410f,
-(float16_t)0.06591335279700392957f,(float16_t)0.99782535041111164453f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.05672682116690778292f,(float16_t)0.99838973740734016094f,
-(float16_t)0.05213170468028331672f,(float16_t)0.99864021818026527111f,
-(float16_t)0.04753548415695926094f,(float16_t)0.99886954991428356099f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.03834012037355279123f,(float16_t)0.99926474728659442359f,
-(float16_t)0.03374117185137764235f,(float16_t)0.99943060455546173237f,
-(float16_t)0.02914150876419373953f,(float16_t)0.99957529604674921764f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.01994042855151459750f,(float16_t)0.99980116988788425569f,
-(float16_t)0.01533920628498821985f,(float16_t)0.99988234745421256111f,
-(float16_t)0.01073765916726457208f,(float16_t)0.99994234967602391162f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)0.00153398018628476615f,(float16_t)0.99999882345170187925f,
-(float16_t)-0.00306795676296601561f,(float16_t)0.99999529380957619118f,
-(float16_t)-0.00766982873953095477f,(float16_t)0.99997058643097413988f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.01687298794728165144f,(float16_t)0.99985764100582386060f,
-(float16_t)-0.02147408027546948359f,(float16_t)0.99976940535121527898f,
-(float16_t)-0.02607471782910391472f,(float16_t)0.99965999674395922270f,
-(float16_t)-0.03067480317663645942f,(float16_t)0.99952941750109314256f,
-(float16_t)-0.03527423889821382219f,(float16_t)0.99937767038800284780f,
-(float16_t)-0.03987292758773972740f,(float16_t)0.99920475861836388631f,
-(float16_t)-0.04447077185493861912f,(float16_t)0.99901068585407337697f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.05366353765273055437f,(float16_t)0.99855907422975931365f,
-(float16_t)-0.05825826450043560673f,(float16_t)0.99830154493389289261f,
-(float16_t)-0.06285175756416130910f,(float16_t)0.99802287377148624081f,
-(float16_t)-0.06744391956366398155f,(float16_t)0.99772306664419163624f,
-(float16_t)-0.07203465324688929083f,(float16_t)0.99740212990127530279f,
-(float16_t)-0.07662386139203150592f,(float16_t)0.99706007033948296225f,
-(float16_t)-0.08121144680959226092f,(float16_t)0.99669689520289606044f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.09038136087786488582f,(float16_t)0.99590722941741172125f,
-(float16_t)-0.09496349532963895002f,(float16_t)0.99548075549192693856f,
-(float16_t)-0.09954361866006931903f,(float16_t)0.99503319943811863180f,
-(float16_t)-0.10412163387205460030f,(float16_t)0.99456457073425541537f,
-(float16_t)-0.10869744401313856386f,(float16_t)0.99407487930487947736f,
-(float16_t)-0.11327095217756423529f,(float16_t)0.99356413552059530403f,
-(float16_t)-0.11784206150832489401f,(float16_t)0.99303235019785141002f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.12697669649688586579f,(float16_t)0.99190570043060932726f,
-(float16_t)-0.13154002870288314386f,(float16_t)0.99131085984611544415f,
-(float16_t)-0.13610057517570606223f,(float16_t)0.99069502544266463406f,
-(float16_t)-0.14065823933284912761f,(float16_t)0.99005821026229712256f,
-(float16_t)-0.14521292465284740825f,(float16_t)0.98940042779138037687f,
-(float16_t)-0.14976453467732150915f,(float16_t)0.98872169196032377858f,
-(float16_t)-0.15431297301302013270f,(float16_t)0.98802201714328352633f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.16339994938297311422f,(float16_t)0.98655991026477551920f,
-(float16_t)-0.16793829497473108936f,(float16_t)0.98579750916756747614f,
-(float16_t)-0.17247308399679592283f,(float16_t)0.98501423101223983814f,
-(float16_t)-0.17700422041214874946f,(float16_t)0.98421009238692902521f,
-(float16_t)-0.18153160826112502146f,(float16_t)0.98338511032155118130f,
-(float16_t)-0.18605515166344649414f,(float16_t)0.98253930228744124076f,
-(float16_t)-0.19057475482025265645f,(float16_t)0.98167268619698311305f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.19960175762113094300f,(float16_t)0.97987710369951763756f,
-(float16_t)-0.20410896609281689584f,(float16_t)0.97894817531906219710f,
-(float16_t)-0.20861185197826331850f,(float16_t)0.97799851493455713936f,
-(float16_t)-0.21311031991609125091f,(float16_t)0.97702814265775439484f,
-(float16_t)-0.21760427463848355800f,(float16_t)0.97603707903903913490f,
-(float16_t)-0.22209362097320348162f,(float16_t)0.97502534506699412020f,
-(float16_t)-0.22657826384560997290f,(float16_t)0.97399296216795583359f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.23553305940497534787f,(float16_t)0.97186633748027939639f,
-(float16_t)-0.24000302244874138768f,(float16_t)0.97077214072895035013f,
-(float16_t)-0.24446790274782409513f,(float16_t)0.96965738512429244800f,
-(float16_t)-0.24892760574572012078f,(float16_t)0.96852209427441737777f,
-(float16_t)-0.25338203699557015902f,(float16_t)0.96736629222232850545f,
-(float16_t)-0.25783110216215882060f,(float16_t)0.96619000344541261516f,
-(float16_t)-0.26227470702391347812f,(float16_t)0.96499325285492043580f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.27114515952680795507f,(float16_t)0.96253846804435916340f,
-(float16_t)-0.27557181931095814376f,(float16_t)0.96128048581132063966f,
-(float16_t)-0.27999264308027327353f,(float16_t)0.96000214573766584625f,
-(float16_t)-0.28440753721127171039f,(float16_t)0.95870347489587159906f,
-(float16_t)-0.28881640820604936870f,(float16_t)0.95738450078897596729f,
-(float16_t)-0.29321916269425857271f,(float16_t)0.95604525134999651659f,
-(float16_t)-0.29761570743508619641f,(float16_t)0.95468575494133833814f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.30638979537086097338f,(float16_t)0.95190613680793234597f,
-(float16_t)-0.31076715274961136393f,(float16_t)0.95048607394948181337f,
-(float16_t)-0.31513792875252233383f,(float16_t)0.94904588185270055689f,
-(float16_t)-0.31950203081601563637f,(float16_t)0.94758559101774120226f,
-(float16_t)-0.32385936651785285356f,(float16_t)0.94610523237040344835f,
-(float16_t)-0.32820984357909255280f,(float16_t)0.94460483726148025685f,
-(float16_t)-0.33255336986604405736f,(float16_t)0.94308443746609349478f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.34121920232028229991f,(float16_t)0.93998375303401404679f,
-(float16_t)-0.34554132496398903829f,(float16_t)0.93840353406310816897f,
-(float16_t)-0.34985612979013491763f,(float16_t)0.93680344173592156043f,
-(float16_t)-0.35416352542049039931f,(float16_t)0.93518350993894761025f,
-(float16_t)-0.35846342063373642928f,(float16_t)0.93354377297883628373f,
-(float16_t)-0.36275572436739711435f,(float16_t)0.93188426558166814750f,
-(float16_t)-0.36704034571976712487f,(float16_t)0.93020502289221906889f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.37558617848921721505f,(float16_t)0.92678747430458174872f,
-(float16_t)-0.37984720892405099413f,(float16_t)0.92504924078267769527f,
-(float16_t)-0.38410019501693493105f,(float16_t)0.92329141671952774661f,
-(float16_t)-0.38834504669882619066f,(float16_t)0.92151403934204201285f,
-(float16_t)-0.39258167407295141427f,(float16_t)0.91971714629122736095f,
-(float16_t)-0.39680998741671030805f,(float16_t)0.91790077562139049672f,
-(float16_t)-0.40102989718357567872f,(float16_t)0.91606496579933172075f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.40944414869225753684f,(float16_t)0.91233518462332285903f,
-(float16_t)-0.41363831223843450235f,(float16_t)0.91044129225806724737f,
-(float16_t)-0.41782371582021227141f,(float16_t)0.90852811871630612117f,
-(float16_t)-0.42200027079979968159f,(float16_t)0.90659570451491533483f,
-(float16_t)-0.42616788872679967071f,(float16_t)0.90464409057824612947f,
-(float16_t)-0.43032648134008272267f,(float16_t)0.90267331823725871498f,
-(float16_t)-0.43447596056965581690f,(float16_t)0.90068342922864685907f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.44274722756456980077f,(float16_t)0.89664647017868026602f,
-(float16_t)-0.44686884016237399253f,(float16_t)0.89459948563138280697f,
-(float16_t)-0.45098098904510369733f,(float16_t)0.89253355540276468894f,
-(float16_t)-0.45508358712634372489f,(float16_t)0.89044872324475798919f,
-(float16_t)-0.45917654752194403400f,(float16_t)0.88834503330959635470f,
-(float16_t)-0.46325978355186014923f,(float16_t)0.88622253014888063838f,
-(float16_t)-0.46733320874198841510f,(float16_t)0.88408125871263498752f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.47545028174715592284f,(float16_t)0.87974259280004740713f,
-(float16_t)-0.47949375766015311928f,(float16_t)0.87754529020726124156f,
-(float16_t)-0.48352707893291846375f,(float16_t)0.87532940310411100349f,
-(float16_t)-0.48755016014843571837f,(float16_t)0.87309497841829020182f,
-(float16_t)-0.49156291610654972990f,(float16_t)0.87084206347007897531f,
-(float16_t)-0.49556526182577237405f,(float16_t)0.86857070597134100609f,
-(float16_t)-0.49955711254508178287f,(float16_t)0.86628095402451310569f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.50750899105297075931f,(float16_t)0.86164646114308141023f,
-(float16_t)-0.51146885043797041259f,(float16_t)0.85930181835700847337f,
-(float16_t)-0.51541787801946303826f,(float16_t)0.85693897741782865118f,
-(float16_t)-0.51935599016558964269f,(float16_t)0.85455798836540053376f,
-(float16_t)-0.52328310347565654137f,(float16_t)0.85215890162391971785f,
-(float16_t)-0.52719913478190105760f,(float16_t)0.84974176800085265970f,
-(float16_t)-0.53110400115125477871f,(float16_t)0.84730663868585853749f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.53887990853100831146f,(float16_t)0.84238259964318595863f,
-(float16_t)-0.54275078486451577842f,(float16_t)0.83989379419599952126f,
-(float16_t)-0.54661016691083474939f,(float16_t)0.83738720161566193578f,
-(float16_t)-0.55045797293660470029f,(float16_t)0.83486287498638012128f,
-(float16_t)-0.55429412145362011444f,(float16_t)0.83232086776792968408f,
-(float16_t)-0.55811853122055610221f,(float16_t)0.82976123379452304540f,
-(float16_t)-0.56193112124468946877f,(float16_t)0.82718402727366902027f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.56952051934694725155f,(float16_t)0.82197711527924144370f,
-(float16_t)-0.57329716669804198226f,(float16_t)0.81934752007679712005f,
-(float16_t)-0.57706167285567933067f,(float16_t)0.81670057286682795628f,
-(float16_t)-0.58081395809576441547f,(float16_t)0.81403632970594852480f,
-(float16_t)-0.58455394295301521534f,(float16_t)0.81135484701706384048f,
-(float16_t)-0.58828154822264522306f,(float16_t)0.80865618158817509364f,
-(float16_t)-0.59199669496204088137f,(float16_t)0.80594039057117639047f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.59938929840056454079f,(float16_t)0.80045766219262282082f,
-(float16_t)-0.60306659854034827539f,(float16_t)0.79769084094339104407f,
-(float16_t)-0.60673112703452458661f,(float16_t)0.79490712632823690154f,
-(float16_t)-0.61038280627630958630f,(float16_t)0.79210657730021227785f,
-(float16_t)-0.61402155893103815831f,(float16_t)0.78928925316888587371f,
-(float16_t)-0.61764730793780375784f,(float16_t)0.78645521359908587833f,
-(float16_t)-0.62125997651108744169f,(float16_t)0.78360451860963831194f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.62844576660183260053f,(float16_t)0.77785340420945314754f,
-(float16_t)-0.63201873593980895105f,(float16_t)0.77495310659487393057f,
-(float16_t)-0.63557832048855611440f,(float16_t)0.77203639715038452351f,
-(float16_t)-0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)-0.64265703396622686494f,(float16_t)0.76615399019631280630f,
-(float16_t)-0.64617601298331639459f,(float16_t)0.76318841726338115805f,
-(float16_t)-0.64968130739068330470f,(float16_t)0.76020668165120230952f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.65665054572942882505f,(float16_t)0.75419497531688928227f,
-(float16_t)-0.66011434206742036768f,(float16_t)0.75116513190968658975f,
-(float16_t)-0.66356415861203965623f,(float16_t)0.74811938045040371481f,
-(float16_t)-0.66699992230363736034f,(float16_t)0.74505778544146605835f,
-(float16_t)-0.67042156038017308717f,(float16_t)0.74198041172083106787f,
-(float16_t)-0.67382900037875603783f,(float16_t)0.73888732446061522463f,
-(float16_t)-0.67722217013718044587f,(float16_t)0.73577858916571359238f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.68396541179731551452f,(float16_t)0.72951443814699701296f,
-(float16_t)-0.68731534089175916336f,(float16_t)0.72635915508434589771f,
-(float16_t)-0.69065071413453438254f,(float16_t)0.72318848930652757101f,
-(float16_t)-0.69397146088965377952f,(float16_t)0.72000250796138176579f,
-(float16_t)-0.69727751083088640449f,(float16_t)0.71680127852109964959f,
-(float16_t)-0.70056879394324822474f,(float16_t)0.71358486878079363525f,
-(float16_t)-0.70384524052448482756f,(float16_t)0.71035334685706241764f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.71035334685706230662f,(float16_t)0.70384524052448504960f,
-(float16_t)-0.71358486878079352422f,(float16_t)0.70056879394324833576f,
-(float16_t)-0.71680127852109953857f,(float16_t)0.69727751083088651551f,
-(float16_t)-0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)-0.72318848930652745999f,(float16_t)0.69065071413453460458f,
-(float16_t)-0.72635915508434578669f,(float16_t)0.68731534089175927438f,
-(float16_t)-0.72951443814699679091f,(float16_t)0.68396541179731562554f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.73577858916571337033f,(float16_t)0.67722217013718055689f,
-(float16_t)-0.73888732446061511361f,(float16_t)0.67382900037875614885f,
-(float16_t)-0.74198041172083095685f,(float16_t)0.67042156038017319819f,
-(float16_t)-0.74505778544146594733f,(float16_t)0.66699992230363758239f,
-(float16_t)-0.74811938045040360379f,(float16_t)0.66356415861203976725f,
-(float16_t)-0.75116513190968636771f,(float16_t)0.66011434206742047870f,
-(float16_t)-0.75419497531688917125f,(float16_t)0.65665054572942904709f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.76020668165120219850f,(float16_t)0.64968130739068341573f,
-(float16_t)-0.76318841726338115805f,(float16_t)0.64617601298331661663f,
-(float16_t)-0.76615399019631280630f,(float16_t)0.64265703396622708699f,
-(float16_t)-0.76910333764557947678f,(float16_t)0.63912444486377584241f,
-(float16_t)-0.77203639715038441249f,(float16_t)0.63557832048855622542f,
-(float16_t)-0.77495310659487381955f,(float16_t)0.63201873593980906207f,
-(float16_t)-0.77785340420945303652f,(float16_t)0.62844576660183271155f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.78360451860963820092f,(float16_t)0.62125997651108755271f,
-(float16_t)-0.78645521359908576731f,(float16_t)0.61764730793780386886f,
-(float16_t)-0.78928925316888576269f,(float16_t)0.61402155893103838036f,
-(float16_t)-0.79210657730021216683f,(float16_t)0.61038280627630969732f,
-(float16_t)-0.79490712632823679051f,(float16_t)0.60673112703452469763f,
-(float16_t)-0.79769084094339093305f,(float16_t)0.60306659854034838641f,
-(float16_t)-0.80045766219262259877f,(float16_t)0.59938929840056465181f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.80594039057117627944f,(float16_t)0.59199669496204099239f,
-(float16_t)-0.80865618158817498262f,(float16_t)0.58828154822264533408f,
-(float16_t)-0.81135484701706372945f,(float16_t)0.58455394295301532637f,
-(float16_t)-0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)-0.81670057286682784525f,(float16_t)0.57706167285567944170f,
-(float16_t)-0.81934752007679700903f,(float16_t)0.57329716669804209328f,
-(float16_t)-0.82197711527924133268f,(float16_t)0.56952051934694747359f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.82718402727366902027f,(float16_t)0.56193112124468957980f,
-(float16_t)-0.82976123379452293438f,(float16_t)0.55811853122055632426f,
-(float16_t)-0.83232086776792957306f,(float16_t)0.55429412145362022546f,
-(float16_t)-0.83486287498638001026f,(float16_t)0.55045797293660492233f,
-(float16_t)-0.83738720161566182476f,(float16_t)0.54661016691083497143f,
-(float16_t)-0.83989379419599952126f,(float16_t)0.54275078486451588944f,
-(float16_t)-0.84238259964318584760f,(float16_t)0.53887990853100842248f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.84730663868585842646f,(float16_t)0.53110400115125488973f,
-(float16_t)-0.84974176800085254868f,(float16_t)0.52719913478190127964f,
-(float16_t)-0.85215890162391960683f,(float16_t)0.52328310347565665239f,
-(float16_t)-0.85455798836540042274f,(float16_t)0.51935599016558975372f,
-(float16_t)-0.85693897741782865118f,(float16_t)0.51541787801946314929f,
-(float16_t)-0.85930181835700836235f,(float16_t)0.51146885043797052361f,
-(float16_t)-0.86164646114308129921f,(float16_t)0.50750899105297098135f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.86628095402451299467f,(float16_t)0.49955711254508189390f,
-(float16_t)-0.86857070597134089507f,(float16_t)0.49556526182577254058f,
-(float16_t)-0.87084206347007886428f,(float16_t)0.49156291610654989643f,
-(float16_t)-0.87309497841829009079f,(float16_t)0.48755016014843588490f,
-(float16_t)-0.87532940310411089246f,(float16_t)0.48352707893291863028f,
-(float16_t)-0.87754529020726113053f,(float16_t)0.47949375766015328582f,
-(float16_t)-0.87974259280004729611f,(float16_t)0.47545028174715608937f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.88408125871263487650f,(float16_t)0.46733320874198858164f,
-(float16_t)-0.88622253014888052736f,(float16_t)0.46325978355186031576f,
-(float16_t)-0.88834503330959624368f,(float16_t)0.45917654752194420054f,
-(float16_t)-0.89044872324475787817f,(float16_t)0.45508358712634389143f,
-(float16_t)-0.89253355540276457791f,(float16_t)0.45098098904510386387f,
-(float16_t)-0.89459948563138269595f,(float16_t)0.44686884016237415906f,
-(float16_t)-0.89664647017868026602f,(float16_t)0.44274722756456996731f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90068342922864674804f,(float16_t)0.43447596056965598343f,
-(float16_t)-0.90267331823725871498f,(float16_t)0.43032648134008288920f,
-(float16_t)-0.90464409057824612947f,(float16_t)0.42616788872679983724f,
-(float16_t)-0.90659570451491533483f,(float16_t)0.42200027079979984812f,
-(float16_t)-0.90852811871630612117f,(float16_t)0.41782371582021243794f,
-(float16_t)-0.91044129225806713634f,(float16_t)0.41363831223843466889f,
-(float16_t)-0.91233518462332274801f,(float16_t)0.40944414869225770337f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.91606496579933172075f,(float16_t)0.40102989718357562321f,
-(float16_t)-0.91790077562139049672f,(float16_t)0.39680998741671025254f,
-(float16_t)-0.91971714629122736095f,(float16_t)0.39258167407295141427f,
-(float16_t)-0.92151403934204179080f,(float16_t)0.38834504669882657923f,
-(float16_t)-0.92329141671952752457f,(float16_t)0.38410019501693531963f,
-(float16_t)-0.92504924078267747323f,(float16_t)0.37984720892405138271f,
-(float16_t)-0.92678747430458174872f,(float16_t)0.37558617848921738158f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.93020502289221906889f,(float16_t)0.36704034571976729140f,
-(float16_t)-0.93188426558166803648f,(float16_t)0.36275572436739728088f,
-(float16_t)-0.93354377297883617270f,(float16_t)0.35846342063373659581f,
-(float16_t)-0.93518350993894761025f,(float16_t)0.35416352542049039931f,
-(float16_t)-0.93680344173592167145f,(float16_t)0.34985612979013486212f,
-(float16_t)-0.93840353406310816897f,(float16_t)0.34554132496398898278f,
-(float16_t)-0.93998375303401382475f,(float16_t)0.34121920232028268849f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94308443746609338376f,(float16_t)0.33255336986604444593f,
-(float16_t)-0.94460483726148014583f,(float16_t)0.32820984357909271933f,
-(float16_t)-0.94610523237040333733f,(float16_t)0.32385936651785302010f,
-(float16_t)-0.94758559101774109124f,(float16_t)0.31950203081601580291f,
-(float16_t)-0.94904588185270055689f,(float16_t)0.31513792875252250036f,
-(float16_t)-0.95048607394948170235f,(float16_t)0.31076715274961153046f,
-(float16_t)-0.95190613680793234597f,(float16_t)0.30638979537086091787f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95468575494133833814f,(float16_t)0.29761570743508614090f,
-(float16_t)-0.95604525134999629454f,(float16_t)0.29321916269425896129f,
-(float16_t)-0.95738450078897585627f,(float16_t)0.28881640820604975728f,
-(float16_t)-0.95870347489587148804f,(float16_t)0.28440753721127209896f,
-(float16_t)-0.96000214573766584625f,(float16_t)0.27999264308027344006f,
-(float16_t)-0.96128048581132063966f,(float16_t)0.27557181931095831029f,
-(float16_t)-0.96253846804435916340f,(float16_t)0.27114515952680812161f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96499325285492032478f,(float16_t)0.26227470702391370017f,
-(float16_t)-0.96619000344541250413f,(float16_t)0.25783110216215898713f,
-(float16_t)-0.96736629222232850545f,(float16_t)0.25338203699557010351f,
-(float16_t)-0.96852209427441737777f,(float16_t)0.24892760574572009302f,
-(float16_t)-0.96965738512429233698f,(float16_t)0.24446790274782448371f,
-(float16_t)-0.97077214072895023911f,(float16_t)0.24000302244874177626f,
-(float16_t)-0.97186633748027928537f,(float16_t)0.23553305940497573645f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97399296216795583359f,(float16_t)0.22657826384561016719f,
-(float16_t)-0.97502534506699412020f,(float16_t)0.22209362097320364815f,
-(float16_t)-0.97603707903903902388f,(float16_t)0.21760427463848372454f,
-(float16_t)-0.97702814265775439484f,(float16_t)0.21311031991609141745f,
-(float16_t)-0.97799851493455713936f,(float16_t)0.20861185197826351279f,
-(float16_t)-0.97894817531906219710f,(float16_t)0.20410896609281684033f,
-(float16_t)-0.97987710369951763756f,(float16_t)0.19960175762113091524f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98167268619698311305f,(float16_t)0.19057475482025307278f,
-(float16_t)-0.98253930228744124076f,(float16_t)0.18605515166344691047f,
-(float16_t)-0.98338511032155118130f,(float16_t)0.18153160826112521575f,
-(float16_t)-0.98421009238692902521f,(float16_t)0.17700422041214894375f,
-(float16_t)-0.98501423101223983814f,(float16_t)0.17247308399679611712f,
-(float16_t)-0.98579750916756736512f,(float16_t)0.16793829497473128365f,
-(float16_t)-0.98655991026477540817f,(float16_t)0.16339994938297328075f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.98802201714328352633f,(float16_t)0.15431297301302007718f,
-(float16_t)-0.98872169196032377858f,(float16_t)0.14976453467732145364f,
-(float16_t)-0.98940042779138037687f,(float16_t)0.14521292465284735274f,
-(float16_t)-0.99005821026229701154f,(float16_t)0.14065823933284954395f,
-(float16_t)-0.99069502544266463406f,(float16_t)0.13610057517570647856f,
-(float16_t)-0.99131085984611544415f,(float16_t)0.13154002870288333815f,
-(float16_t)-0.99190570043060932726f,(float16_t)0.12697669649688606008f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99303235019785141002f,(float16_t)0.11784206150832508830f,
-(float16_t)-0.99356413552059530403f,(float16_t)0.11327095217756441570f,
-(float16_t)-0.99407487930487936634f,(float16_t)0.10869744401313874427f,
-(float16_t)-0.99456457073425541537f,(float16_t)0.10412163387205457254f,
-(float16_t)-0.99503319943811863180f,(float16_t)0.09954361866006927739f,
-(float16_t)-0.99548075549192693856f,(float16_t)0.09496349532963890838f,
-(float16_t)-0.99590722941741172125f,(float16_t)0.09038136087786528827f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99669689520289606044f,(float16_t)0.08121144680959266338f,
-(float16_t)-0.99706007033948296225f,(float16_t)0.07662386139203168633f,
-(float16_t)-0.99740212990127530279f,(float16_t)0.07203465324688947125f,
-(float16_t)-0.99772306664419163624f,(float16_t)0.06744391956366417584f,
-(float16_t)-0.99802287377148624081f,(float16_t)0.06285175756416148951f,
-(float16_t)-0.99830154493389289261f,(float16_t)0.05825826450043579408f,
-(float16_t)-0.99855907422975931365f,(float16_t)0.05366353765273051968f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99901068585407337697f,(float16_t)0.04447077185493858442f,
-(float16_t)-0.99920475861836388631f,(float16_t)0.03987292758774012985f,
-(float16_t)-0.99937767038800284780f,(float16_t)0.03527423889821423159f,
-(float16_t)-0.99952941750109314256f,(float16_t)0.03067480317663686534f,
-(float16_t)-0.99965999674395922270f,(float16_t)0.02607471782910409860f,
-(float16_t)-0.99976940535121527898f,(float16_t)0.02147408027546966747f,
-(float16_t)-0.99985764100582386060f,(float16_t)0.01687298794728183532f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)-0.99997058643097413988f,(float16_t)0.00766982873953113778f,
-(float16_t)-0.99999529380957619118f,(float16_t)0.00306795676296597701f,
-(float16_t)-0.99999882345170187925f,(float16_t)-0.00153398018628480431f,
-(float16_t)-0.99998117528260110909f,(float16_t)-0.00613588464915455420f,
-(float16_t)-0.99994234967602391162f,(float16_t)-0.01073765916726416615f,
-(float16_t)-0.99988234745421256111f,(float16_t)-0.01533920628498781566f,
-(float16_t)-0.99980116988788425569f,(float16_t)-0.01994042855151419158f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99957529604674921764f,(float16_t)-0.02914150876419355565f,
-(float16_t)-0.99943060455546173237f,(float16_t)-0.03374117185137745500f,
-(float16_t)-0.99926474728659442359f,(float16_t)-0.03834012037355261082f,
-(float16_t)-0.99907772775264536147f,(float16_t)-0.04293825693494077861f,
-(float16_t)-0.99886954991428356099f,(float16_t)-0.04753548415695929563f,
-(float16_t)-0.99864021818026527111f,(float16_t)-0.05213170468028335142f,
-(float16_t)-0.99838973740734016094f,(float16_t)-0.05672682116690781762f,
-(float16_t)-0.99811811290014917919f,(float16_t)-0.06132073630220824523f,
-(float16_t)-0.99782535041111164453f,(float16_t)-0.06591335279700352712f,
-(float16_t)-0.99751145614030345410f,(float16_t)-0.07050457338961360620f,
-(float16_t)-0.99717643673532618820f,(float16_t)-0.07509430084792109716f,
-(float16_t)-0.99682029929116577893f,(float16_t)-0.07968243797142994522f,
-(float16_t)-0.99644305135004263008f,(float16_t)-0.08426888759332393231f,
-(float16_t)-0.99604470090125196702f,(float16_t)-0.08885355258252450317f,
-(float16_t)-0.99562525638099430569f,(float16_t)-0.09343633584574773110f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.99472312110432570265f,(float16_t)-0.10259586902243630901f,
-(float16_t)-0.99424044945318790223f,(float16_t)-0.10717242495680891212f,
-(float16_t)-0.99373672194072470987f,(float16_t)-0.11174671121112625394f,
-(float16_t)-0.99321194923479461103f,(float16_t)-0.11631863091190447479f,
-(float16_t)-0.99266614244894801899f,(float16_t)-0.12088808723577681992f,
-(float16_t)-0.99209931314219179654f,(float16_t)-0.12545498341154601163f,
-(float16_t)-0.99151147331874400770f,(float16_t)-0.13001922272223317978f,
-(float16_t)-0.99090263542778000971f,(float16_t)-0.13458070850712605671f,
-(float16_t)-0.99027281236316910817f,(float16_t)-0.13913934416382611747f,
-(float16_t)-0.98962201746320088702f,(float16_t)-0.14369503315029438784f,
-(float16_t)-0.98895026451030298986f,(float16_t)-0.14824767898689603096f,
-(float16_t)-0.98825756773074946437f,(float16_t)-0.15279718525844343535f,
-(float16_t)-0.98754394179435922574f,(float16_t)-0.15734345561623830356f,
-(float16_t)-0.98680940181418552726f,(float16_t)-0.16188639378011149272f,
-(float16_t)-0.98605396334619543897f,(float16_t)-0.16642590354046382650f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.98448045538322093151f,(float16_t)-0.17549425337727120322f,
-(float16_t)-0.98366241921173025453f,(float16_t)-0.18002290140569934818f,
-(float16_t)-0.98282355119870534743f,(float16_t)-0.18454773693861947770f,
-(float16_t)-0.98196386910955524296f,(float16_t)-0.18906866414980610935f,
-(float16_t)-0.98108339115048670553f,(float16_t)-0.19358558729580355173f,
-(float16_t)-0.98018213596811742949f,(float16_t)-0.19809841071795356027f,
-(float16_t)-0.97926012264908202098f,(float16_t)-0.20260703884442113343f,
-(float16_t)-0.97831737071962765473f,(float16_t)-0.20711137619221858808f,
-(float16_t)-0.97735390014519996082f,(float16_t)-0.21161132736922766417f,
-(float16_t)-0.97636973133002125103f,(float16_t)-0.21610679707621921475f,
-(float16_t)-0.97536488511665697665f,(float16_t)-0.22059769010887325669f,
-(float16_t)-0.97433938278557585821f,(float16_t)-0.22508391135979261000f,
-(float16_t)-0.97329324605469824672f,(float16_t)-0.22956536582051870199f,
-(float16_t)-0.97222649707893638027f,(float16_t)-0.23404195858354326365f,
-(float16_t)-0.97113915844972520386f,(float16_t)-0.23851359484431830515f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.96890280477642887202f,(float16_t)-0.24744161916777326904f,
-(float16_t)-0.96775383709347551076f,(float16_t)-0.25189781815421696809f,
-(float16_t)-0.96658437447833311928f,(float16_t)-0.25634868248994291395f,
-(float16_t)-0.96539444169768939830f,(float16_t)-0.26079411791527562503f,
-(float16_t)-0.96418406395174582890f,(float16_t)-0.26523403028551151284f,
-(float16_t)-0.96295326687368398844f,(float16_t)-0.26966832557291481320f,
-(float16_t)-0.96170207652912265139f,(float16_t)-0.27409690986870616225f,
-(float16_t)-0.96043051941556589757f,(float16_t)-0.27851968938505289319f,
-(float16_t)-0.95913862246184200533f,(float16_t)-0.28293657045705516984f,
-(float16_t)-0.95782641302753290802f,(float16_t)-0.28734745954472939999f,
-(float16_t)-0.95649391890239510161f,(float16_t)-0.29175226323498920644f,
-(float16_t)-0.95514116830577078243f,(float16_t)-0.29615088824362378883f,
-(float16_t)-0.95376818988599032512f,(float16_t)-0.30054324141727345454f,
-(float16_t)-0.95237501271976587880f,(float16_t)-0.30492922973540242948f,
-(float16_t)-0.95096166631157508231f,(float16_t)-0.30930876031226878231f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.94807458592227633609f,(float16_t)-0.31804807738501467140f,
-(float16_t)-0.94660091308328364601f,(float16_t)-0.32240767880106963039f,
-(float16_t)-0.94510719328526060501f,(float16_t)-0.32676045232013156694f,
-(float16_t)-0.94359345816196038559f,(float16_t)-0.33110630575987626267f,
-(float16_t)-0.94205973977101742367f,(float16_t)-0.33544514708453149199f,
-(float16_t)-0.94050607059326840620f,(float16_t)-0.33977688440682679571f,
-(float16_t)-0.93893248353206459900f,(float16_t)-0.34410142598993881391f,
-(float16_t)-0.93733901191257495977f,(float16_t)-0.34841868024943456472f,
-(float16_t)-0.93572568948108036935f,(float16_t)-0.35272855575521072646f,
-(float16_t)-0.93409255040425887007f,(float16_t)-0.35703096123343008861f,
-(float16_t)-0.93243962926846246653f,(float16_t)-0.36132580556845395048f,
-(float16_t)-0.93076696107898382326f,(float16_t)-0.36561299780477357624f,
-(float16_t)-0.92907458125931585702f,(float16_t)-0.36989244714893387833f,
-(float16_t)-0.92736252565040111495f,(float16_t)-0.37416406297145782256f,
-(float16_t)-0.92563083050987282618f,(float16_t)-0.37842775480876539307f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.92210866874334518339f,(float16_t)-0.38693100551438852630f,
-(float16_t)-0.92031827670911059425f,(float16_t)-0.39117038430225381518f,
-(float16_t)-0.91850839432521225181f,(float16_t)-0.39540147894781629834f,
-(float16_t)-0.91667905992104270485f,(float16_t)-0.39962419984564684361f,
-(float16_t)-0.91483031223794608611f,(float16_t)-0.40383845756765418544f,
-(float16_t)-0.91296219042839832358f,(float16_t)-0.40804416286497835475f,
-(float16_t)-0.91107473405517647169f,(float16_t)-0.41224122666988260999f,
-(float16_t)-0.90916798309052249127f,(float16_t)-0.41642956009763693048f,
-(float16_t)-0.90724197791529592738f,(float16_t)-0.42060907444840234248f,
-(float16_t)-0.90529675931811881551f,(float16_t)-0.42477968120910863936f,
-(float16_t)-0.90333236849451192807f,(float16_t)-0.42894129205532938176f,
-(float16_t)-0.90134884704602202810f,(float16_t)-0.43309381885315184624f,
-(float16_t)-0.89934623697934157338f,(float16_t)-0.43723717366104403181f,
-(float16_t)-0.89732458070541831763f,(float16_t)-0.44137126873171667052f,
-(float16_t)-0.89528392103855747308f,(float16_t)-0.44549601651398174074f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.89114576479458340597f,(float16_t)-0.45371712100016353686f,
-(float16_t)-0.88904835585466468473f,(float16_t)-0.45781330359887695280f,
-(float16_t)-0.88693211879434230571f,(float16_t)-0.46189979070246250936f,
-(float16_t)-0.88479709843093790056f,(float16_t)-0.46597649576796595916f,
-(float16_t)-0.88264333997956290201f,(float16_t)-0.47004333245959545318f,
-(float16_t)-0.88047088905216086552f,(float16_t)-0.47410021465054985601f,
-(float16_t)-0.87827979165654157523f,(float16_t)-0.47814705642484295334f,
-(float16_t)-0.87607009419540660122f,(float16_t)-0.48218377207912266336f,
-(float16_t)-0.87384184346536686316f,(float16_t)-0.48621027612448636246f,
-(float16_t)-0.87159508665595109012f,(float16_t)-0.49022648328829115938f,
-(float16_t)-0.86932987134860673084f,(float16_t)-0.49423230851595978397f,
-(float16_t)-0.86704624551569287050f,(float16_t)-0.49822766697278153547f,
-(float16_t)-0.86474425751946248919f,(float16_t)-0.50221247404571056627f,
-(float16_t)-0.86242395611104072373f,(float16_t)-0.50618664534515500630f,
-(float16_t)-0.86008539042939025077f,(float16_t)-0.51015009670676658704f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.85535366473519613972f,(float16_t)-0.51804450409599922533f,
-(float16_t)-0.85296060493036374162f,(float16_t)-0.52197529293715427823f,
-(float16_t)-0.85054948126560347976f,(float16_t)-0.52589502747108463065f,
-(float16_t)-0.84812034480329723252f,(float16_t)-0.52980362468629460526f,
-(float16_t)-0.84567324698729906540f,(float16_t)-0.53370100180715296379f,
-(float16_t)-0.84320823964184543620f,(float16_t)-0.53758707629564550512f,
-(float16_t)-0.84072537497045818355f,(float16_t)-0.54146176585312322249f,
-(float16_t)-0.83822470555483818977f,(float16_t)-0.54532498842204613076f,
-(float16_t)-0.83570628435375271525f,(float16_t)-0.54917666218771943321f,
-(float16_t)-0.83317016470191329613f,(float16_t)-0.55301670558002735678f,
-(float16_t)-0.83061640030884642538f,(float16_t)-0.55684503727515988203f,
-(float16_t)-0.82804504525775590729f,(float16_t)-0.56066157619733592021f,
-(float16_t)-0.82545615400437755138f,(float16_t)-0.56446624152051938506f,
-(float16_t)-0.82284978137582642788f,(float16_t)-0.56825895267013148970f,
-(float16_t)-0.82022598256943468620f,(float16_t)-0.57203962932475704850f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.81492632905652662156f,(float16_t)-0.57956455913940574387f,
-(float16_t)-0.81225058658520388200f,(float16_t)-0.58330865293769829094f,
-(float16_t)-0.80955764240405148069f,(float16_t)-0.58704039352091774706f,
-(float16_t)-0.80684755354379944503f,(float16_t)-0.59075970185887394237f,
-(float16_t)-0.80412037739826591753f,(float16_t)-0.59446649918466420992f,
-(float16_t)-0.80137617172314035141f,(float16_t)-0.59816070699634216190f,
-(float16_t)-0.79861499463476093297f,(float16_t)-0.60184224705857991555f,
-(float16_t)-0.79583690460888356633f,(float16_t)-0.60551104140432543410f,
-(float16_t)-0.79304196047944375270f,(float16_t)-0.60916701233645309532f,
-(float16_t)-0.79023022143731003197f,(float16_t)-0.61281008242940970820f,
-(float16_t)-0.78740174702903142911f,(float16_t)-0.61644017453085364622f,
-(float16_t)-0.78455659715557524159f,(float16_t)-0.62005721176328920663f,
-(float16_t)-0.78169483207105938671f,(float16_t)-0.62366111752569464155f,
-(float16_t)-0.77881651238147620031f,(float16_t)-0.62725181549514386070f,
-(float16_t)-0.77592169904340779762f,(float16_t)-0.63082922962842424841f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.77008283699334811878f,(float16_t)-0.63794390362184394405f,
-(float16_t)-0.76713891193582051109f,(float16_t)-0.64148101280858305095f,
-(float16_t)-0.76417874053611678509f,(float16_t)-0.64500453681554381635f,
-(float16_t)-0.76120238548426188974f,(float16_t)-0.64851440102211233008f,
-(float16_t)-0.75820990981301539247f,(float16_t)-0.65201053109695950027f,
-(float16_t)-0.75520137689653654700f,(float16_t)-0.65549285299961534967f,
-(float16_t)-0.75217685044904269986f,(float16_t)-0.65896129298203731661f,
-(float16_t)-0.74913639452345925918f,(float16_t)-0.66241577759017178373f,
-(float16_t)-0.74608007351006400132f,(float16_t)-0.66585623366550938940f,
-(float16_t)-0.74300795213512194071f,(float16_t)-0.66928258834663578725f,
-(float16_t)-0.73992009545951631377f,(float16_t)-0.67269476907077274674f,
-(float16_t)-0.73681656887737001504f,(float16_t)-0.67609270357531581208f,
-(float16_t)-0.73369743811466037187f,(float16_t)-0.67947631989936485564f,
-(float16_t)-0.73056276922782770189f,(float16_t)-0.68284554638524797010f,
-(float16_t)-0.72741262860237587695f,(float16_t)-0.68620031168003847721f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.72106619931450810501f,(float16_t)-0.69286617481742462932f,
-(float16_t)-0.71787004505573170920f,(float16_t)-0.69617713149146298601f,
-(float16_t)-0.71465868786276898206f,(float16_t)-0.69947334464028387835f,
-(float16_t)-0.71143219574521665560f,(float16_t)-0.70275474445722507788f,
-(float16_t)-0.70819063703319551362f,(float16_t)-0.70602126144933952112f,
-(float16_t)-0.70493408037590510329f,(float16_t)-0.70927282643886546687f,
-(float16_t)-0.70166259474016867692f,(float16_t)-0.71250937056469221265f,
-(float16_t)-0.69837624940897302661f,(float16_t)-0.71573082528381848366f,
-(float16_t)-0.69507511398000099145f,(float16_t)-0.71893712237280438249f,
-(float16_t)-0.69175925836415785852f,(float16_t)-0.72212819392921523409f,
-(float16_t)-0.68842875278409054740f,(float16_t)-0.72530397237306065694f,
-(float16_t)-0.68508366777270035541f,(float16_t)-0.72846439044822519637f,
-(float16_t)-0.68172407417164981869f,(float16_t)-0.73160938122389251870f,
-(float16_t)-0.67835004312986146857f,(float16_t)-0.73473887809596349907f,
-(float16_t)-0.67496164610201225820f,(float16_t)-0.73785281478846576064f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.66814204142651867357f,(float16_t)-0.74403374417992906853f,
-(float16_t)-0.66471097820334501538f,(float16_t)-0.74710060598017991040f,
-(float16_t)-0.66126583783999237642f,(float16_t)-0.75015164580621496171f,
-(float16_t)-0.65780669329707874837f,(float16_t)-0.75318679904361240940f,
-(float16_t)-0.65433361783180066240f,(float16_t)-0.75620600141439442421f,
-(float16_t)-0.65084668499638098638f,(float16_t)-0.75920918897838796102f,
-(float16_t)-0.64734596863651250320f,(float16_t)-0.76219629813457856482f,
-(float16_t)-0.64383154288979149715f,(float16_t)-0.76516726562245895860f,
-(float16_t)-0.64030348218415200634f,(float16_t)-0.76812202852336519676f,
-(float16_t)-0.63676186123628419899f,(float16_t)-0.77106052426181381776f,
-(float16_t)-0.63320675505005752370f,(float16_t)-0.77398269060682256537f,
-(float16_t)-0.62963823891492687324f,(float16_t)-0.77688846567323255332f,
-(float16_t)-0.62605638840434374437f,(float16_t)-0.77977778792301433164f,
-(float16_t)-0.62246127937414974518f,(float16_t)-0.78265059616657584041f,
-(float16_t)-0.61885298796097643059f,(float16_t)-0.78550682956405382118f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.61159716392646201744f,(float16_t)-0.79116933021769009216f,
-(float16_t)-0.60794978496777407617f,(float16_t)-0.79397547755433683925f,
-(float16_t)-0.60428953094815607283f,(float16_t)-0.79676481020841871672f,
-(float16_t)-0.60061647938386930612f,(float16_t)-0.79953726910790479110f,
-(float16_t)-0.59693070806219639124f,(float16_t)-0.80229279553811572168f,
-(float16_t)-0.59323229503980012822f,(float16_t)-0.80503133114296343553f,
-(float16_t)-0.58952131864106382952f,(float16_t)-0.80775281792619046950f,
-(float16_t)-0.58579785745643908612f,(float16_t)-0.81045719825259465718f,
-(float16_t)-0.58206199034077532595f,(float16_t)-0.81314441484925370496f,
-(float16_t)-0.57831379641165570060f,(float16_t)-0.81581441080673366972f,
-(float16_t)-0.57455335504771631872f,(float16_t)-0.81846712958029832485f,
-(float16_t)-0.57078074588696736669f,(float16_t)-0.82110251499110464835f,
-(float16_t)-0.56699604882510901138f,(float16_t)-0.82372051122739109452f,
-(float16_t)-0.56319934401383409117f,(float16_t)-0.82632106284566342325f,
-(float16_t)-0.55939071185913646911f,(float16_t)-0.82890411477186465294f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.55173798840470766880f,(float16_t)-0.83401750110601791111f,
-(float16_t)-0.54789405917310007865f,(float16_t)-0.83654772722351211645f,
-(float16_t)-0.54403852673088415326f,(float16_t)-0.83906023707031252012f,
-(float16_t)-0.54017147272989274320f,(float16_t)-0.84155497743689855472f,
-(float16_t)-0.53629297906596329337f,(float16_t)-0.84403189549006629733f,
-(float16_t)-0.53240312787719845655f,(float16_t)-0.84649093877405179320f,
-(float16_t)-0.52850200154222859439f,(float16_t)-0.84893205521163961347f,
-(float16_t)-0.52458968267846928235f,(float16_t)-0.85135519310526486247f,
-(float16_t)-0.52066625414036715735f,(float16_t)-0.85376030113811141042f,
-(float16_t)-0.51673179901765020627f,(float16_t)-0.85614732837519424979f,
-(float16_t)-0.51278640063356295542f,(float16_t)-0.85851622426444285097f,
-(float16_t)-0.50883014254310732216f,(float16_t)-0.86086693863776708735f,
-(float16_t)-0.50486310853126736831f,(float16_t)-0.86319942171212427073f,
-(float16_t)-0.50088538261124104789f,(float16_t)-0.86551362409056897818f,
-(float16_t)-0.49689704902265435793f,(float16_t)-0.86780949676330332299f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.48888889691976367136f,(float16_t)-0.87234605889439120752f,
-(float16_t)-0.48486924800079117537f,(float16_t)-0.87458665227817611321f,
-(float16_t)-0.48083933060033440254f,(float16_t)-0.87680872380914542941f,
-(float16_t)-0.47679923006332214364f,(float16_t)-0.87901222642863341417f,
-(float16_t)-0.47274903195034317926f,(float16_t)-0.88119711347122187117f,
-(float16_t)-0.46868882203582790114f,(float16_t)-0.88336333866573157891f,
-(float16_t)-0.46461868630623814891f,(float16_t)-0.88551085613619973103f,
-(float16_t)-0.46053871095823989412f,(float16_t)-0.88763962040285404598f,
-(float16_t)-0.45644898239688419528f,(float16_t)-0.88974958638307266590f,
-(float16_t)-0.45234958723377066692f,(float16_t)-0.89184070939234283415f,
-(float16_t)-0.44824061228522010802f,(float16_t)-0.89391294514520314163f,
-(float16_t)-0.44412214457042975546f,(float16_t)-0.89596624975618488484f,
-(float16_t)-0.43999427130963336685f,(float16_t)-0.89800057974073976830f,
-(float16_t)-0.43585707992225597440f,(float16_t)-0.90001589201615994629f,
-(float16_t)-0.43171065802505731446f,(float16_t)-0.90201214390249317976f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.42339047414379599177f,(float16_t)-0.90594729780726845902f,
-(float16_t)-0.41921688836322429372f,(float16_t)-0.90788611648766603945f,
-(float16_t)-0.41503442447608152044f,(float16_t)-0.90980570810465233311f,
-(float16_t)-0.41084317105790418845f,(float16_t)-0.91170603200542976730f,
-(float16_t)-0.40664321687036886210f,(float16_t)-0.91358704794525091852f,
-(float16_t)-0.40243465085941865222f,(float16_t)-0.91544871608826772214f,
-(float16_t)-0.39821756215337417162f,(float16_t)-0.91729099700837768427f,
-(float16_t)-0.39399204006104820985f,(float16_t)-0.91911385169005765938f,
-(float16_t)-0.38975817406985696634f,(float16_t)-0.92091724152918930102f,
-(float16_t)-0.38551605384391890441f,(float16_t)-0.92270112833387851747f,
-(float16_t)-0.38126576922216276477f,(float16_t)-0.92446547432526249288f,
-(float16_t)-0.37700741021641820394f,(float16_t)-0.92621024213831137928f,
-(float16_t)-0.37274106700951614712f,(float16_t)-0.92793539482261766516f,
-(float16_t)-0.36846682995337221023f,(float16_t)-0.92964089584318132520f,
-(float16_t)-0.36418478956708016936f,(float16_t)-0.93132670908118031505f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.35559766170478407377f,(float16_t)-0.93463912981968066962f,
-(float16_t)-0.35129275608556687072f,(float16_t)-0.93626566717027837061f,
-(float16_t)-0.34698041084592379235f,(float16_t)-0.93787237643998977443f,
-(float16_t)-0.34266071731199487793f,(float16_t)-0.93945922360218969693f,
-(float16_t)-0.33833376696554123830f,(float16_t)-0.94102617505088925753f,
-(float16_t)-0.33399965144200982614f,(float16_t)-0.94257319760144675502f,
-(float16_t)-0.32965846252858749255f,(float16_t)-0.94410025849127265918f,
-(float16_t)-0.32531029216226331480f,(float16_t)-0.94560732538052116869f,
-(float16_t)-0.32095523242787515894f,(float16_t)-0.94709436635277721717f,
-(float16_t)-0.31659337555616617887f,(float16_t)-0.94856134991573015647f,
-(float16_t)-0.31222481392182477311f,(float16_t)-0.95000824500184311017f,
-(float16_t)-0.30784964004153508865f,(float16_t)-0.95143502096900833820f,
-(float16_t)-0.30346794657201103806f,(float16_t)-0.95284164760119871573f,
-(float16_t)-0.29907982630804058610f,(float16_t)-0.95422809510910555630f,
-(float16_t)-0.29468537218051488180f,(float16_t)-0.95559433413077088382f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.28587783472708105936f,(float16_t)-0.95826607140801756124f,
-(float16_t)-0.28146493792575794091f,(float16_t)-0.95957151308198451733f,
-(float16_t)-0.27704608030610028413f,(float16_t)-0.96085663310767954748f,
-(float16_t)-0.27262135544994886560f,(float16_t)-0.96212140426904158019f,
-(float16_t)-0.26819085706340350939f,(float16_t)-0.96336579978095393528f,
-(float16_t)-0.26375467897483123592f,(float16_t)-0.96458979328981275803f,
-(float16_t)-0.25931291513288645678f,(float16_t)-0.96579335887408357397f,
-(float16_t)-0.25486565960451434965f,(float16_t)-0.96697647104485218161f,
-(float16_t)-0.25041300657296539089f,(float16_t)-0.96813910474636233339f,
-(float16_t)-0.24595505033579515008f,(float16_t)-0.96928123535654830967f,
-(float16_t)-0.24149188530286941345f,(float16_t)-0.97040283868755550234f,
-(float16_t)-0.23702360599436766986f,(float16_t)-0.97150389098625167250f,
-(float16_t)-0.23255030703877521692f,(float16_t)-0.97258436893473221296f,
-(float16_t)-0.22807208317088611960f,(float16_t)-0.97364424965081186603f,
-(float16_t)-0.22358902922978990402f,(float16_t)-0.97468351068851066810f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.21460881099378659176f,(float16_t)-0.97670008612871184184f,
-(float16_t)-0.21011183688046985996f,(float16_t)-0.97767735782450992943f,
-(float16_t)-0.20561041305309901706f,(float16_t)-0.97863392442942320759f,
-(float16_t)-0.20110463484209206708f,(float16_t)-0.97956976568544051887f,
-(float16_t)-0.19659459767008077846f,(float16_t)-0.98048486177346927395f,
-(float16_t)-0.19208039704989252061f,(float16_t)-0.98137919331375456089f,
-(float16_t)-0.18756212858253007436f,(float16_t)-0.98225274136628937249f,
-(float16_t)-0.18303988795514095078f,(float16_t)-0.98310548743121628501f,
-(float16_t)-0.17851377093899792325f,(float16_t)-0.98393741344921881176f,
-(float16_t)-0.17398387338746373887f,(float16_t)-0.98474850180190420801f,
-(float16_t)-0.16945029123396829207f,(float16_t)-0.98553873531217606185f,
-(float16_t)-0.16491312048996975559f,(float16_t)-0.98630809724459866938f,
-(float16_t)-0.16037245724292850668f,(float16_t)-0.98705657130575097380f,
-(float16_t)-0.15582839765426498291f,(float16_t)-0.98778414164457217783f,
-(float16_t)-0.15128103795733036097f,(float16_t)-0.98849079285269658701f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.14217680351944814165f,(float16_t)-0.98984127845882052821f,
-(float16_t)-0.13762012158648653792f,(float16_t)-0.99048508425645698239f,
-(float16_t)-0.13306052515713906459f,(float16_t)-0.99110791372327688986f,
-(float16_t)-0.12849811079379358514f,(float16_t)-0.99170975366909952520f,
-(float16_t)-0.12393297511851208981f,(float16_t)-0.99229059134825736699f,
-(float16_t)-0.11936521481099168773f,(float16_t)-0.99285041445986510489f,
-(float16_t)-0.11479492660650993108f,(float16_t)-0.99338921114808065305f,
-(float16_t)-0.11022220729388330918f,(float16_t)-0.99390697000235606051f,
-(float16_t)-0.10564715371341037997f,(float16_t)-0.99440368005767909576f,
-(float16_t)-0.10106986275482798820f,(float16_t)-0.99487933079480561638f,
-(float16_t)-0.09649043135525316173f,(float16_t)-0.99533391214048216877f,
-(float16_t)-0.09190895649713282101f,(float16_t)-0.99576741446765981713f,
-(float16_t)-0.08732553520619255882f,(float16_t)-0.99617982859569687015f,
-(float16_t)-0.08274026454937570552f,(float16_t)-0.99657114579055483539f,
-(float16_t)-0.07815324163279464831f,(float16_t)-0.99694135776498205015f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)-0.06897432762826707919f,(float16_t)-0.99761843513851955478f,
-(float16_t)-0.06438263092985731240f,(float16_t)-0.99792528619859599548f,
-(float16_t)-0.05978957074664013188f,(float16_t)-0.99821100336047818846f,
-(float16_t)-0.05519524434968971216f,(float16_t)-0.99847558057329477421f,
-(float16_t)-0.05059974903689945513f,(float16_t)-0.99871901223387293811f,
-(float16_t)-0.04600318213091520586f,(float16_t)-0.99894129318685687124f,
-(float16_t)-0.04140564097707683661f,(float16_t)-0.99914241872481690532f,
-(float16_t)-0.03680722294135933131f,(float16_t)-0.99932238458834943273f,
-(float16_t)-0.03220802540830459970f,(float16_t)-0.99948118696616694567f,
-(float16_t)-0.02760814577896616301f,(float16_t)-0.99961882249517863830f,
-(float16_t)-0.02300768146883930970f,(float16_t)-0.99973528826056168306f,
-(float16_t)-0.01840672990580516366f,(float16_t)-0.99983058179582340319f,
-(float16_t)-0.01380538852806025008f,(float16_t)-0.99990470108285289808f,
-(float16_t)-0.00920375478206008311f,(float16_t)-0.99995764455196389786f,
-(float16_t)-0.00460192612044835019f,(float16_t)-0.99998941108192840321f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99983058179582340319f,(float16_t)0.01840672990580482019f,
-(float16_t)0.99932238458834954375f,(float16_t)0.03680722294135883171f,
-(float16_t)0.99847558057329477421f,(float16_t)0.05519524434968993420f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.99576741446765981713f,(float16_t)0.09190895649713272386f,
-(float16_t)0.99390697000235606051f,(float16_t)0.11022220729388305938f,
-(float16_t)0.99170975366909952520f,(float16_t)0.12849811079379316880f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.98630809724459866938f,(float16_t)0.16491312048996989437f,
-(float16_t)0.98310548743121628501f,(float16_t)0.18303988795514095078f,
-(float16_t)0.97956976568544051887f,(float16_t)0.20110463484209190055f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.97150389098625178352f,(float16_t)0.23702360599436719801f,
-(float16_t)0.96697647104485207059f,(float16_t)0.25486565960451457169f,
-(float16_t)0.96212140426904158019f,(float16_t)0.27262135544994897662f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.95143502096900833820f,(float16_t)0.30784964004153486661f,
-(float16_t)0.94560732538052127971f,(float16_t)0.32531029216226292622f,
-(float16_t)0.93945922360218991898f,(float16_t)0.34266071731199437833f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.92621024213831137928f,(float16_t)0.37700741021641825945f,
-(float16_t)0.91911385169005777040f,(float16_t)0.39399204006104809883f,
-(float16_t)0.91170603200542987832f,(float16_t)0.41084317105790391089f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.89596624975618521791f,(float16_t)0.44412214457042920035f,
-(float16_t)0.88763962040285393496f,(float16_t)0.46053871095824000514f,
-(float16_t)0.87901222642863352519f,(float16_t)0.47679923006332208812f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.86086693863776730939f,(float16_t)0.50883014254310698909f,
-(float16_t)0.85135519310526519554f,(float16_t)0.52458968267846894928f,
-(float16_t)0.84155497743689844370f,(float16_t)0.54017147272989285423f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.82110251499110464835f,(float16_t)0.57078074588696725566f,
-(float16_t)0.81045719825259476821f,(float16_t)0.58579785745643886408f,
-(float16_t)0.79953726910790501314f,(float16_t)0.60061647938386897305f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.77688846567323244230f,(float16_t)0.62963823891492698426f,
-(float16_t)0.76516726562245895860f,(float16_t)0.64383154288979138613f,
-(float16_t)0.75318679904361252042f,(float16_t)0.65780669329707863735f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.72846439044822519637f,(float16_t)0.68508366777270035541f,
-(float16_t)0.71573082528381870571f,(float16_t)0.69837624940897280457f,
-(float16_t)0.70275474445722529993f,(float16_t)0.71143219574521643356f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.67609270357531603413f,(float16_t)0.73681656887736979300f,
-(float16_t)0.66241577759017178373f,(float16_t)0.74913639452345925918f,
-(float16_t)0.64851440102211255212f,(float16_t)0.76120238548426177871f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.62005721176328920663f,(float16_t)0.78455659715557524159f,
-(float16_t)0.60551104140432554512f,(float16_t)0.79583690460888345530f,
-(float16_t)0.59075970185887427544f,(float16_t)0.80684755354379922299f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.56066157619733603124f,(float16_t)0.82804504525775579626f,
-(float16_t)0.54532498842204646383f,(float16_t)0.83822470555483796772f,
-(float16_t)0.52980362468629482731f,(float16_t)0.84812034480329712149f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.49822766697278186854f,(float16_t)0.86704624551569264845f,
-(float16_t)0.48218377207912282989f,(float16_t)0.87607009419540660122f,
-(float16_t)0.46597649576796612569f,(float16_t)0.88479709843093778954f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.43309381885315201277f,(float16_t)0.90134884704602202810f,
-(float16_t)0.41642956009763731906f,(float16_t)0.90916798309052226923f,
-(float16_t)0.39962419984564678810f,(float16_t)0.91667905992104270485f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.36561299780477396482f,(float16_t)0.93076696107898371224f,
-(float16_t)0.34841868024943450921f,(float16_t)0.93733901191257495977f,
-(float16_t)0.33110630575987642921f,(float16_t)0.94359345816196038559f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.29615088824362395536f,(float16_t)0.95514116830577067141f,
-(float16_t)0.27851968938505305973f,(float16_t)0.96043051941556578655f,
-(float16_t)0.26079411791527556952f,(float16_t)0.96539444169768939830f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.22508391135979277653f,(float16_t)0.97433938278557585821f,
-(float16_t)0.20711137619221856032f,(float16_t)0.97831737071962765473f,
-(float16_t)0.18906866414980627589f,(float16_t)0.98196386910955524296f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.15279718525844340760f,(float16_t)0.98825756773074946437f,
-(float16_t)0.13458070850712622324f,(float16_t)0.99090263542778000971f,
-(float16_t)0.11631863091190487725f,(float16_t)0.99321194923479450001f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.07968243797143012563f,(float16_t)0.99682029929116566791f,
-(float16_t)0.06132073630220864768f,(float16_t)0.99811811290014917919f,
-(float16_t)0.04293825693494095902f,(float16_t)0.99907772775264536147f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)0.00613588464915451517f,(float16_t)0.99998117528260110909f,
-(float16_t)-0.01227153828571982304f,(float16_t)0.99992470183914450299f,
-(float16_t)-0.03067480317663645942f,(float16_t)0.99952941750109314256f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.06744391956366398155f,(float16_t)0.99772306664419163624f,
-(float16_t)-0.08579731234443975507f,(float16_t)0.99631261218277800129f,
-(float16_t)-0.10412163387205460030f,(float16_t)0.99456457073425541537f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.14065823933284912761f,(float16_t)0.99005821026229712256f,
-(float16_t)-0.15885814333386127917f,(float16_t)0.98730141815785843473f,
-(float16_t)-0.17700422041214874946f,(float16_t)0.98421009238692902521f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.21311031991609125091f,(float16_t)0.97702814265775439484f,
-(float16_t)-0.23105810828067113727f,(float16_t)0.97293995220556017678f,
-(float16_t)-0.24892760574572012078f,(float16_t)0.96852209427441737777f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.28440753721127171039f,(float16_t)0.95870347489587159906f,
-(float16_t)-0.30200594931922808417f,(float16_t)0.95330604035419386211f,
-(float16_t)-0.31950203081601563637f,(float16_t)0.94758559101774120226f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.35416352542049039931f,(float16_t)0.93518350993894761025f,
-(float16_t)-0.37131719395183748755f,(float16_t)0.92850608047321558924f,
-(float16_t)-0.38834504669882619066f,(float16_t)0.92151403934204201285f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.42200027079979968159f,(float16_t)0.90659570451491533483f,
-(float16_t)-0.43861623853852738097f,(float16_t)0.89867446569395392775f,
-(float16_t)-0.45508358712634372489f,(float16_t)0.89044872324475798919f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.48755016014843571837f,(float16_t)0.87309497841829020182f,
-(float16_t)-0.50353838372571746440f,(float16_t)0.86397285612158680745f,
-(float16_t)-0.51935599016558964269f,(float16_t)0.85455798836540053376f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.55045797293660470029f,(float16_t)0.83486287498638012128f,
-(float16_t)-0.56573181078361323149f,(float16_t)0.82458930278502517996f,
-(float16_t)-0.58081395809576441547f,(float16_t)0.81403632970594852480f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.61038280627630958630f,(float16_t)0.79210657730021227785f,
-(float16_t)-0.62485948814238623239f,(float16_t)0.78073722857209459924f,
-(float16_t)-0.63912444486377573138f,(float16_t)0.76910333764557958780f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.66699992230363736034f,(float16_t)0.74505778544146605835f,
-(float16_t)-0.68060099779545302212f,(float16_t)0.73265427167241281570f,
-(float16_t)-0.69397146088965377952f,(float16_t)0.72000250796138176579f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.72000250796138165477f,(float16_t)0.69397146088965389055f,
-(float16_t)-0.73265427167241270467f,(float16_t)0.68060099779545324417f,
-(float16_t)-0.74505778544146594733f,(float16_t)0.66699992230363758239f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.76910333764557947678f,(float16_t)0.63912444486377584241f,
-(float16_t)-0.78073722857209448822f,(float16_t)0.62485948814238634341f,
-(float16_t)-0.79210657730021216683f,(float16_t)0.61038280627630969732f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.81403632970594841378f,(float16_t)0.58081395809576452649f,
-(float16_t)-0.82458930278502506894f,(float16_t)0.56573181078361345353f,
-(float16_t)-0.83486287498638001026f,(float16_t)0.55045797293660492233f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.85455798836540042274f,(float16_t)0.51935599016558975372f,
-(float16_t)-0.86397285612158669643f,(float16_t)0.50353838372571757542f,
-(float16_t)-0.87309497841829009079f,(float16_t)0.48755016014843588490f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.89044872324475787817f,(float16_t)0.45508358712634389143f,
-(float16_t)-0.89867446569395392775f,(float16_t)0.43861623853852754751f,
-(float16_t)-0.90659570451491533483f,(float16_t)0.42200027079979984812f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.92151403934204179080f,(float16_t)0.38834504669882657923f,
-(float16_t)-0.92850608047321547822f,(float16_t)0.37131719395183770960f,
-(float16_t)-0.93518350993894761025f,(float16_t)0.35416352542049039931f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.94758559101774109124f,(float16_t)0.31950203081601580291f,
-(float16_t)-0.95330604035419386211f,(float16_t)0.30200594931922802866f,
-(float16_t)-0.95870347489587148804f,(float16_t)0.28440753721127209896f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.96852209427441737777f,(float16_t)0.24892760574572009302f,
-(float16_t)-0.97293995220556006576f,(float16_t)0.23105810828067133156f,
-(float16_t)-0.97702814265775439484f,(float16_t)0.21311031991609141745f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.98421009238692902521f,(float16_t)0.17700422041214894375f,
-(float16_t)-0.98730141815785843473f,(float16_t)0.15885814333386147346f,
-(float16_t)-0.99005821026229701154f,(float16_t)0.14065823933284954395f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99456457073425541537f,(float16_t)0.10412163387205457254f,
-(float16_t)-0.99631261218277800129f,(float16_t)0.08579731234444015753f,
-(float16_t)-0.99772306664419163624f,(float16_t)0.06744391956366417584f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99952941750109314256f,(float16_t)0.03067480317663686534f,
-(float16_t)-0.99992470183914450299f,(float16_t)0.01227153828572000692f,
-(float16_t)-0.99998117528260110909f,(float16_t)-0.00613588464915455420f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99907772775264536147f,(float16_t)-0.04293825693494077861f,
-(float16_t)-0.99811811290014917919f,(float16_t)-0.06132073630220824523f,
-(float16_t)-0.99682029929116577893f,(float16_t)-0.07968243797142994522f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.99321194923479461103f,(float16_t)-0.11631863091190447479f,
-(float16_t)-0.99090263542778000971f,(float16_t)-0.13458070850712605671f,
-(float16_t)-0.98825756773074946437f,(float16_t)-0.15279718525844343535f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.98196386910955524296f,(float16_t)-0.18906866414980610935f,
-(float16_t)-0.97831737071962765473f,(float16_t)-0.20711137619221858808f,
-(float16_t)-0.97433938278557585821f,(float16_t)-0.22508391135979261000f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.96539444169768939830f,(float16_t)-0.26079411791527562503f,
-(float16_t)-0.96043051941556589757f,(float16_t)-0.27851968938505289319f,
-(float16_t)-0.95514116830577078243f,(float16_t)-0.29615088824362378883f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.94359345816196038559f,(float16_t)-0.33110630575987626267f,
-(float16_t)-0.93733901191257495977f,(float16_t)-0.34841868024943456472f,
-(float16_t)-0.93076696107898382326f,(float16_t)-0.36561299780477357624f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.91667905992104270485f,(float16_t)-0.39962419984564684361f,
-(float16_t)-0.90916798309052249127f,(float16_t)-0.41642956009763693048f,
-(float16_t)-0.90134884704602202810f,(float16_t)-0.43309381885315184624f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.88479709843093790056f,(float16_t)-0.46597649576796595916f,
-(float16_t)-0.87607009419540660122f,(float16_t)-0.48218377207912266336f,
-(float16_t)-0.86704624551569287050f,(float16_t)-0.49822766697278153547f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.84812034480329723252f,(float16_t)-0.52980362468629460526f,
-(float16_t)-0.83822470555483818977f,(float16_t)-0.54532498842204613076f,
-(float16_t)-0.82804504525775590729f,(float16_t)-0.56066157619733592021f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.80684755354379944503f,(float16_t)-0.59075970185887394237f,
-(float16_t)-0.79583690460888356633f,(float16_t)-0.60551104140432543410f,
-(float16_t)-0.78455659715557524159f,(float16_t)-0.62005721176328920663f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.76120238548426188974f,(float16_t)-0.64851440102211233008f,
-(float16_t)-0.74913639452345925918f,(float16_t)-0.66241577759017178373f,
-(float16_t)-0.73681656887737001504f,(float16_t)-0.67609270357531581208f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.71143219574521665560f,(float16_t)-0.70275474445722507788f,
-(float16_t)-0.69837624940897302661f,(float16_t)-0.71573082528381848366f,
-(float16_t)-0.68508366777270035541f,(float16_t)-0.72846439044822519637f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.65780669329707874837f,(float16_t)-0.75318679904361240940f,
-(float16_t)-0.64383154288979149715f,(float16_t)-0.76516726562245895860f,
-(float16_t)-0.62963823891492687324f,(float16_t)-0.77688846567323255332f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.60061647938386930612f,(float16_t)-0.79953726910790479110f,
-(float16_t)-0.58579785745643908612f,(float16_t)-0.81045719825259465718f,
-(float16_t)-0.57078074588696736669f,(float16_t)-0.82110251499110464835f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.54017147272989274320f,(float16_t)-0.84155497743689855472f,
-(float16_t)-0.52458968267846928235f,(float16_t)-0.85135519310526486247f,
-(float16_t)-0.50883014254310732216f,(float16_t)-0.86086693863776708735f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.47679923006332214364f,(float16_t)-0.87901222642863341417f,
-(float16_t)-0.46053871095823989412f,(float16_t)-0.88763962040285404598f,
-(float16_t)-0.44412214457042975546f,(float16_t)-0.89596624975618488484f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.41084317105790418845f,(float16_t)-0.91170603200542976730f,
-(float16_t)-0.39399204006104820985f,(float16_t)-0.91911385169005765938f,
-(float16_t)-0.37700741021641820394f,(float16_t)-0.92621024213831137928f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.34266071731199487793f,(float16_t)-0.93945922360218969693f,
-(float16_t)-0.32531029216226331480f,(float16_t)-0.94560732538052116869f,
-(float16_t)-0.30784964004153508865f,(float16_t)-0.95143502096900833820f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.27262135544994886560f,(float16_t)-0.96212140426904158019f,
-(float16_t)-0.25486565960451434965f,(float16_t)-0.96697647104485218161f,
-(float16_t)-0.23702360599436766986f,(float16_t)-0.97150389098625167250f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.20110463484209206708f,(float16_t)-0.97956976568544051887f,
-(float16_t)-0.18303988795514095078f,(float16_t)-0.98310548743121628501f,
-(float16_t)-0.16491312048996975559f,(float16_t)-0.98630809724459866938f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.12849811079379358514f,(float16_t)-0.99170975366909952520f,
-(float16_t)-0.11022220729388330918f,(float16_t)-0.99390697000235606051f,
-(float16_t)-0.09190895649713282101f,(float16_t)-0.99576741446765981713f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)-0.05519524434968971216f,(float16_t)-0.99847558057329477421f,
-(float16_t)-0.03680722294135933131f,(float16_t)-0.99932238458834943273f,
-(float16_t)-0.01840672990580516366f,(float16_t)-0.99983058179582340319f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.99729045667869020697f,(float16_t)0.07356456359966742631f,
-(float16_t)0.98917650996478101444f,(float16_t)0.14673047445536174793f,
-(float16_t)0.97570213003852857003f,(float16_t)0.21910124015686979759f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.93299279883473895669f,(float16_t)0.35989503653498811087f,
-(float16_t)0.90398929312344333820f,(float16_t)0.42755509343028208491f,
-(float16_t)0.87008699110871146054f,(float16_t)0.49289819222978403790f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.78834642762660622761f,(float16_t)0.61523159058062681925f,
-(float16_t)0.74095112535495921691f,(float16_t)0.67155895484701833009f,
-(float16_t)0.68954054473706694051f,(float16_t)0.72424708295146689174f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.57580819141784533866f,(float16_t)0.81758481315158371139f,
-(float16_t)0.51410274419322166128f,(float16_t)0.85772861000027211809f,
-(float16_t)0.44961132965460659516f,(float16_t)0.89322430119551532446f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.31368174039889157312f,(float16_t)0.94952818059303667475f,
-(float16_t)0.24298017990326398197f,(float16_t)0.97003125319454397424f,
-(float16_t)0.17096188876030135595f,(float16_t)0.98527764238894122162f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)0.02454122852291226384f,(float16_t)0.99969881869620424997f,
-(float16_t)-0.04906767432741800800f,(float16_t)0.99879545620517240501f,
-(float16_t)-0.12241067519921615403f,(float16_t)0.99247953459870996706f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.26671275747489830987f,(float16_t)0.96377606579543984022f,
-(float16_t)-0.33688985339221994009f,(float16_t)0.94154406518302080631f,
-(float16_t)-0.40524131400498974998f,(float16_t)0.91420975570353069095f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.53499761988709704230f,(float16_t)0.84485356524970722791f,
-(float16_t)-0.59569930449243335691f,(float16_t)0.80320753148064494287f,
-(float16_t)-0.65317284295377653347f,(float16_t)0.75720884650648467851f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.75720884650648467851f,(float16_t)0.65317284295377664449f,
-(float16_t)-0.80320753148064483184f,(float16_t)0.59569930449243346793f,
-(float16_t)-0.84485356524970711689f,(float16_t)0.53499761988709715332f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.91420975570353069095f,(float16_t)0.40524131400498991651f,
-(float16_t)-0.94154406518302069529f,(float16_t)0.33688985339222032867f,
-(float16_t)-0.96377606579543984022f,(float16_t)0.26671275747489847641f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99247953459870996706f,(float16_t)0.12241067519921634832f,
-(float16_t)-0.99879545620517240501f,(float16_t)0.04906767432741796636f,
-(float16_t)-0.99969881869620424997f,(float16_t)-0.02454122852291207996f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.98527764238894133264f,(float16_t)-0.17096188876030096737f,
-(float16_t)-0.97003125319454397424f,(float16_t)-0.24298017990326381543f,
-(float16_t)-0.94952818059303678577f,(float16_t)-0.31368174039889118454f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.89322430119551532446f,(float16_t)-0.44961132965460665067f,
-(float16_t)-0.85772861000027211809f,(float16_t)-0.51410274419322155026f,
-(float16_t)-0.81758481315158371139f,(float16_t)-0.57580819141784533866f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.72424708295146700276f,(float16_t)-0.68954054473706682948f,
-(float16_t)-0.67155895484701866316f,(float16_t)-0.74095112535495888384f,
-(float16_t)-0.61523159058062726334f,(float16_t)-0.78834642762660589455f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.49289819222978420443f,(float16_t)-0.87008699110871134952f,
-(float16_t)-0.42755509343028247349f,(float16_t)-0.90398929312344311615f,
-(float16_t)-0.35989503653498794433f,(float16_t)-0.93299279883473895669f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)-0.21910124015687010290f,(float16_t)-0.97570213003852845901f,
-(float16_t)-0.14673047445536230304f,(float16_t)-0.98917650996478090342f,
-(float16_t)-0.07356456359966735692f,(float16_t)-0.99729045667869020697f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.95694033573220882438f,(float16_t)0.29028467725446233105f,
-(float16_t)0.83146961230254523567f,(float16_t)0.55557023301960217765f,
-(float16_t)0.63439328416364548779f,(float16_t)0.77301045336273688235f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)0.09801714032956077016f,(float16_t)0.99518472667219681771f,
-(float16_t)-0.19509032201612819257f,(float16_t)0.98078528040323043058f,
-(float16_t)-0.47139673682599769755f,(float16_t)0.88192126434835504956f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.88192126434835493853f,(float16_t)0.47139673682599780857f,
-(float16_t)-0.98078528040323043058f,(float16_t)0.19509032201612860891f,
-(float16_t)-0.99518472667219692873f,(float16_t)-0.09801714032956058975f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,
-(float16_t)-0.77301045336273710440f,(float16_t)-0.63439328416364526575f,
-(float16_t)-0.55557023301960217765f,(float16_t)-0.83146961230254523567f,
-(float16_t)-0.29028467725446244208f,(float16_t)-0.95694033573220882438f,
-(float16_t)1.00000000000000000000f,(float16_t)0.00000000000000000000f,
-(float16_t)0.38268343236508983729f,(float16_t)0.92387953251128673848f,
-(float16_t)-0.70710678118654746172f,(float16_t)0.70710678118654757274f,
-(float16_t)-0.92387953251128684951f,(float16_t)-0.38268343236508967076f,};
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0046005249023f,
+(float16_t)1.0000000000000f,(float16_t)0.0092010498047f,
+(float16_t)1.0000000000000f,(float16_t)0.0138015747070f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)0.9995117187500f,(float16_t)0.0230102539062f,
+(float16_t)0.9995117187500f,(float16_t)0.0276031494141f,
+(float16_t)0.9995117187500f,(float16_t)0.0321960449219f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9990234375000f,(float16_t)0.0414123535156f,
+(float16_t)0.9990234375000f,(float16_t)0.0459899902344f,
+(float16_t)0.9985351562500f,(float16_t)0.0505981445312f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9980468750000f,(float16_t)0.0597839355469f,
+(float16_t)0.9980468750000f,(float16_t)0.0643920898438f,
+(float16_t)0.9975585937500f,(float16_t)0.0689697265625f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9970703125000f,(float16_t)0.0781250000000f,
+(float16_t)0.9965820312500f,(float16_t)0.0827636718750f,
+(float16_t)0.9960937500000f,(float16_t)0.0873413085938f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9951171875000f,(float16_t)0.0964965820312f,
+(float16_t)0.9951171875000f,(float16_t)0.1010742187500f,
+(float16_t)0.9946289062500f,(float16_t)0.1056518554688f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9931640625000f,(float16_t)0.1148071289062f,
+(float16_t)0.9926757812500f,(float16_t)0.1193847656250f,
+(float16_t)0.9921875000000f,(float16_t)0.1239624023438f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9912109375000f,(float16_t)0.1330566406250f,
+(float16_t)0.9907226562500f,(float16_t)0.1375732421875f,
+(float16_t)0.9897460937500f,(float16_t)0.1422119140625f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9882812500000f,(float16_t)0.1512451171875f,
+(float16_t)0.9877929687500f,(float16_t)0.1558837890625f,
+(float16_t)0.9868164062500f,(float16_t)0.1604003906250f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9853515625000f,(float16_t)0.1694335937500f,
+(float16_t)0.9848632812500f,(float16_t)0.1739501953125f,
+(float16_t)0.9838867187500f,(float16_t)0.1784667968750f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9824218750000f,(float16_t)0.1876220703125f,
+(float16_t)0.9814453125000f,(float16_t)0.1921386718750f,
+(float16_t)0.9804687500000f,(float16_t)0.1966552734375f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9785156250000f,(float16_t)0.2055664062500f,
+(float16_t)0.9775390625000f,(float16_t)0.2100830078125f,
+(float16_t)0.9765625000000f,(float16_t)0.2145996093750f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9746093750000f,(float16_t)0.2236328125000f,
+(float16_t)0.9736328125000f,(float16_t)0.2280273437500f,
+(float16_t)0.9726562500000f,(float16_t)0.2325439453125f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9702148437500f,(float16_t)0.2414550781250f,
+(float16_t)0.9692382812500f,(float16_t)0.2459716796875f,
+(float16_t)0.9682617187500f,(float16_t)0.2504882812500f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9658203125000f,(float16_t)0.2592773437500f,
+(float16_t)0.9643554687500f,(float16_t)0.2636718750000f,
+(float16_t)0.9633789062500f,(float16_t)0.2683105468750f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9609375000000f,(float16_t)0.2770996093750f,
+(float16_t)0.9594726562500f,(float16_t)0.2814941406250f,
+(float16_t)0.9584960937500f,(float16_t)0.2858886718750f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9555664062500f,(float16_t)0.2946777343750f,
+(float16_t)0.9541015625000f,(float16_t)0.2990722656250f,
+(float16_t)0.9526367187500f,(float16_t)0.3034667968750f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9501953125000f,(float16_t)0.3122558593750f,
+(float16_t)0.9487304687500f,(float16_t)0.3166503906250f,
+(float16_t)0.9472656250000f,(float16_t)0.3210449218750f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9443359375000f,(float16_t)0.3295898437500f,
+(float16_t)0.9423828125000f,(float16_t)0.3339843750000f,
+(float16_t)0.9409179687500f,(float16_t)0.3383789062500f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9379882812500f,(float16_t)0.3469238281250f,
+(float16_t)0.9360351562500f,(float16_t)0.3513183593750f,
+(float16_t)0.9345703125000f,(float16_t)0.3557128906250f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9311523437500f,(float16_t)0.3642578125000f,
+(float16_t)0.9296875000000f,(float16_t)0.3684082031250f,
+(float16_t)0.9277343750000f,(float16_t)0.3728027343750f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9243164062500f,(float16_t)0.3813476562500f,
+(float16_t)0.9228515625000f,(float16_t)0.3854980468750f,
+(float16_t)0.9208984375000f,(float16_t)0.3896484375000f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9174804687500f,(float16_t)0.3981933593750f,
+(float16_t)0.9155273437500f,(float16_t)0.4023437500000f,
+(float16_t)0.9135742187500f,(float16_t)0.4067382812500f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9096679687500f,(float16_t)0.4150390625000f,
+(float16_t)0.9077148437500f,(float16_t)0.4191894531250f,
+(float16_t)0.9057617187500f,(float16_t)0.4233398437500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.9018554687500f,(float16_t)0.4316406250000f,
+(float16_t)0.8999023437500f,(float16_t)0.4357910156250f,
+(float16_t)0.8979492187500f,(float16_t)0.4399414062500f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8940429687500f,(float16_t)0.4482421875000f,
+(float16_t)0.8916015625000f,(float16_t)0.4523925781250f,
+(float16_t)0.8896484375000f,(float16_t)0.4565429687500f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8857421875000f,(float16_t)0.4645996093750f,
+(float16_t)0.8833007812500f,(float16_t)0.4687500000000f,
+(float16_t)0.8813476562500f,(float16_t)0.4726562500000f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8769531250000f,(float16_t)0.4809570312500f,
+(float16_t)0.8745117187500f,(float16_t)0.4848632812500f,
+(float16_t)0.8725585937500f,(float16_t)0.4887695312500f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8676757812500f,(float16_t)0.4968261718750f,
+(float16_t)0.8657226562500f,(float16_t)0.5009765625000f,
+(float16_t)0.8632812500000f,(float16_t)0.5048828125000f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8583984375000f,(float16_t)0.5126953125000f,
+(float16_t)0.8559570312500f,(float16_t)0.5166015625000f,
+(float16_t)0.8540039062500f,(float16_t)0.5205078125000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8491210937500f,(float16_t)0.5283203125000f,
+(float16_t)0.8466796875000f,(float16_t)0.5322265625000f,
+(float16_t)0.8442382812500f,(float16_t)0.5361328125000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8388671875000f,(float16_t)0.5439453125000f,
+(float16_t)0.8364257812500f,(float16_t)0.5478515625000f,
+(float16_t)0.8339843750000f,(float16_t)0.5517578125000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8291015625000f,(float16_t)0.5595703125000f,
+(float16_t)0.8261718750000f,(float16_t)0.5629882812500f,
+(float16_t)0.8237304687500f,(float16_t)0.5668945312500f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8183593750000f,(float16_t)0.5747070312500f,
+(float16_t)0.8159179687500f,(float16_t)0.5781250000000f,
+(float16_t)0.8129882812500f,(float16_t)0.5820312500000f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.8076171875000f,(float16_t)0.5893554687500f,
+(float16_t)0.8051757812500f,(float16_t)0.5932617187500f,
+(float16_t)0.8022460937500f,(float16_t)0.5971679687500f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7968750000000f,(float16_t)0.6044921875000f,
+(float16_t)0.7939453125000f,(float16_t)0.6079101562500f,
+(float16_t)0.7910156250000f,(float16_t)0.6118164062500f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7856445312500f,(float16_t)0.6186523437500f,
+(float16_t)0.7827148437500f,(float16_t)0.6225585937500f,
+(float16_t)0.7797851562500f,(float16_t)0.6259765625000f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7739257812500f,(float16_t)0.6333007812500f,
+(float16_t)0.7709960937500f,(float16_t)0.6367187500000f,
+(float16_t)0.7680664062500f,(float16_t)0.6401367187500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7622070312500f,(float16_t)0.6474609375000f,
+(float16_t)0.7592773437500f,(float16_t)0.6508789062500f,
+(float16_t)0.7563476562500f,(float16_t)0.6542968750000f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7500000000000f,(float16_t)0.6611328125000f,
+(float16_t)0.7470703125000f,(float16_t)0.6645507812500f,
+(float16_t)0.7441406250000f,(float16_t)0.6679687500000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7377929687500f,(float16_t)0.6748046875000f,
+(float16_t)0.7348632812500f,(float16_t)0.6782226562500f,
+(float16_t)0.7314453125000f,(float16_t)0.6816406250000f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7250976562500f,(float16_t)0.6884765625000f,
+(float16_t)0.7221679687500f,(float16_t)0.6918945312500f,
+(float16_t)0.7187500000000f,(float16_t)0.6953125000000f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7124023437500f,(float16_t)0.7016601562500f,
+(float16_t)0.7094726562500f,(float16_t)0.7050781250000f,
+(float16_t)0.7060546875000f,(float16_t)0.7080078125000f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.6997070312500f,(float16_t)0.7148437500000f,
+(float16_t)0.6962890625000f,(float16_t)0.7177734375000f,
+(float16_t)0.6928710937500f,(float16_t)0.7211914062500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6860351562500f,(float16_t)0.7275390625000f,
+(float16_t)0.6826171875000f,(float16_t)0.7304687500000f,
+(float16_t)0.6796875000000f,(float16_t)0.7338867187500f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6728515625000f,(float16_t)0.7397460937500f,
+(float16_t)0.6694335937500f,(float16_t)0.7431640625000f,
+(float16_t)0.6660156250000f,(float16_t)0.7460937500000f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6591796875000f,(float16_t)0.7519531250000f,
+(float16_t)0.6552734375000f,(float16_t)0.7553710937500f,
+(float16_t)0.6518554687500f,(float16_t)0.7583007812500f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6450195312500f,(float16_t)0.7641601562500f,
+(float16_t)0.6416015625000f,(float16_t)0.7670898437500f,
+(float16_t)0.6381835937500f,(float16_t)0.7700195312500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6308593750000f,(float16_t)0.7758789062500f,
+(float16_t)0.6274414062500f,(float16_t)0.7788085937500f,
+(float16_t)0.6235351562500f,(float16_t)0.7817382812500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6162109375000f,(float16_t)0.7875976562500f,
+(float16_t)0.6127929687500f,(float16_t)0.7900390625000f,
+(float16_t)0.6093750000000f,(float16_t)0.7929687500000f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.6020507812500f,(float16_t)0.7988281250000f,
+(float16_t)0.5981445312500f,(float16_t)0.8012695312500f,
+(float16_t)0.5942382812500f,(float16_t)0.8041992187500f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5869140625000f,(float16_t)0.8095703125000f,
+(float16_t)0.5834960937500f,(float16_t)0.8120117187500f,
+(float16_t)0.5795898437500f,(float16_t)0.8149414062500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5722656250000f,(float16_t)0.8203125000000f,
+(float16_t)0.5683593750000f,(float16_t)0.8227539062500f,
+(float16_t)0.5644531250000f,(float16_t)0.8256835937500f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5566406250000f,(float16_t)0.8305664062500f,
+(float16_t)0.5532226562500f,(float16_t)0.8330078125000f,
+(float16_t)0.5493164062500f,(float16_t)0.8359375000000f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5415039062500f,(float16_t)0.8408203125000f,
+(float16_t)0.5375976562500f,(float16_t)0.8432617187500f,
+(float16_t)0.5336914062500f,(float16_t)0.8457031250000f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5258789062500f,(float16_t)0.8505859375000f,
+(float16_t)0.5219726562500f,(float16_t)0.8530273437500f,
+(float16_t)0.5180664062500f,(float16_t)0.8554687500000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.5102539062500f,(float16_t)0.8598632812500f,
+(float16_t)0.5063476562500f,(float16_t)0.8623046875000f,
+(float16_t)0.5024414062500f,(float16_t)0.8647460937500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4941406250000f,(float16_t)0.8691406250000f,
+(float16_t)0.4902343750000f,(float16_t)0.8715820312500f,
+(float16_t)0.4863281250000f,(float16_t)0.8740234375000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4780273437500f,(float16_t)0.8784179687500f,
+(float16_t)0.4741210937500f,(float16_t)0.8803710937500f,
+(float16_t)0.4699707031250f,(float16_t)0.8828125000000f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4619140625000f,(float16_t)0.8867187500000f,
+(float16_t)0.4577636718750f,(float16_t)0.8891601562500f,
+(float16_t)0.4536132812500f,(float16_t)0.8911132812500f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4455566406250f,(float16_t)0.8955078125000f,
+(float16_t)0.4414062500000f,(float16_t)0.8974609375000f,
+(float16_t)0.4372558593750f,(float16_t)0.8994140625000f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4289550781250f,(float16_t)0.9033203125000f,
+(float16_t)0.4248046875000f,(float16_t)0.9052734375000f,
+(float16_t)0.4206542968750f,(float16_t)0.9072265625000f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.4123535156250f,(float16_t)0.9111328125000f,
+(float16_t)0.4079589843750f,(float16_t)0.9130859375000f,
+(float16_t)0.4038085937500f,(float16_t)0.9150390625000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3955078125000f,(float16_t)0.9184570312500f,
+(float16_t)0.3911132812500f,(float16_t)0.9204101562500f,
+(float16_t)0.3869628906250f,(float16_t)0.9218750000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3784179687500f,(float16_t)0.9257812500000f,
+(float16_t)0.3742675781250f,(float16_t)0.9272460937500f,
+(float16_t)0.3698730468750f,(float16_t)0.9291992187500f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3613281250000f,(float16_t)0.9326171875000f,
+(float16_t)0.3569335937500f,(float16_t)0.9340820312500f,
+(float16_t)0.3527832031250f,(float16_t)0.9355468750000f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3439941406250f,(float16_t)0.9389648437500f,
+(float16_t)0.3398437500000f,(float16_t)0.9404296875000f,
+(float16_t)0.3354492187500f,(float16_t)0.9418945312500f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3266601562500f,(float16_t)0.9453125000000f,
+(float16_t)0.3225097656250f,(float16_t)0.9467773437500f,
+(float16_t)0.3181152343750f,(float16_t)0.9482421875000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.3093261718750f,(float16_t)0.9511718750000f,
+(float16_t)0.3049316406250f,(float16_t)0.9521484375000f,
+(float16_t)0.3005371093750f,(float16_t)0.9536132812500f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2917480468750f,(float16_t)0.9565429687500f,
+(float16_t)0.2873535156250f,(float16_t)0.9580078125000f,
+(float16_t)0.2829589843750f,(float16_t)0.9589843750000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2741699218750f,(float16_t)0.9619140625000f,
+(float16_t)0.2697753906250f,(float16_t)0.9628906250000f,
+(float16_t)0.2651367187500f,(float16_t)0.9643554687500f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2563476562500f,(float16_t)0.9667968750000f,
+(float16_t)0.2519531250000f,(float16_t)0.9677734375000f,
+(float16_t)0.2474365234375f,(float16_t)0.9687500000000f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2385253906250f,(float16_t)0.9711914062500f,
+(float16_t)0.2340087890625f,(float16_t)0.9721679687500f,
+(float16_t)0.2296142578125f,(float16_t)0.9731445312500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2205810546875f,(float16_t)0.9755859375000f,
+(float16_t)0.2160644531250f,(float16_t)0.9765625000000f,
+(float16_t)0.2116699218750f,(float16_t)0.9775390625000f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.2026367187500f,(float16_t)0.9794921875000f,
+(float16_t)0.1981201171875f,(float16_t)0.9799804687500f,
+(float16_t)0.1936035156250f,(float16_t)0.9809570312500f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1845703125000f,(float16_t)0.9829101562500f,
+(float16_t)0.1800537109375f,(float16_t)0.9838867187500f,
+(float16_t)0.1755371093750f,(float16_t)0.9843750000000f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1663818359375f,(float16_t)0.9858398437500f,
+(float16_t)0.1618652343750f,(float16_t)0.9868164062500f,
+(float16_t)0.1573486328125f,(float16_t)0.9873046875000f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1481933593750f,(float16_t)0.9887695312500f,
+(float16_t)0.1436767578125f,(float16_t)0.9897460937500f,
+(float16_t)0.1391601562500f,(float16_t)0.9902343750000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1300048828125f,(float16_t)0.9916992187500f,
+(float16_t)0.1254882812500f,(float16_t)0.9921875000000f,
+(float16_t)0.1209106445312f,(float16_t)0.9926757812500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.1117553710938f,(float16_t)0.9936523437500f,
+(float16_t)0.1071777343750f,(float16_t)0.9941406250000f,
+(float16_t)0.1026000976562f,(float16_t)0.9946289062500f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0934448242188f,(float16_t)0.9956054687500f,
+(float16_t)0.0888671875000f,(float16_t)0.9960937500000f,
+(float16_t)0.0842895507812f,(float16_t)0.9965820312500f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0750732421875f,(float16_t)0.9970703125000f,
+(float16_t)0.0704956054688f,(float16_t)0.9975585937500f,
+(float16_t)0.0659179687500f,(float16_t)0.9980468750000f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0567321777344f,(float16_t)0.9985351562500f,
+(float16_t)0.0521240234375f,(float16_t)0.9985351562500f,
+(float16_t)0.0475463867188f,(float16_t)0.9990234375000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0383300781250f,(float16_t)0.9990234375000f,
+(float16_t)0.0337524414062f,(float16_t)0.9995117187500f,
+(float16_t)0.0291442871094f,(float16_t)0.9995117187500f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0199432373047f,(float16_t)1.0000000000000f,
+(float16_t)0.0153427124023f,(float16_t)1.0000000000000f,
+(float16_t)0.0107345581055f,(float16_t)1.0000000000000f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)0.0015335083008f,(float16_t)1.0000000000000f,
+(float16_t)-0.0030670166016f,(float16_t)1.0000000000000f,
+(float16_t)-0.0076713562012f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0168762207031f,(float16_t)1.0000000000000f,
+(float16_t)-0.0214691162109f,(float16_t)1.0000000000000f,
+(float16_t)-0.0260772705078f,(float16_t)0.9995117187500f,
+(float16_t)-0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)-0.0352783203125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0398864746094f,(float16_t)0.9990234375000f,
+(float16_t)-0.0444641113281f,(float16_t)0.9990234375000f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0536499023438f,(float16_t)0.9985351562500f,
+(float16_t)-0.0582580566406f,(float16_t)0.9985351562500f,
+(float16_t)-0.0628662109375f,(float16_t)0.9980468750000f,
+(float16_t)-0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)-0.0720214843750f,(float16_t)0.9975585937500f,
+(float16_t)-0.0765991210938f,(float16_t)0.9970703125000f,
+(float16_t)-0.0812377929688f,(float16_t)0.9965820312500f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.0903930664062f,(float16_t)0.9960937500000f,
+(float16_t)-0.0949707031250f,(float16_t)0.9956054687500f,
+(float16_t)-0.0995483398438f,(float16_t)0.9951171875000f,
+(float16_t)-0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)-0.1087036132812f,(float16_t)0.9941406250000f,
+(float16_t)-0.1132812500000f,(float16_t)0.9936523437500f,
+(float16_t)-0.1178588867188f,(float16_t)0.9931640625000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1269531250000f,(float16_t)0.9916992187500f,
+(float16_t)-0.1315917968750f,(float16_t)0.9912109375000f,
+(float16_t)-0.1361083984375f,(float16_t)0.9907226562500f,
+(float16_t)-0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)-0.1452636718750f,(float16_t)0.9892578125000f,
+(float16_t)-0.1497802734375f,(float16_t)0.9887695312500f,
+(float16_t)-0.1542968750000f,(float16_t)0.9877929687500f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1634521484375f,(float16_t)0.9863281250000f,
+(float16_t)-0.1679687500000f,(float16_t)0.9858398437500f,
+(float16_t)-0.1724853515625f,(float16_t)0.9848632812500f,
+(float16_t)-0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)-0.1815185546875f,(float16_t)0.9833984375000f,
+(float16_t)-0.1860351562500f,(float16_t)0.9824218750000f,
+(float16_t)-0.1905517578125f,(float16_t)0.9814453125000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.1995849609375f,(float16_t)0.9799804687500f,
+(float16_t)-0.2041015625000f,(float16_t)0.9790039062500f,
+(float16_t)-0.2086181640625f,(float16_t)0.9780273437500f,
+(float16_t)-0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)-0.2176513671875f,(float16_t)0.9760742187500f,
+(float16_t)-0.2220458984375f,(float16_t)0.9750976562500f,
+(float16_t)-0.2265625000000f,(float16_t)0.9741210937500f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2354736328125f,(float16_t)0.9716796875000f,
+(float16_t)-0.2399902343750f,(float16_t)0.9707031250000f,
+(float16_t)-0.2445068359375f,(float16_t)0.9697265625000f,
+(float16_t)-0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)-0.2534179687500f,(float16_t)0.9672851562500f,
+(float16_t)-0.2578125000000f,(float16_t)0.9663085937500f,
+(float16_t)-0.2622070312500f,(float16_t)0.9648437500000f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2712402343750f,(float16_t)0.9624023437500f,
+(float16_t)-0.2756347656250f,(float16_t)0.9614257812500f,
+(float16_t)-0.2800292968750f,(float16_t)0.9599609375000f,
+(float16_t)-0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)-0.2888183593750f,(float16_t)0.9575195312500f,
+(float16_t)-0.2932128906250f,(float16_t)0.9560546875000f,
+(float16_t)-0.2976074218750f,(float16_t)0.9545898437500f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3063964843750f,(float16_t)0.9521484375000f,
+(float16_t)-0.3107910156250f,(float16_t)0.9506835937500f,
+(float16_t)-0.3151855468750f,(float16_t)0.9492187500000f,
+(float16_t)-0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)-0.3239746093750f,(float16_t)0.9462890625000f,
+(float16_t)-0.3281250000000f,(float16_t)0.9448242187500f,
+(float16_t)-0.3325195312500f,(float16_t)0.9428710937500f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3413085937500f,(float16_t)0.9399414062500f,
+(float16_t)-0.3454589843750f,(float16_t)0.9384765625000f,
+(float16_t)-0.3498535156250f,(float16_t)0.9370117187500f,
+(float16_t)-0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)-0.3583984375000f,(float16_t)0.9335937500000f,
+(float16_t)-0.3627929687500f,(float16_t)0.9316406250000f,
+(float16_t)-0.3669433593750f,(float16_t)0.9301757812500f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3754882812500f,(float16_t)0.9267578125000f,
+(float16_t)-0.3798828125000f,(float16_t)0.9252929687500f,
+(float16_t)-0.3840332031250f,(float16_t)0.9233398437500f,
+(float16_t)-0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)-0.3925781250000f,(float16_t)0.9199218750000f,
+(float16_t)-0.3967285156250f,(float16_t)0.9179687500000f,
+(float16_t)-0.4011230468750f,(float16_t)0.9160156250000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4094238281250f,(float16_t)0.9121093750000f,
+(float16_t)-0.4135742187500f,(float16_t)0.9106445312500f,
+(float16_t)-0.4177246093750f,(float16_t)0.9086914062500f,
+(float16_t)-0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)-0.4262695312500f,(float16_t)0.9047851562500f,
+(float16_t)-0.4304199218750f,(float16_t)0.9028320312500f,
+(float16_t)-0.4345703125000f,(float16_t)0.9008789062500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4426269531250f,(float16_t)0.8964843750000f,
+(float16_t)-0.4467773437500f,(float16_t)0.8945312500000f,
+(float16_t)-0.4509277343750f,(float16_t)0.8925781250000f,
+(float16_t)-0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)-0.4592285156250f,(float16_t)0.8881835937500f,
+(float16_t)-0.4633789062500f,(float16_t)0.8862304687500f,
+(float16_t)-0.4672851562500f,(float16_t)0.8842773437500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4753417968750f,(float16_t)0.8798828125000f,
+(float16_t)-0.4794921875000f,(float16_t)0.8774414062500f,
+(float16_t)-0.4836425781250f,(float16_t)0.8754882812500f,
+(float16_t)-0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)-0.4914550781250f,(float16_t)0.8706054687500f,
+(float16_t)-0.4956054687500f,(float16_t)0.8686523437500f,
+(float16_t)-0.4995117187500f,(float16_t)0.8662109375000f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5073242187500f,(float16_t)0.8618164062500f,
+(float16_t)-0.5112304687500f,(float16_t)0.8593750000000f,
+(float16_t)-0.5156250000000f,(float16_t)0.8569335937500f,
+(float16_t)-0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)-0.5234375000000f,(float16_t)0.8520507812500f,
+(float16_t)-0.5273437500000f,(float16_t)0.8496093750000f,
+(float16_t)-0.5312500000000f,(float16_t)0.8471679687500f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5390625000000f,(float16_t)0.8422851562500f,
+(float16_t)-0.5429687500000f,(float16_t)0.8398437500000f,
+(float16_t)-0.5463867187500f,(float16_t)0.8374023437500f,
+(float16_t)-0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)-0.5541992187500f,(float16_t)0.8325195312500f,
+(float16_t)-0.5581054687500f,(float16_t)0.8295898437500f,
+(float16_t)-0.5620117187500f,(float16_t)0.8271484375000f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5693359375000f,(float16_t)0.8217773437500f,
+(float16_t)-0.5732421875000f,(float16_t)0.8193359375000f,
+(float16_t)-0.5771484375000f,(float16_t)0.8168945312500f,
+(float16_t)-0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)-0.5844726562500f,(float16_t)0.8115234375000f,
+(float16_t)-0.5883789062500f,(float16_t)0.8085937500000f,
+(float16_t)-0.5917968750000f,(float16_t)0.8061523437500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.5996093750000f,(float16_t)0.8002929687500f,
+(float16_t)-0.6030273437500f,(float16_t)0.7978515625000f,
+(float16_t)-0.6069335937500f,(float16_t)0.7949218750000f,
+(float16_t)-0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)-0.6142578125000f,(float16_t)0.7890625000000f,
+(float16_t)-0.6176757812500f,(float16_t)0.7866210937500f,
+(float16_t)-0.6210937500000f,(float16_t)0.7836914062500f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6284179687500f,(float16_t)0.7778320312500f,
+(float16_t)-0.6318359375000f,(float16_t)0.7749023437500f,
+(float16_t)-0.6357421875000f,(float16_t)0.7719726562500f,
+(float16_t)-0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)-0.6425781250000f,(float16_t)0.7661132812500f,
+(float16_t)-0.6459960937500f,(float16_t)0.7631835937500f,
+(float16_t)-0.6499023437500f,(float16_t)0.7602539062500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6567382812500f,(float16_t)0.7543945312500f,
+(float16_t)-0.6601562500000f,(float16_t)0.7509765625000f,
+(float16_t)-0.6635742187500f,(float16_t)0.7480468750000f,
+(float16_t)-0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)-0.6704101562500f,(float16_t)0.7421875000000f,
+(float16_t)-0.6738281250000f,(float16_t)0.7387695312500f,
+(float16_t)-0.6772460937500f,(float16_t)0.7358398437500f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6840820312500f,(float16_t)0.7294921875000f,
+(float16_t)-0.6875000000000f,(float16_t)0.7265625000000f,
+(float16_t)-0.6904296875000f,(float16_t)0.7231445312500f,
+(float16_t)-0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)-0.6972656250000f,(float16_t)0.7167968750000f,
+(float16_t)-0.7006835937500f,(float16_t)0.7133789062500f,
+(float16_t)-0.7036132812500f,(float16_t)0.7104492187500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7104492187500f,(float16_t)0.7036132812500f,
+(float16_t)-0.7133789062500f,(float16_t)0.7006835937500f,
+(float16_t)-0.7167968750000f,(float16_t)0.6972656250000f,
+(float16_t)-0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)-0.7231445312500f,(float16_t)0.6904296875000f,
+(float16_t)-0.7265625000000f,(float16_t)0.6875000000000f,
+(float16_t)-0.7294921875000f,(float16_t)0.6840820312500f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7358398437500f,(float16_t)0.6772460937500f,
+(float16_t)-0.7387695312500f,(float16_t)0.6738281250000f,
+(float16_t)-0.7421875000000f,(float16_t)0.6704101562500f,
+(float16_t)-0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)-0.7480468750000f,(float16_t)0.6635742187500f,
+(float16_t)-0.7509765625000f,(float16_t)0.6601562500000f,
+(float16_t)-0.7543945312500f,(float16_t)0.6567382812500f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7602539062500f,(float16_t)0.6499023437500f,
+(float16_t)-0.7631835937500f,(float16_t)0.6459960937500f,
+(float16_t)-0.7661132812500f,(float16_t)0.6425781250000f,
+(float16_t)-0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)-0.7719726562500f,(float16_t)0.6357421875000f,
+(float16_t)-0.7749023437500f,(float16_t)0.6318359375000f,
+(float16_t)-0.7778320312500f,(float16_t)0.6284179687500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7836914062500f,(float16_t)0.6210937500000f,
+(float16_t)-0.7866210937500f,(float16_t)0.6176757812500f,
+(float16_t)-0.7890625000000f,(float16_t)0.6142578125000f,
+(float16_t)-0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)-0.7949218750000f,(float16_t)0.6069335937500f,
+(float16_t)-0.7978515625000f,(float16_t)0.6030273437500f,
+(float16_t)-0.8002929687500f,(float16_t)0.5996093750000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8061523437500f,(float16_t)0.5917968750000f,
+(float16_t)-0.8085937500000f,(float16_t)0.5883789062500f,
+(float16_t)-0.8115234375000f,(float16_t)0.5844726562500f,
+(float16_t)-0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)-0.8168945312500f,(float16_t)0.5771484375000f,
+(float16_t)-0.8193359375000f,(float16_t)0.5732421875000f,
+(float16_t)-0.8217773437500f,(float16_t)0.5693359375000f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8271484375000f,(float16_t)0.5620117187500f,
+(float16_t)-0.8295898437500f,(float16_t)0.5581054687500f,
+(float16_t)-0.8325195312500f,(float16_t)0.5541992187500f,
+(float16_t)-0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)-0.8374023437500f,(float16_t)0.5463867187500f,
+(float16_t)-0.8398437500000f,(float16_t)0.5429687500000f,
+(float16_t)-0.8422851562500f,(float16_t)0.5390625000000f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8471679687500f,(float16_t)0.5312500000000f,
+(float16_t)-0.8496093750000f,(float16_t)0.5273437500000f,
+(float16_t)-0.8520507812500f,(float16_t)0.5234375000000f,
+(float16_t)-0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)-0.8569335937500f,(float16_t)0.5156250000000f,
+(float16_t)-0.8593750000000f,(float16_t)0.5112304687500f,
+(float16_t)-0.8618164062500f,(float16_t)0.5073242187500f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8662109375000f,(float16_t)0.4995117187500f,
+(float16_t)-0.8686523437500f,(float16_t)0.4956054687500f,
+(float16_t)-0.8706054687500f,(float16_t)0.4914550781250f,
+(float16_t)-0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)-0.8754882812500f,(float16_t)0.4836425781250f,
+(float16_t)-0.8774414062500f,(float16_t)0.4794921875000f,
+(float16_t)-0.8798828125000f,(float16_t)0.4753417968750f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8842773437500f,(float16_t)0.4672851562500f,
+(float16_t)-0.8862304687500f,(float16_t)0.4633789062500f,
+(float16_t)-0.8881835937500f,(float16_t)0.4592285156250f,
+(float16_t)-0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)-0.8925781250000f,(float16_t)0.4509277343750f,
+(float16_t)-0.8945312500000f,(float16_t)0.4467773437500f,
+(float16_t)-0.8964843750000f,(float16_t)0.4426269531250f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.9008789062500f,(float16_t)0.4345703125000f,
+(float16_t)-0.9028320312500f,(float16_t)0.4304199218750f,
+(float16_t)-0.9047851562500f,(float16_t)0.4262695312500f,
+(float16_t)-0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)-0.9086914062500f,(float16_t)0.4177246093750f,
+(float16_t)-0.9106445312500f,(float16_t)0.4135742187500f,
+(float16_t)-0.9121093750000f,(float16_t)0.4094238281250f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9160156250000f,(float16_t)0.4011230468750f,
+(float16_t)-0.9179687500000f,(float16_t)0.3967285156250f,
+(float16_t)-0.9199218750000f,(float16_t)0.3925781250000f,
+(float16_t)-0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)-0.9233398437500f,(float16_t)0.3840332031250f,
+(float16_t)-0.9252929687500f,(float16_t)0.3798828125000f,
+(float16_t)-0.9267578125000f,(float16_t)0.3754882812500f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9301757812500f,(float16_t)0.3669433593750f,
+(float16_t)-0.9316406250000f,(float16_t)0.3627929687500f,
+(float16_t)-0.9335937500000f,(float16_t)0.3583984375000f,
+(float16_t)-0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)-0.9370117187500f,(float16_t)0.3498535156250f,
+(float16_t)-0.9384765625000f,(float16_t)0.3454589843750f,
+(float16_t)-0.9399414062500f,(float16_t)0.3413085937500f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9428710937500f,(float16_t)0.3325195312500f,
+(float16_t)-0.9448242187500f,(float16_t)0.3281250000000f,
+(float16_t)-0.9462890625000f,(float16_t)0.3239746093750f,
+(float16_t)-0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)-0.9492187500000f,(float16_t)0.3151855468750f,
+(float16_t)-0.9506835937500f,(float16_t)0.3107910156250f,
+(float16_t)-0.9521484375000f,(float16_t)0.3063964843750f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9545898437500f,(float16_t)0.2976074218750f,
+(float16_t)-0.9560546875000f,(float16_t)0.2932128906250f,
+(float16_t)-0.9575195312500f,(float16_t)0.2888183593750f,
+(float16_t)-0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)-0.9599609375000f,(float16_t)0.2800292968750f,
+(float16_t)-0.9614257812500f,(float16_t)0.2756347656250f,
+(float16_t)-0.9624023437500f,(float16_t)0.2712402343750f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9648437500000f,(float16_t)0.2622070312500f,
+(float16_t)-0.9663085937500f,(float16_t)0.2578125000000f,
+(float16_t)-0.9672851562500f,(float16_t)0.2534179687500f,
+(float16_t)-0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)-0.9697265625000f,(float16_t)0.2445068359375f,
+(float16_t)-0.9707031250000f,(float16_t)0.2399902343750f,
+(float16_t)-0.9716796875000f,(float16_t)0.2354736328125f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9741210937500f,(float16_t)0.2265625000000f,
+(float16_t)-0.9750976562500f,(float16_t)0.2220458984375f,
+(float16_t)-0.9760742187500f,(float16_t)0.2176513671875f,
+(float16_t)-0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)-0.9780273437500f,(float16_t)0.2086181640625f,
+(float16_t)-0.9790039062500f,(float16_t)0.2041015625000f,
+(float16_t)-0.9799804687500f,(float16_t)0.1995849609375f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9814453125000f,(float16_t)0.1905517578125f,
+(float16_t)-0.9824218750000f,(float16_t)0.1860351562500f,
+(float16_t)-0.9833984375000f,(float16_t)0.1815185546875f,
+(float16_t)-0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)-0.9848632812500f,(float16_t)0.1724853515625f,
+(float16_t)-0.9858398437500f,(float16_t)0.1679687500000f,
+(float16_t)-0.9863281250000f,(float16_t)0.1634521484375f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9877929687500f,(float16_t)0.1542968750000f,
+(float16_t)-0.9887695312500f,(float16_t)0.1497802734375f,
+(float16_t)-0.9892578125000f,(float16_t)0.1452636718750f,
+(float16_t)-0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)-0.9907226562500f,(float16_t)0.1361083984375f,
+(float16_t)-0.9912109375000f,(float16_t)0.1315917968750f,
+(float16_t)-0.9916992187500f,(float16_t)0.1269531250000f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9931640625000f,(float16_t)0.1178588867188f,
+(float16_t)-0.9936523437500f,(float16_t)0.1132812500000f,
+(float16_t)-0.9941406250000f,(float16_t)0.1087036132812f,
+(float16_t)-0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)-0.9951171875000f,(float16_t)0.0995483398438f,
+(float16_t)-0.9956054687500f,(float16_t)0.0949707031250f,
+(float16_t)-0.9960937500000f,(float16_t)0.0903930664062f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9965820312500f,(float16_t)0.0812377929688f,
+(float16_t)-0.9970703125000f,(float16_t)0.0765991210938f,
+(float16_t)-0.9975585937500f,(float16_t)0.0720214843750f,
+(float16_t)-0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)-0.9980468750000f,(float16_t)0.0628662109375f,
+(float16_t)-0.9985351562500f,(float16_t)0.0582580566406f,
+(float16_t)-0.9985351562500f,(float16_t)0.0536499023438f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9990234375000f,(float16_t)0.0444641113281f,
+(float16_t)-0.9990234375000f,(float16_t)0.0398864746094f,
+(float16_t)-0.9995117187500f,(float16_t)0.0352783203125f,
+(float16_t)-0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)-0.9995117187500f,(float16_t)0.0260772705078f,
+(float16_t)-1.0000000000000f,(float16_t)0.0214691162109f,
+(float16_t)-1.0000000000000f,(float16_t)0.0168762207031f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)-1.0000000000000f,(float16_t)0.0076713562012f,
+(float16_t)-1.0000000000000f,(float16_t)0.0030670166016f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0015335083008f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0061340332031f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0107345581055f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0153427124023f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0199432373047f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0291442871094f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0337524414062f,
+(float16_t)-0.9990234375000f,(float16_t)-0.0383300781250f,
+(float16_t)-0.9990234375000f,(float16_t)-0.0429382324219f,
+(float16_t)-0.9990234375000f,(float16_t)-0.0475463867188f,
+(float16_t)-0.9985351562500f,(float16_t)-0.0521240234375f,
+(float16_t)-0.9985351562500f,(float16_t)-0.0567321777344f,
+(float16_t)-0.9980468750000f,(float16_t)-0.0613098144531f,
+(float16_t)-0.9980468750000f,(float16_t)-0.0659179687500f,
+(float16_t)-0.9975585937500f,(float16_t)-0.0704956054688f,
+(float16_t)-0.9970703125000f,(float16_t)-0.0750732421875f,
+(float16_t)-0.9965820312500f,(float16_t)-0.0797119140625f,
+(float16_t)-0.9965820312500f,(float16_t)-0.0842895507812f,
+(float16_t)-0.9960937500000f,(float16_t)-0.0888671875000f,
+(float16_t)-0.9956054687500f,(float16_t)-0.0934448242188f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9946289062500f,(float16_t)-0.1026000976562f,
+(float16_t)-0.9941406250000f,(float16_t)-0.1071777343750f,
+(float16_t)-0.9936523437500f,(float16_t)-0.1117553710938f,
+(float16_t)-0.9931640625000f,(float16_t)-0.1163330078125f,
+(float16_t)-0.9926757812500f,(float16_t)-0.1209106445312f,
+(float16_t)-0.9921875000000f,(float16_t)-0.1254882812500f,
+(float16_t)-0.9916992187500f,(float16_t)-0.1300048828125f,
+(float16_t)-0.9907226562500f,(float16_t)-0.1345214843750f,
+(float16_t)-0.9902343750000f,(float16_t)-0.1391601562500f,
+(float16_t)-0.9897460937500f,(float16_t)-0.1436767578125f,
+(float16_t)-0.9887695312500f,(float16_t)-0.1481933593750f,
+(float16_t)-0.9882812500000f,(float16_t)-0.1528320312500f,
+(float16_t)-0.9873046875000f,(float16_t)-0.1573486328125f,
+(float16_t)-0.9868164062500f,(float16_t)-0.1618652343750f,
+(float16_t)-0.9858398437500f,(float16_t)-0.1663818359375f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9843750000000f,(float16_t)-0.1755371093750f,
+(float16_t)-0.9838867187500f,(float16_t)-0.1800537109375f,
+(float16_t)-0.9829101562500f,(float16_t)-0.1845703125000f,
+(float16_t)-0.9819335937500f,(float16_t)-0.1890869140625f,
+(float16_t)-0.9809570312500f,(float16_t)-0.1936035156250f,
+(float16_t)-0.9799804687500f,(float16_t)-0.1981201171875f,
+(float16_t)-0.9794921875000f,(float16_t)-0.2026367187500f,
+(float16_t)-0.9785156250000f,(float16_t)-0.2071533203125f,
+(float16_t)-0.9775390625000f,(float16_t)-0.2116699218750f,
+(float16_t)-0.9765625000000f,(float16_t)-0.2160644531250f,
+(float16_t)-0.9755859375000f,(float16_t)-0.2205810546875f,
+(float16_t)-0.9741210937500f,(float16_t)-0.2250976562500f,
+(float16_t)-0.9731445312500f,(float16_t)-0.2296142578125f,
+(float16_t)-0.9721679687500f,(float16_t)-0.2340087890625f,
+(float16_t)-0.9711914062500f,(float16_t)-0.2385253906250f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9687500000000f,(float16_t)-0.2474365234375f,
+(float16_t)-0.9677734375000f,(float16_t)-0.2519531250000f,
+(float16_t)-0.9667968750000f,(float16_t)-0.2563476562500f,
+(float16_t)-0.9653320312500f,(float16_t)-0.2607421875000f,
+(float16_t)-0.9643554687500f,(float16_t)-0.2651367187500f,
+(float16_t)-0.9628906250000f,(float16_t)-0.2697753906250f,
+(float16_t)-0.9619140625000f,(float16_t)-0.2741699218750f,
+(float16_t)-0.9604492187500f,(float16_t)-0.2785644531250f,
+(float16_t)-0.9589843750000f,(float16_t)-0.2829589843750f,
+(float16_t)-0.9580078125000f,(float16_t)-0.2873535156250f,
+(float16_t)-0.9565429687500f,(float16_t)-0.2917480468750f,
+(float16_t)-0.9550781250000f,(float16_t)-0.2961425781250f,
+(float16_t)-0.9536132812500f,(float16_t)-0.3005371093750f,
+(float16_t)-0.9521484375000f,(float16_t)-0.3049316406250f,
+(float16_t)-0.9511718750000f,(float16_t)-0.3093261718750f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9482421875000f,(float16_t)-0.3181152343750f,
+(float16_t)-0.9467773437500f,(float16_t)-0.3225097656250f,
+(float16_t)-0.9453125000000f,(float16_t)-0.3266601562500f,
+(float16_t)-0.9433593750000f,(float16_t)-0.3310546875000f,
+(float16_t)-0.9418945312500f,(float16_t)-0.3354492187500f,
+(float16_t)-0.9404296875000f,(float16_t)-0.3398437500000f,
+(float16_t)-0.9389648437500f,(float16_t)-0.3439941406250f,
+(float16_t)-0.9375000000000f,(float16_t)-0.3483886718750f,
+(float16_t)-0.9355468750000f,(float16_t)-0.3527832031250f,
+(float16_t)-0.9340820312500f,(float16_t)-0.3569335937500f,
+(float16_t)-0.9326171875000f,(float16_t)-0.3613281250000f,
+(float16_t)-0.9306640625000f,(float16_t)-0.3657226562500f,
+(float16_t)-0.9291992187500f,(float16_t)-0.3698730468750f,
+(float16_t)-0.9272460937500f,(float16_t)-0.3742675781250f,
+(float16_t)-0.9257812500000f,(float16_t)-0.3784179687500f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.9218750000000f,(float16_t)-0.3869628906250f,
+(float16_t)-0.9204101562500f,(float16_t)-0.3911132812500f,
+(float16_t)-0.9184570312500f,(float16_t)-0.3955078125000f,
+(float16_t)-0.9165039062500f,(float16_t)-0.3996582031250f,
+(float16_t)-0.9150390625000f,(float16_t)-0.4038085937500f,
+(float16_t)-0.9130859375000f,(float16_t)-0.4079589843750f,
+(float16_t)-0.9111328125000f,(float16_t)-0.4123535156250f,
+(float16_t)-0.9091796875000f,(float16_t)-0.4165039062500f,
+(float16_t)-0.9072265625000f,(float16_t)-0.4206542968750f,
+(float16_t)-0.9052734375000f,(float16_t)-0.4248046875000f,
+(float16_t)-0.9033203125000f,(float16_t)-0.4289550781250f,
+(float16_t)-0.9013671875000f,(float16_t)-0.4331054687500f,
+(float16_t)-0.8994140625000f,(float16_t)-0.4372558593750f,
+(float16_t)-0.8974609375000f,(float16_t)-0.4414062500000f,
+(float16_t)-0.8955078125000f,(float16_t)-0.4455566406250f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8911132812500f,(float16_t)-0.4536132812500f,
+(float16_t)-0.8891601562500f,(float16_t)-0.4577636718750f,
+(float16_t)-0.8867187500000f,(float16_t)-0.4619140625000f,
+(float16_t)-0.8847656250000f,(float16_t)-0.4660644531250f,
+(float16_t)-0.8828125000000f,(float16_t)-0.4699707031250f,
+(float16_t)-0.8803710937500f,(float16_t)-0.4741210937500f,
+(float16_t)-0.8784179687500f,(float16_t)-0.4780273437500f,
+(float16_t)-0.8759765625000f,(float16_t)-0.4821777343750f,
+(float16_t)-0.8740234375000f,(float16_t)-0.4863281250000f,
+(float16_t)-0.8715820312500f,(float16_t)-0.4902343750000f,
+(float16_t)-0.8691406250000f,(float16_t)-0.4941406250000f,
+(float16_t)-0.8671875000000f,(float16_t)-0.4982910156250f,
+(float16_t)-0.8647460937500f,(float16_t)-0.5024414062500f,
+(float16_t)-0.8623046875000f,(float16_t)-0.5063476562500f,
+(float16_t)-0.8598632812500f,(float16_t)-0.5102539062500f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8554687500000f,(float16_t)-0.5180664062500f,
+(float16_t)-0.8530273437500f,(float16_t)-0.5219726562500f,
+(float16_t)-0.8505859375000f,(float16_t)-0.5258789062500f,
+(float16_t)-0.8481445312500f,(float16_t)-0.5297851562500f,
+(float16_t)-0.8457031250000f,(float16_t)-0.5336914062500f,
+(float16_t)-0.8432617187500f,(float16_t)-0.5375976562500f,
+(float16_t)-0.8408203125000f,(float16_t)-0.5415039062500f,
+(float16_t)-0.8383789062500f,(float16_t)-0.5454101562500f,
+(float16_t)-0.8359375000000f,(float16_t)-0.5493164062500f,
+(float16_t)-0.8330078125000f,(float16_t)-0.5532226562500f,
+(float16_t)-0.8305664062500f,(float16_t)-0.5566406250000f,
+(float16_t)-0.8281250000000f,(float16_t)-0.5605468750000f,
+(float16_t)-0.8256835937500f,(float16_t)-0.5644531250000f,
+(float16_t)-0.8227539062500f,(float16_t)-0.5683593750000f,
+(float16_t)-0.8203125000000f,(float16_t)-0.5722656250000f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.8149414062500f,(float16_t)-0.5795898437500f,
+(float16_t)-0.8120117187500f,(float16_t)-0.5834960937500f,
+(float16_t)-0.8095703125000f,(float16_t)-0.5869140625000f,
+(float16_t)-0.8066406250000f,(float16_t)-0.5908203125000f,
+(float16_t)-0.8041992187500f,(float16_t)-0.5942382812500f,
+(float16_t)-0.8012695312500f,(float16_t)-0.5981445312500f,
+(float16_t)-0.7988281250000f,(float16_t)-0.6020507812500f,
+(float16_t)-0.7958984375000f,(float16_t)-0.6054687500000f,
+(float16_t)-0.7929687500000f,(float16_t)-0.6093750000000f,
+(float16_t)-0.7900390625000f,(float16_t)-0.6127929687500f,
+(float16_t)-0.7875976562500f,(float16_t)-0.6162109375000f,
+(float16_t)-0.7846679687500f,(float16_t)-0.6201171875000f,
+(float16_t)-0.7817382812500f,(float16_t)-0.6235351562500f,
+(float16_t)-0.7788085937500f,(float16_t)-0.6274414062500f,
+(float16_t)-0.7758789062500f,(float16_t)-0.6308593750000f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7700195312500f,(float16_t)-0.6381835937500f,
+(float16_t)-0.7670898437500f,(float16_t)-0.6416015625000f,
+(float16_t)-0.7641601562500f,(float16_t)-0.6450195312500f,
+(float16_t)-0.7612304687500f,(float16_t)-0.6484375000000f,
+(float16_t)-0.7583007812500f,(float16_t)-0.6518554687500f,
+(float16_t)-0.7553710937500f,(float16_t)-0.6552734375000f,
+(float16_t)-0.7519531250000f,(float16_t)-0.6591796875000f,
+(float16_t)-0.7490234375000f,(float16_t)-0.6625976562500f,
+(float16_t)-0.7460937500000f,(float16_t)-0.6660156250000f,
+(float16_t)-0.7431640625000f,(float16_t)-0.6694335937500f,
+(float16_t)-0.7397460937500f,(float16_t)-0.6728515625000f,
+(float16_t)-0.7368164062500f,(float16_t)-0.6762695312500f,
+(float16_t)-0.7338867187500f,(float16_t)-0.6796875000000f,
+(float16_t)-0.7304687500000f,(float16_t)-0.6826171875000f,
+(float16_t)-0.7275390625000f,(float16_t)-0.6860351562500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.7211914062500f,(float16_t)-0.6928710937500f,
+(float16_t)-0.7177734375000f,(float16_t)-0.6962890625000f,
+(float16_t)-0.7148437500000f,(float16_t)-0.6997070312500f,
+(float16_t)-0.7114257812500f,(float16_t)-0.7026367187500f,
+(float16_t)-0.7080078125000f,(float16_t)-0.7060546875000f,
+(float16_t)-0.7050781250000f,(float16_t)-0.7094726562500f,
+(float16_t)-0.7016601562500f,(float16_t)-0.7124023437500f,
+(float16_t)-0.6982421875000f,(float16_t)-0.7158203125000f,
+(float16_t)-0.6953125000000f,(float16_t)-0.7187500000000f,
+(float16_t)-0.6918945312500f,(float16_t)-0.7221679687500f,
+(float16_t)-0.6884765625000f,(float16_t)-0.7250976562500f,
+(float16_t)-0.6850585937500f,(float16_t)-0.7285156250000f,
+(float16_t)-0.6816406250000f,(float16_t)-0.7314453125000f,
+(float16_t)-0.6782226562500f,(float16_t)-0.7348632812500f,
+(float16_t)-0.6748046875000f,(float16_t)-0.7377929687500f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6679687500000f,(float16_t)-0.7441406250000f,
+(float16_t)-0.6645507812500f,(float16_t)-0.7470703125000f,
+(float16_t)-0.6611328125000f,(float16_t)-0.7500000000000f,
+(float16_t)-0.6577148437500f,(float16_t)-0.7534179687500f,
+(float16_t)-0.6542968750000f,(float16_t)-0.7563476562500f,
+(float16_t)-0.6508789062500f,(float16_t)-0.7592773437500f,
+(float16_t)-0.6474609375000f,(float16_t)-0.7622070312500f,
+(float16_t)-0.6440429687500f,(float16_t)-0.7651367187500f,
+(float16_t)-0.6401367187500f,(float16_t)-0.7680664062500f,
+(float16_t)-0.6367187500000f,(float16_t)-0.7709960937500f,
+(float16_t)-0.6333007812500f,(float16_t)-0.7739257812500f,
+(float16_t)-0.6293945312500f,(float16_t)-0.7768554687500f,
+(float16_t)-0.6259765625000f,(float16_t)-0.7797851562500f,
+(float16_t)-0.6225585937500f,(float16_t)-0.7827148437500f,
+(float16_t)-0.6186523437500f,(float16_t)-0.7856445312500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.6118164062500f,(float16_t)-0.7910156250000f,
+(float16_t)-0.6079101562500f,(float16_t)-0.7939453125000f,
+(float16_t)-0.6044921875000f,(float16_t)-0.7968750000000f,
+(float16_t)-0.6005859375000f,(float16_t)-0.7993164062500f,
+(float16_t)-0.5971679687500f,(float16_t)-0.8022460937500f,
+(float16_t)-0.5932617187500f,(float16_t)-0.8051757812500f,
+(float16_t)-0.5893554687500f,(float16_t)-0.8076171875000f,
+(float16_t)-0.5859375000000f,(float16_t)-0.8105468750000f,
+(float16_t)-0.5820312500000f,(float16_t)-0.8129882812500f,
+(float16_t)-0.5781250000000f,(float16_t)-0.8159179687500f,
+(float16_t)-0.5747070312500f,(float16_t)-0.8183593750000f,
+(float16_t)-0.5708007812500f,(float16_t)-0.8212890625000f,
+(float16_t)-0.5668945312500f,(float16_t)-0.8237304687500f,
+(float16_t)-0.5629882812500f,(float16_t)-0.8261718750000f,
+(float16_t)-0.5595703125000f,(float16_t)-0.8291015625000f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.5517578125000f,(float16_t)-0.8339843750000f,
+(float16_t)-0.5478515625000f,(float16_t)-0.8364257812500f,
+(float16_t)-0.5439453125000f,(float16_t)-0.8388671875000f,
+(float16_t)-0.5400390625000f,(float16_t)-0.8417968750000f,
+(float16_t)-0.5361328125000f,(float16_t)-0.8442382812500f,
+(float16_t)-0.5322265625000f,(float16_t)-0.8466796875000f,
+(float16_t)-0.5283203125000f,(float16_t)-0.8491210937500f,
+(float16_t)-0.5244140625000f,(float16_t)-0.8515625000000f,
+(float16_t)-0.5205078125000f,(float16_t)-0.8540039062500f,
+(float16_t)-0.5166015625000f,(float16_t)-0.8559570312500f,
+(float16_t)-0.5126953125000f,(float16_t)-0.8583984375000f,
+(float16_t)-0.5087890625000f,(float16_t)-0.8608398437500f,
+(float16_t)-0.5048828125000f,(float16_t)-0.8632812500000f,
+(float16_t)-0.5009765625000f,(float16_t)-0.8657226562500f,
+(float16_t)-0.4968261718750f,(float16_t)-0.8676757812500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4887695312500f,(float16_t)-0.8725585937500f,
+(float16_t)-0.4848632812500f,(float16_t)-0.8745117187500f,
+(float16_t)-0.4809570312500f,(float16_t)-0.8769531250000f,
+(float16_t)-0.4768066406250f,(float16_t)-0.8789062500000f,
+(float16_t)-0.4726562500000f,(float16_t)-0.8813476562500f,
+(float16_t)-0.4687500000000f,(float16_t)-0.8833007812500f,
+(float16_t)-0.4645996093750f,(float16_t)-0.8857421875000f,
+(float16_t)-0.4604492187500f,(float16_t)-0.8876953125000f,
+(float16_t)-0.4565429687500f,(float16_t)-0.8896484375000f,
+(float16_t)-0.4523925781250f,(float16_t)-0.8916015625000f,
+(float16_t)-0.4482421875000f,(float16_t)-0.8940429687500f,
+(float16_t)-0.4440917968750f,(float16_t)-0.8959960937500f,
+(float16_t)-0.4399414062500f,(float16_t)-0.8979492187500f,
+(float16_t)-0.4357910156250f,(float16_t)-0.8999023437500f,
+(float16_t)-0.4316406250000f,(float16_t)-0.9018554687500f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.4233398437500f,(float16_t)-0.9057617187500f,
+(float16_t)-0.4191894531250f,(float16_t)-0.9077148437500f,
+(float16_t)-0.4150390625000f,(float16_t)-0.9096679687500f,
+(float16_t)-0.4108886718750f,(float16_t)-0.9116210937500f,
+(float16_t)-0.4067382812500f,(float16_t)-0.9135742187500f,
+(float16_t)-0.4023437500000f,(float16_t)-0.9155273437500f,
+(float16_t)-0.3981933593750f,(float16_t)-0.9174804687500f,
+(float16_t)-0.3940429687500f,(float16_t)-0.9189453125000f,
+(float16_t)-0.3896484375000f,(float16_t)-0.9208984375000f,
+(float16_t)-0.3854980468750f,(float16_t)-0.9228515625000f,
+(float16_t)-0.3813476562500f,(float16_t)-0.9243164062500f,
+(float16_t)-0.3769531250000f,(float16_t)-0.9262695312500f,
+(float16_t)-0.3728027343750f,(float16_t)-0.9277343750000f,
+(float16_t)-0.3684082031250f,(float16_t)-0.9296875000000f,
+(float16_t)-0.3642578125000f,(float16_t)-0.9311523437500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.3557128906250f,(float16_t)-0.9345703125000f,
+(float16_t)-0.3513183593750f,(float16_t)-0.9360351562500f,
+(float16_t)-0.3469238281250f,(float16_t)-0.9379882812500f,
+(float16_t)-0.3427734375000f,(float16_t)-0.9394531250000f,
+(float16_t)-0.3383789062500f,(float16_t)-0.9409179687500f,
+(float16_t)-0.3339843750000f,(float16_t)-0.9423828125000f,
+(float16_t)-0.3295898437500f,(float16_t)-0.9443359375000f,
+(float16_t)-0.3251953125000f,(float16_t)-0.9458007812500f,
+(float16_t)-0.3210449218750f,(float16_t)-0.9472656250000f,
+(float16_t)-0.3166503906250f,(float16_t)-0.9487304687500f,
+(float16_t)-0.3122558593750f,(float16_t)-0.9501953125000f,
+(float16_t)-0.3078613281250f,(float16_t)-0.9516601562500f,
+(float16_t)-0.3034667968750f,(float16_t)-0.9526367187500f,
+(float16_t)-0.2990722656250f,(float16_t)-0.9541015625000f,
+(float16_t)-0.2946777343750f,(float16_t)-0.9555664062500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2858886718750f,(float16_t)-0.9584960937500f,
+(float16_t)-0.2814941406250f,(float16_t)-0.9594726562500f,
+(float16_t)-0.2770996093750f,(float16_t)-0.9609375000000f,
+(float16_t)-0.2727050781250f,(float16_t)-0.9619140625000f,
+(float16_t)-0.2683105468750f,(float16_t)-0.9633789062500f,
+(float16_t)-0.2636718750000f,(float16_t)-0.9643554687500f,
+(float16_t)-0.2592773437500f,(float16_t)-0.9658203125000f,
+(float16_t)-0.2548828125000f,(float16_t)-0.9667968750000f,
+(float16_t)-0.2504882812500f,(float16_t)-0.9682617187500f,
+(float16_t)-0.2459716796875f,(float16_t)-0.9692382812500f,
+(float16_t)-0.2414550781250f,(float16_t)-0.9702148437500f,
+(float16_t)-0.2370605468750f,(float16_t)-0.9716796875000f,
+(float16_t)-0.2325439453125f,(float16_t)-0.9726562500000f,
+(float16_t)-0.2280273437500f,(float16_t)-0.9736328125000f,
+(float16_t)-0.2236328125000f,(float16_t)-0.9746093750000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.2145996093750f,(float16_t)-0.9765625000000f,
+(float16_t)-0.2100830078125f,(float16_t)-0.9775390625000f,
+(float16_t)-0.2055664062500f,(float16_t)-0.9785156250000f,
+(float16_t)-0.2010498046875f,(float16_t)-0.9794921875000f,
+(float16_t)-0.1966552734375f,(float16_t)-0.9804687500000f,
+(float16_t)-0.1921386718750f,(float16_t)-0.9814453125000f,
+(float16_t)-0.1876220703125f,(float16_t)-0.9824218750000f,
+(float16_t)-0.1829833984375f,(float16_t)-0.9829101562500f,
+(float16_t)-0.1784667968750f,(float16_t)-0.9838867187500f,
+(float16_t)-0.1739501953125f,(float16_t)-0.9848632812500f,
+(float16_t)-0.1694335937500f,(float16_t)-0.9853515625000f,
+(float16_t)-0.1649169921875f,(float16_t)-0.9863281250000f,
+(float16_t)-0.1604003906250f,(float16_t)-0.9868164062500f,
+(float16_t)-0.1558837890625f,(float16_t)-0.9877929687500f,
+(float16_t)-0.1512451171875f,(float16_t)-0.9882812500000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.1422119140625f,(float16_t)-0.9897460937500f,
+(float16_t)-0.1375732421875f,(float16_t)-0.9907226562500f,
+(float16_t)-0.1330566406250f,(float16_t)-0.9912109375000f,
+(float16_t)-0.1285400390625f,(float16_t)-0.9916992187500f,
+(float16_t)-0.1239624023438f,(float16_t)-0.9921875000000f,
+(float16_t)-0.1193847656250f,(float16_t)-0.9926757812500f,
+(float16_t)-0.1148071289062f,(float16_t)-0.9931640625000f,
+(float16_t)-0.1102294921875f,(float16_t)-0.9941406250000f,
+(float16_t)-0.1056518554688f,(float16_t)-0.9946289062500f,
+(float16_t)-0.1010742187500f,(float16_t)-0.9951171875000f,
+(float16_t)-0.0964965820312f,(float16_t)-0.9951171875000f,
+(float16_t)-0.0919189453125f,(float16_t)-0.9956054687500f,
+(float16_t)-0.0873413085938f,(float16_t)-0.9960937500000f,
+(float16_t)-0.0827636718750f,(float16_t)-0.9965820312500f,
+(float16_t)-0.0781250000000f,(float16_t)-0.9970703125000f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)-0.0689697265625f,(float16_t)-0.9975585937500f,
+(float16_t)-0.0643920898438f,(float16_t)-0.9980468750000f,
+(float16_t)-0.0597839355469f,(float16_t)-0.9980468750000f,
+(float16_t)-0.0552062988281f,(float16_t)-0.9985351562500f,
+(float16_t)-0.0505981445312f,(float16_t)-0.9985351562500f,
+(float16_t)-0.0459899902344f,(float16_t)-0.9990234375000f,
+(float16_t)-0.0414123535156f,(float16_t)-0.9990234375000f,
+(float16_t)-0.0368041992188f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0321960449219f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0276031494141f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0230102539062f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0184020996094f,(float16_t)-1.0000000000000f,
+(float16_t)-0.0138015747070f,(float16_t)-1.0000000000000f,
+(float16_t)-0.0092010498047f,(float16_t)-1.0000000000000f,
+(float16_t)-0.0046005249023f,(float16_t)-1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0184020996094f,
+(float16_t)0.9995117187500f,(float16_t)0.0368041992188f,
+(float16_t)0.9985351562500f,(float16_t)0.0552062988281f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9956054687500f,(float16_t)0.0919189453125f,
+(float16_t)0.9941406250000f,(float16_t)0.1102294921875f,
+(float16_t)0.9916992187500f,(float16_t)0.1285400390625f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9863281250000f,(float16_t)0.1649169921875f,
+(float16_t)0.9829101562500f,(float16_t)0.1829833984375f,
+(float16_t)0.9794921875000f,(float16_t)0.2010498046875f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9716796875000f,(float16_t)0.2370605468750f,
+(float16_t)0.9667968750000f,(float16_t)0.2548828125000f,
+(float16_t)0.9619140625000f,(float16_t)0.2727050781250f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9516601562500f,(float16_t)0.3078613281250f,
+(float16_t)0.9458007812500f,(float16_t)0.3251953125000f,
+(float16_t)0.9394531250000f,(float16_t)0.3427734375000f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9262695312500f,(float16_t)0.3769531250000f,
+(float16_t)0.9189453125000f,(float16_t)0.3940429687500f,
+(float16_t)0.9116210937500f,(float16_t)0.4108886718750f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8959960937500f,(float16_t)0.4440917968750f,
+(float16_t)0.8876953125000f,(float16_t)0.4604492187500f,
+(float16_t)0.8789062500000f,(float16_t)0.4768066406250f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8608398437500f,(float16_t)0.5087890625000f,
+(float16_t)0.8515625000000f,(float16_t)0.5244140625000f,
+(float16_t)0.8417968750000f,(float16_t)0.5400390625000f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.8212890625000f,(float16_t)0.5708007812500f,
+(float16_t)0.8105468750000f,(float16_t)0.5859375000000f,
+(float16_t)0.7993164062500f,(float16_t)0.6005859375000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7768554687500f,(float16_t)0.6293945312500f,
+(float16_t)0.7651367187500f,(float16_t)0.6440429687500f,
+(float16_t)0.7534179687500f,(float16_t)0.6577148437500f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.7285156250000f,(float16_t)0.6850585937500f,
+(float16_t)0.7158203125000f,(float16_t)0.6982421875000f,
+(float16_t)0.7026367187500f,(float16_t)0.7114257812500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6762695312500f,(float16_t)0.7368164062500f,
+(float16_t)0.6625976562500f,(float16_t)0.7490234375000f,
+(float16_t)0.6484375000000f,(float16_t)0.7612304687500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.6201171875000f,(float16_t)0.7846679687500f,
+(float16_t)0.6054687500000f,(float16_t)0.7958984375000f,
+(float16_t)0.5908203125000f,(float16_t)0.8066406250000f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5605468750000f,(float16_t)0.8281250000000f,
+(float16_t)0.5454101562500f,(float16_t)0.8383789062500f,
+(float16_t)0.5297851562500f,(float16_t)0.8481445312500f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4982910156250f,(float16_t)0.8671875000000f,
+(float16_t)0.4821777343750f,(float16_t)0.8759765625000f,
+(float16_t)0.4660644531250f,(float16_t)0.8847656250000f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.4331054687500f,(float16_t)0.9013671875000f,
+(float16_t)0.4165039062500f,(float16_t)0.9091796875000f,
+(float16_t)0.3996582031250f,(float16_t)0.9165039062500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3657226562500f,(float16_t)0.9306640625000f,
+(float16_t)0.3483886718750f,(float16_t)0.9375000000000f,
+(float16_t)0.3310546875000f,(float16_t)0.9433593750000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2961425781250f,(float16_t)0.9550781250000f,
+(float16_t)0.2785644531250f,(float16_t)0.9604492187500f,
+(float16_t)0.2607421875000f,(float16_t)0.9653320312500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.2250976562500f,(float16_t)0.9741210937500f,
+(float16_t)0.2071533203125f,(float16_t)0.9785156250000f,
+(float16_t)0.1890869140625f,(float16_t)0.9819335937500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.1528320312500f,(float16_t)0.9882812500000f,
+(float16_t)0.1345214843750f,(float16_t)0.9907226562500f,
+(float16_t)0.1163330078125f,(float16_t)0.9931640625000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0797119140625f,(float16_t)0.9965820312500f,
+(float16_t)0.0613098144531f,(float16_t)0.9980468750000f,
+(float16_t)0.0429382324219f,(float16_t)0.9990234375000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)0.0061340332031f,(float16_t)1.0000000000000f,
+(float16_t)-0.0122680664062f,(float16_t)1.0000000000000f,
+(float16_t)-0.0306701660156f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.0674438476562f,(float16_t)0.9975585937500f,
+(float16_t)-0.0858154296875f,(float16_t)0.9960937500000f,
+(float16_t)-0.1041259765625f,(float16_t)0.9946289062500f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1406250000000f,(float16_t)0.9902343750000f,
+(float16_t)-0.1588134765625f,(float16_t)0.9873046875000f,
+(float16_t)-0.1770019531250f,(float16_t)0.9843750000000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2131347656250f,(float16_t)0.9770507812500f,
+(float16_t)-0.2310791015625f,(float16_t)0.9731445312500f,
+(float16_t)-0.2489013671875f,(float16_t)0.9687500000000f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.2844238281250f,(float16_t)0.9584960937500f,
+(float16_t)-0.3020019531250f,(float16_t)0.9531250000000f,
+(float16_t)-0.3195800781250f,(float16_t)0.9477539062500f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.3542480468750f,(float16_t)0.9350585937500f,
+(float16_t)-0.3713378906250f,(float16_t)0.9287109375000f,
+(float16_t)-0.3884277343750f,(float16_t)0.9213867187500f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4221191406250f,(float16_t)0.9067382812500f,
+(float16_t)-0.4387207031250f,(float16_t)0.8984375000000f,
+(float16_t)-0.4550781250000f,(float16_t)0.8906250000000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.4875488281250f,(float16_t)0.8730468750000f,
+(float16_t)-0.5034179687500f,(float16_t)0.8637695312500f,
+(float16_t)-0.5195312500000f,(float16_t)0.8544921875000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5502929687500f,(float16_t)0.8349609375000f,
+(float16_t)-0.5659179687500f,(float16_t)0.8247070312500f,
+(float16_t)-0.5810546875000f,(float16_t)0.8139648437500f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6103515625000f,(float16_t)0.7919921875000f,
+(float16_t)-0.6250000000000f,(float16_t)0.7807617187500f,
+(float16_t)-0.6391601562500f,(float16_t)0.7690429687500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.6669921875000f,(float16_t)0.7451171875000f,
+(float16_t)-0.6806640625000f,(float16_t)0.7324218750000f,
+(float16_t)-0.6938476562500f,(float16_t)0.7202148437500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7202148437500f,(float16_t)0.6938476562500f,
+(float16_t)-0.7324218750000f,(float16_t)0.6806640625000f,
+(float16_t)-0.7451171875000f,(float16_t)0.6669921875000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.7690429687500f,(float16_t)0.6391601562500f,
+(float16_t)-0.7807617187500f,(float16_t)0.6250000000000f,
+(float16_t)-0.7919921875000f,(float16_t)0.6103515625000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8139648437500f,(float16_t)0.5810546875000f,
+(float16_t)-0.8247070312500f,(float16_t)0.5659179687500f,
+(float16_t)-0.8349609375000f,(float16_t)0.5502929687500f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8544921875000f,(float16_t)0.5195312500000f,
+(float16_t)-0.8637695312500f,(float16_t)0.5034179687500f,
+(float16_t)-0.8730468750000f,(float16_t)0.4875488281250f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.8906250000000f,(float16_t)0.4550781250000f,
+(float16_t)-0.8984375000000f,(float16_t)0.4387207031250f,
+(float16_t)-0.9067382812500f,(float16_t)0.4221191406250f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9213867187500f,(float16_t)0.3884277343750f,
+(float16_t)-0.9287109375000f,(float16_t)0.3713378906250f,
+(float16_t)-0.9350585937500f,(float16_t)0.3542480468750f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9477539062500f,(float16_t)0.3195800781250f,
+(float16_t)-0.9531250000000f,(float16_t)0.3020019531250f,
+(float16_t)-0.9584960937500f,(float16_t)0.2844238281250f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9687500000000f,(float16_t)0.2489013671875f,
+(float16_t)-0.9731445312500f,(float16_t)0.2310791015625f,
+(float16_t)-0.9770507812500f,(float16_t)0.2131347656250f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9843750000000f,(float16_t)0.1770019531250f,
+(float16_t)-0.9873046875000f,(float16_t)0.1588134765625f,
+(float16_t)-0.9902343750000f,(float16_t)0.1406250000000f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9946289062500f,(float16_t)0.1041259765625f,
+(float16_t)-0.9960937500000f,(float16_t)0.0858154296875f,
+(float16_t)-0.9975585937500f,(float16_t)0.0674438476562f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)0.0306701660156f,
+(float16_t)-1.0000000000000f,(float16_t)0.0122680664062f,
+(float16_t)-1.0000000000000f,(float16_t)-0.0061340332031f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9990234375000f,(float16_t)-0.0429382324219f,
+(float16_t)-0.9980468750000f,(float16_t)-0.0613098144531f,
+(float16_t)-0.9965820312500f,(float16_t)-0.0797119140625f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9931640625000f,(float16_t)-0.1163330078125f,
+(float16_t)-0.9907226562500f,(float16_t)-0.1345214843750f,
+(float16_t)-0.9882812500000f,(float16_t)-0.1528320312500f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9819335937500f,(float16_t)-0.1890869140625f,
+(float16_t)-0.9785156250000f,(float16_t)-0.2071533203125f,
+(float16_t)-0.9741210937500f,(float16_t)-0.2250976562500f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9653320312500f,(float16_t)-0.2607421875000f,
+(float16_t)-0.9604492187500f,(float16_t)-0.2785644531250f,
+(float16_t)-0.9550781250000f,(float16_t)-0.2961425781250f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9433593750000f,(float16_t)-0.3310546875000f,
+(float16_t)-0.9375000000000f,(float16_t)-0.3483886718750f,
+(float16_t)-0.9306640625000f,(float16_t)-0.3657226562500f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.9165039062500f,(float16_t)-0.3996582031250f,
+(float16_t)-0.9091796875000f,(float16_t)-0.4165039062500f,
+(float16_t)-0.9013671875000f,(float16_t)-0.4331054687500f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8847656250000f,(float16_t)-0.4660644531250f,
+(float16_t)-0.8759765625000f,(float16_t)-0.4821777343750f,
+(float16_t)-0.8671875000000f,(float16_t)-0.4982910156250f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8481445312500f,(float16_t)-0.5297851562500f,
+(float16_t)-0.8383789062500f,(float16_t)-0.5454101562500f,
+(float16_t)-0.8281250000000f,(float16_t)-0.5605468750000f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.8066406250000f,(float16_t)-0.5908203125000f,
+(float16_t)-0.7958984375000f,(float16_t)-0.6054687500000f,
+(float16_t)-0.7846679687500f,(float16_t)-0.6201171875000f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7612304687500f,(float16_t)-0.6484375000000f,
+(float16_t)-0.7490234375000f,(float16_t)-0.6625976562500f,
+(float16_t)-0.7368164062500f,(float16_t)-0.6762695312500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.7114257812500f,(float16_t)-0.7026367187500f,
+(float16_t)-0.6982421875000f,(float16_t)-0.7158203125000f,
+(float16_t)-0.6850585937500f,(float16_t)-0.7285156250000f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6577148437500f,(float16_t)-0.7534179687500f,
+(float16_t)-0.6440429687500f,(float16_t)-0.7651367187500f,
+(float16_t)-0.6293945312500f,(float16_t)-0.7768554687500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.6005859375000f,(float16_t)-0.7993164062500f,
+(float16_t)-0.5859375000000f,(float16_t)-0.8105468750000f,
+(float16_t)-0.5708007812500f,(float16_t)-0.8212890625000f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.5400390625000f,(float16_t)-0.8417968750000f,
+(float16_t)-0.5244140625000f,(float16_t)-0.8515625000000f,
+(float16_t)-0.5087890625000f,(float16_t)-0.8608398437500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4768066406250f,(float16_t)-0.8789062500000f,
+(float16_t)-0.4604492187500f,(float16_t)-0.8876953125000f,
+(float16_t)-0.4440917968750f,(float16_t)-0.8959960937500f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.4108886718750f,(float16_t)-0.9116210937500f,
+(float16_t)-0.3940429687500f,(float16_t)-0.9189453125000f,
+(float16_t)-0.3769531250000f,(float16_t)-0.9262695312500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.3427734375000f,(float16_t)-0.9394531250000f,
+(float16_t)-0.3251953125000f,(float16_t)-0.9458007812500f,
+(float16_t)-0.3078613281250f,(float16_t)-0.9516601562500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2727050781250f,(float16_t)-0.9619140625000f,
+(float16_t)-0.2548828125000f,(float16_t)-0.9667968750000f,
+(float16_t)-0.2370605468750f,(float16_t)-0.9716796875000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.2010498046875f,(float16_t)-0.9794921875000f,
+(float16_t)-0.1829833984375f,(float16_t)-0.9829101562500f,
+(float16_t)-0.1649169921875f,(float16_t)-0.9863281250000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.1285400390625f,(float16_t)-0.9916992187500f,
+(float16_t)-0.1102294921875f,(float16_t)-0.9941406250000f,
+(float16_t)-0.0919189453125f,(float16_t)-0.9956054687500f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)-0.0552062988281f,(float16_t)-0.9985351562500f,
+(float16_t)-0.0368041992188f,(float16_t)-0.9995117187500f,
+(float16_t)-0.0184020996094f,(float16_t)-1.0000000000000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9970703125000f,(float16_t)0.0735473632812f,
+(float16_t)0.9892578125000f,(float16_t)0.1467285156250f,
+(float16_t)0.9755859375000f,(float16_t)0.2191162109375f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.9331054687500f,(float16_t)0.3598632812500f,
+(float16_t)0.9038085937500f,(float16_t)0.4274902343750f,
+(float16_t)0.8701171875000f,(float16_t)0.4929199218750f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.7885742187500f,(float16_t)0.6152343750000f,
+(float16_t)0.7407226562500f,(float16_t)0.6713867187500f,
+(float16_t)0.6894531250000f,(float16_t)0.7241210937500f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.5756835937500f,(float16_t)0.8173828125000f,
+(float16_t)0.5141601562500f,(float16_t)0.8579101562500f,
+(float16_t)0.4497070312500f,(float16_t)0.8930664062500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.3137207031250f,(float16_t)0.9497070312500f,
+(float16_t)0.2429199218750f,(float16_t)0.9702148437500f,
+(float16_t)0.1710205078125f,(float16_t)0.9853515625000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)0.0245361328125f,(float16_t)0.9995117187500f,
+(float16_t)-0.0490722656250f,(float16_t)0.9990234375000f,
+(float16_t)-0.1224365234375f,(float16_t)0.9926757812500f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.2666015625000f,(float16_t)0.9638671875000f,
+(float16_t)-0.3369140625000f,(float16_t)0.9414062500000f,
+(float16_t)-0.4052734375000f,(float16_t)0.9140625000000f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.5351562500000f,(float16_t)0.8447265625000f,
+(float16_t)-0.5957031250000f,(float16_t)0.8032226562500f,
+(float16_t)-0.6533203125000f,(float16_t)0.7573242187500f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.7573242187500f,(float16_t)0.6533203125000f,
+(float16_t)-0.8032226562500f,(float16_t)0.5957031250000f,
+(float16_t)-0.8447265625000f,(float16_t)0.5351562500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9140625000000f,(float16_t)0.4052734375000f,
+(float16_t)-0.9414062500000f,(float16_t)0.3369140625000f,
+(float16_t)-0.9638671875000f,(float16_t)0.2666015625000f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9926757812500f,(float16_t)0.1224365234375f,
+(float16_t)-0.9990234375000f,(float16_t)0.0490722656250f,
+(float16_t)-0.9995117187500f,(float16_t)-0.0245361328125f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9853515625000f,(float16_t)-0.1710205078125f,
+(float16_t)-0.9702148437500f,(float16_t)-0.2429199218750f,
+(float16_t)-0.9497070312500f,(float16_t)-0.3137207031250f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.8930664062500f,(float16_t)-0.4497070312500f,
+(float16_t)-0.8579101562500f,(float16_t)-0.5141601562500f,
+(float16_t)-0.8173828125000f,(float16_t)-0.5756835937500f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.7241210937500f,(float16_t)-0.6894531250000f,
+(float16_t)-0.6713867187500f,(float16_t)-0.7407226562500f,
+(float16_t)-0.6152343750000f,(float16_t)-0.7885742187500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.4929199218750f,(float16_t)-0.8701171875000f,
+(float16_t)-0.4274902343750f,(float16_t)-0.9038085937500f,
+(float16_t)-0.3598632812500f,(float16_t)-0.9331054687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)-0.2191162109375f,(float16_t)-0.9755859375000f,
+(float16_t)-0.1467285156250f,(float16_t)-0.9892578125000f,
+(float16_t)-0.0735473632812f,(float16_t)-0.9970703125000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.9570312500000f,(float16_t)0.2902832031250f,
+(float16_t)0.8315429687500f,(float16_t)0.5556640625000f,
+(float16_t)0.6342773437500f,(float16_t)0.7729492187500f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)0.0980224609375f,(float16_t)0.9951171875000f,
+(float16_t)-0.1950683593750f,(float16_t)0.9809570312500f,
+(float16_t)-0.4714355468750f,(float16_t)0.8818359375000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.8818359375000f,(float16_t)0.4714355468750f,
+(float16_t)-0.9809570312500f,(float16_t)0.1950683593750f,
+(float16_t)-0.9951171875000f,(float16_t)-0.0980224609375f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,
+(float16_t)-0.7729492187500f,(float16_t)-0.6342773437500f,
+(float16_t)-0.5556640625000f,(float16_t)-0.8315429687500f,
+(float16_t)-0.2902832031250f,(float16_t)-0.9570312500000f,
+(float16_t)1.0000000000000f,(float16_t)0.0000000000000f,
+(float16_t)0.3825683593750f,(float16_t)0.9238281250000f,
+(float16_t)-0.7070312500000f,(float16_t)0.7070312500000f,
+(float16_t)-0.9238281250000f,(float16_t)-0.3825683593750f,};
 
 #endif
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
index 3fefe25..9b10a4a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cmplx_conj_f16.c
  * Description:  Floating-point complex conjugate
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,28 +35,6 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup cmplx_conj Complex Conjugate
-
-  Conjugates the elements of a complex data vector.
-
-  The <code>pSrc</code> points to the source data and
-  <code>pDst</code> points to the destination data where the result should be written.
-  <code>numSamples</code> specifies the number of complex samples
-  and the data in each array is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  Each array has a total of <code>2*numSamples</code> values.
-
-  The underlying algorithm is used:
-  <pre>
-  for (n = 0; n < numSamples; n++) {
-      pDst[(2*n)  ] =  pSrc[(2*n)  ];    // real part
-      pDst[(2*n)+1] = -pSrc[(2*n)+1];    // imag part
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup cmplx_conj
@@ -112,7 +92,7 @@ void arm_cmplx_conj_f16(
   
       /* Calculate Complex Conjugate and store result in destination buffer. */
       *pDst++ =  *pSrc++;
-      *pDst++ = -*pSrc++;
+      *pDst++ = -(_Float16)*pSrc++;
   
       /* Decrement loop counter */
       blkCnt--;
@@ -139,16 +119,16 @@ void arm_cmplx_conj_f16(
 
     /* Calculate Complex Conjugate and store result in destination buffer. */
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -170,7 +150,7 @@ void arm_cmplx_conj_f16(
 
     /* Calculate Complex Conjugate and store result in destination buffer. */
     *pDst++ =  *pSrc++;
-    *pDst++ = -*pSrc++;
+    *pDst++ = -(_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -183,4 +163,5 @@ void arm_cmplx_conj_f16(
   @} end of cmplx_conj group
  */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
index dcb276d..89cbe5b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_conj_f32.c
  * Description:  Floating-point complex conjugate
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
index 3764614..b13e16d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_conj_q15.c
  * Description:  Q15 complex conjugate
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -124,11 +124,11 @@ void arm_cmplx_conj_q15(
 
     /* Calculate Complex Conjugate and store result in destination buffer. */
 
-    #if defined (ARM_MATH_DSP)
-    in1 = read_q15x2_ia ((q15_t **) &pSrc);
-    in2 = read_q15x2_ia ((q15_t **) &pSrc);
-    in3 = read_q15x2_ia ((q15_t **) &pSrc);
-    in4 = read_q15x2_ia ((q15_t **) &pSrc);
+#if defined (ARM_MATH_DSP)
+    in1 = read_q15x2_ia (&pSrc);
+    in2 = read_q15x2_ia (&pSrc);
+    in3 = read_q15x2_ia (&pSrc);
+    in4 = read_q15x2_ia (&pSrc);
 
 #ifndef ARM_MATH_BIG_ENDIAN
     in1 = __QASX(0, in1);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
index aaf8707..879d679 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_conj_q31.c
  * Description:  Q31 complex conjugate
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
index 44ea9aa..6066f61 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cmplx_dot_prod_f16.c
  * Description:  Floating-point complex dot product
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,32 +37,6 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup cmplx_dot_prod Complex Dot Product
-
-  Computes the dot product of two complex vectors.
-  The vectors are multiplied element-by-element and then summed.
-
-  The <code>pSrcA</code> points to the first complex input vector and
-  <code>pSrcB</code> points to the second complex input vector.
-  <code>numSamples</code> specifies the number of complex samples
-  and the data in each array is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  Each array has a total of <code>2*numSamples</code> values.
-
-  The underlying algorithm is used:
-
-  <pre>
-  realResult = 0;
-  imagResult = 0;
-  for (n = 0; n < numSamples; n++) {
-      realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
-      imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup cmplx_dot_prod
@@ -286,4 +262,5 @@ void arm_cmplx_dot_prod_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
index ddc0f6e..8282d6f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_dot_prod_f32.c
  * Description:  Floating-point complex dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
index 4ae4d05..2c93864 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_dot_prod_q15.c
  * Description:  Processing function for the Q15 Complex Dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
index 3e1ec7a..bd5e894 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_dot_prod_q31.c
  * Description:  Q31 complex dot product
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
index 8cad742..a4c859d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cmplx_mag_f16.c
  * Description:  Floating-point complex magnitude
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,29 +35,7 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup cmplx_mag Complex Magnitude
-
-  Computes the magnitude of the elements of a complex data vector.
-
-  The <code>pSrc</code> points to the source data and
-  <code>pDst</code> points to the where the result should be written.
-  <code>numSamples</code> specifies the number of complex samples
-  in the input array and the data is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  The input array has a total of <code>2*numSamples</code> values;
-  the output array has a total of <code>numSamples</code> values.
-
-  The underlying algorithm is used:
 
-  <pre>
-  for (n = 0; n < numSamples; n++) {
-      pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup cmplx_mag
@@ -239,4 +219,5 @@ void arm_cmplx_mag_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
index 8209fce..b2c9230 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_f32.c
  * Description:  Floating-point complex magnitude
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f64.c
new file mode 100644
index 0000000..2d651ac
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f64.c
@@ -0,0 +1,82 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_f64.c
+ * Description:  Floating-point complex magnitude
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+
+
+/**
+  @addtogroup cmplx_mag
+  @{
+ */
+
+/**
+  @brief         Double-precision floating-point complex magnitude.
+  @param[in]     pSrc        points to input vector, stored interleaved as (real, imag) pairs
+  @param[out]    pDst        points to output vector, one magnitude per complex sample
+  @param[in]     numSamples  number of complex samples to process
+  @return        none
+ */
+void arm_cmplx_mag_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;                            /* complex samples left to process */
+  float64_t re, im;                              /* parts of the current input sample */
+
+  /* Each pass consumes one (real, imag) pair and emits one magnitude. */
+  for (remaining = numSamples; remaining > 0U; remaining--)
+  {
+    /* Read the pair first: real part, then imaginary part. */
+    re = pSrc[0];
+    im = pSrc[1];
+
+    /* Step the input pointer past the sample just read. */
+    pSrc += 2;
+
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+    *pDst = sqrt((re * re) + (im * im));
+
+    /* Step the output pointer to the next result slot. */
+    pDst++;
+  }
+
+}
+
+/**
+  @} end of cmplx_mag group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_fast_q15.c
new file mode 100644
index 0000000..6a78a7e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_fast_q15.c
@@ -0,0 +1,227 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_fast_q15.c
+ * Description:  Q15 complex magnitude
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @addtogroup cmplx_mag
+  @{
+ */
+
+/**
+  @brief         Q15 complex magnitude.
+  @param[in]     pSrc        points to input vector
+  @param[out]    pDst        points to output vector
+  @param[in]     numSamples  number of samples in each vector
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
+                   Fast functions are less accurate. This function tends to clamp values that are
+                   too small to 0, so sqrt(x*x) = x will not always be true.
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_cmplx_mag_fast_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples)
+{
+    /* Helium (MVE) build: 8 complex samples per vector pass, then a scalar tail. */
+    int32_t blockSize = numSamples;  /* total number of complex samples */
+    uint32_t  blkCnt;           /* loop counter */
+    q15x8x2_t vecSrc;           /* the two vectors returned by vld2q */
+    q15x8_t sum;                /* per-lane sum of squares, then its square root */
+    q31_t in;                   /* tail: value read by read_q15x2_ia */
+    q31_t acc0;                 /* tail: accumulator */
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld2q(pSrc);
+        pSrc += 16;
+        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
+                     vmulhq(vecSrc.val[1], vecSrc.val[1]));
+
+        sum = vshrq(sum, 1);
+
+        sum = FAST_VSQRT_Q15(sum);
+
+        vst1q(pDst, sum);
+        pDst += 8;
+        /*
+         * Decrement the vector-block loop counter
+         */
+        blkCnt--;
+    }
+
+    /*
+     * tail: the remaining (numSamples & 7) samples, one per iteration
+     */
+    blkCnt = blockSize & 7;
+
+    while (blkCnt > 0U)
+    {
+      /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+      /* Pass &pSrc directly so the const qualifier on the input is not cast away. */
+      in = read_q15x2_ia (&pSrc);
+      acc0 = __SMUAD(in, in);
+
+      /* store result in 2.14 format in destination buffer. */
+      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+
+
+      /* Decrement loop counter */
+      blkCnt--;
+    }
+}
+
+#else
+void arm_cmplx_mag_fast_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        /* Generic (non-MVE) path: each result is arm_sqrt_q15 applied to the sum of squares shifted right by 17. */
+#if defined (ARM_MATH_DSP)
+        q31_t in;                                      /* Value read by read_q15x2_ia for one complex sample */
+        q31_t acc0;                                    /* Accumulators */
+#else
+       q15_t real, imag;                              /* Temporary input variables */
+       q31_t acc0, acc1;                              /* Accumulators */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = numSamples >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+
+#if defined (ARM_MATH_DSP)
+    in = read_q15x2_ia (&pSrc);
+    acc0 = __SMUAD(in, in);
+    /* store result in 2.14 format in destination buffer. */
+    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+
+    in = read_q15x2_ia (&pSrc);
+    acc0 = __SMUAD(in, in);
+    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+
+    in = read_q15x2_ia (&pSrc);
+    acc0 = __SMUAD(in, in);
+    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+
+    in = read_q15x2_ia (&pSrc);
+    acc0 = __SMUAD(in, in);
+    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+#else
+    real = *pSrc++;
+    imag = *pSrc++;
+    acc0 = ((q31_t) real * real);
+    acc1 = ((q31_t) imag * imag);
+
+    /* add the two squares in 64 bits, then store result in 2.14 format in destination buffer. */
+    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+
+    real = *pSrc++;
+    imag = *pSrc++;
+    acc0 = ((q31_t) real * real);
+    acc1 = ((q31_t) imag * imag);
+    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+
+    real = *pSrc++;
+    imag = *pSrc++;
+    acc0 = ((q31_t) real * real);
+    acc1 = ((q31_t) imag * imag);
+    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+
+    real = *pSrc++;
+    imag = *pSrc++;
+    acc0 = ((q31_t) real * real);
+    acc1 = ((q31_t) imag * imag);
+    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = numSamples % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  /* One output per iteration: the leftover samples when unrolled, otherwise all of them. */
+  while (blkCnt > 0U)
+  {
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+
+#if defined (ARM_MATH_DSP)
+    in = read_q15x2_ia (&pSrc);
+    acc0 = __SMUAD(in, in);
+
+    /* store result in 2.14 format in destination buffer. */
+    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+#else
+    real = *pSrc++;
+    imag = *pSrc++;
+    acc0 = ((q31_t) real * real);
+    acc1 = ((q31_t) imag * imag);
+
+    /* add the two squares in 64 bits, then store result in 2.14 format in destination buffer. */
+    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+#endif
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of cmplx_mag group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
index 473ef07..9c06477 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_q15.c
  * Description:  Q15 complex magnitude
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -49,6 +49,11 @@
   @par           Scaling and Overflow Behavior
                    The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
  */
+
+/* A Q31 square root is used instead of the Q15 one because the Q15
+ * version is not accurate enough for small values, which is a problem
+ * for some applications.
+ */
 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
@@ -62,23 +67,52 @@ void arm_cmplx_mag_q15(
     int32_t blockSize = numSamples;  /* loop counters */
     uint32_t  blkCnt;           /* loop counters */
     q15x8x2_t vecSrc;
-    q15x8_t sum;
+    q31x4_t prod0;
+    q31x4_t prod1;
+
     q31_t in;
     q31_t acc0;
+    q31x4_t acc0V;
+    q31x4_t acc1V;
+
+    q31_t res;
+    q15x8_t resV;
 
     blkCnt = blockSize >> 3;
     while (blkCnt > 0U)
     {
         vecSrc = vld2q(pSrc);  
         pSrc += 16;
-        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
-                     vmulhq(vecSrc.val[1], vecSrc.val[1]));
 
-        sum = vshrq(sum, 1);
+        acc0V = vdupq_n_s32(0);
+        acc1V = vdupq_n_s32(0);
+
+        prod0 = vmullbq_int_s16(vecSrc.val[0], vecSrc.val[0]);
+        acc0V = vqaddq_s32(acc0V,prod0);
+
+        prod0 = vmullbq_int_s16(vecSrc.val[1], vecSrc.val[1]);
+        acc0V = vqaddq_s32(acc0V,prod0);
+
+
+        prod1 = vmulltq_int_s16(vecSrc.val[0], vecSrc.val[0]);
+        acc1V = vqaddq_s32(acc1V,prod1);
+
+        prod1 = vmulltq_int_s16(vecSrc.val[1], vecSrc.val[1]);
+        acc1V = vqaddq_s32(acc1V,prod1);
+
+       
+
+        acc0V = vshrq(acc0V, 1);
+        acc1V = vshrq(acc1V, 1);
+
+        acc0V = FAST_VSQRT_Q31(acc0V);
+        acc1V = FAST_VSQRT_Q31(acc1V);
 
-        sum = FAST_VSQRT_Q15(sum);
+        resV = vdupq_n_s16(0);
+        resV = vqshrnbq_n_s32(resV,acc0V,16);
+        resV = vqshrntq_n_s32(resV,acc1V,16);
 
-        vst1q(pDst, sum); 
+        vst1q(pDst, resV); 
         pDst += 8;
         /*
          * Decrement the blockSize loop counter
@@ -99,7 +133,8 @@ void arm_cmplx_mag_q15(
       acc0 = __SMUAD(in, in);
   
       /* store result in 2.14 format in destination buffer. */
-      arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+      arm_sqrt_q31(acc0  >> 1 , &res);
+      *pDst++ = res >> 16;
   
   
       /* Decrement loop counter */
@@ -113,6 +148,7 @@ void arm_cmplx_mag_q15(
         q15_t * pDst,
         uint32_t numSamples)
 {
+        q31_t res; /* temporary result */
         uint32_t blkCnt;                               /* Loop counter */
 
 #if defined (ARM_MATH_DSP)
@@ -133,22 +169,26 @@ void arm_cmplx_mag_q15(
     /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
 
 #if defined (ARM_MATH_DSP)
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
     /* store result in 2.14 format in destination buffer. */
-    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+    arm_sqrt_q31(acc0  >> 1 , &res);
+    *pDst++ = res >> 16;
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
-    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+    arm_sqrt_q31(acc0  >> 1 , &res);
+    *pDst++ = res >> 16;
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
-    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+    arm_sqrt_q31(acc0  >> 1 , &res);
+    *pDst++ = res >> 16;
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
-    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+    arm_sqrt_q31(acc0  >> 1 , &res);
+    *pDst++ = res >> 16;
 #else
     real = *pSrc++;
     imag = *pSrc++;
@@ -156,25 +196,29 @@ void arm_cmplx_mag_q15(
     acc1 = ((q31_t) imag * imag);
 
     /* store result in 2.14 format in destination buffer. */
-    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+    arm_sqrt_q31((acc0 + acc1) >> 1 , &res);
+    *pDst++ = res >> 16;
 
     real = *pSrc++;
     imag = *pSrc++;
     acc0 = ((q31_t) real * real);
     acc1 = ((q31_t) imag * imag);
-    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+    arm_sqrt_q31((acc0 + acc1) >> 1 , &res);
+    *pDst++ = res >> 16;
 
     real = *pSrc++;
     imag = *pSrc++;
     acc0 = ((q31_t) real * real);
     acc1 = ((q31_t) imag * imag);
-    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+    arm_sqrt_q31((acc0 + acc1) >> 1 , &res);
+    *pDst++ = res >> 16;
 
     real = *pSrc++;
     imag = *pSrc++;
     acc0 = ((q31_t) real * real);
     acc1 = ((q31_t) imag * imag);
-    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+    arm_sqrt_q31((acc0 + acc1) >> 1 , &res);
+    *pDst++ = res >> 16;
 #endif /* #if defined (ARM_MATH_DSP) */
 
     /* Decrement loop counter */
@@ -196,11 +240,12 @@ void arm_cmplx_mag_q15(
     /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
 
 #if defined (ARM_MATH_DSP)
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
 
     /* store result in 2.14 format in destination buffer. */
-    arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+    arm_sqrt_q31(acc0  >> 1 , &res);
+    *pDst++ = res >> 16;
 #else
     real = *pSrc++;
     imag = *pSrc++;
@@ -208,7 +253,9 @@ void arm_cmplx_mag_q15(
     acc1 = ((q31_t) imag * imag);
 
     /* store result in 2.14 format in destination buffer. */
-    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+    arm_sqrt_q31((acc0 + acc1) >> 1 , &res);
+    *pDst++ = res >> 16;
+ 
 #endif
 
     /* Decrement loop counter */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
index fa5a4e4..0041620 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_q31.c
  * Description:  Q31 complex magnitude
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
index 1449000..5fd3af1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cmplx_mag_squared_f16.c
  * Description:  Floating-point complex magnitude squared
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,29 +36,6 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup cmplx_mag_squared Complex Magnitude Squared
-
-  Computes the magnitude squared of the elements of a complex data vector.
-
-  The <code>pSrc</code> points to the source data and
-  <code>pDst</code> points to the where the result should be written.
-  <code>numSamples</code> specifies the number of complex samples
-  in the input array and the data is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  The input array has a total of <code>2*numSamples</code> values;
-  the output array has a total of <code>numSamples</code> values.
-
-  The underlying algorithm is used:
-
-  <pre>
-  for (n = 0; n < numSamples; n++) {
-      pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup cmplx_mag_squared
@@ -172,4 +151,5 @@ void arm_cmplx_mag_squared_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
index e611194..eaadf1c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_squared_f32.c
  * Description:  Floating-point complex magnitude squared
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f64.c
new file mode 100644
index 0000000..d2a2b36
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f64.c
@@ -0,0 +1,80 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_squared_f64.c
+ * Description:  Floating-point complex magnitude squared
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @addtogroup cmplx_mag_squared
+  @{
+ */
+
+/**
+  @brief         Double-precision floating-point complex magnitude squared.
+  @param[in]     pSrc        points to interleaved (real, imag) input; 2 values are read per sample
+  @param[out]    pDst        points to output vector; 1 value is written per sample
+  @param[in]     numSamples  number of complex samples to process
+  @return        none
+ */
+void arm_cmplx_mag_squared_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+        uint32_t remaining;                            /* Complex samples left to process */
+        float64_t re, im;                              /* Real and imaginary parts of the current sample */
+
+  /* Walk the input one complex sample at a time; a zero count skips the loop */
+  for (remaining = numSamples; remaining != 0U; remaining--)
+  {
+    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+    /* Both parts are fetched before the result is stored */
+
+    re = pSrc[0];
+    im = pSrc[1];
+
+    /* Sum of the squared parts goes to the current output slot */
+    *pDst = (re * re) + (im * im);
+
+    /* Step past the consumed pair and the written result */
+    pSrc += 2;
+    pDst++;
+  }
+
+}
+
+/**
+  @} end of cmplx_mag_squared group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
index 5163b22..0e2b2ec 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_squared_q15.c
  * Description:  Q15 complex magnitude squared
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -133,20 +133,20 @@ void arm_cmplx_mag_squared_q15(
     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
 
 #if defined (ARM_MATH_DSP)
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
     /* store result in 3.13 format in destination buffer. */
     *pDst++ = (q15_t) (acc0 >> 17);
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
     *pDst++ = (q15_t) (acc0 >> 17);
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
     *pDst++ = (q15_t) (acc0 >> 17);
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
     *pDst++ = (q15_t) (acc0 >> 17);
 #else
@@ -195,7 +195,7 @@ void arm_cmplx_mag_squared_q15(
     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
 
 #if defined (ARM_MATH_DSP)
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     acc0 = __SMUAD(in, in);
 
     /* store result in 3.13 format in destination buffer. */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
index e9a7649..b533a60 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mag_squared_q31.c
  * Description:  Q31 complex magnitude squared
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
index 79f48e9..75fefa3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_cmplx_f16.c
  * Description:  Floating-point complex-by-complex multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -36,27 +36,7 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
-
-  Multiplies a complex vector by another complex vector and generates a complex result.
-  The data in the complex arrays is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  The parameter <code>numSamples</code> represents the number of complex
-  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
-  real values.
 
-  The underlying algorithm is used:
-
-  <pre>
-  for (n = 0; n < numSamples; n++) {
-      pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
-      pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup CmplxByCmplxMult
@@ -271,4 +251,5 @@ void arm_cmplx_mult_cmplx_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
index 672ed89..d6ec828 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_cmplx_f32.c
  * Description:  Floating-point complex-by-complex multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f64.c
new file mode 100644
index 0000000..603de64
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f64.c
@@ -0,0 +1,87 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mult_cmplx_f64.c
+ * Description:  Floating-point complex-by-complex multiplication
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @addtogroup CmplxByCmplxMult
+  @{
+ */
+
+/**
+  @brief         Double-precision floating-point complex-by-complex multiplication.
+  @param[in]     pSrcA       points to first interleaved (real, imag) input vector
+  @param[in]     pSrcB       points to second interleaved (real, imag) input vector
+  @param[out]    pDst        points to interleaved (real, imag) output vector
+  @param[in]     numSamples  number of complex samples in each vector
+  @return        none
+ */
+
+void arm_cmplx_mult_cmplx_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;                            /* Complex samples left to process */
+  float64_t aRe, aIm;                            /* Current sample of the first input */
+  float64_t bRe, bIm;                            /* Current sample of the second input */
+
+  /* Multiply one complex pair per pass; a zero count skips the loop */
+  for (remaining = numSamples; remaining != 0U; remaining--)
+  {
+    /* Fetch all four operands before anything is stored */
+    aRe = pSrcA[0];
+    aIm = pSrcA[1];
+    bRe = pSrcB[0];
+    bIm = pSrcB[1];
+
+    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
+    pDst[0] = (aRe * bRe) - (aIm * bIm);
+    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
+    pDst[1] = (aRe * bIm) + (aIm * bRe);
+
+    /* Step all three pointers to the next complex sample */
+    pSrcA += 2;
+    pSrcB += 2;
+    pDst += 2;
+  }
+
+}
+
+/**
+  @} end of CmplxByCmplxMult group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
index 759b917..0790341 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_cmplx_q15.c
  * Description:  Q15 complex-by-complex multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
index 6280603..cbfc505 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_cmplx_q31.c
  * Description:  Q31 complex-by-complex multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
index 1bc40d2..740639e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cmplx_mult_real_f16.c
  * Description:  Floating-point complex by real multiplication
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,28 +36,6 @@
   @ingroup groupCmplxMath
  */
 
-/**
-  @defgroup CmplxByRealMult Complex-by-Real Multiplication
-
-  Multiplies a complex vector by a real vector and generates a complex result.
-  The data in the complex arrays is stored in an interleaved fashion
-  (real, imag, real, imag, ...).
-  The parameter <code>numSamples</code> represents the number of complex
-  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
-  real values while the real array has a total of <code>numSamples</code>
-  real values.
-
-  The underlying algorithm is used:
-
-  <pre>
-  for (n = 0; n < numSamples; n++) {
-      pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
-      pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
-  }
-  </pre>
-
-  There are separate functions for floating-point, Q15, and Q31 data types.
- */
 
 /**
   @addtogroup CmplxByRealMult
@@ -79,7 +59,7 @@ void arm_cmplx_mult_real_f16(
         float16_t * pCmplxDst,
         uint32_t numSamples)
 {
-    const static uint16_t stride_cmplx_x_real_16[8] = {
+    static const uint16_t stride_cmplx_x_real_16[8] = {
         0, 0, 1, 1, 2, 2, 3, 3
         };
     uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
@@ -141,20 +121,20 @@ void arm_cmplx_mult_real_f16(
 
     in = *pSrcReal++;
     /* store result in destination buffer. */
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     in = *pSrcReal++;
-    *pCmplxDst++ = *pSrcCmplx++* in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -177,8 +157,8 @@ void arm_cmplx_mult_real_f16(
 
     in = *pSrcReal++;
     /* store result in destination buffer. */
-    *pCmplxDst++ = *pSrcCmplx++ * in;
-    *pCmplxDst++ = *pSrcCmplx++ * in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
+    *pCmplxDst++ = (_Float16)*pSrcCmplx++ * (_Float16)in;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -192,4 +172,5 @@ void arm_cmplx_mult_real_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
index c946dfa..af346be 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_real_f32.c
  * Description:  Floating-point complex by real multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -79,7 +79,7 @@ void arm_cmplx_mult_real_f32(
         float32_t * pCmplxDst,
         uint32_t numSamples)
 {
-    const static uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 };
+    static const uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 };
 
     uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
     uint32_t blkCnt;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
index 9495dcb..c2aab63 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_real_q15.c
  * Description:  Q15 complex by real multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -59,7 +59,7 @@ void arm_cmplx_mult_real_q15(
         q15_t * pCmplxDst,
         uint32_t numSamples)
 {
-  const static uint16_t stride_cmplx_x_real_16[8] = {
+  static const uint16_t stride_cmplx_x_real_16[8] = {
       0, 0, 1, 1, 2, 2, 3, 3
       };
   q15x8_t rVec;
@@ -135,10 +135,10 @@ void arm_cmplx_mult_real_q15(
 
 #if defined (ARM_MATH_DSP)
     /* read 2 complex numbers both real and imaginary from complex input buffer */
-    inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
-    inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
+    inA1 = read_q15x2_ia (&pSrcCmplx);
+    inA2 = read_q15x2_ia (&pSrcCmplx);
     /* read 2 real values at a time from real input buffer */
-    inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
+    inB1 = read_q15x2_ia (&pSrcReal);
 
     /* multiply complex number with real numbers */
 #ifndef ARM_MATH_BIG_ENDIAN
@@ -163,9 +163,9 @@ void arm_cmplx_mult_real_q15(
     write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
     write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
 
-    inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
-    inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
-    inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
+    inA1 = read_q15x2_ia (&pSrcCmplx);
+    inA2 = read_q15x2_ia (&pSrcCmplx);
+    inB1 = read_q15x2_ia (&pSrcReal);
 
 #ifndef ARM_MATH_BIG_ENDIAN
     mul1 = (q31_t) ((q15_t) (inA1)       * (q15_t) (inB1));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
index 8303420..700468d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mult_real_q31.c
  * Description:  Q31 complex by real multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,7 +60,7 @@ void arm_cmplx_mult_real_q31(
         uint32_t numSamples)
 {
 
-    const static uint32_t stride_cmplx_x_real_32[4] = {
+    static const uint32_t stride_cmplx_x_real_32[4] = {
         0, 0, 1, 1
     };
     q31x4_t rVec;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_f32.c
index 40892c1..976e91f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_init_f32.c
  * Description:  Floating-point PID Control initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q15.c
index 1c8e160..79f5f0d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_init_q15.c
  * Description:  Q15 PID Control initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q31.c
index d38c740..df5415c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_init_q31.c
  * Description:  Q31 PID Control initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_f32.c
index fa29131..b0e6abb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_reset_f32.c
  * Description:  Floating-point PID Control reset function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q15.c
index bcd451a..c42f45a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_reset_q15.c
  * Description:  Q15 PID Control reset function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q31.c
index c13df84..472a2c1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_pid_reset_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_pid_reset_q31.c
  * Description:  Q31 PID Control reset function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_f32.c
index 97a3e39..4c85db6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_sin_cos_f32.c
  * Description:  Sine and Cosine calculation for floating-point values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,35 +31,6 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/controller_functions.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
 
-/**
-  @ingroup groupController
- */
-
-/**
-  @defgroup SinCos Sine Cosine
-
-  Computes the trigonometric sine and cosine values using a combination of table lookup
-  and linear interpolation.
-  There are separate functions for Q31 and floating-point data types.
-  The input to the floating-point version is in degrees while the
-  fixed-point Q31 have a scaled input with the range
-  [-1 0.9999] mapping to [-180 +180] degrees.
-
-  The floating point function also allows values that are out of the usual range. When this happens, the function will
-  take extra time to adjust the input value to the range of [-180 180].
-
-  The result is accurate to 5 digits after the decimal point.
-
-  The implementation is based on table lookup using 360 values together with linear interpolation.
-  The steps used are:
-   -# Calculation of the nearest integer table index.
-   -# Compute the fractional portion (fract) of the input.
-   -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
-   -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
-   -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
-   -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
- */
-
 /**
   @addtogroup SinCos
   @{
@@ -109,8 +80,6 @@ void arm_sin_cos_f32(
   d1 = -sinTable_f32[indexS  ];
   d2 = -sinTable_f32[indexS+1];
 
-  temp = (1.0f - fract) * f1 + fract * f2;
-
   Dn = 0.0122718463030f; /* delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE */
   Df = f2 - f1;          /* delta between the values of the functions */
 
@@ -127,7 +96,6 @@ void arm_sin_cos_f32(
   d1 = sinTable_f32[indexC  ];
   d2 = sinTable_f32[indexC+1];
 
-  temp = (1.0f - fract) * f1 + fract * f2;
 
   Df = f2 - f1; // delta between the values of the functions
   temp = Dn * (d1 + d2) - 2 * Df;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_q31.c
index 7e7c881..4198307 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/ControllerFunctions/arm_sin_cos_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_sin_cos_q31.c
  * Description:  Cosine & Sine calculation for Q31 values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c
index df49f29..921d039 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_svm_linear_init_f32.c
  * Description:  SVM Linear Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h
index 70a96cd..b50c739 100755
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h
@@ -4,11 +4,13 @@
  * Title:        arm_boolean_distance.c
  * Description:  Templates for boolean distances
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
index bc899da..1c056f2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_braycurtis_distance_f16.c
  * Description:  Bray-Curtis distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -133,8 +135,8 @@ float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, u
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      accumDiff += (_Float16)fabsf(tmpA - tmpB);
-      accumSum += (_Float16)fabsf(tmpA + tmpB);
+      accumDiff += (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      accumSum += (_Float16)fabsf((float32_t)((_Float16)tmpA + (_Float16)tmpB));
       blockSize --;
    }
    /*
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
index b616cd1..4a8fd6b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_braycurtis_distance_f32.c
  * Description:  Bray-Curtis distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
index ef0f411..7cfffc1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_canberra_distance_f16.c
  * Description:  Canberra distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -150,11 +152,11 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uin
       tmpA = *pA++;
       tmpB = *pB++;
 
-      diff = fabsf(tmpA - tmpB);
-      sum = fabsf(tmpA) + fabsf(tmpB);
-      if ((tmpA != 0.0f16) || (tmpB != 0.0f16))
+      diff = fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      sum = (_Float16)fabsf((float32_t)tmpA) + (_Float16)fabsf((float32_t)tmpB);
+      if (((_Float16)tmpA != 0.0f16) || ((_Float16)tmpB != 0.0f16))
       {
-         accum += (diff / sum);
+         accum += ((_Float16)diff / (_Float16)sum);
       }
       blockSize --;
    }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c
index 153124c..78d1353 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_canberra_distance_f32.c
  * Description:  Canberra distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
index f825ac2..bbf41dc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_chebyshev_distance_f16.c
  * Description:  Chebyshev distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -117,7 +119,7 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui
 
    tmpA = *pA++;
    tmpB = *pB++;
-   diff = fabsf(tmpA - tmpB);
+   diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
    maxVal = diff;
    blockSize--;
 
@@ -125,8 +127,8 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      diff = fabsf(tmpA - tmpB);
-      if (diff > maxVal)
+      diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      if ((_Float16)diff > (_Float16)maxVal)
       {
         maxVal = diff;
       }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
index e306011..ee45e3d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_chebyshev_distance_f32.c
  * Description:  Chebyshev distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
new file mode 100644
index 0000000..0b64f72
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
@@ -0,0 +1,80 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f64.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Chebyshev
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors (largest |pA[i] - pB[i]|)
+ * @param[in]    pA         First vector (blockSize elements are read)
+ * @param[in]    pB         Second vector (blockSize elements are read)
+ * @param[in]    blockSize  vector length; must be >= 1 (element 0 is read unconditionally)
+ * @return distance, i.e. the maximum absolute element-wise difference
+ * @note         blockSize == 0 is not handled: the uint32_t counter wraps on the first decrement
+ */
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t diff=0.,  maxVal,tmpA, tmpB;
+
+   tmpA = *pA++;   /* element 0 is consumed before blockSize is tested */
+   tmpB = *pB++;
+   diff = fabs(tmpA - tmpB);
+   maxVal = diff;   /* seed the running maximum with the first difference */
+   blockSize--;   /* one pair already processed */
+
+   while(blockSize > 0)   /* remaining blockSize - 1 pairs */
+   {
+      tmpA = *pA++;
+      tmpB = *pB++;
+      diff = fabs(tmpA - tmpB);
+      if (diff > maxVal)   /* strict compare: ties keep the earlier maximum */
+      {
+        maxVal = diff;
+      }
+      blockSize --;
+   }
+  
+   return(maxVal);
+}
+
+/**
+ * @} end of Chebyshev group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
index 876da7d..0c9cc2f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_cityblock_distance_f16.c
  * Description:  Cityblock (Manhattan) distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -111,7 +113,7 @@ float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, ui
    {
       tmpA = *pA++;
       tmpB = *pB++;
-      accum  += (_Float16)fabsf(tmpA - tmpB);
+      accum  += (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
       
       blockSize --;
    }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c
index d35239b..a749055 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_cityblock_distance_f32.c
  * Description:  Cityblock (Manhattan) distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f64.c
new file mode 100644
index 0000000..e07e7a7
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f64.c
@@ -0,0 +1,71 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f64.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+/**
+  @addtogroup Manhattan
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors (sum of |pA[i] - pB[i]|)
+ * @param[in]    pA         First vector (blockSize elements are read)
+ * @param[in]    pB         Second vector (blockSize elements are read)
+ * @param[in]    blockSize  vector length; 0 is accepted (the loop is skipped)
+ * @return distance; 0.0 when blockSize is 0
+ *
+ */
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t accum,tmpA, tmpB;
+
+   accum = 0.;   /* running sum of absolute differences */
+   while(blockSize > 0)   /* one pair of elements per iteration */
+   {
+      tmpA = *pA++;
+      tmpB = *pB++;
+      accum  += fabs(tmpA - tmpB);
+      
+      blockSize --;
+   }
+  
+   return(accum);
+}
+
+/**
+ * @} end of Manhattan group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
index e7d3638..715484b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_correlation_distance_f16.c
  * Description:  Correlation distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70,21 +72,21 @@ float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blo
     arm_mean_f16(pA, blockSize, &ma);
     arm_mean_f16(pB, blockSize, &mb);
 
-    arm_offset_f16(pA, -ma, pA, blockSize);
-    arm_offset_f16(pB, -mb, pB, blockSize);
+    arm_offset_f16(pA, -(_Float16)ma, pA, blockSize);
+    arm_offset_f16(pB, -(_Float16)mb, pB, blockSize);
 
     arm_power_f16(pA, blockSize, &pwra);
     arm_power_f16(pB, blockSize, &pwrb);
 
     arm_dot_prod_f16(pA,pB,blockSize,&dot);
 
-    dot = dot / blockSize;
-    pwra = pwra / blockSize;
-    pwrb = pwrb / blockSize;
+    dot = (_Float16)dot / (_Float16)blockSize;
+    pwra = (_Float16)pwra / (_Float16)blockSize;
+    pwrb = (_Float16)pwrb / (_Float16)blockSize;
 
-    arm_sqrt_f16(pwra * pwrb,&tmp);
+    arm_sqrt_f16((_Float16)pwra * (_Float16)pwrb,&tmp);
  
-    return(1.0f - dot / tmp);
+    return(1.0f16 - (_Float16)dot / (_Float16)tmp);
 
    
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c
index e71fd7f..79d26a9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_correlation_distance_f32.c
  * Description:  Correlation distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
index 0046263..453aebf 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_cosine_distance_f16.c
  * Description:  Cosine distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,6 +62,8 @@
  * @param[in]    blockSize  vector length
  * @return distance
  *
+ * @par           Description
+ *                  cosine_distance(u,v) is 1 - u . v / (Norm(u) Norm(v))
  */
 
 float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
@@ -71,8 +75,8 @@ float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint3
 
     arm_dot_prod_f16(pA,pB,blockSize,&dot);
 
-    arm_sqrt_f16(pwra * pwrb, &tmp);
-    return(1.0f - dot / tmp);
+    arm_sqrt_f16((_Float16)pwra * (_Float16)pwrb, &tmp);
+    return(1.0f16 - (_Float16)dot / (_Float16)tmp);
 
 }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c
index 1ad6cc7..871c7af 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_cosine_distance_f32.c
  * Description:  Cosine distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -47,6 +49,8 @@
  * @param[in]    blockSize  vector length
  * @return distance
  *
+ * @par           Description
+ *                  cosine_distance(u,v) is 1 - u . v / (Norm(u) Norm(v))
  */
 
 float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f64.c
new file mode 100644
index 0000000..ea5e654
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f64.c
@@ -0,0 +1,74 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f64.c
+ * Description:  Cosine distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup CosineDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+    float64_t powerA, powerB, inner;
+
+    /* Power of each input vector, as reported by arm_power_f64. */
+    arm_power_f64(pA, blockSize, &powerA);
+    arm_power_f64(pB, blockSize, &powerB);
+    /* Dot product of the two vectors. */
+    arm_dot_prod_f64(pA, pB, blockSize, &inner);
+
+    /* One minus the dot product scaled by sqrt(powerA * powerB). */
+    return (1. - inner / sqrt(powerA * powerB));
+}
+
+
+
+/**
+ * @} end of CosineDist group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c
index 4fd2963..d27dfc9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_dice_distance.c
  * Description:  Dice distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
index 67a703e..dd1d9ca 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_euclidean_distance_f16.c
  * Description:  Euclidean distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c
index 101151e..ccbdc77 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_euclidean_distance_f32.c
  * Description:  Euclidean distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f64.c
new file mode 100644
index 0000000..04c42f7
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f64.c
@@ -0,0 +1,70 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f64.c
+ * Description:  Euclidean distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup Euclidean
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t sumSq = 0.;
+   uint32_t idx;
+
+   /* Accumulate the squared element-wise differences. */
+   for (idx = 0U; idx < blockSize; idx++)
+   {
+      const float64_t delta = pA[idx] - pB[idx];
+      sumSq += SQ(delta);
+   }
+   return (sqrt(sumSq));
+}
+
+/**
+ * @} end of Euclidean group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c
index 8a6e4f7..28f2733 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_hamming_distance.c
  * Description:  Hamming distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c
index d3dc3bb..30d061b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_jaccard_distance.c
  * Description:  Jaccard distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
index 87a14d8..14bd4b0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_jensenshannon_distance_f16.c
  * Description:  Jensen-Shannon distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -54,7 +56,7 @@
 /// @private
 __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
 {
-    return (x * logf(x / y));
+    return ((_Float16)x * (_Float16)logf((float32_t)((_Float16)x / (_Float16)y)));
 }
 #endif
 
@@ -117,7 +119,7 @@ float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB
 
     }
 
-    arm_sqrt_f16(vecAddAcrossF16Mve(accumV) / 2.0f, &tmp);
+    arm_sqrt_f16((_Float16)vecAddAcrossF16Mve(accumV) / 2.0f16, &tmp);
     return (tmp);
 }
 
@@ -162,7 +164,7 @@ float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB
 
 
     sum = left + right;
-    arm_sqrt_f16(sum/2.0f, &result);
+    arm_sqrt_f16((_Float16)sum/2.0f16, &result);
     return(result);
 
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
index 56af92e..6aeb797 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_jensenshannon_distance_f32.c
  * Description:  Jensen-Shannon distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c
index 2941de9..1bcb2ef 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_kulsinski_distance.c
  * Description:  Kulsinski distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
index 79d1b8a..ae9c3cb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@@ -6,11 +6,13 @@
  * Title:        arm_minkowski_distance_f16.c
  * Description:  Minkowski distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -69,10 +71,9 @@
 float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
 {
     uint32_t        blkCnt;
-    f16x8_t         a, b, tmpV, accumV, sumV;
+    f16x8_t         a, b, tmpV, sumV;
 
     sumV = vdupq_n_f16(0.0f);
-    accumV = vdupq_n_f16(0.0f);
 
     blkCnt = blockSize >> 3;
     while (blkCnt > 0U) {
@@ -104,7 +105,7 @@ float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, in
         sumV = vaddq_m(sumV, sumV, tmpV, p0);
     }
 
-    return (powf(vecAddAcrossF16Mve(sumV), (1.0f / (float16_t) order)));
+    return (powf((float32_t)vecAddAcrossF16Mve(sumV), (1.0f / (float32_t) order)));
 }
 
 
@@ -116,14 +117,14 @@ float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, in
     _Float16 sum;
     uint32_t i;
 
-    sum = 0.0f; 
+    sum = 0.0f16; 
     for(i=0; i < blockSize; i++)
     {
-       sum += (_Float16)powf(fabsf(pA[i] - pB[i]),order);
+       sum += (_Float16)powf(fabsf((float32_t)((_Float16)pA[i] - (_Float16)pB[i])),order);
     }
 
 
-    return(powf(sum,(1.0f/order)));
+    return(_Float16)(powf((float32_t)sum,(1.0f/(float32_t)order)));
 
 }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c
index 51a904a..e29d8e1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c
@@ -6,11 +6,13 @@
  * Title:        arm_minkowski_distance_f32.c
  * Description:  Minkowski distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -76,10 +78,9 @@ __attribute__((weak)) float __powisf2(float a, int b)
 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
 {
     uint32_t        blkCnt;
-    f32x4_t         a, b, tmpV, accumV, sumV;
+    f32x4_t         a, b, tmpV, sumV;
 
     sumV = vdupq_n_f32(0.0f);
-    accumV = vdupq_n_f32(0.0f);
 
     blkCnt = blockSize >> 2;
     while (blkCnt > 0U) {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c
index 2f923dd..eb7820d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_rogerstanimoto_distance.c
  * Description:  Roger Stanimoto distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
index d924ea2..0be143e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_russellrao_distance.c
  * Description:  Russell-Rao distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,7 +65,7 @@ float32_t arm_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32
     arm_boolean_distance_TT(pA, pB, numberOfBools, &ctt);
 
 
-    return(1.0*(numberOfBools - ctt) / ((float32_t)numberOfBools));
+    return(1.0f*(numberOfBools - ctt) / ((float32_t)numberOfBools));
 }
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c
index d18904e..3b7fd14 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_sokalmichener_distance.c
  * Description:  Sokal-Michener distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c
index 48b24fc..707466e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_sokalsneath_distance.c
  * Description:  Sokal-Sneath distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c
index 0535e5b..cf52c90 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c
@@ -6,11 +6,13 @@
  * Title:        arm_yule_distance.c
  * Description:  Yule distance between two vectors
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c
new file mode 100644
index 0000000..93c898c
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c
@@ -0,0 +1,175 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_atan2_f16.c
+ * Description:  float16 Arc tangent of y/x
+ *
+ * $Date:        22 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h"        
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/*
+
+atan for an argument in the range [0, 1.0]
+
+
+*/
+
+#define PIF16 3.14f16 /* pi as a float16 literal */
+#define PI16HALF 1.571f16 /* pi/2 as a float16 literal */
+
+#define ATANHALFF16 0.463648f16 /* numerically atan(0.5) */
+
+#define ATAN2_NB_COEFS_F16 5 /* number of entries in atan2_coefs_f16 */
+/* Polynomial coefficients, lowest power first: entry k multiplies x^k in arm_atan_limited_f16. */
+static const float16_t atan2_coefs_f16[ATAN2_NB_COEFS_F16]={0.f16
+,1.f16
+,0.f16
+,-0.367f16
+,0.152f16
+};
+
+__STATIC_FORCEINLINE float16_t arm_atan_limited_f16(float16_t x)
+{
+    /* Horner evaluation, starting from the highest-order coefficient. */
+    int k = ATAN2_NB_COEFS_F16 - 1;
+    float16_t acc = atan2_coefs_f16[k];
+
+    while (k-- > 0)
+    {
+        acc = (_Float16)x*(_Float16)acc + (_Float16)atan2_coefs_f16[k];
+    }
+    return(acc);
+}
+
+__STATIC_FORCEINLINE float16_t arm_atan_f16(float16_t x)
+{
+   /* atan is odd: work on |x| and restore the sign at the end. */
+   const int isNegative = ((_Float16)x < 0.0f16);
+   float16_t angle = 0.0f16;
+
+   if (isNegative)
+   {
+      x = -(_Float16)x;
+   }
+
+   if ((_Float16)x > 1.0f16)
+   {
+      /* Above 1, evaluate the reciprocal and subtract it from PI16HALF. */
+      const float16_t inverse = 1.0f16 / (_Float16)x;
+
+      angle = (_Float16)PI16HALF - (_Float16)arm_atan_limited_f16(inverse);
+   }
+   else
+   {
+      /* Argument is passed to the limited-range polynomial as is. */
+      angle += (_Float16)arm_atan_limited_f16(x);
+   }
+
+
+   /* Mirror the result for negative inputs. */
+   return (isNegative ? -(_Float16)angle : angle);
+
+}
+
+/**
+  @ingroup groupFastMath
+ */
+
+
+/**
+  @addtogroup atan2
+  @{
+ */
+
+/**
+  @brief       Arc Tangent of y/x using sign of y and x to get right quadrant
+  @param[in]   y  y coordinate
+  @param[in]   x  x coordinate
+  @param[out]  result  Result
+  @return  error status.
+ 
+  @par         Compute the Arc tangent of y/x:
+                   The sign of y and x are used to determine the right quadrant
+                   and compute the right angle.
+
+*/
+arm_status arm_atan2_f16(float16_t y,float16_t x,float16_t *result)
+{
+    arm_status status = ARM_MATH_SUCCESS;
+
+    if ((_Float16)x > 0.0f16)
+    {
+        /* Right half-plane: the arc tangent of the ratio is the angle. */
+        *result = arm_atan_f16((_Float16)y/(_Float16)x);
+    }
+    else if ((_Float16)x < 0.0f16)
+    {
+        /* Left half-plane: offset by PIF16, with the sign taken from y. */
+        if ((_Float16)y > 0.0f16)
+        {
+            *result = (_Float16)arm_atan_f16((_Float16)y/(_Float16)x) + (_Float16)PIF16;
+        }
+        else if ((_Float16)y < 0.0f16)
+        {
+            *result = (_Float16)arm_atan_f16((_Float16)y/(_Float16)x) - (_Float16)PIF16;
+        }
+        else if (signbit((float)y))
+        {
+            *result = -(_Float16)PIF16;
+        }
+        else
+        {
+            *result = PIF16;
+        }
+    }
+    else if (((_Float16)x == 0.0f16) && ((_Float16)y > 0.0f16))
+    {
+        /* Positive y axis. */
+        *result = PI16HALF;
+    }
+    else if (((_Float16)x == 0.0f16) && ((_Float16)y < 0.0f16))
+    {
+        /* Negative y axis. */
+        *result = -(_Float16)PI16HALF;
+    }
+    else
+    {
+        /* No branch applied (for instance x == 0 with y == 0); *result is not written. */
+        status = ARM_MATH_NANINF;
+    }
+
+    return(status);
+
+}
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of atan2 group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c
new file mode 100644
index 0000000..51f6812
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c
@@ -0,0 +1,187 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_atan2_f32.c
+ * Description:  float32 Arc tangent of y/x
+ *
+ * $Date:        22 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"        
+
+/*
+
+atan for an argument in the range [0, 1.0]
+
+
+*/
+
+#define ATANHALFF32 0.463648f /* numerically atan(0.5) */
+#define PIHALFF32 1.5707963267948966192313f /* pi/2 */
+
+#define ATAN2_NB_COEFS_F32 10 /* number of entries in atan2_coefs_f32 */
+/* Polynomial coefficients, lowest power first: entry k multiplies x^k in arm_atan_limited_f32. */
+static const float32_t atan2_coefs_f32[ATAN2_NB_COEFS_F32]={0.0f
+,1.0000001638308195518f
+,-0.0000228941363602264f
+,-0.3328086544578890873f
+,-0.004404814619311061f
+,0.2162217461808173258f
+,-0.0207504842057097504f
+,-0.1745263362250363339f
+,0.1340557235283553386f
+,-0.0323664125927477625f
+};
+
+__STATIC_FORCEINLINE float32_t arm_atan_limited_f32(float32_t x)
+{
+    /* Horner evaluation, starting from the highest-order coefficient. */
+    int k = ATAN2_NB_COEFS_F32 - 1;
+    float32_t acc = atan2_coefs_f32[k];
+
+    while (k-- > 0)
+    {
+        acc = x*acc + atan2_coefs_f32[k];
+    }
+    return(acc);
+}
+
+__STATIC_FORCEINLINE float32_t arm_atan_f32(float32_t x)
+{
+   /* atan is odd: work on |x| and restore the sign at the end. */
+   const int isNegative = (x < 0.0f);
+   float32_t angle = 0.0f;
+
+   if (isNegative)
+   {
+      x = -x;
+   }
+
+   if (x > 1.0f)
+   {
+      /* Above 1, evaluate the reciprocal and subtract it from PIHALFF32. */
+      const float32_t inverse = 1.0f / x;
+
+      angle = PIHALFF32 - arm_atan_limited_f32(inverse);
+   }
+   else
+   {
+      /* Argument is passed to the limited-range polynomial as is. */
+      angle += arm_atan_limited_f32(x);
+   }
+
+
+   /* Mirror the result for negative inputs. */
+   return (isNegative ? -angle : angle);
+
+}
+
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @defgroup atan2 ArcTan2
+
+  Computing Arc tangent only using the ratio y/x is not enough to determine the angle
+  since there is an indeterminacy. Opposite quadrants are giving the same ratio.
+
+  ArcTan2 is not using y/x to compute the angle but y and x and use the sign of y and x
+  to determine the quadrant.
+
+ */
+
+/**
+  @addtogroup atan2
+  @{
+ */
+
+/**
+  @brief       Arc Tangent of y/x using sign of y and x to get right quadrant
+  @param[in]   y  y coordinate
+  @param[in]   x  x coordinate
+  @param[out]  result  Result
+  @return  error status.
+ 
+  @par         Compute the Arc tangent of y/x:
+                   The sign of y and x are used to determine the right quadrant
+                   and compute the right angle.
+*/
+
+
+arm_status arm_atan2_f32(float32_t y,float32_t x,float32_t *result)
+{
+    arm_status status = ARM_MATH_SUCCESS;
+
+    if (x > 0.0f)
+    {
+        /* Right half-plane: the arc tangent of the ratio is the angle. */
+        *result = arm_atan_f32(y/x);
+    }
+    else if (x < 0.0f)
+    {
+        /* Left half-plane: offset by PI, with the sign taken from y. */
+        if (y > 0.0f)
+        {
+            *result = arm_atan_f32(y/x) + PI;
+        }
+        else if (y < 0.0f)
+        {
+            *result = arm_atan_f32(y/x) - PI;
+        }
+        else if (signbit(y))
+        {
+            *result = -PI;
+        }
+        else
+        {
+            *result = PI;
+        }
+    }
+    else if ((x == 0.0f) && (y > 0.0f))
+    {
+        /* Positive y axis. */
+        *result = PIHALFF32;
+    }
+    else if ((x == 0.0f) && (y < 0.0f))
+    {
+        /* Negative y axis. */
+        *result = -PIHALFF32;
+    }
+    else
+    {
+        /* No branch applied (for instance x == 0 with y == 0); *result is not written. */
+        status = ARM_MATH_NANINF;
+    }
+
+    return(status);
+
+}
+
+/**
+  @} end of atan2 group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c
new file mode 100644
index 0000000..c334bee
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c
@@ -0,0 +1,239 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_atan2_q15.c
+ * Description:  Q15 Arc tangent of y/x
+ *
+ * $Date:        22 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"        
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"        
+
+/*
+
+atan for an argument in [0, 1.0]
+
+*/
+
+
+/* Q2.13 */
+#define ATANHALFQ13 0xed6
+#define PIHALFQ13 0x3244
+#define PIQ13 0x6488
+
+#define ATAN2_NB_COEFS_Q15 10
+
+static const q15_t atan2_coefs_q15[ATAN2_NB_COEFS_Q15]={0x0000 /* [0] constant term; entry k multiplies x^k in arm_atan_limited_q15 */
+,0x7fff /* [1] */
+,0xffff /* [2] */
+,0xd567 /* [3] */
+,0xff70 /* [4] */
+,0x1bad /* [5] */
+,0xfd58 /* [6] */
+,0xe9a9 /* [7] */
+,0x1129 /* [8] */
+,0xfbdb /* [9] highest-degree term: the Horner loop in arm_atan_limited_q15 starts from this entry */
+};
+/* Evaluates the atan2_coefs_q15 polynomial at x with Horner's scheme; the file comment above expects x in [0, 1.0]. */
+__STATIC_FORCEINLINE q15_t arm_atan_limited_q15(q15_t x)
+{
+    q31_t res=(q31_t)atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1];   /* start from the last (highest-degree) coefficient */
+    int i=1;
+    for(i=1;i<ATAN2_NB_COEFS_Q15;i++)
+    {
+        res = ((q31_t) x * res) >> 15U;   /* multiply the running sum by x and rescale the product by 2^15 */
+        res = res + ((q31_t) atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1-i]) ;   /* add the next lower coefficient */
+    }
+
+    res = __SSAT(res>>2,16);   /* drop two bits, then __SSAT(...,16) - presumably a clamp to signed 16 bits */
+
+    /* Callers combine the returned value with PIHALFQ13, i.e. they treat it as Q2.13. */
+    return(res);
+}
+/* Angle for the magnitudes |y| and |x| via arm_atan_limited_q15, negated when exactly one of y and x is negative. */
+/* arm_atan2_q15 only calls this with x != 0 and applies the +/- pi quadrant offset itself.                      */
+__STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x)
+{
+   int sign=0;   /* toggled once per negative input: ends up 1 only if exactly one of y, x is negative */
+   q15_t res=0;
+
+   if (y<0)
+   {
+     /* Negate y; in the portable path 0x8000 is mapped to 0x7fff instead of overflowing */
+#if defined (ARM_MATH_DSP)
+     y = __QSUB16(0, y);
+#else 
+     y = (y == (q15_t) 0x8000) ? (q15_t) 0x7fff : -y;
+#endif
+
+     sign=1-sign;
+   }
+
+   if (x < 0)
+   {
+      sign=1 - sign;
+     
+      /* Negate x; in the portable path 0x8000 is mapped to 0x7fff instead of overflowing */
+#if defined (ARM_MATH_DSP)
+     x = __QSUB16(0, x);
+#else 
+     x = (x == (q15_t) 0x8000) ? (q15_t) 0x7fff : -x;
+#endif
+   }
+
+   if (y > x)   /* |y| > |x|: divide x by y so the ratio stays below 1, then use pi/2 - atan(ratio) */
+   {
+    q15_t ratio;
+    int16_t shift;
+
+    arm_divide_q15(x,y,&ratio,&shift);   /* status is ignored; y > x >= 0 here, so the divisor is non-zero */
+
+    /* arm_divide_q15 returns a normalized quotient plus a left-shift count: apply the shift to get the real ratio */
+    if (shift >=0)
+    {
+       ratio = __SSAT(((q31_t) ratio << shift), 16);
+    }
+    else
+    {
+       ratio = (ratio >> -shift);
+    }
+   
+    res = PIHALFQ13 - arm_atan_limited_q15(ratio);
+      
+   }
+   else   /* |y| <= |x|: the ratio |y|/|x| is already at most 1 */
+   {
+    q15_t ratio;
+    int16_t shift;
+
+    arm_divide_q15(y,x,&ratio,&shift);   /* status is ignored; the caller guarantees x != 0 */
+
+    /* arm_divide_q15 returns a normalized quotient plus a left-shift count: apply the shift to get the real ratio */
+    if (shift >=0)
+    {
+       ratio = __SSAT(((q31_t) ratio << shift), 16);
+    }
+    else
+    {
+       ratio = (ratio >> -shift);
+    }
+   
+
+    res = arm_atan_limited_q15(ratio);
+
+   }
+
+
+   if (sign)
+   {
+     /* Negate res: y and x had opposite signs, so the angle of y/x is negative */
+#if defined (ARM_MATH_DSP)
+     res = __QSUB16(0, res);
+#else 
+     res = (res == (q15_t) 0x8000) ? (q15_t) 0x7fff : -res;
+#endif
+   }
+
+   return(res);
+}
+
+
+/**
+  @ingroup groupFastMath
+ */
+
+
+/**
+  @addtogroup atan2
+  @{
+ */
+
+/**
+  @brief       Arc Tangent of y/x using sign of y and x to get right quadrant
+  @param[in]   y  y coordinate
+  @param[in]   x  x coordinate
+  @param[out]  result  Result in Q2.13
+  @return  error status.
+ 
+  @par         Compute the Arc tangent of y/x:
+                   The sign of y and x are used to determine the right quadrant
+                   and compute the right angle.
+*/
+
+
+arm_status arm_atan2_q15(q15_t y,q15_t x,q15_t *result)
+{
+    if (x > 0)   /* Right half-plane: the arm_atan_q15 value is stored unchanged */
+    {
+        *result=arm_atan_q15(y,x);
+        return(ARM_MATH_SUCCESS);
+    }
+    if (x < 0)   /* Left half-plane: offset the arm_atan_q15 value by +/- pi (PIQ13, Q2.13) */
+    {
+        if (y > 0)
+        {
+           *result=arm_atan_q15(y,x) + PIQ13;
+        }
+        else if (y < 0)
+        {
+           *result=arm_atan_q15(y,x) - PIQ13;
+        }
+        else
+        {
+            /*
+             * Only y == 0 reaches this point (negative x axis).
+             * A q15_t has no negative zero - the float32 variant
+             * needs signbit() here, the fixed-point one does not -
+             * so a (y<0) test can never succeed in this branch
+             * and the angle is always +pi.
+             */
+            *result= PIQ13;
+        }
+        return(ARM_MATH_SUCCESS);
+    }
+    if (x == 0)   /* On the y axis: +/- pi/2 (PIHALFQ13) according to the sign of y */
+    {
+        if (y > 0)
+        {
+            *result=PIHALFQ13;
+            return(ARM_MATH_SUCCESS);
+        }
+        if (y < 0)
+        {
+            *result=-PIHALFQ13;
+            return(ARM_MATH_SUCCESS);
+        }
+    }
+    /* Only x == 0 and y == 0 gets here: the angle is undefined, */
+    /* *result is left unwritten and an error status is returned. */
+    return(ARM_MATH_NANINF);
+
+}
+
+/**
+  @} end of atan2 group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c
new file mode 100644
index 0000000..6eba0ce
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c
@@ -0,0 +1,240 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_atan2_q31.c
+ * Description:  Q31 Arc tangent of y/x
+ *
+ * $Date:        22 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"        
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/utils.h"        
+
+/*
+
+atan for an argument in [0, 1.0]
+
+*/
+
+
+/* Q2.29 */
+#define ATANHALF_Q29 0xed63383
+#define PIHALF_Q29 0x3243f6a9
+#define PIQ29 0x6487ed51
+
+#define ATAN2_NB_COEFS_Q31 13
+
+static const q31_t atan2_coefs_q31[ATAN2_NB_COEFS_Q31]={0x00000000 /* [0] constant term; entry k multiplies x^k in arm_atan_limited_q31 */
+,0x7ffffffe /* [1] */
+,0x000001b6 /* [2] */
+,0xd555158e /* [3] */
+,0x00036463 /* [4] */
+,0x1985f617 /* [5] */
+,0x001992ae /* [6] */
+,0xeed53a7f /* [7] */
+,0xf8f15245 /* [8] */
+,0x2215a3a4 /* [9] */
+,0xe0fab004 /* [10] */
+,0x0cdd4825 /* [11] */
+,0xfddbc054 /* [12] highest-degree term: the Horner loop in arm_atan_limited_q31 starts from this entry */
+};
+/* Evaluates the atan2_coefs_q31 polynomial at x with Horner's scheme; the file comment above expects x in [0, 1.0]. */
+/* Callers combine the returned value with PIHALF_Q29, i.e. they treat it as Q2.29.                                  */
+__STATIC_FORCEINLINE q31_t arm_atan_limited_q31(q31_t x)
+{
+    q63_t res=(q63_t)atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1];   /* start from the last (highest-degree) coefficient */
+    int i=1;
+    for(i=1;i<ATAN2_NB_COEFS_Q31;i++)
+    {
+        res = ((q63_t) x * res) >> 31U;   /* multiply the running sum by x and rescale the product by 2^31 */
+        res = res + ((q63_t) atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1-i]) ;   /* add the next lower coefficient */
+    }
+
+    return(clip_q63_to_q31(res>>2));   /* drop two bits, then narrow the 64-bit sum to q31_t via clip_q63_to_q31 */
+}
+/* Angle for the magnitudes |y| and |x| via arm_atan_limited_q31, negated when exactly one of y and x is negative. */
+/* arm_atan2_q31 only calls this with x != 0 and applies the +/- pi quadrant offset itself.                      */
+__STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x)
+{
+   int sign=0;   /* toggled once per negative input: ends up 1 only if exactly one of y, x is negative */
+   q31_t res=0;
+
+   if (y<0)
+   {
+    /* Negate y; in the portable path INT32_MIN is mapped to INT32_MAX instead of overflowing */
+#if defined (ARM_MATH_DSP)
+    y = __QSUB(0, y);
+#else
+    y = (y == INT32_MIN) ? INT32_MAX : -y;
+#endif
+
+     sign=1-sign;
+   }
+
+   if (x < 0)
+   {
+      sign=1 - sign;
+
+    /* Negate x; in the portable path INT32_MIN is mapped to INT32_MAX instead of overflowing */
+#if defined (ARM_MATH_DSP)
+    x = __QSUB(0, x);
+#else
+    x = (x == INT32_MIN) ? INT32_MAX : -x;
+#endif
+   }
+
+   if (y > x)   /* |y| > |x|: divide x by y so the ratio stays below 1, then use pi/2 - atan(ratio) */
+   {
+    q31_t ratio;
+    int16_t shift;
+
+    arm_divide_q31(x,y,&ratio,&shift);   /* status is ignored; y > x >= 0 here, so the divisor is non-zero */
+
+    /* arm_divide_q31 returns a normalized quotient plus a left-shift count: apply the shift to get the real ratio */
+    if (shift >= 0)
+    {
+         ratio = clip_q63_to_q31((q63_t) ratio << shift);
+    }
+    else
+    {
+         ratio = (ratio >> -shift);
+    }
+   
+    res = PIHALF_Q29 - arm_atan_limited_q31(ratio);
+      
+   }
+   else   /* |y| <= |x|: the ratio |y|/|x| is already at most 1 */
+   {
+    q31_t ratio;
+    int16_t shift;
+
+    arm_divide_q31(y,x,&ratio,&shift);   /* status is ignored; the caller guarantees x != 0 */
+
+    /* arm_divide_q31 returns a normalized quotient plus a left-shift count: apply the shift to get the real ratio */
+    if (shift >= 0)
+    {
+         ratio = clip_q63_to_q31((q63_t) ratio << shift);
+    }
+    else
+    {
+         ratio = (ratio >> -shift);
+    }
+   
+
+    res = arm_atan_limited_q31(ratio);
+
+   }
+
+
+   if (sign)
+   {
+     /* Negate res: y and x had opposite signs, so the angle of y/x is negative */
+#if defined (ARM_MATH_DSP)
+     res = __QSUB(0, res);
+#else
+     res = (res == INT32_MIN) ? INT32_MAX : -res;
+#endif
+   }
+
+   return(res);
+}
+
+
+/**
+  @ingroup groupFastMath
+ */
+
+
+/**
+  @addtogroup atan2
+  @{
+ */
+
+/**
+  @brief       Arc Tangent of y/x using sign of y and x to get right quadrant
+  @param[in]   y  y coordinate
+  @param[in]   x  x coordinate
+  @param[out]  result  Result in Q2.29
+  @return  error status.
+ 
+  @par         Compute the Arc tangent of y/x:
+                   The sign of y and x are used to determine the right quadrant
+                   and compute the right angle.
+*/
+
+
+arm_status arm_atan2_q31(q31_t y,q31_t x,q31_t *result)
+{
+    if (x > 0)   /* Right half-plane: the arm_atan_q31 value is stored unchanged */
+    {
+        *result=arm_atan_q31(y,x);
+        return(ARM_MATH_SUCCESS);
+    }
+    if (x < 0)   /* Left half-plane: offset the arm_atan_q31 value by +/- pi (PIQ29, Q2.29) */
+    {
+        if (y > 0)
+        {
+           *result=arm_atan_q31(y,x) + PIQ29;
+        }
+        else if (y < 0)
+        {
+           *result=arm_atan_q31(y,x) - PIQ29;
+        }
+        else
+        {
+            /*
+             * Only y == 0 reaches this point (negative x axis).
+             * A q31_t has no negative zero - the float32 variant
+             * needs signbit() here, the fixed-point one does not -
+             * so a (y<0) test can never succeed in this branch
+             * and the angle is always +pi.
+             */
+            *result= PIQ29;
+        }
+        return(ARM_MATH_SUCCESS);
+    }
+    if (x == 0)   /* On the y axis: +/- pi/2 (PIHALF_Q29) according to the sign of y */
+    {
+        if (y > 0)
+        {
+            *result=PIHALF_Q29;
+            return(ARM_MATH_SUCCESS);
+        }
+        if (y < 0)
+        {
+            *result=-PIHALF_Q29;
+            return(ARM_MATH_SUCCESS);
+        }
+    }
+    /* Only x == 0 and y == 0 gets here: the angle is undefined, */
+    /* *result is left unwritten and an error status is returned. */
+    return(ARM_MATH_NANINF);
+
+}
+
+/**
+  @} end of atan2 group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c
index ff7f0a2..ac428dc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cos_f32.c
  * Description:  Fast cosine calculation for floating-point values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c
index ea995fd..c423b06 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cos_q15.c
  * Description:  Fast cosine calculation for Q15 values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c
index ab02d2b..749dd0e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cos_q31.c
  * Description:  Fast cosine calculation for Q31 values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c
new file mode 100644
index 0000000..c53a379
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c
@@ -0,0 +1,114 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_divide_q15.c
+ * Description:  Fixed point division for Q15 values
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
+
+#include <stdlib.h>
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @defgroup divide Fixed point division
+
+ */
+
+/**
+  @addtogroup divide
+  @{
+ */
+
+/**
+  @brief         Fixed point division
+  @param[in]     numerator    Numerator
+  @param[in]     denominator  Denominator
+  @param[out]    quotient     Quotient value normalized between -1.0 and 1.0
+  @param[out]    shift        Shift left value to get the unnormalized quotient
+  @return        error status
+
+  When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced
+  to the saturated negative or positive value.
+ */
+
+arm_status arm_divide_q15(q15_t numerator,
+  q15_t denominator,
+  q15_t *quotient,
+  int16_t *shift)
+{
+  int16_t sign=0;   /* non-zero when numerator and denominator have opposite signs */
+  q31_t temp;
+  int16_t shiftForNormalizing;
+
+  *shift = 0;   /* default: the quotient is returned without any normalization shift */
+
+  sign = (numerator>>15) ^ (denominator>>15);   /* XOR of the two sign fills; relies on >> of a negative value being arithmetic */
+
+  if (denominator == 0)   /* division by zero: saturate by sign and report ARM_MATH_NANINF */
+  {
+     if (sign)
+     {
+        *quotient = 0x8000;
+     }
+     else
+     {
+        *quotient = 0x7FFF;
+     }
+     return(ARM_MATH_NANINF);
+  }
+
+  arm_abs_q15(&numerator,&numerator,1);   /* in-place magnitude of each operand (block size 1) */
+  arm_abs_q15(&denominator,&denominator,1);
+  
+  temp = ((q31_t)numerator << 15) / ((q31_t)denominator);   /* magnitude quotient scaled by 2^15 */
+
+  shiftForNormalizing= 17 - __CLZ(temp);   /* with __CLZ counting leading zeros of the 32-bit value: bits needed beyond 15 */
+  if (shiftForNormalizing > 0)
+  {
+     *shift = shiftForNormalizing;   /* the caller must shift the quotient left by this amount */
+     temp = temp >> shiftForNormalizing;
+  }
+
+  if (sign)   /* restore the sign that was removed before dividing */
+  {
+    temp = -temp;
+  }
+
+  *quotient=temp;
+
+  return(ARM_MATH_SUCCESS);
+}
+
+/**
+  @} end of divide group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c
new file mode 100644
index 0000000..b1ae866
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c
@@ -0,0 +1,109 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_divide_q31.c
+ * Description:  Fixed point division for Q31 values
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
+
+#include <stdlib.h>
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @addtogroup divide
+  @{
+ */
+
+/**
+  @brief         Fixed point division
+  @param[in]     numerator    Numerator
+  @param[in]     denominator  Denominator
+  @param[out]    quotient     Quotient value normalized between -1.0 and 1.0
+  @param[out]    shift        Shift left value to get the unnormalized quotient
+  @return        error status
+
+  When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced
+  to the saturated negative or positive value.
+ */
+
+arm_status arm_divide_q31(q31_t numerator,
+  q31_t denominator,
+  q31_t *quotient,
+  int16_t *shift)
+{
+  int16_t sign=0;   /* non-zero when numerator and denominator have opposite signs */
+  q63_t temp;
+  int16_t shiftForNormalizing;
+
+  *shift = 0;   /* default: the quotient is returned without any normalization shift */
+
+  sign = (numerator>>31) ^ (denominator>>31);   /* XOR of the two sign fills; relies on >> of a negative value being arithmetic */
+
+  if (denominator == 0)   /* division by zero: saturate by sign and report ARM_MATH_NANINF */
+  {
+     if (sign)
+     {
+        *quotient = 0x80000000;
+     }
+     else
+     {
+        *quotient = 0x7FFFFFFF;
+     }
+     return(ARM_MATH_NANINF);
+  }
+
+  arm_abs_q31(&numerator,&numerator,1);   /* in-place magnitude of each operand (block size 1) */
+  arm_abs_q31(&denominator,&denominator,1);
+
+  temp = ((q63_t)numerator << 31) / ((q63_t)denominator);   /* magnitude quotient scaled by 2^31, held in 64 bits */
+
+  shiftForNormalizing= 32 - __CLZ(temp >> 31);   /* with __CLZ counting leading zeros: width of the part above the low 31 bits */
+  if (shiftForNormalizing > 0)
+  {
+     *shift = shiftForNormalizing;   /* the caller must shift the quotient left by this amount */
+     temp = temp >> shiftForNormalizing;
+  }
+
+  if (sign)   /* restore the sign that was removed before dividing */
+  {
+    temp = -temp;
+  }
+
+  *quotient=(q31_t)temp;
+
+  return(ARM_MATH_SUCCESS);
+}
+
+/**
+  @} end of divide group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c
index 08d326d..89cc8b1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_sin_f32.c
  * Description:  Fast sine calculation for floating-point values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c
index 439b33a..7d99d9f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_sin_q15.c
  * Description:  Fast sine calculation for Q15 values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c
index 01d9c6c..92f2ba6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_sin_q31.c
  * Description:  Fast sine calculation for Q31 values
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c
index e499f2b..bfcb9b2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_sqrt_q15.c
  * Description:  Q15 square root function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,8 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
 
+#define Q12QUARTER 0x2000
+
 /**
   @ingroup groupFastMath
  */
@@ -53,14 +55,7 @@ arm_status arm_sqrt_q15(
   q15_t in,
   q15_t * pOut)
 {
-  q31_t bits_val1;
-  q15_t number, temp1, var1, signBits1, half;
-  float32_t temp_float1;
-  union
-  {
-    q31_t fracval;
-    float32_t floatval;
-  } tempconv;
+  q15_t number, var1, signBits1,temp;
 
   number = in;
 
@@ -78,46 +73,30 @@ arm_status arm_sqrt_q15(
     {
       number = number << (signBits1 - 1);
     }
+    /* Start value for 1/sqrt(x) for the Newton iteration */
+    var1 = sqrt_initial_lut_q15[(number>> 11) - (Q12QUARTER >> 11)];
 
-    /* Calculate half value of the number */
-    half = number >> 1;
-    /* Store the number for later use */
-    temp1 = number;
-
-    /* Convert to float */
-    temp_float1 = number * 3.051757812500000e-005f;
-    /* Store as integer */
-    tempconv.floatval = temp_float1;
-    bits_val1 = tempconv.fracval;
-    /* Subtract the shifted value from the magic number to give intial guess */
-    bits_val1 = 0x5f3759df - (bits_val1 >> 1);  /* gives initial guess */
-    /* Store as float */
-    tempconv.fracval = bits_val1;
-    temp_float1 = tempconv.floatval;
-    /* Convert to integer format */
-    var1 = (q31_t) (temp_float1 * 16384);
-
+    /* 0.5 var1 * (3 - number * var1 * var1) */
     /* 1st iteration */
-    var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
-                                     ((q15_t)
-                                      ((((q15_t)
-                                         (((q31_t) var1 * var1) >> 15)) *
-                                        (q31_t) half) >> 15))) >> 15)) << 2;
-    /* 2nd iteration */
-    var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
-                                     ((q15_t)
-                                      ((((q15_t)
-                                         (((q31_t) var1 * var1) >> 15)) *
-                                        (q31_t) half) >> 15))) >> 15)) << 2;
-    /* 3rd iteration */
-    var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
-                                     ((q15_t)
-                                      ((((q15_t)
-                                         (((q31_t) var1 * var1) >> 15)) *
-                                        (q31_t) half) >> 15))) >> 15)) << 2;
+
+   temp = ((q31_t) var1 * var1) >> 12;
+   temp = ((q31_t) number * temp) >> 15;
+   temp = 0x3000 - temp; 
+   var1 = ((q31_t) var1 * temp) >> 13;
+
+   temp = ((q31_t) var1 * var1) >> 12;
+   temp = ((q31_t) number * temp) >> 15;
+   temp = 0x3000 - temp; 
+   var1 = ((q31_t) var1 * temp) >> 13;
+
+   temp = ((q31_t) var1 * var1) >> 12;
+   temp = ((q31_t) number * temp) >> 15;
+   temp = 0x3000 - temp; 
+   var1 = ((q31_t) var1 * temp) >> 13;
 
     /* Multiply the inverse square root with the original value */
-    var1 = ((q15_t) (((q31_t) temp1 * var1) >> 15)) << 1;
+
+    var1 = ((q15_t) (((q31_t) number * var1) >> 12));
 
     /* Shift the output down accordingly */
     if ((signBits1 % 2) == 0)
@@ -130,6 +109,7 @@ arm_status arm_sqrt_q15(
     }
     *pOut = var1;
 
+
     return (ARM_MATH_SUCCESS);
   }
   /* If the number is a negative number then store zero as its square root value */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c
index 0dbb6af..0b8954a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_sqrt_q31.c
  * Description:  Q31 square root function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,8 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
 
+#define Q28QUARTER 0x20000000 
+
 /**
   @ingroup groupFastMath
  */
@@ -53,15 +55,8 @@ arm_status arm_sqrt_q31(
   q31_t in,
   q31_t * pOut)
 {
-  q31_t bits_val1;
-  q31_t number, temp1, var1, signBits1, half;
-  float32_t temp_float1;
-  union
-  {
-    q31_t fracval;
-    float32_t floatval;
-  } tempconv;
-
+  q31_t number, var1, signBits1 ,temp;
+ 
   number = in;
 
   /* If the input is a positive number then compute the signBits. */
@@ -79,45 +74,33 @@ arm_status arm_sqrt_q31(
       number = number << (signBits1 - 1);
     }
 
-    /* Calculate half value of the number */
-    half = number >> 1;
-    /* Store the number for later use */
-    temp1 = number;
-
-    /* Convert to float */
-    temp_float1 = number * 4.6566128731e-010f;
-    /* Store as integer */
-    tempconv.floatval = temp_float1;
-    bits_val1 = tempconv.fracval;
-    /* Subtract the shifted value from the magic number to give intial guess */
-    bits_val1 = 0x5f3759df - (bits_val1 >> 1);  /* gives initial guess */
-    /* Store as float */
-    tempconv.fracval = bits_val1;
-    temp_float1 = tempconv.floatval;
-    /* Convert to integer format */
-    var1 = (q31_t) (temp_float1 * 1073741824);
+    /* Start value for 1/sqrt(x) for the Newton iteration */
+    var1 = sqrt_initial_lut_q31[(number>> 26) - (Q28QUARTER >> 26)];
+
+    /* 0.5 var1 * (3 - number * var1 * var1) */
 
     /* 1st iteration */
-    var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
-                                     ((q31_t)
-                                      ((((q31_t)
-                                         (((q63_t) var1 * var1) >> 31)) *
-                                        (q63_t) half) >> 31))) >> 31)) << 2;
+
+    temp = ((q63_t) var1 * var1) >> 28;
+    temp = ((q63_t) number * temp) >> 31;
+    temp = 0x30000000 - temp; 
+    var1 = ((q63_t) var1 * temp) >> 29;
+
+    
     /* 2nd iteration */
-    var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
-                                     ((q31_t)
-                                      ((((q31_t)
-                                         (((q63_t) var1 * var1) >> 31)) *
-                                        (q63_t) half) >> 31))) >> 31)) << 2;
-    /* 3rd iteration */
-    var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
-                                     ((q31_t)
-                                      ((((q31_t)
-                                         (((q63_t) var1 * var1) >> 31)) *
-                                        (q63_t) half) >> 31))) >> 31)) << 2;
+    temp = ((q63_t) var1 * var1) >> 28;
+    temp = ((q63_t) number * temp) >> 31;
+    temp = 0x30000000 - temp; 
+    var1 = ((q63_t) var1 * temp) >> 29;
+
+    /* 3rd iteration */
+    temp = ((q63_t) var1 * var1) >> 28;
+    temp = ((q63_t) number * temp) >> 31;
+    temp = 0x30000000 - temp; 
+    var1 = ((q63_t) var1 * temp) >> 29;
 
     /* Multiply the inverse square root with the original value */
-    var1 = ((q31_t) (((q63_t) temp1 * var1) >> 31)) << 1;
+    var1 = ((q31_t) (((q63_t) number * var1) >> 28));
 
     /* Shift the output down accordingly */
     if ((signBits1 % 2) == 0)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
index 02864e5..dffb4de 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_vlog_f16.c
  * Description:  Fast vectorized log
  *
- * $Date:        15. Octoboer 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -36,7 +36,18 @@
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h"
 
+/**
+  @addtogroup vexp
+  @{
+ */
 
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
 void arm_vexp_f16(
   const float16_t * pSrc,
         float16_t * pDst,
@@ -73,7 +84,7 @@ void arm_vexp_f16(
       /* C = log(A) */
   
       /* Calculate log and store result in destination buffer. */
-      *pDst++ = expf(*pSrc++);
+      *pDst++ = (_Float16)expf((float32_t)*pSrc++);
   
       /* Decrement loop counter */
       blkCnt--;
@@ -82,5 +93,7 @@ void arm_vexp_f16(
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
 
-
+/**
+  @} end of vexp group
+ */
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c
index cde8efe..3f23825 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_vlog_f32.c
  * Description:  Fast vectorized log
  *
- * $Date:        15. Octoboer 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,6 +35,28 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h"
 #endif
 
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+   @defgroup vexp Vector Exponential
+
+   Compute the exp values of a vector of samples.
+*/
+
+/**
+  @addtogroup vexp
+  @{
+ */
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
 void arm_vexp_f32(
   const float32_t * pSrc,
         float32_t * pDst,
@@ -98,4 +120,7 @@ void arm_vexp_f32(
    }
 }
 
+/**
+  @} end of vexp group
+ */
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c
new file mode 100644
index 0000000..950c0a5
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c
@@ -0,0 +1,70 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vexp_f64.c
+ * Description:  Fast vectorized exp
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
+
+/**
+  @addtogroup vexp
+  @{
+ */
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+void arm_vexp_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t blkCnt; 
+
+   blkCnt = blockSize;
+
+   while (blkCnt > 0U)
+   {
+      /* C = exp(A) */
+  
+      /* Calculate exp of one sample and store the result in the destination buffer. */
+      *pDst++ = exp(*pSrc++);
+  
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+}
+
+/**
+  @} end of vexp group
+ */
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
index 11f0e8d..ec9e842 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_vinverse_f16.c
  * Description:  Fast vectorized inverse
  *
- * $Date:        15. Octoboer 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70,7 +70,7 @@ void arm_vinverse_f16(
    while (blkCnt > 0U)
    {
       
-      *pDst++ = 1.0 / *pSrc++;
+      *pDst++ = 1.0f16 / (_Float16)*pSrc++;
   
       /* Decrement loop counter */
       blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
index b05f8e6..60b4af3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_vlog_f16.c
  * Description:  Fast vectorized log
  *
- * $Date:        15. Octoboer 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,21 +29,157 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions_f16.h"
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
+/* Degree of the polynomial approximation */
+#define NB_DEG_LOGF16 3
+
+/*
+Related to the Log2 of the number of approximations.
+For instance, with 3 there are 1 + 2^3 polynomials
+*/
+#define NB_DIV_LOGF16 3
+
+/* Length of the LUT table */
+#define NB_LUT_LOGF16 (NB_DEG_LOGF16+1)*(1 + (1<<NB_DIV_LOGF16))
+
+
+/*
+
+LUT of polynomial approximations.
+
+Could be generated with:
+
+ClearAll[lut, coefs, nb, deg];
+nb = 3;
+deg = 3;
+lut = Table[
+   MiniMaxApproximation[
+     Log[x/2^nb + i], {x, {10^-6, 1.0/2^nb}, deg, 0},
+     MaxIterations -> 1000][[2, 1]], {i, 1, 2, (1.0/2^nb)}];
+coefs = Chop@Flatten[CoefficientList[lut, x]];
+
+*/
+static float16_t lut_logf16[NB_LUT_LOGF16]={
+   0,0.125,-0.00781197,0.00063974,0.117783,
+   0.111111,-0.00617212,0.000447935,0.223144,
+   0.1,-0.00499952,0.000327193,0.318454,0.0909091,
+   -0.00413191,0.000246234,0.405465,0.0833333,
+   -0.00347199,0.000189928,0.485508,0.0769231,
+   -0.00295841,0.00014956,0.559616,0.0714286,
+   -0.0025509,0.000119868,0.628609,0.0666667,
+   -0.00222213,0.0000975436,0.693147,
+   0.0625,-0.00195305,0.0000804357};
+
+
+float16_t logf16_scalar(float16_t x) /* scalar log(x) from the lut_logf16 polynomial table; the bit tricks below assume x > 0 - TODO confirm callers guarantee it */
+{
+    int16_t i =  arm_typecast_s16_f16(x); /* presumably the raw 16-bit pattern of x (helper defined elsewhere) */
+
+    int32_t vecExpUnBiased = (i >> 10) - 15; /* bits above the low 10, minus 15 (half-precision exponent bias) */
+    i = i - (vecExpUnBiased << 10); /* remove that exponent from the bit pattern, leaving a value in [1,2) */
+    float16_t vecTmpFlt1 = arm_typecast_f16_s16(i);
+
+    float16_t *lut;
+    int n;
+    float16_t tmp,v;
+
+    tmp = ((_Float16)vecTmpFlt1 - 1.0f16) * (1 << NB_DIV_LOGF16); /* (m - 1) scaled by 2^NB_DIV_LOGF16 */
+    n = (int)floor((double)tmp); /* integer part selects the LUT segment */
+    v = (_Float16)tmp - (_Float16)n; /* fractional part is the polynomial argument */
+
+    lut = lut_logf16 + n * (1+NB_DEG_LOGF16); /* each segment holds 1+NB_DEG_LOGF16 coefficients, constant term first */
+
+    float16_t res = lut[NB_DEG_LOGF16-1]; /* NOTE(review): Horner starts at index NB_DEG_LOGF16-1, so the last coefficient of each segment (index NB_DEG_LOGF16) is never read - confirm intended */
+    for(int j=NB_DEG_LOGF16-2; j >=0 ; j--)
+    {
+       res = (_Float16)lut[j] + (_Float16)v * (_Float16)res; /* Horner step */
+    }
+
+    res = (_Float16)res + 0.693147f16 * (_Float16)vecExpUnBiased; /* add exponent * ln(2) */
+
+
+    return(res);
+}
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h"
 
+
+float16x8_t vlogq_lut_f16(float16x8_t vecIn) /* 8-lane counterpart of logf16_scalar using the same lut_logf16 table */
+{
+    int16x8_t i =  vreinterpretq_s16_f16(vecIn); /* view the lanes as 16-bit integers */
+
+    int16x8_t vecExpUnBiased = vsubq_n_s16(vshrq_n_s16(i,10), 15); /* (i >> 10) - 15 per lane */
+    i = vsubq_s16(i,vshlq_n_s16(vecExpUnBiased,10)); /* i - (exp << 10) per lane */
+    float16x8_t vecTmpFlt1 = vreinterpretq_f16_s16(i);
+
+
+    float16x8_t lutV;
+    int16x8_t n;
+    int16x8_t offset;
+
+    float16x8_t tmp,v,res;
+
+    tmp = vmulq_n_f16(vsubq_n_f16(vecTmpFlt1,1.0f16),(_Float16)(1 << NB_DIV_LOGF16)); /* (m - 1) * 2^NB_DIV_LOGF16 */
+
+    n = vcvtq_s16_f16(tmp); /* integer part: LUT segment index per lane */
+    v = vsubq_f16(tmp,vcvtq_f16_s16(n)); /* fractional part: polynomial argument */
+
+
+    offset = vmulq_n_s16(n,(1+NB_DEG_LOGF16)); /* first coefficient of the segment */
+    offset = vaddq_n_s16(offset,NB_DEG_LOGF16-1); /* NOTE(review): starts one below the segment's last coefficient, like the scalar path - confirm intended */
+
+    res = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset);
+    offset = vsubq_n_s16(offset,1);
+
+    for(int j=NB_DEG_LOGF16-2; j >=0 ; j--)
+    {
+       lutV = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset);
+       res = vfmaq_f16(lutV,v,res); /* Horner step: coefficient + v * res */
+       offset = vsubq_n_s16(offset,1);
+
+    }
+
+    res = vfmaq_n_f16(res,vcvtq_f16_s16(vecExpUnBiased),0.693147f16); /* add exponent * ln(2) */
+
+
+    return(res);
+
+}
+
+#endif
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @addtogroup vlog
+  @{
+ */
+
+/**
+  @brief         Floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+
 void arm_vlog_f16(
   const float16_t * pSrc,
         float16_t * pDst,
         uint32_t blockSize)
 {
-   uint32_t blkCnt; 
+   uint32_t blkCnt;
 
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
-
    f16x8_t src;
    f16x8_t dst;
 
@@ -52,7 +188,7 @@ void arm_vlog_f16(
    while (blkCnt > 0U)
    {
       src = vld1q(pSrc);
-      dst = vlogq_f16(src);
+      dst = vlogq_lut_f16(src);
       vst1q(pDst, dst);
 
       pSrc += 8;
@@ -69,16 +205,22 @@ void arm_vlog_f16(
    while (blkCnt > 0U)
    {
       /* C = log(A) */
-  
+
       /* Calculate log and store result in destination buffer. */
-      *pDst++ = logf(*pSrc++);
-  
+      *pDst++ = logf16_scalar(*pSrc++);
+
       /* Decrement loop counter */
       blkCnt--;
    }
 }
 
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
 
 
+/**
+  @} end of vlog group
+ */
+
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c
index 5e92635..7c59553 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_vlog_f32.c
  * Description:  Fast vectorized log
  *
- * $Date:        15. Octoboer 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,24 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
 
+
+/**
+  @ingroup groupFastMath
+ */
+
+
+/**
+  @defgroup vlog Vector Log
+
+  Compute the log values of a vector of samples.
+
+ */
+
+/**
+  @addtogroup vlog
+  @{
+ */
+
 #if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math.h"
 #endif
@@ -98,4 +116,8 @@ void arm_vlog_f32(
    }
 }
 
+/**
+  @} end of vlog group
+ */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c
new file mode 100644
index 0000000..fae58bb
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c
@@ -0,0 +1,55 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_f64.c
+ * Description:  Fast vectorized log
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
+
+void arm_vlog_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t blkCnt; 
+
+   blkCnt = blockSize;
+
+   while (blkCnt > 0U)
+   {
+      /* C = log(A) */
+  
+      /* Calculate log of one sample with log() and store the result in the destination buffer. */
+      *pDst++ = log(*pSrc++);
+  
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+}
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c
new file mode 100644
index 0000000..15d332e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c
@@ -0,0 +1,268 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_q15
+ * Description:  Q15 vector log
+ *
+ * $Date:        19 July 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+
+
+#define LOG_Q15_ACCURACY 15
+
+/* Number of bits used to represent the normalization factor.
+   It is Ceiling[Log2[LOG_Q15_ACCURACY]], i.e. derived from the previous value.
+   The Log2 algorithm assumes that the value x satisfies
+   1 <= x < 2.
+
+   But the input value could be as small as 2^-LOG_Q15_ACCURACY,
+   which would give an integer part of -15.
+*/
+#define LOG_Q15_INTEGER_PART 4
+
+/* 2.0 in q14 */
+#define LOQ_Q15_THRESHOLD (1u << LOG_Q15_ACCURACY)
+
+/* HALF */
+#define LOQ_Q15_Q16_HALF LOQ_Q15_THRESHOLD
+#define LOQ_Q15_Q14_HALF (LOQ_Q15_Q16_HALF >> 2)
+
+
+/* 1.0 / Log2[Exp[1]] in q15 */
+#define LOG_Q15_INVLOG2EXP 0x58b9u
+
+
+/* Clay Turner algorithm: scalar log of one q15 sample; the result is returned in q4.11 */
+static uint16_t arm_scalar_log_q15(uint16_t src)
+{
+   int i;
+
+   int16_t c = __CLZ(src)-16; /* presumably __CLZ counts over 32 bits, so -16 gives the leading zeros of the 16-bit value */
+   int16_t normalization=0;
+
+   /* 0.5 in q11 */
+   uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
+
+   /* Will compute y = log2(x) for 1 <= x < 2.0 */
+   uint16_t x;
+
+   /* q11 */
+   uint16_t y=0;
+
+   /* q11 */
+   int16_t tmp;
+
+
+   /* Normalize and convert to q14 format: shift so that exactly one leading zero remains */
+   x = src;
+   if ((c-1) < 0)
+   {
+     x = x >> (1-c);
+   }
+   else
+   {
+     x = x << (c-1);
+   }
+   normalization = c;
+
+
+
+   /* Compute the Log2. Result is in q11 instead of q16
+      because we know 0 <= y < 1.0 but
+      we want a result that allows the following
+      product to be done on int16 rather than having to go
+      through int32
+   */
+   for(i = 0; i < LOG_Q15_ACCURACY ; i++)
+   {
+      x = (((int32_t)x*x)) >> (LOG_Q15_ACCURACY - 1); /* square in 32 bits, rescale to q14 */
+
+      if (x >= LOQ_Q15_THRESHOLD) /* x reached 2.0: add the current bit weight to y and halve x */
+      {
+         y += inc ;
+         x = x >> 1;
+      }
+      inc = inc >> 1;
+   }
+
+
+   /*
+      Convert the Log2 to Log and apply normalization.
+      We compute (y - normalisation) * (1 / Log2[e]).
+
+   */
+
+   /* q11 */
+   // normalization is shifted up to q11 so that it lines up with y before the subtraction
+   tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
+
+   /* q4.11 */
+   y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
+
+   return(y);
+
+}
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+
+q15x8_t vlogq_q15(q15x8_t src) /* 8-lane counterpart of arm_scalar_log_q15; result lanes are q4.11 */
+{
+
+   int i;
+
+   int16x8_t c = vclzq_s16(src);
+   int16x8_t normalization = c;
+
+
+   /* 0.5 in q11 */
+   uint16_t inc  = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
+
+   /* Will compute y = log2(x) for 1 <= x < 2.0 */
+   uint16x8_t x;
+
+
+   /* q11 */
+   uint16x8_t y = vdupq_n_u16(0);
+
+
+   /* q11 */
+   int16x8_t vtmp;
+
+
+   mve_pred16_t p;
+
+   /* Normalize and convert to q14 format */
+
+
+   vtmp = vsubq_n_s16(c,1);
+   x = vshlq_u16((uint16x8_t)src,vtmp); /* per-lane shift by c-1; presumably a negative count shifts right, matching the scalar branch */
+
+
+   /* Compute the Log2. Result is in q11 instead of q16
+      because we know 0 <= y < 1.0 but
+      we want a result that allows the following
+      product to be done on int16 rather than having to go
+      through int32
+   */
+   for(i = 0; i < LOG_Q15_ACCURACY ; i++)
+   {
+      x = vmulhq_u16(x,x);
+      x = vshlq_n_u16(x,2); /* high half then << 2: same scale as the scalar >> 14, but the two low bits are zero */
+
+
+      p = vcmphiq_u16(x,vdupq_n_u16(LOQ_Q15_THRESHOLD)); /* NOTE(review): "hi" is a strict >, the scalar path tests >= - confirm lanes equal to the threshold are meant to differ */
+      y = vaddq_m_n_u16(y, y,inc,p);
+      x = vshrq_m_n_u16(x,x,1,p);
+
+      inc = inc >> 1;
+   }
+
+
+   /*
+      Convert the Log2 to Log and apply normalization.
+      We compute (y - normalisation) * (1 / Log2[e]).
+
+   */
+
+   /* q11 */
+   // scalar form: tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
+   vtmp = vshlq_n_s16(normalization,LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART);
+   vtmp = vsubq_s16((int16x8_t)y,vtmp);
+
+
+
+   /* q4.11 */
+   // scalar form: y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
+   vtmp = vqdmulhq_n_s16(vtmp,LOG_Q15_INVLOG2EXP);
+
+   return(vtmp);
+}
+#endif
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @addtogroup vlog
+  @{
+ */
+
+/**
+  @brief         q15 vector of log values.
+  @param[in]     pSrc       points to the input vector in q15
+  @param[out]    pDst       points to the output vector in q4.11
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+ */
+
+void arm_vlog_q15(
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t  blkCnt;           /* loop counter, reused for the vector loop and the scalar loop */
+
+  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+  q15x8_t src;
+  q15x8_t dst;
+
+  blkCnt = blockSize >> 3; /* number of full groups of 8 samples */
+
+  while (blkCnt > 0U)
+  {
+      src = vld1q(pSrc);
+      dst = vlogq_q15(src);
+      vst1q(pDst, dst);
+
+      pSrc += 8;
+      pDst += 8;
+      /* Decrement loop counter */
+      blkCnt--;
+  }
+
+  blkCnt = blockSize & 7; /* samples left over for the scalar loop below */
+  #else
+  blkCnt = blockSize;
+  #endif
+
+  while (blkCnt > 0U)
+  {
+     *pDst++ = arm_scalar_log_q15(*pSrc++); /* one sample at a time */
+
+     /* Decrement loop counter */
+     blkCnt--;
+  }
+}
+
+/**
+  @} end of vlog group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c
new file mode 100644
index 0000000..5be5b72
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c
@@ -0,0 +1,262 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_q31
+ * Description:  Q31 vector log
+ *
+ * $Date:        19 July 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+
+#define LOG_Q31_ACCURACY 31
+
+/* Number of bits used to represent the normalization factor.
+   It is Ceiling[Log2[LOG_Q31_ACCURACY]], i.e. derived from the previous value.
+   The Log2 algorithm assumes that the value x satisfies
+   1 <= x < 2.
+
+   But the input value could be as small as 2^-LOG_Q31_ACCURACY,
+   which would give an integer part of -31.
+*/
+#define LOG_Q31_INTEGER_PART 5
+
+/* 2.0 in Q30 */
+#define LOQ_Q31_THRESHOLD (1u << LOG_Q31_ACCURACY)
+
+/* HALF */
+#define LOQ_Q31_Q32_HALF LOQ_Q31_THRESHOLD
+#define LOQ_Q31_Q30_HALF (LOQ_Q31_Q32_HALF >> 2)
+
+
+/* 1.0 / Log2[Exp[1]] in Q31 */
+#define LOG_Q31_INVLOG2EXP 0x58b90bfbuL
+
+/* Clay Turner algorithm: scalar log of one q31 sample; the result is returned in q5.26 */
+static uint32_t arm_scalar_log_q31(uint32_t src)
+{
+   int32_t i;
+
+   int32_t c = __CLZ(src); /* leading zero count of the 32-bit input */
+   int32_t normalization=0;
+
+   /* 0.5 in q26 */
+   uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
+
+   /* Will compute y = log2(x) for 1 <= x < 2.0 */
+   uint32_t x;
+
+   /* q26 */
+   uint32_t y=0;
+
+   /* q26 */
+   int32_t tmp;
+
+
+   /* Normalize and convert to q30 format: shift so that exactly one leading zero remains */
+   x = src;
+   if ((c-1) < 0)
+   {
+     x = x >> (1-c);
+   }
+   else
+   {
+     x = x << (c-1);
+   }
+   normalization = c;
+
+   /* Compute the Log2. Result is in q26
+      because we know 0 <= y < 1.0 but
+      do not want to use q32, so that the
+      following computation needs fewer instructions.
+   */
+   for(i = 0; i < LOG_Q31_ACCURACY ; i++)
+   {
+      x = ((int64_t)x*x)  >> (LOG_Q31_ACCURACY - 1); /* square in 64 bits, rescale to q30 */
+
+      if (x >= LOQ_Q31_THRESHOLD) /* x reached 2.0: add the current bit weight to y and halve x */
+      {
+         y += inc ;
+         x = x >> 1;
+      }
+      inc = inc >> 1;
+   }
+
+   /*
+      Convert the Log2 to Log and apply normalization.
+      We compute (y - normalisation) * (1 / Log2[e]).
+
+   */
+
+   /* q26. NOTE(review): if __CLZ(0) returns 32, normalization << 26 does not fit in int32 - confirm src == 0 is excluded */
+   tmp = (int32_t)y - (normalization << (LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART));
+
+
+   /* q5.26 */
+   y = ((int64_t)tmp * LOG_Q31_INVLOG2EXP) >> 31;
+
+
+
+   return(y);
+
+}
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+
+q31x4_t vlogq_q31(q31x4_t src) /* 4-lane counterpart of arm_scalar_log_q31; result lanes are q5.26 */
+{
+
+   int32_t i;
+
+   int32x4_t c = vclzq_s32(src);
+   int32x4_t normalization = c;
+
+
+   /* 0.5 in q26 */
+   uint32_t inc  = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
+
+   /* Will compute y = log2(x) for 1 <= x < 2.0 */
+   uint32x4_t x;
+
+
+   /* q26 */
+   uint32x4_t y = vdupq_n_u32(0);
+
+
+   /* q26 */
+   int32x4_t vtmp;
+
+
+   mve_pred16_t p;
+
+   /* Normalize and convert to q30 format */
+
+
+   vtmp = vsubq_n_s32(c,1);
+   x = vshlq_u32((uint32x4_t)src,vtmp); /* per-lane shift by c-1; presumably a negative count shifts right, matching the scalar branch */
+
+
+    /* Compute the Log2. Result is in Q26
+      because we know 0 <= y < 1.0 but
+      do not want to use Q32, so that the
+      following computation needs fewer instructions.
+   */
+   for(i = 0; i < LOG_Q31_ACCURACY ; i++)
+   {
+      x = vmulhq_u32(x,x);
+      x = vshlq_n_u32(x,2); /* high half then << 2: same scale as the scalar >> 30, but the two low bits are zero */
+
+
+      p = vcmphiq_u32(x,vdupq_n_u32(LOQ_Q31_THRESHOLD)); /* NOTE(review): "hi" is a strict >, the scalar path tests >= - confirm lanes equal to the threshold are meant to differ */
+      y = vaddq_m_n_u32(y, y,inc,p);
+      x = vshrq_m_n_u32(x,x,1,p);
+
+      inc = inc >> 1;
+   }
+
+
+   /*
+      Convert the Log2 to Log and apply normalization.
+      We compute (y - normalisation) * (1 / Log2[e]).
+
+   */
+
+   /* q26 */
+   // scalar form: tmp = (int32_t)y - (normalization << (LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART));
+   vtmp = vshlq_n_s32(normalization,LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART);
+   vtmp = vsubq_s32((int32x4_t)y,vtmp);
+
+
+
+   /* q5.26 */
+   // scalar form: y = ((int64_t)tmp * LOG_Q31_INVLOG2EXP) >> 31;
+   vtmp = vqdmulhq_n_s32(vtmp,LOG_Q31_INVLOG2EXP);
+
+   return(vtmp);
+}
+#endif
+
+/**
+  @ingroup groupFastMath
+ */
+
+/**
+  @addtogroup vlog
+  @{
+ */
+
+/**
+  @brief         q31 vector of log values.
+  @param[in]     pSrc       points to the input vector in q31
+  @param[out]    pDst       points to the output vector q5.26
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+
+ */
+void arm_vlog_q31(
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t  blkCnt;           /* loop counter, reused for the vector loop and the scalar loop */
+
+  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  q31x4_t src;
+  q31x4_t dst;
+
+  blkCnt = blockSize >> 2; /* number of full groups of 4 samples */
+
+  while (blkCnt > 0U)
+  {
+      src = vld1q(pSrc);
+      dst = vlogq_q31(src);
+      vst1q(pDst, dst);
+
+      pSrc += 4;
+      pDst += 4;
+      /* Decrement loop counter */
+      blkCnt--;
+  }
+
+  blkCnt = blockSize & 3; /* samples left over for the scalar loop below */
+  #else
+  blkCnt = blockSize;
+  #endif
+
+  while (blkCnt > 0U)
+  {
+     *pDst++=arm_scalar_log_q31(*pSrc++); /* one sample at a time */
+
+     blkCnt--; /* Decrement loop counter */
+  }
+
+}
+
+/**
+  @} end of vlog group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c
index 4c1d91a..64d61f1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_32x64_init_q31.c
  * Description:  High precision Q31 Biquad cascade filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c
index 2c01a9c..1111311 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_32x64_q31.c
  * Description:  High precision Q31 Biquad cascade filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -298,7 +298,7 @@ void arm_biquad_cas_df1_32x64_q31(
     q31_t     b0, b1, b2, a1, a2;   /*  Filter coefficients           */
     int32_t   shift = (int32_t) S->postShift + 1;   /*  Shift to be applied to the output */
     uint32_t  sample, stage = S->numStages; /*  loop counters                     */
-    q31x4_t vecCoef, vecIn;
+    q31x4_t vecCoef = { 0 }, vecIn;
     q63_t     acc;
 
     if (blockSize <= 3)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
index 4986e95..c38e37b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_f16.c
  * Description:  Processing function for the floating-point Biquad cascade DirectFormI(DF1) filter
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -65,7 +65,7 @@ void arm_biquad_cascade_df1_f16(
     const float16_t *pCoeffs = S->pCoeffs;    /*  coefficient pointer       */
     float16_t Xn1, Xn2, Yn1, Yn2;   /*  Filter pState variables   */
     float16_t X0, X1, X2, X3;   /*  temporary input           */
-    float16_t X4, X5, X6, X7;   /*  temporary input           */
+    float16_t X4, X5, X6, X7 = 0;   /*  temporary input           */
     _Float16 lastX, lastY;             /*  X,Y history for tail handling */
     f16x8_t coeffs;
     f16x8_t accVec;           /* accumultor vector */
@@ -491,4 +491,5 @@ void arm_biquad_cascade_df1_f16(
 #endif /* #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
 #endif /*#if defined(ARM_FLOAT16_SUPPORTED)*/
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
index ae17c46..931a6f0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_f32.c
  * Description:  Processing function for the floating-point Biquad cascade DirectFormI(DF1) filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -178,7 +178,7 @@ void arm_biquad_cascade_df1_f32(
     const float32_t *pCoeffs = S->pCoeffs;    /*  coefficient pointer       */
     float32_t Xn1, Xn2, Yn1, Yn2;       /*  Filter pState variables   */
     float32_t lastX, lastY;             /*  X,Y history for tail handling */
-    float32_t X0, X1, X2, X3;           /*  temporary input           */
+    float32_t X0, X1, X2, X3 = 0;       /*  temporary input           */
     f32x4_t coeffs;
     f32x4_t accVec;                   /* accumultor vector */
     uint32_t  sample, stage = S->numStages; /*  loop counters             */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c
index e42af39..f6d7243 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_fast_q15.c
  * Description:  Fast processing function for the Q15 Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -81,13 +81,13 @@ void arm_biquad_cascade_df1_fast_q15(
   do
   {
     /* Read the b0 and 0 coefficients using SIMD  */
-    b0 = read_q15x2_ia ((q15_t **) &pCoeffs);
+    b0 = read_q15x2_ia (&pCoeffs);
 
     /* Read the b1 and b2 coefficients using SIMD */
-    b1 = read_q15x2_ia ((q15_t **) &pCoeffs);
+    b1 = read_q15x2_ia (&pCoeffs);
 
     /* Read the a1 and a2 coefficients using SIMD */
-    a1 = read_q15x2_ia ((q15_t **) &pCoeffs);
+    a1 = read_q15x2_ia (&pCoeffs);
 
     /* Read the input state values from the state buffer:  x[n-1], x[n-2] */
     state_in = read_q15x2_ia (&pState);
@@ -111,7 +111,7 @@ void arm_biquad_cascade_df1_fast_q15(
     {
 
       /* Read the input */
-      in = read_q15x2_ia ((q15_t **) &pIn);
+      in = read_q15x2_ia (&pIn);
 
       /* out =  b0 * x[n] + 0 * 0 */
       out = __SMUAD(b0, in);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c
index dbf2d01..1ddff4d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_fast_q31.c
  * Description:  Processing function for the Q31 Fast Biquad cascade DirectFormI(DF1) filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
index 4f291fe..0cbe6f6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_init_f16.c
  * Description:  Floating-point Biquad cascade DirectFormI(DF1) filter initialization function
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -98,29 +98,35 @@ void arm_biquad_cascade_df1_init_f16(
 
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 
+/*
+
+The coefficients are computed in float32; otherwise the
+resulting filter is too different from the expected one.
+
+*/
 static void generateCoefsFastBiquadF16(float16_t b0, float16_t b1, float16_t b2, float16_t a1, float16_t a2,
                                 arm_biquad_mod_coef_f16 * newCoef)
 {
     float32_t coeffs[8][12] = {
-        {0, 0, 0, 0, 0, 0, 0, b0, b1, b2, a1, a2},
-        {0, 0, 0, 0, 0, 0, b0, b1, b2, 0, a2, 0},
-        {0, 0, 0, 0, 0, b0, b1, b2, 0, 0, 0, 0},
-        {0, 0, 0, 0, b0, b1, b2, 0, 0, 0, 0, 0},
-        {0, 0, 0, b0, b1, b2, 0, 0, 0, 0, 0, 0},
-        {0, 0, b0, b1, b2, 0, 0, 0, 0, 0, 0, 0},
-        {0, b0, b1, b2, 0, 0, 0, 0, 0, 0, 0, 0},
-        {b0, b1, b2, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, (float32_t)a1, (float32_t)a2},
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, (float32_t)a2, 0.0f},
+        {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, 0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {0.0f, (float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+        {(float32_t)b0, (float32_t)b1, (float32_t)b2, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}
     };
 
     for (int i = 0; i < 12; i++)
     {
-        coeffs[1][i] += (a1 * coeffs[0][i]);
-        coeffs[2][i] += (a1 * coeffs[1][i]) + (a2 * coeffs[0][i]);
-        coeffs[3][i] += (a1 * coeffs[2][i]) + (a2 * coeffs[1][i]);
-        coeffs[4][i] += (a1 * coeffs[3][i]) + (a2 * coeffs[2][i]);
-        coeffs[5][i] += (a1 * coeffs[4][i]) + (a2 * coeffs[3][i]);
-        coeffs[6][i] += (a1 * coeffs[5][i]) + (a2 * coeffs[4][i]);
-        coeffs[7][i] += (a1 * coeffs[6][i]) + (a2 * coeffs[5][i]);
+        coeffs[1][i] += ((float32_t)a1 * coeffs[0][i]);
+        coeffs[2][i] += ((float32_t)a1 * coeffs[1][i]) + ((float32_t)a2 * coeffs[0][i]);
+        coeffs[3][i] += ((float32_t)a1 * coeffs[2][i]) + ((float32_t)a2 * coeffs[1][i]);
+        coeffs[4][i] += ((float32_t)a1 * coeffs[3][i]) + ((float32_t)a2 * coeffs[2][i]);
+        coeffs[5][i] += ((float32_t)a1 * coeffs[4][i]) + ((float32_t)a2 * coeffs[3][i]);
+        coeffs[6][i] += ((float32_t)a1 * coeffs[5][i]) + ((float32_t)a2 * coeffs[4][i]);
+        coeffs[7][i] += ((float32_t)a1 * coeffs[6][i]) + ((float32_t)a2 * coeffs[5][i]);
 
         /*
          * transpose
@@ -159,5 +165,6 @@ void arm_biquad_cascade_df1_mve_init_f16(arm_biquad_casd_df1_inst_f16 * S,
 /**
   @} end of BiquadCascadeDF1 group
  */
-#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c
index e904fd9..91b079b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_init_f32.c
  * Description:  Floating-point Biquad cascade DirectFormI(DF1) filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c
index 54aa5b0..8f3020e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_init_q15.c
  * Description:  Q15 Biquad cascade DirectFormI(DF1) filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c
index ee65719..0cc7acc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_init_q31.c
  * Description:  Q31 Biquad cascade DirectFormI(DF1) filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
index 0791bbc..df7d114 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_q15.c
  * Description:  Processing function for the Q15 Biquad cascade DirectFormI(DF1) filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c
index 5dbf177..ca2fce9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df1_q31.c
  * Description:  Processing function for the Q31 Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -71,7 +71,7 @@ void arm_biquad_cascade_df1_q31(
     uint32_t        stages = S->numStages;      /*  loop counters                 */
     int             postShift = S->postShift;
     q31x4_t         b0Coeffs, b1Coeffs, a0Coeffs, a1Coeffs;     /*  Coefficients vector           */
-    q31x4_t         stateVec;
+    q31x4_t         stateVec = { 0 };
     q31_t          *pState = S->pState; /*  pState pointer initialization */
     q31x4_t         inVec0;
     int64_t         acc;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c
index ea24338..a9ef2e7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_f16.c
  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -49,7 +49,7 @@
   @return        none
  */
 
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
 void arm_biquad_cascade_df2T_f16(
   const arm_biquad_cascade_df2T_instance_f16 * S,
   const float16_t * pSrc,
@@ -188,7 +188,7 @@ void arm_biquad_cascade_df2T_f16(
     while (stage > 0U);
 }
 #else
-LOW_OPTIMIZATION_ENTER
+
 void arm_biquad_cascade_df2T_f16(
   const arm_biquad_cascade_df2T_instance_f16 * S,
   const float16_t * pSrc,
@@ -488,7 +488,6 @@ void arm_biquad_cascade_df2T_f16(
    } while (stage > 0U);
 
 }
-LOW_OPTIMIZATION_EXIT
 #endif /* #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
   @} end of BiquadCascadeDF2T group
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
index 1398842..f75a614 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_f32.c
  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -47,7 +47,7 @@
   @param[in]     blockSize number of samples to process
   @return        none
  */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined(ARM_MATH_MVEF) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
 
 void arm_biquad_cascade_df2T_f32(
@@ -345,7 +345,7 @@ void arm_biquad_cascade_df2T_f32(
    }
 }
 #else
-LOW_OPTIMIZATION_ENTER
+
 void arm_biquad_cascade_df2T_f32(
   const arm_biquad_cascade_df2T_instance_f32 * S,
   const float32_t * pSrc,
@@ -645,7 +645,7 @@ void arm_biquad_cascade_df2T_f32(
    } while (stage > 0U);
 
 }
-LOW_OPTIMIZATION_EXIT
+
 #endif /* #if defined(ARM_MATH_NEON) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
index f935a1b..6d72a5a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_f64.c
  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -107,7 +107,7 @@
                    To do this manually without calling the init function, assign the follow subfields of the instance structure:
                    numStages, pCoeffs, pState. Also set all of the values in pState to zero.
   @par
-                   Use of the initialization function is optional.
+                   Use of the initialization function is optional except for the vectorized versions (Helium and Neon).
                    However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
                    To place an instance structure into a const data section, the instance structure must be manually initialized.
                    Set the values in the state buffer to zeros before static initialization.
@@ -119,6 +119,12 @@
                    where <code>numStages</code> is the number of Biquad stages in the filter;
                    <code>pState</code> is the address of the state buffer.
                    <code>pCoeffs</code> is the address of the coefficient buffer;
+  @par           Neon version
+                  For the Neon version, the function arm_biquad_cascade_df2T_compute_coefs_x must be
+                  used in addition to arm_biquad_cascade_df2T_init_x.
+
+                  See the documentation of arm_biquad_cascade_df2T_init_x for more details.
+
 */
 
 /**
@@ -135,7 +141,7 @@
   @return        none
  */
 
-LOW_OPTIMIZATION_ENTER
+
 void arm_biquad_cascade_df2T_f64(
   const arm_biquad_cascade_df2T_instance_f64 * S,
   const float64_t * pSrc,
@@ -438,7 +444,7 @@ void arm_biquad_cascade_df2T_f64(
    } while (stage > 0U);
 
 }
-LOW_OPTIMIZATION_EXIT
+
 
 /**
   @} end of BiquadCascadeDF2T group
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
index ebd0fc4..fa07f91 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_init_f16.c
  * Description:  Initialization function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -114,4 +114,5 @@ void arm_biquad_cascade_df2T_init_f16(
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
index 00375d8..988d6ca 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_init_f32.c
  * Description:  Initialization function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -39,78 +39,32 @@
   @{
  */
 
-/**
-  @brief         Initialization function for the floating-point transposed direct form II Biquad cascade filter.
-  @param[in,out] S           points to an instance of the filter data structure.
-  @param[in]     numStages   number of 2nd order stages in the filter.
-  @param[in]     pCoeffs     points to the filter coefficients.
-  @param[in]     pState      points to the state buffer.
-  @return        none
 
-  @par           Coefficient and State Ordering
-                   The coefficients are stored in the array <code>pCoeffs</code> in the following order
-                   in the not Neon version.
-  <pre>
-      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
-  </pre>
-                   
-  @par
-                   where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
-                   <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
-                   and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
-
-                   For Neon version, this array is bigger. If numstages = 4x + y, then the array has size:
-                   32*x + 5*y
-                   and it must be initialized using the function
-                   arm_biquad_cascade_df2T_compute_coefs_f32 which is taking the
-                   standard array coefficient as parameters.
-
-                   But, an array of 8*numstages is a good approximation.
-
-                   Then, the initialization can be done with:
-  <pre>
-                   arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
-                   arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
-  </pre>
-
-  @par             In this example, neonCoefs is a bigger array of size 8 * numStages.
-                   coefs is the standard array:
-
-  <pre>
-      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
-  </pre>
-
-
-  @par
-                   The <code>pState</code> is a pointer to state array.
-                   Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
-                   The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
-                   The state array has a total length of <code>2*numStages</code> values.
-                   The state variables are updated after each block of data is processed; the coefficients are untouched.
- */
 
 #if defined(ARM_MATH_NEON) 
-/*
+/**
+  @brief         Compute new coefficient arrays for use in vectorized filter (Neon only).
+  @param[in]     numStages         number of 2nd order stages in the filter.
+  @param[in]     pCoeffs           points to the original filter coefficients.
+  @param[out]    pComputedCoeffs   points to the new computed coefficients for the vectorized Neon version.
+  @return        none
+
+  @par   Size of coefficient arrays:
+            pCoeffs has size 5 * numStages 
 
-Must be called after initializing the biquad instance.
-pCoeffs has size 5 * nbCascade
-Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade 
+            pComputedCoeffs has size 8 * numStages (32*x + 5*y values are written for numStages = 4x + y, so this is always sufficient)
 
-So this pCoeffs is the one which would be used for the not Neon version.
-The pCoeffs passed in init is bigger than the one for the not Neon version.
+            pComputedCoeffs is the array to be used in arm_biquad_cascade_df2T_init_f32.
 
 */
 void arm_biquad_cascade_df2T_compute_coefs_f32(
-  arm_biquad_cascade_df2T_instance_f32 * S,
   uint8_t numStages,
-  float32_t * pCoeffs)
+  const float32_t * pCoeffs,
+  float32_t * pComputedCoeffs)
 {
    uint8_t cnt;
-   float32_t *pDstCoeffs;
    float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
 
-   pDstCoeffs = (float32_t*)S->pCoeffs;
-
    cnt = numStages >> 2; 
    while(cnt > 0)
    {
@@ -125,52 +79,52 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
       }
 
       /* Vec 1 */
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1];
-      *pDstCoeffs++ = b0[2];
-      *pDstCoeffs++ = b0[3];
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = b0[1];
+      *pComputedCoeffs++ = b0[2];
+      *pComputedCoeffs++ = b0[3];
 
       /* Vec 2 */
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1] * b0[2];
-      *pDstCoeffs++ = b0[2] * b0[3];
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = b0[1] * b0[2];
+      *pComputedCoeffs++ = b0[2] * b0[3];
 
       /* Vec 3 */
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1] * b0[2] * b0[3];
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
+      *pComputedCoeffs++ = b0[1] * b0[2] * b0[3];
       
       /* Vec 4 */
-      *pDstCoeffs++ = b0[0];
-      *pDstCoeffs++ = b0[0] * b0[1];
-      *pDstCoeffs++ = b0[0] * b0[1] * b0[2];
-      *pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
+      *pComputedCoeffs++ = b0[0];
+      *pComputedCoeffs++ = b0[0] * b0[1];
+      *pComputedCoeffs++ = b0[0] * b0[1] * b0[2];
+      *pComputedCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
 
       /* Vec 5 */
-      *pDstCoeffs++ = b1[0];
-      *pDstCoeffs++ = b1[1];
-      *pDstCoeffs++ = b1[2];
-      *pDstCoeffs++ = b1[3];
+      *pComputedCoeffs++ = b1[0];
+      *pComputedCoeffs++ = b1[1];
+      *pComputedCoeffs++ = b1[2];
+      *pComputedCoeffs++ = b1[3];
 
       /* Vec 6 */
-      *pDstCoeffs++ = b2[0];
-      *pDstCoeffs++ = b2[1];
-      *pDstCoeffs++ = b2[2];
-      *pDstCoeffs++ = b2[3];
+      *pComputedCoeffs++ = b2[0];
+      *pComputedCoeffs++ = b2[1];
+      *pComputedCoeffs++ = b2[2];
+      *pComputedCoeffs++ = b2[3];
 
       /* Vec 7 */
-      *pDstCoeffs++ = a1[0];
-      *pDstCoeffs++ = a1[1];
-      *pDstCoeffs++ = a1[2];
-      *pDstCoeffs++ = a1[3];
+      *pComputedCoeffs++ = a1[0];
+      *pComputedCoeffs++ = a1[1];
+      *pComputedCoeffs++ = a1[2];
+      *pComputedCoeffs++ = a1[3];
 
       /* Vec 8 */
-      *pDstCoeffs++ = a2[0];
-      *pDstCoeffs++ = a2[1];
-      *pDstCoeffs++ = a2[2];
-      *pDstCoeffs++ = a2[3];
+      *pComputedCoeffs++ = a2[0];
+      *pComputedCoeffs++ = a2[1];
+      *pComputedCoeffs++ = a2[2];
+      *pComputedCoeffs++ = a2[3];
 
       cnt--;
    }
@@ -178,17 +132,66 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
    cnt = numStages & 0x3;
    while(cnt > 0)
    {
-      *pDstCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
       cnt--;
    }
 
 }
 #endif 
 
+/**
+  @brief         Initialization function for the floating-point transposed direct form II Biquad cascade filter.
+  @param[in,out] S           points to an instance of the filter data structure.
+  @param[in]     numStages   number of 2nd order stages in the filter.
+  @param[in]     pCoeffs     points to the filter coefficients.
+  @param[in]     pState      points to the state buffer.
+  @return        none
+
+  @par           Coefficient and State Ordering
+                   The coefficients are stored in the array <code>pCoeffs</code> in the following order
+                   for the non-Neon version.
+  <pre>
+      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
+  </pre>
+                   
+  @par
+                   where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
+                   <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
+                   and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
+
+                   For the Neon version, this array is bigger. If numStages = 4x + y, then the array has size:
+                   32*x + 5*y
+                   and it must be initialized using the function
+                   arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
+                   standard coefficient array as a parameter.
+
+                   An array of 8*numStages is always large enough and is a convenient size to allocate.
+
+                   Then, the initialization can be done with:
+  <pre>
+                   arm_biquad_cascade_df2T_compute_coefs_f32(nbCascade,coefs,computedCoefs);
+                   arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, computedCoefs, stateNeon);
+  </pre>
+
+  @par             In this example, computedCoefs is a bigger array of size 8 * numStages.
+                   coefs is the standard array:
+
+  <pre>
+      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
+  </pre>
+
+
+  @par
+                   The <code>pState</code> is a pointer to state array.
+                   Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
+                   The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
+                   The state array has a total length of <code>2*numStages</code> values.
+                   The state variables are updated after each block of data is processed; the coefficients are untouched.
+ */
 void arm_biquad_cascade_df2T_init_f32(
         arm_biquad_cascade_df2T_instance_f32 * S,
         uint8_t numStages,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
index c33c915..e06f35e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_df2T_init_f64.c
  * Description:  Initialization function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
index 2767bc1..ef6b4cb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_stereo_df2T_f16.c
  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter. 2 channels
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,7 +53,7 @@
 #pragma GCC warning "Scalar version of arm_biquad_cascade_stereo_df2T_f16 built. Helium version has build issues with gcc."
 #endif 
 
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H)
+#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H)
 void arm_biquad_cascade_stereo_df2T_f16(
   const arm_biquad_cascade_stereo_df2T_instance_f16 * S,
   const float16_t * pSrc,
@@ -194,7 +194,7 @@ void arm_biquad_cascade_stereo_df2T_f16(
     while (stage > 0U);
 }
 #else
-LOW_OPTIMIZATION_ENTER
+
 void arm_biquad_cascade_stereo_df2T_f16(
   const arm_biquad_cascade_stereo_df2T_instance_f16 * S,
   const float16_t * pSrc,
@@ -427,11 +427,12 @@ void arm_biquad_cascade_stereo_df2T_f16(
     } while (stage > 0U);
 
 }
-LOW_OPTIMIZATION_EXIT
+
 #endif /* #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
   @} end of BiquadCascadeDF2T group
  */
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c
index 5851c91..e0a5d03 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_stereo_df2T_f32.c
  * Description:  Processing function for floating-point transposed direct form II Biquad cascade filter. 2 channels
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -47,7 +47,7 @@
   @param[in]     blockSize number of samples to process
   @return        none
  */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if (defined(ARM_MATH_MVEF) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
 
 void arm_biquad_cascade_stereo_df2T_f32(
@@ -181,7 +181,7 @@ void arm_biquad_cascade_stereo_df2T_f32(
 }
 
 #else
-LOW_OPTIMIZATION_ENTER
+
 void arm_biquad_cascade_stereo_df2T_f32(
   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
   const float32_t * pSrc,
@@ -414,7 +414,7 @@ void arm_biquad_cascade_stereo_df2T_f32(
     } while (stage > 0U);
 
 }
-LOW_OPTIMIZATION_EXIT
+
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f16.c
index 83f63ed..3277519 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_stereo_df2T_init_f16.c
  * Description:  Initialization function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c
index aa4ce89..f7dd819 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_biquad_cascade_stereo_df2T_init_f32.c
  * Description:  Initialization function for floating-point transposed direct form II Biquad cascade filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_f32.c
index 9080c75..5e123e4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_f32.c
  * Description:  Convolution of floating-point sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -45,12 +45,14 @@
  @par            Algorithm
                    Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and
                    <code>srcBLen</code> samples respectively. Then the convolution
-  <pre>
-     c[n] = a[n] * b[n]
-  </pre>
+                   \f[
+                      c[n] = a[n] * b[n]
+                   \f]
   @par
                    is defined as
-                   \image html ConvolutionEquation.gif
+                   \f[
+                   c[n] = \sum_{k=0}^{srcALen-1} a[k] b[n-k]
+                   \f]
   @par
                    Note that <code>c[n]</code> is of length <code>srcALen + srcBLen - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., srcALen + srcBLen - 2</code>.
                    <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and
@@ -62,9 +64,9 @@
                    For each offset \c n, the overlapping portions of a[n] and b[n] are multiplied and summed together.
   @par
                    Note that convolution is a commutative operation:
-  <pre>
-     a[n] * b[n] = b[n] * a[n].
-  </pre>
+                   \f[
+                      a[n] * b[n] = b[n] * a[n].
+                   \f]
   @par
                    This means that switching the A and B arguments to the convolution functions has no effect.
 
@@ -80,6 +82,12 @@
   @par           Opt Versions
                    Opt versions are supported for Q15 and Q7. Design uses internal scratch buffer for getting good optimisation.
                    These versions are optimised in cycles and consumes more memory (Scratch memory) compared to Q15 and Q7 versions
+  
+  @par           Long versions:
+                   For convolution of long vectors, these functions are
+                   no longer suitable and will be very slow.
+                   An implementation based upon FFTs should be used.
+
  */
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_opt_q15.c
index dda46cf..62b1c95 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_fast_opt_q15.c
  * Description:  Fast Q15 Convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q15.c
index a0f4860..d00ad65 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_fast_q15.c
  * Description:  Fast Q15 Convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q31.c
index 70949a0..569e484 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_fast_q31.c
  * Description:  Fast Q31 Convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q15.c
index ad7bf76..6230627 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_opt_q15.c
  * Description:  Convolution of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q7.c
index a4b251d..1afdb5d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_opt_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_opt_q7.c
  * Description:  Convolution of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_f32.c
index 73c732e..1ce871c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_f32.c
  * Description:  Partial convolution of floating-point sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,6 +58,12 @@
   @par           Opt Versions
                    Opt versions are supported for Q15 and Q7. Design uses internal scratch buffer for getting good optimisation.
                    These versions are optimised in cycles and consumes more memory (Scratch memory) compared to Q15 and Q7 versions of partial convolution
+ 
+  @par           Long versions:
+                   For convolution of long vectors, these functions are
+                   no longer suitable and will be very slow.
+                   An implementation based upon FFTs should be used.
+
  */
 
 /**
@@ -97,7 +103,7 @@ arm_status arm_conv_partial_f32(
   const float32_t *pSrc1, *pSrc2;                      /* Intermediate pointers */
         float32_t sum;                                 /* Accumulator */
         uint32_t j, k, count, blkCnt, check;
-        uint32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
+        int32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
         arm_status status;                             /* Status of Partial convolution */
 
 #if defined (ARM_MATH_LOOPUNROLL)
@@ -144,7 +150,7 @@ arm_status arm_conv_partial_f32(
     blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
     blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
     blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0;
     blockSize2 = ((int32_t) check - blockSize3) - (blockSize1 + (int32_t) firstIndex);
     blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
 
@@ -189,7 +195,7 @@ arm_status arm_conv_partial_f32(
      * ----------------------*/
 
     /* The first stage starts here */
-    while (blockSize1 > 0U)
+    while (blockSize1 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0.0f;
@@ -541,7 +547,14 @@ arm_status arm_conv_partial_f32(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
@@ -552,7 +565,7 @@ arm_status arm_conv_partial_f32(
      * Stage3 process
      * ------------------*/
 
-    while (blockSize3 > 0U)
+    while (blockSize3 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0.0f;
@@ -629,7 +642,6 @@ arm_status arm_conv_partial_f32(
         float32_t sum;                                 /* Accumulator */
         uint32_t i, j;                                 /* Loop counters */
         arm_status status;                             /* Status of Partial convolution */
-
   /* Check for range of output samples to be calculated */
   if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
   {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c
index 310d0a7..d181f6e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_fast_opt_q15.c
  * Description:  Fast Q15 Partial convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
index 700e553..96cfe1c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_fast_q15.c
  * Description:  Fast Q15 Partial convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -576,7 +576,14 @@ arm_status arm_conv_partial_fast_q15(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
index 2fb96f3..4f7a01a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_fast_q31.c
  * Description:  Fast Q31 Partial convolution
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -73,7 +73,7 @@ arm_status arm_conv_partial_fast_q31(
   const q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
         q31_t sum;                                     /* Accumulators */
         uint32_t j, k, count, check, blkCnt;
-        uint32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
+        int32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
         arm_status status;                             /* Status of Partial convolution */
 
 #if defined (ARM_MATH_LOOPUNROLL)
@@ -120,7 +120,7 @@ arm_status arm_conv_partial_fast_q31(
     blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
     blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
     blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
     blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
     blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
 
@@ -165,7 +165,7 @@ arm_status arm_conv_partial_fast_q31(
      * ----------------------*/
 
     /* The first stage starts here */
-    while (blockSize1 > 0U)
+    while (blockSize1 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -528,7 +528,14 @@ arm_status arm_conv_partial_fast_q31(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
@@ -539,7 +546,7 @@ arm_status arm_conv_partial_fast_q31(
      * Stage3 process
      * ------------------*/
 
-    while (blockSize3 > 0U)
+    while (blockSize3 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q15.c
index a2cc22c..1296674 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_opt_q15.c
  * Description:  Partial convolution of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q7.c
index 2befd5d..1b0527d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_opt_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_opt_q7.c
  * Description:  Partial convolution of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q15.c
index 52f253c..41cd5c9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_q15.c
  * Description:  Partial convolution of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -78,7 +78,7 @@ arm_status arm_conv_partial_q15(
   const q15_t *py;                                     /* Intermediate inputB pointer */
   const q15_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
         q31_t x0, x1, x2, x3, c0;                      /* Temporary input variables to hold state and coefficient values */
-        uint32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
+        int32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
         uint32_t j, k, count, blkCnt, check;
         arm_status status;                             /* Status of Partial convolution */
 
@@ -121,7 +121,7 @@ arm_status arm_conv_partial_q15(
     blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
     blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
     blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
     blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
     blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
 
@@ -170,7 +170,7 @@ arm_status arm_conv_partial_q15(
     /* Second part of this stage computes the MAC operations greater than or equal to 4 */
 
     /* The first part of the stage starts here */
-    while ((count < 4U) && (blockSize1 > 0U))
+    while ((count < 4U) && (blockSize1 > 0))
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -208,7 +208,7 @@ arm_status arm_conv_partial_q15(
      * y[srcBLen] and y[srcBLen-1] coefficients, py is decremented by 1 */
     py = py - 1;
 
-    while (blockSize1 > 0U)
+    while (blockSize1 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -582,7 +582,14 @@ arm_status arm_conv_partial_q15(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
@@ -601,7 +608,7 @@ arm_status arm_conv_partial_q15(
     /* The first part of the stage starts here */
     j = count >> 2U;
 
-    while ((j > 0U) && (blockSize3 > 0U))
+    while ((j > 0U) && (blockSize3 > 0))
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -662,7 +669,7 @@ arm_status arm_conv_partial_q15(
      * so pointer py is updated to read only one sample at a time */
     py = py + 1U;
 
-    while (blockSize3 > 0U)
+    while (blockSize3 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q31.c
index eb360b6..887aa71 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_q31.c
  * Description:  Partial convolution of Q31 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -76,7 +76,7 @@ arm_status arm_conv_partial_q31(
   const q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
         q63_t sum;                                     /* Accumulator */
         uint32_t j, k, count, blkCnt, check;
-        uint32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
+        int32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
         arm_status status;                             /* Status of Partial convolution */
 
 #if defined (ARM_MATH_LOOPUNROLL)
@@ -123,7 +123,7 @@ arm_status arm_conv_partial_q31(
     blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
     blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
     blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
     blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
     blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
 
@@ -168,7 +168,7 @@ arm_status arm_conv_partial_q31(
      * ----------------------*/
 
     /* The first stage starts here */
-    while (blockSize1 > 0U)
+    while (blockSize1 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -501,7 +501,14 @@ arm_status arm_conv_partial_q31(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
@@ -512,7 +519,7 @@ arm_status arm_conv_partial_q31(
      * Stage3 process
      * ------------------*/
 
-    while (blockSize3 > 0U)
+    while (blockSize3 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q7.c
index a4f03af..3589f63 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_partial_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_partial_q7.c
  * Description:  Partial convolution of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -66,7 +66,7 @@ arm_status arm_conv_partial_q7(
         uint32_t numPoints)
 {
 
-#if ARM_MATH_DSP
+#if defined(ARM_MATH_DSP)
 
   const q7_t *pIn1;                                    /* InputA pointer */
   const q7_t *pIn2;                                    /* InputB pointer */
@@ -76,7 +76,7 @@ arm_status arm_conv_partial_q7(
   const q7_t *pSrc1, *pSrc2;                           /* Intermediate pointers */
         q31_t sum;                                     /* Accumulator */
         uint32_t j, k, count, blkCnt, check;           /* Loop counters */
-        uint32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
+        int32_t blockSize1, blockSize2, blockSize3;    /* Loop counters */
         arm_status status;                             /* Status of Partial convolution */
 
 #if defined (ARM_MATH_LOOPUNROLL)
@@ -125,7 +125,7 @@ arm_status arm_conv_partial_q7(
     blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
     blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
     blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0;
     blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
     blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
 
@@ -170,7 +170,7 @@ arm_status arm_conv_partial_q7(
      * ----------------------*/
 
     /* The first stage starts here */
-    while (blockSize1 > 0U)
+    while (blockSize1 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
@@ -604,7 +604,14 @@ arm_status arm_conv_partial_q7(
     count = srcBLen - 1U;
 
     /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    if (firstIndex > srcALen)
+    {
+       pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1U);
+    }
+    else
+    {
+       pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
+    }
     px = pSrc1;
 
     /* Working pointer of inputB */
@@ -615,7 +622,7 @@ arm_status arm_conv_partial_q7(
      * Stage3 process
      * ------------------*/
 
-    while (blockSize3 > 0U)
+    while (blockSize3 > 0)
     {
       /* Accumulator is made zero for every iteration */
       sum = 0;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q15.c
index aae3708..38e652c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_q15.c
  * Description:  Convolution of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q31.c
index 1e133f0..9d2dd29 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_q31.c
  * Description:  Convolution of Q31 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q7.c
index 0c521c3..a0f96dd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_conv_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_conv_q7.c
  * Description:  Convolution of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
index d35d92c..d584c25 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_f16.c
  * Description:  Correlation of floating-point sequences
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,51 +35,7 @@
   @ingroup groupFilters
  */
 
-/**
-  @defgroup Corr Correlation
-
-  Correlation is a mathematical operation that is similar to convolution.
-  As with convolution, correlation uses two signals to produce a third signal.
-  The underlying algorithms in correlation and convolution are identical except that one of the inputs is flipped in convolution.
-  Correlation is commonly used to measure the similarity between two signals.
-  It has applications in pattern recognition, cryptanalysis, and searching.
-  The CMSIS library provides correlation functions for Q7, Q15, Q31 and floating-point data types.
-  Fast versions of the Q15 and Q31 functions are also provided.
-
-  @par           Algorithm
-                   Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.
-                   The convolution of the two signals is denoted by
-  <pre>
-      c[n] = a[n] * b[n]
-  </pre>
-                   In correlation, one of the signals is flipped in time
-  <pre>
-       c[n] = a[n] * b[-n]
-  </pre>
-  @par
-                   and this is mathematically defined as
-                   \image html CorrelateEquation.gif
-  @par
-                   The <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.
-                   The result <code>c[n]</code> is of length <code>2 * max(srcALen, srcBLen) - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., (2 * max(srcALen, srcBLen) - 2)</code>.
-                   The output result is written to <code>pDst</code> and the calling function must allocate <code>2 * max(srcALen, srcBLen) - 1</code> words for the result.
-
-  @note
-                   The <code>pDst</code> should be initialized to all zeros before being used.
-
-  @par           Fixed-Point Behavior
-                   Correlation requires summing up a large number of intermediate products.
-                   As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.
-                   Refer to the function specific documentation below for further details of the particular algorithm used.
-
-  @par           Fast Versions
-                   Fast versions are supported for Q31 and Q15.  Cycles for Fast versions are less compared to Q31 and Q15 of correlate and the design requires
-                   the input signals should be scaled down to avoid intermediate overflows.
-
-  @par           Opt Versions
-                   Opt versions are supported for Q15 and Q7.  Design uses internal scratch buffer for getting good optimisation.
-                   These versions are optimised in cycles and consumes more memory (Scratch memory) compared to Q15 and Q7 versions of correlate
- */
+
 
 /**
   @addtogroup Corr
@@ -640,16 +596,16 @@ void arm_correlate_f16(
     while (k > 0U)
     {
       /* x[0] * y[srcBLen - 4] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[1] * y[srcBLen - 3] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[2] * y[srcBLen - 2] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* x[3] * y[srcBLen - 1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -669,7 +625,7 @@ void arm_correlate_f16(
     {
       /* Perform the multiply-accumulate */
       /* x[0] * y[srcBLen - 1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -752,13 +708,13 @@ void arm_correlate_f16(
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[0] * y[0] */
-        acc0 += x0 * c0;
+        acc0 += (_Float16)x0 * (_Float16)c0;
         /* acc1 +=  x[1] * y[0] */
-        acc1 += x1 * c0;
+        acc1 += (_Float16)x1 * (_Float16)c0;
         /* acc2 +=  x[2] * y[0] */
-        acc2 += x2 * c0;
+        acc2 += (_Float16)x2 * (_Float16)c0;
         /* acc3 +=  x[3] * y[0] */
-        acc3 += x3 * c0;
+        acc3 += (_Float16)x3 * (_Float16)c0;
 
         /* Read y[1] sample */
         c0 = *(py++);
@@ -767,13 +723,13 @@ void arm_correlate_f16(
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[1] * y[1] */
-        acc0 += x1 * c0;
+        acc0 += (_Float16)x1 * (_Float16)c0;
         /* acc1 +=  x[2] * y[1] */
-        acc1 += x2 * c0;
+        acc1 += (_Float16)x2 * (_Float16)c0;
         /* acc2 +=  x[3] * y[1] */
-        acc2 += x3 * c0;
+        acc2 += (_Float16)x3 * (_Float16)c0;
         /* acc3 +=  x[4] * y[1] */
-        acc3 += x0 * c0;
+        acc3 += (_Float16)x0 * (_Float16)c0;
 
         /* Read y[2] sample */
         c0 = *(py++);
@@ -782,13 +738,13 @@ void arm_correlate_f16(
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[2] * y[2] */
-        acc0 += x2 * c0;
+        acc0 += (_Float16)x2 * (_Float16)c0;
         /* acc1 +=  x[3] * y[2] */
-        acc1 += x3 * c0;
+        acc1 += (_Float16)x3 * (_Float16)c0;
         /* acc2 +=  x[4] * y[2] */
-        acc2 += x0 * c0;
+        acc2 += (_Float16)x0 * (_Float16)c0;
         /* acc3 +=  x[5] * y[2] */
-        acc3 += x1 * c0;
+        acc3 += (_Float16)x1 * (_Float16)c0;
 
         /* Read y[3] sample */
         c0 = *(py++);
@@ -797,13 +753,13 @@ void arm_correlate_f16(
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[3] * y[3] */
-        acc0 += x3 * c0;
+        acc0 += (_Float16)x3 * (_Float16)c0;
         /* acc1 +=  x[4] * y[3] */
-        acc1 += x0 * c0;
+        acc1 += (_Float16)x0 * (_Float16)c0;
         /* acc2 +=  x[5] * y[3] */
-        acc2 += x1 * c0;
+        acc2 += (_Float16)x1 * (_Float16)c0;
         /* acc3 +=  x[6] * y[3] */
-        acc3 += x2 * c0;
+        acc3 += (_Float16)x2 * (_Float16)c0;
 
       } while (--k);
 
@@ -820,13 +776,13 @@ void arm_correlate_f16(
 
         /* Perform the multiply-accumulate */
         /* acc0 +=  x[4] * y[4] */
-        acc0 += x0 * c0;
+        acc0 += (_Float16)x0 * (_Float16)c0;
         /* acc1 +=  x[5] * y[4] */
-        acc1 += x1 * c0;
+        acc1 += (_Float16)x1 * (_Float16)c0;
         /* acc2 +=  x[6] * y[4] */
-        acc2 += x2 * c0;
+        acc2 += (_Float16)x2 * (_Float16)c0;
         /* acc3 +=  x[7] * y[4] */
-        acc3 += x3 * c0;
+        acc3 += (_Float16)x3 * (_Float16)c0;
 
         /* Reuse the present samples for the next MAC */
         x0 = x1;
@@ -888,10 +844,10 @@ void arm_correlate_f16(
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement loop counter */
         k--;
@@ -909,7 +865,7 @@ void arm_correlate_f16(
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement the loop counter */
         k--;
@@ -949,7 +905,7 @@ void arm_correlate_f16(
       while (k > 0U)
       {
         /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
+        sum += (_Float16)*px++ * (_Float16)*py++;
 
         /* Decrement the loop counter */
         k--;
@@ -1016,16 +972,16 @@ void arm_correlate_f16(
     {
       /* Perform the multiply-accumulate */
       /* sum += x[srcALen - srcBLen + 4] * y[3] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 3] * y[2] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 2] * y[1] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* sum += x[srcALen - srcBLen + 1] * y[0] */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -1044,7 +1000,7 @@ void arm_correlate_f16(
     while (k > 0U)
     {
       /* Perform the multiply-accumulate */
-      sum += *px++ * *py++;
+      sum += (_Float16)*px++ * (_Float16)*py++;
 
       /* Decrement loop counter */
       k--;
@@ -1138,7 +1094,7 @@ void arm_correlate_f16(
       if ((((i - j) < srcBLen) && (j < srcALen)))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += pIn1[j] * pIn2[-((int32_t) i - j)];
+        sum += (_Float16)pIn1[j] * (_Float16)pIn2[-((int32_t) i - (int32_t) j)];
       }
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f32.c
index bf1eaf5..7d4880e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_f32.c
  * Description:  Correlation of floating-point sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -48,16 +48,20 @@
   @par           Algorithm
                    Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.
                    The convolution of the two signals is denoted by
-  <pre>
-      c[n] = a[n] * b[n]
-  </pre>
+                   \f[
+                   c[n] = a[n] * b[n]
+                   \f]
+
                    In correlation, one of the signals is flipped in time
-  <pre>
-       c[n] = a[n] * b[-n]
-  </pre>
+ 
+                   \f[
+                   c[n] = a[n] * b[-n]
+                   \f]
   @par
                    and this is mathematically defined as
-                   \image html CorrelateEquation.gif
+                   \f[
+                   c[n] = \sum_{k=0}^{srcALen-1} a[k] b[k-n]
+                   \f]
   @par
                    The <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.
                    The result <code>c[n]</code> is of length <code>2 * max(srcALen, srcBLen) - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., (2 * max(srcALen, srcBLen) - 2)</code>.
@@ -78,6 +82,11 @@
   @par           Opt Versions
                    Opt versions are supported for Q15 and Q7.  Design uses internal scratch buffer for getting good optimisation.
                    These versions are optimised in cycles and consumes more memory (Scratch memory) compared to Q15 and Q7 versions of correlate
+ 
+  @par           Long versions:
+                   For correlation of long vectors, these functions are
+                   no longer suitable and will be very slow.
+                   An implementation based upon FFTs should be used.
  */
 
 /**
@@ -1076,7 +1085,7 @@ void arm_correlate_f32(
       if ((((i - j) < srcBLen) && (j < srcALen)))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += pIn1[j] * pIn2[-((int32_t) i - j)];
+        sum += pIn1[j] * pIn2[-((int32_t) i - (int32_t) j)];
       }
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f64.c
new file mode 100644
index 0000000..e0e9ba6
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_f64.c
@@ -0,0 +1,369 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_correlate_f64.c
+ * Description:  Correlation of floating-point sequences
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @addtogroup Corr
+  @{
+ */
+
+/**
+  @brief         Correlation of 64-bit floating-point (float64_t) sequences.
+  @param[in]     pSrcA      points to the first input sequence
+  @param[in]     srcALen    length of the first input sequence
+  @param[in]     pSrcB      points to the second input sequence
+  @param[in]     srcBLen    length of the second input sequence
+  @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.  Only srcALen + srcBLen - 1 of these samples are written here; the zero-padding positions are left untouched.
+  @return        none
+ */
+
+void arm_correlate_f64(
+  const float64_t * pSrcA,
+        uint32_t srcALen,
+  const float64_t * pSrcB,
+        uint32_t srcBLen,
+        float64_t * pDst)
+{
+  const float64_t *pIn1;                               /* InputA pointer */
+  const float64_t *pIn2;                               /* InputB pointer */
+        float64_t *pOut = pDst;                        /* Output pointer */
+  const float64_t *px;                                 /* Intermediate inputA pointer */
+  const float64_t *py;                                 /* Intermediate inputB pointer */
+  const float64_t *pSrc1;                              /* Per-stage base pointer from which px/py are re-derived */
+        float64_t sum;                                 /* Accumulator */
+        uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
+        uint32_t j, k, count, blkCnt;                  /* Loop counters */
+        uint32_t outBlockSize;                         /* Loop counter */
+        int32_t inc = 1;                               /* Destination address modifier */
+
+  /* The algorithm implementation is based on the lengths of the inputs. */
+  /* srcB is always made to slide across srcA. */
+  /* So srcBLen is always considered as shorter or equal to srcALen */
+  /* But CORR(x, y) is reverse of CORR(y, x) */
+  /* So, when srcBLen > srcALen, output pointer is made to point to the last computed output sample */
+  /* and the destination pointer modifier, inc is set to -1 */
+  /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */
+  /* But to improve the performance,
+   * we assume zeroes in the output instead of zero padding either of the inputs */
+  /* If srcALen > srcBLen,
+   * (srcALen - srcBLen) zeroes have to be included at the start of the output buffer */
+  /* If srcALen < srcBLen,
+   * (srcBLen - srcALen) zeroes have to be included at the end of the output buffer */
+  if (srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcA;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcB;
+
+    /* Number of output samples is calculated */
+    outBlockSize = (2U * srcALen) - 1U;
+
+    /* When srcALen > srcBLen, zero padding has to be done to srcB
+     * to make their lengths equal.
+     * Instead, the first (outBlockSize - (srcALen + srcBLen - 1))
+     * output samples are skipped (they are not written here) */
+    j = outBlockSize - (srcALen + (srcBLen - 1U));
+
+    /* Advance the output pointer past the skipped leading samples */
+    pOut += j;
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcB;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcA;
+
+    /* Swap the lengths so that srcBLen is the shorter (or equal) one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */
+    /* Hence set the destination pointer to point to the last output sample */
+    pOut = pDst + ((srcALen + srcBLen) - 2U);
+
+    /* Destination address modifier is set to -1 */
+    inc = -1;
+  }
+
+  /* The function is internally
+   * divided into three stages according to the number of multiplications that have to
+   * take place between inputA samples and inputB samples. In the first stage of the
+   * algorithm, the multiplications increase by one for every iteration.
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.
+   * In the third stage of the algorithm, the multiplications decrease by one
+   * for every iteration. */
+
+  /* The algorithm is implemented in three stages.
+     The loop counters of each stage are initialized here. */
+  blockSize1 = srcBLen - 1U;
+  blockSize2 = srcALen - (srcBLen - 1U);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[srcBlen - 1]
+   * sum = x[0] * y[srcBlen-2] + x[1] * y[srcBlen - 1]
+   * ....
+   * sum = x[0] * y[1] + x[1] * y[2] +...+ x[srcBLen - 2] * y[srcBLen - 1]
+   */
+
+  /* In this stage the MAC operations are increased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = 1U;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  pSrc1 = pIn2 + (srcBLen - 1U);
+  py = pSrc1;
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* The first stage starts here */
+  while (blockSize1 > 0U)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0.;
+
+    /* Initialize k with the number of MACs for this output sample */
+    k = count;
+
+    while (k > 0U)
+    {
+      /* Perform the multiply-accumulate */
+      /* x[0] * y[srcBLen - 1] */
+      sum += *px++ * *py++;
+
+      /* Decrement loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut = sum;
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    py = pSrc1 - count;
+    px = pIn1;
+
+    /* Increment MAC count */
+    count++;
+
+    /* Decrement loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen]   * y[srcBLen-1]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[0] + x[srcALen-srcBLen+1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* count is index by which the pointer pIn1 to be incremented */
+  count = 0U;
+
+  /* -------------------
+   * Stage2 process
+   * ------------------*/
+
+  /* Stage2 performs srcBLen MACs for every output sample.
+   * NOTE: neither branch below is unrolled in this f64 version;
+   * both run the same scalar loop, whatever the value of srcBLen */
+  if (srcBLen >= 4U)
+  {
+    /* Initialize blkCnt with the number of stage2 output samples */
+    blkCnt = blockSize2;
+
+    while (blkCnt > 0U)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.;
+
+      /* Initialize k with the number of MACs (srcBLen) */
+      k = srcBLen;
+
+      while (k > 0U)
+      {
+        /* Perform the multiply-accumulate */
+        sum += *px++ * *py++;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut = sum;
+
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* srcBLen is less than 4 here;
+     * the loop is the same scalar loop as in the branch above */
+    blkCnt = blockSize2;
+
+    while (blkCnt > 0U)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.;
+
+      /* Loop over srcBLen */
+      k = srcBLen;
+
+      while (k > 0U)
+      {
+        /* Perform the multiply-accumulate */
+        sum += *px++ * *py++;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut = sum;
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum = x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-2]
+   * sum = x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-3]
+   * ....
+   * sum =  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+   * sum =  x[srcALen-1] * y[0]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = srcBLen - 1U;
+
+  /* Working pointer of inputA */
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1U));
+  px = pSrc1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  while (blockSize3 > 0U)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0.;
+
+    /* Initialize k with the number of MACs for this output sample */
+    k = count;
+
+    while (k > 0U)
+    {
+      /* Perform the multiply-accumulate */
+      sum += *px++ * *py++;
+
+      /* Decrement loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut = sum;
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement MAC count */
+    count--;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+}
+
+/**
+  @} end of Corr group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_opt_q15.c
index 71f01a9..2f655d7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_fast_opt_q15.c
  * Description:  Fast Q15 Correlation
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q15.c
index 970c7aa..ecb26da 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_fast_q15.c
  * Description:  Fast Q15 Correlation
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
index a2967d9..5747e13 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_fast_q31.c
  * Description:  Fast Q31 Correlation
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q15.c
index c7d0dd1..5283f24 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_opt_q15.c
  * Description:  Correlation of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q7.c
index db70a77..0cab9f2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_opt_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_opt_q7.c
  * Description:  Correlation of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q15.c
index b7882cc..aa8bc35 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_q15.c
  * Description:  Correlation of Q15 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -884,7 +884,7 @@ void arm_correlate_q15(
       if (((i - j) < srcBLen) && (j < srcALen))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
+        sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - (int32_t) j)]);
       }
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q31.c
index 44d2f27..4aa50da 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_q31.c
  * Description:  Correlation of Q31 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -860,7 +860,7 @@ void arm_correlate_q31(
       if (((i - j) < srcBLen) && (j < srcALen))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += ((q63_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
+        sum += ((q63_t) pIn1[j] * pIn2[-((int32_t) i - (int32_t) j)]);
       }
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q7.c
index 4ff13c4..095ec99 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_correlate_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_correlate_q7.c
  * Description:  Correlation of Q7 sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -983,7 +983,7 @@ void arm_correlate_q7(
       if (((i - j) < srcBLen) && (j < srcALen))
       {
         /* z[i] += x[i-j] * y[j] */
-        sum += ((q15_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
+        sum += ((q15_t) pIn1[j] * pIn2[-((int32_t) i - (int32_t) j)]);
       }
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_f32.c
index 6bcf66f..cf641ec 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_f32.c
  * Description:  FIR decimation for floating-point sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -144,7 +144,7 @@ void arm_fir_decimate_f32(
     uint32_t  i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;   /* Loop counters */
     uint32_t  blkCntN4;
     const float32_t *px0, *px1, *px2, *px3;
-    f32x4_t accv, acc0v, acc1v, acc2v, acc3v;
+    f32x4_t accv = { 0 }, acc0v, acc1v, acc2v, acc3v;
     f32x4_t x0v, x1v, x2v, x3v;
     f32x4_t c0v;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q15.c
index 42fdade..66f0e90 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_fast_q15.c
  * Description:  Fast Q15 FIR Decimator
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q31.c
index 61c7c27..6aa1a23 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_fast_q31.c
  * Description:  Fast Q31 FIR Decimator
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_f32.c
index 8e08403..c67b49c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_init_f32.c
  * Description:  Floating-point FIR Decimator initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q15.c
index 61562f9..9c4913f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_init_q15.c
  * Description:  Initialization function for the Q15 FIR Decimator
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q31.c
index 04248e7..a4bb036 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_init_q31.c
  * Description:  Initialization function for Q31 FIR Decimation filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q15.c
index 419c544..cd03e0c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_q15.c
  * Description:  Q15 FIR Decimator
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q31.c
index 0eb7123..d104b35 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_decimate_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_decimate_q31.c
  * Description:  Q31 FIR Decimator
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
index ff74a44..28a974e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f16.c
@@ -5,10 +5,13 @@
  * Title:        arm_fir_f16.c
  * Description:  Floating-point FIR filter processing function
  *
- * Target Processor: Cortex-M cores
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,6 +61,7 @@
             vecAcc0 = vfmaq(vecAcc0, vecIn0, c[i]);                        \
         }
 
+#define NB_TAPS 4
 __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, 
     const float16_t * __restrict pSrc, 
     float16_t * __restrict pDst, uint32_t blockSize)
@@ -73,7 +77,6 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S,
     int32_t         blkCnt;
     float16x8_t         vecIn0;
     float16x8_t         vecAcc0;
-    const int       NB_TAPS=4;
     float16_t       c[NB_TAPS];
 
 
@@ -146,8 +149,9 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S,
     }
 
 }
+#undef NB_TAPS
 
-
+#define NB_TAPS 8
 __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, 
     const float16_t * __restrict pSrc, 
     float16_t * __restrict pDst, uint32_t blockSize)
@@ -163,7 +167,6 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S,
     int32_t         blkCnt;
     float16x8_t         vecIn0;
     float16x8_t         vecAcc0;
-    const int       NB_TAPS=8;
     float16_t       c[NB_TAPS];
 
 
@@ -236,7 +239,7 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S,
     }
 
 }
-
+#undef NB_TAPS
 
 void arm_fir_f16(const arm_fir_instance_f16 * S, 
   const float16_t * pSrc, 
@@ -871,7 +874,7 @@ void arm_fir_f16(
     while (i > 0U)
     {
       /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
-      acc0 += *px++ * *pb++;
+      acc0 += (_Float16)*px++ * (_Float16)*pb++;
 
       i--;
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f32.c
index d213bc4..8fcc5ae 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_f32.c
  * Description:  Floating-point FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -99,13 +99,23 @@
                    where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;
                    <code>pCoeffs</code> is the address of the coefficient buffer.
   @par          Initialization of Helium version
-                 For Helium version the array of coefficients must be a multiple of 16 even if less
-                 then 16 coefficients are used. The additional coefficients must be set to 0.
-                 It does not mean that all the coefficients will be used in the filter (numTaps
-                 is still set to its right value in the init function.) It just means that
+                 For the Helium version the array of coefficients must be padded with zeros so that
+                 its length satisfies the rule below.
+
+                 The array length L must be a multiple of x. L = x * a :
+                 - x is 4  for f32
+                 - x is 4  for q31
+                 - x is 4  for f16 (so managed like the f32 version and not like the q15 one)
+                 - x is 8  for q15
+                 - x is 16 for q7
+
+                 The additional coefficients
+                 (x * a - numTaps) must be set to 0.
+                 numTaps is still set to its actual value in the init function. The padding is needed because
                  the implementation may require to read more coefficients due to the vectorization and
                  to avoid having to manage too many different cases in the code.
 
+                
   @par          Helium state buffer
                  The state buffer must contain some additional temporary data
                  used during the computation but which is not the state of the FIR.
@@ -152,6 +162,7 @@
         }
 
 
+#define NB_TAPS 4
 __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S, 
   const float32_t * __restrict pSrc, 
   float32_t * __restrict pDst, uint32_t blockSize)
@@ -168,7 +179,6 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S,
     int32_t         blkCnt;
     float32x4_t         vecIn0;
     float32x4_t         vecAcc0;
-    const int       NB_TAPS=4;
     float32_t       c[NB_TAPS];
     const float32_t *pCoeffsCur = pCoeffs;
 
@@ -235,8 +245,7 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S,
     }
     while (blkCnt > 0);
 }
-
-
+#undef NB_TAPS
 
 __STATIC_INLINE void arm_fir_f32_5_8_mve(const arm_fir_instance_f32 * S, 
   const float32_t * __restrict pSrc, 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f64.c
new file mode 100644
index 0000000..2aaa4fb
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_f64.c
@@ -0,0 +1,133 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_f64.c
+ * Description:  Floating-point FIR filter processing function
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Processing function for the 64-bit floating-point (float64_t) FIR filter.
+  @param[in]     S          points to an instance of the floating-point FIR filter structure; S->pState is written at indices numTaps-1 .. numTaps+blockSize-2
+  @param[in]     pSrc       points to the block of input data (blockSize samples are read)
+  @param[out]    pDst       points to the block of output data (blockSize samples are written)
+  @param[in]     blockSize  number of samples to process
+  @return        none (the state buffer behind S->pState is updated in place for the next call)
+ */
+
+void arm_fir_f64(
+  const arm_fir_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+        float64_t *pState = S->pState;                 /* Start of the state window used for the current output sample */
+  const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+        float64_t *pStateCurnt;                        /* Write position in the state buffer (new input, then tail copy) */
+        float64_t *px;                                 /* Temporary pointer for state buffer */
+  const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
+        float64_t acc0;                                /* Accumulator */
+        uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+        uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);
+
+  /* Initialize blkCnt with number of samples to process */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* Copy one sample at a time into state buffer */
+    *pStateCurnt++ = *pSrc++;
+
+    /* Set the accumulator to zero */
+    acc0 = 0.;
+
+    /* Initialize state pointer to the start of the current window */
+    px = pState;
+
+    /* Initialize Coefficient pointer */
+    pb = pCoeffs;
+
+    i = numTaps;
+
+    /* Perform the multiply-accumulates */
+    while (i > 0U)
+    {
+      /* acc0 += px[k] * pb[k] for k = 0 .. numTaps-1; state and coefficient pointers advance together */
+      acc0 += *px++ * *pb++;
+
+      i--;
+    }
+
+    /* Store result in destination buffer. */
+    *pDst++ = acc0;
+
+    /* Advance state pointer by 1 for the next sample */
+    pState = pState + 1U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+     Now copy the last numTaps - 1 samples to the start of the state buffer.
+     This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the state buffer */
+  pStateCurnt = S->pState;
+
+  /* Initialize tapCnt with number of samples to keep, numTaps - 1 (unsigned: wraps if numTaps is 0 - presumably callers guarantee numTaps >= 1) */
+  tapCnt = (numTaps - 1U);
+
+  /* Copy remaining data */
+  while (tapCnt > 0U)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement loop counter */
+    tapCnt--;
+  }
+
+}
+
+/**
+* @} end of FIR group
+*/
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q15.c
index 0603ce3..d33fb86 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_fast_q15.c
  * Description:  Q15 Fast FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q31.c
index 991af2f..d50f463 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_fast_q31.c
  * Description:  Processing function for the Q31 Fast FIR filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,7 +60,6 @@
                    Use function \ref arm_fir_init_q31() to initialize the filter structure.
  */
 
-IAR_ONLY_LOW_OPTIMIZATION_ENTER
 void arm_fir_fast_q31(
   const arm_fir_instance_q31 * S,
   const q31_t * pSrc,
@@ -320,7 +319,6 @@ void arm_fir_fast_q31(
   }
 
 }
-IAR_ONLY_LOW_OPTIMIZATION_EXIT
 /**
   @} end of FIR group
  */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f16.c
index 9e52dc3..2bc43b5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f16.c
@@ -5,10 +5,13 @@
  * Title:        arm_fir_init_f16.c
  * Description:  Floating-point FIR filter initialization function
  *
- * Target Processor: Cortex-M cores
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -56,13 +59,14 @@
                    <code>pState</code> points to the array of state variables.
                    <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f16()</code>.
   @par          Initialization of Helium version
-                 For Helium version the array of coefficients must be a multiple of 16 even if less
-                 then 16 coefficients are used. The additional coefficients must be set to 0.
-                 It does not mean that all the coefficients will be used in the filter (numTaps
-                 is still set to its right value in the init function.) It just means that
+                 For the Helium version the length of the coefficient array must be a multiple of 4 (4a), even if fewer
+                 than 4a coefficients are defined in the FIR. The additional coefficients
+                 (4a - numTaps) must be set to 0.
+                 numTaps is still set to its actual value in the init function. The padding is needed because
                  the implementation may require to read more coefficients due to the vectorization and
                  to avoid having to manage too many different cases in the code.
 
+
   @par          Helium state buffer
                  The state buffer must contain some additional temporary data
                  used during the computation but which is not the state of the FIR.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f32.c
index 4dd4333..cbc3989 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_init_f32.c
  * Description:  Floating-point FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -57,10 +57,10 @@
                    <code>pState</code> points to the array of state variables and some working memory for the Helium version.
                    <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f32()</code>.
   @par          Initialization of Helium version
-                 For Helium version the array of coefficients must be a multiple of 16 even if less
-                 then 16 coefficients are used. The additional coefficients must be set to 0.
-                 It does not mean that all the coefficients will be used in the filter (numTaps
-                 is still set to its right value in the init function.) It just means that
+                 For the Helium version the length of the coefficient array must be a multiple of 4 (4a), even if fewer
+                 than 4a coefficients are defined in the FIR. The additional coefficients
+                 (4a - numTaps) must be set to 0.
+                 numTaps is still set to its actual value in the init function. The padding is needed because
                  the implementation may require to read more coefficients due to the vectorization and
                  to avoid having to manage too many different cases in the code.
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f64.c
new file mode 100644
index 0000000..16ca036
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_f64.c
@@ -0,0 +1,88 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_init_f64.c
+ * Description:  Floating-point FIR filter initialization function
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Initialization function for the double-precision (f64) floating-point FIR filter.
+  @param[in,out] S          points to an instance of the floating-point FIR filter structure
+  @param[in]     numTaps    number of filter coefficients in the filter
+  @param[in]     pCoeffs    points to the filter coefficients buffer (the pointer is stored, the data is not copied)
+  @param[in,out] pState     points to the state buffer (zeroed by this function, then stored in the instance)
+  @param[in]     blockSize  number of samples processed per call
+  @return        none
+
+  @par           Details
+                   <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+  <pre>
+      {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+  </pre>
+  @par
+                   <code>pState</code> points to the array of state variables.
+                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f64()</code>.
+  
+  @par
+                   There is no Helium version of the F64 FIR.
+
+ */
+
+void arm_fir_init_f64(
+        arm_fir_instance_f64 * S,
+        uint16_t numTaps,
+  const float64_t * pCoeffs,
+        float64_t * pState,
+        uint32_t blockSize)
+{
+  /* Record the number of filter taps in the instance */
+  S->numTaps = numTaps;
+
+  /* Keep a reference to the caller's coefficient array (not copied) */
+  S->pCoeffs = pCoeffs;
+
+  /* Zero the whole state buffer: (blockSize + numTaps - 1) elements of sizeof(float64_t) bytes each */
+  memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float64_t));
+  /* Publish the cleared state buffer through the instance */
+  S->pState = pState;
+}
+
+/**
+  @} end of FIR group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q15.c
index 605aff1..6853f1f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_init_q15.c
  * Description:  Q15 FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -75,6 +75,14 @@
   </pre>
                    <code>pState</code> points to the array of state variables.
                    <code>pState</code> is of length <code>numTaps+blockSize</code>, when running on Cortex-M4 and Cortex-M3  and is of length <code>numTaps+blockSize-1</code>, when running on Cortex-M0 where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q15()</code>.
+ 
+  @par          Initialization of Helium version
+                   For the Helium version the length of the coefficient array must be a multiple of 8 (8a), even if fewer
+                   than 8a coefficients are defined in the FIR. The additional coefficients
+                   (8a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
+                   the implementation may require to read more coefficients due to the vectorization and
+                   to avoid having to manage too many different cases in the code.
  */
 
 arm_status arm_fir_init_q15(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q31.c
index df552ae..de44f74 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_init_q31.c
  * Description:  Q31 FIR filter initialization function.
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -57,10 +57,10 @@
                    <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q31()</code>.
 
    @par          Initialization of Helium version
-                   For Helium version the array of coefficients must be a multiple of 16 even if less
-                   then 16 coefficients are used. The additional coefficients must be set to 0.
-                   It does not mean that all the coefficients will be used in the filter (numTaps
-                   is still set to its right value in the init function.) It just means that
+                   For the Helium version the length of the coefficient array must be a multiple of 4 (4a), even if fewer
+                   than 4a coefficients are defined in the FIR. The additional coefficients
+                   (4a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
                    the implementation may require to read more coefficients due to the vectorization and
                    to avoid having to manage too many different cases in the code.
   
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q7.c
index 5101d72..db14670 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_init_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_init_q7.c
  * Description:  Q7 FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -56,6 +56,15 @@
   @par
                    <code>pState</code> points to the array of state variables.
                    <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q7()</code>.
+  
+  @par          Initialization of Helium version
+                   For the Helium version the length of the coefficient array must be a multiple of 16 (16a), even if fewer
+                   than 16a coefficients are defined in the FIR. The additional coefficients
+                   (16a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
+                   the implementation may require to read more coefficients due to the vectorization and
+                   to avoid having to manage too many different cases in the code.
+
  */
 
 void arm_fir_init_q7(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_f32.c
index c5a349b..ddff5c2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_f32.c
  * Description:  Floating-point FIR interpolation sequences
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -149,7 +149,7 @@ static void arm_fir_interpolate2_f32_mve(
     uint32_t  blkCnt;           /* Loop counters */
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * 2, 2 * 2, 3 * 2 };
-    uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+    uint32x4_t vec_strides0 = vld1q_u32(strides);
     uint32x4_t vec_strides1 = vec_strides0 + 1;
     f32x4_t acc0, acc1;
 
@@ -273,8 +273,8 @@ void arm_fir_interpolate_f32(
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
     uint32_t  stridesM[4] = { 4, 3, 2, 1 };
-    uint32x4_t vec_stridesM = *(uint32x4_t *) stridesM;
-    uint32x4_t vec_strides = *(uint32x4_t *) strides;
+    uint32x4_t vec_stridesM =  vld1q_u32(stridesM);
+    uint32x4_t vec_strides =  vld1q_u32(strides);
     f32x4_t acc;
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_f32.c
index b135fa9..cfbf102 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_init_f32.c
  * Description:  Floating-point FIR interpolator initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q15.c
index 4cd35cb..f016592 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_init_q15.c
  * Description:  Q15 FIR interpolator initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q31.c
index 682ba10..cd40905 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_init_q31.c
  * Description:  Q31 FIR interpolator initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q15.c
index de3d48e..21691ee 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_q15.c
  * Description:  Q15 FIR interpolation
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q31.c
index 4e737da..edd0c70 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_interpolate_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_interpolate_q31.c
  * Description:  Q31 FIR interpolation
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -73,7 +73,7 @@ void arm_fir_interpolate_q31(
     uint32_t  i, blkCnt;        /* Loop counters */
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
-    uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+    uint32x4_t vec_strides0 =  vld1q_u32(strides);
     uint32x4_t vec_strides1 = vec_strides0 + 1;
     uint32x4_t vec_strides2 = vec_strides0 + 2;
     uint32x4_t vec_strides3 = vec_strides0 + 3;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_f32.c
index 0f28abe..9655bb0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_f32.c
  * Description:  Processing function for floating-point FIR Lattice filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,6 +37,9 @@
 /**
   @defgroup FIR_Lattice Finite Impulse Response (FIR) Lattice Filters
 
+  @deprecated These functions are no longer tested or maintained and will be removed in
+              a future version.
+              
   This set of functions implements Finite Impulse Response (FIR) lattice filters
   for Q15, Q31 and floating-point data types.  Lattice filters are used in a
   variety of adaptive filter applications. The filter structure is feedforward and
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_f32.c
index 720dd17..2e7b6a4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_init_f32.c
  * Description:  Floating-point FIR Lattice filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q15.c
index 7743ebd..27fe5ca 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_init_q15.c
  * Description:  Q15 FIR Lattice filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q31.c
index e85c34a..c2f29d2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_init_q31.c
  * Description:  Q31 FIR lattice filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q15.c
index ec87561..dbb91c2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_q15.c
  * Description:  Q15 FIR lattice filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q31.c
index ecf5880..e5de1f7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_lattice_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_lattice_q31.c
  * Description:  Q31 FIR lattice filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q15.c
index e1531b6..f197d15 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_q15.c
  * Description:  Q15 FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -88,14 +88,13 @@
     uint32_t        numTaps = S->numTaps;   /* Number of filter coefficients in the filter */\
     int32_t         blkCnt;                                                                  \
     q15x8_t         vecIn0;                                                                  \
-    const int32_t   nbVecTaps = (NBTAPS / 8);                                                \
                                                                                              \
     /*                                                                                       \
      * load coefs                                                                            \
      */                                                                                      \
-    q15x8_t         vecCoeffs[nbVecTaps];                                                    \
+    q15x8_t         vecCoeffs[NBVECTAPS];                                                    \
                                                                                              \
-    for (int i = 0; i < nbVecTaps; i++)                                                      \
+    for (int i = 0; i < NBVECTAPS; i++)                                                      \
         vecCoeffs[i] = vldrhq_s16(pCoeffs + 8 * i);                                          \
                                                                                              \
     /*                                                                                       \
@@ -116,7 +115,7 @@
         pStateCur += 4;                                                                      \
         pTempSrc += 4;                                                                       \
                                                                                              \
-        FIR_Q15_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs);                            \
+        FIR_Q15_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs);                            \
         pSamples += 4;                                                                       \
                                                                                              \
         blkCnt--;                                                                            \
@@ -128,7 +127,7 @@
     for (int i = 0; i < residual; i++)                                                       \
         *pStateCur++ = *pTempSrc++;                                                          \
                                                                                              \
-    FIR_Q15_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs);                         \
+    FIR_Q15_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs);                         \
                                                                                              \
     /*                                                                                       \
      * Copy the samples back into the history buffer start                                   \
@@ -158,7 +157,9 @@ static void arm_fir_q15_25_32_mve(const arm_fir_instance_q15 * S,
   q15_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 32
+    #define NBVECTAPS (NBTAPS / 8)
     FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -167,7 +168,9 @@ static void arm_fir_q15_17_24_mve(const arm_fir_instance_q15 * S,
   q15_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 24
+    #define NBVECTAPS (NBTAPS / 8)
     FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -177,7 +180,9 @@ static void arm_fir_q15_9_16_mve(const arm_fir_instance_q15 * S,
   q15_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 8)
     FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -186,7 +191,9 @@ static void arm_fir_q15_1_8_mve(const arm_fir_instance_q15 * S,
   q15_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 8
+    #define NBVECTAPS (NBTAPS / 8)
     FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -520,7 +527,7 @@ void arm_fir_q15(
     while (tapCnt > 0U)
     {
       /* Read the first two coefficients using SIMD:  b[N] and b[N-1] coefficients */
-      c0 = read_q15x2_ia ((q15_t **) &pb);
+      c0 = read_q15x2_ia (&pb);
 
       /* acc0 +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */
       acc0 = __SMLALD(x0, c0, acc0);
@@ -552,7 +559,7 @@ void arm_fir_q15(
       acc3 = __SMLALDX(x1, c0, acc3);
 
       /* Read coefficients b[N-2], b[N-3] */
-      c0 = read_q15x2_ia ((q15_t **) &pb);
+      c0 = read_q15x2_ia (&pb);
 
       /* acc0 +=  b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
       acc0 = __SMLALD(x2, c0, acc0);
@@ -585,7 +592,7 @@ void arm_fir_q15(
     if ((numTaps & 0x3U) != 0U)
     {
       /* Read last two coefficients */
-      c0 = read_q15x2_ia ((q15_t **) &pb);
+      c0 = read_q15x2_ia (&pb);
 
       /* Perform the multiply-accumulates */
       acc0 = __SMLALD(x0, c0, acc0);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q31.c
index 0b02824..16bd7e9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_q31.c
  * Description:  Q31 FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -119,14 +119,13 @@
     q31_t       *pTempDest;             /* Temporary pointer to the destination buffer */\
     uint32_t     numTaps = S->numTaps;  /* Number of filter coefficients in the filter */\
     int32_t      blkCnt;                                                                 \
-    const int32_t   nbVecTaps = (NBTAPS / 4);                                            \
                                                                                          \
     /*                                                                                   \
      * load coefs                                                                        \
      */                                                                                  \
-    q31x4_t         vecCoeffs[nbVecTaps];                                                \
+    q31x4_t         vecCoeffs[NBVECTAPS];                                                \
                                                                                          \
-    for (int i = 0; i < nbVecTaps; i++)                                                  \
+    for (int i = 0; i < NBVECTAPS; i++)                                                  \
         vecCoeffs[i] = vld1q(pCoeffs + 4 * i);                                           \
                                                                                          \
     /*                                                                                   \
@@ -147,7 +146,7 @@
         pStateCur += 4;                                                                  \
         pTempSrc += 4;                                                                   \
                                                                                          \
-        FIR_Q31_CORE(4, nbVecTaps, pSamples, vecCoeffs);                                 \
+        FIR_Q31_CORE(4, NBVECTAPS, pSamples, vecCoeffs);                                 \
                                                                                          \
         pSamples += 4;                                                                   \
         /*                                                                               \
@@ -164,7 +163,7 @@
               for (int i = 0; i < residual; i++)                                         \
                   *pStateCur++ = *pTempSrc++;                                            \
                                                                                          \
-              FIR_Q31_CORE(3, nbVecTaps, pSamples, vecCoeffs);                           \
+              FIR_Q31_CORE(3, NBVECTAPS, pSamples, vecCoeffs);                           \
           }                                                                              \
           break;                                                                         \
                                                                                          \
@@ -173,7 +172,7 @@
               for (int i = 0; i < residual; i++)                                         \
                   *pStateCur++ = *pTempSrc++;                                            \
                                                                                          \
-               FIR_Q31_CORE(2, nbVecTaps, pSamples, vecCoeffs);                          \
+               FIR_Q31_CORE(2, NBVECTAPS, pSamples, vecCoeffs);                          \
           }                                                                              \
           break;                                                                         \
                                                                                          \
@@ -182,7 +181,7 @@
               for (int i = 0; i < residual; i++)                                         \
                   *pStateCur++ = *pTempSrc++;                                            \
                                                                                          \
-              FIR_Q31_CORE(1, nbVecTaps, pSamples, vecCoeffs);                           \
+              FIR_Q31_CORE(1, NBVECTAPS, pSamples, vecCoeffs);                           \
           }                                                                              \
           break;                                                                         \
     }                                                                                    \
@@ -384,7 +383,9 @@ static void arm_fir_q31_5_8_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 8
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -394,7 +395,9 @@ static void arm_fir_q31_9_12_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 12
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -404,7 +407,9 @@ static void arm_fir_q31_13_16_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -414,7 +419,9 @@ static void arm_fir_q31_17_20_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 20
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -424,7 +431,9 @@ static void arm_fir_q31_21_24_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 24
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -434,7 +443,9 @@ static void arm_fir_q31_25_28_mve(const arm_fir_instance_q31 * S,
     q31_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 28
+    #define NBVECTAPS (NBTAPS / 4)
     FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q7.c
index 241f896..5966646 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_q7.c
  * Description:  Q7 FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -83,14 +83,13 @@
     uint32_t       numTaps = S->numTaps;   /* Number of filter coefficients in the filter */\
     int32_t        blkCnt;                                                                  \
     q7x16_t        vecIn0;                                                                  \
-    const int32_t  nbVecTaps = (NBTAPS / 16);                                     \
                                                                                             \
     /*                                                                                      \
      * load coefs                                                                           \
      */                                                                                     \
-    q7x16_t         vecCoeffs[nbVecTaps];                                                   \
+    q7x16_t         vecCoeffs[NBVECTAPS];                                                   \
                                                                                             \
-    for (int i = 0; i < nbVecTaps; i++)                                                     \
+    for (int i = 0; i < NBVECTAPS; i++)                                                     \
         vecCoeffs[i] = vldrbq_s8(pCoeffs + 16 * i);                               \
                                                                                             \
     /*                                                                                      \
@@ -111,7 +110,7 @@
         pStateCur += 4;                                                                     \
         pTempSrc += 4;                                                                      \
                                                                                             \
-        FIR_Q7_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs);                            \
+        FIR_Q7_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs);                            \
         pSamples += 4;                                                                      \
                                                                                             \
         blkCnt--;                                                                           \
@@ -123,7 +122,7 @@
     for (int i = 0; i < residual; i++)                                                      \
         *pStateCur++ = *pTempSrc++;                                                         \
                                                                                             \
-    FIR_Q7_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs);                         \
+    FIR_Q7_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs);                         \
                                                                                             \
                                                                                             \
     /*                                                                                      \
@@ -143,22 +142,50 @@
     while (blkCnt > 0);                                                                     \
 }
 
-static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S, 
+
+static void arm_fir_q7_49_64_mve(const arm_fir_instance_q7 * S,
+  const q7_t * __restrict pSrc,
+  q7_t * __restrict pDst, uint32_t blockSize)
+{
+    #define NBTAPS 64               /* tap capacity of this variant; the dispatcher routes numTaps <= 64 here */
+    #define NBVECTAPS (NBTAPS / 16) /* 4 coefficient vectors: the core macro loads 16 q7 coefficients per vector */
+    FIR_Q7_MAIN_CORE();             /* shared kernel, expanded in place; it refers to S by name (S->numTaps) */
+    #undef NBVECTAPS
+    #undef NBTAPS
+}
+
+
+void arm_fir_q7_33_48_mve(const arm_fir_instance_q7 * S,
+  const q7_t * __restrict pSrc,
+  q7_t * __restrict pDst, uint32_t blockSize)
+{   /* NOTE(review): not static, unlike the 49_64 and 17_32 variants - confirm external linkage is intended */
+    #define NBTAPS 48               /* tap capacity of this variant; the dispatcher routes numTaps <= 48 here */
+    #define NBVECTAPS (NBTAPS / 16) /* 3 coefficient vectors: the core macro loads 16 q7 coefficients per vector */
+    FIR_Q7_MAIN_CORE();             /* shared kernel, expanded in place; it refers to S by name (S->numTaps) */
+    #undef NBVECTAPS
+    #undef NBTAPS
+}
+
+static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S,
   const q7_t * __restrict pSrc,
   q7_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 32
+    #define NBVECTAPS (NBTAPS / 16) /* 2 coefficient vectors: the core macro loads 16 q7 coefficients per vector */
     FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
 
-void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S, 
-  const q7_t * __restrict pSrc, 
+void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S,
+  const q7_t * __restrict pSrc,
   q7_t * __restrict pDst, uint32_t blockSize)
 {
     #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 16) /* 1 coefficient vector: the core macro loads 16 q7 coefficients per vector */
     FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
     #undef NBTAPS
 }
 
@@ -198,6 +225,22 @@ void arm_fir_q7(
         arm_fir_q7_17_32_mve(S, pSrc, pDst, blockSize);
         return;
     }
+    else if (numTaps <= 48)
+    {
+        /*
+         * [33 to 48 taps] specialized routine
+         */
+        arm_fir_q7_33_48_mve(S, pSrc, pDst, blockSize);
+        return;
+    }
+    else if (numTaps <= 64)
+    {
+        /*
+         * [49 to 64 taps] specialized routine
+         */
+        arm_fir_q7_49_64_mve(S, pSrc, pDst, blockSize);
+        return;
+    }
 
     /*
      * pState points to state array which contains previous frame (numTaps - 1) samples
@@ -609,7 +652,7 @@ void arm_fir_q7(
     {
       acc0 += (q15_t) * (px++) * (*(pb++));
       i--;
-    } 
+    }
 
     /* The result is in 2.14 format. Convert to 1.7
        Then store the output in the destination buffer. */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_f32.c
index ca71b9a..b95ec65 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_f32.c
  * Description:  Floating-point sparse FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,6 +37,9 @@
 /**
   @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters
 
+  @deprecated These functions are no longer tested or maintained and will be removed in
+              a future version.
+
   This group of functions implements sparse FIR filters.
   Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero.
   Sparse filters are used for simulating reflections in communications and audio applications.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_f32.c
index c3e134b..963c050 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_init_f32.c
  * Description:  Floating-point sparse FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q15.c
index 688bb0b..72ec65a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_init_q15.c
  * Description:  Q15 sparse FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q31.c
index fcb0153..509c85e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_init_q31.c
  * Description:  Q31 sparse FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q7.c
index e2a437c..4f0f793 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_init_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_init_q7.c
  * Description:  Q7 sparse FIR filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q15.c
index 5b19f77..8784737 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_q15.c
  * Description:  Q15 sparse FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q31.c
index 04cc5ea..6524e26 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_q31.c
  * Description:  Q31 sparse FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q7.c
index 193bc2b..85ec295 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_fir_sparse_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_fir_sparse_q7.c
  * Description:  Q7 sparse FIR filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_f32.c
index 5cf0548..4c48c85 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_f32.c
  * Description:  Floating-point IIR Lattice filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_f32.c
index 94ebb5d..d9922ec 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_init_f32.c
  * Description:  Floating-point IIR lattice filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q15.c
index 5f2b5e6..1dae546 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_init_q15.c
  * Description:  Q15 IIR lattice filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q31.c
index a14b217..779d09e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_init_q31.c
  * Description:  Initialization function for the Q31 IIR lattice filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q15.c
index 25ed237..2768ffa 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_q15.c
  * Description:  Q15 IIR Lattice filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q31.c
index e5e6ee0..430c090 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_iir_lattice_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_iir_lattice_q31.c
  * Description:  Q31 IIR Lattice filter processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f16.c
new file mode 100644
index 0000000..5129666
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f16.c
@@ -0,0 +1,277 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_levinson_durbin_f16.c
+ * Description:  f16 version of Levinson Durbin algorithm
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions_f16.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+
+
+/**
+  @addtogroup LD
+  @{
+ */
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
+#pragma GCC warning "Scalar version of arm_levinson_durbin_f16 built. Helium version has build issues with gcc."
+#endif 
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(__CMSIS_GCC_H)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+#define LANE4567_MASK 0xFF00 /* predicate given to vaddq_m_u16 below; presumably selects 16-bit lanes 4-7 (2 predicate bits per lane) - confirm against the MVE predication docs */
+
+void arm_levinson_durbin_f16(const float16_t *phi,
+  float16_t *a, 
+  float16_t *err,
+  int nbCoefs)
+{
+   _Float16 e;
+   static const uint16_t revOffsetArray[8] = {7,6,5,4,3,2,1,0}; /* gather offsets: lane l picks element 7-l of an 8-element window */
+
+   a[0] = (_Float16)phi[1] / (_Float16)phi[0];
+
+   e = (_Float16)phi[0] - (_Float16)phi[1] * (_Float16)a[0];
+   for(int p=1; p < nbCoefs; p++)
+   {
+      _Float16 suma = 0.0f16;
+      _Float16 sumb = 0.0f16;
+      f16x8_t vecA,vecRevPhi,vecPhi,vecSumA, vecSumB;
+      _Float16 k;
+      uint32_t blkCnt; 
+      const float16_t *pPhi,*pRevPhi,*pA;
+      uint16x8_t revOffset;
+
+      int nb,j,i;
+
+      revOffset = vld1q(revOffsetArray);
+      vecSumA = vdupq_n_f16(0.0f16);
+      vecSumB = vdupq_n_f16(0.0f16);
+
+      pRevPhi = &phi[p-7]; /* window phi[p-7..p]; combined with revOffset, lane l reads phi[p-l] */
+      pPhi = &phi[1];
+      pA = a;
+
+      i = 0;
+      blkCnt = p >> 3; /* number of full 8-element blocks in the two dot products */
+      while(blkCnt > 0)
+      {
+         vecA = vld1q(pA);
+         pA += 8;
+
+         vecPhi = vld1q(pPhi);
+         pPhi += 8;
+
+         vecRevPhi = vldrhq_gather_shifted_offset_f16(pRevPhi,revOffset);
+         pRevPhi -= 8;
+
+         vecSumA = vfmaq(vecSumA,vecA,vecRevPhi); /* vector form of suma += a[i] * phi[p - i] */
+         vecSumB = vfmaq(vecSumB,vecA,vecPhi);    /* vector form of sumb += a[i] * phi[i + 1] */
+
+         i += 8;
+         blkCnt--;
+
+      }
+
+      suma = vecAddAcrossF16Mve(vecSumA);
+      sumb = vecAddAcrossF16Mve(vecSumB);
+
+      blkCnt = p & 7; /* terms left over after the 8-wide loop, finished in scalar code */
+      while(blkCnt > 0)
+      {
+         suma += (_Float16)a[i] * (_Float16)phi[p - i];
+         sumb += (_Float16)a[i] * (_Float16)phi[i + 1];
+
+         i++;
+         blkCnt--;
+      }
+
+      k = ((_Float16)phi[p+1] - suma)/((_Float16)phi[0] - sumb);
+
+      f16x8_t vecRevA,tmp;
+      static int16_t orgOffsetArray[8]={0,1,2,3,-1,-2,-3,-4}; /* lanes 0-3 index from the front; lanes 4-7 are relative to p (added below) */
+      static const int16_t offsetIncArray[8]={4,4,4,4,-4,-4,-4,-4};
+
+      uint16x8_t offset,offsetInc,vecTmp;
+
+
+      offset = vld1q_u16((uint16_t*)orgOffsetArray);
+      vecTmp = vdupq_n_u16(p);
+
+      offset = vaddq_m_u16(offset,offset,vecTmp,LANE4567_MASK); /* predicated add of p; the lanes it selects become p-1..p-4 */
+      offsetInc = vld1q_u16((uint16_t*)offsetIncArray);
+
+      nb = p >> 3; /* each iteration rewrites 8 coefficients: 4 from the front, 4 mirrored from the back */
+      j=0;
+      for(int i = 0; i < nb ; i++)
+      {
+          
+          /*
+            x0=a[j] - k * a[p-1-j];
+            x1=a[j+1] - k * a[p-2-j];
+            x2=a[p-1-j] - k * a[j];
+            x3=a[p-2-j] - k * a[j+1];
+
+            a[j] = x0;
+            a[j+1] = x1;
+            a[p-1-j] = x2;
+            a[p-2-j] = x3;
+          */
+
+          uint64_t tmpa,tmpb;
+          vecA = vldrhq_gather_shifted_offset_f16(a,offset);
+
+          
+          tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
+          tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
+          vecRevA = (f16x8_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
+          vecRevA = (f16x8_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1); /* vecRevA is now vecA with its 64-bit halves swapped */
+          
+
+          tmp = vsubq(vecA,vmulq_n_f16(vecRevA,k));
+          vstrhq_scatter_shifted_offset_f16(a, offset, tmp);
+
+          offset = vaddq(offset,offsetInc);
+ 
+          j+=4;
+
+      }
+
+      blkCnt = p & 7; /* coefficients not covered by the vector loop above */
+
+      if (blkCnt)
+      {
+         nb = blkCnt >> 1;
+         for(int i =0;i < nb ; i++)
+         {
+             _Float16 x,y;
+   
+             x=(_Float16)a[j] - (_Float16)k * (_Float16)a[p-1-j];
+             y=(_Float16)a[p-1-j] - (_Float16)k * (_Float16)a[j];
+   
+             a[j] = x;
+             a[p-1-j] = y;
+   
+             j++;
+         }
+   
+         nb = blkCnt & 1;
+         if (nb)
+         {
+               a[j]=(_Float16)a[j]- (_Float16)k * (_Float16)a[p-1-j];
+         }
+      }
+
+     
+      a[p] = k;
+      e = e * (1.0f16 - k*k);
+
+
+   }
+   *err = e;
+}
+
+#else
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+void arm_levinson_durbin_f16(const float16_t *phi,
+  float16_t *a,
+  float16_t *err,
+  int nbCoefs)
+{
+   /* First-order solution: a[0] = phi[1] / phi[0], with its error in predErr. */
+   _Float16 predErr;
+
+   a[0] = (_Float16)phi[1] / (_Float16)phi[0];
+   predErr = (_Float16)phi[0] - (_Float16)phi[1] * (_Float16)a[0];
+
+   /* Each pass extends the solution from `order` to `order + 1` coefficients. */
+   for (int order = 1; order < nbCoefs; order++)
+   {
+      _Float16 accRev = 0.0f16; /* sum of a[n] * phi[order - n] */
+      _Float16 accFwd = 0.0f16; /* sum of a[n] * phi[n + 1]     */
+      _Float16 refl;            /* value stored in a[order]     */
+      int lo, hi;
+
+      for (int n = 0; n < order; n++)
+      {
+         accRev += (_Float16)a[n] * (_Float16)phi[order - n];
+         accFwd += (_Float16)a[n] * (_Float16)phi[n + 1];
+      }
+
+      refl = ((_Float16)phi[order+1] - accRev) / ((_Float16)phi[0] - accFwd);
+
+      /* Update a[0..order-1] in place, pairing each entry with its mirror. */
+      lo = 0;
+      hi = order - 1;
+      while (lo < hi)
+      {
+          _Float16 head, tail;
+
+          head = (_Float16)a[lo] - (_Float16)refl * (_Float16)a[hi];
+          tail = (_Float16)a[hi] - (_Float16)refl * (_Float16)a[lo];
+
+          a[lo] = head;
+          a[hi] = tail;
+          lo++;
+          hi--;
+      }
+
+      /* Odd order: the centre entry is its own mirror. */
+      if (lo == hi)
+      {
+          a[lo] = (_Float16)a[lo] - (_Float16)refl * (_Float16)a[hi];
+      }
+
+      a[order] = refl;
+      predErr = predErr * (1.0f16 - refl*refl);
+   }
+   *err = predErr;
+}
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) */
+/**
+  @} end of LD group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c
new file mode 100644
index 0000000..0c4e650
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_f32.c
@@ -0,0 +1,283 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_levinson_durbin_f32.c
+ * Description:  f32 version of Levinson Durbin algorithm
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @defgroup LD Levinson Durbin Algorithm
+
+ */
+
+/**
+  @addtogroup LD
+  @{
+ */
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
+#pragma GCC warning "Scalar version of arm_levinson_durbin_f32 built. Helium version has build issues with gcc."
+#endif 
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(__CMSIS_GCC_H)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+#define LANE23_MASK 0xFF00
+
+void arm_levinson_durbin_f32(const float32_t *phi,
+  float32_t *a, 
+  float32_t *err,
+  int nbCoefs)
+{
+   float32_t e;
+   static const uint32_t revOffsetArray[4] = {3,2,1,0};
+   /* Helium (MVE) variant of the Levinson-Durbin recursion; processes a[] four lanes at a time. */
+   a[0] = phi[1] / phi[0];
+
+   e = phi[0] - phi[1] * a[0];
+   for(int p=1; p < nbCoefs; p++)
+   {
+      float32_t suma = 0.0f;
+      float32_t sumb = 0.0f;
+      f32x4_t vecA,vecRevPhi,vecPhi,vecSumA, vecSumB;
+      float32_t k;
+      uint32_t blkCnt; 
+      const float32_t *pPhi,*pRevPhi,*pA;
+      uint32x4_t revOffset;
+
+      int nb,j,i;
+
+      revOffset = vld1q(revOffsetArray);
+      vecSumA = vdupq_n_f32(0.0f);
+      vecSumB = vdupq_n_f32(0.0f);
+      /* pRevPhi + {3,2,1,0} addresses phi[p], phi[p-1], phi[p-2], phi[p-3]; the loop below only runs when p >= 4. */
+      pRevPhi = &phi[p-3];
+      pPhi = &phi[1];
+      pA = a;
+
+      i = 0;
+      blkCnt = p >> 2;
+      while(blkCnt > 0)
+      {
+         vecA = vld1q(pA);
+         pA += 4;
+
+         vecPhi = vld1q(pPhi);
+         pPhi += 4;
+
+         vecRevPhi = vldrwq_gather_shifted_offset_f32(pRevPhi,revOffset);
+         pRevPhi -= 4;
+
+         vecSumA = vfmaq(vecSumA,vecA,vecRevPhi);
+         vecSumB = vfmaq(vecSumB,vecA,vecPhi);
+
+         i += 4;
+         blkCnt--;
+
+      }
+      /* Collapse the vector accumulators into the scalar sums (via vecAddAcrossF32Mve). */
+      suma = vecAddAcrossF32Mve(vecSumA);
+      sumb = vecAddAcrossF32Mve(vecSumB);
+      /* Scalar tail for the remaining p & 3 products; i continues where the vector loop stopped. */
+      blkCnt = p & 3;
+      while(blkCnt > 0)
+      {
+         suma += a[i] * phi[p - i];
+         sumb += a[i] * phi[i + 1];
+
+         i++;
+         blkCnt--;
+      }
+
+      k = (phi[p+1] - suma)/(phi[0] - sumb);
+
+      f32x4_t vecRevA,tmp;
+      static int32_t orgOffsetArray[4]={0,1,-1,-2};
+      static const int32_t offsetIncArray[4]={2,2,-2,-2};
+
+      uint32x4_t offset,offsetInc,vecTmp;
+
+      /* Build element offsets {0, 1, p-1, p-2}: p is added to lanes 2 and 3 only (LANE23_MASK). */
+      offset = vld1q_u32((uint32_t*)orgOffsetArray);
+      vecTmp = vdupq_n_u32(p);
+
+      offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK);
+      offsetInc = vld1q_u32((uint32_t*)offsetIncArray);
+
+      nb = p >> 2;
+      j=0;
+      for(int i = 0; i < nb ; i++)
+      {
+          /* Scalar equivalent of one vector iteration: */
+          /*
+            x0=a[j] - k * a[p-1-j];
+            x1=a[j+1] - k * a[p-2-j];
+            x2=a[p-1-j] - k * a[j];
+            x3=a[p-2-j] - k * a[j+1];
+
+            a[j] = x0;
+            a[j+1] = x1;
+            a[p-1-j] = x2;
+            a[p-2-j] = x3;
+          */
+
+          uint64_t tmpa,tmpb;
+          vecA = vldrwq_gather_shifted_offset_f32(a,offset);
+
+          /* vecRevA = vecA with its two 64-bit halves swapped (both halves are overwritten). */
+          tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
+          tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
+          vecRevA = (f32x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
+          vecRevA = (f32x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
+
+
+          tmp = vsubq(vecA,vmulq_n_f32(vecRevA,k));
+          vstrwq_scatter_shifted_offset_f32(a, offset, tmp);
+
+          offset = vaddq(offset,offsetInc);
+ 
+          j+=2;
+
+      }
+      /* j == 2*(p>>2) here; the p & 3 elements in the middle of a[0..p-1] are still pending. */
+      switch(p & 3)
+      {
+         case 3:
+         {
+            float32_t x,y;
+            x = a[j] - k * a[p-1-j];
+            y = a[p-1-j] - k * a[j];
+
+            a[j] = x;
+            a[p-1-j] = y;
+            /* Middle element: j+1 == p-1-(j+1), so it is combined with itself. */
+            a[j+1] = a[j+1] - k * a[p-1-(j+1)];
+         }
+         break;
+
+         case 2:
+         {
+            float32_t x,y;
+            x = a[j] - k * a[p-1-j];
+            y = a[p-1-j] - k * a[j];
+
+            a[j] = x;
+            a[p-1-j] = y;
+         }
+         break;
+
+         case 1:
+            a[j] = a[j]- k * a[p-1-j];
+         break;
+      }
+      /* Store the new coefficient and scale the running error by (1 - k*k). */
+      a[p] = k;
+      e = e * (1.0f - k*k);
+
+
+   }
+   *err = e;
+}
+
+#else
+void arm_levinson_durbin_f32(const float32_t *phi,
+  float32_t *a, 
+  float32_t *err,
+  int nbCoefs)
+{
+   float32_t e;
+   int p;
+   /* Scalar Levinson-Durbin recursion: reads phi[0..nbCoefs], fills a[0..nbCoefs-1] and *err. */
+   a[0] = phi[1] / phi[0];
+   /* Error term after the first coefficient. */
+   e = phi[0] - phi[1] * a[0];
+   for(p=1; p < nbCoefs; p++)
+   {
+      float32_t suma=0.0f;
+      float32_t sumb=0.0f;
+      float32_t k;
+      int nb,j,i;
+      /* suma pairs a[i] with phi[p-i] (descending lags); sumb pairs a[i] with phi[i+1]. */
+      for(i=0; i < p; i++)
+      {
+         suma += a[i] * phi[p - i];
+         sumb += a[i] * phi[i + 1];
+      }
+      /* k becomes the new coefficient a[p] once a[0..p-1] have been updated. */
+      k = (phi[p+1]-suma)/(phi[0] - sumb);
+
+      /* Update a[0..p-1] in place, working inwards on mirrored pairs (a[j], a[p-1-j]). */
+      nb = p >> 1;
+      j=0;
+      for(i =0; i < nb ; i++)
+      {
+          float32_t x,y;
+          /* Both results are computed from the old values before either is stored. */
+          x=a[j] - k * a[p-1-j];
+          y=a[p-1-j] - k * a[j];
+
+          a[j] = x;
+          a[p-1-j] = y;
+
+          j++;
+      }
+      /* Odd p: j == p-1-j here, so the middle element is combined with itself. */
+      nb = p & 1;
+      if (nb)
+      {
+            a[j]=a[j]- k * a[p-1-j];
+      }
+      /* Store the new coefficient and scale the running error by (1 - k*k). */
+      a[p] = k;
+      e = e * (1.0f - k*k);
+
+
+   }
+   *err = e;
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) */
+
+/**
+  @} end of LD group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c
new file mode 100644
index 0000000..b38b792
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_levinson_durbin_q31.c
@@ -0,0 +1,380 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_levinson_durbin_q31.c
+ * Description:  q31 version of Levinson Durbin algorithm
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/filtering_functions.h"
+
+#define ONE_Q31 0x7FFFFFFFL /* largest positive 32-bit value, used as 1.0 in Q31 */
+#define TWO_Q30 0x7FFFFFFFL /* same bit pattern as ONE_Q31; used as the "2" in divide() */
+/* NOTE(review): HALF_Q31, ONE_Q15 and LOWPART_MASK are not used by any code in this file. */
+#define HALF_Q31 0x00008000L
+#define ONE_Q15 0x7FFF
+#define HALF_Q15 0x3FFF /* dividend passed to arm_divide_q15 in divide() */
+#define LOWPART_MASK 0x07FFF
+
+__STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b)
+{
+  q31_t r = ((q63_t)a * (q63_t)b) >> 15;
+  /* r: 64-bit product a*b shifted right by 15, narrowed to q31_t with no saturation. */
+  return(r);
+  
+}
+
+__STATIC_FORCEINLINE q31_t mul32x32(q31_t a, q31_t b)
+{
+  //q31_t r = __SSAT(((q63_t)a * b) >> 31,31);
+  q31_t r = ((q63_t)a * b) >> 31;
+  /* r: 64-bit product a*b shifted right by 31; the saturating variant above is left disabled. */
+  return(r);
+  
+}
+
+__STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d)
+{
+  arm_status status;
+  int16_t shift;
+  q15_t inverse;
+  q31_t r;
+  // Approximate fixed-point n / d. We are computing:
+  // n / d = n / (h + l) where h and l are the high and low parts of d.
+  // 1 / (h + l) ~= 1 / h (1 - l / h)   (first-order approximation)
+  // The Q15 division returns a scaled value together with a shift,
+  // so a << shift is needed to recover 1/h from it.
+  // The code below organizes the computation differently. Instead of computing:
+  // 1 / h (1 - l / h)
+  // we are computing
+  // 1 / h (2 - (l + h) / h)
+  // 1 / h (2 - d / h)
+  // Also, 1/h is not computed in Q15 but in Q14.
+  // 2 is expressed in Q30.
+  // So at the end of the computation a << 2 is needed.
+  // h is taken as d >> 16 (the upper 16 bits of d).
+  // Result is in Q14 because HALF_Q15 is used instead of ONE_Q15; the returned status is ignored.
+  status=arm_divide_q15(HALF_Q15,d>>16,&inverse,&shift);
+  (void)status;
+  
+  // d is used instead of l,
+  // so the product is subtracted from 2 instead of 1.
+  r = mul32x16(d,inverse);
+  r = TWO_Q30 - (r << shift);
+  r = mul32x16(r, inverse);
+  r = mul32x32(r,n) ;
+  r = r << (shift + 2);
+  
+  return(r);
+  
+}
+
+/**
+  @ingroup groupFilters
+ */
+
+
+
+/**
+  @addtogroup LD
+  @{
+ */
+
+/**
+  @brief         Levinson Durbin
+  @param[in]     phi      autocovariance vector starting with lag 0 (length is nbCoefs + 1)
+  @param[out]    a        autoregressive coefficients
+  @param[out]    err      prediction error (variance)
+  @param[in]     nbCoefs  number of autoregressive coefficients
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
+#pragma GCC warning "Scalar version of arm_levinson_durbin_q31 built. Helium version has build issues with gcc."
+#endif 
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(__CMSIS_GCC_H)
+
+#define LANE23_MASK 0xFF00
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_levinson_durbin_q31(const q31_t *phi,
+  q31_t *a, 
+  q31_t *err,
+  int nbCoefs)
+{
+    q31_t e;
+    /* Helium (MVE) Q31 variant of the Levinson-Durbin recursion; processes a[] four lanes at a time. */
+    static const uint32_t revOffsetArray[4] = {3,2,1,0};
+
+   //a[0] = phi[1] / phi[0];
+   a[0] = divide(phi[1], phi[0]);
+
+
+   //e = phi[0] - phi[1] * a[0];
+   e = phi[0] - mul32x32(phi[1],a[0]);
+
+   for(int p=1; p < nbCoefs; p++)
+   {
+      q63_t suma=0;
+      q63_t sumb=0;
+      q31x4_t vecA,vecRevPhi,vecPhi;
+      q31_t k;
+      uint32_t blkCnt; 
+      const q31_t *pPhi,*pRevPhi,*pA;
+      uint32x4_t revOffset;
+
+
+      int nb,j,i;
+
+      revOffset = vld1q(revOffsetArray);
+      /* pRevPhi + {3,2,1,0} addresses phi[p], phi[p-1], phi[p-2], phi[p-3]; the loop below only runs when p >= 4. */
+      pRevPhi = &phi[p-3];
+      pPhi = &phi[1];
+      pA = a;
+
+      i = 0;
+      blkCnt = p >> 2;
+      while(blkCnt > 0)
+      {
+         vecA = vld1q(pA);
+         pA += 4;
+
+         vecPhi = vld1q(pPhi);
+         pPhi += 4;
+
+         vecRevPhi = vldrwq_gather_shifted_offset_s32(pRevPhi,revOffset);
+         pRevPhi -= 4;
+
+         suma = vmlaldavaq(suma,vecA,vecRevPhi);
+         sumb = vmlaldavaq(sumb,vecA,vecPhi); 
+
+         i += 4;
+         blkCnt--;
+      }
+
+      /* Scalar tail for the remaining p & 3 products; i continues where the vector loop stopped. */
+      blkCnt = p & 3;
+      while(blkCnt > 0)
+      {
+         suma += ((q63_t)a[i] * phi[p - i]);
+         sumb += ((q63_t)a[i] * phi[i + 1]);
+
+         i++;
+         blkCnt--;
+      }
+      /* Shift the 64-bit accumulators right by 31 before they are narrowed to q31_t below. */
+      suma = asrl(suma, 31);
+      sumb = asrl(sumb, 31);
+
+
+
+      //k = (phi[p+1]-suma)/(phi[0] - sumb);
+      k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb);
+
+      q31x4_t vecRevA,tmp;
+      static int32_t orgOffsetArray[4]={0,1,-1,-2};
+      static const int32_t offsetIncArray[4]={2,2,-2,-2};
+
+      uint32x4_t offset,offsetInc,vecTmp;
+
+      /* Build element offsets {0, 1, p-1, p-2}: p is added to lanes 2 and 3 only (LANE23_MASK). */
+      offset = vld1q_u32((uint32_t*)orgOffsetArray);
+      vecTmp = vdupq_n_u32(p);
+
+      offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK);
+      offsetInc = vld1q_u32((uint32_t*)offsetIncArray);
+
+
+      nb = p >> 2;
+      j=0;
+      for(int i =0;i < nb ; i++)
+      {
+        /* Scalar equivalent of one vector iteration:
+          q31_t x0,x1,x2,x3;
+
+          //x = a[j] - k * a[p-1-j];
+          x0 = a[j] - mul32x32(k,a[p-1-j]);
+          x1 = a[j+1] - mul32x32(k,a[p-2-j]);
+
+          //y = a[p-1-j] - k * a[j];
+          x2 = a[p-1-j] - mul32x32(k , a[j]);
+          x3 = a[p-2-j] - mul32x32(k , a[j+1]);
+
+          a[j] = x0;
+          a[j+1] = x1;
+          a[p-1-j] = x2;
+          a[p-2-j] = x3;
+        */
+
+          uint64_t tmpa,tmpb;
+          vecA = vldrwq_gather_shifted_offset_s32(a,offset);
+
+          /* vecRevA = vecA with its two 64-bit halves swapped (both halves are overwritten). */
+          tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
+          tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
+          vecRevA = (q31x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
+          vecRevA = (q31x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
+
+          /* NOTE(review): vector path uses vqdmulhq_n_s32, scalar tail uses mul32x32; rounding/saturation may differ - confirm. */
+          tmp = vsubq(vecA,vqdmulhq_n_s32(vecRevA,k));
+          vstrwq_scatter_shifted_offset_s32(a, offset, tmp);
+
+          offset = vaddq(offset,offsetInc);
+
+          j+=2;
+      }
+      /* j == 2*(p>>2) here; the p & 3 elements in the middle of a[0..p-1] are still pending. */
+      switch(p & 3)
+      {
+         case 3:
+         {
+          q31_t x,y;
+
+          //x = a[j] - k * a[p-1-j];
+          x = a[j] - mul32x32(k,a[p-1-j]);
+
+          //y = a[p-1-j] - k * a[j];
+          y = a[p-1-j] - mul32x32(k , a[j]);
+
+          a[j] = x;
+          a[p-1-j] = y;
+
+          //a[j+1] = a[j+1] - k * a[p-2-j];   (middle element: j+1 == p-2-j)
+          a[j+1] = a[j+1] - mul32x32(k,a[p-2-j]);
+         }
+         break;
+
+         case 2:
+         {
+          q31_t x,y;
+
+          //x = a[j] - k * a[p-1-j];
+          x = a[j] - mul32x32(k,a[p-1-j]);
+
+          //y = a[p-1-j] - k * a[j];
+          y = a[p-1-j] - mul32x32(k , a[j]);
+
+          a[j] = x;
+          a[p-1-j] = y;
+         }
+         break;
+
+         case 1:
+            //a[j] = a[j]- k * a[p-1-j];
+            a[j] = a[j] - mul32x32(k,a[p-1-j]);
+         break;
+      }
+
+      a[p] = k;
+
+      // e = e * (1 - k*k);
+      e = mul32x32(e,ONE_Q31 - mul32x32(k,k));
+
+
+   }
+   *err = e;
+}
+
+#else
+
+void arm_levinson_durbin_q31(const q31_t *phi,
+  q31_t *a, 
+  q31_t *err,
+  int nbCoefs)
+{
+   q31_t e;
+   int p;
+   /* Scalar Q31 Levinson-Durbin recursion: reads phi[0..nbCoefs], fills a[0..nbCoefs-1] and *err. */
+   //a[0] = phi[1] / phi[0];
+   a[0] = divide(phi[1], phi[0]);
+
+
+   //e = phi[0] - phi[1] * a[0];
+   e = phi[0] - mul32x32(phi[1],a[0]);
+
+   for(p=1; p < nbCoefs; p++)
+   {
+      q63_t suma=0;
+      q63_t sumb=0;
+      q31_t k;
+      int nb,j,i;
+      /* 64-bit accumulation: suma pairs a[i] with phi[p-i], sumb pairs a[i] with phi[i+1]. */
+      for(i=0; i < p; i++)
+      {
+         suma += ((q63_t)a[i] * phi[p - i]);
+         sumb += ((q63_t)a[i] * phi[i + 1]);
+      }
+      /* Shift the 64-bit accumulators right by 31 before they are narrowed to q31_t below. */
+      suma = suma >> 31;
+      sumb = sumb >> 31;
+
+
+
+      //k = (phi[p+1]-suma)/(phi[0] - sumb);
+      k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb);
+
+      /* Update a[0..p-1] in place, working inwards on mirrored pairs (a[j], a[p-1-j]). */
+      nb = p >> 1;
+      j=0;
+      for(i =0;i < nb ; i++)
+      {
+          q31_t x,y;
+
+          //x = a[j] - k * a[p-1-j];
+          x = a[j] - mul32x32(k,a[p-1-j]);
+
+          //y = a[p-1-j] - k * a[j];
+          y = a[p-1-j] - mul32x32(k , a[j]);
+
+          a[j] = x;
+          a[p-1-j] = y;
+
+          j++;
+      }
+      /* Odd p: j == p-1-j here, so the middle element is combined with itself. */
+      nb = p & 1;
+      if (nb)
+      {
+            //a[j] = a[j]- k * a[p-1-j];
+            a[j] = a[j] - mul32x32(k,a[p-1-j]);
+      }
+
+      a[p] = k;
+
+      // e = e * (1 - k*k);
+      e = mul32x32(e,ONE_Q31 - mul32x32(k,k));
+
+
+   }
+   *err = e;
+}
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) */
+
+/**
+  @} end of LD group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_f32.c
index ef2e832..865999f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_f32.c
  * Description:  Processing function for the floating-point LMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_f32.c
index e4e53a0..8d8e144 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_init_f32.c
  * Description:  Floating-point LMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q15.c
index 4918436..871caa0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_init_q15.c
  * Description:  Q15 LMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q31.c
index 42d76f5..f4482d3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_init_q31.c
  * Description:  Q31 LMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_f32.c
index e269d7d..0e99319 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_f32.c
  * Description:  Processing function for the floating-point NLMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_f32.c
index 8fc9597..949f6c3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_init_f32.c
  * Description:  Floating-point NLMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q15.c
index 0c41794..aa05875 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_init_q15.c
  * Description:  Q15 NLMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q31.c
index a261a30..28e3c5b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_init_q31.c
  * Description:  Q31 NLMS filter initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q15.c
index 9785a78..2e4befd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_q15.c
  * Description:  Processing function for Q15 normalized LMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q31.c
index 37cce57..322219d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_norm_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_norm_q31.c
  * Description:  Processing function for the Q31 NLMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q15.c
index 536409b..b165d7f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_q15.c
  * Description:  Processing function for Q15 LMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q31.c
index cc63338..fedf570 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/FilteringFunctions/arm_lms_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_lms_q31.c
  * Description:  Processing function for the Q31 LMS filter
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
index 1e974b6..d9a7d7c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_bilinear_interp_f16.c
  * Description:  Floating-point bilinear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,57 +37,6 @@
   @ingroup groupInterpolation
  */
 
-/**
-   * @defgroup BilinearInterpolate Bilinear Interpolation
-   *
-   * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
-   * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
-   * determines values between the grid points.
-   * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
-   * Bilinear interpolation is often used in image processing to rescale images.
-   * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
-   *
-   * <b>Algorithm</b>
-   * \par
-   * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
-   * For floating-point, the instance structure is defined as:
-   * <pre>
-   *   typedef struct
-   *   {
-   *     uint16_t numRows;
-   *     uint16_t numCols;
-   *     float16_t *pData;
-   * } arm_bilinear_interp_instance_f16;
-   * </pre>
-   *
-   * \par
-   * where <code>numRows</code> specifies the number of rows in the table;
-   * <code>numCols</code> specifies the number of columns in the table;
-   * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
-   * The data table <code>pTable</code> is organized in row order and the supplied data values fall on integer indexes.
-   * That is, table element (x,y) is located at <code>pTable[x + y*numCols]</code> where x and y are integers.
-   *
-   * \par
-   * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
-   * <pre>
-   *     XF = floor(x)
-   *     YF = floor(y)
-   * </pre>
-   * \par
-   * The interpolated output point is computed as:
-   * <pre>
-   *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
-   *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
-   *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
-   *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
-   * </pre>
-   * Note that the coordinates (x, y) contain integer and fractional components.
-   * The integer components specify which portion of the table to use while the
-   * fractional components control the interpolation processor.
-   *
-   * \par
-   * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output.
-   */
 
 
   /**
@@ -143,18 +92,19 @@
 
     /* Calculation of intermediate values */
     b1 = f00;
-    b2 = f01 - f00;
-    b3 = f10 - f00;
-    b4 = f00 - f01 - f10 + f11;
+    b2 = (_Float16)f01 - (_Float16)f00;
+    b3 = (_Float16)f10 - (_Float16)f00;
+    b4 = (_Float16)f00 - (_Float16)f01 - (_Float16)f10 + (_Float16)f11;
 
     /* Calculation of fractional part in X */
-    xdiff = X - xIndex;
+    xdiff = (_Float16)X - (_Float16)xIndex;
 
     /* Calculation of fractional part in Y */
-    ydiff = Y - yIndex;
+    ydiff = (_Float16)Y - (_Float16)yIndex;
 
     /* Calculation of bi-linear interpolated output */
-    out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
+    out = (_Float16)b1 + (_Float16)b2 * (_Float16)xdiff + 
+    (_Float16)b3 * (_Float16)ydiff + (_Float16)b4 * (_Float16)xdiff * (_Float16)ydiff;
 
     /* return to application */
     return (out);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f32.c
index 41e99a4..3008a7a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_bilinear_interp_f32.c
  * Description:  Floating-point bilinear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q15.c
index 484b404..bc92417 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q15.c
  * Description:  Q15 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q31.c
index 4a5f654..2375763 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q31.c
  * Description:  Q31 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q7.c
index 31b3a68..0a78876 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_bilinear_interp_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q7.c
  * Description:  Q7 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
index f2b0b36..c25a217 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_f16.c
  * Description:  Floating-point linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,37 +37,6 @@
   @ingroup groupInterpolation
  */
 
-/**
-   * @defgroup LinearInterpolate Linear Interpolation
-   *
-   * Linear interpolation is a method of curve fitting using linear polynomials.
-   * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
-   *
-   * \par
-   * \image html LinearInterp.gif "Linear interpolation"
-   *
-   * \par
-   * A  Linear Interpolate function calculates an output value(y), for the input(x)
-   * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
-   *
-   * \par Algorithm:
-   * <pre>
-   *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
-   *       where x0, x1 are nearest values of input x
-   *             y0, y1 are nearest values to output y
-   * </pre>
-   *
-   * \par
-   * This set of functions implements Linear interpolation process
-   * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
-   * sample of data and each call to the function returns a single processed value.
-   * <code>S</code> points to an instance of the Linear Interpolate function data structure.
-   * <code>x</code> is the input sample value. The functions returns the output value.
-   *
-   * \par
-   * if x is outside of the table boundary, Linear interpolation returns first value of the table
-   * if x is below input range and returns last value of table if x is above range.
-   */
 
 /**
    * @addtogroup LinearInterpolate
@@ -93,7 +62,7 @@
     float16_t *pYData = S->pYData;               /* pointer to output table */
 
     /* Calculation of index */
-    i = (int32_t) ((x - S->x1) / xSpacing);
+    i = (int32_t) (((_Float16)x - (_Float16)S->x1) / (_Float16)xSpacing);
 
     if (i < 0)
     {
@@ -108,15 +77,16 @@
     else
     {
       /* Calculation of nearest input values */
-      x0 = S->x1 +  i      * xSpacing;
-      x1 = S->x1 + (i + 1) * xSpacing;
+      x0 = (_Float16)S->x1 +  (_Float16)i      * (_Float16)xSpacing;
+      x1 = (_Float16)S->x1 + (_Float16)(i + 1) * (_Float16)xSpacing;
 
       /* Read of nearest output values */
       y0 = pYData[i];
       y1 = pYData[i + 1];
 
       /* Calculation of output */
-      y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
+      y = (_Float16)y0 + ((_Float16)x - (_Float16)x0) * 
+      (((_Float16)y1 - (_Float16)y0) / ((_Float16)x1 - (_Float16)x0));
 
     }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f32.c
index f8caa74..834d54e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_f32.c
  * Description:  Floating-point linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q15.c
index 690c44a..f2cfc80 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q15.c
  * Description:  Q15 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,7 +53,7 @@
    *
    */
   q15_t arm_linear_interp_q15(
-  q15_t * pYData,
+  const q15_t * pYData,
   q31_t x,
   uint32_t nValues)
   {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q31.c
index 783e125..bdeefb8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q31.c
  * Description:  Q31 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -54,7 +54,7 @@
    *
    */
   q31_t arm_linear_interp_q31(
-  q31_t * pYData,
+  const q31_t * pYData,
   q31_t x,
   uint32_t nValues)
   {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q7.c
index 0f32e3d..bde5678 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_linear_interp_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_linear_interp_q7.c
  * Description:  Q7 linear interpolation
  *
- * $Date:        22 July 2020
+ * $Date:        23 April 2021
  * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,7 +53,7 @@
    * This function can support maximum of table size 2^12.
    */
   q7_t arm_linear_interp_q7(
-  q7_t * pYData,
+  const q7_t * pYData,
   q31_t x,
   uint32_t nValues)
   {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_f32.c
index 822986c..21a5edb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_spline_interp_f32.c
  * Description:  Floating-point cubic spline interpolation
  *
- * $Date:        13 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -140,7 +140,7 @@
 /**
  * @brief Processing function for the floating-point cubic spline interpolation.
  * @param[in]  S          points to an instance of the floating-point spline structure.
- * @param[in]  xq         points to the x values ot the interpolated data points.
+ * @param[in]  xq         points to the x values of the interpolated data points.
  * @param[out] pDst       points to the block of output data.
  * @param[in]  blockSize  number of samples of output data.
  */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_init_f32.c
index 5e5c442..80e4d76 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/InterpolationFunctions/arm_spline_interp_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_spline_interp_init_f32.c
  * Description:  Floating-point cubic spline initialization function
  *
- * $Date:        13 November 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f16.c
new file mode 100644
index 0000000..e9f15bc
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f16.c
@@ -0,0 +1,125 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_householder_f16.c
+ * Description:  Half floating-point Householder transform
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+
+#include <math.h>
+
+
+
+/**
+  @ingroup groupMatrix
+ */
+
+
+/**
+  @addtogroup MatrixHouseholder
+  @{
+ */
+
+/**
+  @brief         Householder transform of a half floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.  
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+
+float16_t arm_householder_f16(
+    const float16_t * pSrc,
+    const float16_t threshold,
+    uint32_t    blockSize,
+    float16_t * pOut
+    )
+/* Writes the Householder vector of pSrc into pOut and returns its scaling factor (the local tau). */
+{
+  uint32_t i;
+  float16_t epsilon;
+  float16_t x1norm2,alpha;
+  float16_t beta,tau,r;
+  /* NOTE: pSrc[0] and pOut[0] are accessed and blockSize-1 is unsigned, so blockSize must be >= 1. */
+  epsilon = threshold;
+  /* alpha is the leading component of the input vector. */
+  alpha = pSrc[0];
+  /* Start from a copy of the input whose leading component is forced to 1. */
+  for(i=1; i < blockSize; i++)
+  {
+    pOut[i] = pSrc[i];
+  }
+  pOut[0] = 1.0f16;
+  /* x1norm2: dot product of the trailing blockSize-1 components (pSrc[1..]) with themselves. */
+  arm_dot_prod_f16(pSrc+1,pSrc+1,blockSize-1,&x1norm2);
+  /* Trailing part at or below the threshold: return 0 and clear all blockSize elements of pOut. */
+  if ((_Float16)x1norm2<=(_Float16)epsilon)
+  {
+     tau = 0.0f16;
+     memset(pOut,0,blockSize * sizeof(float16_t));
+  }
+  else
+  {
+    beta =  (_Float16)alpha * (_Float16)alpha + (_Float16)x1norm2;
+    (void)arm_sqrt_f16(beta,&beta); /* status deliberately discarded; operand is a sum of squares */
+    /* Give beta the sign opposite to alpha so that alpha - beta adds magnitudes instead of cancelling. */
+    if ((_Float16)alpha > 0.0f16)
+    {
+      beta = -(_Float16)beta;
+    }
+    /* Scale the whole vector by 1/(alpha - beta); pOut[0] is scaled too, so it is reset to 1 afterwards. */
+    r = 1.0f16 / ((_Float16)alpha -(_Float16)beta);
+    arm_scale_f16(pOut,r,pOut,blockSize);
+    pOut[0] = 1.0f16;
+
+    /* tau is the value returned to the caller (documented as the scaling factor beta). */
+    tau = ((_Float16)beta - (_Float16)alpha) / (_Float16)beta;
+
+  }
+
+  return(tau);
+
+}
+
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MatrixHouseholder group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f32.c
new file mode 100644
index 0000000..c1510bc
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f32.c
@@ -0,0 +1,196 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_householder_f32.c
+ * Description:  Floating-point Householder transform
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+#include <math.h>
+
+
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixHouseholder Householder transform of a vector
+
+    Computes the Householder transform of a vector x.
+
+    The Householder transform of x is a vector v with 
+
+    \f[
+        v_0 = 1
+    \f]
+
+    and a scalar \f$\beta\f$ such that:
+
+    \f[
+    P = I - \beta v v^T 
+    \f]
+
+    is an orthogonal matrix and 
+
+    \f[
+    P x = ||x||_2 e_1
+    \f]
+
+    So P is a hyperplane reflection such that the image of x
+    is proportional to \f$e_1\f$.
+
+    \f$e_1\f$ is the vector of coordinates:
+
+    \f[
+    \begin{pmatrix}
+        1       \\
+        0       \\
+        \vdots  \\
+    \end{pmatrix}
+    \f]
+
+    If x is already proportional to \f$e_1\f$ then 
+    the matrix P should be the identity. 
+
+    Thus, \f$\beta\f$ should be 0 and in this case the vector v
+    can also be null.
+
+    But how do we detect that x is already proportional to
+    \f$e_1\f$?
+
+    Write x as
+    \f[
+    x =
+    \begin{pmatrix}
+        x_0     \\
+        xr      \\
+    \end{pmatrix}
+    \f]
+
+    where \f$xr\f$ is a vector.
+
+    The algorithm is computing the norm squared of this vector:
+
+    \f[
+        ||xr||^2
+    \f]
+
+    and this value is compared to a `threshold`. If the value
+    is smaller than or equal to the `threshold`, the algorithm
+    returns 0 for \f$\beta\f$ and a null Householder vector.
+
+    This `threshold` is an argument of the function.
+
+    Default values are provided in the header 
+    `dsp/matrix_functions.h` like for instance
+    `DEFAULT_HOUSEHOLDER_THRESHOLD_F32`
+
+
+
+ */
+
+/**
+  @addtogroup MatrixHouseholder
+  @{
+ */
+
+/**
+  @brief         Householder transform of a floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.  
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+
+
+
+
+float32_t arm_householder_f32(
+    const float32_t * pSrc,
+    const float32_t threshold,
+    uint32_t    blockSize,
+    float32_t * pOut
+    )
+/* Writes the Householder vector of pSrc into pOut and returns its scaling factor (the local tau). */
+{
+  uint32_t i;
+  float32_t epsilon;
+  float32_t x1norm2,alpha;
+  float32_t beta,tau,r;
+  /* NOTE: pSrc[0] and pOut[0] are accessed and blockSize-1 is unsigned, so blockSize must be >= 1. */
+  epsilon = threshold;
+  /* alpha is the leading component of the input vector. */
+  alpha = pSrc[0];
+  /* Start from a copy of the input whose leading component is forced to 1. */
+  for(i=1; i < blockSize; i++)
+  {
+    pOut[i] = pSrc[i];
+  }
+  pOut[0] = 1.0f;
+  /* x1norm2: dot product of the trailing blockSize-1 components (pSrc[1..]) with themselves. */
+  arm_dot_prod_f32(pSrc+1,pSrc+1,blockSize-1,&x1norm2);
+  /* Trailing part at or below the threshold: return 0 and clear all blockSize elements of pOut. */
+  if (x1norm2<=epsilon)
+  {
+     tau = 0.0f;
+     memset(pOut,0,blockSize * sizeof(float32_t));
+  }
+  else
+  {
+    beta =  alpha * alpha + x1norm2;
+    (void)arm_sqrt_f32(beta,&beta); /* status deliberately discarded; operand is a sum of squares */
+    /* Give beta the sign opposite to alpha so that alpha - beta adds magnitudes instead of cancelling. */
+    if (alpha > 0.0f)
+    {
+      beta = -beta;
+    }
+    /* Scale the whole vector by 1/(alpha - beta); pOut[0] is scaled too, so it is reset to 1 afterwards. */
+    r = 1.0f / (alpha -beta);
+    arm_scale_f32(pOut,r,pOut,blockSize);
+    pOut[0] = 1.0f;
+
+    /* tau is the value returned to the caller (documented as the scaling factor beta). */
+    tau = (beta - alpha) / beta;
+
+  }
+
+  return(tau);
+
+}
+
+
+/**
+  @} end of MatrixHouseholder group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f64.c
new file mode 100644
index 0000000..3f41011
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_householder_f64.c
@@ -0,0 +1,121 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_householder_f64.c
+ * Description:  Double floating-point Householder transform
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+#include <math.h>
+
+
+
+/**
+  @ingroup groupMatrix
+ */
+
+
+/**
+  @addtogroup MatrixHouseholder
+  @{
+ */
+
+/**
+  @brief         Householder transform of a double floating point vector.
+  @param[in]     pSrc        points to the input vector.
+  @param[in]     threshold   norm2 threshold.  
+  @param[in]     blockSize   dimension of the vector space.
+  @param[out]    pOut        points to the output vector.
+  @return        the scaling factor beta
+ */
+
+
+
+
+float64_t arm_householder_f64(
+    const float64_t * pSrc,
+    const float64_t threshold,
+    uint32_t    blockSize,
+    float64_t * pOut
+    )
+/* Writes the Householder vector of pSrc into pOut and returns its scaling factor (the local tau). */
+{
+  uint32_t i;
+  float64_t epsilon;
+  float64_t x1norm2,alpha;
+  float64_t beta,tau,r;
+  /* NOTE: pSrc[0] and pOut[0] are accessed and blockSize-1 is unsigned, so blockSize must be >= 1. */
+  epsilon = threshold;
+  /* alpha is the leading component of the input vector. */
+  alpha = pSrc[0];
+  /* Start from a copy of the input whose leading component is forced to 1. */
+  for(i=1; i < blockSize; i++)
+  {
+    pOut[i] = pSrc[i];
+  }
+  pOut[0] = 1.0;
+  /* x1norm2: dot product of the trailing blockSize-1 components (pSrc[1..]) with themselves. */
+  arm_dot_prod_f64(pSrc+1,pSrc+1,blockSize-1,&x1norm2);
+  /* Trailing part at or below the threshold: return 0 and clear all blockSize elements of pOut. */
+  if (x1norm2<=epsilon)
+  {
+     tau = 0.0;
+     memset(pOut,0,blockSize * sizeof(float64_t));
+  }
+  else
+  {
+    beta =  alpha * alpha + x1norm2;
+    beta=sqrt(beta); /* operand is a sum of squares */
+    /* Give beta the sign opposite to alpha so that alpha - beta adds magnitudes instead of cancelling. */
+    if (alpha > 0.0)
+    {
+      beta = -beta;
+    }
+    /* Scale the whole vector by 1/(alpha - beta); pOut[0] is scaled too, so it is reset to 1 afterwards. */
+    r = 1.0 / (alpha -beta);
+    arm_scale_f64(pOut,r,pOut,blockSize);
+    pOut[0] = 1.0;
+
+    /* tau is the value returned to the caller (documented as the scaling factor beta). */
+    tau = (beta - alpha) / beta;
+
+  }
+
+  return(tau);
+
+}
+
+
+/**
+  @} end of MatrixHouseholder group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
index b598b4a..77b4fab 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_add_f16.c
  * Description:  Floating-point matrix addition
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -64,7 +64,7 @@ arm_status arm_mat_add_f16(
     arm_status status;  
     uint32_t  numSamples;       /* total number of elements in the matrix  */
     float16_t *pDataA, *pDataB, *pDataDst;
-    f16x8_t vecA, vecB, vecDst;
+    f16x8_t vecA, vecB, vecDst = { 0 };
     float16_t const *pSrcAVec;
     float16_t const *pSrcBVec;
     uint32_t  blkCnt;           /* loop counters */
@@ -169,13 +169,13 @@ arm_status arm_mat_add_f16(
       /* C(m,n) = A(m,n) + B(m,n) */
 
       /* Add and store result in destination buffer. */
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
       /* Decrement loop counter */
       blkCnt--;
@@ -196,7 +196,7 @@ arm_status arm_mat_add_f16(
       /* C(m,n) = A(m,n) + B(m,n) */
 
       /* Add and store result in destination buffer. */
-      *pOut++ = *pInA++ + *pInB++;
+      *pOut++ = (_Float16)*pInA++ + (_Float16)*pInB++;
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c
index 705d680..b777249 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_add_f32.c
  * Description:  Floating-point matrix addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -38,7 +38,27 @@
   @defgroup MatrixAdd Matrix Addition
 
   Adds two matrices.
-  \image html MatrixAddition.gif "Addition of two 3 x 3 matrices"
+  @par Addition of two 3 x 3 matrices
+
+  \f[
+  \begin{pmatrix}
+   a_{1,1} & a_{1,2} & a_{1,3} \\
+   a_{2,1} & a_{2,2} & a_{2,3} \\
+   a_{3,1} & a_{3,2} & a_{3,3} \\
+  \end{pmatrix}
+  +
+  \begin{pmatrix}
+   b_{1,1} & b_{1,2} & b_{1,3} \\
+   b_{2,1} & b_{2,2} & b_{2,3} \\
+   b_{3,1} & b_{3,2} & b_{3,3} \\
+  \end{pmatrix}
+  =
+  \begin{pmatrix}
+   a_{1,1}+b_{1,1} & a_{1,2}+b_{1,2} & a_{1,3}+b_{1,3} \\
+   a_{2,1}+b_{2,1} & a_{2,2}+b_{2,2} & a_{2,3}+b_{2,3} \\
+   a_{3,1}+b_{3,1} & a_{3,2}+b_{3,2} & a_{3,3}+b_{3,3} \\
+  \end{pmatrix}
+  \f]
 
   The functions check to make sure that
   <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same
@@ -70,7 +90,7 @@ arm_status arm_mat_add_f32(
     arm_status status;  
     uint32_t  numSamples;       /* total number of elements in the matrix  */
     float32_t *pDataA, *pDataB, *pDataDst;
-    f32x4_t vecA, vecB, vecDst;
+    f32x4_t vecA, vecB, vecDst = { 0 };
     float32_t const *pSrcAVec;
     float32_t const *pSrcBVec;
     uint32_t  blkCnt;           /* loop counters */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q15.c
index 0e7d5fa..9a1dfa0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_add_q15.c
  * Description:  Q15 matrix addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -61,7 +61,7 @@ arm_status arm_mat_add_q15(
 {
     uint32_t        numSamples;       /* total number of elements in the matrix  */
     q15_t          *pDataA, *pDataB, *pDataDst;
-    q15x8_t       vecA, vecB, vecDst;
+    q15x8_t       vecA, vecB, vecDst = { 0 };
     q15_t const   *pSrcAVec;
     q15_t const   *pSrcBVec;
     uint32_t        blkCnt;           /* loop counters */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q31.c
index b5c98e4..7b08dbf 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_add_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_add_q31.c
  * Description:  Q31 matrix addition
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -61,7 +61,7 @@ arm_status arm_mat_add_q31(
     arm_status status;                             /* status of matrix addition */
     uint32_t        numSamples;       /* total number of elements in the matrix  */
     q31_t          *pDataA, *pDataB, *pDataDst;
-    q31x4_t       vecA, vecB, vecDst;
+    q31x4_t       vecA, vecB, vecDst = { 0 };
     q31_t const   *pSrcAVec;
     q31_t const   *pSrcBVec;
     uint32_t        blkCnt;           /* loop counters */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
index f6429f6..31b245e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_cholesky_f16.c
  * Description:  Floating-point Cholesky decomposition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -27,6 +29,7 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
@@ -50,7 +53,7 @@
                    - \ref ARM_MATH_DECOMPOSITION_FAILURE      : Input matrix cannot be decomposed
    * @par
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
-   * The decomposition of A is returning a lower triangular matrix U such that A = U U^t
+   * The decomposition of A is returning a lower triangular matrix L such that A = L L^t
    */
 
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
@@ -158,16 +161,13 @@ arm_status arm_mat_cholesky_f16(
           pG[j * n + i] = vecAddAcrossF16Mve(acc);
        }
 
-       if (pG[i * n + i] <= 0.0f16)
+       if ((_Float16)pG[i * n + i] <= 0.0f16)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
 
-       invSqrtVj = (_Float16)1.0f/sqrtf(pG[i * n + i]);
-       for(j=i; j < n ; j++)
-       {
-         pG[j * n + i] = (_Float16)pG[j * n + i] * invSqrtVj ;
-       }
+       invSqrtVj = 1.0f16/(_Float16)sqrtf((float32_t)pG[i * n + i]);
+       SCALE_COL_F16(pDst,i,invSqrtVj,i);
     }
 
     status = ARM_MATH_SUCCESS;
@@ -220,20 +220,21 @@ arm_status arm_mat_cholesky_f16(
 
           for(k=0; k < i ; k++)
           {
-             pG[j * n + i] = pG[j * n + i] - pG[i * n + k] * pG[j * n + k];
+             pG[j * n + i] = (_Float16)pG[j * n + i] - (_Float16)pG[i * n + k] * (_Float16)pG[j * n + k];
           }
        }
 
-       if (pG[i * n + i] <= 0.0f)
+       if ((_Float16)pG[i * n + i] <= 0.0f16)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
 
-       invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
-       for(j=i ; j < n ; j++)
-       {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
-       }
+       /* The division is done in float32 for accuracy reasons, and
+       because doing it in f16 would not have any impact on performance.
+       */
+       invSqrtVj = 1.0f/sqrtf((float32_t)pG[i * n + i]);
+       SCALE_COL_F16(pDst,i,invSqrtVj,i);
+
     }
 
     status = ARM_MATH_SUCCESS;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f32.c
index 89f5ae9..77890a4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_cholesky_f32.c
  * Description:  Floating-point Cholesky decomposition
  *
+ * $Date:        05 October 2021
+ * $Revision:    V1.9.1
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -27,6 +29,7 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 /**
   @ingroup groupMatrix
@@ -35,7 +38,7 @@
 /**
   @defgroup MatrixChol Cholesky and LDLT decompositions
 
-  Computes the Cholesky or LDL^t decomposition of a matrix.
+  Computes the Cholesky (LL^t) or LDL^t decomposition of a matrix.
 
 
   If the input matrix does not have a decomposition, then the 
@@ -58,7 +61,7 @@
                    - \ref ARM_MATH_DECOMPOSITION_FAILURE      : Input matrix cannot be decomposed
    * @par
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
-   * The decomposition of A is returning a lower triangular matrix U such that A = U U^t
+   * The decomposition of A is returning a lower triangular matrix L such that A = L L^t
    */
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
@@ -170,10 +173,7 @@ arm_status arm_mat_cholesky_f32(
        }
 
        invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
-       for(j=i; j < n ; j++)
-       {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
-       }
+       SCALE_COL_F32(pDst,i,invSqrtVj,i);
     }
 
     status = ARM_MATH_SUCCESS;
@@ -221,7 +221,9 @@ arm_status arm_mat_cholesky_f32(
     f32x4_t acc, acc0, acc1, acc2, acc3;
     f32x4_t vecGi;
     f32x4_t vecGj,vecGj0,vecGj1,vecGj2,vecGj3;
-    f32x2_t tmp = vdup_n_f32(0);    
+#if !defined(__aarch64__)
+    f32x2_t tmp = vdup_n_f32(0);   
+#endif    
     float32_t sum=0.0f;
     float32_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f;
 
@@ -264,7 +266,7 @@ arm_status arm_mat_cholesky_f32(
              k+=4;
           }
 
-#if __aarch64__
+#if defined(__aarch64__)
           sum0 = vpadds_f32(vpadd_f32(vget_low_f32(acc0), vget_high_f32(acc0)));
           sum1 = vpadds_f32(vpadd_f32(vget_low_f32(acc1), vget_high_f32(acc1)));
           sum2 = vpadds_f32(vpadd_f32(vget_low_f32(acc2), vget_high_f32(acc2)));
@@ -322,7 +324,7 @@ arm_status arm_mat_cholesky_f32(
              k+=4;
           }
 
-#if __aarch64__
+#if defined(__aarch64__)
           sum = vpadds_f32(vpadd_f32(vget_low_f32(acc), vget_high_f32(acc)));
 #else
           tmp = vpadd_f32(vget_low_f32(acc), vget_high_f32(acc));
@@ -348,10 +350,7 @@ arm_status arm_mat_cholesky_f32(
        }
 
        invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
-       for(j=i; j < n ; j++)
-       {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
-       }
+       SCALE_COL_F32(pDst,i,invSqrtVj,i);
     }
 
     status = ARM_MATH_SUCCESS;
@@ -414,10 +413,8 @@ arm_status arm_mat_cholesky_f32(
        }
 
        invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
-       for(j=i ; j < n ; j++)
-       {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
-       }
+       SCALE_COL_F32(pDst,i,invSqrtVj,i);
+      
     }
 
     status = ARM_MATH_SUCCESS;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
index d73edfe..b42f296 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cholesky_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_cholesky_f64.c
  * Description:  Floating-point Cholesky decomposition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -27,6 +29,7 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 /**
   @ingroup groupMatrix
@@ -48,7 +51,7 @@
                    - \ref ARM_MATH_DECOMPOSITION_FAILURE      : Input matrix cannot be decomposed
    * @par
    * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition.
-   * The decomposition of A is returning a lower triangular matrix U such that A = U U^t
+   * The decomposition of A is returning a lower triangular matrix L such that A = L L^t
    */
 
 
@@ -96,16 +99,14 @@ arm_status arm_mat_cholesky_f64(
           }
        }
 
-       if (pG[i * n + i] <= 0.0f)
+       if (pG[i * n + i] <= 0.0)
        {
          return(ARM_MATH_DECOMPOSITION_FAILURE);
        }
 
        invSqrtVj = 1.0/sqrt(pG[i * n + i]);
-       for(j=i ; j < n ; j++)
-       {
-         pG[j * n + i] = pG[j * n + i] * invSqrtVj ;
-       }
+       SCALE_COL_F64(pDst,i,invSqrtVj,i);
+
     }
 
     status = ARM_MATH_SUCCESS;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
index 398e5eb..3822659 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_mult_f16.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -69,7 +69,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t   MATRIX_DIM = 2;
+#define MATRIX_DIM 2
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
     float16_t       *pOut = pDst->pData;   /* output data matrix pointer */
@@ -133,6 +133,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
@@ -142,7 +143,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t   MATRIX_DIM = 3;
+#define MATRIX_DIM 3
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
     float16_t       *pOut = pDst->pData;   /* output data matrix pointer */
@@ -228,6 +229,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
@@ -238,7 +240,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t   MATRIX_DIM = 4;
+#define MATRIX_DIM 4
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
     float16_t       *pOut = pDst->pData;   /* output data matrix pointer */
@@ -373,6 +375,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
@@ -417,8 +420,8 @@ if ((pSrcA->numCols != pSrcB->numRows) ||
     {
         if (numRowsA == 1)
         {
-            pOut[0] = pInA[0] * pInB[0] - pInA[1] * pInB[1];
-            pOut[1] = pInA[0] * pInB[1] + pInA[1] * pInB[0];
+            pOut[0] = (_Float16)pInA[0] * (_Float16)pInB[0] - (_Float16)pInA[1] * (_Float16)pInB[1];
+            pOut[1] = (_Float16)pInA[0] * (_Float16)pInB[1] + (_Float16)pInA[1] * (_Float16)pInB[0];
             return (ARM_MATH_SUCCESS);
         }
         else if  (numRowsA == 2)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
index 1619ccd..941849d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_mult_f32.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -534,7 +534,7 @@ arm_status arm_mat_cmplx_mult_f32(
     uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t  col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status status;          /* status of matrix multiplication */
     uint32x4_t vecOffs, vecColBOffs;
     uint32_t  blkCnt, rowCnt;           /* loop counters */
@@ -613,7 +613,6 @@ arm_status arm_mat_cmplx_mult_f32(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             float32_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
             float32_t const *pInA0 = pInA;
@@ -754,7 +753,6 @@ arm_status arm_mat_cmplx_mult_f32(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             float32_t const *pSrcA0Vec;
             float32_t const *pInA0 = pInA;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c
index 120c925..09f457f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cmplx_mat_mult_q15.c
  * Description:  Q15 complex matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -463,8 +463,8 @@ arm_status arm_mat_cmplx_mult_q15(
 #if defined (ARM_MATH_DSP)
 
           /* read real and imag values from pSrcA and pSrcB buffer */
-          pSourceA = read_q15x2_ia ((q15_t **) &pInA);
-          pSourceB = read_q15x2_ia ((q15_t **) &pInB);
+          pSourceA = read_q15x2_ia (&pInA);
+          pSourceB = read_q15x2_ia (&pInB);
 
           /* Multiply and Accumlates */
 #ifdef ARM_MATH_BIG_ENDIAN
@@ -477,8 +477,8 @@ arm_status arm_mat_cmplx_mult_q15(
           sumImag += (q63_t) prod2;
 
           /* read real and imag values from pSrcA and pSrcB buffer */
-          pSourceA = read_q15x2_ia ((q15_t **) &pInA);
-          pSourceB = read_q15x2_ia ((q15_t **) &pInB);
+          pSourceA = read_q15x2_ia (&pInA);
+          pSourceB = read_q15x2_ia (&pInB);
 
           /* Multiply and Accumlates */
 #ifdef ARM_MATH_BIG_ENDIAN
@@ -536,8 +536,8 @@ arm_status arm_mat_cmplx_mult_q15(
 
 #if defined (ARM_MATH_DSP)
           /* read real and imag values from pSrcA and pSrcB buffer */
-          pSourceA = read_q15x2_ia ((q15_t **) &pInA);
-          pSourceB = read_q15x2_ia ((q15_t **) &pInB);
+          pSourceA = read_q15x2_ia (&pInA);
+          pSourceB = read_q15x2_ia (&pInB);
 
           /* Multiply and Accumlates */
 #ifdef ARM_MATH_BIG_ENDIAN
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c
index 960f4a6..9933865 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_mult_q31.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -528,7 +528,7 @@ arm_status arm_mat_cmplx_mult_q31(
     uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t  col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status status;          /* status of matrix multiplication */
     uint32x4_t vecOffs, vecColBOffs;
     uint32_t  blkCnt, rowCnt;           /* loop counters */
@@ -613,7 +613,6 @@ arm_status arm_mat_cmplx_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec, *pSrcA1Vec;
             q31_t const *pInA0 = pInA;
@@ -744,7 +743,6 @@ arm_status arm_mat_cmplx_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec;
             q31_t const *pInA0 = pInA;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f16.c
index bbcbe04..3e96414 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_trans_f16.c
  * Description:  Floating-point complex matrix transpose
  *
- * $Date:        08. July 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c
index 38e77c3..e551d07 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_trans_f32.c
  * Description:  Floating-point complex matrix transpose
  *
- * $Date:        08. July 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -40,7 +40,23 @@
   Tranposes a complex matrix.
 
   Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
-  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
+ 
+  @par Transpose of a 3 x 3 matrix
+  
+  \f[
+  \begin{pmatrix}
+   a_{1,1} & a_{1,2} & a_{1,3} \\
+   a_{2,1} & a_{2,2} & a_{2,3} \\
+   a_{3,1} & a_{3,2} & a_{3,3} \\
+  \end{pmatrix}^T
+   =
+  \begin{pmatrix}
+   a_{1,1} & a_{2,1} & a_{3,1} \\
+   a_{1,2} & a_{2,2} & a_{3,2} \\
+   a_{1,3} & a_{2,3} & a_{3,3} \\
+  \end{pmatrix}
+  \f]
+
  */
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c
index 37d1724..1f80f12 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_trans_q31.c
  * Description:  Q15 complex matrix transpose
  *
- * $Date:        08. July 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c
index 1ad551a..ba38341 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_cmplx_trans_q31.c
  * Description:  Q31 complex matrix transpose
  *
- * $Date:        08. July 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f16.c
index ddf9390..e5a7aa2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_init_f16.c
  * Description:  Floating-point matrix initialization
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c
index 0122f3d..c9348fd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_init_f32.c
  * Description:  Floating-point matrix initialization
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q15.c
index 281b165..bd2a7c2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_init_q15.c
  * Description:  Q15 matrix initialization
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q31.c
index 64f2e7e..a9bcb52 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_init_q31.c
  * Description:  Q31 matrix initialization
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,10 +34,7 @@
   @ingroup groupMatrix
  */
 
-/**
-  @defgroup MatrixInit Matrix Initialization
- 
- */
+
 
 /**
   @addtogroup MatrixInit
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
index 4565796..27ad218 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_inverse_f16.c
  * Description:  Floating-point matrix inverse
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,6 +29,7 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 #if defined(ARM_FLOAT16_SUPPORTED)
 
@@ -52,526 +53,20 @@
                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
                    - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
  */
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-arm_status arm_mat_inverse_f16(
-  const arm_matrix_instance_f16 * pSrc,
-  arm_matrix_instance_f16 * pDst)
-{
-    float16_t *pIn = pSrc->pData;   /* input data matrix pointer */
-    float16_t *pOut = pDst->pData;  /* output data matrix pointer */
-    float16_t *pInT1, *pInT2;   /* Temporary input data matrix pointer */
-    float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
-    float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;    /* Temporary input and output data matrix pointer */
-
-    uint32_t  numRows = pSrc->numRows;  /* Number of rows in the matrix  */
-    uint32_t  numCols = pSrc->numCols;  /* Number of Cols in the matrix  */
-    float16_t *pTmpA, *pTmpB;
-
-    _Float16 in = 0.0f16;        /* Temporary input values  */
-    uint32_t  i, rowCnt, flag = 0U, j, loopCnt, k, l;   /* loop counters */
-    arm_status status;          /* status of matrix inverse */
-    uint32_t  blkCnt;
-
-#ifdef ARM_MATH_MATRIX_CHECK
-   /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
-     || (pSrc->numRows != pDst->numRows))
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
-  {
-
-    /*--------------------------------------------------------------------------------------------------------------
-     * Matrix Inverse can be solved using elementary row operations.
-     *
-     *  Gauss-Jordan Method:
-     *
-     *     1. First combine the identity matrix and the input matrix separated by a bar to form an
-     *        augmented matrix as follows:
-     *                      _  _          _     _      _   _         _         _
-     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
-     *                     |  |            | | |        |   |   =   |           |
-     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X21 _|
-     *
-     *      2. In our implementation, pDst Matrix is used as identity matrix.
-     *
-     *      3. Begin with the first row. Let i = 1.
-     *
-     *      4. Check to see if the pivot for row i is zero.
-     *         The pivot is the element of the main diagonal that is on the current row.
-     *         For instance, if working with row i, then the pivot element is aii.
-     *         If the pivot is zero, exchange that row with a row below it that does not
-     *         contain a zero in column i. If this is not possible, then an inverse
-     *         to that matrix does not exist.
-     *
-     *      5. Divide every element of row i by the pivot.
-     *
-     *      6. For every row below and  row i, replace that row with the sum of that row and
-     *         a multiple of row i so that each new element in column i below row i is zero.
-     *
-     *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
-     *         for every element below and above the main diagonal.
-     *
-     *      8. Now an identical matrix is formed to the left of the bar(input matrix, src).
-     *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
-     *----------------------------------------------------------------------------------------------------------------*/
-
-        /*
-         * Working pointer for destination matrix
-         */
-        pOutT1 = pOut;
-        /*
-         * Loop over the number of rows
-         */
-        rowCnt = numRows;
-        /*
-         * Making the destination matrix as identity matrix
-         */
-        while (rowCnt > 0U)
-        {
-            /*
-             * Writing all zeroes in lower triangle of the destination matrix
-             */
-            j = numRows - rowCnt;
-            while (j > 0U)
-            {
-                *pOutT1++ = 0.0f16;
-                j--;
-            }
-            /*
-             * Writing all ones in the diagonal of the destination matrix
-             */
-            *pOutT1++ = 1.0f16;
-            /*
-             * Writing all zeroes in upper triangle of the destination matrix
-             */
-            j = rowCnt - 1U;
-            while (j > 0U)
-            {
-                *pOutT1++ = 0.0f16;
-                j--;
-            }
-            /*
-             * Decrement the loop counter
-             */
-            rowCnt--;
-        }
-
-        /*
-         * Loop over the number of columns of the input matrix.
-         * All the elements in each column are processed by the row operations
-         */
-        loopCnt = numCols;
-        /*
-         * Index modifier to navigate through the columns
-         */
-        l = 0U;
-        while (loopCnt > 0U)
-        {
-            /*
-             * Check if the pivot element is zero..
-             * If it is zero then interchange the row with non zero row below.
-             * If there is no non zero element to replace in the rows below,
-             * then the matrix is Singular.
-             */
-
-            /*
-             * Working pointer for the input matrix that points
-             * * to the pivot element of the particular row
-             */
-            pInT1 = pIn + (l * numCols);
-            /*
-             * Working pointer for the destination matrix that points
-             * * to the pivot element of the particular row
-             */
-            pOutT1 = pOut + (l * numCols);
-            /*
-             * Temporary variable to hold the pivot value
-             */
-            in = *pInT1;
-            /*
-             * Destination pointer modifier
-             */
-            k = 1U;
-
-            /*
-             * Check if the pivot element is zero
-             */
-            if (*pInT1 == 0.0f16)
-            {
-                /*
-                 * Loop over the number rows present below
-                 */
-                for (i = (l + 1U); i < numRows; i++)
-                {
-                    /*
-                     * Update the input and destination pointers
-                     */
-                    pInT2 = pInT1 + (numCols * i);
-                    pOutT2 = pOutT1 + (numCols * k);
-                    /*
-                     * Check if there is a non zero pivot element to
-                     * * replace in the rows below
-                     */
-                    if (*pInT2 != 0.0f16)
-                    {
-                        f16x8_t vecA, vecB;
-                        /*
-                         * Loop over number of columns
-                         * * to the right of the pilot element
-                         */
-                        pTmpA = pInT1;
-                        pTmpB = pInT2;
-                        blkCnt = (numCols - l) >> 3;
-                        while (blkCnt > 0U)
-                        {
-                            
-                            vecA = vldrhq_f16(pTmpA);
-                            vecB = vldrhq_f16(pTmpB);
-                            vstrhq_f16(pTmpB, vecA);
-                            vstrhq_f16(pTmpA, vecB);
-
-                            pTmpA += 8;
-                            pTmpB += 8;
-                            /*
-                             * Decrement the blockSize loop counter
-                             */
-                            blkCnt--;
-                        }
-                        /*
-                         * tail
-                         * (will be merged thru tail predication)
-                         */
-                        blkCnt = (numCols - l) & 7;
-                        if (blkCnt > 0U)
-                        {
-                            mve_pred16_t p0 = vctp16q(blkCnt);
-
-                            vecA = vldrhq_f16(pTmpA);
-                            vecB = vldrhq_f16(pTmpB);
-                            vstrhq_p_f16(pTmpB, vecA, p0);
-                            vstrhq_p_f16(pTmpA, vecB, p0);
-                        }
-
-                        pInT1 += numCols - l;
-                        pInT2 += numCols - l;
-                        pTmpA = pOutT1;
-                        pTmpB = pOutT2;
-                        blkCnt = numCols >> 3;
-                        while (blkCnt > 0U)
-                        {
-
-                            vecA = vldrhq_f16(pTmpA);
-                            vecB = vldrhq_f16(pTmpB);
-                            vstrhq_f16(pTmpB, vecA);
-                            vstrhq_f16(pTmpA, vecB);
-                            pTmpA += 8;
-                            pTmpB += 8;
-                            /*
-                             * Decrement the blockSize loop counter
-                             */
-                            blkCnt--;
-                        }
-                        /*
-                         * tail
-                         */
-                        blkCnt = numCols & 7;
-                        if (blkCnt > 0U)
-                        {
-                            mve_pred16_t p0 = vctp16q(blkCnt);
-
-                            vecA = vldrhq_f16(pTmpA);
-                            vecB = vldrhq_f16(pTmpB);
-                            vstrhq_p_f16(pTmpB, vecA, p0);
-                            vstrhq_p_f16(pTmpA, vecB, p0);
-                        }
-
-                        pOutT1 += numCols;
-                        pOutT2 += numCols;
-                        /*
-                         * Flag to indicate whether exchange is done or not
-                         */
-                        flag = 1U;
-
-                        /*
-                         * Break after exchange is done
-                         */
-                        break;
-                    }
-                    /*
-                     * Update the destination pointer modifier
-                     */
-                    k++;
-                }
-            }
-
-            /*
-             * Update the status if the matrix is singular
-             */
-            if ((flag != 1U) && (in == 0.0f16))
-            {
-                return ARM_MATH_SINGULAR;
-            }
-
-            /*
-             * Points to the pivot row of input and destination matrices
-             */
-            pPivotRowIn = pIn + (l * numCols);
-            pPivotRowDst = pOut + (l * numCols);
-
-            /*
-             * Temporary pointers to the pivot row pointers
-             */
-            pInT1 = pPivotRowIn;
-            pOutT1 = pPivotRowDst;
-
-            /*
-             * Pivot element of the row
-             */
-            in = *(pIn + (l * numCols));
-
-            pTmpA = pInT1;
-
-            f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
-
-            blkCnt = (numCols - l) >> 3;
-            f16x8_t vecA;
-            while (blkCnt > 0U)
-            {
-                *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA * invIn;
-                pTmpA += 8;
-                /*
-                 * Decrement the blockSize loop counter
-                 */
-                blkCnt--;
-            }
-            /*
-             * tail
-             */
-            blkCnt = (numCols - l) & 7;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp16q(blkCnt);
-                
-
-                vecA = vldrhq_f16(pTmpA);
-                vecA = vecA * invIn;
-                vstrhq_p_f16(pTmpA, vecA, p0);
-            }
-
-            pInT1 += numCols - l;
-            /*
-             * Loop over number of columns
-             * * to the right of the pilot element
-             */
-
-            pTmpA = pOutT1;
-            blkCnt = numCols >> 3;
-            while (blkCnt > 0U)
-            {
-                *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA *invIn;
-                pTmpA += 8;
-                /*
-                 * Decrement the blockSize loop counter
-                 */
-                blkCnt--;
-            }
-            /*
-             * tail
-             * (will be merged thru tail predication)
-             */
-            blkCnt = numCols & 7;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp16q(blkCnt);
-
-                vecA = vldrhq_f16(pTmpA);
-                vecA = vecA * invIn;
-                vstrhq_p_f16(pTmpA, vecA, p0);
-            }
-
-            pOutT1 += numCols;
-
-            /*
-             * Replace the rows with the sum of that row and a multiple of row i
-             * * so that each new element in column i above row i is zero.
-             */
-
-            /*
-             * Temporary pointers for input and destination matrices
-             */
-            pInT1 = pIn;
-            pOutT1 = pOut;
-
-            for (i = 0U; i < numRows; i++)
-            {
-                /*
-                 * Check for the pivot element
-                 */
-                if (i == l)
-                {
-                    /*
-                     * If the processing element is the pivot element,
-                     * only the columns to the right are to be processed
-                     */
-                    pInT1 += numCols - l;
-                    pOutT1 += numCols;
-                }
-                else
-                {
-                    /*
-                     * Element of the reference row
-                     */
-
-                    /*
-                     * Working pointers for input and destination pivot rows
-                     */
-                    pPRT_in = pPivotRowIn;
-                    pPRT_pDst = pPivotRowDst;
-                    /*
-                     * Loop over the number of columns to the right of the pivot element,
-                     * to replace the elements in the input matrix
-                     */
-
-                    in = *pInT1;
-                    f16x8_t tmpV = vdupq_n_f16(in);
-
-                    blkCnt = (numCols - l) >> 3;
-                    while (blkCnt > 0U)
-                    {
-                        f16x8_t vec1, vec2;
-                        /*
-                         * Replace the element by the sum of that row
-                         * and a multiple of the reference row
-                         */
-                        vec1 = vldrhq_f16(pInT1);
-                        vec2 = vldrhq_f16(pPRT_in);
-                        vec1 = vfmsq_f16(vec1, tmpV, vec2);
-                        vstrhq_f16(pInT1, vec1);
-                        pPRT_in += 8;
-                        pInT1 += 8;
-                        /*
-                         * Decrement the blockSize loop counter
-                         */
-                        blkCnt--;
-                    }
-                    /*
-                     * tail
-                     * (will be merged thru tail predication)
-                     */
-                    blkCnt = (numCols - l) & 7;
-                    if (blkCnt > 0U)
-                    {
-                        f16x8_t vec1, vec2;
-                        mve_pred16_t p0 = vctp16q(blkCnt);
-
-                        vec1 = vldrhq_f16(pInT1);
-                        vec2 = vldrhq_f16(pPRT_in);
-                        vec1 = vfmsq_f16(vec1, tmpV, vec2);
-                        vstrhq_p_f16(pInT1, vec1, p0);
-                        pInT1 += blkCnt;
-                    }
-
-                    blkCnt = numCols >> 3;
-                    while (blkCnt > 0U)
-                    {
-                        f16x8_t vec1, vec2;
-
-                        /*
-                         * Replace the element by the sum of that row
-                         * and a multiple of the reference row
-                         */
-                        vec1 = vldrhq_f16(pOutT1);
-                        vec2 = vldrhq_f16(pPRT_pDst);
-                        vec1 = vfmsq_f16(vec1, tmpV, vec2);
-                        vstrhq_f16(pOutT1, vec1);
-                        pPRT_pDst += 8;
-                        pOutT1 += 8;
-                        /*
-                         * Decrement the blockSize loop counter
-                         */
-                        blkCnt--;
-                    }
-                    /*
-                     * tail
-                     * (will be merged thru tail predication)
-                     */
-                    blkCnt = numCols & 7;
-                    if (blkCnt > 0U)
-                    {
-                        f16x8_t vec1, vec2;
-                        mve_pred16_t p0 = vctp16q(blkCnt);
-
-                        vec1 = vldrhq_f16(pOutT1);
-                        vec2 = vldrhq_f16(pPRT_pDst);
-                        vec1 = vfmsq_f16(vec1, tmpV, vec2);
-                        vstrhq_p_f16(pOutT1, vec1, p0);
-
-                        pInT2 += blkCnt;
-                        pOutT1 += blkCnt;
-                    }
-                }
-                /*
-                 * Increment the temporary input pointer
-                 */
-                pInT1 = pInT1 + l;
-            }
-            /*
-             * Increment the input pointer
-             */
-            pIn++;
-            /*
-             * Decrement the loop counter
-             */
-            loopCnt--;
-            /*
-             * Increment the index modifier
-             */
-            l++;
-        }
-
-        /*
-         * Set status as ARM_MATH_SUCCESS
-         */
-        status = ARM_MATH_SUCCESS;
-
-        if ((flag != 1U) && (in == 0.0f16))
-        {
-            pIn = pSrc->pData;
-            for (i = 0; i < numRows * numCols; i++)
-            {
-                if (pIn[i] != 0.0f16)
-                    break;
-            }
-
-            if (i == numRows * numCols)
-                status = ARM_MATH_SINGULAR;
-        }
-  }
-  /* Return to application */
-  return (status);
-}
-
-#else
-
 arm_status arm_mat_inverse_f16(
   const arm_matrix_instance_f16 * pSrc,
         arm_matrix_instance_f16 * pDst)
 {
   float16_t *pIn = pSrc->pData;                  /* input data matrix pointer */
   float16_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float16_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
-  float16_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
-  float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  
+  float16_t *pTmp;
   uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
   uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
 
-  _Float16 Xchg, in = 0.0f16, in1;                /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
+
+  float16_t pivot = 0.0f16, newPivot=0.0f16;                /* Temporary input values  */
+  uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column;      /* loop counters */
   arm_status status;                             /* status of matrix inverse */
 
 #ifdef ARM_MATH_MATRIX_CHECK
@@ -589,7 +84,6 @@ arm_status arm_mat_inverse_f16(
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
 
   {
-
     /*--------------------------------------------------------------------------------------------------------------
      * Matrix Inverse can be solved using elementary row operations.
      *
@@ -626,7 +120,7 @@ arm_status arm_mat_inverse_f16(
      *----------------------------------------------------------------------------------------------------------------*/
 
     /* Working pointer for destination matrix */
-    pOutT1 = pOut;
+    pTmp = pOut;
 
     /* Loop over the number of rows */
     rowCnt = numRows;
@@ -638,18 +132,18 @@ arm_status arm_mat_inverse_f16(
       j = numRows - rowCnt;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0f16;
+        *pTmp++ = 0.0f16;
         j--;
       }
 
       /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0f16;
+      *pTmp++ = 1.0f16;
 
       /* Writing all zeroes in upper triangle of the destination matrix */
       j = rowCnt - 1U;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0f16;
+        *pTmp++ = 0.0f16;
         j--;
       }
 
@@ -659,232 +153,105 @@ arm_status arm_mat_inverse_f16(
 
     /* Loop over the number of columns of the input matrix.
        All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
 
     /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
+    for(column = 0U; column < numCols; column++)
     {
       /* Check if the pivot element is zero..
        * If it is zero then interchange the row with non zero row below.
        * If there is no non zero element to replace in the rows below,
        * then the matrix is Singular. */
 
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
+      pivotRow = column;
 
       /* Temporary variable to hold the pivot value */
-      in = *pInT1;
-
-      
-      /* Destination pointer modifier */
-      k = 1U;
+      pTmp = ELEM(pSrc,column,column) ;
+      pivot = *pTmp;
+      selectedRow = column;
 
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0f16)
-      {
+     
         /* Loop over the number rows present below */
 
-        for (i = (l + 1U); i < numRows; i++)
-        {
+      for (rowNb = column+1; rowNb < numRows; rowNb++)
+      {
           /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
+          pTmp = ELEM(pSrc,rowNb,column);
+          newPivot = *pTmp;
+          if (fabsf((float32_t)newPivot) > fabsf((float32_t)pivot))
+          {
+            selectedRow = rowNb; 
+            pivot = newPivot;
+          }
+
+      }
 
           /* Check if there is a non zero pivot element to
            * replace in the rows below */
-          if (*pInT2 != 0.0f16)
-          {
+      if (((_Float16)pivot != 0.0f16) && (selectedRow != column))
+      {
             /* Loop over number of columns
              * to the right of the pilot element */
-            j = numCols - l;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
 
-              /* Decrement the loop counter */
-              j--;
-            }
-
-            /* Loop over number of columns of the destination matrix */
-            j = numCols;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the destination matrix */
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-
-              /* Decrement loop counter */
-              j--;
-            }
+            SWAP_ROWS_F16(pSrc,column, pivotRow,selectedRow);
+            SWAP_ROWS_F16(pDst,0, pivotRow,selectedRow);
 
+    
             /* Flag to indicate whether exchange is done or not */
             flag = 1U;
 
-            /* Break after exchange is done */
-            break;
-          }
-
-          /* Update the destination pointer modifier */
-          k++;
-
-          /* Decrement loop counter */
-        }
       }
 
+
       /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0f16))
+      if ((flag != 1U) && ((_Float16)pivot == 0.0f16))
       {
         return ARM_MATH_SINGULAR;
       }
 
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
-
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pInT2 = pPivotRowDst;
-
+     
       /* Pivot element of the row */
-      in = *pPivotRowIn;
+      pivot = 1.0f16 / (_Float16)pivot;
 
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      j = (numCols - l);
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        in1 = *pInT1;
-        *pInT1++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Loop over number of columns of the destination matrix */
-      j = numCols;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        in1 = *pInT2;
-        *pInT2++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
+      SCALE_ROW_F16(pSrc,column,pivot,pivotRow);
+      SCALE_ROW_F16(pDst,0,pivot,pivotRow);
 
+      
       /* Replace the rows with the sum of that row and a multiple of row i
        * so that each new element in column i above row i is zero.*/
 
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pInT2 = pOut;
-
-      /* index used to check for pivot element */
-      i = 0U;
-
-      /* Loop over number of rows */
-      /*  to be replaced by the sum of that row and a multiple of row i */
-      k = numRows;
-
-      while (k > 0U)
+      rowNb = 0;
+      for (;rowNb < pivotRow; rowNb++)
       {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-
-          pInT2 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          j = (numCols - l);
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT1;
-            *pInT1++ = in1 - (in * *pPRT_in++);
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          j = numCols;
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT2;
-            *pInT2++ = in1 - (in * *pPRT_pDst++);
+           MAS_ROW_F16(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F16(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-            /* Decrement loop counter */
-            j--;
-          }
 
-        }
+      }
 
-        /* Increment temporary input pointer */
-        pInT1 = pInT1 + l;
+      for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++)
+      {
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-        /* Decrement loop counter */
-        k--;
+           MAS_ROW_F16(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F16(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-        /* Increment pivot index */
-        i++;
       }
 
-      /* Increment the input pointer */
-      pIn++;
-
-      /* Decrement the loop counter */
-      loopCnt--;
-
-      /* Increment the index modifier */
-      l++;
     }
 
     /* Set status as ARM_MATH_SUCCESS */
     status = ARM_MATH_SUCCESS;
 
-    if ((flag != 1U) && (in == 0.0f16))
+    if ((flag != 1U) && ((_Float16)pivot == 0.0f16))
     {
       pIn = pSrc->pData;
       for (i = 0; i < numRows * numCols; i++)
       {
-        if (pIn[i] != 0.0f16)
+        if ((_Float16)pIn[i] != 0.0f16)
             break;
       }
 
@@ -896,8 +263,6 @@ arm_status arm_mat_inverse_f16(
   /* Return to application */
   return (status);
 }
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
 /**
   @} end of MatrixInv group
  */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
index f4c753b..83e8577 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_inverse_f32.c
  * Description:  Floating-point matrix inverse
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,6 +29,8 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
 
 /**
   @ingroup groupMatrix
@@ -52,7 +54,22 @@
   of elementary row-operations to an identity matrix yields the inverse matrix.
   If the input matrix is singular, then the algorithm terminates and returns error status
   <code>ARM_MATH_SINGULAR</code>.
-  \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method"
+ 
+  @par Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method 
+
+  \f[
+  \begin{pmatrix}
+   a_{1,1} & a_{1,2} & a_{1,3} & | & 1 & 0 & 0\\
+   a_{2,1} & a_{2,2} & a_{2,3} & | & 0 & 1 & 0\\
+   a_{3,1} & a_{3,2} & a_{3,3} & | & 0 & 0 & 1\\
+  \end{pmatrix}
+  \rightarrow
+  \begin{pmatrix}
+   1 & 0 & 0 & | & x_{1,1} & x_{2,1} & x_{3,1} \\
+   0 & 1 & 0 & | & x_{1,2} & x_{2,2} & x_{3,2} \\
+   0 & 0 & 1 & | & x_{1,3} & x_{2,3} & x_{3,3} \\
+  \end{pmatrix}
+  \f]
  */
 
 /**
@@ -69,937 +86,20 @@
                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
                    - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
  */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-arm_status arm_mat_inverse_f32(
-  const arm_matrix_instance_f32 * pSrc,
-  arm_matrix_instance_f32 * pDst)
-{
-    float32_t *pIn = pSrc->pData;   /* input data matrix pointer */
-    float32_t *pOut = pDst->pData;  /* output data matrix pointer */
-    float32_t *pInT1, *pInT2;   /* Temporary input data matrix pointer */
-    float32_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
-    float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;    /* Temporary input and output data matrix pointer */
-
-    uint32_t  numRows = pSrc->numRows;  /* Number of rows in the matrix  */
-    uint32_t  numCols = pSrc->numCols;  /* Number of Cols in the matrix  */
-    float32_t *pTmpA, *pTmpB;
-
-    float32_t in = 0.0f;        /* Temporary input values  */
-    uint32_t  i, rowCnt, flag = 0U, j, loopCnt, k, l;   /* loop counters */
-    arm_status status;          /* status of matrix inverse */
-    uint32_t  blkCnt;
-
-#ifdef ARM_MATH_MATRIX_CHECK
-   /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
-     || (pSrc->numRows != pDst->numRows))
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
-  {
-
-    /*--------------------------------------------------------------------------------------------------------------
-     * Matrix Inverse can be solved using elementary row operations.
-     *
-     *  Gauss-Jordan Method:
-     *
-     *     1. First combine the identity matrix and the input matrix separated by a bar to form an
-     *        augmented matrix as follows:
-     *                      _  _          _     _      _   _         _         _
-     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
-     *                     |  |            | | |        |   |   =   |           |
-     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X21 _|
-     *
-     *      2. In our implementation, pDst Matrix is used as identity matrix.
-     *
-     *      3. Begin with the first row. Let i = 1.
-     *
-     *      4. Check to see if the pivot for row i is zero.
-     *         The pivot is the element of the main diagonal that is on the current row.
-     *         For instance, if working with row i, then the pivot element is aii.
-     *         If the pivot is zero, exchange that row with a row below it that does not
-     *         contain a zero in column i. If this is not possible, then an inverse
-     *         to that matrix does not exist.
-     *
-     *      5. Divide every element of row i by the pivot.
-     *
-     *      6. For every row below and  row i, replace that row with the sum of that row and
-     *         a multiple of row i so that each new element in column i below row i is zero.
-     *
-     *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
-     *         for every element below and above the main diagonal.
-     *
-     *      8. Now an identical matrix is formed to the left of the bar(input matrix, src).
-     *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
-     *----------------------------------------------------------------------------------------------------------------*/
-
-        /*
-         * Working pointer for destination matrix
-         */
-        pOutT1 = pOut;
-        /*
-         * Loop over the number of rows
-         */
-        rowCnt = numRows;
-        /*
-         * Making the destination matrix as identity matrix
-         */
-        while (rowCnt > 0U)
-        {
-            /*
-             * Writing all zeroes in lower triangle of the destination matrix
-             */
-            j = numRows - rowCnt;
-            while (j > 0U)
-            {
-                *pOutT1++ = 0.0f;
-                j--;
-            }
-            /*
-             * Writing all ones in the diagonal of the destination matrix
-             */
-            *pOutT1++ = 1.0f;
-            /*
-             * Writing all zeroes in upper triangle of the destination matrix
-             */
-            j = rowCnt - 1U;
-            while (j > 0U)
-            {
-                *pOutT1++ = 0.0f;
-                j--;
-            }
-            /*
-             * Decrement the loop counter
-             */
-            rowCnt--;
-        }
-
-        /*
-         * Loop over the number of columns of the input matrix.
-         * All the elements in each column are processed by the row operations
-         */
-        loopCnt = numCols;
-        /*
-         * Index modifier to navigate through the columns
-         */
-        l = 0U;
-        while (loopCnt > 0U)
-        {
-            /*
-             * Check if the pivot element is zero..
-             * If it is zero then interchange the row with non zero row below.
-             * If there is no non zero element to replace in the rows below,
-             * then the matrix is Singular.
-             */
-
-            /*
-             * Working pointer for the input matrix that points
-             * * to the pivot element of the particular row
-             */
-            pInT1 = pIn + (l * numCols);
-            /*
-             * Working pointer for the destination matrix that points
-             * * to the pivot element of the particular row
-             */
-            pOutT1 = pOut + (l * numCols);
-            /*
-             * Temporary variable to hold the pivot value
-             */
-            in = *pInT1;
-            /*
-             * Destination pointer modifier
-             */
-            k = 1U;
-
-            /*
-             * Check if the pivot element is zero
-             */
-            if (*pInT1 == 0.0f)
-            {
-                /*
-                 * Loop over the number rows present below
-                 */
-                for (i = (l + 1U); i < numRows; i++)
-                {
-                    /*
-                     * Update the input and destination pointers
-                     */
-                    pInT2 = pInT1 + (numCols * i);
-                    pOutT2 = pOutT1 + (numCols * k);
-                    /*
-                     * Check if there is a non zero pivot element to
-                     * * replace in the rows below
-                     */
-                    if (*pInT2 != 0.0f)
-                    {
-                        f32x4_t vecA, vecB;
-                        /*
-                         * Loop over number of columns
-                         * * to the right of the pilot element
-                         */
-                        pTmpA = pInT1;
-                        pTmpB = pInT2;
-                        blkCnt = (numCols - l) >> 2;
-                        while (blkCnt > 0U)
-                        {
-                            
-                            vecA = vldrwq_f32(pTmpA);
-                            vecB = vldrwq_f32(pTmpB);
-                            vstrwq_f32(pTmpB, vecA);
-                            vstrwq_f32(pTmpA, vecB);
-
-                            pTmpA += 4;
-                            pTmpB += 4;
-                            /*
-                             * Decrement the blockSize loop counter
-                             */
-                            blkCnt--;
-                        }
-                        /*
-                         * tail
-                         * (will be merged thru tail predication)
-                         */
-                        blkCnt = (numCols - l) & 3;
-                        if (blkCnt > 0U)
-                        {
-                            mve_pred16_t p0 = vctp32q(blkCnt);
-
-                            vecA = vldrwq_f32(pTmpA);
-                            vecB = vldrwq_f32(pTmpB);
-                            vstrwq_p_f32(pTmpB, vecA, p0);
-                            vstrwq_p_f32(pTmpA, vecB, p0);
-                        }
-
-                        pInT1 += numCols - l;
-                        pInT2 += numCols - l;
-                        pTmpA = pOutT1;
-                        pTmpB = pOutT2;
-                        blkCnt = numCols >> 2;
-                        while (blkCnt > 0U)
-                        {
-
-                            vecA = vldrwq_f32(pTmpA);
-                            vecB = vldrwq_f32(pTmpB);
-                            vstrwq_f32(pTmpB, vecA);
-                            vstrwq_f32(pTmpA, vecB);
-                            pTmpA += 4;
-                            pTmpB += 4;
-                            /*
-                             * Decrement the blockSize loop counter
-                             */
-                            blkCnt--;
-                        }
-                        /*
-                         * tail
-                         */
-                        blkCnt = numCols & 3;
-                        if (blkCnt > 0U)
-                        {
-                            mve_pred16_t p0 = vctp32q(blkCnt);
-
-                            vecA = vldrwq_f32(pTmpA);
-                            vecB = vldrwq_f32(pTmpB);
-                            vstrwq_p_f32(pTmpB, vecA, p0);
-                            vstrwq_p_f32(pTmpA, vecB, p0);
-                        }
-
-                        pOutT1 += numCols;
-                        pOutT2 += numCols;
-                        /*
-                         * Flag to indicate whether exchange is done or not
-                         */
-                        flag = 1U;
-
-                        /*
-                         * Break after exchange is done
-                         */
-                        break;
-                    }
-                    /*
-                     * Update the destination pointer modifier
-                     */
-                    k++;
-                }
-            }
-
-            /*
-             * Update the status if the matrix is singular
-             */
-            if ((flag != 1U) && (in == 0.0f))
-            {
-                return ARM_MATH_SINGULAR;
-            }
-
-            /*
-             * Points to the pivot row of input and destination matrices
-             */
-            pPivotRowIn = pIn + (l * numCols);
-            pPivotRowDst = pOut + (l * numCols);
-
-            /*
-             * Temporary pointers to the pivot row pointers
-             */
-            pInT1 = pPivotRowIn;
-            pOutT1 = pPivotRowDst;
-
-            /*
-             * Pivot element of the row
-             */
-            in = *(pIn + (l * numCols));
-
-            pTmpA = pInT1;
-
-            f32x4_t invIn = vdupq_n_f32(1.0f / in);
-
-            blkCnt = (numCols - l) >> 2;
-            f32x4_t vecA;
-            while (blkCnt > 0U)
-            {
-                *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA * invIn;
-                pTmpA += 4;
-                /*
-                 * Decrement the blockSize loop counter
-                 */
-                blkCnt--;
-            }
-            /*
-             * tail
-             */
-            blkCnt = (numCols - l) & 3;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp32q(blkCnt);
-                
-
-                vecA = vldrwq_f32(pTmpA);
-                vecA = vecA * invIn;
-                vstrwq_p_f32(pTmpA, vecA, p0);
-            }
-
-            pInT1 += numCols - l;
-            /*
-             * Loop over number of columns
-             * * to the right of the pilot element
-             */
-
-            pTmpA = pOutT1;
-            blkCnt = numCols >> 2;
-            while (blkCnt > 0U)
-            {
-                *(f32x4_t *) pTmpA = *(f32x4_t *) pTmpA *invIn;
-                pTmpA += 4;
-                /*
-                 * Decrement the blockSize loop counter
-                 */
-                blkCnt--;
-            }
-            /*
-             * tail
-             * (will be merged thru tail predication)
-             */
-            blkCnt = numCols & 3;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp32q(blkCnt);
-
-                vecA = vldrwq_f32(pTmpA);
-                vecA = vecA * invIn;
-                vstrwq_p_f32(pTmpA, vecA, p0);
-            }
-
-            pOutT1 += numCols;
-
-            /*
-             * Replace the rows with the sum of that row and a multiple of row i
-             * * so that each new element in column i above row i is zero.
-             */
-
-            /*
-             * Temporary pointers for input and destination matrices
-             */
-            pInT1 = pIn;
-            pOutT1 = pOut;
-
-            for (i = 0U; i < numRows; i++)
-            {
-                /*
-                 * Check for the pivot element
-                 */
-                if (i == l)
-                {
-                    /*
-                     * If the processing element is the pivot element,
-                     * only the columns to the right are to be processed
-                     */
-                    pInT1 += numCols - l;
-                    pOutT1 += numCols;
-                }
-                else
-                {
-                    /*
-                     * Element of the reference row
-                     */
-
-                    /*
-                     * Working pointers for input and destination pivot rows
-                     */
-                    pPRT_in = pPivotRowIn;
-                    pPRT_pDst = pPivotRowDst;
-                    /*
-                     * Loop over the number of columns to the right of the pivot element,
-                     * to replace the elements in the input matrix
-                     */
-
-                    in = *pInT1;
-                    f32x4_t tmpV = vdupq_n_f32(in);
-
-                    blkCnt = (numCols - l) >> 2;
-                    while (blkCnt > 0U)
-                    {
-                        f32x4_t vec1, vec2;
-                        /*
-                         * Replace the element by the sum of that row
-                         * and a multiple of the reference row
-                         */
-                        vec1 = vldrwq_f32(pInT1);
-                        vec2 = vldrwq_f32(pPRT_in);
-                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
-                        vstrwq_f32(pInT1, vec1);
-                        pPRT_in += 4;
-                        pInT1 += 4;
-                        /*
-                         * Decrement the blockSize loop counter
-                         */
-                        blkCnt--;
-                    }
-                    /*
-                     * tail
-                     * (will be merged thru tail predication)
-                     */
-                    blkCnt = (numCols - l) & 3;
-                    if (blkCnt > 0U)
-                    {
-                        f32x4_t vec1, vec2;
-                        mve_pred16_t p0 = vctp32q(blkCnt);
-
-                        vec1 = vldrwq_f32(pInT1);
-                        vec2 = vldrwq_f32(pPRT_in);
-                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
-                        vstrwq_p_f32(pInT1, vec1, p0);
-                        pInT1 += blkCnt;
-                    }
-
-                    blkCnt = numCols >> 2;
-                    while (blkCnt > 0U)
-                    {
-                        f32x4_t vec1, vec2;
-
-                        /*
-                         * Replace the element by the sum of that row
-                         * and a multiple of the reference row
-                         */
-                        vec1 = vldrwq_f32(pOutT1);
-                        vec2 = vldrwq_f32(pPRT_pDst);
-                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
-                        vstrwq_f32(pOutT1, vec1);
-                        pPRT_pDst += 4;
-                        pOutT1 += 4;
-                        /*
-                         * Decrement the blockSize loop counter
-                         */
-                        blkCnt--;
-                    }
-                    /*
-                     * tail
-                     * (will be merged thru tail predication)
-                     */
-                    blkCnt = numCols & 3;
-                    if (blkCnt > 0U)
-                    {
-                        f32x4_t vec1, vec2;
-                        mve_pred16_t p0 = vctp32q(blkCnt);
-
-                        vec1 = vldrwq_f32(pOutT1);
-                        vec2 = vldrwq_f32(pPRT_pDst);
-                        vec1 = vfmsq_f32(vec1, tmpV, vec2);
-                        vstrwq_p_f32(pOutT1, vec1, p0);
-
-                        pInT2 += blkCnt;
-                        pOutT1 += blkCnt;
-                    }
-                }
-                /*
-                 * Increment the temporary input pointer
-                 */
-                pInT1 = pInT1 + l;
-            }
-            /*
-             * Increment the input pointer
-             */
-            pIn++;
-            /*
-             * Decrement the loop counter
-             */
-            loopCnt--;
-            /*
-             * Increment the index modifier
-             */
-            l++;
-        }
-
-        /*
-         * Set status as ARM_MATH_SUCCESS
-         */
-        status = ARM_MATH_SUCCESS;
-
-        if ((flag != 1U) && (in == 0.0f))
-        {
-            pIn = pSrc->pData;
-            for (i = 0; i < numRows * numCols; i++)
-            {
-                if (pIn[i] != 0.0f)
-                    break;
-            }
-
-            if (i == numRows * numCols)
-                status = ARM_MATH_SINGULAR;
-        }
-  }
-  /* Return to application */
-  return (status);
-}
-
-#else
-#if defined(ARM_MATH_NEON)
-arm_status arm_mat_inverse_f32(
-  const arm_matrix_instance_f32 * pSrc,
-  arm_matrix_instance_f32 * pDst)
-{
-  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
-  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float32_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
-  float32_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
-  float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
-  uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
-  uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
-
-
-  float32_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
-  arm_status status;                             /* status of matrix inverse */
-  float32x4_t vec1;
-  float32x4_t vec2;
-  float32x4_t tmpV;
-
-#ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
-     || (pSrc->numRows != pDst->numRows))
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
-
-  {
-   /*--------------------------------------------------------------------------------------------------------------
-   * Matrix Inverse can be solved using elementary row operations.
-   *
-   *  Gauss-Jordan Method:
-   *
-   *     1. First combine the identity matrix and the input matrix separated by a bar to form an
-   *        augmented matrix as follows:
-   *              _                  _         _         _
-   *             |  a11  a12 | 1   0  |       |  X11 X12  |
-   *             |           |        |   =   |           |
-   *             |_ a21  a22 | 0   1 _|       |_ X21 X21 _|
-   *
-   *    2. In our implementation, pDst Matrix is used as identity matrix.
-   *
-   *    3. Begin with the first row. Let i = 1.
-   *
-   *    4. Check to see if the pivot for row i is zero.
-   *       The pivot is the element of the main diagonal that is on the current row.
-   *       For instance, if working with row i, then the pivot element is aii.
-   *       If the pivot is zero, exchange that row with a row below it that does not
-   *       contain a zero in column i. If this is not possible, then an inverse
-   *       to that matrix does not exist.
-   *
-   *      5. Divide every element of row i by the pivot.
-   *
-   *      6. For every row below and  row i, replace that row with the sum of that row and
-   *       a multiple of row i so that each new element in column i below row i is zero.
-   *
-   *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
-   *       for every element below and above the main diagonal.
-   *
-   *    8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
-   *       Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
-   *----------------------------------------------------------------------------------------------------------------*/
-
-    /* Working pointer for destination matrix */
-    pOutT1 = pOut;
-
-    /* Loop over the number of rows */
-    rowCnt = numRows;
-
-    /* Making the destination matrix as identity matrix */
-    while (rowCnt > 0U)
-    {
-      /* Writing all zeroes in lower triangle of the destination matrix */
-      j = numRows - rowCnt;
-      while (j > 0U)
-      {
-        *pOutT1++ = 0.0f;
-        j--;
-      }
-
-      /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0f;
-
-      /* Writing all zeroes in upper triangle of the destination matrix */
-      j = rowCnt - 1U;
-
-      while (j > 0U)
-      {
-        *pOutT1++ = 0.0f;
-        j--;
-      }
-
-      /* Decrement the loop counter */
-      rowCnt--;
-    }
-
-    /* Loop over the number of columns of the input matrix.
-       All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
-
-    /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
-    {
-      /* Check if the pivot element is zero..
-       * If it is zero then interchange the row with non zero row below.
-       * If there is no non zero element to replace in the rows below,
-       * then the matrix is Singular. */
-
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
-
-      /* Temporary variable to hold the pivot value */
-      in = *pInT1;
-
-    
-      /* Destination pointer modifier */
-      k = 1U;
-
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0f)
-      {
-        /* Loop over the number rows present below */
-        for (i = (l + 1U); i < numRows; i++)
-        {
-          /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
-
-          /* Check if there is a non zero pivot element to
-           * replace in the rows below */
-          if (*pInT2 != 0.0f)
-          {
-            /* Loop over number of columns
-             * to the right of the pilot element */
-            j = numCols - l;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
-
-              /* Decrement the loop counter */
-              j--;
-            }
-
-            /* Loop over number of columns of the destination matrix */
-            j = numCols;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the destination matrix */
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-
-              /* Decrement the loop counter */
-              j--;
-            }
-
-            /* Flag to indicate whether exchange is done or not */
-            flag = 1U;
-
-            /* Break after exchange is done */
-            break;
-          }
-
-          /* Update the destination pointer modifier */
-          k++;
-        }
-      }
-
-      /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0f))
-      {
-        return ARM_MATH_SINGULAR;
-      }
-
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
-
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pInT2 = pPivotRowDst;
-
-      /* Pivot element of the row */
-      in = *pPivotRowIn;
-      tmpV = vdupq_n_f32(1.0f/in);
-
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      j = (numCols - l) >> 2;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        vec1 = vld1q_f32(pInT1);
-
-        vec1 = vmulq_f32(vec1, tmpV);
-        vst1q_f32(pInT1, vec1);
-        pInT1 += 4;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Tail */
-      j = (numCols - l) & 3;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        in1 = *pInT1;
-        *pInT1++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Loop over number of columns of the destination matrix */
-      j = numCols >> 2;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        vec1 = vld1q_f32(pInT2);
-
-        vec1 = vmulq_f32(vec1, tmpV);
-        vst1q_f32(pInT2, vec1);
-        pInT2 += 4;
-      
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Tail */
-      j = numCols & 3;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        in1 = *pInT2;
-        *pInT2++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Replace the rows with the sum of that row and a multiple of row i
-       * so that each new element in column i above row i is zero.*/
-
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pInT2 = pOut;
-
-      /* index used to check for pivot element */
-      i = 0U;
-
-      /* Loop over number of rows */
-      /*  to be replaced by the sum of that row and a multiple of row i */
-      k = numRows;
-
-      while (k > 0U)
-      {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-
-          pInT2 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-          tmpV = vdupq_n_f32(in);
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          j = (numCols - l) >> 2;
-	  
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            vec1 = vld1q_f32(pInT1);
-            vec2 = vld1q_f32(pPRT_in);
-            vec1 = vmlsq_f32(vec1, tmpV, vec2);
-            vst1q_f32(pInT1, vec1);
-            pPRT_in += 4;
-            pInT1 += 4;
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-	  /* Tail */
-          j = (numCols - l) & 3;
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT1;
-            *pInT1++ = in1 - (in * *pPRT_in++);
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          j = numCols >> 2;
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            vec1 = vld1q_f32(pInT2);
-            vec2 = vld1q_f32(pPRT_pDst);
-            vec1 = vmlsq_f32(vec1, tmpV, vec2);
-            vst1q_f32(pInT2, vec1);
-            pPRT_pDst += 4;
-            pInT2 += 4;
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-	  /* Tail */
-          j = numCols & 3;
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT2;
-            *pInT2++ = in1 - (in * *pPRT_pDst++);
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-        }
-
-        /* Increment the temporary input pointer */
-        pInT1 = pInT1 + l;
-
-        /* Decrement the loop counter */
-        k--;
-
-        /* Increment the pivot index */
-        i++;
-      }
-
-      /* Increment the input pointer */
-      pIn++;
-
-      /* Decrement the loop counter */
-      loopCnt--;
-
-      /* Increment the index modifier */
-      l++;
-    }
-
-    /* Set status as ARM_MATH_SUCCESS */
-    status = ARM_MATH_SUCCESS;
-
-    if ((flag != 1U) && (in == 0.0f))
-    {
-      pIn = pSrc->pData;
-      for (i = 0; i < numRows * numCols; i++)
-      {
-        if (pIn[i] != 0.0f)
-            break;
-      }
-
-      if (i == numRows * numCols)
-        status = ARM_MATH_SINGULAR;
-    }
-  }
-  /* Return to application */
-  return (status);
-}
-#else
 arm_status arm_mat_inverse_f32(
   const arm_matrix_instance_f32 * pSrc,
         arm_matrix_instance_f32 * pDst)
 {
   float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
   float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float32_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
-  float32_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
-  float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  
+  float32_t *pTmp;
   uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
   uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
 
-#if defined (ARM_MATH_DSP)
 
-  float32_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
+  float32_t pivot = 0.0f, newPivot=0.0f;                /* Temporary input values  */
+  uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column;      /* loop counters */
   arm_status status;                             /* status of matrix inverse */
 
 #ifdef ARM_MATH_MATRIX_CHECK
@@ -1017,7 +117,6 @@ arm_status arm_mat_inverse_f32(
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
 
   {
-
     /*--------------------------------------------------------------------------------------------------------------
      * Matrix Inverse can be solved using elementary row operations.
      *
@@ -1054,7 +153,7 @@ arm_status arm_mat_inverse_f32(
      *----------------------------------------------------------------------------------------------------------------*/
 
     /* Working pointer for destination matrix */
-    pOutT1 = pOut;
+    pTmp = pOut;
 
     /* Loop over the number of rows */
     rowCnt = numRows;
@@ -1066,18 +165,18 @@ arm_status arm_mat_inverse_f32(
       j = numRows - rowCnt;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0f;
+        *pTmp++ = 0.0f;
         j--;
       }
 
       /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0f;
+      *pTmp++ = 1.0f;
 
       /* Writing all zeroes in upper triangle of the destination matrix */
       j = rowCnt - 1U;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0f;
+        *pTmp++ = 0.0f;
         j--;
       }
 
@@ -1087,486 +186,100 @@ arm_status arm_mat_inverse_f32(
 
     /* Loop over the number of columns of the input matrix.
        All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
 
     /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
+    for(column = 0U; column < numCols; column++)
     {
       /* Check if the pivot element is zero..
        * If it is zero then interchange the row with non zero row below.
        * If there is no non zero element to replace in the rows below,
        * then the matrix is Singular. */
 
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
+      pivotRow = column;
 
       /* Temporary variable to hold the pivot value */
-      in = *pInT1;
+      pTmp = ELEM(pSrc,column,column) ;
+      pivot = *pTmp;
+      selectedRow = column;
 
+      /* Find maximum pivot in column */
       
-      /* Destination pointer modifier */
-      k = 1U;
-
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0f)
-      {
         /* Loop over the number rows present below */
 
-        for (i = (l + 1U); i < numRows; i++)
-        {
-          /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
-
-          /* Check if there is a non zero pivot element to
-           * replace in the rows below */
-          if (*pInT2 != 0.0f)
-          {
-            /* Loop over number of columns
-             * to the right of the pilot element */
-            j = numCols - l;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
-
-              /* Decrement the loop counter */
-              j--;
-            }
-
-            /* Loop over number of columns of the destination matrix */
-            j = numCols;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the destination matrix */
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-
-              /* Decrement loop counter */
-              j--;
-            }
-
-            /* Flag to indicate whether exchange is done or not */
-            flag = 1U;
-
-            /* Break after exchange is done */
-            break;
-          }
-
-          /* Update the destination pointer modifier */
-          k++;
-
-          /* Decrement loop counter */
-        }
-      }
-
-      /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0f))
-      {
-        return ARM_MATH_SINGULAR;
-      }
-
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
-
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pInT2 = pPivotRowDst;
-
-      /* Pivot element of the row */
-      in = *pPivotRowIn;
-
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      j = (numCols - l);
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        in1 = *pInT1;
-        *pInT1++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Loop over number of columns of the destination matrix */
-      j = numCols;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        in1 = *pInT2;
-        *pInT2++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Replace the rows with the sum of that row and a multiple of row i
-       * so that each new element in column i above row i is zero.*/
-
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pInT2 = pOut;
-
-      /* index used to check for pivot element */
-      i = 0U;
-
-      /* Loop over number of rows */
-      /*  to be replaced by the sum of that row and a multiple of row i */
-      k = numRows;
-
-      while (k > 0U)
+      for (rowNb = column+1; rowNb < numRows; rowNb++)
       {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-
-          pInT2 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          j = (numCols - l);
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT1;
-            *pInT1++ = in1 - (in * *pPRT_in++);
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          j = numCols;
-
-          while (j > 0U)
+          /* Read the candidate pivot of this row in the current column */
+          pTmp = ELEM(pSrc,rowNb,column);
+          newPivot = *pTmp;
+          if (fabsf(newPivot) > fabsf(pivot))
           {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT2;
-            *pInT2++ = in1 - (in * *pPRT_pDst++);
-
-            /* Decrement loop counter */
-            j--;
+            selectedRow = rowNb; 
+            pivot = newPivot;
           }
-
-        }
-
-        /* Increment temporary input pointer */
-        pInT1 = pInT1 + l;
-
-        /* Decrement loop counter */
-        k--;
-
-        /* Increment pivot index */
-        i++;
-      }
-
-      /* Increment the input pointer */
-      pIn++;
-
-      /* Decrement the loop counter */
-      loopCnt--;
-
-      /* Increment the index modifier */
-      l++;
-    }
-
-
-#else
-
-  float32_t Xchg, in = 0.0f;                     /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
-  arm_status status;                             /* status of matrix inverse */
-
-#ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) ||
-      (pDst->numRows != pDst->numCols) ||
-      (pSrc->numRows != pDst->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
-#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-
-    /*--------------------------------------------------------------------------------------------------------------
-     * Matrix Inverse can be solved using elementary row operations.
-     *
-     *  Gauss-Jordan Method:
-     *
-     *      1. First combine the identity matrix and the input matrix separated by a bar to form an
-     *        augmented matrix as follows:
-     *                      _  _          _     _      _   _         _         _
-     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
-     *                     |  |            | | |        |   |   =   |           |
-     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X21 _|
-     *
-     *      2. In our implementation, pDst Matrix is used as identity matrix.
-     *
-     *      3. Begin with the first row. Let i = 1.
-     *
-     *      4. Check to see if the pivot for row i is zero.
-     *         The pivot is the element of the main diagonal that is on the current row.
-     *         For instance, if working with row i, then the pivot element is aii.
-     *         If the pivot is zero, exchange that row with a row below it that does not
-     *         contain a zero in column i. If this is not possible, then an inverse
-     *         to that matrix does not exist.
-     *
-     *      5. Divide every element of row i by the pivot.
-     *
-     *      6. For every row below and  row i, replace that row with the sum of that row and
-     *         a multiple of row i so that each new element in column i below row i is zero.
-     *
-     *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
-     *         for every element below and above the main diagonal.
-     *
-     *      8. Now an identical matrix is formed to the left of the bar(input matrix, src).
-     *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
-     *----------------------------------------------------------------------------------------------------------------*/
-
-    /* Working pointer for destination matrix */
-    pOutT1 = pOut;
-
-    /* Loop over the number of rows */
-    rowCnt = numRows;
-
-    /* Making the destination matrix as identity matrix */
-    while (rowCnt > 0U)
-    {
-      /* Writing all zeroes in lower triangle of the destination matrix */
-      j = numRows - rowCnt;
-      while (j > 0U)
-      {
-        *pOutT1++ = 0.0f;
-        j--;
       }
-
-      /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0f;
-
-      /* Writing all zeroes in upper triangle of the destination matrix */
-      j = rowCnt - 1U;
-      while (j > 0U)
+        
+      /* Swap rows only when a non-zero pivot was selected from one of
+       * the rows below the current one */
+      if ((pivot != 0.0f) && (selectedRow != column))
       {
-        *pOutT1++ = 0.0f;
-        j--;
-      }
-
-      /* Decrement loop counter */
-      rowCnt--;
-    }
-
-    /* Loop over the number of columns of the input matrix.
-       All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
-
-    /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
-    {
-      /* Check if the pivot element is zero..
-       * If it is zero then interchange the row with non zero row below.
-       * If there is no non zero element to replace in the rows below,
-       * then the matrix is Singular. */
-
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
-
-      /* Temporary variable to hold the pivot value */
-      in = *pInT1;
-
-      /* Destination pointer modifier */
-      k = 1U;
-
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0f)
-      {
-        /* Loop over the number rows present below */
-        for (i = (l + 1U); i < numRows; i++)
-        {
-          /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
-
-          /* Check if there is a non zero pivot element to
-           * replace in the rows below */
-          if (*pInT2 != 0.0f)
-          {
-            /* Loop over number of columns
-             * to the right of the pilot element */
-            for (j = 0U; j < (numCols - l); j++)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
-            }
-
-            for (j = 0U; j < numCols; j++)
-            {
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-            }
+            
+            SWAP_ROWS_F32(pSrc,column, pivotRow,selectedRow);
+            SWAP_ROWS_F32(pDst,0, pivotRow,selectedRow);
 
+    
             /* Flag to indicate whether exchange is done or not */
             flag = 1U;
+       }
 
-            /* Break after exchange is done */
-            break;
-          }
 
-          /* Update the destination pointer modifier */
-          k++;
-        }
-      }
+      
+      
 
       /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0f))
+      if ((flag != 1U) && (pivot == 0.0f))
       {
         return ARM_MATH_SINGULAR;
       }
 
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
-
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pOutT1 = pPivotRowDst;
-
+     
       /* Pivot element of the row */
-      in = *(pIn + (l * numCols));
+      pivot = 1.0f / pivot;
 
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      for (j = 0U; j < (numCols - l); j++)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        *pInT1 = *pInT1 / in;
-        pInT1++;
-      }
-      for (j = 0U; j < numCols; j++)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        *pOutT1 = *pOutT1 / in;
-        pOutT1++;
-      }
+      SCALE_ROW_F32(pSrc,column,pivot,pivotRow);
+      SCALE_ROW_F32(pDst,0,pivot,pivotRow);
 
+      
       /* Replace the rows with the sum of that row and a multiple of row i
        * so that each new element in column i above row i is zero.*/
 
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pOutT1 = pOut;
-
-      for (i = 0U; i < numRows; i++)
+      rowNb = 0;
+      for (;rowNb < pivotRow; rowNb++)
       {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-          pOutT1 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          for (j = 0U; j < (numCols - l); j++)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            *pInT1 = *pInT1 - (in * *pPRT_in++);
-            pInT1++;
-          }
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          for (j = 0U; j < numCols; j++)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            *pOutT1 = *pOutT1 - (in * *pPRT_pDst++);
-            pOutT1++;
-          }
+           MAS_ROW_F32(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F32(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-        }
 
-        /* Increment temporary input pointer */
-        pInT1 = pInT1 + l;
       }
 
-      /* Increment the input pointer */
-      pIn++;
+      for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++)
+      {
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-      /* Decrement the loop counter */
-      loopCnt--;
+           MAS_ROW_F32(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F32(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-      /* Increment the index modifier */
-      l++;
-    }
+      }
 
-#endif /* #if defined (ARM_MATH_DSP) */
+    }
 
     /* Set status as ARM_MATH_SUCCESS */
     status = ARM_MATH_SUCCESS;
 
-    if ((flag != 1U) && (in == 0.0f))
+    if ((flag != 1U) && (pivot == 0.0f))
     {
       pIn = pSrc->pData;
       for (i = 0; i < numRows * numCols; i++)
@@ -1583,9 +296,6 @@ arm_status arm_mat_inverse_f32(
   /* Return to application */
   return (status);
 }
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
 /**
   @} end of MatrixInv group
  */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f64.c
index 73bbbee..9b13e3b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_inverse_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_inverse_f64.c
  * Description:  Floating-point matrix inverse
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,6 +29,7 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 /**
   @ingroup groupMatrix
@@ -56,16 +57,14 @@ arm_status arm_mat_inverse_f64(
 {
   float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
   float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float64_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
-  float64_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
-  float64_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  
+  float64_t *pTmp;
   uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
   uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
 
-#if defined (ARM_MATH_DSP)
 
-  float64_t Xchg, in = 0.0, in1;                /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
+  float64_t pivot = 0.0, newPivot=0.0;                /* Temporary input values  */
+  uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column;      /* loop counters */
   arm_status status;                             /* status of matrix inverse */
 
 #ifdef ARM_MATH_MATRIX_CHECK
@@ -83,7 +82,6 @@ arm_status arm_mat_inverse_f64(
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
 
   {
-
     /*--------------------------------------------------------------------------------------------------------------
      * Matrix Inverse can be solved using elementary row operations.
      *
@@ -120,7 +118,7 @@ arm_status arm_mat_inverse_f64(
      *----------------------------------------------------------------------------------------------------------------*/
 
     /* Working pointer for destination matrix */
-    pOutT1 = pOut;
+    pTmp = pOut;
 
     /* Loop over the number of rows */
     rowCnt = numRows;
@@ -132,18 +130,18 @@ arm_status arm_mat_inverse_f64(
       j = numRows - rowCnt;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0;
+        *pTmp++ = 0.0;
         j--;
       }
 
       /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0;
+      *pTmp++ = 1.0;
 
       /* Writing all zeroes in upper triangle of the destination matrix */
       j = rowCnt - 1U;
       while (j > 0U)
       {
-        *pOutT1++ = 0.0;
+        *pTmp++ = 0.0;
         j--;
       }
 
@@ -153,485 +151,99 @@ arm_status arm_mat_inverse_f64(
 
     /* Loop over the number of columns of the input matrix.
        All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
 
     /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
+    for(column = 0U; column < numCols; column++)
     {
       /* Check if the pivot element is zero..
        * If it is zero then interchange the row with non zero row below.
        * If there is no non zero element to replace in the rows below,
        * then the matrix is Singular. */
 
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
+      pivotRow = column;
 
       /* Temporary variable to hold the pivot value */
-      in = *pInT1;
+      pTmp = ELEM(pSrc,column,column) ;
+      pivot = *pTmp;
+      selectedRow = column;
 
-      /* Destination pointer modifier */
-      k = 1U;
+      
+        /* Loop over the rows below, keeping the largest-magnitude entry as pivot */
 
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0)
+      for (rowNb = column+1; rowNb < numRows; rowNb++)
       {
-        /* Loop over the number rows present below */
-        for (i = (l + 1U); i < numRows; i++)
-        {
           /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
+          pTmp = ELEM(pSrc,rowNb,column);
+          newPivot = *pTmp;
+          if (fabs(newPivot) > fabs(pivot))
+          {
+            selectedRow = rowNb; 
+            pivot = newPivot;
+          }
+      }
 
           /* Check if there is a non zero pivot element to
            * replace in the rows below */
-          if (*pInT2 != 0.0)
-          {
+      if ((pivot != 0.0) && (selectedRow != column))
+      {
             /* Loop over number of columns
              * to the right of the pilot element */
-            j = numCols - l;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
-
-              /* Decrement the loop counter */
-              j--;
-            }
 
-            /* Loop over number of columns of the destination matrix */
-            j = numCols;
-
-            while (j > 0U)
-            {
-              /* Exchange the row elements of the destination matrix */
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-
-              /* Decrement loop counter */
-              j--;
-            }
+            SWAP_ROWS_F64(pSrc,column, pivotRow,selectedRow);
+            SWAP_ROWS_F64(pDst,0, pivotRow,selectedRow);
 
+    
             /* Flag to indicate whether exchange is done or not */
             flag = 1U;
 
-            /* Break after exchange is done */
-            break;
-          }
-
-          /* Update the destination pointer modifier */
-          k++;
-
-          /* Decrement loop counter */
-          i--;
-        }
       }
 
+
       /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0))
+      if ((flag != 1U) && (pivot == 0.0))
       {
         return ARM_MATH_SINGULAR;
       }
 
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
-
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pInT2 = pPivotRowDst;
-
+     
       /* Pivot element of the row */
-      in = *pPivotRowIn;
+      pivot = 1.0 / pivot;
 
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      j = (numCols - l);
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        in1 = *pInT1;
-        *pInT1++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
-
-      /* Loop over number of columns of the destination matrix */
-      j = numCols;
-
-      while (j > 0U)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        in1 = *pInT2;
-        *pInT2++ = in1 / in;
-
-        /* Decrement the loop counter */
-        j--;
-      }
+      SCALE_ROW_F64(pSrc,column,pivot,pivotRow);
+      SCALE_ROW_F64(pDst,0,pivot,pivotRow);
 
+      
       /* Replace the rows with the sum of that row and a multiple of row i
        * so that each new element in column i above row i is zero.*/
 
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pInT2 = pOut;
-
-      /* index used to check for pivot element */
-      i = 0U;
-
-      /* Loop over number of rows */
-      /*  to be replaced by the sum of that row and a multiple of row i */
-      k = numRows;
-
-      while (k > 0U)
-      {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-
-          pInT2 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          j = (numCols - l);
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT1;
-            *pInT1++ = in1 - (in * *pPRT_in++);
-
-            /* Decrement the loop counter */
-            j--;
-          }
-
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          j = numCols;
-
-          while (j > 0U)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            in1 = *pInT2;
-            *pInT2++ = in1 - (in * *pPRT_pDst++);
-
-            /* Decrement loop counter */
-            j--;
-          }
-
-        }
-
-        /* Increment temporary input pointer */
-        pInT1 = pInT1 + l;
-
-        /* Decrement loop counter */
-        k--;
-
-        /* Increment pivot index */
-        i++;
-      }
-
-      /* Increment the input pointer */
-      pIn++;
-
-      /* Decrement the loop counter */
-      loopCnt--;
-
-      /* Increment the index modifier */
-      l++;
-    }
-
-
-#else
-
-  float64_t Xchg, in = 0.0;                     /* Temporary input values  */
-  uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
-  arm_status status;                             /* status of matrix inverse */
-
-#ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) ||
-      (pDst->numRows != pDst->numCols) ||
-      (pSrc->numRows != pDst->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
-#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-
-    /*--------------------------------------------------------------------------------------------------------------
-     * Matrix Inverse can be solved using elementary row operations.
-     *
-     *  Gauss-Jordan Method:
-     *
-     *      1. First combine the identity matrix and the input matrix separated by a bar to form an
-     *        augmented matrix as follows:
-     *                      _  _          _     _      _   _         _         _
-     *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
-     *                     |  |            | | |        |   |   =   |           |
-     *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X21 _|
-     *
-     *      2. In our implementation, pDst Matrix is used as identity matrix.
-     *
-     *      3. Begin with the first row. Let i = 1.
-     *
-     *      4. Check to see if the pivot for row i is zero.
-     *         The pivot is the element of the main diagonal that is on the current row.
-     *         For instance, if working with row i, then the pivot element is aii.
-     *         If the pivot is zero, exchange that row with a row below it that does not
-     *         contain a zero in column i. If this is not possible, then an inverse
-     *         to that matrix does not exist.
-     *
-     *      5. Divide every element of row i by the pivot.
-     *
-     *      6. For every row below and  row i, replace that row with the sum of that row and
-     *         a multiple of row i so that each new element in column i below row i is zero.
-     *
-     *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
-     *         for every element below and above the main diagonal.
-     *
-     *      8. Now an identical matrix is formed to the left of the bar(input matrix, src).
-     *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
-     *----------------------------------------------------------------------------------------------------------------*/
-
-    /* Working pointer for destination matrix */
-    pOutT1 = pOut;
-
-    /* Loop over the number of rows */
-    rowCnt = numRows;
-
-    /* Making the destination matrix as identity matrix */
-    while (rowCnt > 0U)
-    {
-      /* Writing all zeroes in lower triangle of the destination matrix */
-      j = numRows - rowCnt;
-      while (j > 0U)
-      {
-        *pOutT1++ = 0.0;
-        j--;
-      }
-
-      /* Writing all ones in the diagonal of the destination matrix */
-      *pOutT1++ = 1.0;
-
-      /* Writing all zeroes in upper triangle of the destination matrix */
-      j = rowCnt - 1U;
-      while (j > 0U)
-      {
-        *pOutT1++ = 0.0;
-        j--;
-      }
-
-      /* Decrement loop counter */
-      rowCnt--;
-    }
-
-    /* Loop over the number of columns of the input matrix.
-       All the elements in each column are processed by the row operations */
-    loopCnt = numCols;
-
-    /* Index modifier to navigate through the columns */
-    l = 0U;
-
-    while (loopCnt > 0U)
-    {
-      /* Check if the pivot element is zero..
-       * If it is zero then interchange the row with non zero row below.
-       * If there is no non zero element to replace in the rows below,
-       * then the matrix is Singular. */
-
-      /* Working pointer for the input matrix that points
-       * to the pivot element of the particular row  */
-      pInT1 = pIn + (l * numCols);
-
-      /* Working pointer for the destination matrix that points
-       * to the pivot element of the particular row  */
-      pOutT1 = pOut + (l * numCols);
-
-      /* Temporary variable to hold the pivot value */
-      in = *pInT1;
-
-      /* Destination pointer modifier */
-      k = 1U;
-
-      /* Check if the pivot element is zero */
-      if (*pInT1 == 0.0)
+      rowNb = 0;
+      for (;rowNb < pivotRow; rowNb++)
       {
-        /* Loop over the number rows present below */
-        for (i = (l + 1U); i < numRows; i++)
-        {
-          /* Update the input and destination pointers */
-          pInT2 = pInT1 + (numCols * i);
-          pOutT2 = pOutT1 + (numCols * k);
-
-          /* Check if there is a non zero pivot element to
-           * replace in the rows below */
-          if (*pInT2 != 0.0)
-          {
-            /* Loop over number of columns
-             * to the right of the pilot element */
-            for (j = 0U; j < (numCols - l); j++)
-            {
-              /* Exchange the row elements of the input matrix */
-              Xchg = *pInT2;
-              *pInT2++ = *pInT1;
-              *pInT1++ = Xchg;
-            }
-
-            for (j = 0U; j < numCols; j++)
-            {
-              Xchg = *pOutT2;
-              *pOutT2++ = *pOutT1;
-              *pOutT1++ = Xchg;
-            }
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-            /* Flag to indicate whether exchange is done or not */
-            flag = 1U;
+           MAS_ROW_F64(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F64(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-            /* Break after exchange is done */
-            break;
-          }
 
-          /* Update the destination pointer modifier */
-          k++;
-        }
       }
 
-      /* Update the status if the matrix is singular */
-      if ((flag != 1U) && (in == 0.0))
+      for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++)
       {
-        return ARM_MATH_SINGULAR;
-      }
-
-      /* Points to the pivot row of input and destination matrices */
-      pPivotRowIn = pIn + (l * numCols);
-      pPivotRowDst = pOut + (l * numCols);
+           pTmp = ELEM(pSrc,rowNb,column) ;
+           pivot = *pTmp;
 
-      /* Temporary pointers to the pivot row pointers */
-      pInT1 = pPivotRowIn;
-      pOutT1 = pPivotRowDst;
+           MAS_ROW_F64(column,pSrc,rowNb,pivot,pSrc,pivotRow);
+           MAS_ROW_F64(0     ,pDst,rowNb,pivot,pDst,pivotRow);
 
-      /* Pivot element of the row */
-      in = *(pIn + (l * numCols));
-
-      /* Loop over number of columns
-       * to the right of the pilot element */
-      for (j = 0U; j < (numCols - l); j++)
-      {
-        /* Divide each element of the row of the input matrix
-         * by the pivot element */
-        *pInT1 = *pInT1 / in;
-        pInT1++;
       }
-      for (j = 0U; j < numCols; j++)
-      {
-        /* Divide each element of the row of the destination matrix
-         * by the pivot element */
-        *pOutT1 = *pOutT1 / in;
-        pOutT1++;
-      }
-
-      /* Replace the rows with the sum of that row and a multiple of row i
-       * so that each new element in column i above row i is zero.*/
-
-      /* Temporary pointers for input and destination matrices */
-      pInT1 = pIn;
-      pOutT1 = pOut;
 
-      for (i = 0U; i < numRows; i++)
-      {
-        /* Check for the pivot element */
-        if (i == l)
-        {
-          /* If the processing element is the pivot element,
-             only the columns to the right are to be processed */
-          pInT1 += numCols - l;
-          pOutT1 += numCols;
-        }
-        else
-        {
-          /* Element of the reference row */
-          in = *pInT1;
-
-          /* Working pointers for input and destination pivot rows */
-          pPRT_in = pPivotRowIn;
-          pPRT_pDst = pPivotRowDst;
-
-          /* Loop over the number of columns to the right of the pivot element,
-             to replace the elements in the input matrix */
-          for (j = 0U; j < (numCols - l); j++)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            *pInT1 = *pInT1 - (in * *pPRT_in++);
-            pInT1++;
-          }
-
-          /* Loop over the number of columns to
-             replace the elements in the destination matrix */
-          for (j = 0U; j < numCols; j++)
-          {
-            /* Replace the element by the sum of that row
-               and a multiple of the reference row  */
-            *pOutT1 = *pOutT1 - (in * *pPRT_pDst++);
-            pOutT1++;
-          }
-
-        }
-
-        /* Increment temporary input pointer */
-        pInT1 = pInT1 + l;
-      }
-
-      /* Increment the input pointer */
-      pIn++;
-
-      /* Decrement the loop counter */
-      loopCnt--;
-
-      /* Increment the index modifier */
-      l++;
     }
 
-#endif /* #if defined (ARM_MATH_DSP) */
-
     /* Set status as ARM_MATH_SUCCESS */
     status = ARM_MATH_SUCCESS;
 
-    if ((flag != 1U) && (in == 0.0))
+    if ((flag != 1U) && (pivot == 0.0))
     {
       pIn = pSrc->pData;
       for (i = 0; i < numRows * numCols; i++)
@@ -648,7 +260,6 @@ arm_status arm_mat_inverse_f64(
   /* Return to application */
   return (status);
 }
-
 /**
   @} end of MatrixInv group
  */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
index b974031..3fa0b43 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_ldl_f32.c
  * Description:  Floating-point LDL decomposition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -27,44 +29,12 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
-
-
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
 
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 
-
-/// @private
-#define SWAP_ROWS(A,i,j)                 \
-  {                                      \
-    int cnt = n;                         \
-                                         \
-    for(int w=0;w < n; w+=4)             \
-    {                                    \
-       f32x4_t tmpa,tmpb;                \
-       mve_pred16_t p0 = vctp32q(cnt);   \
-                                         \
-       tmpa=vldrwq_z_f32(&A[i*n + w],p0);\
-       tmpb=vldrwq_z_f32(&A[j*n + w],p0);\
-                                         \
-       vstrwq_p(&A[i*n + w], tmpb, p0);  \
-       vstrwq_p(&A[j*n + w], tmpa, p0);  \
-                                         \
-       cnt -= 4;                         \
-    }                                    \
-  }
-
-/// @private
-#define SWAP_COLS(A,i,j)     \
-  for(int w=0;w < n; w++)    \
-  {                          \
-     float32_t tmp;          \
-     tmp = A[w*n + i];       \
-     A[w*n + i] = A[w*n + j];\
-     A[w*n + j] = tmp;       \
-  }
-
 /**
   @ingroup groupMatrix
  */
@@ -96,7 +66,7 @@ arm_status arm_mat_ldlt_f32(
 {
 
   arm_status status;                             /* status of matrix inverse */
- 
+
 
 #ifdef ARM_MATH_MATRIX_CHECK
 
@@ -104,8 +74,7 @@ arm_status arm_mat_ldlt_f32(
   if ((pSrc->numRows != pSrc->numCols) ||
       (pl->numRows != pl->numCols) ||
       (pd->numRows != pd->numCols) ||
-      (pp->numRows != pp->numCols) ||
-      (pl->numRows != pl->numRows)   )
+      (pl->numRows != pd->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -120,6 +89,7 @@ arm_status arm_mat_ldlt_f32(
     int fullRank = 1, diag,k;
     float32_t *pA;
 
+    memset(pd->pData,0,sizeof(float32_t)*n*n);
     memcpy(pl->pData,pSrc->pData,n*n*sizeof(float32_t));
     pA = pl->pData;
 
@@ -143,7 +113,7 @@ arm_status arm_mat_ldlt_f32(
     {
         /* Find pivot */
         float32_t m=F32_MIN,a;
-        int j=k; 
+        int j=k;
 
 
         for(int r=k;r<n;r++)
@@ -157,8 +127,8 @@ arm_status arm_mat_ldlt_f32(
 
         if(j != k)
         {
-          SWAP_ROWS(pA,k,j);
-          SWAP_COLS(pA,k,j);
+          SWAP_ROWS_F32(pl,0,k,j);
+          SWAP_COLS_F32(pl,0,k,j);
         }
 
 
@@ -166,7 +136,7 @@ arm_status arm_mat_ldlt_f32(
 
         a = pA[k*n+k];
 
-        if (fabs(a) < 1.0e-8)
+        if (fabsf(a) < 1.0e-8f)
         {
 
             fullRank = 0;
@@ -205,25 +175,25 @@ arm_status arm_mat_ldlt_f32(
              //pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * (pA[x*n+k] * invA);
 
 
-             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
+             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
              vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);
 
-             
+
              vecA = vldrwq_z_f32(&pA[(w + 0)*n+x],p0);
              vecA = vfmsq_m(vecA, vecW0, vecX, p0);
-             vstrwq_p(&pA[(w + 0)*n+x], vecA, p0);  
+             vstrwq_p(&pA[(w + 0)*n+x], vecA, p0);
 
              vecA = vldrwq_z_f32(&pA[(w + 1)*n+x],p0);
              vecA = vfmsq_m(vecA, vecW1, vecX, p0);
-             vstrwq_p(&pA[(w + 1)*n+x], vecA, p0);  
+             vstrwq_p(&pA[(w + 1)*n+x], vecA, p0);
 
              vecA = vldrwq_z_f32(&pA[(w + 2)*n+x],p0);
              vecA = vfmsq_m(vecA, vecW2, vecX, p0);
-             vstrwq_p(&pA[(w + 2)*n+x], vecA, p0);  
+             vstrwq_p(&pA[(w + 2)*n+x], vecA, p0);
 
              vecA = vldrwq_z_f32(&pA[(w + 3)*n+x],p0);
              vecA = vfmsq_m(vecA, vecW3, vecX, p0);
-             vstrwq_p(&pA[(w + 3)*n+x], vecA, p0);  
+             vstrwq_p(&pA[(w + 3)*n+x], vecA, p0);
 
              cnt -= 4;
           }
@@ -247,13 +217,13 @@ arm_status arm_mat_ldlt_f32(
              //pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * (pA[x*n+k] * invA);
 
              vecA = vldrwq_z_f32(&pA[w*n+x],p0);
-             
-             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
+
+             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
              vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);
 
              vecA = vfmsq_m(vecA, vecW, vecX, p0);
 
-             vstrwq_p(&pA[w*n+x], vecA, p0);  
+             vstrwq_p(&pA[w*n+x], vecA, p0);
 
              cnt -= 4;
           }
@@ -264,7 +234,7 @@ arm_status arm_mat_ldlt_f32(
                pA[w*n+k] = pA[w*n+k] * invA;
         }
 
-        
+
 
     }
 
@@ -276,15 +246,15 @@ arm_status arm_mat_ldlt_f32(
       diag--;
       for(int row=0; row < n;row++)
       {
-        mve_pred16_t p0; 
+        mve_pred16_t p0;
         int cnt= n-k;
         f32x4_t zero=vdupq_n_f32(0.0f);
 
         for(int col=k; col < n;col += 4)
         {
            p0 = vctp32q(cnt);
-         
-           vstrwq_p(&pl->pData[row*n+col], zero, p0);  
+
+           vstrwq_p(&pl->pData[row*n+col], zero, p0);
 
            cnt -= 4;
         }
@@ -293,15 +263,15 @@ arm_status arm_mat_ldlt_f32(
 
     for(int row=0; row < n;row++)
     {
-       mve_pred16_t p0; 
+       mve_pred16_t p0;
        int cnt= n-row-1;
        f32x4_t zero=vdupq_n_f32(0.0f);
-       
+
        for(int col=row+1; col < n;col+=4)
        {
          p0 = vctp32q(cnt);
-         
-         vstrwq_p(&pl->pData[row*n+col], zero, p0);  
+
+         vstrwq_p(&pl->pData[row*n+col], zero, p0);
 
          cnt -= 4;
        }
@@ -312,36 +282,17 @@ arm_status arm_mat_ldlt_f32(
       pd->pData[d*n+d] = pl->pData[d*n+d];
       pl->pData[d*n+d] = 1.0;
     }
-  
+
     status = ARM_MATH_SUCCESS;
 
   }
 
-  
+
   /* Return to application */
   return (status);
 }
 #else
 
-/// @private
-#define SWAP_ROWS(A,i,j)     \
-  for(int w=0;w < n; w++)    \
-  {                          \
-     float32_t tmp;          \
-     tmp = A[i*n + w];       \
-     A[i*n + w] = A[j*n + w];\
-     A[j*n + w] = tmp;       \
-  }
-
-/// @private
-#define SWAP_COLS(A,i,j)     \
-  for(int w=0;w < n; w++)    \
-  {                          \
-     float32_t tmp;          \
-     tmp = A[w*n + i];       \
-     A[w*n + i] = A[w*n + j];\
-     A[w*n + j] = tmp;       \
-  }
 
 /**
   @ingroup groupMatrix
@@ -351,7 +302,7 @@ arm_status arm_mat_ldlt_f32(
   @addtogroup MatrixChol
   @{
  */
-  
+
 /**
    * @brief Floating-point LDL^t decomposition of positive semi-definite matrix.
    * @param[in]  pSrc   points to the instance of the input floating-point matrix structure.
@@ -374,7 +325,7 @@ arm_status arm_mat_ldlt_f32(
 {
 
   arm_status status;                             /* status of matrix inverse */
- 
+
 
 #ifdef ARM_MATH_MATRIX_CHECK
 
@@ -382,8 +333,7 @@ arm_status arm_mat_ldlt_f32(
   if ((pSrc->numRows != pSrc->numCols) ||
       (pl->numRows != pl->numCols) ||
       (pd->numRows != pd->numCols) ||
-      (pp->numRows != pp->numCols) ||
-      (pl->numRows != pl->numRows)   )
+      (pl->numRows != pd->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -397,11 +347,13 @@ arm_status arm_mat_ldlt_f32(
     const int n=pSrc->numRows;
     int fullRank = 1, diag,k;
     float32_t *pA;
+    int row,d;
 
+    memset(pd->pData,0,sizeof(float32_t)*n*n);
     memcpy(pl->pData,pSrc->pData,n*n*sizeof(float32_t));
     pA = pl->pData;
 
-    for(int k=0;k < n; k++)
+    for(k=0;k < n; k++)
     {
       pp[k] = k;
     }
@@ -411,10 +363,13 @@ arm_status arm_mat_ldlt_f32(
     {
         /* Find pivot */
         float32_t m=F32_MIN,a;
-        int j=k; 
+        int j=k;
 
 
-        for(int r=k;r<n;r++)
+        int r;
+        int w;
+
+        for(r=k;r<n;r++)
         {
            if (pA[r*n+r] > m)
            {
@@ -425,8 +380,8 @@ arm_status arm_mat_ldlt_f32(
 
         if(j != k)
         {
-          SWAP_ROWS(pA,k,j);
-          SWAP_COLS(pA,k,j);
+          SWAP_ROWS_F32(pl,0,k,j);
+          SWAP_COLS_F32(pl,0,k,j);
         }
 
 
@@ -434,27 +389,28 @@ arm_status arm_mat_ldlt_f32(
 
         a = pA[k*n+k];
 
-        if (fabs(a) < 1.0e-8)
+        if (fabsf(a) < 1.0e-8f)
         {
 
             fullRank = 0;
             break;
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
-          for(int x=k+1;x<n;x++)
+          int x;
+          for(x=k+1;x<n;x++)
           {
              pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * pA[x*n+k] / a;
           }
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
                pA[w*n+k] = pA[w*n+k] / a;
         }
 
-        
+
 
     }
 
@@ -464,34 +420,36 @@ arm_status arm_mat_ldlt_f32(
     if (!fullRank)
     {
       diag--;
-      for(int row=0; row < n;row++)
+      for(row=0; row < n;row++)
       {
-        for(int col=k; col < n;col++)
+        int col;
+        for(col=k; col < n;col++)
         {
            pl->pData[row*n+col]=0.0;
         }
       }
     }
 
-    for(int row=0; row < n;row++)
+    for(row=0; row < n;row++)
     {
-       for(int col=row+1; col < n;col++)
+       int col;
+       for(col=row+1; col < n;col++)
        {
          pl->pData[row*n+col] = 0.0;
        }
     }
 
-    for(int d=0; d < diag;d++)
+    for(d=0; d < diag;d++)
     {
       pd->pData[d*n+d] = pl->pData[d*n+d];
       pl->pData[d*n+d] = 1.0;
     }
-  
+
     status = ARM_MATH_SUCCESS;
 
   }
 
-  
+
   /* Return to application */
   return (status);
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
index 64e4d1a..55b131a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_ldl_f64.c
  * Description:  Floating-point LDL decomposition
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -27,29 +29,11 @@
  */
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
-#include <math.h>
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
 
+#include <math.h>
 
 
-/// @private
-#define SWAP_ROWS(A,i,j)     \
-  for(int w=0;w < n; w++)    \
-  {                          \
-     float64_t tmp;          \
-     tmp = A[i*n + w];       \
-     A[i*n + w] = A[j*n + w];\
-     A[j*n + w] = tmp;       \
-  }
-/// @private
-#define SWAP_COLS(A,i,j)     \
-  for(int w=0;w < n; w++)    \
-  {                          \
-     float64_t tmp;          \
-     tmp = A[w*n + i];       \
-     A[w*n + i] = A[w*n + j];\
-     A[w*n + j] = tmp;       \
-  }
-
 /**
   @ingroup groupMatrix
  */
@@ -90,8 +74,7 @@ arm_status arm_mat_ldlt_f64(
   if ((pSrc->numRows != pSrc->numCols) ||
       (pl->numRows != pl->numCols) ||
       (pd->numRows != pd->numCols) ||
-      (pp->numRows != pp->numCols) ||
-      (pl->numRows != pl->numRows)   )
+      (pl->numRows != pd->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -106,10 +89,12 @@ arm_status arm_mat_ldlt_f64(
     int fullRank = 1, diag,k;
     float64_t *pA;
 
+    memset(pd->pData,0,sizeof(float64_t)*n*n);
+
     memcpy(pl->pData,pSrc->pData,n*n*sizeof(float64_t));
     pA = pl->pData;
 
-    for(int k=0;k < n; k++)
+    for(k=0;k < n; k++)
     {
       pp[k] = k;
     }
@@ -119,10 +104,10 @@ arm_status arm_mat_ldlt_f64(
     {
         /* Find pivot */
         float64_t m=F64_MIN,a;
-        int j=k; 
+        int w,r,j=k; 
 
 
-        for(int r=k;r<n;r++)
+        for(r=k;r<n;r++)
         {
            if (pA[r*n+r] > m)
            {
@@ -133,8 +118,8 @@ arm_status arm_mat_ldlt_f64(
 
         if(j != k)
         {
-          SWAP_ROWS(pA,k,j);
-          SWAP_COLS(pA,k,j);
+          SWAP_ROWS_F64(pl,0,k,j);
+          SWAP_COLS_F64(pl,0,k,j);
         }
 
 
@@ -149,15 +134,16 @@ arm_status arm_mat_ldlt_f64(
             break;
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
-          for(int x=k+1;x<n;x++)
+          int x;
+          for(x=k+1;x<n;x++)
           {
              pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * pA[x*n+k] / a;
           }
         }
 
-        for(int w=k+1;w<n;w++)
+        for(w=k+1;w<n;w++)
         {
                pA[w*n+k] = pA[w*n+k] / a;
         }
@@ -172,27 +158,38 @@ arm_status arm_mat_ldlt_f64(
     if (!fullRank)
     {
       diag--;
-      for(int row=0; row < n;row++)
       {
-        for(int col=k; col < n;col++)
+        int row;
+        for(row=0; row < n;row++)
         {
-           pl->pData[row*n+col]=0.0;
+          int col;
+          for(col=k; col < n;col++)
+          {
+             pl->pData[row*n+col]=0.0;
+          }
         }
       }
     }
 
-    for(int row=0; row < n;row++)
     {
-       for(int col=row+1; col < n;col++)
-       {
-         pl->pData[row*n+col] = 0.0;
-       }
+      int row;
+      for(row=0; row < n;row++)
+      {
+         int col;
+         for(col=row+1; col < n;col++)
+         {
+           pl->pData[row*n+col] = 0.0;
+         }
+      }
     }
 
-    for(int d=0; d < diag;d++)
     {
-      pd->pData[d*n+d] = pl->pData[d*n+d];
-      pl->pData[d*n+d] = 1.0;
+      int d;
+      for(d=0; d < diag;d++)
+      {
+        pd->pData[d*n+d] = pl->pData[d*n+d];
+        pl->pData[d*n+d] = 1.0;
+      }
     }
   
     status = ARM_MATH_SUCCESS;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
index 571da6f..45c6570 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_f16.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -404,7 +404,7 @@ arm_status arm_mat_mult_f16(
       (pSrcB->numCols != pDst->numCols)    )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
+    return(ARM_MATH_SIZE_MISMATCH);
   }
   else
 
@@ -689,16 +689,16 @@ arm_status arm_mat_mult_f16(
           /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
 
           /* Perform the multiply-accumulates */
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
           /* Decrement loop counter */
@@ -720,7 +720,7 @@ arm_status arm_mat_mult_f16(
           /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
 
           /* Perform the multiply-accumulates */
-          sum += *pIn1++ * *pIn2;
+          sum += (_Float16)*pIn1++ * (_Float16)*pIn2;
           pIn2 += numColsB;
 
           /* Decrement loop counter */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c
index 26eaec6..ed1fe6e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_f32.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -30,6 +30,10 @@
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
 
+#if defined(ARM_MATH_NEON)
+#define GROUPOFROWS 8
+#endif
+
 /**
  * @ingroup groupMatrix
  */
@@ -39,7 +43,27 @@
  *
  * Multiplies two matrices.
  *
- * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices"
+ * @par Multiplication of two 3x3 matrices:
+ * 
+ * \f[
+ * \begin{pmatrix}
+ *  a_{1,1} & a_{1,2} & a_{1,3} \\
+ *  a_{2,1} & a_{2,2} & a_{2,3} \\
+ *  a_{3,1} & a_{3,2} & a_{3,3} \\
+ * \end{pmatrix}
+ * 
+ * \begin{pmatrix}
+ *  b_{1,1} & b_{1,2} & b_{1,3} \\
+ *  b_{2,1} & b_{2,2} & b_{2,3} \\
+ *  b_{3,1} & b_{3,2} & b_{3,3} \\
+ * \end{pmatrix}
+ * =
+ * \begin{pmatrix}
+ *  a_{1,1} b_{1,1}+a_{1,2} b_{2,1}+a_{1,3} b_{3,1} & a_{1,1} b_{1,2}+a_{1,2} b_{2,2}+a_{1,3} b_{3,2} & a_{1,1} b_{1,3}+a_{1,2} b_{2,3}+a_{1,3} b_{3,3} \\
+ *  a_{2,1} b_{1,1}+a_{2,2} b_{2,1}+a_{2,3} b_{3,1} & a_{2,1} b_{1,2}+a_{2,2} b_{2,2}+a_{2,3} b_{3,2} & a_{2,1} b_{1,3}+a_{2,2} b_{2,3}+a_{2,3} b_{3,3} \\
+ *  a_{3,1} b_{1,1}+a_{3,2} b_{2,1}+a_{3,3} b_{3,1} & a_{3,1} b_{1,2}+a_{3,2} b_{2,2}+a_{3,3} b_{3,2} & a_{3,1} b_{1,3}+a_{3,2} b_{2,3}+a_{3,3} b_{3,3} \\
+ * \end{pmatrix}
+ * \f]
 
  * Matrix multiplication is only defined if the number of columns of the
  * first matrix equals the number of rows of the second matrix.
@@ -56,14 +80,7 @@
  * @{
  */
 
-/**
- * @brief Floating-point matrix multiplication.
- * @param[in]       *pSrcA points to the first input matrix structure
- * @param[in]       *pSrcB points to the second input matrix structure
- * @param[out]      *pDst points to output matrix structure
- * @return     		The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- */
+
 
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 
@@ -260,6 +277,14 @@ __STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve(
 }
 
 
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @return          The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ */
 arm_status arm_mat_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
@@ -514,9 +539,14 @@ arm_status arm_mat_mult_f32(
 #else
 
 #if defined(ARM_MATH_NEON)
-
-#define GROUPOFROWS 8
-
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @return          The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ */
 arm_status arm_mat_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
@@ -845,6 +875,14 @@ arm_status arm_mat_mult_f32(
   return (status);
 }
 #else
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @return          The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ */
 arm_status arm_mat_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
index 29e3a3e..08571c7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_f64.c
  * Description:  Floating-point matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,23 +34,6 @@
  * @ingroup groupMatrix
  */
 
-/**
- * @defgroup MatrixMult Matrix Multiplication
- *
- * Multiplies two matrices.
- *
- * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices"
-
- * Matrix multiplication is only defined if the number of columns of the
- * first matrix equals the number of rows of the second matrix.
- * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results
- * in an <code>M x P</code> matrix.
- * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of
- * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output
- * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>.
- */
-
-
 /**
  * @addtogroup MatrixMult
  * @{
@@ -116,7 +99,7 @@ arm_status arm_mat_mult_f64(
       do
       {
         /* Set the variable sum, that acts as accumulator, to zero */
-        sum = 0.0f;
+        sum = 0.0;
 
         /* Initialize pointer pIn1 to point to starting address of column being processed */
         pIn1 = pInA;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q15.c
index 46981ff..57eda5b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_fast_q15.c
  * Description:  Q15 matrix multiplication (fast variant)
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -80,7 +80,7 @@ arm_status arm_mat_mult_fast_q15(
         uint16_t numRowsA = pSrcA->numRows;            /* Number of rows of input matrix A */
         uint16_t numColsB = pSrcB->numCols;            /* Number of columns of input matrix B */
         uint16_t numColsA = pSrcA->numCols;            /* Number of columns of input matrix A */
-        uint16_t numRowsB = pSrcB->numRows;            /* Number of rows of input matrix A */
+        uint16_t numRowsB = pSrcB->numRows;            /* Number of rows of input matrix B */
         uint32_t col, i = 0U, row = numRowsB, colCnt;  /* Loop counters */
         arm_status status;                             /* Status of matrix multiplication */
 
@@ -127,7 +127,7 @@ arm_status arm_mat_mult_fast_q15(
 #if defined (ARM_MATH_DSP)
 
         /* Read two elements from row */
-        in = read_q15x2_ia ((q15_t **) &pInB);
+        in = read_q15x2_ia (&pInB);
 
         /* Unpack and store one element in destination */
 #ifndef ARM_MATH_BIG_ENDIAN
@@ -149,7 +149,7 @@ arm_status arm_mat_mult_fast_q15(
         /* Update pointer px to point to next row of transposed matrix */
         px += numRowsB;
 
-        in = read_q15x2_ia ((q15_t **) &pInB);
+        in = read_q15x2_ia (&pInB);
 #ifndef ARM_MATH_BIG_ENDIAN
         *px = (q15_t) in;
 #else
@@ -260,7 +260,7 @@ arm_status arm_mat_mult_fast_q15(
         pInA2 = pInA + numColsA;
         pInB2 = pInB + numRowsB;
 
-        /* Read in two elements at once - alows dual MAC instruction */
+        /* Read in two elements at once - allows dual MAC instruction */
         colCnt = numColsA >> 1U;
 #else
         colCnt = numColsA >> 2U;
@@ -273,13 +273,13 @@ arm_status arm_mat_mult_fast_q15(
 
 #if defined (ARM_MATH_DSP)
           /* read real and imag values from pSrcA and pSrcB buffer */
-          inA1 = read_q15x2_ia ((q15_t **) &pInA);
-          inB1 = read_q15x2_ia ((q15_t **) &pInB);
+          inA1 = read_q15x2_ia (&pInA);
+          inB1 = read_q15x2_ia (&pInB);
 
-          inA2 = read_q15x2_ia ((q15_t **) &pInA2);
-          inB2 = read_q15x2_ia ((q15_t **) &pInB2);
+          inA2 = read_q15x2_ia (&pInA2);
+          inB2 = read_q15x2_ia (&pInB2);
 
-          /* Multiply and Accumlates */
+          /* Multiply and Accumulates */
           sum  = __SMLAD(inA1, inB1, sum);
           sum2 = __SMLAD(inA1, inB2, sum2);
           sum3 = __SMLAD(inA2, inB1, sum3);
@@ -288,7 +288,7 @@ arm_status arm_mat_mult_fast_q15(
           /* read real and imag values from pSrcA and pSrcB buffer */
           inA1 = *pInA++;
           inB1 = *pInB++;
-          /* Multiply and Accumlates */
+          /* Multiply and Accumulates */
           sum += inA1 * inB1;
 
           inA2 = *pInA++;
@@ -391,10 +391,10 @@ arm_status arm_mat_mult_fast_q15(
         /* matrix multiplication */
         while (colCnt > 0U)
         {
-          inA1 = read_q15x2_ia ((q15_t **) &pInA);
-          inA2 = read_q15x2_ia ((q15_t **) &pInA);
-          inB1 = read_q15x2_ia ((q15_t **) &pInB);
-          inB2 = read_q15x2_ia ((q15_t **) &pInB);
+          inA1 = read_q15x2_ia (&pInA);
+          inA2 = read_q15x2_ia (&pInA);
+          inB1 = read_q15x2_ia (&pInB);
+          inB2 = read_q15x2_ia (&pInB);
 
           sum  = __SMLAD(inA1, inB1, sum);
           sum  = __SMLAD(inA2, inB2, sum);
@@ -443,10 +443,10 @@ arm_status arm_mat_mult_fast_q15(
         /* matrix multiplication */
         while (colCnt > 0U)
         {
-          inA1 = read_q15x2_ia ((q15_t **) &pInA);
-          inA2 = read_q15x2_ia ((q15_t **) &pInA);
-          inB1 = read_q15x2_ia ((q15_t **) &pInB);
-          inB2 = read_q15x2_ia ((q15_t **) &pInB);
+          inA1 = read_q15x2_ia (&pInA);
+          inA2 = read_q15x2_ia (&pInA);
+          inB1 = read_q15x2_ia (&pInB);
+          inB2 = read_q15x2_ia (&pInB);
 
           sum  = __SMLAD(inA1, inB1, sum);
           sum  = __SMLAD(inA2, inB2, sum);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q31.c
index f4214af..1107562 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_fast_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_fast_q31.c
  * Description:  Q31 matrix multiplication (fast variant)
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_opt_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_opt_q31.c
new file mode 100644
index 0000000..69e0142
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_opt_q31.c
@@ -0,0 +1,788 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_mult_opt_q31.c
+ * Description:  Q31 matrix multiplication
+ *
+ * $Date:        3 Nov 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @addtogroup MatrixMult
+  @{
+ */
+
+/**
+  @brief         Q31 matrix multiplication.
+  @param[in]     pSrcA      points to the first input matrix structure
+  @param[in]     pSrcB      points to the second input matrix structure
+  @param[out]    pDst       points to output matrix structure
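+  @param[in]     pState     points to the array for storing intermediate results (used by the MVE implementation to hold the transpose of pSrcB; ignored by the scalar implementation)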
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+
+  @par           Scaling and Overflow Behavior
+                   The function is implemented using an internal 64-bit accumulator.
+                   The accumulator has a 2.62 format and maintains full precision of the intermediate
+                   multiplication results but provides only a single guard bit. There is no saturation
+                   on intermediate additions. Thus, if the accumulator overflows it wraps around and
+                   distorts the result. The input signals should be scaled down to avoid intermediate
+                   overflows. The input is thus scaled down by log2(numColsA) bits
+                   to avoid overflows, as a total of numColsA additions are performed internally.
+                   The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.
+  @remark
+                   Refer to \ref arm_mat_mult_fast_q31() for a faster but less precise implementation of this function.
+  @remark
+                   This function is a faster implementation of arm_mat_mult_q31 for MVE, but it requires
+                   additional storage for intermediate results.
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define MATRIX_DIM2 2
+#define MATRIX_DIM3 3
+#define MATRIX_DIM4 4
+
+__STATIC_INLINE arm_status arm_mat_mult_opt_q31_2x2_mve(
+    const arm_matrix_instance_q31 * pSrcA,
+    const arm_matrix_instance_q31 * pSrcB,
+    arm_matrix_instance_q31 * pDst)
+{
+    q31_t       *pInB = pSrcB->pData;  /* input data matrix pointer B */
+    q31_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
+    q31_t       *pOut = pDst->pData;   /* output data matrix pointer */
+    uint32x4_t   vecColBOffs;
+    q31_t       *pInA0 = pInA;
+    q31_t       *pInA1 = pInA0 + MATRIX_DIM2;
+    q63_t        acc0, acc1;
+    q31x4_t      vecB, vecA0, vecA1;
+    /* enable predication to disable half of vector elements */
+    mve_pred16_t p0 = vctp32q(MATRIX_DIM2);
+
+    vecColBOffs = vidupq_u32((uint32_t)0, 1);
+    vecColBOffs = vecColBOffs * MATRIX_DIM2;
+
+    pInB = pSrcB->pData;
+
+    /* load 1st B column (partial load) */
+    vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0);
+
+    /* load A rows */
+    vecA0 = vldrwq_s32(pInA0);
+    vecA1 = vldrwq_s32(pInA1);
+
+    acc0 = vrmlaldavhq(vecA0, vecB);
+    acc1 = vrmlaldavhq(vecA1, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+
+    pOut[0 * MATRIX_DIM2] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM2] = (q31_t) acc1;
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0);
+
+    acc0 = vrmlaldavhq(vecA0, vecB);
+    acc1 = vrmlaldavhq(vecA1, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+
+    pOut[0 * MATRIX_DIM2] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM2] = (q31_t) acc1;
+    /*
+     * Return to application
+     */
+    return (ARM_MATH_SUCCESS);
+}
+
+
+
+__STATIC_INLINE arm_status arm_mat_mult_opt_q31_3x3_mve(
+    const arm_matrix_instance_q31 * pSrcA,
+    const arm_matrix_instance_q31 * pSrcB,
+    arm_matrix_instance_q31 * pDst)
+{
+    q31_t       *pInB = pSrcB->pData;  /* input data matrix pointer B */
+    q31_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
+    q31_t       *pOut = pDst->pData;   /* output data matrix pointer */
+    uint32x4_t   vecColBOffs;
+    q31_t       *pInA0 = pInA;
+    q31_t       *pInA1 = pInA0 + MATRIX_DIM3;
+    q31_t       *pInA2 = pInA1 + MATRIX_DIM3;
+    q63_t        acc0, acc1, acc2;
+    q31x4_t      vecB, vecA;
+    /* enable predication to disable last (4th) vector element */
+    mve_pred16_t p0 = vctp32q(MATRIX_DIM3);
+
+    vecColBOffs = vidupq_u32((uint32_t)0, 1);
+    vecColBOffs = vecColBOffs * MATRIX_DIM3;
+
+    pInB = pSrcB->pData;
+
+    vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+
+    pOut[0 * MATRIX_DIM3] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM3] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM3] = (q31_t) acc2;
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+
+    pOut[0 * MATRIX_DIM3] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM3] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM3] = (q31_t) acc2;
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_z_s32(pInB, vecColBOffs, p0);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+
+    pOut[0 * MATRIX_DIM3] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM3] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM3] = (q31_t) acc2;
+    /*
+     * Return to application
+     */
+    return (ARM_MATH_SUCCESS);
+}
+
+__STATIC_INLINE arm_status arm_mat_mult_opt_q31_4x4_mve(
+    const arm_matrix_instance_q31 * pSrcA,
+    const arm_matrix_instance_q31 * pSrcB,
+    arm_matrix_instance_q31 * pDst)
+{
+    q31_t       *pInB = pSrcB->pData;  /* input data matrix pointer B */
+    q31_t       *pInA = pSrcA->pData;  /* input data matrix pointer A */
+    q31_t       *pOut = pDst->pData;   /* output data matrix pointer */
+    uint32x4_t   vecColBOffs;
+    q31_t       *pInA0 = pInA;
+    q31_t       *pInA1 = pInA0 + MATRIX_DIM4;
+    q31_t       *pInA2 = pInA1 + MATRIX_DIM4;
+    q31_t       *pInA3 = pInA2 + MATRIX_DIM4;
+    q63_t        acc0, acc1, acc2, acc3;
+    q31x4_t      vecB, vecA;
+
+    vecColBOffs = vidupq_u32((uint32_t)0, 4);
+
+    pInB = pSrcB->pData;
+
+    vecB = vldrwq_gather_shifted_offset_s32(pInB, vecColBOffs);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA3);
+    acc3 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+    acc3 = asrl(acc3, 23);
+
+    pOut[0 * MATRIX_DIM4] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM4] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM4] = (q31_t) acc2;
+    pOut[3 * MATRIX_DIM4] = (q31_t) acc3;
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_s32(pInB, vecColBOffs);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA3);
+    acc3 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+    acc3 = asrl(acc3, 23);
+
+    pOut[0 * MATRIX_DIM4] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM4] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM4] = (q31_t) acc2;
+    pOut[3 * MATRIX_DIM4] = (q31_t) acc3;
+
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_s32(pInB, vecColBOffs);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA3);
+    acc3 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+    acc3 = asrl(acc3, 23);
+
+    pOut[0 * MATRIX_DIM4] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM4] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM4] = (q31_t) acc2;
+    pOut[3 * MATRIX_DIM4] = (q31_t) acc3;
+
+    pOut++;
+
+    /* move to next B column */
+    pInB = pInB + 1;
+
+    vecB = vldrwq_gather_shifted_offset_s32(pInB, vecColBOffs);
+
+    vecA = vldrwq_s32(pInA0);
+    acc0 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA1);
+    acc1 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA2);
+    acc2 = vrmlaldavhq(vecA, vecB);
+    vecA = vldrwq_s32(pInA3);
+    acc3 = vrmlaldavhq(vecA, vecB);
+
+    acc0 = asrl(acc0, 23);
+    acc1 = asrl(acc1, 23);
+    acc2 = asrl(acc2, 23);
+    acc3 = asrl(acc3, 23);
+
+    pOut[0 * MATRIX_DIM4] = (q31_t) acc0;
+    pOut[1 * MATRIX_DIM4] = (q31_t) acc1;
+    pOut[2 * MATRIX_DIM4] = (q31_t) acc2;
+    pOut[3 * MATRIX_DIM4] = (q31_t) acc3;
+    /*
+     * Return to application
+     */
+    return (ARM_MATH_SUCCESS);
+}
+
+
+arm_status arm_mat_mult_opt_q31(
+    const arm_matrix_instance_q31 * pSrcA,
+    const arm_matrix_instance_q31 * pSrcB,
+    arm_matrix_instance_q31 * pDst,
+    q31_t *pState)
+{
+    q31_t          *pInA = pSrcA->pData;        /* input data matrix pointer A */
+    q31_t          *pInB = pSrcB->pData;        /* input data matrix pointer B */
+    q31_t          *pInA2;
+    q31_t          *pInB2;
+    q31_t          *px;         /* Temporary output data matrix pointer */
+    q31_t          *px2;        /* Temporary output data matrix pointer */
+    uint32_t        numRowsA = pSrcA->numRows;  /* number of rows of input matrix A    */
+    uint32_t        numColsB = pSrcB->numCols;  /* number of columns of input matrix B */
+    uint32_t        numColsA = pSrcA->numCols;  /* number of columns of input matrix A */
+    uint32_t        numRowsB = pSrcB->numRows;  /* number of rows of input matrix B    */
+    uint32_t        col, i = 0u, j, row = numRowsB;     /* loop counters */
+    q31_t          *pSrcBT = pState;     /* input data matrix pointer for transpose */
+    uint32_t        blkCnt;     /* loop counters */
+    arm_status      status;                            /* Status of matrix multiplication */
+    arm_matrix_instance_q31 BT;
+#ifdef ARM_MATH_MATRIX_CHECK
+
+    /* Check for matrix mismatch condition */
+    if ((pSrcA->numCols != pSrcB->numRows) ||
+        (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) {
+        /* Set status as ARM_MATH_SIZE_MISMATCH */
+        status = ARM_MATH_SIZE_MISMATCH;
+    } else
+#endif                          /* #ifdef ARM_MATH_MATRIX_CHECK */
+    {
+
+    /* specialized routines for small square matrices */
+    if(numRowsA == numColsB && numColsB == numColsA) {
+        if (numRowsA == 1)
+        {
+          q63_t sum =  (q63_t) *pInA * *pInB;
+          pDst->pData[0] = (q31_t)(sum >> 31);
+          return (ARM_MATH_SUCCESS);
+        }
+        else if(numRowsA == 2)
+            return arm_mat_mult_opt_q31_2x2_mve(pSrcA, pSrcB, pDst);
+        else if(numRowsA == 3)
+            return arm_mat_mult_opt_q31_3x3_mve(pSrcA, pSrcB, pDst);
+        else if (numRowsA == 4)
+            return arm_mat_mult_opt_q31_4x4_mve(pSrcA, pSrcB, pDst);
+    }
+
+
+        /*
+         * Matrix transpose
+         */
+        BT.numRows = numColsB;
+        BT.numCols = numRowsB;
+        BT.pData = pSrcBT;
+
+        arm_mat_trans_q31(pSrcB, &BT);
+
+
+        /*
+         * Reset the variables for the usage in the following multiplication process
+         */
+        i = 0;
+        row = numRowsA >> 1;
+        px = pDst->pData;
+        px2 = px + numColsB;
+
+        /*
+         * main loop
+         * compute 2 x 2 output blocks
+         * with dot products (Matrix A rows * Transposed Matrix B rows)
+         */
+        while (row > 0u) {
+            /*
+             * For every row wise process, the column loop counter is to be initiated
+             * Compute 2 columns and 2 rows in parallel
+             */
+            col = numColsB >> 1;
+            j = 0;
+
+            /*
+             * column pair loop
+             */
+            while (col > 0u) {
+                q31_t const    *pSrcAVec, *pSrcBVec, *pSrcA2Vec, *pSrcB2Vec;
+                q31x4_t         vecA, vecA2, vecB, vecB2;
+                q63_t           acc0, acc1, acc2, acc3;
+
+                /*
+                 * Initiate the pointers
+                 * - 2 x consecutive Matrix A rows (i increment is 2 x numColsA)
+                 * - 2 x consecutive Matrix B' rows (j increment is 2 x numRowsB)
+                 */
+                pInA = pSrcA->pData + i;
+                pInA2 = pInA + numColsA;
+                pInB = pSrcBT + j;
+                pInB2 = pInB + numRowsB;
+
+
+                pSrcAVec = (q31_t const *) pInA;
+                pSrcA2Vec = (q31_t const *) pInA2;
+                pSrcBVec = (q31_t const *) pInB;
+                pSrcB2Vec = (q31_t const *) pInB2;
+
+                acc0 = 0LL;
+                acc1 = 0LL;
+                acc2 = 0LL;
+                acc3 = 0LL;
+
+                /* load scheduling */
+                vecA = vld1q(pSrcAVec);
+                pSrcAVec += 4;
+
+                blkCnt = (numColsA / 4);
+                while (blkCnt > 0U) {
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 4;
+                    acc0 = vrmlaldavhaq(acc0, vecA, vecB);
+                    vecA2 = vld1q(pSrcA2Vec);
+                    pSrcA2Vec += 4;
+                    acc1 = vrmlaldavhaq(acc1, vecA2, vecB);
+                    vecB2 = vld1q(pSrcB2Vec);
+                    pSrcB2Vec += 4;
+                    acc2 = vrmlaldavhaq(acc2, vecA, vecB2);
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 4;
+                    acc3 = vrmlaldavhaq(acc3, vecA2, vecB2);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 * (will be merged thru tail predication)
+                 */
+                blkCnt = (numColsA & 3);
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp32q(blkCnt);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vrmlaldavhaq_p(acc0, vecA, vecB, p0);
+                    vecA2 = vld1q(pSrcA2Vec);
+                    acc1 = vrmlaldavhaq_p(acc1, vecA2, vecB, p0);
+                    vecB2 = vld1q(pSrcB2Vec);
+                    acc2 = vrmlaldavhaq_p(acc2, vecA, vecB2, p0);
+                    vecA = vld1q(pSrcAVec);
+                    acc3 = vrmlaldavhaq_p(acc3, vecA2, vecB2, p0);
+                }
+
+                /* Convert to 1.31 */
+                acc0 = asrl(acc0, 23);
+                acc1 = asrl(acc1, 23);
+                acc2 = asrl(acc2, 23);
+                acc3 = asrl(acc3, 23);
+
+                /* Store the results (2 x 2 block) in the destination buffer */
+                *px++ = (q31_t) acc0;
+                *px++ = (q31_t) acc2;
+                *px2++ = (q31_t) acc1;
+                *px2++ = (q31_t) acc3;
+
+                j += numRowsB * 2;
+                /*
+                 * Decrement the column pair loop counter
+                 */
+                col--;
+
+            }
+
+            i = i + numColsA * 2;
+            px = px2 + (numColsB & 1u);
+            px2 = px + numColsB;
+            /*
+             * Decrement the row pair loop counter
+             */
+            row--;
+        }
+
+        /*
+         * Compute remaining row and/or column below
+         */
+        if (numColsB & 1u) {
+            row = numRowsA & (~0x1);    //avoid redundant computation
+            px = pDst->pData + numColsB - 1;
+            i = 0;
+
+            /*
+             * row loop
+             */
+            while (row > 0) {
+                q31_t const    *pSrcAVec, *pSrcBVec;
+                q31x4_t         vecA, vecB;
+                q63_t           acc0;
+
+                /*
+                 * point to last column in matrix B
+                 */
+                pInB = pSrcBT + numRowsB * (numColsB - 1);
+                pInA = pSrcA->pData + i;
+
+                pSrcAVec = (q31_t const *) pInA;
+                pSrcBVec = (q31_t const *) pInB;
+
+                /* single dot-product */
+                acc0 = 0LL;
+                blkCnt = (numColsA / 4);
+                while (blkCnt > 0U) {
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 4;
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 4;
+                    acc0 = vrmlaldavhaq(acc0, vecA, vecB);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 * (will be merged thru tail predication)
+                 */
+                blkCnt = (numColsA & 3);
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp32q(blkCnt);
+                    vecA = vld1q(pSrcAVec);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vrmlaldavhaq_p(acc0, vecA, vecB, p0);
+                }
+
+                acc0 = asrl(acc0, 23);
+                *px = (q31_t) acc0;
+
+                px += numColsB;
+
+                i += numColsA;
+                /*
+                 * Decrement the row loop counter
+                 */
+                row--;
+            }
+        }
+
+        if (numRowsA & 1u) {
+            col = numColsB;
+            i = 0u;
+            /*
+             * point to last row in output matrix
+             */
+            px = pDst->pData + (numColsB) * (numRowsA - 1);
+            /*
+             * col loop
+             */
+            while (col > 0) {
+                q31_t const    *pSrcAVec, *pSrcBVec;
+                q31x4_t         vecA, vecB;
+                q63_t           acc0;
+
+                /*
+                 * point to last row in matrix A
+                 */
+                pInA = pSrcA->pData + (numRowsA - 1) * numColsA;
+                pInB = pSrcBT + i;
+
+                /*
+                 * Set up the vector read pointers and clear the accumulator acc0
+                 */
+                pSrcAVec = (q31_t const *) pInA;
+                pSrcBVec = (q31_t const *) pInB;
+                acc0 = 0LL;
+
+                blkCnt = (numColsA / 4);
+                while (blkCnt > 0U) {
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 4;
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 4;
+                    acc0 = vrmlaldavhaq(acc0, vecA, vecB);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 * (will be merged thru tail predication)
+                 */
+                blkCnt = (numColsA & 3);
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp32q(blkCnt);
+                    vecA = vld1q(pSrcAVec);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vrmlaldavhaq_p(acc0, vecA, vecB, p0);
+                }
+
+                acc0 = asrl(acc0, 23);
+                *px++ = (q31_t) acc0;
+
+                i += numColsA;
+                /*
+                 * Decrement the col loop counter
+                 */
+                col--;
+            }
+        }
+        /* Set status as ARM_MATH_SUCCESS */
+        status = ARM_MATH_SUCCESS;
+    }
+    /*
+     * Return to application
+     */
+    return (status);
+}
+
+#else
+arm_status arm_mat_mult_opt_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+        arm_matrix_instance_q31 * pDst,
+        q31_t *pState)
+{
+  q31_t *pIn1 = pSrcA->pData;                    /* Input data matrix pointer A */
+  q31_t *pIn2 = pSrcB->pData;                    /* Input data matrix pointer B */
+  q31_t *pInA = pSrcA->pData;                    /* Input data matrix pointer A */
+  q31_t *pInB = pSrcB->pData;                    /* Input data matrix pointer B */
+  q31_t *pOut = pDst->pData;                     /* Output data matrix pointer */
+  q31_t *px;                                     /* Temporary output data matrix pointer */
+  q63_t sum;                                     /* Accumulator */
+  uint16_t numRowsA = pSrcA->numRows;            /* Number of rows of input matrix A */
+  uint16_t numColsB = pSrcB->numCols;            /* Number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* Number of columns of input matrix A */
+  uint32_t col, i = 0U, row = numRowsA, colCnt;  /* Loop counters */
+  arm_status status;                             /* Status of matrix multiplication */
+  (void)pState;
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrcA->numCols != pSrcB->numRows) ||
+      (pSrcA->numRows != pDst->numRows)  ||
+      (pSrcB->numCols != pDst->numCols)    )
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+
+#endif /* #ifdef ARM_MATH_MATRIX_CHECK */
+
+  {
+    /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
+    /* row loop */
+    do
+    {
+      /* Output pointer is set to starting address of row being processed */
+      px = pOut + i;
+
+      /* For every row wise process, column loop counter is to be initiated */
+      col = numColsB;
+
+      /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */
+      pIn2 = pSrcB->pData;
+
+      /* column loop */
+      do
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0;
+
+        /* Initialize pointer pIn1 to point to starting address of the row of pSrcA being processed */
+        pIn1 = pInA;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+        /* Loop unrolling: Compute 4 MACs at a time. */
+        colCnt = numColsA >> 2U;
+
+        /* matrix multiplication */
+        while (colCnt > 0U)
+        {
+          /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
+
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) *pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) *pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) *pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) *pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          /* Decrement loop counter */
+          colCnt--;
+        }
+
+        /* Loop unrolling: Compute remaining MACs */
+        colCnt = numColsA % 0x4U;
+
+#else
+
+        /* Initialize colCnt with number of columns */
+        colCnt = numColsA;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+        while (colCnt > 0U)
+        {
+          /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
+
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) *pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          /* Decrement loop counter */
+          colCnt--;
+        }
+
+        /* Convert result from 2.62 to 1.31 format and store in destination buffer */
+        *px++ = (q31_t) (sum >> 31);
+
+        /* Decrement column loop counter */
+        col--;
+
+        /* Update pointer pIn2 to point to starting address of next column */
+        pIn2 = pInB + (numColsB - col);
+
+      } while (col > 0U);
+
+      /* Update pointer pInA to point to starting address of next row */
+      i = i + numColsB;
+      pInA = pInA + numColsA;
+
+      /* Decrement row loop counter */
+      row--;
+
+    } while (row > 0U);
+
+    /* Set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of MatrixMult group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q15.c
index 612ad92..026a993 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_q15.c
  * Description:  Q15 matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        3 Nov 2021
+ * $Revision:    V1.10.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -44,7 +44,7 @@
   @param[in]     pSrcA      points to the first input matrix structure
   @param[in]     pSrcB      points to the second input matrix structure
   @param[out]    pDst       points to output matrix structure
-  @param[in]     pState     points to the array for storing intermediate results (Unused)
+  @param[in]     pState     points to the array for storing intermediate results
   @return        execution status
                    - \ref ARM_MATH_SUCCESS       : Operation successful
                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
@@ -317,282 +317,309 @@ __STATIC_INLINE arm_status arm_mat_mult_q15_4x4_mve(
     return (ARM_MATH_SUCCESS);
 }
 
+
 arm_status arm_mat_mult_q15(
-  const arm_matrix_instance_q15 * pSrcA,
-  const arm_matrix_instance_q15 * pSrcB,
-        arm_matrix_instance_q15 * pDst,
-        q15_t                   * pState)
+    const arm_matrix_instance_q15 * pSrcA,
+    const arm_matrix_instance_q15 * pSrcB,
+    arm_matrix_instance_q15 * pDst,
+    q15_t * pState)
 {
-    q15_t    *pInB = pSrcB->pData;  /* input data matrix pointer B */
-    q15_t    *pInA = pSrcA->pData;  /* input data matrix pointer A */
-    q15_t    *pOut = pDst->pData;   /* output data matrix pointer */
-    q15_t    *px;               /* Temporary output data matrix pointer */
-    uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
-    uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
-    uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
-    uint16x8_t vecOffs, vecColBOffs;
-    uint32_t  blkCnt,rowCnt;           /* loop counters */
-    arm_status status;                             /* Status of matrix multiplication */
-    (void)pState;
+    q15_t          *pInA = pSrcA->pData;        /* input data matrix pointer A */
+    q15_t          *pInB = pSrcB->pData;        /* input data matrix pointer B */
+    q15_t          *pInA2;
+    q15_t          *pInB2;
+    q15_t          *px;         /* Temporary output data matrix pointer */
+    q15_t          *px2;        /* Temporary output data matrix pointer */
+    uint32_t        numRowsA = pSrcA->numRows;  /* number of rows of input matrix A    */
+    uint32_t        numColsB = pSrcB->numCols;  /* number of columns of input matrix B */
+    uint32_t        numColsA = pSrcA->numCols;  /* number of columns of input matrix A */
+    uint32_t        numRowsB = pSrcB->numRows;  /* number of rows of input matrix B    */
+    uint32_t        col, i = 0u, j, row = numRowsB;     /* loop counters */
+    q15_t          *pSrcBT = pState;    /* input data matrix pointer for transpose */
+    uint32_t        blkCnt;     /* loop counters */
+    arm_status      status;                             /* Status of matrix multiplication */
+    arm_matrix_instance_q15 BT;
 
 #ifdef ARM_MATH_MATRIX_CHECK
 
-  /* Check for matrix mismatch condition */
-  if ((pSrcA->numCols != pSrcB->numRows) ||
+    /* Check for matrix mismatch condition */
+    if ((pSrcA->numCols != pSrcB->numRows) ||
       (pSrcA->numRows != pDst->numRows)  ||
       (pSrcB->numCols != pDst->numCols)    )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-#endif 
-  {
-    /* small squared matrix specialized routines */
-    if(numRowsA == numColsB && numColsB == numColsA) {
-
-        if (numRowsA == 1)
-        {
-           q63_t sum;
-           sum = pInA[0] * pInB[0];
-           pOut[0] = (q15_t) __SSAT((sum >> 15), 16);
-           return (ARM_MATH_SUCCESS);
-        }
-        else if(numRowsA == 2)
-            return arm_mat_mult_q15_2x2_mve(pSrcA, pSrcB, pDst);
-        else if(numRowsA == 3)
-            return arm_mat_mult_q15_3x3_mve(pSrcA, pSrcB, pDst);
-        else if (numRowsA == 4)
-            return arm_mat_mult_q15_4x4_mve(pSrcA, pSrcB, pDst);
+    {
+        /* Set status as ARM_MATH_SIZE_MISMATCH */
+        status = ARM_MATH_SIZE_MISMATCH;
     }
-
-    vecColBOffs = vidupq_u16((uint32_t)0, 1);
-    vecColBOffs = vecColBOffs * (uint16_t) (numColsB);
-
-    /*
-     * The following loop performs the dot-product of each row in pSrcA with each column in pSrcB
-     */
-
-    /*
-     * row loop
-     */
-    rowCnt = row >> 2;
-    while (rowCnt > 0U)
+    else
+#endif
     {
+        /* small squared matrix specialized routines */
+        if (numRowsA == numColsB && numColsB == numColsA) {
+
+            if (numRowsA == 1) {
+                q63_t           sum;
+                sum = pInA[0] * pInB[0];
+                pDst->pData[0] = (q15_t) __SSAT((sum >> 15), 16);
+                return (ARM_MATH_SUCCESS);
+            } else if (numRowsA == 2)
+                return arm_mat_mult_q15_2x2_mve(pSrcA, pSrcB, pDst);
+            else if (numRowsA == 3)
+                return arm_mat_mult_q15_3x3_mve(pSrcA, pSrcB, pDst);
+            else if (numRowsA == 4)
+                return arm_mat_mult_q15_4x4_mve(pSrcA, pSrcB, pDst);
+        }
+
         /*
-         * Output pointer is set to starting address of the row being processed
+         * Matrix transpose
          */
-        px = pOut + i;
-        i = i + 4 * numColsB;
+
+        BT.numRows = numColsB;
+        BT.numCols = numRowsB;
+        BT.pData = pSrcBT;
+
+        arm_mat_trans_q15(pSrcB, &BT);
+
+
         /*
-         * For every row wise process, the column loop counter is to be initiated
+         * Reset the variables for the usage in the following multiplication process
          */
-        col = numColsB;
+        i = 0;
+        row = numRowsA >> 1;
+        px = pDst->pData;
+        px2 = px + numColsB;
+
         /*
-         * For every row wise process, the pInB pointer is set
-         * to the starting address of the pSrcB data
+         * The following loop performs the dot-product of each row in pSrcA with each column in pSrcB
          */
-        pInB = pSrcB->pData;
+
         /*
-         * column loop
+         * row loop
          */
-        while (col > 0U)
-        {
+        while (row > 0u) {
             /*
-             * generate 4 columns elements
+             * For every row wise process, the column loop counter is to be initiated
              */
+            col = numColsB >> 1;
             /*
-             * Matrix A columns number of MAC operations are to be performed
+             * For every row wise process, the pInB pointer is set
+             * to the starting address of the transposed pSrcB data
              */
-            colCnt = numColsA;
-
-            q15_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
-            q15_t    *pInA0 = pInA;
-            q15_t    *pInA1 = pInA0 + numColsA;
-            q15_t    *pInA2 = pInA1 + numColsA;
-            q15_t    *pInA3 = pInA2 + numColsA;
-            q63_t     acc0, acc1, acc2, acc3;
-
-            acc0 = 0LL;
-            acc1 = 0LL;
-            acc2 = 0LL;
-            acc3 = 0LL;
-
-            pSrcA0Vec = (q15_t const *) pInA0;
-            pSrcA1Vec = (q15_t const *) pInA1;
-            pSrcA2Vec = (q15_t const *) pInA2;
-            pSrcA3Vec = (q15_t const *) pInA3;
-
-            vecOffs = vecColBOffs;
-
-            blkCnt = (numColsA) >> 3;
-            while (blkCnt > 0U)
-            {
-                q15x8_t vecB, vecA;
-
-                vecB = vldrhq_gather_shifted_offset((int16_t const *)pInB, vecOffs);
-                vecOffs = vecOffs + (uint16_t) (numColsB * 8);
-
-                vecA = vld1q(pSrcA0Vec);  pSrcA0Vec += 8;
-                acc0 = vmlaldavaq(acc0, vecA, vecB);
-                vecA = vld1q(pSrcA1Vec);  pSrcA1Vec += 8;
-                acc1 = vmlaldavaq(acc1, vecA, vecB);
-                vecA = vld1q(pSrcA2Vec);  pSrcA2Vec += 8;
-                acc2 = vmlaldavaq(acc2, vecA, vecB);
-                vecA = vld1q(pSrcA3Vec);  pSrcA3Vec += 8;
-                acc3 = vmlaldavaq(acc3, vecA, vecB);
-                blkCnt--;
+            pInB = pSrcBT;
+            pInB2 = pInB + numRowsB;
+            j = 0;
 
-            }
             /*
-             * tail
+             * column loop
              */
-            blkCnt = numColsA & 7;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp16q(blkCnt);
-                q15x8_t   vecB, vecA;
-
-                vecB = vldrhq_gather_shifted_offset((int16_t const *)pInB, vecOffs);
-                vecOffs = vecOffs + (uint16_t) (numColsB * 8);
-
-                vecA = vld1q(pSrcA0Vec);
-                acc0 = vmlaldavaq_p(acc0, vecA, vecB, p0);
-                vecA = vld1q(pSrcA1Vec);
-                acc1 = vmlaldavaq_p(acc1, vecA, vecB, p0);
-                vecA = vld1q(pSrcA2Vec);
-                acc2 = vmlaldavaq_p(acc2, vecA, vecB, p0);
-                vecA = vld1q(pSrcA3Vec);
-                acc3 = vmlaldavaq_p(acc3, vecA, vecB, p0);
+            while (col > 0u) {
+                q15_t const    *pSrcAVec, *pSrcBVec, *pSrcA2Vec, *pSrcB2Vec;
+                q15x8_t         vecA, vecA2, vecB, vecB2;
+                q63_t           acc0, acc1, acc2, acc3;
+
+                /*
+                 * Point pInA/pInA2 at the two rows of A and pInB/pInB2 at the two rows of transposed B being processed
+                 */
+                pInA = pSrcA->pData + i;
+                pInA2 = pInA + numColsA;
+                pInB = pSrcBT + j;
+                pInB2 = pInB + numRowsB;
+
+
+                pSrcAVec = (q15_t const *) pInA;
+                pSrcA2Vec = (q15_t const *) pInA2;
+                pSrcBVec = (q15_t const *) pInB;
+                pSrcB2Vec = (q15_t const *) pInB2;
+
+                acc0 = 0LL;
+                acc1 = 0LL;
+                acc2 = 0LL;
+                acc3 = 0LL;
+
+                vecA = vld1q(pSrcAVec);
+                pSrcAVec += 8;
+
+                blkCnt = numColsA / 8;
+                while (blkCnt > 0U) {
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 8;
+                    acc0 = vmlaldavaq(acc0, vecA, vecB);
+                    vecA2 = vld1q(pSrcA2Vec);
+                    pSrcA2Vec += 8;
+                    acc1 = vmlaldavaq(acc1, vecA2, vecB);
+                    vecB2 = vld1q(pSrcB2Vec);
+                    pSrcB2Vec += 8;
+                    acc2 = vmlaldavaq(acc2, vecA, vecB2);
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 8;
+                    acc3 = vmlaldavaq(acc3, vecA2, vecB2);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 */
+                blkCnt = numColsA & 7;
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp16q(blkCnt);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vmlaldavaq_p(acc0, vecA, vecB, p0);
+                    vecA2 = vld1q(pSrcA2Vec);
+                    acc1 = vmlaldavaq_p(acc1, vecA2, vecB, p0);
+                    vecB2 = vld1q(pSrcB2Vec);
+                    acc2 = vmlaldavaq_p(acc2, vecA, vecB2, p0);
+                    vecA = vld1q(pSrcAVec);
+                    acc3 = vmlaldavaq_p(acc3, vecA2, vecB2, p0);
+                }
+
+                *px++ = (q15_t) MVE_ASRL_SAT16(acc0, 15);
+                *px++ = (q15_t) MVE_ASRL_SAT16(acc2, 15);
+                *px2++ = (q15_t) MVE_ASRL_SAT16(acc1, 15);
+                *px2++ = (q15_t) MVE_ASRL_SAT16(acc3, 15);
+                j += numRowsB * 2;
+                /*
+                 * Decrement the column loop counter
+                 */
+                col--;
+
             }
 
-            px[0]            = (q15_t)MVE_ASRL_SAT16(acc0, 15);
-            px[1 * numColsB] = (q15_t)MVE_ASRL_SAT16(acc1, 15);
-            px[2 * numColsB] = (q15_t)MVE_ASRL_SAT16(acc2, 15);
-            px[3 * numColsB] = (q15_t)MVE_ASRL_SAT16(acc3, 15);
-            px++;
-            /*
-             * Decrement the column loop counter
-             */
-            col--;
+            i = i + numColsA * 2;
+            px = px2 + (numColsB & 1u);
+            px2 = px + numColsB;
             /*
-             * Update the pointer pInB to point to the  starting address of the next column
+             * Decrement the row loop counter
              */
-            pInB = pSrcB->pData + (numColsB - col);
+            row--;
         }
 
         /*
-         * Update the pointer pInA to point to the  starting address of the next row
-         */
-        pInA += (numColsA * 4);
-        /*
-         * Decrement the row loop counter
-         */
-        rowCnt --;
-
-    }
-
-    rowCnt = row & 3;
-    while (rowCnt > 0U)
-    {
-      /*
-         * Output pointer is set to starting address of the row being processed
-         */
-        px = pOut + i;
-        i = i + numColsB;
-        /*
-         * For every row wise process, the column loop counter is to be initiated
+         * Compute remaining row and/or column below
          */
-        col = numColsB;
-        /*
-         * For every row wise process, the pInB pointer is set
-         * to the starting address of the pSrcB data
-         */
-        pInB = pSrcB->pData;
-        /*
-         * column loop
-         */
-        while (col > 0U)
-        {
-            /*
-             * generate 4 columns elements
-             */
-            /*
-             * Matrix A columns number of MAC operations are to be performed
-             */
-            colCnt = numColsA;
-
-            q15_t const *pSrcA0Vec;
-            q15_t    *pInA0 = pInA;
-            q63_t     acc0;
-
-            acc0 = 0LL;
-
-            pSrcA0Vec = (q15_t const *) pInA0;
-           
-            vecOffs = vecColBOffs;
-
-            blkCnt = (numColsA) >> 3;
-            while (blkCnt > 0U)
-            {
-                q15x8_t vecB, vecA;
 
-                vecB = vldrhq_gather_shifted_offset((int16_t const *)pInB, vecOffs);
-                vecOffs = vecOffs + (uint16_t) (numColsB * 8);
+        if (numColsB & 1u) {
+            row = numRowsA & (~0x1);    //avoid redundant computation
+            px = pDst->pData + numColsB - 1;
+            i = 0;
 
-                vecA = vld1q(pSrcA0Vec);  
-                pSrcA0Vec += 8;
-                acc0 = vmlaldavaq(acc0, vecA, vecB);
-                
-                blkCnt--;
-
-            }
             /*
-             * tail
+             * row loop
              */
-            blkCnt = numColsA & 7;
-            if (blkCnt > 0U)
-            {
-                mve_pred16_t p0 = vctp16q(blkCnt);
-                q15x8_t   vecB, vecA;
-
-                vecB = vldrhq_gather_shifted_offset((int16_t const *)pInB, vecOffs);
-                vecOffs = vecOffs + (uint16_t) (numColsB * 8);
-
-                vecA = vld1q(pSrcA0Vec);
-                acc0 = vmlaldavaq_p(acc0, vecA, vecB, p0);
-                
+            while (row > 0) {
+                q15_t const    *pSrcAVec, *pSrcBVec;
+                q15x8_t         vecA, vecB;
+                q63_t           acc0;
+
+                /*
+                 * point to last column in matrix B
+                 */
+                pInB = pSrcBT + numRowsB * (numColsB - 1);
+                pInA = pSrcA->pData + i;
+
+                pSrcAVec = (q15_t const *) pInA;
+                pSrcBVec = (q15_t const *) pInB;
+
+                acc0 = 0LL;
+                blkCnt = (numColsA) / 8;
+                while (blkCnt > 0U) {
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 8;
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 8;
+                    acc0 = vmlaldavaq(acc0, vecA, vecB);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 */
+                blkCnt = (numColsA & 7);
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp16q(blkCnt);
+                    vecA = vld1q(pSrcAVec);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vmlaldavaq_p(acc0, vecA, vecB, p0);
+                }
+
+                *px = (q15_t) MVE_ASRL_SAT16(acc0, 15);
+
+                px += numColsB;
+
+                i += numColsA;
+                /*
+                 * Decrement the row loop counter
+                 */
+                row--;
             }
+        }
 
-            px[0]            = (q15_t)MVE_ASRL_SAT16(acc0, 15);
-          
-            px++;
+        if (numRowsA & 1u) {
+            col = numColsB;
+            i = 0u;
             /*
-             * Decrement the column loop counter
+             * point to last row in output matrix
              */
-            col--;
+            px = pDst->pData + (numColsB) * (numRowsA - 1);
             /*
-             * Update the pointer pInB to point to the  starting address of the next column
+             * col loop
              */
-            pInB = pSrcB->pData + (numColsB - col);
+            while (col > 0) {
+                q15_t const    *pSrcAVec, *pSrcBVec;
+                q15x8_t         vecA, vecB;
+                q63_t           acc0;
+
+                /*
+                 * point to last row in matrix A
+                 */
+                pInA = pSrcA->pData + (numRowsA - 1) * numColsA;
+                pInB = pSrcBT + i;
+
+                /*
+                 * Set up the vector source pointers and zero the accumulator acc0
+                 */
+                pSrcAVec = (q15_t const *) pInA;
+                pSrcBVec = (q15_t const *) pInB;
+                acc0 = 0LL;
+
+                blkCnt = ((numColsA) / 8);
+                while (blkCnt > 0U) {
+                    vecA = vld1q(pSrcAVec);
+                    pSrcAVec += 8;
+                    vecB = vld1q(pSrcBVec);
+                    pSrcBVec += 8;
+                    acc0 = vmlaldavaq(acc0, vecA, vecB);
+
+                    blkCnt--;
+                }
+                /*
+                 * tail
+                 */
+                blkCnt = (numColsA & 7);
+                if (blkCnt > 0U) {
+                    mve_pred16_t    p0 = vctp16q(blkCnt);
+                    vecA = vld1q(pSrcAVec);
+                    vecB = vld1q(pSrcBVec);
+                    acc0 = vmlaldavaq_p(acc0, vecA, vecB, p0);
+                }
+
+                *px++ = (q15_t) MVE_ASRL_SAT16(acc0, 15);
+
+                i += numColsA;
+
+                /*
+                 * Decrement the col loop counter
+                 */
+                col--;
+            }
         }
 
-        /*
-         * Update the pointer pInA to point to the  starting address of the next row
-         */
-        pInA += (numColsA );
-        rowCnt--;
+        /* Set status as ARM_MATH_SUCCESS */
+        status = ARM_MATH_SUCCESS;
     }
-    /* Set status as ARM_MATH_SUCCESS */
-    status = ARM_MATH_SUCCESS;
-  }
-
-  /* Return to application */
-  return (status);
-
+    /* Return to application */
+    return (status);
 }
-#else
+
+#else 
 arm_status arm_mat_mult_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
@@ -610,12 +637,12 @@ arm_status arm_mat_mult_q15(
         uint16_t numRowsA = pSrcA->numRows;            /* Number of rows of input matrix A */
         uint16_t numColsB = pSrcB->numCols;            /* Number of columns of input matrix B */
         uint16_t numColsA = pSrcA->numCols;            /* Number of columns of input matrix A */
-        uint16_t numRowsB = pSrcB->numRows;            /* Number of rows of input matrix A */
+        uint16_t numRowsB = pSrcB->numRows;            /* Number of rows of input matrix B */
         uint32_t col, i = 0U, row = numRowsB, colCnt;  /* Loop counters */
         arm_status status;                             /* Status of matrix multiplication */
-        
-        q31_t in;                                      /* Temporary variable to hold the input value */
+
         q31_t inA1, inB1, inA2, inB2;
+        arm_matrix_instance_q15 BT;
 
 #ifdef ARM_MATH_MATRIX_CHECK
 
@@ -630,89 +657,13 @@ arm_status arm_mat_mult_q15(
   else
 
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
   {
-    /* Matrix transpose */
-    do
-    {
-      /* The pointer px is set to starting address of column being processed */
-      px = pSrcBT + i;
-
-      /* Apply loop unrolling and exchange columns with row elements */
-      col = numColsB >> 2U;
-
-      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
-       ** a second loop below computes the remaining 1 to 3 samples. */
-      while (col > 0U)
-      {
-        /* Read two elements from row */
-        in = read_q15x2_ia ((q15_t **) &pInB);
-
-        /* Unpack and store one element in destination */
-#ifndef ARM_MATH_BIG_ENDIAN
-        *px = (q15_t) in;
-#else
-        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
-#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
-
-        /* Update pointer px to point to next row of transposed matrix */
-        px += numRowsB;
-
-        /* Unpack and store second element in destination */
-#ifndef ARM_MATH_BIG_ENDIAN
-        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
-#else
-        *px = (q15_t) in;
-#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
-
-        /* Update pointer px to point to next row of transposed matrix */
-        px += numRowsB;
-
-        /* Read two elements from row */
-        in = read_q15x2_ia ((q15_t **) &pInB);
-
-        /* Unpack and store one element in destination */
-#ifndef ARM_MATH_BIG_ENDIAN
-        *px = (q15_t) in;
-#else
-        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
-#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
-        px += numRowsB;
-
-#ifndef ARM_MATH_BIG_ENDIAN
-        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
-#else
-        *px = (q15_t) in;
-#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
-        px += numRowsB;
-
-        /* Decrement column loop counter */
-        col--;
-      }
 
-      /* If the columns of pSrcB is not a multiple of 4, compute any remaining output samples here.
-       ** No loop unrolling is used. */
-      col = numColsB % 0x4U;
-
-      while (col > 0U)
-      {
-        /* Read and store input element in destination */
-        *px = *pInB++;
-
-        /* Update pointer px to point to next row of transposed matrix */
-        px += numRowsB;
-
-        /* Decrement column loop counter */
-        col--;
-      }
-
-      i++;
-
-      /* Decrement row loop counter */
-      row--;
-
-    } while (row > 0U);
+    BT.numRows = numColsB;
+    BT.numCols = numRowsB;
+    BT.pData = pSrcBT;
 
+    arm_mat_trans_q15(pSrcB,&BT);
     /* Reset variables for usage in following multiplication process */
     row = numRowsA;
     i = 0U;
@@ -746,13 +697,13 @@ arm_status arm_mat_mult_q15(
           /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
 
           /* read real and imag values from pSrcA and pSrcB buffer */
-          inA1 = read_q15x2_ia ((q15_t **) &pInA);
-          inB1 = read_q15x2_ia ((q15_t **) &pInB);
+          inA1 = read_q15x2_ia (&pInA);
+          inB1 = read_q15x2_ia (&pInB);
 
-          inA2 = read_q15x2_ia ((q15_t **) &pInA);
-          inB2 = read_q15x2_ia ((q15_t **) &pInB);
+          inA2 = read_q15x2_ia (&pInA);
+          inB2 = read_q15x2_ia (&pInB);
 
-          /* Multiply and Accumlates */
+          /* Multiply and Accumulates */
           sum = __SMLALD(inA1, inB1, sum);
           sum = __SMLALD(inA2, inB2, sum);
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q31.c
index 54f1c09..252eebf 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_mult_q31.c
  * Description:  Q31 matrix multiplication
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -346,7 +346,7 @@ arm_status arm_mat_mult_q31(
     uint16_t    numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t    numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t    numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t    col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t    col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status  status;          /* status of matrix multiplication */
     uint32x4_t  vecOffs, vecColBOffs;
     uint32_t    blkCnt, rowCnt;           /* loop counters */
@@ -420,7 +420,6 @@ arm_status arm_mat_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
             q31_t const   *pInA0 = pInA;
@@ -543,7 +542,6 @@ arm_status arm_mat_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec;
             q31_t const   *pInA0 = pInA;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q7.c
index 79334e9..e9541fa 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_mult_q7.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_mult_q7.c
  * Description:  Q15 matrix multiplication
  *
- * $Date:        06. July 2020
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f16.c
new file mode 100644
index 0000000..1e04295
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f16.c
@@ -0,0 +1,784 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_qr_f16.c
+ * Description:  Half floating-point matrix QR decomposition.
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVE_FLOAT16)
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+#endif
+#endif
+
+/**
+  @ingroup groupMatrix
+ */
+
+
+/**
+  @addtogroup MatrixQR
+  @{
+ */
+
+/**
+  @brief         QR decomposition of an m x n half floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
+  @param[in]     threshold norm2 threshold.    
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m (can be NULL)
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension m.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+  
+  @par           pOutQ is optional:
+                 pOutQ can be a NULL pointer.
+                 In this case, the argument will be ignored
+                 and the output Q matrix won't be computed.
+
+  @par           f16 implementation
+                 The f16 implementation is not very accurate.
+
+  @par           Norm2 threshold 
+                 For the meaning of this argument please 
+                 refer to the \ref MatrixHouseholder documentation
+
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVE_FLOAT16)
+
+arm_status arm_mat_qr_f16(
+    const arm_matrix_instance_f16 * pSrc,
+    const float16_t threshold,
+    arm_matrix_instance_f16 * pOutR,
+    arm_matrix_instance_f16 * pOutQ,
+    float16_t * pOutTau,
+    float16_t *pTmpA,
+    float16_t *pTmpB
+    )
+
+{
+  int32_t col=0;
+  int32_t nb,pos;
+  float16_t *pa,*pc;
+  float16_t beta;
+  float16_t *pv;
+  float16_t *pdst;
+  float16_t *p;
+
+  if (pSrc->numRows < pSrc->numCols)
+  {
+    return(ARM_MATH_SIZE_MISMATCH);
+  }
+
+  memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float16_t));
+  pOutR->numCols = pSrc->numCols;
+  pOutR->numRows = pSrc->numRows;
+  
+  p = pOutR->pData;
+  
+  pc = pOutTau;
+  for(col=0 ; col < pSrc->numCols; col++)
+  {
+      int32_t j,k,blkCnt,blkCnt2;
+      float16_t *pa0,*pa1,*pa2,*pa3,*ptemp;
+      float16_t temp;
+      float16x8_t v1,v2,vtemp;
+
+      COPY_COL_F16(pOutR,col,col,pTmpA);
+
+      beta = arm_householder_f16(pTmpA,threshold,pSrc->numRows - col,pTmpA);
+      *pc++ = beta;
+    
+      pdst = pTmpB;
+
+      /* v.T A(col:,col:) -> tmpb */
+      pv = pTmpA;
+      pa = p;
+
+      temp = *pv;
+      blkCnt = (pSrc->numCols-col) >> 3;
+      while (blkCnt > 0)
+      {
+          v1 = vld1q_f16(pa);
+          v2 = vmulq_n_f16(v1,temp);
+          vst1q_f16(pdst,v2);
+
+          pa += 8;
+          pdst += 8;
+          blkCnt--;
+      }
+      blkCnt = (pSrc->numCols-col) & 7;
+      if (blkCnt > 0)
+      {
+          mve_pred16_t p0 = vctp16q(blkCnt);
+          v1 = vld1q_f16(pa);
+          v2 = vmulq_n_f16(v1,temp);
+          vst1q_p_f16(pdst,v2,p0);
+
+          pa += blkCnt;
+      }
+
+      pa += col;
+      pv++;
+      pdst = pTmpB;
+
+      pa0 = pa;
+      pa1 = pa0 + pSrc->numCols;
+      pa2 = pa1 + pSrc->numCols;
+      pa3 = pa2 + pSrc->numCols;
+
+      /* Unrolled loop */
+      blkCnt = (pSrc->numRows-col - 1) >> 2;
+      k=1;
+      while(blkCnt > 0)
+      {
+          vtemp=vld1q_f16(pv);
+
+          blkCnt2 = (pSrc->numCols-col) >> 3;
+          while (blkCnt2 > 0)
+          {
+              v1 = vld1q_f16(pdst);
+
+              v2 = vld1q_f16(pa0);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,0));
+
+              v2 = vld1q_f16(pa1);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,1));
+
+              v2 = vld1q_f16(pa2);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,2));
+
+              v2 = vld1q_f16(pa3);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,3));
+
+              vst1q_f16(pdst,v1);
+
+              pdst += 8;
+              pa0 += 8;
+              pa1 += 8;
+              pa2 += 8;
+              pa3 += 8;
+              blkCnt2--;
+          }
+          blkCnt2 = (pSrc->numCols-col) & 7;
+          if (blkCnt2 > 0)
+          {
+              mve_pred16_t p0 = vctp16q(blkCnt2);
+
+              v1 = vld1q_f16(pdst);
+
+              v2 = vld1q_f16(pa0);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,0));
+
+              v2 = vld1q_f16(pa1);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,1));
+
+              v2 = vld1q_f16(pa2);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,2));
+
+              v2 = vld1q_f16(pa3);
+              v1 = vfmaq_n_f16(v1,v2,vgetq_lane(vtemp,3));
+
+              vst1q_p_f16(pdst,v1,p0);
+
+              pa0 += blkCnt2;
+              pa1 += blkCnt2;
+              pa2 += blkCnt2;
+              pa3 += blkCnt2;
+          }
+              
+          pa0 += col + 3*pSrc->numCols;
+          pa1 += col + 3*pSrc->numCols;
+          pa2 += col + 3*pSrc->numCols;
+          pa3 += col + 3*pSrc->numCols;
+          pv  += 4;
+          pdst = pTmpB;
+          k += 4;
+          blkCnt--;
+      }
+
+      pa = pa0;
+      for(;k<pSrc->numRows-col; k++)
+      {
+          temp = *pv;
+          blkCnt2 = (pSrc->numCols-col) >> 3;
+          while (blkCnt2 > 0)
+          {
+              v1 = vld1q_f16(pa);
+              v2 = vld1q_f16(pdst);
+              v2 = vfmaq_n_f16(v2,v1,temp);
+              vst1q_f16(pdst,v2);
+
+              pa += 8;
+              pdst += 8;
+              blkCnt2--;
+          }
+          blkCnt2 = (pSrc->numCols-col) & 7;
+          if (blkCnt2 > 0)
+          {
+              mve_pred16_t p0 = vctp16q(blkCnt2);
+              v1 = vld1q_f16(pa);
+              v2 = vld1q_f16(pdst);
+              v2 = vfmaq_n_f16(v2,v1,temp);
+              vst1q_p_f16(pdst,v2,p0);
+
+              pa += blkCnt2;
+          }
+          
+          pa += col;
+          pv++;
+          pdst = pTmpB;
+      }
+
+      /* A(col:,col:) - beta v tmpb */
+      pa = p;
+      for(j=0;j<pSrc->numRows-col; j++)
+      {
+        float16_t f = -(_Float16)beta * (_Float16)pTmpA[j];
+        ptemp = pTmpB; 
+
+        blkCnt2 = (pSrc->numCols-col) >> 3;
+        while (blkCnt2 > 0)
+        {
+            v1 = vld1q_f16(pa);
+            v2 = vld1q_f16(ptemp);
+            v1 = vfmaq_n_f16(v1,v2,f);
+            vst1q_f16(pa,v1);
+
+            pa += 8;
+            ptemp += 8;
+
+            blkCnt2--;
+        }
+        blkCnt2 = (pSrc->numCols-col) & 7;
+        if (blkCnt2 > 0)
+        {
+            mve_pred16_t p0 = vctp16q(blkCnt2);
+
+            v1 = vld1q_f16(pa);
+            v2 = vld1q_f16(ptemp);
+            v1 = vfmaq_n_f16(v1,v2,f);
+            vst1q_p_f16(pa,v1,p0);
+
+            pa += blkCnt2;
+        }
+            
+        pa += col;
+      } 
+
+      /* Copy Householder reflectors into R matrix */
+      pa = p + pOutR->numCols;
+      for(k=0;k<pSrc->numRows-col-1; k++)
+      {
+         *pa = pTmpA[k+1];
+         pa += pOutR->numCols;
+      }
+
+      p += 1 + pOutR->numCols;
+  }
+
+  /* Generate Q if requested by user matrix */
+
+  if (pOutQ != NULL)
+  {
+     /* Initialize Q matrix to identity */
+     memset(pOutQ->pData,0,sizeof(float16_t)*pOutQ->numRows*pOutQ->numRows);
+     
+     pa = pOutQ->pData;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+        *pa = 1.0f16;
+        pa += pOutQ->numCols+1;
+     }
+   
+     nb = pOutQ->numRows - pOutQ->numCols + 1;
+   
+     pc = pOutTau + pOutQ->numCols - 1;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+       int32_t j,k, blkCnt, blkCnt2;
+       float16_t *pa0,*pa1,*pa2,*pa3,*ptemp;
+       float16_t temp;
+       float16x8_t v1,v2,vtemp;
+
+       pos = pSrc->numRows - nb;
+       p = pOutQ->pData + pos + pOutQ->numCols*pos ;
+   
+       
+       COPY_COL_F16(pOutR,pos,pos,pTmpA);
+       pTmpA[0] = 1.0f16;
+       pdst = pTmpB;
+      
+       /* v.T A(col:,col:) -> tmpb */
+       
+       pv = pTmpA;
+       pa = p;
+
+       temp = *pv;
+       blkCnt2 = (pOutQ->numRows-pos) >> 3;
+       while (blkCnt2 > 0)
+       {
+           v1 = vld1q_f16(pa);
+           v1 = vmulq_n_f16(v1, temp);
+           vst1q_f16(pdst,v1);
+
+           pa += 8;
+           pdst += 8;
+
+           blkCnt2--;
+       }
+       blkCnt2 = (pOutQ->numRows-pos) & 7;
+       if (blkCnt2 > 0)
+       {
+           mve_pred16_t p0 = vctp16q(blkCnt2);
+
+           v1 = vld1q_f16(pa);
+           v1 = vmulq_n_f16(v1, temp);
+           vst1q_p_f16(pdst,v1,p0);
+
+           pa += blkCnt2;
+       }
+           
+       pa += pos;
+       pv++;
+       pdst = pTmpB;
+       pa0 = pa;
+       pa1 = pa0 + pOutQ->numRows;
+       pa2 = pa1 + pOutQ->numRows;
+       pa3 = pa2 + pOutQ->numRows;
+
+       /* Unrolled loop */
+       blkCnt = (pOutQ->numRows-pos - 1) >> 2;
+       k=1;
+       while(blkCnt > 0)
+       {
+
+           vtemp = vld1q_f16(pv);
+           blkCnt2 = (pOutQ->numRows-pos) >> 3;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f16(pdst);
+
+               v2 = vld1q_f16(pa0);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,0));
+
+               v2 = vld1q_f16(pa1);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,1));
+
+               v2 = vld1q_f16(pa2);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,2));
+
+               v2 = vld1q_f16(pa3);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,3));
+
+               vst1q_f16(pdst,v1);
+
+               pa0 += 8;
+               pa1 += 8;
+               pa2 += 8;
+               pa3 += 8;
+               pdst += 8;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numRows-pos) & 7;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp16q(blkCnt2);
+
+               v1 = vld1q_f16(pdst);
+
+               v2 = vld1q_f16(pa0);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,0));
+
+               v2 = vld1q_f16(pa1);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,1));
+
+               v2 = vld1q_f16(pa2);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,2));
+
+               v2 = vld1q_f16(pa3);
+               v1 = vfmaq_n_f16(v1, v2, vgetq_lane(vtemp,3));
+
+               vst1q_p_f16(pdst,v1,p0);
+
+               pa0 += blkCnt2;
+               pa1 += blkCnt2;
+               pa2 += blkCnt2;
+               pa3 += blkCnt2;
+
+           }
+               
+           pa0 += pos + 3*pOutQ->numRows;
+           pa1 += pos + 3*pOutQ->numRows;
+           pa2 += pos + 3*pOutQ->numRows;
+           pa3 += pos + 3*pOutQ->numRows;
+           pv  += 4;
+           pdst = pTmpB;
+           k += 4;
+           blkCnt--;
+       }
+
+       pa = pa0;
+       for(;k<pOutQ->numRows-pos; k++)
+       {
+           temp = *pv;
+           blkCnt2 = (pOutQ->numRows-pos) >> 3;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f16(pdst);
+               v2 = vld1q_f16(pa);
+               v1 = vfmaq_n_f16(v1, v2, temp);
+               vst1q_f16(pdst,v1);
+
+               pdst += 8;
+               pa += 8;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numRows-pos) & 7;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp16q(blkCnt2);
+               v1 = vld1q_f16(pdst);
+               v2 = vld1q_f16(pa);
+               v1 = vfmaq_n_f16(v1, v2, temp);
+               vst1q_p_f16(pdst,v1,p0);
+
+               pa += blkCnt2;
+           }
+               
+           pa += pos;
+           pv++;
+           pdst = pTmpB;
+       }
+   
+       pa = p;
+       beta = *pc--;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+           float16_t f = -(_Float16)beta * (_Float16)pTmpA[j];
+           ptemp = pTmpB;
+
+           blkCnt2 = (pOutQ->numCols-pos) >> 3;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f16(pa);
+               v2 = vld1q_f16(ptemp);
+               v1 = vfmaq_n_f16(v1,v2,f);
+               vst1q_f16(pa,v1);
+
+               pa += 8;
+               ptemp += 8;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numCols-pos) & 7;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp16q(blkCnt2);
+
+               v1 = vld1q_f16(pa);
+               v2 = vld1q_f16(ptemp);
+               v1 = vfmaq_n_f16(v1,v2,f);
+               vst1q_p_f16(pa,v1,p0);
+
+               pa += blkCnt2;
+           }
+               
+           pa += pos;
+       } 
+   
+   
+       nb++;
+     }
+  }
+
+  arm_status status = ARM_MATH_SUCCESS;
+  /* Return to application */
+  return (status);
+}
+
+#endif /*#if !defined(ARM_MATH_MVEF)*/
+
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+arm_status arm_mat_qr_f16(
+    const arm_matrix_instance_f16 * pSrc,
+    const float16_t threshold,
+    arm_matrix_instance_f16 * pOutR,
+    arm_matrix_instance_f16 * pOutQ,
+    float16_t * pOutTau,
+    float16_t *pTmpA,
+    float16_t *pTmpB
+    )
+/* Householder QR: R plus reflectors -> pOutR, one scaling factor per column -> pOutTau, optional Q -> pOutQ */
+{
+  int32_t col=0;
+  int32_t nb,pos;
+  float16_t *pa,*pc;
+  float16_t beta;
+  float16_t *pv;
+  float16_t *pdst;
+  float16_t *p;
+  /* The factorization needs at least as many rows as columns */
+  if (pSrc->numRows < pSrc->numCols)
+  {
+    return(ARM_MATH_SIZE_MISMATCH);
+  }
+  /* The input is copied into pOutR and all further work is done on that copy */
+  memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float16_t));
+  pOutR->numCols = pSrc->numCols;
+  pOutR->numRows = pSrc->numRows;
+  /* p walks down the diagonal of R, pc walks through pOutTau */
+  p = pOutR->pData;
+
+  pc = pOutTau;
+  for(col=0 ; col < pSrc->numCols; col++)
+  {
+      int32_t i,j,k,blkCnt;
+      float16_t *pa0,*pa1,*pa2,*pa3;
+      COPY_COL_F16(pOutR,col,col,pTmpA);
+      /* pTmpA is overwritten in place with the reflector v, beta is its scaling factor */
+      beta = arm_householder_f16(pTmpA,threshold,pSrc->numRows - col,pTmpA);
+      *pc++ = beta;
+
+      pdst = pTmpB;
+
+      /* tmpb = v^T * A(col:,col:) ; first row initialises tmpb */
+      pv = pTmpA;
+      pa = p;
+      for(j=0;j<pSrc->numCols-col; j++)
+      {
+              *pdst++ = (_Float16)*pv * (_Float16)*pa++; 
+      }
+      pa += col;
+      pv++;
+      pdst = pTmpB;
+
+      pa0 = pa;
+      pa1 = pa0 + pSrc->numCols;
+      pa2 = pa1 + pSrc->numCols;
+      pa3 = pa2 + pSrc->numCols;
+
+      /* Remaining rows, four rows of A accumulated per iteration */
+      blkCnt = (pSrc->numRows-col - 1) >> 2;
+      k=1;
+      while(blkCnt > 0)
+      {
+          float16_t sum;
+
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              sum = *pdst;
+
+              sum += (_Float16)pv[0] * (_Float16)*pa0++;
+              sum += (_Float16)pv[1] * (_Float16)*pa1++;
+              sum += (_Float16)pv[2] * (_Float16)*pa2++;
+              sum += (_Float16)pv[3] * (_Float16)*pa3++;
+
+              *pdst++ = sum; 
+          }
+          pa0 += col + 3*pSrc->numCols;
+          pa1 += col + 3*pSrc->numCols;
+          pa2 += col + 3*pSrc->numCols;
+          pa3 += col + 3*pSrc->numCols;
+          pv  += 4;
+          pdst = pTmpB;
+          k += 4;
+          blkCnt--;
+      }
+      /* Rows left over after the four-row blocks, one row at a time */
+      pa = pa0;
+      for(;k<pSrc->numRows-col; k++)
+      {
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              *pdst++ += (_Float16)*pv * (_Float16)*pa++; 
+          }
+          pa += col;
+          pv++;
+          pdst = pTmpB;
+      }
+
+      /* A(col:,col:) -= beta * v * tmpb (rank-one update) */
+      pa = p;
+      for(j=0;j<pSrc->numRows-col; j++)
+      {
+        float16_t f = (_Float16)beta * (_Float16)pTmpA[j];
+
+        for(i=0;i<pSrc->numCols-col; i++)
+        {
+          *pa = (_Float16)*pa - (_Float16)f * (_Float16)pTmpB[i] ;
+          pa++;
+        }
+        pa += col;
+      } 
+
+      /* Store v[1:] below the diagonal of column col of R */
+      pa = p + pOutR->numCols;
+      for(k=0;k<pSrc->numRows-col-1; k++)
+      {
+         *pa = pTmpA[k+1];
+         pa += pOutR->numCols;
+      }
+      /* Advance to the next diagonal element */
+      p += 1 + pOutR->numCols;
+  }
+
+  /* Generate Q only when the caller supplied an output matrix */
+
+  if (pOutQ != NULL)
+  {
+     /* Initialize Q matrix to identity (Q is addressed as a square numRows x numRows matrix) */
+     memset(pOutQ->pData,0,sizeof(float16_t)*pOutQ->numRows*pOutQ->numRows);
+
+     pa = pOutQ->pData;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+        *pa = 1.0f16;
+        pa += pOutQ->numCols+1;
+     }
+     /* Only pSrc->numCols reflectors and scaling factors exist: apply them from the last one to the first */
+     nb = pOutQ->numRows - pSrc->numCols + 1;
+
+     pc = pOutTau + pSrc->numCols - 1;
+     for(col=0 ; col < pSrc->numCols; col++)
+     {
+       int32_t i,j,k, blkCnt;
+       float16_t *pa0,*pa1,*pa2,*pa3;
+       pos = pSrc->numRows - nb;
+       p = pOutQ->pData + pos + pOutQ->numCols*pos ;
+
+       /* Rebuild the reflector from column pos of R; its leading element is an implicit 1 */
+       COPY_COL_F16(pOutR,pos,pos,pTmpA);
+       pTmpA[0] = 1.0f16;
+       pdst = pTmpB;
+
+       /* tmpb = v^T * Q(pos:,pos:) ; first row initialises tmpb */
+
+       pv = pTmpA;
+       pa = p;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+               *pdst++ = (_Float16)*pv * (_Float16)*pa++; 
+       }
+       pa += pos;
+       pv++;
+       pdst = pTmpB;
+       pa0 = pa;
+       pa1 = pa0 + pOutQ->numRows;
+       pa2 = pa1 + pOutQ->numRows;
+       pa3 = pa2 + pOutQ->numRows;
+
+       /* Remaining rows, four rows of Q accumulated per iteration */
+       blkCnt = (pOutQ->numRows-pos - 1) >> 2;
+       k=1;
+       while(blkCnt > 0)
+       {
+           float16_t sum;
+
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+              sum = *pdst;
+
+              sum += (_Float16)pv[0] * (_Float16)*pa0++;
+              sum += (_Float16)pv[1] * (_Float16)*pa1++;
+              sum += (_Float16)pv[2] * (_Float16)*pa2++;
+              sum += (_Float16)pv[3] * (_Float16)*pa3++;
+
+              *pdst++ = sum; 
+           }
+           pa0 += pos + 3*pOutQ->numRows;
+           pa1 += pos + 3*pOutQ->numRows;
+           pa2 += pos + 3*pOutQ->numRows;
+           pa3 += pos + 3*pOutQ->numRows;
+           pv  += 4;
+           pdst = pTmpB;
+           k += 4;
+           blkCnt--;
+       }
+       /* Rows left over after the four-row blocks, one row at a time */
+       pa = pa0;
+       for(;k<pOutQ->numRows-pos; k++)
+       {
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+               *pdst++ += (_Float16)*pv * (_Float16)*pa++; 
+           }
+           pa += pos;
+           pv++;
+           pdst = pTmpB;
+       }
+       /* Q(pos:,pos:) -= beta * v * tmpb, with beta read backwards from pOutTau */
+       pa = p;
+       beta = *pc--;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+           float16_t f = (_Float16)beta * (_Float16)pTmpA[j];
+
+           for(i=0;i<pOutQ->numCols-pos; i++)
+           {
+             *pa = (_Float16)*pa - (_Float16)f * (_Float16)pTmpB[i] ;
+             pa++;
+           }
+           pa += pos;
+       } 
+
+       /* Growing nb moves pos one step up the diagonal for the next reflector */
+       nb++;
+     }
+  }
+
+  arm_status status = ARM_MATH_SUCCESS;
+  /* Return to application */
+  return (status);
+}
+
+#endif /* end of test for Helium or Neon availability */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MatrixQR group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f32.c
new file mode 100644
index 0000000..3e3027e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f32.c
@@ -0,0 +1,854 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_qr_f32.c
+ * Description:  Floating-point matrix QR decomposition.
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVEF)
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+#endif
+#endif
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixQR QR decomposition of a Matrix
+
+    Computes the QR decomposition of a matrix M using Householder algorithm.
+
+    \f[
+        M = Q R
+    \f]
+
+    where Q is an orthogonal matrix and R is upper triangular.
+    No pivoting strategy is used.
+
+    The returned value for R uses a format similar to the one
+    used by LAPACK: it contains not only the matrix R but
+    also the Householder reflectors.
+
+    The function also returns a vector \f$\tau\f$
+    that contains the scaling factors for the reflectors.
+
+    Returned value R has the structure:
+
+    \f[
+    \begin{pmatrix}
+    r_{11} & r_{12}     & \dots  & r_{1n} \\
+    v_{12} & r_{22}     & \dots  & r_{2n} \\
+    v_{13} & v_{22}     & \dots  & r_{3n} \\
+    \vdots & \vdots     & \ddots & \vdots   \\
+    v_{1m} & v_{2(m-1)} & \dots  & r_{mn} \\
+    \end{pmatrix}
+    \f]
+
+    where 
+
+    \f[
+    v_1 = 
+    \begin{pmatrix}
+    1       \\
+    v_{12}  \\
+    \vdots  \\
+    v_{1m}  \\
+    \end{pmatrix}
+    \f]
+
+    is the first householder reflector.
+    
+    The Householder Matrix is given by \f$H_1\f$
+
+    \f[
+    H_1 = I - \tau_1 v_1 v_1^T
+    \f]
+
+    The Matrix Q is the product of the Householder matrices:
+    
+    \f[
+    Q = H_1 H_2 \dots H_n
+    \f]
+
+    The computation of the matrix Q by this function is
+    optional.
+
+    The matrix R itself is the returned value R with the
+    Householder reflectors replaced by zeros:
+
+    \f[
+    \begin{pmatrix}
+    r_{11} & r_{12} & \dots  & r_{1n} \\
+    0      & r_{22} & \dots  & r_{2n} \\
+    0      & 0      & \dots  & r_{3n} \\
+    \vdots & \vdots & \ddots & \vdots   \\
+    0      & 0      & \dots  & r_{mn} \\
+    \end{pmatrix}
+    \f]
+
+
+ */
+
+/**
+  @addtogroup MatrixQR
+  @{
+ */
+
+/**
+  @brief         QR decomposition of a m x n floating point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. Its data is copied into pOutR, where the factorization is carried out.
+  @param[in]     threshold norm2 threshold.    
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m (can be NULL)
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension m.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+  
+  @par           pOutQ is optional:
+                 pOutQ can be a NULL pointer.
+                 In this case, the argument will be ignored
+                 and the output Q matrix won't be computed.
+
+
+  @par           Norm2 threshold 
+                 For the meaning of this argument please 
+                 refer to the \ref MatrixHouseholder documentation
+
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVEF)
+
+arm_status arm_mat_qr_f32(
+    const arm_matrix_instance_f32 * pSrc,
+    const float32_t threshold,
+    arm_matrix_instance_f32 * pOutR,
+    arm_matrix_instance_f32 * pOutQ,
+    float32_t * pOutTau,
+    float32_t *pTmpA,
+    float32_t *pTmpB
+    )
+/* Helium (MVE) implementation: rows are processed four float32 lanes at a time, with a predicated store for the tail */
+{
+  int32_t col=0;
+  int32_t nb,pos;
+  float32_t *pa,*pc;
+  float32_t beta;
+  float32_t *pv;
+  float32_t *pdst;
+  float32_t *p;
+  /* The factorization needs at least as many rows as columns */
+  if (pSrc->numRows < pSrc->numCols)
+  {
+    return(ARM_MATH_SIZE_MISMATCH);
+  }
+  /* The input is copied into pOutR and all further work is done on that copy */
+  memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t));
+  pOutR->numCols = pSrc->numCols;
+  pOutR->numRows = pSrc->numRows;
+  /* p walks down the diagonal of R, pc walks through pOutTau */
+  p = pOutR->pData;
+
+  pc = pOutTau;
+  for(col=0 ; col < pSrc->numCols; col++)
+  {
+      int32_t j,k,blkCnt,blkCnt2;
+      float32_t *pa0,*pa1,*pa2,*pa3,*ptemp;
+      float32_t temp;
+      float32x4_t v1,v2,vtemp;
+
+      COPY_COL_F32(pOutR,col,col,pTmpA);
+      /* pTmpA is overwritten in place with the reflector v, beta is its scaling factor */
+      beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA);
+      *pc++ = beta;
+
+      pdst = pTmpB;
+
+      /* tmpb = v^T * A(col:,col:) ; first row initialises tmpb */
+      pv = pTmpA;
+      pa = p;
+
+      temp = *pv;
+      blkCnt = (pSrc->numCols-col) >> 2;
+      while (blkCnt > 0)
+      {
+          v1 = vld1q_f32(pa);
+          v2 = vmulq_n_f32(v1,temp);
+          vst1q_f32(pdst,v2);
+
+          pa += 4;
+          pdst += 4;
+          blkCnt--;
+      }
+      blkCnt = (pSrc->numCols-col) & 3;
+      if (blkCnt > 0)
+      {
+          mve_pred16_t p0 = vctp32q(blkCnt);
+          v1 = vld1q_f32(pa);
+          v2 = vmulq_n_f32(v1,temp);
+          vst1q_p_f32(pdst,v2,p0);
+
+          pa += blkCnt;
+      }
+
+      pa += col;
+      pv++;
+      pdst = pTmpB;
+
+      pa0 = pa;
+      pa1 = pa0 + pSrc->numCols;
+      pa2 = pa1 + pSrc->numCols;
+      pa3 = pa2 + pSrc->numCols;
+
+      /* Remaining rows, four rows of A accumulated per iteration */
+      blkCnt = (pSrc->numRows-col - 1) >> 2;
+      k=1;
+      while(blkCnt > 0)
+      {
+          vtemp=vld1q_f32(pv);
+
+          blkCnt2 = (pSrc->numCols-col) >> 2;
+          while (blkCnt2 > 0)
+          {
+              v1 = vld1q_f32(pdst);
+
+              v2 = vld1q_f32(pa0);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0));
+
+              v2 = vld1q_f32(pa1);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1));
+
+              v2 = vld1q_f32(pa2);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2));
+
+              v2 = vld1q_f32(pa3);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3));
+
+              vst1q_f32(pdst,v1);
+
+              pdst += 4;
+              pa0 += 4;
+              pa1 += 4;
+              pa2 += 4;
+              pa3 += 4;
+              blkCnt2--;
+          }
+          blkCnt2 = (pSrc->numCols-col) & 3;
+          if (blkCnt2 > 0)
+          {
+              mve_pred16_t p0 = vctp32q(blkCnt2);
+
+              v1 = vld1q_f32(pdst);
+
+              v2 = vld1q_f32(pa0);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,0));
+
+              v2 = vld1q_f32(pa1);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,1));
+
+              v2 = vld1q_f32(pa2);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,2));
+
+              v2 = vld1q_f32(pa3);
+              v1 = vfmaq_n_f32(v1,v2,vgetq_lane(vtemp,3));
+
+              vst1q_p_f32(pdst,v1,p0);
+
+              pa0 += blkCnt2;
+              pa1 += blkCnt2;
+              pa2 += blkCnt2;
+              pa3 += blkCnt2;
+          }
+
+          pa0 += col + 3*pSrc->numCols;
+          pa1 += col + 3*pSrc->numCols;
+          pa2 += col + 3*pSrc->numCols;
+          pa3 += col + 3*pSrc->numCols;
+          pv  += 4;
+          pdst = pTmpB;
+          k += 4;
+          blkCnt--;
+      }
+      /* Rows left over after the four-row blocks, one row at a time */
+      pa = pa0;
+      for(;k<pSrc->numRows-col; k++)
+      {
+          temp = *pv;
+          blkCnt2 = (pSrc->numCols-col) >> 2;
+          while (blkCnt2 > 0)
+          {
+              v1 = vld1q_f32(pa);
+              v2 = vld1q_f32(pdst);
+              v2 = vfmaq_n_f32(v2,v1,temp);
+              vst1q_f32(pdst,v2);
+
+              pa += 4;
+              pdst += 4;
+              blkCnt2--;
+          }
+          blkCnt2 = (pSrc->numCols-col) & 3;
+          if (blkCnt2 > 0)
+          {
+              mve_pred16_t p0 = vctp32q(blkCnt2);
+              v1 = vld1q_f32(pa);
+              v2 = vld1q_f32(pdst);
+              v2 = vfmaq_n_f32(v2,v1,temp);
+              vst1q_p_f32(pdst,v2,p0);
+
+              pa += blkCnt2;
+          }
+
+          pa += col;
+          pv++;
+          pdst = pTmpB;
+      }
+
+      /* A(col:,col:) -= beta * v * tmpb ; f carries the negated factor so a fused multiply-add can be used */
+      pa = p;
+      for(j=0;j<pSrc->numRows-col; j++)
+      {
+        float32_t f = -beta * pTmpA[j];
+        ptemp = pTmpB; 
+
+        blkCnt2 = (pSrc->numCols-col) >> 2;
+        while (blkCnt2 > 0)
+        {
+            v1 = vld1q_f32(pa);
+            v2 = vld1q_f32(ptemp);
+            v1 = vfmaq_n_f32(v1,v2,f);
+            vst1q_f32(pa,v1);
+
+            pa += 4;
+            ptemp += 4;
+
+            blkCnt2--;
+        }
+        blkCnt2 = (pSrc->numCols-col) & 3;
+        if (blkCnt2 > 0)
+        {
+            mve_pred16_t p0 = vctp32q(blkCnt2);
+
+            v1 = vld1q_f32(pa);
+            v2 = vld1q_f32(ptemp);
+            v1 = vfmaq_n_f32(v1,v2,f);
+            vst1q_p_f32(pa,v1,p0);
+
+            pa += blkCnt2;
+        }
+
+        pa += col;
+      } 
+
+      /* Store v[1:] below the diagonal of column col of R */
+      pa = p + pOutR->numCols;
+      for(k=0;k<pSrc->numRows-col-1; k++)
+      {
+         *pa = pTmpA[k+1];
+         pa += pOutR->numCols;
+      }
+      /* Advance to the next diagonal element */
+      p += 1 + pOutR->numCols;
+  }
+
+  /* Generate Q only when the caller supplied an output matrix */
+
+  if (pOutQ != NULL)
+  {
+     /* Initialize Q matrix to identity (Q is addressed as a square numRows x numRows matrix) */
+     memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows);
+
+     pa = pOutQ->pData;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+        *pa = 1.0f;
+        pa += pOutQ->numCols+1;
+     }
+     /* Only pSrc->numCols reflectors and scaling factors exist: apply them from the last one to the first */
+     nb = pOutQ->numRows - pSrc->numCols + 1;
+
+     pc = pOutTau + pSrc->numCols - 1;
+     for(col=0 ; col < pSrc->numCols; col++)
+     {
+       int32_t j,k, blkCnt, blkCnt2;
+       float32_t *pa0,*pa1,*pa2,*pa3,*ptemp;
+       float32_t temp;
+       float32x4_t v1,v2,vtemp;
+
+       pos = pSrc->numRows - nb;
+       p = pOutQ->pData + pos + pOutQ->numCols*pos ;
+
+       /* Rebuild the reflector from column pos of R; its leading element is an implicit 1 */
+       COPY_COL_F32(pOutR,pos,pos,pTmpA);
+       pTmpA[0] = 1.0f;
+       pdst = pTmpB;
+
+       /* tmpb = v^T * Q(pos:,pos:) ; first row initialises tmpb */
+
+       pv = pTmpA;
+       pa = p;
+
+       temp = *pv;
+       blkCnt2 = (pOutQ->numRows-pos) >> 2;
+       while (blkCnt2 > 0)
+       {
+           v1 = vld1q_f32(pa);
+           v1 = vmulq_n_f32(v1, temp);
+           vst1q_f32(pdst,v1);
+
+           pa += 4;
+           pdst += 4;
+
+           blkCnt2--;
+       }
+       blkCnt2 = (pOutQ->numRows-pos) & 3;
+       if (blkCnt2 > 0)
+       {
+           mve_pred16_t p0 = vctp32q(blkCnt2);
+
+           v1 = vld1q_f32(pa);
+           v1 = vmulq_n_f32(v1, temp);
+           vst1q_p_f32(pdst,v1,p0);
+
+           pa += blkCnt2;
+       }
+
+       pa += pos;
+       pv++;
+       pdst = pTmpB;
+       pa0 = pa;
+       pa1 = pa0 + pOutQ->numRows;
+       pa2 = pa1 + pOutQ->numRows;
+       pa3 = pa2 + pOutQ->numRows;
+
+       /* Remaining rows, four rows of Q accumulated per iteration */
+       blkCnt = (pOutQ->numRows-pos - 1) >> 2;
+       k=1;
+       while(blkCnt > 0)
+       {
+
+           vtemp = vld1q_f32(pv);
+           blkCnt2 = (pOutQ->numRows-pos) >> 2;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f32(pdst);
+
+               v2 = vld1q_f32(pa0);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0));
+
+               v2 = vld1q_f32(pa1);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1));
+
+               v2 = vld1q_f32(pa2);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,2));
+
+               v2 = vld1q_f32(pa3);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3));
+
+               vst1q_f32(pdst,v1);
+
+               pa0 += 4;
+               pa1 += 4;
+               pa2 += 4;
+               pa3 += 4;
+               pdst += 4;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numRows-pos) & 3;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp32q(blkCnt2);
+
+               v1 = vld1q_f32(pdst);
+
+               v2 = vld1q_f32(pa0);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,0));
+
+               v2 = vld1q_f32(pa1);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,1));
+
+               v2 = vld1q_f32(pa2);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,2));
+
+               v2 = vld1q_f32(pa3);
+               v1 = vfmaq_n_f32(v1, v2, vgetq_lane(vtemp,3));
+
+               vst1q_p_f32(pdst,v1,p0);
+
+               pa0 += blkCnt2;
+               pa1 += blkCnt2;
+               pa2 += blkCnt2;
+               pa3 += blkCnt2;
+
+           }
+
+           pa0 += pos + 3*pOutQ->numRows;
+           pa1 += pos + 3*pOutQ->numRows;
+           pa2 += pos + 3*pOutQ->numRows;
+           pa3 += pos + 3*pOutQ->numRows;
+           pv  += 4;
+           pdst = pTmpB;
+           k += 4;
+           blkCnt--;
+       }
+       /* Rows left over after the four-row blocks, one row at a time */
+       pa = pa0;
+       for(;k<pOutQ->numRows-pos; k++)
+       {
+           temp = *pv;
+           blkCnt2 = (pOutQ->numRows-pos) >> 2;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f32(pdst);
+               v2 = vld1q_f32(pa);
+               v1 = vfmaq_n_f32(v1, v2, temp);
+               vst1q_f32(pdst,v1);
+
+               pdst += 4;
+               pa += 4;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numRows-pos) & 3;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp32q(blkCnt2);
+               v1 = vld1q_f32(pdst);
+               v2 = vld1q_f32(pa);
+               v1 = vfmaq_n_f32(v1, v2, temp);
+               vst1q_p_f32(pdst,v1,p0);
+
+               pa += blkCnt2;
+           }
+
+           pa += pos;
+           pv++;
+           pdst = pTmpB;
+       }
+       /* Q(pos:,pos:) -= beta * v * tmpb, with beta read backwards from pOutTau */
+       pa = p;
+       beta = *pc--;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+           float32_t f = -beta * pTmpA[j];
+           ptemp = pTmpB;
+
+           blkCnt2 = (pOutQ->numCols-pos) >> 2;
+           while (blkCnt2 > 0)
+           {
+               v1 = vld1q_f32(pa);
+               v2 = vld1q_f32(ptemp);
+               v1 = vfmaq_n_f32(v1,v2,f);
+               vst1q_f32(pa,v1);
+
+               pa += 4;
+               ptemp += 4;
+
+               blkCnt2--;
+           }
+           blkCnt2 = (pOutQ->numCols-pos) & 3;
+           if (blkCnt2 > 0)
+           {
+               mve_pred16_t p0 = vctp32q(blkCnt2);
+
+               v1 = vld1q_f32(pa);
+               v2 = vld1q_f32(ptemp);
+               v1 = vfmaq_n_f32(v1,v2,f);
+               vst1q_p_f32(pa,v1,p0);
+
+               pa += blkCnt2;
+           }
+
+           pa += pos;
+       } 
+
+       /* Growing nb moves pos one step up the diagonal for the next reflector */
+       nb++;
+     }
+  }
+
+  arm_status status = ARM_MATH_SUCCESS;
+  /* Return to application */
+  return (status);
+}
+
+#endif /* #if defined(ARM_MATH_MVEF) */
+
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+
+#if (!defined(ARM_MATH_MVEF)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+arm_status arm_mat_qr_f32(
+    const arm_matrix_instance_f32 * pSrc,
+    const float32_t threshold,
+    arm_matrix_instance_f32 * pOutR,
+    arm_matrix_instance_f32 * pOutQ,
+    float32_t * pOutTau,
+    float32_t *pTmpA,
+    float32_t *pTmpB
+    )
+/* Scalar QR decomposition built from Householder reflectors: R goes to pOutR, the scale factors to pOutTau, Q (optional) to pOutQ. */
+{
+  int32_t col=0;
+  int32_t nb,pos;
+  float32_t *pa,*pc; /* pa walks matrix data, pc walks pOutTau */
+  float32_t beta; /* value returned by arm_householder_f32 for the current reflector */
+  float32_t *pv; /* cursor into the vector stored in pTmpA */
+  float32_t *pdst; /* cursor into the row vector stored in pTmpB */
+  float32_t *p; /* top-left element of the sub-matrix currently being updated */
+
+  if (pSrc->numRows < pSrc->numCols) /* matrices with fewer rows than columns are rejected */
+  {
+    return(ARM_MATH_SIZE_MISMATCH);
+  }
+  /* Work on a copy: the decomposition below modifies pOutR; pSrc is only read. */
+  memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float32_t));
+  pOutR->numCols = pSrc->numCols;
+  pOutR->numRows = pSrc->numRows;
+  
+  p = pOutR->pData;
+  
+  pc = pOutTau;
+  for(col=0 ; col < pSrc->numCols; col++) /* one reflector per column; p sits on the diagonal element (col,col) */
+  {
+      int32_t i,j,k,blkCnt;
+      float32_t *pa0,*pa1,*pa2,*pa3;
+      COPY_COL_F32(pOutR,col,col,pTmpA); /* macro defined outside this chunk; presumably copies column col of pOutR, from row col down, into pTmpA - confirm */
+
+      beta = arm_householder_f32(pTmpA,threshold,pSrc->numRows - col,pTmpA); /* pTmpA is both input and output; it is used as the vector v below */
+      *pc++ = beta; /* pOutTau[col] = beta */
+    
+      pdst = pTmpB;
+
+      /* v.T A(col:,col:) -> tmpb. The first row initialises tmpb, the following rows are accumulated into it. */
+      pv = pTmpA;
+      pa = p;
+      for(j=0;j<pSrc->numCols-col; j++)
+      {
+              *pdst++ = *pv * *pa++; 
+      }
+      pa += col; /* skip the first col entries of the next row */
+      pv++;
+      pdst = pTmpB;
+
+      pa0 = pa; /* pa0..pa3 address four consecutive rows (row stride is pSrc->numCols) */
+      pa1 = pa0 + pSrc->numCols;
+      pa2 = pa1 + pSrc->numCols;
+      pa3 = pa2 + pSrc->numCols;
+
+      /* Unrolled loop: four rows of the sub-matrix are accumulated into tmpb per iteration */
+      blkCnt = (pSrc->numRows-col - 1) >> 2;
+      k=1; /* row 0 of the sub-matrix was consumed by the initialisation loop above */
+      while(blkCnt > 0)
+      {
+          float32_t sum;
+
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              sum = *pdst;
+
+              sum += pv[0] * *pa0++;
+              sum += pv[1] * *pa1++;
+              sum += pv[2] * *pa2++;
+              sum += pv[3] * *pa3++;
+              
+              *pdst++ = sum; 
+          }
+          pa0 += col + 3*pSrc->numCols; /* finish the current row, then jump over the three rows handled by the other pointers */
+          pa1 += col + 3*pSrc->numCols;
+          pa2 += col + 3*pSrc->numCols;
+          pa3 += col + 3*pSrc->numCols;
+          pv  += 4;
+          pdst = pTmpB;
+          k += 4;
+          blkCnt--;
+      }
+      /* Remaining rows (at most three), handled one at a time */
+      pa = pa0;
+      for(;k<pSrc->numRows-col; k++)
+      {
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              *pdst++ += *pv * *pa++; 
+          }
+          pa += col;
+          pv++;
+          pdst = pTmpB;
+      }
+
+      /* A(col:,col:) - beta v tmpb, written back in place */
+      pa = p;
+      for(j=0;j<pSrc->numRows-col; j++)
+      {
+        float32_t f = beta * pTmpA[j];
+
+        for(i=0;i<pSrc->numCols-col; i++)
+        {
+          *pa = *pa - f * pTmpB[i] ;
+          pa++;
+        }
+        pa += col;
+      } 
+
+      /* Store v[1:] below the diagonal of column col in R; the Q stage reads it back from there */
+      pa = p + pOutR->numCols;
+      for(k=0;k<pSrc->numRows-col-1; k++)
+      {
+         *pa = pTmpA[k+1];
+         pa += pOutR->numCols;
+      }
+
+      p += 1 + pOutR->numCols; /* move to the next diagonal element */
+  }
+
+  /* Generate Q if the caller supplied an output matrix */
+
+  if (pOutQ != NULL)
+  {
+     /* Initialize Q matrix to identity. NOTE(review): numRows*numRows elements are cleared but the diagonal walk uses numCols, so Q looks expected to be square - confirm */
+     memset(pOutQ->pData,0,sizeof(float32_t)*pOutQ->numRows*pOutQ->numRows);
+     
+     pa = pOutQ->pData;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+        *pa = 1.0f;
+        pa += pOutQ->numCols+1;
+     }
+   
+     nb = pOutQ->numRows - pOutQ->numCols + 1;
+     /* NOTE(review): the first stage wrote pSrc->numCols entries to pOutTau, but reading starts at index pOutQ->numCols-1; confirm this stays in range when pOutQ->numCols > pSrc->numCols */
+     pc = pOutTau + pOutQ->numCols - 1;
+     for(col=0 ; col < pOutQ->numCols; col++) /* pc walks pOutTau backwards and pos decreases by one each pass */
+     {
+       int32_t i,j,k, blkCnt;
+       float32_t *pa0,*pa1,*pa2,*pa3;
+       pos = pSrc->numRows - nb;
+       p = pOutQ->pData + pos + pOutQ->numCols*pos ;
+   
+       
+       COPY_COL_F32(pOutR,pos,pos,pTmpA);
+       pTmpA[0] = 1.0f; /* only v[1:] was stored in R by the first stage; the leading entry is forced to 1 */
+       pdst = pTmpB;
+      
+       /* v.T Q(pos:,pos:) -> tmpb */
+       
+       pv = pTmpA;
+       pa = p;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+               *pdst++ = *pv * *pa++; 
+       }
+       pa += pos;
+       pv++;
+       pdst = pTmpB;
+       pa0 = pa;
+       pa1 = pa0 + pOutQ->numRows; /* NOTE(review): row stride taken from numRows here but from numCols when computing p - equal only for a square Q */
+       pa2 = pa1 + pOutQ->numRows;
+       pa3 = pa2 + pOutQ->numRows;
+
+       /* Unrolled loop: four rows accumulated into tmpb per iteration */
+       blkCnt = (pOutQ->numRows-pos - 1) >> 2;
+       k=1;
+       while(blkCnt > 0)
+       {
+           float32_t sum;
+
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+              sum = *pdst;
+
+              sum += pv[0] * *pa0++;
+              sum += pv[1] * *pa1++;
+              sum += pv[2] * *pa2++;
+              sum += pv[3] * *pa3++;
+              
+              *pdst++ = sum; 
+           }
+           pa0 += pos + 3*pOutQ->numRows;
+           pa1 += pos + 3*pOutQ->numRows;
+           pa2 += pos + 3*pOutQ->numRows;
+           pa3 += pos + 3*pOutQ->numRows;
+           pv  += 4;
+           pdst = pTmpB;
+           k += 4;
+           blkCnt--;
+       }
+       /* Remaining rows (at most three), handled one at a time */
+       pa = pa0;
+       for(;k<pOutQ->numRows-pos; k++)
+       {
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+               *pdst++ += *pv * *pa++; 
+           }
+           pa += pos;
+           pv++;
+           pdst = pTmpB;
+       }
+       /* Q(pos:,pos:) - beta v tmpb, with beta read back from pOutTau */
+       pa = p;
+       beta = *pc--;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+           float32_t f = beta * pTmpA[j];
+
+           for(i=0;i<pOutQ->numCols-pos; i++)
+           {
+             *pa = *pa - f * pTmpB[i] ;
+             pa++;
+           }
+           pa += pos;
+       } 
+   
+   
+       nb++;
+     }
+  }
+
+  arm_status status = ARM_MATH_SUCCESS;
+  /* Return to application */
+  return (status);
+}
+
+#endif /* (!defined(ARM_MATH_MVEF)) || defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of MatrixQR group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f64.c
new file mode 100644
index 0000000..84ce3fb
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_qr_f64.c
@@ -0,0 +1,311 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_qr_f64.c
+ * Description:  Double floating-point matrix QR decomposition.
+ *
+ * $Date:        15 June 2022
+ * $Revision:    V1.11.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_utils.h"
+
+
+
+/**
+  @ingroup groupMatrix
+ */
+
+
+/**
+  @addtogroup MatrixQR
+  @{
+ */
+
+/**
+  @brief         QR decomposition of an m x n double-precision floating-point matrix with m >= n.
+  @param[in]     pSrc      points to input matrix structure. Its data is copied into pOutR and the decomposition is carried out on that copy.
+  @param[in]     threshold norm2 threshold.    
+  @param[out]    pOutR     points to output R matrix structure of dimension m x n
+  @param[out]    pOutQ     points to output Q matrix structure of dimension m x m (can be NULL)
+  @param[out]    pOutTau   points to Householder scaling factors of dimension n
+  @param[inout]  pTmpA     points to a temporary vector of dimension m.
+  @param[inout]  pTmpB     points to a temporary vector of dimension m.
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+  
+  @par           pOutQ is optional:
+                 pOutQ can be a NULL pointer.
+                 In this case, the argument will be ignored
+                 and the output Q matrix won't be computed.
+
+
+  @par           Norm2 threshold 
+                 For the meaning of this argument please 
+                 refer to the \ref MatrixHouseholder documentation
+
+ */
+
+
+
+
+arm_status arm_mat_qr_f64(
+    const arm_matrix_instance_f64 * pSrc,
+    const float64_t threshold,
+    arm_matrix_instance_f64 * pOutR,
+    arm_matrix_instance_f64 * pOutQ,
+    float64_t * pOutTau,
+    float64_t *pTmpA,
+    float64_t *pTmpB
+    )
+/* QR decomposition built from Householder reflectors: R goes to pOutR, the scale factors to pOutTau, Q (optional) to pOutQ. */
+{
+  int32_t col=0;
+  int32_t nb,pos;
+  float64_t *pa,*pc; /* pa walks matrix data, pc walks pOutTau */
+  float64_t beta; /* value returned by arm_householder_f64 for the current reflector */
+  float64_t *pv; /* cursor into the vector stored in pTmpA */
+  float64_t *pdst; /* cursor into the row vector stored in pTmpB */
+  float64_t *p; /* top-left element of the sub-matrix currently being updated */
+
+  if (pSrc->numRows < pSrc->numCols) /* matrices with fewer rows than columns are rejected */
+  {
+    return(ARM_MATH_SIZE_MISMATCH);
+  }
+  /* Work on a copy: the decomposition below modifies pOutR; pSrc is only read. */
+  memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float64_t));
+  pOutR->numCols = pSrc->numCols;
+  pOutR->numRows = pSrc->numRows;
+  
+  p = pOutR->pData;
+  
+  pc = pOutTau;
+  for(col=0 ; col < pSrc->numCols; col++) /* one reflector per column; p sits on the diagonal element (col,col) */
+  {
+      int32_t i,j,k,blkCnt;
+      float64_t *pa0,*pa1,*pa2,*pa3;
+      COPY_COL_F64(pOutR,col,col,pTmpA); /* macro not defined in this file (presumably from dsp/matrix_utils.h); presumably copies column col of pOutR, from row col down, into pTmpA - confirm */
+
+      beta = arm_householder_f64(pTmpA,threshold,pSrc->numRows - col,pTmpA); /* pTmpA is both input and output; it is used as the vector v below */
+      *pc++ = beta; /* pOutTau[col] = beta */
+    
+      pdst = pTmpB;
+
+      /* v.T A(col:,col:) -> tmpb. The first row initialises tmpb, the following rows are accumulated into it. */
+      pv = pTmpA;
+      pa = p;
+      for(j=0;j<pSrc->numCols-col; j++)
+      {
+              *pdst++ = *pv * *pa++; 
+      }
+      pa += col; /* skip the first col entries of the next row */
+      pv++;
+      pdst = pTmpB;
+
+      pa0 = pa; /* pa0..pa3 address four consecutive rows (row stride is pSrc->numCols) */
+      pa1 = pa0 + pSrc->numCols;
+      pa2 = pa1 + pSrc->numCols;
+      pa3 = pa2 + pSrc->numCols;
+
+      /* Unrolled loop: four rows of the sub-matrix are accumulated into tmpb per iteration */
+      blkCnt = (pSrc->numRows-col - 1) >> 2;
+      k=1; /* row 0 of the sub-matrix was consumed by the initialisation loop above */
+      while(blkCnt > 0)
+      {
+          float64_t sum;
+
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              sum = *pdst;
+
+              sum += pv[0] * *pa0++;
+              sum += pv[1] * *pa1++;
+              sum += pv[2] * *pa2++;
+              sum += pv[3] * *pa3++;
+              
+              *pdst++ = sum; 
+          }
+          pa0 += col + 3*pSrc->numCols; /* finish the current row, then jump over the three rows handled by the other pointers */
+          pa1 += col + 3*pSrc->numCols;
+          pa2 += col + 3*pSrc->numCols;
+          pa3 += col + 3*pSrc->numCols;
+          pv  += 4;
+          pdst = pTmpB;
+          k += 4;
+          blkCnt--;
+      }
+      /* Remaining rows (at most three), handled one at a time */
+      pa = pa0;
+      for(;k<pSrc->numRows-col; k++)
+      {
+          for(j=0;j<pSrc->numCols-col; j++)
+          {
+              *pdst++ += *pv * *pa++; 
+          }
+          pa += col;
+          pv++;
+          pdst = pTmpB;
+      }
+
+      /* A(col:,col:) - beta v tmpb, written back in place */
+      pa = p;
+      for(j=0;j<pSrc->numRows-col; j++)
+      {
+        float64_t f = beta * pTmpA[j];
+
+        for(i=0;i<pSrc->numCols-col; i++)
+        {
+          *pa = *pa - f * pTmpB[i] ;
+          pa++;
+        }
+        pa += col;
+      } 
+
+      /* Store v[1:] below the diagonal of column col in R; the Q stage reads it back from there */
+      pa = p + pOutR->numCols;
+      for(k=0;k<pSrc->numRows-col-1; k++)
+      {
+         *pa = pTmpA[k+1];
+         pa += pOutR->numCols;
+      }
+
+      p += 1 + pOutR->numCols; /* move to the next diagonal element */
+  }
+
+  /* Generate Q if the caller supplied an output matrix */
+
+  if (pOutQ != NULL)
+  {
+     /* Initialize Q matrix to identity. NOTE(review): numRows*numRows elements are cleared but the diagonal walk uses numCols, so Q looks expected to be square - confirm */
+     memset(pOutQ->pData,0,sizeof(float64_t)*pOutQ->numRows*pOutQ->numRows);
+     
+     pa = pOutQ->pData;
+     for(col=0 ; col < pOutQ->numCols; col++)
+     {
+        *pa = 1.0;
+        pa += pOutQ->numCols+1;
+     }
+   
+     nb = pOutQ->numRows - pOutQ->numCols + 1;
+     /* NOTE(review): the first stage wrote pSrc->numCols entries to pOutTau, but reading starts at index pOutQ->numCols-1; confirm this stays in range when pOutQ->numCols > pSrc->numCols */
+     pc = pOutTau + pOutQ->numCols - 1;
+     for(col=0 ; col < pOutQ->numCols; col++) /* pc walks pOutTau backwards and pos decreases by one each pass */
+     {
+       int32_t i,j,k, blkCnt;
+       float64_t *pa0,*pa1,*pa2,*pa3;
+       pos = pSrc->numRows - nb;
+       p = pOutQ->pData + pos + pOutQ->numCols*pos ;
+   
+       
+       COPY_COL_F64(pOutR,pos,pos,pTmpA);
+       pTmpA[0] = 1.0; /* only v[1:] was stored in R by the first stage; the leading entry is forced to 1 */
+       pdst = pTmpB;
+      
+       /* v.T Q(pos:,pos:) -> tmpb */
+       
+       pv = pTmpA;
+       pa = p;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+               *pdst++ = *pv * *pa++; 
+       }
+       pa += pos;
+       pv++;
+       pdst = pTmpB;
+       pa0 = pa;
+       pa1 = pa0 + pOutQ->numRows; /* NOTE(review): row stride taken from numRows here but from numCols when computing p - equal only for a square Q */
+       pa2 = pa1 + pOutQ->numRows;
+       pa3 = pa2 + pOutQ->numRows;
+
+       /* Unrolled loop: four rows accumulated into tmpb per iteration */
+       blkCnt = (pOutQ->numRows-pos - 1) >> 2;
+       k=1;
+       while(blkCnt > 0)
+       {
+           float64_t sum;
+
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+              sum = *pdst;
+
+              sum += pv[0] * *pa0++;
+              sum += pv[1] * *pa1++;
+              sum += pv[2] * *pa2++;
+              sum += pv[3] * *pa3++;
+              
+              *pdst++ = sum; 
+           }
+           pa0 += pos + 3*pOutQ->numRows;
+           pa1 += pos + 3*pOutQ->numRows;
+           pa2 += pos + 3*pOutQ->numRows;
+           pa3 += pos + 3*pOutQ->numRows;
+           pv  += 4;
+           pdst = pTmpB;
+           k += 4;
+           blkCnt--;
+       }
+       /* Remaining rows (at most three), handled one at a time */
+       pa = pa0;
+       for(;k<pOutQ->numRows-pos; k++)
+       {
+           for(j=0;j<pOutQ->numRows-pos; j++)
+           {
+               *pdst++ += *pv * *pa++; 
+           }
+           pa += pos;
+           pv++;
+           pdst = pTmpB;
+       }
+       /* Q(pos:,pos:) - beta v tmpb, with beta read back from pOutTau */
+       pa = p;
+       beta = *pc--;
+       for(j=0;j<pOutQ->numRows-pos; j++)
+       {
+           float64_t f = beta * pTmpA[j];
+
+           for(i=0;i<pOutQ->numCols-pos; i++)
+           {
+             *pa = *pa - f * pTmpB[i] ;
+             pa++;
+           }
+           pa += pos;
+       } 
+   
+   
+       nb++;
+     }
+  }
+
+  arm_status status = ARM_MATH_SUCCESS;
+  /* Return to application */
+  return (status);
+}
+
+
+/**
+  @} end of MatrixQR group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
index 4c8d4eb..3b14b51 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_scale_f16.c
  * Description:  Multiplies a floating-point matrix by a scalar
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -163,10 +163,10 @@ arm_status arm_mat_scale_f16(
       /* C(m,n) = A(m,n) * scale */
 
       /* Scale and store result in destination buffer. */
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
-      *pOut++ = (*pIn++) * scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
 
       /* Decrement loop counter */
       blkCnt--;
@@ -187,7 +187,7 @@ arm_status arm_mat_scale_f16(
       /* C(m,n) = A(m,n) * scale */
 
       /* Scale and store result in destination buffer. */
-      *pOut++ = (*pIn++) * scale;
+      *pOut++ = (_Float16)(*pIn++) * (_Float16)scale;
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c
index 5d1dfd5..63aad92 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_scale_f32.c
  * Description:  Multiplies a floating-point matrix by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -39,7 +39,22 @@
 
   Multiplies a matrix by a scalar.  This is accomplished by multiplying each element in the
   matrix by the scalar.  For example:
-  \image html MatrixScale.gif "Matrix Scaling of a 3 x 3 matrix"
+
+  @par Matrix Scaling of a 3 x 3 matrix 
+
+  \f[
+  \begin{pmatrix}
+  a_{1,1} & a_{1,2} & a_{1,3} \\
+  a_{2,1} & a_{2,2} & a_{2,3} \\
+  a_{3,1} & a_{3,2} & a_{3,3} \\
+  \end{pmatrix}
+  * K = 
+  \begin{pmatrix}
+   K a_{1,1} & K a_{1,2} & K a_{1,3} \\
+   K a_{2,1} & K a_{2,2} & K a_{2,3} \\
+   K a_{3,1} & K a_{3,2} & K a_{3,3} \\
+  \end{pmatrix}
+  \f]
 
   The function checks to make sure that the input and output matrices are of the same size.
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q15.c
index 800ca46..5d1ea8b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_scale_q15.c
  * Description:  Multiplies a Q15 matrix by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -179,8 +179,8 @@ arm_status arm_mat_scale_q15(
 
 #if defined (ARM_MATH_DSP)
       /* read 2 times 2 samples at a time from source */
-      inA1 = read_q15x2_ia ((q15_t **) &pIn);
-      inA2 = read_q15x2_ia ((q15_t **) &pIn);
+      inA1 = read_q15x2_ia (&pIn);
+      inA2 = read_q15x2_ia (&pIn);
 
       /* Scale inputs and store result in temporary variables
        * in single cycle by packing the outputs */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q31.c
index 1292c65..f4e87e6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_scale_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_scale_q31.c
  * Description:  Multiplies a Q31 matrix by a scalar
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
index 8833566..6b3de66 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_lower_triangular_f16.c
  * Description:  Solve linear system LT X = A with LT lower triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,9 +65,8 @@
 #ifdef ARM_MATH_MATRIX_CHECK
 
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -83,9 +84,10 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float16_t *pX = dst->pData;
     float16_t *pLT = lt->pData;
@@ -102,45 +104,45 @@
     for(i=0; i < n ; i++)
     {
 
-      for(j=0; j+7 < n; j += 8)
+      for(j=0; j+7 < cols; j += 8)
       {
-            vecA = vld1q_f16(&pA[i * n + j]);
+            vecA = vld1q_f16(&pA[i * cols + j]);
 
             for(k=0; k < i; k++)
             {
-                vecX = vld1q_f16(&pX[n*k+j]);
+                vecX = vld1q_f16(&pX[cols*k+j]);
                 vecA = vfmsq(vecA,vdupq_n_f16(pLT[n*i + k]),vecX);
             }
 
-            if (pLT[n*i + i]==0.0f16)
+            if ((_Float16)pLT[n*i + i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
 
             invLT = 1.0f16 / (_Float16)pLT[n*i + i];
             vecA = vmulq(vecA,vdupq_n_f16(invLT));
-            vst1q(&pX[i*n+j],vecA);
+            vst1q(&pX[i*cols+j],vecA);
 
        }
 
-       for(; j < n; j ++)
+       for(; j < cols; j ++)
        {
             a_col = &pA[j];
             lt_row = &pLT[n*i];
 
-            _Float16 tmp=a_col[i * n];
+            _Float16 tmp=a_col[i * cols];
             
             for(k=0; k < i; k++)
             {
-                tmp -= (_Float16)lt_row[k] * (_Float16)pX[n*k+j];
+                tmp -= (_Float16)lt_row[k] * (_Float16)pX[cols*k+j];
             }
 
-            if (lt_row[i]==0.0f16)
+            if ((_Float16)lt_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / (_Float16)lt_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
         }
 
     }
@@ -164,9 +166,8 @@
 #ifdef ARM_MATH_MATRIX_CHECK
 
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -184,9 +185,10 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float16_t *pX = dst->pData;
     float16_t *pLT = lt->pData;
@@ -195,7 +197,7 @@
     float16_t *lt_row;
     float16_t *a_col;
 
-    for(j=0; j < n; j ++)
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
@@ -203,19 +205,19 @@
        {
             lt_row = &pLT[n*i];
 
-            float16_t tmp=a_col[i * n];
+            float16_t tmp=a_col[i * cols];
             
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= (_Float16)lt_row[k] * (_Float16)pX[cols*k+j];
             }
 
-            if (lt_row[i]==0.0f)
+            if ((_Float16)lt_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
-            tmp = tmp / lt_row[i];
-            pX[i*n+j] = tmp;
+            tmp = (_Float16)tmp / (_Float16)lt_row[i];
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
index dcd529c..3ffd076 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_lower_triangular_f32.c
  * Description:  Solve linear system LT X = A with LT lower triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -62,9 +64,8 @@
 #ifdef ARM_MATH_MATRIX_CHECK
 
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -82,9 +83,10 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float32_t *pX = dst->pData;
     float32_t *pLT = lt->pData;
@@ -101,13 +103,13 @@
     for(i=0; i < n ; i++)
     {
 
-      for(j=0; j+3 < n; j += 4)
+      for(j=0; j+3 < cols; j += 4)
       {
-            vecA = vld1q_f32(&pA[i * n + j]);
+            vecA = vld1q_f32(&pA[i * cols + j]);
 
             for(k=0; k < i; k++)
             {
-                vecX = vld1q_f32(&pX[n*k+j]);
+                vecX = vld1q_f32(&pX[cols*k+j]);
                 vecA = vfmsq(vecA,vdupq_n_f32(pLT[n*i + k]),vecX);
             }
 
@@ -118,20 +120,20 @@
 
             invLT = 1.0f / pLT[n*i + i];
             vecA = vmulq(vecA,vdupq_n_f32(invLT));
-            vst1q(&pX[i*n+j],vecA);
+            vst1q(&pX[i*cols+j],vecA);
 
        }
 
-       for(; j < n; j ++)
+       for(; j < cols; j ++)
        {
             a_col = &pA[j];
             lt_row = &pLT[n*i];
 
-            float32_t tmp=a_col[i * n];
+            float32_t tmp=a_col[i * cols];
             
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= lt_row[k] * pX[cols*k+j];
             }
 
             if (lt_row[i]==0.0f)
@@ -139,7 +141,7 @@
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / lt_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
         }
 
     }
@@ -163,9 +165,8 @@
 #ifdef ARM_MATH_MATRIX_CHECK
 
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -183,9 +184,10 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float32_t *pX = dst->pData;
     float32_t *pLT = lt->pData;
@@ -202,13 +204,13 @@
     for(i=0; i < n ; i++)
     {
 
-      for(j=0; j+3 < n; j += 4)
+      for(j=0; j+3 < cols; j += 4)
       {
-            vecA = vld1q_f32(&pA[i * n + j]);
+            vecA = vld1q_f32(&pA[i * cols + j]);
 
             for(k=0; k < i; k++)
             {
-                vecX = vld1q_f32(&pX[n*k+j]);
+                vecX = vld1q_f32(&pX[cols*k+j]);
                 vecA = vfmsq_f32(vecA,vdupq_n_f32(pLT[n*i + k]),vecX);
             }
 
@@ -219,20 +221,20 @@
 
             invLT = 1.0f / pLT[n*i + i];
             vecA = vmulq_f32(vecA,vdupq_n_f32(invLT));
-            vst1q_f32(&pX[i*n+j],vecA);
+            vst1q_f32(&pX[i*cols+j],vecA);
 
        }
 
-       for(; j < n; j ++)
+       for(; j < cols; j ++)
        {
             a_col = &pA[j];
             lt_row = &pLT[n*i];
 
-            float32_t tmp=a_col[i * n];
+            float32_t tmp=a_col[i * cols];
             
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= lt_row[k] * pX[cols*k+j];
             }
 
             if (lt_row[i]==0.0f)
@@ -240,7 +242,7 @@
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / lt_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
         }
 
     }
@@ -261,11 +263,9 @@
 
 
 #ifdef ARM_MATH_MATRIX_CHECK
-
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -283,9 +283,7 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
-
-    n = dst->numRows;
+    int i,j,k,n,cols;
 
     float32_t *pX = dst->pData;
     float32_t *pLT = lt->pData;
@@ -294,19 +292,23 @@
     float32_t *lt_row;
     float32_t *a_col;
 
-    for(j=0; j < n; j ++)
+    n = dst->numRows;
+    cols = dst -> numCols;
+
+
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
        for(i=0; i < n ; i++)
        {
-            lt_row = &pLT[n*i];
+            float32_t tmp=a_col[i * cols];
 
-            float32_t tmp=a_col[i * n];
+            lt_row = &pLT[n*i];
             
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= lt_row[k] * pX[cols*k+j];
             }
 
             if (lt_row[i]==0.0f)
@@ -314,7 +316,7 @@
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / lt_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
index 67dc3be..cc73005 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_lower_triangular_f64.c
  * Description:  Solve linear system LT X = A with LT lower triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -57,9 +59,8 @@
 #ifdef ARM_MATH_MATRIX_CHECK
 
   /* Check for matrix mismatch condition */
-  if ((ut->numRows != lt->numCols) ||
-      (a->numRows != a->numCols) ||
-      (ut->numRows != a->numRows)   )
+  if ((lt->numRows != lt->numCols) ||
+      (lt->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
     status = ARM_MATH_SIZE_MISMATCH;
@@ -77,9 +78,7 @@
     x2 = (a2 - c2 x3) / b2
 
     */
-    int i,j,k,n;
-
-    n = dst->numRows;
+    int i,j,k,n,cols;
 
     float64_t *pX = dst->pData;
     float64_t *pLT = lt->pData;
@@ -88,27 +87,30 @@
     float64_t *lt_row;
     float64_t *a_col;
 
-    for(j=0; j < n; j ++)
+    n = dst->numRows;
+    cols = dst->numCols;
+
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
        for(i=0; i < n ; i++)
        {
+            float64_t tmp=a_col[i * cols];
+
             lt_row = &pLT[n*i];
 
-            float64_t tmp=a_col[i * n];
-            
             for(k=0; k < i; k++)
             {
-                tmp -= lt_row[k] * pX[n*k+j];
+                tmp -= lt_row[k] * pX[cols*k+j];
             }
 
-            if (lt_row[i]==0.0f)
+            if (lt_row[i]==0.0)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / lt_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
index 427317c..0f03eaa 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_upper_triangular_f16.c
  * Description:  Solve linear system UT X = A with UT upper triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -66,7 +68,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -78,9 +79,10 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float16_t *pX = dst->pData;
     float16_t *pUT = ut->pData;
@@ -96,17 +98,17 @@ arm_status status;                             /* status of matrix inverse */
     
     for(i=n-1; i >= 0 ; i--)
     {
-      for(j=0; j+7 < n; j +=8)
+      for(j=0; j+7 < cols; j +=8)
       {
-            vecA = vld1q_f16(&pA[i * n + j]);
+            vecA = vld1q_f16(&pA[i * cols + j]);
             
             for(k=n-1; k > i; k--)
             {
-                vecX = vld1q_f16(&pX[n*k+j]);          
+                vecX = vld1q_f16(&pX[cols*k+j]);          
                 vecA = vfmsq(vecA,vdupq_n_f16(pUT[n*i + k]),vecX);
             }
 
-            if (pUT[n*i + i]==0.0f16)
+            if ((_Float16)pUT[n*i + i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
@@ -115,28 +117,28 @@ arm_status status;                             /* status of matrix inverse */
             vecA = vmulq(vecA,vdupq_n_f16(invUT));
            
 
-            vst1q(&pX[i*n+j],vecA);
+            vst1q(&pX[i*cols+j],vecA);
       }
 
-      for(; j < n; j ++)
+      for(; j < cols; j ++)
       {
             a_col = &pA[j];
 
             ut_row = &pUT[n*i];
 
-            _Float16 tmp=a_col[i * n];
+            _Float16 tmp=a_col[i * cols];
             
             for(k=n-1; k > i; k--)
             {
-                tmp -= (_Float16)ut_row[k] * (_Float16)pX[n*k+j];
+                tmp -= (_Float16)ut_row[k] * (_Float16)pX[cols*k+j];
             }
 
-            if (ut_row[i]==0.0f16)
+            if ((_Float16)ut_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / (_Float16)ut_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
@@ -162,7 +164,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -174,9 +175,10 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float16_t *pX = dst->pData;
     float16_t *pUT = ut->pData;
@@ -185,7 +187,7 @@ arm_status status;                             /* status of matrix inverse */
     float16_t *ut_row;
     float16_t *a_col;
 
-    for(j=0; j < n; j ++)
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
@@ -193,19 +195,19 @@ arm_status status;                             /* status of matrix inverse */
        {
             ut_row = &pUT[n*i];
 
-            float16_t tmp=a_col[i * n];
+            float16_t tmp=a_col[i * cols];
             
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= (_Float16)ut_row[k] * (_Float16)pX[cols*k+j];
             }
 
-            if (ut_row[i]==0.0f)
+            if ((_Float16)ut_row[i]==0.0f16)
             {
               return(ARM_MATH_SINGULAR);
             }
-            tmp = tmp / ut_row[i];
-            pX[i*n+j] = tmp;
+            tmp = (_Float16)tmp / (_Float16)ut_row[i];
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
index 074901d..4b3ef86 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_upper_triangular_f32.c
  * Description:  Solve linear system UT X = A with UT upper triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,7 +65,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -75,9 +76,10 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
@@ -93,13 +95,13 @@ arm_status status;                             /* status of matrix inverse */
     
     for(i=n-1; i >= 0 ; i--)
     {
-      for(j=0; j+3 < n; j +=4)
+      for(j=0; j+3 < cols; j +=4)
       {
-            vecA = vld1q_f32(&pA[i * n + j]);
+            vecA = vld1q_f32(&pA[i * cols + j]);
             
             for(k=n-1; k > i; k--)
             {
-                vecX = vld1q_f32(&pX[n*k+j]);          
+                vecX = vld1q_f32(&pX[cols*k+j]);          
                 vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
             }
 
@@ -112,20 +114,20 @@ arm_status status;                             /* status of matrix inverse */
             vecA = vmulq(vecA,vdupq_n_f32(invUT));
            
 
-            vst1q(&pX[i*n+j],vecA);
+            vst1q(&pX[i*cols+j],vecA);
       }
 
-      for(; j < n; j ++)
+      for(; j < cols; j ++)
       {
             a_col = &pA[j];
 
             ut_row = &pUT[n*i];
 
-            float32_t tmp=a_col[i * n];
+            float32_t tmp=a_col[i * cols];
             
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= ut_row[k] * pX[cols*k+j];
             }
 
             if (ut_row[i]==0.0f)
@@ -133,7 +135,7 @@ arm_status status;                             /* status of matrix inverse */
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
@@ -160,7 +162,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -172,9 +173,10 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
+    int i,j,k,n,cols;
 
     n = dst->numRows;
+    cols = dst->numCols;
 
     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
@@ -190,13 +192,13 @@ arm_status status;                             /* status of matrix inverse */
     
     for(i=n-1; i >= 0 ; i--)
     {
-      for(j=0; j+3 < n; j +=4)
+      for(j=0; j+3 < cols; j +=4)
       {
-            vecA = vld1q_f32(&pA[i * n + j]);
+            vecA = vld1q_f32(&pA[i * cols + j]);
             
             for(k=n-1; k > i; k--)
             {
-                vecX = vld1q_f32(&pX[n*k+j]);          
+                vecX = vld1q_f32(&pX[cols*k+j]);          
                 vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
             }
 
@@ -209,20 +211,20 @@ arm_status status;                             /* status of matrix inverse */
             vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));
            
 
-            vst1q_f32(&pX[i*n+j],vecA);
+            vst1q_f32(&pX[i*cols+j],vecA);
       }
 
-      for(; j < n; j ++)
+      for(; j < cols; j ++)
       {
             a_col = &pA[j];
 
             ut_row = &pUT[n*i];
 
-            float32_t tmp=a_col[i * n];
+            float32_t tmp=a_col[i * cols];
             
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= ut_row[k] * pX[cols*k+j];
             }
 
             if (ut_row[i]==0.0f)
@@ -230,7 +232,7 @@ arm_status status;                             /* status of matrix inverse */
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
@@ -256,7 +258,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -268,9 +269,7 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
-
-    n = dst->numRows;
+    int i,j,k,n,cols;
 
     float32_t *pX = dst->pData;
     float32_t *pUT = ut->pData;
@@ -279,19 +278,22 @@ arm_status status;                             /* status of matrix inverse */
     float32_t *ut_row;
     float32_t *a_col;
 
-    for(j=0; j < n; j ++)
+    n = dst->numRows;
+    cols = dst->numCols;
+
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
        for(i=n-1; i >= 0 ; i--)
        {
+            float32_t tmp=a_col[i * cols];
+
             ut_row = &pUT[n*i];
 
-            float32_t tmp=a_col[i * n];
-            
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= ut_row[k] * pX[cols*k+j];
             }
 
             if (ut_row[i]==0.0f)
@@ -299,7 +301,7 @@ arm_status status;                             /* status of matrix inverse */
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
index d10eae2..ce6153b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_mat_solve_upper_triangular_f64.c
  * Description:  Solve linear system UT X = A with UT upper triangular matrix
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,7 +60,6 @@ arm_status status;                             /* status of matrix inverse */
 
   /* Check for matrix mismatch condition */
   if ((ut->numRows != ut->numCols) ||
-      (a->numRows != a->numCols) ||
       (ut->numRows != a->numRows)   )
   {
     /* Set status as ARM_MATH_SIZE_MISMATCH */
@@ -70,9 +71,7 @@ arm_status status;                             /* status of matrix inverse */
 
   {
 
-    int i,j,k,n;
-
-    n = dst->numRows;
+    int i,j,k,n,cols;
 
     float64_t *pX = dst->pData;
     float64_t *pUT = ut->pData;
@@ -81,27 +80,30 @@ arm_status status;                             /* status of matrix inverse */
     float64_t *ut_row;
     float64_t *a_col;
 
-    for(j=0; j < n; j ++)
+    n = dst->numRows;
+    cols = dst->numCols;
+
+    for(j=0; j < cols; j ++)
     {
        a_col = &pA[j];
 
        for(i=n-1; i >= 0 ; i--)
        {
+            float64_t tmp=a_col[i * cols];
+
             ut_row = &pUT[n*i];
 
-            float64_t tmp=a_col[i * n];
-            
             for(k=n-1; k > i; k--)
             {
-                tmp -= ut_row[k] * pX[n*k+j];
+                tmp -= ut_row[k] * pX[cols*k+j];
             }
 
-            if (ut_row[i]==0.0f)
+            if (ut_row[i]==0.0)
             {
               return(ARM_MATH_SINGULAR);
             }
             tmp = tmp / ut_row[i];
-            pX[i*n+j] = tmp;
+            pX[i*cols+j] = tmp;
        }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
index fb0f7b7..2e07194 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_sub_f16.c
  * Description:  Floating-point matrix subtraction
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -62,7 +62,7 @@ arm_status arm_mat_sub_f16(
     arm_status status;                             /* status of matrix subtraction */
     uint32_t  numSamples;       /* total number of elements in the matrix  */
     float16_t *pDataA, *pDataB, *pDataDst;
-    f16x8_t vecA, vecB, vecDst;
+    f16x8_t vecA, vecB, vecDst = { 0 };
     float16_t const *pSrcAVec;
     float16_t const *pSrcBVec;
     uint32_t  blkCnt;           /* loop counters */
@@ -170,10 +170,10 @@ arm_status arm_mat_sub_f16(
       /* C(m,n) = A(m,n) - B(m,n) */
 
       /* Subtract and store result in destination buffer. */
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
-      *pOut++ = (*pInA++) - (*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
 
       /* Decrement loop counter */
       blkCnt--;
@@ -194,7 +194,7 @@ arm_status arm_mat_sub_f16(
       /* C(m,n) = A(m,n) - B(m,n) */
 
       /* Subtract and store result in destination buffer. */
-      *pOut++ = (*pInA++) - (*pInB++);
+      *pOut++ = (_Float16)(*pInA++) - (_Float16)(*pInB++);
 
       /* Decrement loop counter */
       blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c
index 0748e08..df58b98 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_sub_f32.c
  * Description:  Floating-point matrix subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -38,8 +38,27 @@
   @defgroup MatrixSub Matrix Subtraction
 
   Subtract two matrices.
-  \image html MatrixSubtraction.gif "Subraction of two 3 x 3 matrices"
-
+  @par Subtraction of two 3 x 3 matrices
+  
+  \f[
+  \begin{pmatrix}
+   a_{1,1} & a_{1,2} & a_{1,3} \\
+   a_{2,1} & a_{2,2} & a_{2,3} \\
+   a_{3,1} & a_{3,2} & a_{3,3} \\
+  \end{pmatrix}
+  -
+  \begin{pmatrix}
+   b_{1,1} & b_{1,2} & b_{1,3} \\
+   b_{2,1} & b_{2,2} & b_{2,3} \\
+   b_{3,1} & b_{3,2} & b_{3,3} \\
+  \end{pmatrix}
+  =
+  \begin{pmatrix}
+   a_{1,1}-b_{1,1} & a_{1,2}-b_{1,2} & a_{1,3}-b_{1,3} \\
+   a_{2,1}-b_{2,1} & a_{2,2}-b_{2,2} & a_{2,3}-b_{2,3} \\
+   a_{3,1}-b_{3,1} & a_{3,2}-b_{3,2} & a_{3,3}-b_{3,3} \\
+  \end{pmatrix}
+  \f]
   The functions check to make sure that
   <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same
   number of rows and columns.
@@ -68,7 +87,7 @@ arm_status arm_mat_sub_f32(
     arm_status status;                             /* status of matrix subtraction */
     uint32_t  numSamples;       /* total number of elements in the matrix  */
     float32_t *pDataA, *pDataB, *pDataDst;
-    f32x4_t vecA, vecB, vecDst;
+    f32x4_t vecA, vecB, vecDst = { 0 };
     float32_t const *pSrcAVec;
     float32_t const *pSrcBVec;
     uint32_t  blkCnt;           /* loop counters */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f64.c
index e41c7dc..3f405d6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_sub_f64.c
  * Description:  Floating-point matrix subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,16 +34,6 @@
   @ingroup groupMatrix
  */
 
-/**
-  @defgroup MatrixSub Matrix Subtraction
-
-  Subtract two matrices.
-  \image html MatrixSubtraction.gif "Subraction of two 3 x 3 matrices"
-
-  The functions check to make sure that
-  <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same
-  number of rows and columns.
- */
 
 /**
   @addtogroup MatrixSub
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q15.c
index dff3aa1..e611663 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_sub_q15.c
  * Description:  Q15 Matrix subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -61,7 +61,7 @@ arm_status arm_mat_sub_q15(
 {
     uint32_t        numSamples;       /* total number of elements in the matrix  */
     q15_t          *pDataA, *pDataB, *pDataDst;
-    q15x8_t       vecA, vecB, vecDst;
+    q15x8_t       vecA, vecB, vecDst = { 0 };
     q15_t const   *pSrcAVec;
     q15_t const   *pSrcBVec;
     uint32_t        blkCnt;           /* loop counters */
@@ -169,8 +169,8 @@ arm_status arm_mat_sub_q15(
 
       /* Subtract, Saturate and store result in destination buffer. */
 #if defined (ARM_MATH_DSP)
-      write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia ((q15_t **) &pInA), read_q15x2_ia ((q15_t **) &pInB)));
-      write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia ((q15_t **) &pInA), read_q15x2_ia ((q15_t **) &pInB)));
+      write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia (&pInA), read_q15x2_ia (&pInB)));
+      write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia (&pInA), read_q15x2_ia (&pInB)));
 #else
       *pOut++ = (q15_t) __SSAT(((q31_t) * pInA++ - *pInB++), 16);
       *pOut++ = (q15_t) __SSAT(((q31_t) * pInA++ - *pInB++), 16);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q31.c
index b81ca7c..9643bdc 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_sub_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_sub_q31.c
  * Description:  Q31 matrix subtraction
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,7 +60,7 @@ arm_status arm_mat_sub_q31(
 {
     uint32_t        numSamples;       /* total number of elements in the matrix  */
     q31_t          *pDataA, *pDataB, *pDataDst;
-    q31x4_t       vecA, vecB, vecDst;
+    q31x4_t       vecA, vecB, vecDst = { 0 };
     q31_t const   *pSrcAVec;
     q31_t const   *pSrcBVec;
     uint32_t        blkCnt;           /* loop counters */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f16.c
index b63e988..b162f2c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_f16.c
  * Description:  Floating-point matrix transpose
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c
index 906d755..b2baa63 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_f32.c
  * Description:  Floating-point matrix transpose
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -40,7 +40,23 @@
   Tranposes a matrix.
 
   Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
-  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
+
+  @par Transpose of a 3 x 3 matrix
+  
+  \f[
+  \begin{pmatrix}
+   a_{1,1} & a_{1,2} & a_{1,3} \\
+   a_{2,1} & a_{2,2} & a_{2,3} \\
+   a_{3,1} & a_{3,2} & a_{3,3} \\
+  \end{pmatrix}^T
+   =
+  \begin{pmatrix}
+   a_{1,1} & a_{2,1} & a_{3,1} \\
+   a_{1,2} & a_{2,2} & a_{3,2} \\
+   a_{1,3} & a_{2,3} & a_{3,3} \\
+  \end{pmatrix}
+  \f]
+  
  */
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f64.c
index d4d94a4..d01ce3b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_f64.c
  * Description:  Floating-point matrix transpose
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -34,14 +34,7 @@
   @ingroup groupMatrix
  */
 
-/**
-  @defgroup MatrixTrans Matrix Transpose
-
-  Tranposes a matrix.
 
-  Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
-  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
- */
 
 /**
   @addtogroup MatrixTrans
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q15.c
index 9a39c08..de00d1b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_q15.c
  * Description:  Q15 matrix transpose
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -142,7 +142,7 @@ arm_status arm_mat_trans_q15(
       while (col > 0U)        /* column loop */
       {
         /* Read two elements from row */
-        in = read_q15x2_ia ((q15_t **) &pIn);
+        in = read_q15x2_ia (&pIn);
 
         /* Unpack and store one element in  destination */
 #ifndef ARM_MATH_BIG_ENDIAN
@@ -165,7 +165,7 @@ arm_status arm_mat_trans_q15(
         pOut += nRows;
 
         /* Read two elements from row */
-        in = read_q15x2_ia ((q15_t **) &pIn);
+        in = read_q15x2_ia (&pIn);
 
         /* Unpack and store one element in destination */
 #ifndef ARM_MATH_BIG_ENDIAN
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q31.c
index be2a306..4f77a28 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_q31.c
  * Description:  Q31 matrix transpose
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q7.c
index cd40f97..666cdfa 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_trans_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_mat_trans_q7.c
  * Description:  Q7 matrix transpose
  *
- * $Date:        06. July 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,10 +63,10 @@ arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_insta
 
 #ifdef ARM_MATH_MATRIX_CHECK
     /* Check for matrix mismatch condition */
-    if ((pSrc->numRows != pDst->dstCols) || (pSrc->srcCols != pDst->numCols))
+    if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
     {
         /* Set status as ARM_MATH_SIZE_MISMATCH */
-        return = ARM_MATH_SIZE_MISMATCH;
+        return ARM_MATH_SIZE_MISMATCH;
     }
 #endif
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
index 7944086..fb7e53c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f16.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_vec_mult_f16.c
  * Description:  Floating-point matrix and vector multiplication
  *
- * $Date:        07. July 202
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -310,10 +312,10 @@ void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_
         pInVec = pVec;
 
         /* Initialize accumulators */
-        float16_t sum1 = 0.0f;
-        float16_t sum2 = 0.0f;
-        float16_t sum3 = 0.0f;
-        float16_t sum4 = 0.0f;
+        float16_t sum1 = 0.0f16;
+        float16_t sum2 = 0.0f16;
+        float16_t sum3 = 0.0f16;
+        float16_t sum4 = 0.0f16;
 
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
@@ -331,13 +333,13 @@ void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_
             vecData = *(pInVec)++;
             // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate
             matData = *(pInA1)++;
-            sum1 += matData * vecData;
+            sum1 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA2)++;
-            sum2 += matData * vecData;
+            sum2 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA3)++;
-            sum3 += matData * vecData;
+            sum3 += (_Float16)matData * (_Float16)vecData;
             matData = *(pInA4)++;
-            sum4 += matData * vecData;
+            sum4 += (_Float16)matData * (_Float16)vecData;
 
             // Decrement the loop counter
             colCnt--;
@@ -359,7 +361,7 @@ void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_
     row = numRows & 3u;
     while (row > 0) {
 
-        float16_t sum = 0.0f;
+        float16_t sum = 0.0f16;
         pInVec = pVec;
         pInA1 = pSrcA + i;
 
@@ -370,14 +372,14 @@ void arm_mat_vec_mult_f16(const arm_matrix_instance_f16 *pSrcMat, const float16_
             vecData2 = *(pInVec)++;
             matData = *(pInA1)++;
             matData2 = *(pInA1)++;
-            sum += matData * vecData;
-            sum += matData2 * vecData2;
+            sum += (_Float16)matData * (_Float16)vecData;
+            sum += (_Float16)matData2 * (_Float16)vecData2;
             colCnt--;
         }
         // process remainder of row
         colCnt = numCols & 1u;
         while (colCnt > 0) {
-            sum += *pInA1++ * *pInVec++;
+            sum += (_Float16)*pInA1++ * (_Float16)*pInVec++;
             colCnt--;
         }
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
index 67b390a..145ec15 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_f32.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_vec_mult_f32.c
  * Description:  Floating-point matrix and vector multiplication
  *
- * $Date:        07. July 202
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -165,7 +167,7 @@ void arm_mat_vec_mult_f32(
     }
 
     /*
-     * compute 2 rows in parrallel
+     * compute 2 rows in parallel
      */
     if (row >= 2)
     {
@@ -310,16 +312,16 @@ void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         float32_t sum1 = 0.0f;
         float32_t sum2 = 0.0f;
         float32_t sum3 = 0.0f;
         float32_t sum4 = 0.0f;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
index 177de77..9d9b1b4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q15.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_vec_mult_q15.c
  * Description:  Q15 matrix and vector multiplication
  *
- * $Date:        07. July 202
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -289,16 +291,16 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q63_t sum1 = 0;
         q63_t sum2 = 0;
         q63_t sum3 = 0;
         q63_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols >> 1;
 
@@ -311,16 +313,16 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
         // Main loop: matrix-vector multiplication
         while (colCnt > 0u) {
             // Read 2 values from vector
-            vecData = read_q15x2_ia ((q15_t **) &pInVec);
+            vecData = read_q15x2_ia (&pInVec);
 
             // Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate
-            matData =  read_q15x2_ia ((q15_t **) &pInA1);
+            matData =  read_q15x2_ia (&pInA1);
             sum1 = __SMLALD(matData, vecData, sum1);
-            matData = read_q15x2_ia ((q15_t **) &pInA2);
+            matData = read_q15x2_ia (&pInA2);
             sum2 = __SMLALD(matData, vecData, sum2);
-            matData = read_q15x2_ia ((q15_t **) &pInA3);
+            matData = read_q15x2_ia (&pInA3);
             sum3 = __SMLALD(matData, vecData, sum3);
-            matData = read_q15x2_ia ((q15_t **) &pInA4);
+            matData = read_q15x2_ia (&pInA4);
             sum4 = __SMLALD(matData, vecData, sum4);
 
             // Decrement the loop counter
@@ -361,10 +363,10 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
         colCnt = numCols >> 2;
 
         while (colCnt > 0) {
-            vecData = read_q15x2_ia ((q15_t **) &pInVec);
-            vecData2 = read_q15x2_ia ((q15_t **) &pInVec);
-            matData = read_q15x2_ia ((q15_t **) &pInA1);
-            matData2 = read_q15x2_ia ((q15_t **) &pInA1);
+            vecData = read_q15x2_ia (&pInVec);
+            vecData2 = read_q15x2_ia (&pInVec);
+            matData = read_q15x2_ia (&pInA1);
+            matData2 = read_q15x2_ia (&pInA1);
             sum = __SMLALD(matData, vecData, sum);
             sum = __SMLALD(matData2, vecData2, sum);
             colCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
index f9ab581..6e0b855 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q31.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_vec_mult_q31.c
  * Description:  Q31 matrix and vector multiplication
  *
- * $Date:        07. July 202
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -286,16 +288,16 @@ void arm_mat_vec_mult_q31(const arm_matrix_instance_q31 *pSrcMat, const q31_t *p
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     /* row loop */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q63_t sum1 = 0;
         q63_t sum2 = 0;
         q63_t sum3 = 0;
         q63_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 2 columns per iteration */
         colCnt = numCols;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
index d4e4d21..5262ce3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/MatrixFunctions/arm_mat_vec_mult_q7.c
@@ -5,12 +5,14 @@
  * Title:        arm_mat_vec_mult_q7.c
  * Description:  Q7 matrix and vector multiplication
  *
- * $Date:        07. July 202
+ * $Date:        23 April 2021
  *
- * Target Processor: Cortex-M cores
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -301,16 +303,16 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
 
     /* The following loop performs the dot-product of each row in pSrcA with the vector */
     while (row > 0) {
-        /* For every row wise process, the pInVec pointer is set
-         ** to the starting address of the vector */
-        pInVec = pVec;
-
         /* Initialize accumulators */
         q31_t sum1 = 0;
         q31_t sum2 = 0;
         q31_t sum3 = 0;
         q31_t sum4 = 0;
 
+        /* For every row wise process, the pInVec pointer is set
+         ** to the starting address of the vector */
+        pInVec = pVec;
+
         /* Loop unrolling: process 4 columns per iteration */
         colCnt = numCols >> 2;
 
@@ -325,26 +327,26 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
 
         while (colCnt > 0u) {
             // Read 4 values from vector
-            vecData = read_q7x4_ia ((q7_t **) &pInVec);
+            vecData = read_q7x4_ia (&pInVec);
             vecData2 = __SXTB16(__ROR(vecData, 8));
             vecData = __SXTB16(vecData);
             // Read 16 values from the matrix - 4 values from each of 4 rows, and do multiply accumulate
-            matData = read_q7x4_ia ((q7_t **) &pInA1);
+            matData = read_q7x4_ia (&pInA1);
             matData2 = __SXTB16(__ROR(matData, 8));
             matData = __SXTB16(matData);
             sum1 = __SMLAD(matData, vecData, sum1);
             sum1 = __SMLAD(matData2, vecData2, sum1);
-            matData = read_q7x4_ia ((q7_t **) &pInA2);
+            matData = read_q7x4_ia (&pInA2);
             matData2 = __SXTB16(__ROR(matData, 8));
             matData = __SXTB16(matData);
             sum2 = __SMLAD(matData, vecData, sum2);
             sum2 = __SMLAD(matData2, vecData2, sum2);
-            matData = read_q7x4_ia ((q7_t **) &pInA3);
+            matData = read_q7x4_ia (&pInA3);
             matData2 = __SXTB16(__ROR(matData, 8));
             matData = __SXTB16(matData);
             sum3 = __SMLAD(matData, vecData, sum3);
             sum3 = __SMLAD(matData2, vecData2, sum3);
-            matData = read_q7x4_ia ((q7_t **) &pInA4);
+            matData = read_q7x4_ia (&pInA4);
             matData2 = __SXTB16(__ROR(matData, 8));
             matData = __SXTB16(matData);
             sum4 = __SMLAD(matData, vecData, sum4);
@@ -391,10 +393,10 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
         colCnt = numCols >> 2;
 
         while (colCnt > 0) {
-            vecData = read_q7x4_ia ((q7_t **) &pInVec);
+            vecData = read_q7x4_ia (&pInVec);
             vecData2 = __SXTB16(__ROR(vecData, 8));
             vecData = __SXTB16(vecData);
-            matData = read_q7x4_ia ((q7_t **) &pInA1);
+            matData = read_q7x4_ia (&pInA1);
             matData2 = __SXTB16(__ROR(matData, 8));
             matData = __SXTB16(matData);
             sum = __SMLAD(matData, vecData, sum);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
index 25ff0de..6d1ee09 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion2rotation_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion2rotation_f32.c
  * Description:  Floating-point quaternion 2 rotation conversion
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -145,7 +147,8 @@ void arm_quaternion2rotation_f32(const float32_t *pInputQuaternions,
     float32_t *pOutputRotations, 
     uint32_t nbQuaternions)
 {
-   for(uint32_t nb=0; nb < nbQuaternions; nb++)
+   uint32_t nb;
+   for(nb=0; nb < nbQuaternions; nb++)
    {
         float32_t q00 = SQ(pInputQuaternions[0 + nb * 4]);
         float32_t q11 = SQ(pInputQuaternions[1 + nb * 4]);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
index c6d6df1..c3d80f9 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_conjugate_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_conjugate_f32.c
  * Description:  Floating-point quaternion conjugate
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -81,7 +83,8 @@ void arm_quaternion_conjugate_f32(const float32_t *pInputQuaternions,
     float32_t *pConjugateQuaternions, 
     uint32_t nbQuaternions)
 {
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
 
       pConjugateQuaternions[4 * i + 0] = pInputQuaternions[4 * i + 0];
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
index df24db7..d4227eb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_inverse_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_inverse_f32.c
  * Description:  Floating-point quaternion inverse
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -92,7 +94,8 @@ void arm_quaternion_inverse_f32(const float32_t *pInputQuaternions,
 {
    float32_t temp;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
 
       temp = SQ(pInputQuaternions[4 * i + 0]) +
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
index a793d01..e5a6130 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_norm_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_norm_f32.c
  * Description:  Floating-point quaternion Norm
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -84,8 +86,9 @@ void arm_quaternion_norm_f32(const float32_t *pInputQuaternions,
   uint32_t nbQuaternions)
 {
    float32_t temp;
+   uint32_t i;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   for(i=0; i < nbQuaternions; i++)
    {
       temp = SQ(pInputQuaternions[4 * i + 0]) +
              SQ(pInputQuaternions[4 * i + 1]) +
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
index 70ec340..1380f6b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_normalize_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_normalize_f32.c
  * Description:  Floating-point quaternion normalization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -85,7 +87,8 @@ void arm_quaternion_normalize_f32(const float32_t *pInputQuaternions,
 {
    float32_t temp;
 
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
       temp = SQ(pInputQuaternions[4 * i + 0]) +
              SQ(pInputQuaternions[4 * i + 1]) +
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
index bfb996d..fef8388 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_product_f32.c
  * Description:  Floating-point quaternion product
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -132,7 +134,8 @@ void arm_quaternion_product_f32(const float32_t *qa,
     float32_t *qr,
     uint32_t nbQuaternions)
 {
-   for(uint32_t i=0; i < nbQuaternions; i++)
+   uint32_t i;
+   for(i=0; i < nbQuaternions; i++)
    {
      arm_quaternion_product_single_f32(qa, qb, qr);
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_single_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_single_f32.c
index 54f56e8..e8149fd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_single_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_quaternion_product_single_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_quaternion_product_single_f32.c
  * Description:  Floating-point quaternion product
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
index 0632ce7..54d56a1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/QuaternionMathFunctions/arm_rotation2quaternion_f32.c
@@ -5,8 +5,10 @@
  * Title:        arm_rotation2quaternion_f32.c
  * Description:  Floating-point rotation to quaternion conversion
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -100,51 +102,51 @@ void arm_rotation2quaternion_f32(const float32_t *pInputRotations,
 
       if (trace > 0)
       {
-        (void)arm_sqrt_f32(trace + 1.0, &doubler) ; // invs=4*qw
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(trace + 1.0f, &doubler) ; // invs=4*qw
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
-        q[0] = 0.25 * doubler;
+        q[0] = 0.25f * doubler;
         q[1] = R21 - R12;
         q[2] = R02 - R20;
         q[3] = R10 - R01;
       }
       else if ((R00 > R11) && (R00 > R22) )
       {
-        (void)arm_sqrt_f32(1.0 + R00 - R11 - R22,&doubler); // invs=4*qx
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R00 - R11 - R22,&doubler); // invs=4*qx
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
         q[0] = R21 - R12;
-        q[1] = 0.25 * doubler;
+        q[1] = 0.25f * doubler;
         q[2] = R01 + R10;
         q[3] = R02 + R20;
       }
       else if (R11 > R22)
       {
-        (void)arm_sqrt_f32(1.0 + R11 - R00 - R22,&doubler); // invs=4*qy
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R11 - R00 - R22,&doubler); // invs=4*qy
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
 
         q[0] = R02 - R20;
         q[1] = R01 + R10;
-        q[2] = 0.25 * doubler;
+        q[2] = 0.25f * doubler;
         q[3] = R12 + R21;
       }
       else
       {
-        (void)arm_sqrt_f32(1.0 + R22 - R00 - R11,&doubler); // invs=4*qz
-        doubler = 2*doubler;
-        s = 1.0 / doubler;
+        (void)arm_sqrt_f32(1.0f + R22 - R00 - R11,&doubler); // invs=4*qz
+        doubler = 2.0f*doubler;
+        s = 1.0f / doubler;
 
         q1 = vmulq_n_f32(q1,s);
         q2 = vmulq_n_f32(q2,s);
@@ -152,7 +154,7 @@ void arm_rotation2quaternion_f32(const float32_t *pInputRotations,
         q[0] = R10 - R01;
         q[1] = R02 + R20;
         q[2] = R12 + R21;
-        q[3] = 0.25 * doubler;
+        q[3] = 0.25f * doubler;
       }
 
       vst1q(pOutputQuaternions, q);
@@ -166,7 +168,8 @@ void arm_rotation2quaternion_f32(const float32_t *pInputRotations,
     float32_t *pOutputQuaternions,  
     uint32_t nbQuaternions)
 {
-   for(uint32_t nb=0; nb < nbQuaternions; nb++)
+   uint32_t nb;
+   for(nb=0; nb < nbQuaternions; nb++)
    {
        const float32_t *r=&pInputRotations[nb*9];
        float32_t *q=&pOutputQuaternions[nb*4];
@@ -178,41 +181,41 @@ void arm_rotation2quaternion_f32(const float32_t *pInputRotations,
 
 
 
-      if (trace > 0)
+      if (trace > 0.0f)
       {
-        doubler = sqrtf(trace + 1.0) * 2; // invs=4*qw
-        s = 1.0 / doubler;
-        q[0] = 0.25 * doubler;
+        doubler = sqrtf(trace + 1.0f) * 2.0f; // invs=4*qw
+        s = 1.0f / doubler;
+        q[0] = 0.25f * doubler;
         q[1] = (RI(2,1) - RI(1,2)) * s;
         q[2] = (RI(0,2) - RI(2,0)) * s;
         q[3] = (RI(1,0) - RI(0,1)) * s;
       }
       else if ((RI(0,0) > RI(1,1)) && (RI(0,0) > RI(2,2)) )
       {
-        doubler = sqrtf(1.0 + RI(0,0) - RI(1,1) - RI(2,2)) * 2; // invs=4*qx
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(0,0) - RI(1,1) - RI(2,2)) * 2.0f; // invs=4*qx
+        s = 1.0f / doubler;
         q[0] = (RI(2,1) - RI(1,2)) * s;
-        q[1] = 0.25 * doubler;
+        q[1] = 0.25f * doubler;
         q[2] = (RI(0,1) + RI(1,0)) * s;
         q[3] = (RI(0,2) + RI(2,0)) * s;
       }
       else if (RI(1,1) > RI(2,2))
       {
-        doubler = sqrtf(1.0 + RI(1,1) - RI(0,0) - RI(2,2)) * 2; // invs=4*qy
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(1,1) - RI(0,0) - RI(2,2)) * 2.0f; // invs=4*qy
+        s = 1.0f / doubler;
         q[0] = (RI(0,2) - RI(2,0)) * s;
         q[1] = (RI(0,1) + RI(1,0)) * s;
-        q[2] = 0.25 * doubler;
+        q[2] = 0.25f * doubler;
         q[3] = (RI(1,2) + RI(2,1)) * s;
       }
       else
       {
-        doubler = sqrtf(1.0 + RI(2,2) - RI(0,0) - RI(1,1)) * 2; // invs=4*qz
-        s = 1.0 / doubler;
+        doubler = sqrtf(1.0f + RI(2,2) - RI(0,0) - RI(1,1)) * 2.0f; // invs=4*qz
+        s = 1.0f / doubler;
         q[0] = (RI(1,0) - RI(0,1)) * s;
         q[1] = (RI(0,2) + RI(2,0)) * s;
         q[2] = (RI(1,2) + RI(2,1)) * s;
-        q[3] = 0.25 * doubler;
+        q[3] = 0.25f * doubler;
       }
 
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f16.c
index 1190975..71bb9cb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_linear_init_f16.c
  * Description:  SVM Linear Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,20 +35,11 @@
 #include <limits.h>
 #include <math.h>
 
-/**
- * @defgroup groupSVM SVM Functions
- *
- */
 
 /**
   @ingroup groupSVM
  */
 
-/**
-  @defgroup linearsvm Linear SVM
-
-  Linear SVM classifier
- */
 
 /**
  * @addtogroup linearsvm
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f32.c
index 989bf76..4c92653 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_init_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_linear_init_f32.c
  * Description:  SVM Linear Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
index 2f9ca3c..8e5a55c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_linear_predict_f16.c
  * Description:  SVM Linear Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -298,9 +300,9 @@ void arm_svm_linear_predict_f16(
         dot=0;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + in[j]* *pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j]* (_Float16)*pSupport++;
         }
-        sum += S->dualCoefficients[i] * dot;
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)dot;
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f32.c
index b6d1dfe..8cf9678 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_linear_predict_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_linear_predict_f32.c
  * Description:  SVM Linear Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f16.c
index 9dfe908..a2ed980 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_polynomial_init_f16.c
  * Description:  SVM Polynomial Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,11 +39,6 @@
   @ingroup groupSVM
  */
 
-/**
-  @defgroup polysvm Polynomial SVM
-
-  Polynomial SVM classifier
- */
 
 /**
  * @addtogroup polysvm
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f32.c
index cef8d12..082399b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_init_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_polynomial_init_f32.c
  * Description:  SVM Polynomial Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
index 3e8a127..3cd6912 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_polynomial_predict_f16.c
  * Description:  SVM Polynomial Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,6 +35,28 @@
 #include <limits.h>
 #include <math.h>
 
+#if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
+
+/*
+ * Raises x to the integer power nb by repeated multiplication: r starts
+ * at x and is multiplied by x (nb - 1) times, so nb <= 1 returns x as is.
+ *
+ * _Float16 is not supported in g++ so we avoid putting _Float16 definitions
+ * in the public headers.
+ * This function should at some point be moved into FastMath.
+ */
+__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
+{
+    float16_t r = x; /* running product, already holds x^1 */
+    nb --; /* one factor of x is already accounted for in r */
+    while(nb > 0)
+    {
+        r = (_Float16)r * (_Float16)x; /* both operands are cast to _Float16 before multiplying */
+        nb--;
+    }
+    return(r);
+}
+#endif
 
 /**
  * @addtogroup polysvm
@@ -40,6 +64,13 @@
  */
 
 
+
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h"
+
 /**
  * @brief SVM polynomial prediction
  * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
@@ -48,12 +79,6 @@
  * @return none.
  *
  */
-
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_math_f16.h"
-
 void arm_svm_polynomial_predict_f16(
     const arm_svm_polynomial_instance_f16 *S,
     const float16_t * in,
@@ -303,6 +328,16 @@ void arm_svm_polynomial_predict_f16(
 }
 
 #else
+
+
+/**
+ * @brief SVM polynomial prediction
+ * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
+ * @param[in]    in         Pointer to input vector
+ * @param[out]   pResult    Decision value
+ * @return none.
+ *
+ */
 void arm_svm_polynomial_predict_f16(
     const arm_svm_polynomial_instance_f16 *S,
     const float16_t * in,
@@ -318,9 +353,9 @@ void arm_svm_polynomial_predict_f16(
         dot=0;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + (_Float16)in[j]* (_Float16)*pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j]* (_Float16)*pSupport++;
         }
-        sum += S->dualCoefficients[i] * (_Float16)arm_exponent_f16(S->gamma * dot + S->coef0, S->degree);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)arm_exponent_f16((_Float16)S->gamma * (_Float16)dot + (_Float16)S->coef0, S->degree);
     }
 
     *pResult=S->classes[STEP(sum)];
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
index 31fc471..2d97e2b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_polynomial_predict_f32.c
  * Description:  SVM Polynomial Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f16.c
index 1f0bcf5..5b2492f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_rbf_init_f16.c
  * Description:  SVM Radial Basis Function Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,11 +39,6 @@
   @ingroup groupSVM
  */
 
-/**
-  @defgroup rbfsvm RBF SVM
-
-  RBF SVM classifier
- */
 
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f32.c
index cd2c620..9fddb02 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_init_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_rbf_init_f32.c
  * Description:  SVM Radial Basis Function Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
index 056562f..15dd7e6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_rbf_predict_f16.c
  * Description:  SVM Radial Basis Function Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70,7 +72,7 @@ void arm_svm_rbf_predict_f16(
     uint32_t         blkCnt;     /* loop counters */
     const float16_t *pDualCoef = S->dualCoefficients;
     _Float16       sum = S->intercept;
-    f16x8_t         vSum = vdupq_n_f16(0);
+    f16x8_t         vSum = vdupq_n_f16(0.0f16);
 
     row = numRows;
 
@@ -97,10 +99,10 @@ void arm_svm_rbf_predict_f16(
         /*
          * reset accumulators
          */
-        acc0 = vdupq_n_f16(0.0f);
-        acc1 = vdupq_n_f16(0.0f);
-        acc2 = vdupq_n_f16(0.0f);
-        acc3 = vdupq_n_f16(0.0f);
+        acc0 = vdupq_n_f16(0.0f16);
+        acc1 = vdupq_n_f16(0.0f16);
+        acc2 = vdupq_n_f16(0.0f16);
+        acc3 = vdupq_n_f16(0.0f16);
 
         pSrcA0Vec = pInA0;
         pSrcA1Vec = pInA1;
@@ -170,7 +172,7 @@ void arm_svm_rbf_predict_f16(
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                      vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)),vctp16q(4));
+                      vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)),vctp16q(4));
         pDualCoef += 4;
         pSrcA += numCols * 4;
         /*
@@ -199,8 +201,8 @@ void arm_svm_rbf_predict_f16(
         /*
          * reset accumulators
          */
-        acc0 = vdupq_n_f16(0.0f);
-        acc1 = vdupq_n_f16(0.0f);
+        acc0 = vdupq_n_f16(0.0f16);
+        acc1 = vdupq_n_f16(0.0f16);
         pSrcA0Vec = pInA0;
         pSrcA1Vec = pInA1;
 
@@ -248,7 +250,7 @@ void arm_svm_rbf_predict_f16(
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                        vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)), vctp16q(2));
+                        vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)), vctp16q(2));
         pDualCoef += 2;
 
         pSrcA += numCols * 2;
@@ -309,12 +311,12 @@ void arm_svm_rbf_predict_f16(
 
         vSum =
             vfmaq_m_f16(vSum, vld1q(pDualCoef),
-                        vexpq_f16(vmulq_n_f16(vtmp, -S->gamma)), vctp16q(1));
+                        vexpq_f16(vmulq_n_f16(vtmp, -(_Float16)S->gamma)), vctp16q(1));
 
     }
 
 
-    sum += vecAddAcrossF16Mve(vSum);
+    sum += (_Float16)vecAddAcrossF16Mve(vSum);
     *pResult = S->classes[STEP(sum)];
 }
 
@@ -337,7 +339,7 @@ void arm_svm_rbf_predict_f16(
             dot = dot + SQ((_Float16)in[j] - (_Float16) *pSupport);
             pSupport++;
         }
-        sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf(-(_Float16)S->gamma * dot);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf((float32_t)(-(_Float16)S->gamma * (_Float16)dot));
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
index 52ab0d5..87d71e3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_rbf_predict_f32.c
  * Description:  SVM Radial Basis Function Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f16.c
index 60f33af..33aaf42 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_sigmoid_predict_f16.c
  * Description:  SVM Sigmoid Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,11 +39,6 @@
   @ingroup groupSVM
  */
 
-/**
-  @defgroup sigmoidsvm Sigmoid SVM
-
-  Sigmoid SVM classifier
- */
 
 /**
  * @addtogroup sigmoidsvm
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c
index a483345..2274e72 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_sigmoid_predict_f32.c
  * Description:  SVM Sigmoid Instance Initialization
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
index dcce835..572bc83 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_sigmoid_predict_f16.c
  * Description:  SVM Sigmoid Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -295,7 +297,7 @@ void arm_svm_sigmoid_predict_f16(
                         vtanhq_f16(vaddq_n_f16(vmulq_n_f16(vtmp, S->gamma), S->coef0)),
                         vctp16q(1));
     }
-    sum += vecAddAcrossF16Mve(vSum);
+    sum += (_Float16)vecAddAcrossF16Mve(vSum);
 
     *pResult = S->classes[STEP(sum)];
 }
@@ -316,9 +318,9 @@ void arm_svm_sigmoid_predict_f16(
         dot=0.0f16;
         for(j=0; j < S->vectorDimension; j++)
         {
-            dot = dot + (_Float16)in[j] * (_Float16)*pSupport++;
+            dot = (_Float16)dot + (_Float16)in[j] * (_Float16)*pSupport++;
         }
-        sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((_Float16)S->gamma * dot + (_Float16)S->coef0);
+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((float32_t)((_Float16)S->gamma * (_Float16)dot + (_Float16)S->coef0));
     }
     *pResult=S->classes[STEP(sum)];
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
index 94b2a50..b607820 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_svm_sigmoid_predict_f32.c
  * Description:  SVM Sigmoid Classifier
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
new file mode 100644
index 0000000..2c50961
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
@@ -0,0 +1,278 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_f16.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmax_f16( /* MVE variant: handles 8 half-precision lanes per step */
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+      uint16_t        blkCnt;           /* loop counter (16-bit) */
+    f16x8_t       vecSrc;
+    float16_t const *pSrcVec;
+    f16x8_t       curExtremValVec = vdupq_n_f16(F16_ABSMIN);
+    float16_t       maxValue = F16_ABSMIN;
+    uint16_t        idx = blockSize; /* NOTE(review): 32-bit blockSize is narrowed to 16 bits here and indices live in uint16 lanes - confirm callers keep blockSize below 65536 */
+    uint16x8_t    indexVec;
+    uint16x8_t    curExtremIdxVec;
+    mve_pred16_t    p0;
+
+    /* indexVec: sample index carried by each lane; curExtremIdxVec: index selected together with each lane's current max */
+    indexVec = vidupq_u16((uint32_t)0, 1);
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    pSrcVec = (float16_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Get current max per lane and current index per lane
+         * when a max is selected (>= compare, so ties take the newer index)
+         */
+        p0 = vcmpgeq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = indexVec +  8; /* move every lane index on to the next group of 8 samples */
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the remaining (blockSize & 7) samples
+     * (will be merged through tail predication)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  /* NOTE(review): this load is not predicated, only the compare below is - confirm reading a full vector past the last sample is acceptable */
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+
+        p0 = vctp16q(blkCnt);
+        /*
+         * Get current max per lane and current index per lane
+         * when a max is selected (only in the lanes enabled by p0)
+         */
+        p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxnmvq(maxValue, curExtremValVec);
+    /*
+     * Lanes that do not hold the overall max get index blockSize instead
+     */
+    p0 = vcmpgeq(curExtremValVec, maxValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * Take the smallest index among the lanes holding the max value
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = maxValue;
+}
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmax_f16( /* scalar variant, unrolled to 4 samples per loop iteration */
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{ /* NOTE(review): the trailing backslashes below splice the body into one logical line; presumably leftovers from a macro body - confirm they can be dropped upstream */
+        float16_t cur_absmax, out;                     /* Temporary variables to store the output value. */\
+        uint32_t blkCnt, outIndex;                     /* Loop counter; index of the current maximum */                                   \
+        uint32_t index;                                /* base index for the current unrolled group (outIndex = index + 1..4) */                         \
+                                                                                                            \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Load the first input value (read unconditionally, so blockSize must be >= 1) as the reference for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
+  /* Initialize index of extrema value. */                                                                  \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmax to next consecutive values one by one */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    /* compare for the extrema value */                                                                     \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and its index */                                                         \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: Compute remaining outputs */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and its index into destination pointers */                                    \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+        float16_t maxVal, out;                         /* maxVal: |current sample|; out: largest absolute value seen so far */
+        uint32_t blkCnt, outIndex;                     /* Loop counter; index of the current maximum */
+  /* Generic scalar variant: one sample per iteration, absolute value taken with fabsf(). */
+  /* Initialise index value to zero. */
+  outIndex = 0U;
+
+  /* Load the first input value (read unconditionally, so blockSize must be >= 1) as the reference for comparison */
+  out = (_Float16)fabsf((float32_t)*pSrc++);
+
+  /* Initialize blkCnt with the number of samples still to visit */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal = (_Float16)fabsf((float32_t)*pSrc++);
+
+    /* compare for the maximum value (strict <, so on ties the earlier index is kept) */
+    if ((_Float16)out < (_Float16)maxVal)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c
new file mode 100644
index 0000000..7ddc9ae
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c
@@ -0,0 +1,264 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_f32.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup AbsMax Absolute Maximum
+
+  Computes the maximum value of absolute values of an array of data.
+  The function returns both the maximum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{
+    int32_t blkSize = blockSize;    /* samples left; signed because the loop below runs until it is <= 0 */
+    f32x4_t vecSrc;    /* absolute values of the group of samples being processed */
+    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);    /* per-lane running maximum */
+    float32_t maxValue = F32_ABSMIN;    /* maximum across all lanes, filled in after the loop */
+    uint32_t idx = blockSize;    /* starting value for the final vminvq index reduction */
+    uint32x4_t indexVec;    /* indices selected together with the lanes of vecSrc */
+    uint32x4_t curExtremIdxVec;    /* per-lane index recorded with the running maximum */
+    uint32_t curIdx = 0;    /* counter handed by address to vidupq_wb_u32 */
+    mve_pred16_t p0;    /* lanes picked by the most recent comparison */
+
+    /* Build the index vector for the first group; recorded indices start at 0. */
+    indexVec = vidupq_wb_u32(&curIdx, 1);
+    curExtremIdxVec = vdupq_n_u32(0);
+
+    do {
+        mve_pred16_t p = vctp32q(blkSize);
+        /* NOTE(review): p presumably disables lanes beyond the remaining samples - confirm */
+        vecSrc = vldrwq_z_f32((float32_t const *) pSrc, p);
+        vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p);
+        /* Get current max per lane and current index per lane when a max is
+         * selected. NOTE(review): the compare is >=, so an equal value met
+         * later in the same lane replaces the earlier index; the scalar builds
+         * in this file compare strictly - confirm tie handling is as intended. */
+        p0 = vcmpgeq_m(vecSrc, curExtremValVec, p);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        /* Open question kept from the original author: does TP detection work here? */
+        indexVec = vidupq_wb_u32(&curIdx, 1);
+
+        blkSize -= 4;
+        pSrc += 4;
+    }
+    while (blkSize > 0);
+
+    /*
+     * Reduce the per-lane maxima to the single largest value
+     */
+    maxValue = vmaxnmvq(maxValue, curExtremValVec);
+    /*
+     * Lanes below the maximum get blockSize as their index
+     */
+    p0 = vcmpgeq(curExtremValVec, maxValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
+    /*
+     * Smallest remaining index belongs to a lane holding the maximum
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = maxValue;
+}
+
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{
+  /* Loop-unrolled scalar path: the first sample seeds the running maximum,
+     the rest are taken four at a time, then one at a time for the tail. */
+  float32_t cur_absmax, out;   /* current |sample| and running maximum */
+  uint32_t blkCnt, outIndex;   /* loop counter and index of the running maximum */
+  uint32_t index;              /* index of the last sample taken by the unrolled loop */
+
+  /* An empty vector has no maximum: report 0 at index 0 rather than letting
+     (blockSize - 1U) wrap around and walk far beyond pSrc. */
+  if (blockSize == 0U)
+  {
+    *pResult = 0.0f;
+    *pIndex = 0U;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+  out = (out > 0.0f) ? out : -out;
+  outIndex = 0U;
+  index = 0U;
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = (blockSize - 1U) >> 2U;
+  while (blkCnt > 0U)
+  {
+    cur_absmax = *pSrc++;
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
+    /* Strict comparison: on ties the earliest index is kept */
+    if (cur_absmax > out)
+    {
+      out = cur_absmax;
+      outIndex = index + 1U;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
+    if (cur_absmax > out)
+    {
+      out = cur_absmax;
+      outIndex = index + 2U;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
+    if (cur_absmax > out)
+    {
+      out = cur_absmax;
+      outIndex = index + 3U;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
+    if (cur_absmax > out)
+    {
+      out = cur_absmax;
+      outIndex = index + 4U;
+    }
+
+    index += 4U;
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = (blockSize - 1U) % 4U;
+  while (blkCnt > 0U)
+  {
+    cur_absmax = *pSrc++;
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
+    if (cur_absmax > out)
+    {
+      out = cur_absmax;
+      outIndex = blockSize - blkCnt;
+    }
+    blkCnt--;
+  }
+
+  /* Store the extrema value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#else
+void arm_absmax_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{
+  /* Scalar path: one pass over pSrc tracking the largest |sample| seen so
+     far (out) and the position of its first occurrence (outIndex). */
+  float32_t maxVal, out;
+  uint32_t blkCnt, outIndex;
+
+  /* An empty vector has no maximum: report 0 at index 0 rather than letting
+     (blockSize - 1U) wrap around and walk far beyond pSrc. */
+  if (blockSize == 0U)
+  {
+    *pResult = 0.0f;
+    *pIndex = 0U;
+    return;
+  }
+
+  /* The first sample seeds the running maximum. */
+  outIndex = 0U;
+  out = fabsf(*pSrc++);
+
+  /* Visit the remaining blockSize - 1 samples; blkCnt counts them down. */
+  blkCnt = (blockSize - 1U);
+  while (blkCnt > 0U)
+  {
+    maxVal = fabsf(*pSrc++);
+    /* Strict comparison: on ties the earliest index is kept. */
+    if (out < maxVal)
+    {
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c
new file mode 100644
index 0000000..23a4e4e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c
@@ -0,0 +1,96 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_f64.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+void arm_absmax_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  /* Scalar path: one pass over pSrc tracking the largest |sample| seen so
+     far (out) and the position of its first occurrence (outIndex). */
+  float64_t maxVal, out;
+  uint32_t blkCnt, outIndex;
+
+  /* An empty vector has no maximum: report 0 at index 0 rather than letting
+     (blockSize - 1U) wrap around and walk far beyond pSrc. */
+  if (blockSize == 0U)
+  {
+    *pResult = 0.0;
+    *pIndex = 0U;
+    return;
+  }
+
+  /* The first sample seeds the running maximum. */
+  outIndex = 0U;
+  out = fabs(*pSrc++);
+
+  /* Visit the remaining blockSize - 1 samples; blkCnt counts them down. */
+  for (blkCnt = blockSize - 1U; blkCnt > 0U; blkCnt--)
+  {
+    maxVal = fabs(*pSrc++);
+
+    /* Strict comparison: on ties the earliest index is kept. */
+    if (out < maxVal)
+    {
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c
new file mode 100644
index 0000000..d1c225c
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c
@@ -0,0 +1,232 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_f16.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmax_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+    /* Helium (MVE) path: keep a per-lane running maximum of absolute values,
+       then reduce across lanes at the end. */
+    /* 32-bit counter: blockSize >> 3 does not fit in 16 bits once
+       blockSize reaches 2^19 samples. */
+    uint32_t blkCnt;
+    f16x8_t vecSrc;
+    float16_t const *pSrcVec;
+    f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN);
+    float16_t maxValue = F16_ABSMIN;
+    mve_pred16_t p0;
+
+    pSrcVec = (float16_t const *) pSrc;
+
+    /*
+     * Main loop: full groups of 8 half-precision samples.
+     */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);
+        pSrcVec += 8;
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);
+        blkCnt--;
+    }
+    /*
+     * Tail: the last (blockSize & 7) samples. The load is predicated so
+     * that lanes past the end of the input are never read from memory,
+     * and the same predicate keeps them out of the per-lane maximum.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp16q(blkCnt);
+        vecSrc = vldrhq_z_f16(pSrcVec, p0);
+        curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxnmavq(maxValue, curExtremValVec);
+    *pResult = maxValue;
+}
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmax_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /* Loop-unrolled scalar path: the first sample seeds the running maximum,
+     the rest are taken four at a time, then one at a time for the tail. */
+  float16_t cur_absmax, out;   /* current |sample| and running maximum */
+  uint32_t blkCnt;             /* loop counter */
+
+  /* An empty vector has no maximum: report 0 rather than letting
+     (blockSize - 1U) wrap around and walk far beyond pSrc. */
+  if (blockSize == 0U)
+  {
+    *pResult = 0.0f16;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = (blockSize - 1U) >> 2U;
+  while (blkCnt > 0U)
+  {
+    /* Initialize cur_absmax to next consecutive values one by one */
+    cur_absmax = *pSrc++;
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
+    /* compare for the extrema value */
+    if ((_Float16)cur_absmax > (_Float16)out)
+    {
+      /* Update the extrema value */
+      out = cur_absmax;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
+    if ((_Float16)cur_absmax > (_Float16)out)
+    {
+      out = cur_absmax;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
+    if ((_Float16)cur_absmax > (_Float16)out)
+    {
+      out = cur_absmax;
+    }
+
+    cur_absmax = *pSrc++;
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
+    if ((_Float16)cur_absmax > (_Float16)out)
+    {
+      out = cur_absmax;
+    }
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = (blockSize - 1U) % 4U;
+  while (blkCnt > 0U)
+  {
+    cur_absmax = *pSrc++;
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
+    if ((_Float16)cur_absmax > (_Float16)out)
+    {
+      out = cur_absmax;
+    }
+    blkCnt--;
+  }
+
+  /* Store the extrema value into the destination pointer */
+  *pResult = out;
+}
+#else
+void arm_absmax_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /* Scalar path: one pass over pSrc tracking the largest |sample| (out). */
+  float16_t maxVal, out;
+  uint32_t blkCnt;
+
+  /* An empty vector has no maximum: report 0 rather than letting
+     (blockSize - 1U) wrap around and walk far beyond pSrc. */
+  if (blockSize == 0U)
+  {
+    *pResult = (_Float16)0.0f;
+    return;
+  }
+
+  /* The first sample seeds the running maximum. */
+  out = (_Float16)fabsf((float32_t)*pSrc++);
+
+  /* Visit the remaining blockSize - 1 samples; blkCnt counts them down. */
+  for (blkCnt = blockSize - 1U; blkCnt > 0U; blkCnt--)
+  {
+    maxVal = (_Float16)fabsf((float32_t)*pSrc++);
+
+    /* compare for the maximum value */
+    if ((_Float16)out < (_Float16)maxVal)
+    {
+      out = maxVal;
+    }
+  }
+
+  /* Store the maximum value into the destination pointer */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c
new file mode 100644
index 0000000..485ccd5
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c
@@ -0,0 +1,229 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_f32.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmax_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counter: full vectors in the main loop, leftover samples in the tail */
+    f32x4_t vecSrc;
+    float32_t const *pSrcVec;
+    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);
+    float32_t maxValue = F32_ABSMIN;
+    mve_pred16_t p0;
+
+    /* Helium (MVE) path: 4 float32 lanes per vector; only the largest absolute value is tracked, no index. */
+    pSrcVec = (float32_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_f32(pSrcVec);  
+        pSrcVec += 4;
+        /*
+         * Per-lane running maximum of absolute values (vmaxnmaq).
+         */
+        curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);
+        /*
+         * One full vector consumed
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the blockSize & 3 samples that do not fill a whole vector.
+     * NOTE(review): the load below is not predicated, so a full 4-lane vector is read even though fewer samples remain - confirm over-read is acceptable.
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        vecSrc = vldrwq_f32(pSrcVec);  
+        pSrcVec += 4;
+        p0 = vctp32q(blkCnt);
+        /*
+         * Merge only the first blkCnt lanes (predicate p0) into the per-lane maximum;
+         * no index is computed in this function.
+         */
+         curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Reduce the per-lane maxima to a single scalar
+     */
+    maxValue = vmaxnmavq(maxValue, curExtremValVec);
+    *pResult = maxValue;
+}
+
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmax_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+        float32_t cur_absmax, out;                     /* cur_absmax: |x| of the current sample; out: largest |x| seen so far. */\
+        uint32_t blkCnt;                     /* Loop counter; the trailing backslashes on these lines only splice lines and have no effect here */                                   \
+                                                                                                            \
+  /* Seed with |pSrc[0]|; it is read unconditionally, so blockSize == 0 is not handled ((blockSize - 1U) would wrap) */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0.0f) ? out : -out;                                                                             \
+                                                                                                            \
+  /* Loop unrolling: process 4 of the remaining (blockSize - 1) samples per iteration */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Take the absolute value of the next sample */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;                                                                 \
+    /* Keep it only if it is strictly larger than the running maximum */                                                                     \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the running maximum (no index is tracked) */                                                         \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Handle the 0..3 samples left over after the unrolled loop */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the largest absolute value through pResult (no index is returned) */                                    \
+  *pResult = out;                                                                                           \
+}
+#else
+void arm_absmax_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+        float32_t maxVal, out;                         /* maxVal: |x| of the current sample; out: largest |x| seen so far. */
+        uint32_t blkCnt;                     /* Number of samples still to be compared */
+
+
+
+  /* Scalar path: a single pass over pSrc that keeps only the largest absolute value (no index is tracked). */
+  /* NOTE: pSrc[0] is read unconditionally and (blockSize - 1U) wraps when blockSize == 0, so an empty vector is not handled. */
+  /* Seed the running maximum with |pSrc[0]| */
+  out = fabsf(*pSrc++);
+
+  /* The first sample is already consumed, so blockSize - 1 samples remain */
+  blkCnt = (blockSize - 1U);
+
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample */
+    maxVal = fabsf(*pSrc++);
+
+    /* Strict '<': the running maximum is replaced only by a strictly larger magnitude */
+    if (out < maxVal)
+    {
+      /* New largest magnitude */
+      out = maxVal;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Write the largest absolute value through pResult (no index is returned) */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c
new file mode 100644
index 0000000..017c588
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c
@@ -0,0 +1,91 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_f64.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+void arm_absmax_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+        float64_t maxVal, out;                         /* maxVal: |x| of the current sample; out: largest |x| seen so far. */
+        uint32_t blkCnt;                     /* Number of samples still to be compared */
+
+
+
+  /* Single pass over pSrc that keeps only the largest absolute value (no index is tracked). */
+  /* NOTE: pSrc[0] is read unconditionally and (blockSize - 1U) wraps when blockSize == 0, so an empty vector is not handled. */
+  /* Seed the running maximum with |pSrc[0]| (double-precision fabs) */
+  out = fabs(*pSrc++);
+
+  /* The first sample is already consumed, so blockSize - 1 samples remain */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample */
+    maxVal = fabs(*pSrc++);
+
+    /* Strict '<': the running maximum is replaced only by a strictly larger magnitude */
+    if (out < maxVal)
+    {
+      /* New largest magnitude */
+      out = maxVal;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Write the largest absolute value through pResult (no index is returned) */
+  *pResult = out;
+}
+
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c
new file mode 100644
index 0000000..9c3a86a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c
@@ -0,0 +1,224 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_q15.c
+ * Description:  Maximum value of absolute values of a Q15 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q15 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmax_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    uint32_t        blkCnt;           /* loop counter; 32-bit so that blockSize >> 3 is not truncated for large blockSize */
+    q15x8_t       vecSrc;
+    q15_t   const *pSrcVec;
+    uint16x8_t    curExtremValVec = vdupq_n_s16(Q15_ABSMIN);
+    q15_t           maxValue = Q15_ABSMIN;
+    mve_pred16_t    p0;
+
+    /* Helium (MVE) path: 8 q15 lanes per vector; only the largest absolute value is tracked, no index. */
+    pSrcVec = (q15_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 8;
+        /*
+         * Per-lane running maximum of absolute values (vmaxaq).
+         */
+        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
+        /*
+         * One full vector consumed
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the blockSize & 7 samples that do not fill a whole vector.
+     * NOTE(review): the load below is not predicated, so a full 8-lane vector is read even though fewer samples remain - confirm over-read is acceptable.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 8;
+        p0 = vctp16q(blkCnt);
+        /*
+         * Merge only the first blkCnt lanes (predicate p0) into the per-lane maximum;
+         * no index is computed in this function.
+         */
+         curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Reduce the per-lane maxima to a single scalar
+     */
+    maxValue = vmaxavq(maxValue, (q15x8_t)curExtremValVec);
+    *pResult = maxValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+        q15_t cur_absmax, out;                     /* cur_absmax: |x| of the current sample; out: largest |x| seen so far. */\
+        uint32_t blkCnt;                     /* Loop counter; the trailing backslashes on these lines only splice lines and have no effect here */                                   \
+                                                                                                            \
+                                                                                            \
+  /* Seed with |pSrc[0]|; it is read unconditionally, so blockSize == 0 is not handled ((blockSize - 1U) would wrap) */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q15_t)__QSUB16(0, out);                                                                           \
+                                                                                              \
+                                                                                                            \
+  /* Loop unrolling: process 4 of the remaining (blockSize - 1) samples per iteration */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Absolute value of the next sample: non-positive inputs are negated with __QSUB16(0, x) */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    /* Keep it only if it is strictly larger than the running maximum */                                                                     \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the running maximum (no index is tracked) */                                                         \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Handle the 0..3 samples left over after the unrolled loop */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the largest absolute value through pResult (no index is returned) */                                    \
+  *pResult = out;                                                                                           \
+}
+#else
+void arm_absmax_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+       q15_t maxVal, out;                             /* maxVal: |x| of the current sample; out: largest |x| seen so far. */
+        uint32_t blkCnt;                     /* Number of samples still to be compared */
+
+  /* NOTE: pSrc[0] is read unconditionally and (blockSize - 1U) wraps when blockSize == 0, so an empty vector is not handled. */
+  /* Seed with |pSrc[0]|; 0x8000 has no positive counterpart in q15_t, so it is mapped to 0x7fff */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+  pSrc++;
+
+  /* The first sample is already consumed, so blockSize - 1 samples remain */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same 0x8000 -> 0x7fff mapping as above */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+    pSrc++;
+
+    /* Strict '<': the running maximum is replaced only by a strictly larger magnitude */
+    if (out < maxVal)
+    {
+      /* New largest magnitude */
+      out = maxVal;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Write the largest absolute value through pResult (no index is returned) */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c
new file mode 100644
index 0000000..5610a8a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c
@@ -0,0 +1,224 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_q31.c
+ * Description:  Maximum value of absolute values of a Q31 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q31 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmax_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counter: full vectors in the main loop, leftover samples in the tail */
+    q31x4_t       vecSrc;
+    q31_t   const *pSrcVec;
+    uint32x4_t    curExtremValVec = vdupq_n_s32(Q31_ABSMIN);
+    q31_t           maxValue = Q31_ABSMIN;
+    mve_pred16_t    p0;
+
+    /* Helium (MVE) path: 4 q31 lanes per vector; only the largest absolute value is tracked, no index. */
+    pSrcVec = (q31_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  
+        pSrcVec += 4;
+        /*
+         * Per-lane running maximum of absolute values (vmaxaq).
+         */
+        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
+        /*
+         * One full vector consumed
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the blockSize & 3 samples that do not fill a whole vector.
+     * NOTE(review): the load below is not predicated, so a full 4-lane vector is read even though fewer samples remain - confirm over-read is acceptable.
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  
+        pSrcVec += 4;
+        p0 = vctp32q(blkCnt);
+        /*
+         * Merge only the first blkCnt lanes (predicate p0) into the per-lane maximum;
+         * no index is computed in this function.
+         */
+         curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Reduce the per-lane maxima to a single scalar
+     */
+    maxValue = vmaxavq(maxValue, (q31x4_t)curExtremValVec);
+    *pResult = maxValue;
+}
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+        q31_t cur_absmax, out;                     /* cur_absmax: |sample| under test; out: largest |sample| seen so far */\
+        uint32_t blkCnt;                     /* Loop counter */                                   \
+  /* ARM_MATH_DSP variant: writes the largest |pSrc[i]| to *pResult; no index is reported. */               \
+  /* NOTE(review): pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */ \
+  /* Load first input value that acts as reference value for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q31_t)__QSUB(0, out);                                                                           \
+  /* NOTE(review): __QSUB is presumably the saturating subtract, so 0 - INT32_MIN would clamp -- confirm */ \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmax to next consecutive values one by one */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    /* compare for the extrema value */                                                                     \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the running extremum (this variant tracks no index) */                                      \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Tail: the (blockSize - 1) % 4 samples left over after the unrolled loop */                             \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extremum into the destination pointer */                                                     \
+  *pResult = out;                                                                                           \
+}
+#else
+void arm_absmax_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+        q31_t maxVal, out;                             /* maxVal: |sample| under test; out: largest |sample| seen so far */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Portable variant: writes the largest |pSrc[i]| to *pResult; no index is reported. */
+  /* NOTE(review): *pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */
+  /* Seed with |pSrc[0]|; INT32_MIN cannot be negated, so it is mapped to INT32_MAX */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+  pSrc++;
+
+  /* Initialize blkCnt with the number of samples still to examine */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same INT32_MIN clamp as above */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+    pSrc++;
+
+    /* compare for the maximum value */
+    if (out < maxVal)
+    {
+      /* Update the running maximum (this variant tracks no index) */
+      out = maxVal;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value into the destination pointer */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c
new file mode 100644
index 0000000..26e1813
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c
@@ -0,0 +1,228 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_no_idx_q7.c
+ * Description:  Maximum value of absolute values of a Q7 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q7 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include <stdint.h>
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+
+
+void arm_absmax_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counters */
+    q7x16_t        vecSrc;
+    q7_t   const *pSrcVec;
+    uint8x16_t     curExtremValVec = vdupq_n_s8(Q7_ABSMIN);   /* per-lane running max of |x| */
+    q7_t            maxValue = Q7_ABSMIN;
+    mve_pred16_t    p0;
+    /* Helium (MVE) variant: writes the largest |pSrc[i]| over blockSize samples to *pResult. */
+    /* Main loop: consume the input one full vector (16 samples) at a time. */
+    pSrcVec = (q7_t const *) pSrc;
+    blkCnt = blockSize >> 4;
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 16;
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the remaining (blockSize & 0xF) samples, handled under a
+     * lane predicate so that only valid samples take part
+     */
+    blkCnt = blockSize & 0xF;
+    if (blkCnt > 0)
+    {
+        p0 = vctp8q(blkCnt);
+        /* Predicated load: never touch memory past the last valid sample. */
+        vecSrc = vld1q_z_s8(pSrcVec, p0);
+        /*
+         * Update the per-lane max in the active lanes only;
+         * inactive lanes keep their previous value
+         */
+         curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxavq(maxValue, (q7x16_t)curExtremValVec);
+    *pResult = maxValue;
+}
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+        q7_t cur_absmax, out;                     /* cur_absmax: |sample| under test; out: largest |sample| seen so far */\
+        uint32_t blkCnt;                     /* Loop counter */                                   \
+  /* ARM_MATH_DSP variant: writes the largest |pSrc[i]| to *pResult; no index is reported. */               \
+  /* NOTE(review): pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */ \
+  /* Load first input value that acts as reference value for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);                                                                           \
+  /* NOTE(review): __QSUB8 is presumably the per-byte saturating subtract; the q7_t cast keeps the low byte -- confirm */ \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmax to next consecutive values one by one */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    /* compare for the extrema value */                                                                     \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the running extremum (this variant tracks no index) */                                      \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Tail: the (blockSize - 1) % 4 samples left over after the unrolled loop */                             \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extremum into the destination pointer */                                                     \
+  *pResult = out;                                                                                           \
+}
+#else
+void arm_absmax_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+       q7_t maxVal, out;                              /* maxVal: |sample| under test; out: largest |sample| seen so far */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Portable variant: writes the largest |pSrc[i]| to *pResult; no index is reported. */
+  /* NOTE(review): *pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */
+  /* Seed with |pSrc[0]|; a sample equal to (q7_t) 0x80 is mapped to (q7_t) 0x7f instead of being negated */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+  pSrc++;
+
+  /* Initialize blkCnt with the number of samples still to examine */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same 0x80 -> 0x7f clamp as above */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+    pSrc++;
+
+    /* compare for the maximum value */
+    if (out < maxVal)
+    {
+      /* Update the running maximum (this variant tracks no index) */
+      out = maxVal;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value into the destination pointer */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c
new file mode 100644
index 0000000..656fcf9
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c
@@ -0,0 +1,240 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_q15.c
+ * Description:  Maximum value of absolute values of a Q15 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q15 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+      int32_t         blkCnt;     /* samples still to process; drops by 8 per iteration */
+    q15x8_t         extremValVec = vdupq_n_s16(Q15_ABSMIN);   /* per-lane extremum so far */
+    q15_t           maxValue = Q15_ABSMIN;
+    uint16x8_t      indexVec;        /* sample positions held by the lanes in this iteration */
+    uint16x8_t      extremIdxVec;
+    mve_pred16_t    p0;
+    uint16_t        extremIdxArr[8]; /* per-lane position of the stored extremum */
+    /* NOTE(review): positions live in 16-bit lanes, so indices above 65535 presumably wrap -- confirm */
+    indexVec = vidupq_u16(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp16q(blkCnt);
+        q15x8_t         extremIdxVal = vld1q_z_s16(pSrc, p);
+        /* compare absolute values from here on */
+        extremIdxVal = vqabsq(extremIdxVal);
+        /*
+         * p0 marks the lanes (among those enabled by p) whose new
+         * absolute value is >= the extremum stored for that lane
+         */
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
+        /* replace the stored extremum in the lanes selected by p0 */
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u16(extremIdxArr, indexVec, p0);
+
+        indexVec += 8;
+        pSrc += 8;
+        blkCnt -= 8;
+    }
+    while (blkCnt > 0);
+
+    /* Reduce the per-lane results to a single value and a single index. */
+    /* Get max value across the vector   */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* lanes below the overall max get the largest index (blockSize - 1) so the min-reduction skips them */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u16(extremIdxArr);
+
+    indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
+    *pResult = maxValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+        q15_t cur_absmax, out;                     /* cur_absmax: |sample| under test; out: largest |sample| seen so far */\
+        uint32_t blkCnt, outIndex;                     /* Loop counter; position of the current extremum */  \
+        uint32_t index;                                /* position of the last sample before the current group of four */ \
+  /* NOTE(review): pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */ \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Load first input value that acts as reference value for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q15_t)__QSUB16(0, out);                                                                           \
+  /* Initialize index of extrema value. */                                                                  \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmax to next consecutive values one by one */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    /* compare for the extrema value; the strict '>' keeps the earliest position on ties */                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and its index */                                                          \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Tail: the (blockSize - 1) % 4 samples left over after the unrolled loop */                             \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+  /* With blkCnt samples left, the sample just read sits at position blockSize - blkCnt */                  \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and its index into destination pointers */                                     \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmax_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+       q15_t maxVal, out;                             /* maxVal: |sample| under test; out: largest |sample| seen so far */
+        uint32_t blkCnt, outIndex;                     /* Loop counter; position of the current maximum */
+  /* NOTE(review): *pSrc is read before any size check and (blockSize - 1U) wraps for 0 -- confirm callers pass >= 1 */
+  /* Initialise index value to zero. */
+  outIndex = 0U;
+  /* Seed with |pSrc[0]|; a sample equal to (q15_t) 0x8000 is mapped to 0x7fff instead of being negated */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+  pSrc++;
+
+  /* Initialize blkCnt with the number of samples still to examine */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same 0x8000 -> 0x7fff clamp as above */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+    pSrc++;
+
+    /* compare for the maximum value; the strict '<' keeps the earliest position on ties */
+    if (out < maxVal)
+    {
+      /* Update the maximum value and its index (blockSize - blkCnt is the sample just read) */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c
new file mode 100644
index 0000000..d3cfa3c
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c
@@ -0,0 +1,240 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_q31.c
+ * Description:  Maximum value of absolute values of a Q31 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q31 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+    int32_t         blkCnt;     /* samples left to process; signed, so the loop ends once it is <= 0 */
+    q31x4_t         extremValVec = vdupq_n_s32(Q31_ABSMIN);   /* per-lane running maximum of |x| */
+    q31_t           maxValue = Q31_ABSMIN;                    /* maximum across all lanes */
+    uint32x4_t      indexVec;                                 /* element index carried by each lane */
+    uint32x4_t      extremIdxVec;                             /* per-lane indexes reloaded from extremIdxArr */
+    mve_pred16_t    p0;                                       /* lanes selected by the latest compare */
+    uint32_t        extremIdxArr[4];                          /* per-lane index of the current extremum */
+    /* Each of the 4 lanes tracks its own running maximum of |x| and the index where it was found. */
+    indexVec = vidupq_u32(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp32q(blkCnt);                  /* predicate derived from the remaining count */
+        q31x4_t         extremIdxVal = vld1q_z_s32(pSrc, p);
+
+        extremIdxVal = vqabsq(extremIdxVal);
+        /*
+         * Compare against the per-lane running maximum under predicate p;
+         * p0 marks the lanes whose maximum and index are updated below
+         */
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
+
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes (predicated by p0) */
+        vst1q_p_u32(extremIdxArr, indexVec, p0);
+        /* advance lane indexes, input pointer and remaining count by one vector of 4 samples */
+        indexVec += 4;
+        pSrc += 4;
+        blkCnt -= 4;
+    }
+    while (blkCnt > 0);
+
+    /* NOTE(review): the in-loop compare looks like "greater or equal", so ties may not report the first index - confirm */
+    /* Reduce the per-lane maxima to a single maximum */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* lanes that do not hold the overall maximum get the largest valid index (blockSize - 1) */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u32(extremIdxArr);
+    /* the minimum taken below is presumably the smallest index among lanes holding the maximum - confirm */
+    indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
+    *pResult = maxValue;  
+}
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+        q31_t cur_absmax, out;                     /* cur_absmax: |current sample|; out: largest |sample| so far */\
+        uint32_t blkCnt, outIndex;                     /* blkCnt: loop counter; outIndex: index of out */          \
+        uint32_t index;                                /* index of the last sample consumed by the unrolled loop */ \
+  /* NOTE(review): the line-ending backslashes only splice lines; they look like macro-body leftovers. */   \
+  /* The first sample is the initial maximum, so its index is zero. */                                      \
+  outIndex = 0U;                                                                                            \
+  /* Load the first input value as the reference value for comparison */                                    \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q31_t)__QSUB(0, out);                                                                           \
+  /* Only sample 0 has been consumed so far. */                                                             \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: 4 samples per iteration (the first sample is already consumed) */                      \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Absolute value of the next sample; non-positive values go through __QSUB(0, x) */                    \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    /* Strict comparison: an equal later value does not replace the earlier index */                        \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the extremum and its index */                                                               \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: handle the 0..3 samples left after the unrolled loop */                                \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+  /* blockSize - blkCnt is the index of the sample read in each pass below */                               \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extremum and its index through the destination pointers */                                   \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmax_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+        q31_t maxVal, out;                             /* maxVal: |current sample|; out: largest |sample| found so far */
+        uint32_t blkCnt, outIndex;                     /* blkCnt: samples left to scan; outIndex: index of out */
+
+  /* Linear scan for the largest absolute value and its index; pSrc[0] is read unconditionally. */
+  /* The first sample is the initial candidate, so the index starts at zero. */
+  outIndex = 0U;
+  /* Absolute value of the first sample as the reference for comparison; INT32_MIN is mapped to INT32_MAX */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+  pSrc++;
+
+  /* One sample is already consumed, so blockSize - 1 remain */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same INT32_MIN -> INT32_MAX mapping */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+    pSrc++;
+
+    /* Strict comparison: an equal later value does not replace the earlier index */
+    if (out < maxVal)
+    {
+      /* Update the maximum and its index; blockSize - blkCnt is the index of the sample just read */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum absolute value and its index through the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c
new file mode 100644
index 0000000..30595de
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c
@@ -0,0 +1,298 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_q7.c
+ * Description:  Maximum value of absolute values of a Q7 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a Q7 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include <stdint.h>
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+#define MAX_BLKSZ_S8  (UINT8_MAX+1)
+
+static void arm_small_blk_absmax_q7(
+    const q7_t * pSrc,
+    uint16_t blockSize,
+    q7_t * pResult,
+    uint32_t * pIndex)
+{
+    int32_t        blkCnt;     /* samples left to process; signed, so the loop ends once it is <= 0 */
+    q7x16_t        extremValVec = vdupq_n_s8(Q7_ABSMIN);    /* per-lane running maximum of |x| */
+    q7_t           maxValue = Q7_ABSMIN;                    /* maximum across all lanes */
+    uint8x16_t     indexVec;                                /* element index carried by each lane (8-bit) */
+    uint8x16_t     extremIdxVec;                            /* per-lane indexes reloaded from extremIdxArr */
+    mve_pred16_t   p0;                                      /* lanes selected by the latest compare */
+    uint8_t        extremIdxArr[16];                        /* per-lane index of the current extremum */
+    /* Lane indexes are 8-bit, so one call can only index UINT8_MAX + 1 elements; the caller passes at most MAX_BLKSZ_S8. */
+    indexVec = vidupq_u8(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp8q(blkCnt);                 /* predicate derived from the remaining count */
+        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);
+
+        extremIdxVal = vqabsq(extremIdxVal);
+        /*
+         * Compare against the per-lane running maximum under predicate p;
+         * p0 marks the lanes whose maximum and index are updated below
+         */
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
+
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes (predicated by p0) */
+        vst1q_p_u8(extremIdxArr, indexVec, p0);
+        /* advance lane indexes, input pointer and remaining count by one vector of 16 samples */
+        indexVec += 16;
+        pSrc += 16;
+        blkCnt -= 16;
+    }
+    while (blkCnt > 0);
+
+    /* NOTE(review): the in-loop compare looks like "greater or equal", so ties may not report the first index - confirm */
+    /* Reduce the per-lane maxima to a single maximum */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* lanes that do not hold the overall maximum get the largest valid index (blockSize - 1) */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u8(extremIdxArr);
+    /* the minimum taken below is presumably the smallest index among lanes holding the maximum - confirm */
+    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
+    *pIndex = vminvq_u8(blockSize - 1, indexVec);
+    *pResult = maxValue;
+}
+
+void arm_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+    int32_t   totalSize = blockSize;   /* samples still to process (signed copy of blockSize) */
+    /* Inputs of up to MAX_BLKSZ_S8 samples go straight to the helper; larger ones are split into blocks. */
+    if (totalSize <= MAX_BLKSZ_S8)
+    {
+        arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
+    }
+    else
+    {
+        uint32_t  curIdx = 0;            /* index of the block being processed */
+        q7_t      curBlkExtr = Q7_MIN;   /* largest block maximum seen so far */
+        uint32_t  curBlkPos = 0;         /* position of that maximum inside its block */
+        uint32_t  curBlkIdx = 0;         /* index of the block holding that maximum */
+        /*
+         * process full blocks of MAX_BLKSZ_S8 (UINT8_MAX + 1 = 256) elements
+         */
+        while (totalSize >= MAX_BLKSZ_S8)
+        {
+            const q7_t     *curSrc = pSrc;
+            /* pResult / pIndex serve as scratch outputs for each block */
+            arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
+            if (*pResult > curBlkExtr)
+            {
+                /*
+                 * update partial extrema (strict compare: on ties the earlier block is kept)
+                 */
+                curBlkExtr = *pResult;
+                curBlkPos = *pIndex;
+                curBlkIdx = curIdx;
+            }
+            curIdx++;
+            pSrc += MAX_BLKSZ_S8;
+            totalSize -= MAX_BLKSZ_S8;
+        }
+        /*
+         * remainder; NOTE(review): totalSize is 0 here when blockSize is a multiple of MAX_BLKSZ_S8 - confirm the helper copes
+         */
+        arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
+        if (*pResult > curBlkExtr)
+        {
+            curBlkExtr = *pResult;
+            curBlkPos = *pIndex;
+            curBlkIdx = curIdx;
+        }
+        *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;   /* block offset plus position inside the block */
+        *pResult = curBlkExtr;
+    }
+}
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+        q7_t cur_absmax, out;                     /* cur_absmax: |current sample|; out: largest |sample| so far */\
+        uint32_t blkCnt, outIndex;                     /* blkCnt: loop counter; outIndex: index of out */          \
+        uint32_t index;                                /* index of the last sample consumed by the unrolled loop */ \
+  /* NOTE(review): the line-ending backslashes only splice lines; they look like macro-body leftovers. */   \
+  /* The first sample is the initial maximum, so its index is zero. */                                      \
+  outIndex = 0U;                                                                                            \
+  /* Load the first input value as the reference value for comparison */                                    \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);                                                                           \
+  /* Only sample 0 has been consumed so far. */                                                             \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: 4 samples per iteration (the first sample is already consumed) */                      \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Absolute value of the next sample; non-positive values go through __QSUB8(0, x) */                   \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    /* Strict comparison: an equal later value does not replace the earlier index */                        \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      /* Update the extremum and its index */                                                               \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: handle the 0..3 samples left after the unrolled loop */                                \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+  /* blockSize - blkCnt is the index of the sample read in each pass below */                               \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax);                                                                 \
+    if (cur_absmax > out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extremum and its index through the destination pointers */                                   \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmax_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+       q7_t maxVal, out;                              /* maxVal: |current sample|; out: largest |sample| found so far */
+        uint32_t blkCnt, outIndex;                     /* blkCnt: samples left to scan; outIndex: index of out */
+
+  /* Linear scan for the largest absolute value and its index; pSrc[0] is read unconditionally. */
+  /* The first sample is the initial candidate, so the index starts at zero. */
+  outIndex = 0U;
+  /* Absolute value of the first sample as the reference for comparison; 0x80 is mapped to 0x7f */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+  pSrc++;
+
+  /* One sample is already consumed, so blockSize - 1 remain */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample, with the same 0x80 -> 0x7f mapping */
+    maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+    pSrc++;
+
+    /* Strict comparison: an equal later value does not replace the earlier index */
+    if (out < maxVal)
+    {
+      /* Update the maximum and its index; blockSize - blkCnt is the index of the sample just read */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum absolute value and its index through the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c
new file mode 100644
index 0000000..335f502
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c
@@ -0,0 +1,280 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_f16.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a half-precision floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{ /* Helium (MVE) variant: processes 8 half-precision lanes per vector */
+    uint16_t  blkCnt;           /* number of full 8-sample blocks, then tail length */
+    f16x8_t vecSrc;
+    float16_t const *pSrcVec;
+    f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX); /* per-lane running minimum, every lane starts at F16_ABSMAX */
+    float16_t minValue = F16_ABSMAX;
+    uint16_t  idx = blockSize; /* NOTE(review): indices are 16-bit in this variant, so blockSize is narrowed here */
+    uint16x8_t indexVec;
+    uint16x8_t curExtremIdxVec;
+    mve_pred16_t p0;
+
+
+    indexVec = vidupq_u16((uint32_t)0, 1); /* sample index carried by each lane, advanced by 8 per block */
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    pSrcVec = (float16_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Keep the per-lane minimum and its index: lanes where
+         * |sample| <= current lane minimum take the new value and index
+         */
+        p0 = vcmpleq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = indexVec +  8;
+        /*
+         * Decrement the block loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the (blockSize & 7) leftover samples
+     * (the predicate from vctp16q restricts the update to those lanes)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp16q(blkCnt);
+
+        vecSrc = vldrhq_f16(pSrcVec);   /* NOTE(review): full 8-lane load, not predicated by p0 */
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Same per-lane minimum/index update as above,
+         * with the comparison predicated by the tail mask
+         */
+        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Reduce the per-lane minima to a single minimum value
+     */
+    minValue = vminnmvq(minValue, curExtremValVec);
+    /*
+     * lanes that do not hold the minimum get index blockSize so they lose the reduction below
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * Get the smallest index among the lanes holding the minimum value
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{ /* ARM_MATH_LOOPUNROLL variant: scalar scan, 4 samples per loop iteration; the earliest index wins on ties */
+        float16_t cur_absmin, out;                     /* Current |sample| and running minimum of |x|.   */\
+        uint32_t blkCnt, outIndex;                     /* Loop counter, index of minimum */                 \
+        uint32_t index;                                /* index of sample before current group */           \
+                                                                                                            \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Load first input value that acts as reference value for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
+  /* Initialize index of extrema value. */                                                                  \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmin to next consecutive values one by one */                                         \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    /* compare for the extrema value */                                                                     \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and its index */                                                          \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: Compute remaining outputs */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin;                                                                 \
+    if ((_Float16)cur_absmin < (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and its index into destination pointers */                                     \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmin_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{ /* Plain scalar variant: linear scan for the smallest |sample|; the earliest index wins on ties */
+        float16_t minVal, out;                         /* |current sample| and running minimum of |x| */
+        uint32_t blkCnt, outIndex;                     /* Remaining-sample counter and index of the running minimum */
+
+  /* The first sample is the initial candidate, so the result index starts at zero. */
+  outIndex = 0U;
+
+  /* Load |pSrc[0]| (taken in single precision via fabsf) as the reference value */
+  out = (_Float16)fabsf((float32_t)*pSrc++);
+
+  /* One sample is already consumed, so blockSize - 1 remain. NOTE(review): blockSize == 0 would wrap this unsigned subtraction - presumably callers pass blockSize >= 1 */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample */
+    minVal = (_Float16)fabsf((float32_t)*pSrc++);
+
+    /* Strict comparison: an equal value does not replace the current minimum */
+    if ((_Float16)out > (_Float16)minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt; /* index of the sample just read */
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c
new file mode 100644
index 0000000..521093a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c
@@ -0,0 +1,283 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_f32.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup AbsMin Absolute Minimum
+
+  Computes the minimum value of absolute values of an array of data.
+  The function returns both the minimum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{ /* Helium (MVE) variant: processes 4 single-precision lanes per vector */
+    int32_t  blkCnt;           /* number of full 4-sample blocks, then tail length */
+    f32x4_t vecSrc;
+    float32_t const *pSrcVec;
+    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX); /* per-lane running minimum, every lane starts at F32_ABSMAX */
+    float32_t minValue = F32_ABSMAX;
+    uint32_t  idx = blockSize;
+    uint32x4_t indexVec;
+    uint32x4_t curExtremIdxVec;
+    mve_pred16_t p0;
+
+
+    indexVec = vidupq_u32((uint32_t)0, 1); /* sample index carried by each lane, advanced by 4 per block */
+    curExtremIdxVec = vdupq_n_u32(0);
+
+    pSrcVec = (float32_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_f32(pSrcVec);  
+        pSrcVec += 4;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Keep the per-lane minimum and its index: lanes where
+         * |sample| <= current lane minimum take the new value and index
+         */
+        p0 = vcmpleq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = indexVec +  4;
+        /*
+         * Decrement the block loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the (blockSize & 3) leftover samples
+     * (the predicate from vctp32q restricts the update to those lanes)
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        p0 = vctp32q(blkCnt);
+
+        vecSrc = vldrwq_f32(pSrcVec);   /* NOTE(review): full 4-lane load, not predicated by p0 */
+        pSrcVec += 4;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Same per-lane minimum/index update as above,
+         * with the comparison predicated by the tail mask
+         */
+        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Reduce the per-lane minima to a single minimum value
+     */
+    minValue = vminnmvq(minValue, curExtremValVec);
+    /*
+     * lanes that do not hold the minimum get index blockSize so they lose the reduction below
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
+    /*
+     * Get the smallest index among the lanes holding the minimum value
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{ /* ARM_MATH_LOOPUNROLL variant: scalar scan, 4 samples per loop iteration; the earliest index wins on ties */
+        float32_t cur_absmin, out;                     /* Current |sample| and running minimum of |x|.   */\
+        uint32_t blkCnt, outIndex;                     /* Loop counter, index of minimum */                 \
+        uint32_t index;                                /* index of sample before current group */           \
+                                                                                                            \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Load first input value that acts as reference value for comparison */                                  \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0.0f) ? out : -out;                                                                             \
+  /* Initialize index of extrema value. */                                                                  \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmin to next consecutive values one by one */                                         \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin;                                                                 \
+    /* compare for the extrema value */                                                                     \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and its index */                                                          \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin;                                                                 \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin;                                                                 \
+    if (cur_absmin < out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin;                                                                 \
+    if (cur_absmin < out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: Compute remaining outputs */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin;                                                                 \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and its index into destination pointers */                                     \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmin_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{ /* Plain scalar variant: linear scan for the smallest |sample|; the earliest index wins on ties */
+       float32_t minVal, out;                         /* |current sample| and running minimum of |x| */
+       uint32_t blkCnt, outIndex;                     /* Remaining-sample counter and index of the running minimum */
+
+  /* The first sample is the initial candidate, so the result index starts at zero. */
+  outIndex = 0U;
+
+  /* Load |pSrc[0]| as the reference value for comparison */
+  out = fabsf(*pSrc++);
+
+  /* One sample is already consumed, so blockSize - 1 remain. NOTE(review): blockSize == 0 would wrap this unsigned subtraction - presumably callers pass blockSize >= 1 */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample */
+    minVal = fabsf(*pSrc++);
+
+    /* Strict comparison: an equal value does not replace the current minimum */
+    if (out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt; /* index of the sample just read */
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c
new file mode 100644
index 0000000..518651a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c
@@ -0,0 +1,94 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_f64.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a double-precision floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+void arm_absmin_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{ /* Linear scan for the smallest |sample|; the earliest index wins on ties */
+       float64_t minVal, out;                         /* |current sample| and running minimum of |x| */
+       uint32_t blkCnt, outIndex;                     /* Remaining-sample counter and index of the running minimum */
+
+  /* The first sample is the initial candidate, so the result index starts at zero. */
+  outIndex = 0U;
+
+  /* Load |pSrc[0]| as the reference value for comparison */
+  out = fabs(*pSrc++);
+
+  /* One sample is already consumed, so blockSize - 1 remain. NOTE(review): blockSize == 0 would wrap this unsigned subtraction - presumably callers pass blockSize >= 1 */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Absolute value of the next sample */
+    minVal = fabs(*pSrc++);
+
+    /* Strict comparison: an equal value does not replace the current minimum */
+    if (out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt; /* index of the sample just read */
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c
new file mode 100644
index 0000000..1e90c91
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c
@@ -0,0 +1,234 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_f16.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmin_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+    /* Helium (MVE) variant: 8 half-precision lanes are processed per iteration. */
+    int32_t  blkCnt;           /* loop counters */
+    f16x8_t vecSrc;
+    float16_t const *pSrcVec;
+    f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX);   /* per-lane running minimum */
+    float16_t minValue = F16_ABSMAX;
+    mve_pred16_t p0;
+
+    pSrcVec = (float16_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec);
+        pSrcVec += 8;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (the blockSize % 8 leftover samples, handled under a lane predicate)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0)
+    {
+        p0 = vctp16q(blkCnt);
+        /*
+         * Predicated (zeroing) load: only the blkCnt valid samples are loaded.
+         * A full-width load here would read up to 7 samples past the end of pSrc.
+         */
+        vecSrc = vld1q_z(pSrcVec, p0);
+        /* Update the running minimum in the active lanes only */
+        curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminnmavq(minValue, curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmin_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /*
+   * Scalar variant with the main loop manually unrolled by four.
+   * A blockSize of 0 is not handled: the first sample is read
+   * unconditionally and (blockSize - 1U) would wrap around.
+   */
+
+  float16_t sample;             /* Magnitude of the sample under test */
+  float16_t smallest;           /* Smallest magnitude found so far */
+  uint32_t remaining;           /* Loop counter */
+
+  /*
+   * The first sample seeds the running minimum. Throughout this function a
+   * magnitude is formed by negating every value that is not strictly positive.
+   */
+  smallest = *pSrc++;
+  smallest = ((_Float16)smallest > 0.0f16) ? smallest : -(_Float16)smallest;
+
+  /* Unrolled part: four samples per pass over the remaining blockSize - 1 */
+  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
+  {
+    /* First sample of the group */
+    sample = *pSrc++;
+    sample = ((_Float16)sample > 0.0f16) ? sample : -(_Float16)sample;
+    if ((_Float16)sample < (_Float16)smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Second sample of the group */
+    sample = *pSrc++;
+    sample = ((_Float16)sample > 0.0f16) ? sample : -(_Float16)sample;
+    if ((_Float16)sample < (_Float16)smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Third sample of the group */
+    sample = *pSrc++;
+    sample = ((_Float16)sample > 0.0f16) ? sample : -(_Float16)sample;
+    if ((_Float16)sample < (_Float16)smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Fourth sample of the group */
+    sample = *pSrc++;
+    sample = ((_Float16)sample > 0.0f16) ? sample : -(_Float16)sample;
+    if ((_Float16)sample < (_Float16)smallest)
+    {
+      smallest = sample;
+    }
+  }
+
+  /*
+   * Leftover part: the (blockSize - 1) % 4 samples that did not fill
+   * a complete group of four are handled one at a time.
+   */
+  for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
+  {
+    sample = *pSrc++;
+    sample = ((_Float16)sample > 0.0f16) ? sample : -(_Float16)sample;
+    if ((_Float16)sample < (_Float16)smallest)
+    {
+      smallest = sample;
+    }
+  }
+
+  /* Hand the minimum magnitude back to the caller */
+  *pResult = smallest;
+}
+#else
+void arm_absmin_no_idx_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /*
+   * Plain scalar variant: one sample per iteration. Each magnitude is taken
+   * with fabsf() on the sample converted to float32_t, then converted back.
+   * A blockSize of 0 is not handled: the first sample is read
+   * unconditionally and (blockSize - 1U) would wrap around.
+   */
+
+  float16_t candidate;          /* Magnitude of the sample under test */
+  float16_t smallest;           /* Smallest magnitude found so far */
+  uint32_t remaining;           /* Samples still to be examined */
+
+  /* The first sample seeds the running minimum */
+  smallest = (_Float16)fabsf((float32_t)*pSrc++);
+
+  /* Walk the remaining blockSize - 1 samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    /* Magnitude of the next sample */
+    candidate = (_Float16)fabsf((float32_t)*pSrc++);
+
+    /* Keep it only when it is strictly smaller than the current minimum */
+    if ((_Float16)candidate < (_Float16)smallest)
+    {
+      smallest = candidate;
+    }
+  }
+
+  /* Hand the minimum magnitude back to the caller */
+  *pResult = smallest;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c
new file mode 100644
index 0000000..20aca41
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c
@@ -0,0 +1,230 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_f32.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_absmin_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+    /* Helium (MVE) variant: 4 single-precision lanes are processed per iteration. */
+    int32_t  blkCnt;           /* loop counters */
+    f32x4_t vecSrc;
+    float32_t const *pSrcVec;
+    f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX);   /* per-lane running minimum */
+    float32_t minValue = F32_ABSMAX;
+    mve_pred16_t p0;
+
+    pSrcVec = (float32_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_f32(pSrcVec);
+        pSrcVec += 4;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (the blockSize % 4 leftover samples, handled under a lane predicate)
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        p0 = vctp32q(blkCnt);
+        /*
+         * Predicated (zeroing) load: only the blkCnt valid samples are loaded.
+         * A full-width load here would read up to 3 samples past the end of pSrc.
+         */
+        vecSrc = vldrwq_z_f32(pSrcVec, p0);
+        /* Update the running minimum in the active lanes only */
+        curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminnmavq(minValue, curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmin_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+  /*
+   * Scalar variant with the main loop manually unrolled by four.
+   * A blockSize of 0 is not handled: the first sample is read
+   * unconditionally and (blockSize - 1U) would wrap around.
+   */
+
+  float32_t sample;             /* Magnitude of the sample under test */
+  float32_t smallest;           /* Smallest magnitude found so far */
+  uint32_t remaining;           /* Loop counter */
+
+  /*
+   * The first sample seeds the running minimum. Throughout this function a
+   * magnitude is formed by negating every value that is not strictly positive.
+   */
+  smallest = *pSrc++;
+  smallest = (smallest > 0.0f) ? smallest : -smallest;
+
+  /* Unrolled part: four samples per pass over the remaining blockSize - 1 */
+  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
+  {
+    /* First sample of the group */
+    sample = *pSrc++;
+    sample = (sample > 0.0f) ? sample : -sample;
+    if (sample < smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Second sample of the group */
+    sample = *pSrc++;
+    sample = (sample > 0.0f) ? sample : -sample;
+    if (sample < smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Third sample of the group */
+    sample = *pSrc++;
+    sample = (sample > 0.0f) ? sample : -sample;
+    if (sample < smallest)
+    {
+      smallest = sample;
+    }
+
+    /* Fourth sample of the group */
+    sample = *pSrc++;
+    sample = (sample > 0.0f) ? sample : -sample;
+    if (sample < smallest)
+    {
+      smallest = sample;
+    }
+  }
+
+  /*
+   * Leftover part: the (blockSize - 1) % 4 samples that did not fill
+   * a complete group of four are handled one at a time.
+   */
+  for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
+  {
+    sample = *pSrc++;
+    sample = (sample > 0.0f) ? sample : -sample;
+    if (sample < smallest)
+    {
+      smallest = sample;
+    }
+  }
+
+  /* Hand the minimum magnitude back to the caller */
+  *pResult = smallest;
+}
+#else
+void arm_absmin_no_idx_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+  /*
+   * Plain scalar variant, compiled when neither the Helium nor the unrolled
+   * implementation is selected: one sample per iteration, magnitudes via fabsf().
+   * A blockSize of 0 is not handled: the first sample is read
+   * unconditionally and (blockSize - 1U) would wrap around.
+   */
+
+  float32_t candidate;          /* Magnitude of the sample under test */
+  float32_t smallest;           /* Smallest magnitude found so far */
+  uint32_t remaining;           /* Samples still to be examined */
+
+  /* The first sample seeds the running minimum */
+  smallest = fabsf(*pSrc++);
+
+  /* Walk the remaining blockSize - 1 samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    /* Magnitude of the next sample */
+    candidate = fabsf(*pSrc++);
+
+    /* Keep it only when it is strictly smaller than the current minimum */
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+    }
+  }
+
+  /* Hand the minimum magnitude back to the caller */
+  *pResult = smallest;
+}
+
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c
new file mode 100644
index 0000000..143271d
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c
@@ -0,0 +1,88 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_f64.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+void arm_absmin_no_idx_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  /*
+   * Plain scalar scan: one sample per iteration, magnitudes taken with fabs().
+   * A blockSize of 0 is not handled: the first sample is read
+   * unconditionally and (blockSize - 1U) would wrap around.
+   */
+
+  float64_t candidate;          /* Magnitude of the sample under test */
+  float64_t smallest;           /* Smallest magnitude found so far */
+  uint32_t remaining;           /* Samples still to be examined */
+
+  /* The first sample seeds the running minimum */
+  smallest = fabs(*pSrc++);
+
+  /* Walk the remaining blockSize - 1 samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    /* Magnitude of the next sample */
+    candidate = fabs(*pSrc++);
+
+    /* Keep it only when it is strictly smaller than the current minimum */
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+    }
+  }
+
+  /* Hand the minimum magnitude back to the caller */
+  *pResult = smallest;
+}
+
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c
new file mode 100644
index 0000000..c6dd15e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c
@@ -0,0 +1,226 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_q15.c
+ * Description:  Minimum value of absolute values of a Q15 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q15 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmin_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    /* Helium (MVE) variant: 8 Q15 lanes are processed per iteration. */
+    uint32_t        blkCnt;           /* loop counters; 32-bit so blockSize >> 3 is not truncated */
+    q15x8_t       vecSrc;
+    q15_t   const *pSrcVec;
+    uint16x8_t    curExtremValVec = vdupq_n_u16(Q15_ABSMAX);   /* per-lane running minimum */
+    q15_t           minValue = Q15_ABSMAX;
+    mve_pred16_t    p0;
+
+    pSrcVec = (q15_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec);
+        pSrcVec += 8;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminaq(curExtremValVec, vecSrc);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (the blockSize % 8 leftover samples, handled under a lane predicate)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0)
+    {
+        p0 = vctp16q(blkCnt);
+        /*
+         * Predicated (zeroing) load: only the blkCnt valid samples are loaded.
+         * A full-width load here would read up to 7 samples past the end of pSrc.
+         */
+        vecSrc = vld1q_z(pSrcVec, p0);
+        /* Update the running minimum in the active lanes only */
+        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminavq(minValue, (q15x8_t)curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+        q15_t cur_absmin, out;               /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q15 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = (q15_t) 0x7fff;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+  out = (out > 0) ? out : (q15_t)__QSUB16(0, out);
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = (blockSize - 1U) >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* Fold the next four samples into the running minimum, one by one */
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = (blockSize - 1U) % 4U;
+  while (blkCnt > 0U)
+  {
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#else
+void arm_absmin_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+        q15_t minVal, out;                   /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q15 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = (q15_t) 0x7fff;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+  pSrc++;
+
+  /* Fold the remaining blockSize - 1 samples into the running minimum */
+  for (blkCnt = blockSize - 1U; blkCnt > 0U; blkCnt--)
+  {
+    /* Saturating absolute value: the most negative input (0x8000) maps to 0x7fff */
+    minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc);
+    pSrc++;
+
+    /* compare for the minimum value */
+    if (out > minVal)
+    {
+      out = minVal;
+    }
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c
new file mode 100644
index 0000000..90281a4
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c
@@ -0,0 +1,225 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_q31.c
+ * Description:  Minimum value of absolute values of a Q31 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q31 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmin_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    /*
+     * Helium (MVE) path: vminaq keeps a per-lane minimum of |sample| and
+     * vminavq reduces it across lanes. Both start from Q31_ABSMAX, so an
+     * empty input (blockSize == 0) stores Q31_ABSMAX in *pResult.
+     */
+    uint32_t        blkCnt;           /* loop counter */
+    q31x4_t         vecSrc;
+    q31_t const    *pSrcVec = pSrc;
+    uint32x4_t      curExtremValVec = vdupq_n_u32(Q31_ABSMAX);
+    q31_t           minValue = Q31_ABSMAX;
+    mve_pred16_t    p0;
+
+    /* Full vectors: 4 samples per iteration */
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);
+        pSrcVec += 4;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminaq(curExtremValVec, vecSrc);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the last 1..3 samples are handled with tail predication
+     */
+    blkCnt = blockSize & 3U;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp32q(blkCnt);
+        /*
+         * Predicated (zeroing) load: lanes beyond the end of the source
+         * buffer are not read, and vminaq_m leaves them unchanged.
+         */
+        vecSrc = vldrwq_z_s32(pSrcVec, p0);
+        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /* Get min value across the vector */
+    minValue = vminavq(minValue, (q31x4_t)curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+        q31_t cur_absmin, out;               /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q31 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = (q31_t) 0x7FFFFFFF;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+  out = (out > 0) ? out : (q31_t)__QSUB(0, out);
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = (blockSize - 1U) >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* Fold the next four samples into the running minimum, one by one */
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = (blockSize - 1U) % 4U;
+  while (blkCnt > 0U)
+  {
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#else
+void arm_absmin_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+        q31_t minVal, out;                   /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q31 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = INT32_MAX;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+  pSrc++;
+
+  /* Fold the remaining blockSize - 1 samples into the running minimum */
+  for (blkCnt = blockSize - 1U; blkCnt > 0U; blkCnt--)
+  {
+    /* Saturating absolute value: INT32_MIN maps to INT32_MAX */
+    minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc);
+    pSrc++;
+    if (out > minVal)
+    {
+      out = minVal;
+    }
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c
new file mode 100644
index 0000000..e0f712b
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c
@@ -0,0 +1,227 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_no_idx_q7.c
+ * Description:  Minimum value of absolute values of a Q7 vector
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q7 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include <stdint.h>
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+
+
+void arm_absmin_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+    /*
+     * Helium (MVE) path: vminaq keeps a per-lane minimum of |sample| and
+     * vminavq reduces it across lanes. Both start from Q7_ABSMAX, so an
+     * empty input (blockSize == 0) stores Q7_ABSMAX in *pResult.
+     */
+    uint32_t        blkCnt;           /* loop counter */
+    q7x16_t         vecSrc;
+    q7_t const     *pSrcVec = pSrc;
+    uint8x16_t      curExtremValVec = vdupq_n_u8(Q7_ABSMAX);
+    q7_t            minValue = Q7_ABSMAX;
+    mve_pred16_t    p0;
+
+    /* Full vectors: 16 samples per iteration */
+    blkCnt = blockSize >> 4;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec);
+        pSrcVec += 16;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminaq(curExtremValVec, vecSrc);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the last 1..15 samples are handled with tail predication
+     */
+    blkCnt = blockSize & 0xFU;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp8q(blkCnt);
+        /*
+         * Predicated (zeroing) load: lanes beyond the end of the source
+         * buffer are not read, and vminaq_m leaves them unchanged.
+         */
+        vecSrc = vld1q_z(pSrcVec, p0);
+        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
+    }
+    /* Get min value across the vector */
+    minValue = vminavq(minValue, (q7x16_t)curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+        q7_t cur_absmin, out;                /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q7 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = (q7_t) 0x7f;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = (blockSize - 1U) >> 2U;
+  while (blkCnt > 0U)
+  {
+    /* Fold the next four samples into the running minimum, one by one */
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = (blockSize - 1U) % 4U;
+  while (blkCnt > 0U)
+  {
+    cur_absmin = *pSrc++;
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);
+    if (cur_absmin < out)
+    {
+      out = cur_absmin;
+    }
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#else
+void arm_absmin_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+        q7_t minVal, out;                    /* Current |sample| and running minimum */
+        uint32_t blkCnt;                     /* Loop counter */
+
+  /* Empty input: there is no first sample and (blockSize - 1U) would wrap
+     around, so store the largest Q7 magnitude as the Helium variant does. */
+  if (blockSize == 0U)
+  {
+    *pResult = (q7_t) 0x7f;
+    return;
+  }
+
+  /* Load first input value that acts as reference value for comparison */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+  pSrc++;
+
+  /* Fold the remaining blockSize - 1 samples into the running minimum */
+  for (blkCnt = blockSize - 1U; blkCnt > 0U; blkCnt--)
+  {
+    /* Saturating absolute value: the most negative input (0x80) maps to 0x7f */
+    minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+    pSrc++;
+    /* compare for the minimum value */
+    if (out > minVal)
+    {
+      out = minVal;
+    }
+  }
+
+  /* Store the minimum of the absolute values */
+  *pResult = out;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c
new file mode 100644
index 0000000..ef389ba
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c
@@ -0,0 +1,273 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_q15.c
+ * Description:  Minimum value of absolute values of a Q15 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q15 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+      uint16_t        blkCnt;           /* full 8-lane vector count, then tail sample count */
+    q15x8_t       vecSrc;               /* magnitudes of the samples under test */
+    q15_t const   *pSrcVec;             /* read cursor into pSrc */
+    q15x8_t       curExtremValVec = vdupq_n_s16(Q15_ABSMAX); /* per-lane running minimum */
+    q15_t           minValue = Q15_ABSMAX;                   /* seed for the final reduction */
+    uint16_t        idx = blockSize;    /* seed for the index reduction */
+    uint16x8_t    indexVec;             /* sample indices of the lanes under test */
+    uint16x8_t    curExtremIdxVec;      /* index stored when a lane's minimum was last replaced */
+    uint32_t        startIdx = 0;       /* running start index handed to vidupq_wb_u16 */
+    mve_pred16_t    p0;                 /* lane predicate */
+
+    /* NOTE(review): indices are kept in 16-bit lanes - confirm blockSize never exceeds 65535 */
+    indexVec = vidupq_wb_u16(&startIdx, 1);
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    pSrcVec = (q15_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec);
+        pSrcVec += 8;
+        vecSrc = vqabsq(vecSrc);
+        /*
+         * vqabsq saturates, so -32768 becomes 32767 instead of staying negative.
+         * Lanes whose new magnitude is <= the lane minimum take it and its index.
+         */
+        p0 = vcmpleq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = vidupq_wb_u16(&startIdx, 1);
+        /*
+         * One fewer full vector left to process
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the blockSize & 7 leftover samples. The load is predicated so
+     * that nothing beyond the end of the input vector is read.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp16q(blkCnt);
+        vecSrc = vld1q_z_s16(pSrcVec, p0);
+        pSrcVec += 8;
+        vecSrc = vqabsq(vecSrc);
+
+        /*
+         * Same compare/select as the main loop, limited to the lanes
+         * enabled by vctp16q(blkCnt)
+         */
+        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Reduce the per-lane minima to a single scalar minimum
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    /*
+     * Lanes holding that minimum keep their index; all others get blockSize
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * The smallest surviving index is the one reported
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Write the index and the minimum to the caller's outputs
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+  q15_t candidate;       /* Magnitude of the sample under test */
+  q15_t smallest;        /* Smallest magnitude found so far */
+  uint32_t remaining;    /* Iterations left in the current loop */
+  uint32_t smallestIdx;  /* Position of 'smallest' in pSrc */
+  uint32_t base;         /* Position of the sample preceding the current group of four */
+
+  /*
+   * Linear scan for the smallest |x|. Only a strictly smaller magnitude
+   * replaces the running minimum, so the earliest of equal minima is kept.
+   */
+
+  /* The first sample seeds the search; non-positive inputs go through __QSUB16(0, x) */
+  smallest = *pSrc++;
+  smallest = (smallest > 0) ? smallest : (q15_t)__QSUB16(0, smallest);
+  smallestIdx = 0U;
+
+  /* No group of four has been consumed yet */
+  base = 0U;
+
+  /* Unrolled by four: each pass tests the samples at base + 1 .. base + 4 */
+  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
+  {
+    /* Sample at base + 1 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q15_t)__QSUB16(0, candidate);
+    if (candidate < smallest)
+    {
+      /* New minimum: remember the value and where it was found */
+      smallest = candidate;
+      smallestIdx = base + 1U;
+    }
+
+    /* Sample at base + 2 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q15_t)__QSUB16(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 2U;
+    }
+
+    /* Sample at base + 3 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q15_t)__QSUB16(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 3U;
+    }
+
+    /* Sample at base + 4 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q15_t)__QSUB16(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 4U;
+    }
+
+    /* Advance to the next group of four */
+    base += 4U;
+  }
+
+  /* Tail: the (blockSize - 1) % 4 samples the unrolled loop did not reach */
+  for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
+  {
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q15_t)__QSUB16(0, candidate);
+    if (candidate < smallest)
+    {
+      /* 'remaining' counts this sample too, so it sits at blockSize - remaining */
+      smallest = candidate;
+      smallestIdx = blockSize - remaining;
+    }
+  }
+
+  /* Hand the minimum magnitude and its position back to the caller */
+  *pResult = smallest;
+  *pIndex = smallestIdx;
+}
+#else
+void arm_absmin_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex)
+{
+  q15_t sample;          /* Raw input sample under test */
+  q15_t magnitude;       /* Saturated absolute value of 'sample' */
+  q15_t smallest;        /* Smallest magnitude found so far */
+  uint32_t smallestIdx;  /* Position of 'smallest' in pSrc */
+  uint32_t remaining;    /* Samples not yet examined */
+
+  /* Seed the search with |pSrc[0]|; the most negative value saturates to 0x7fff */
+  sample = *pSrc++;
+  if (sample > 0)
+  {
+    smallest = sample;
+  }
+  else
+  {
+    smallest = (sample == (q15_t) 0x8000) ? 0x7fff : -sample;
+  }
+  smallestIdx = 0U;
+
+  /* Examine the remaining blockSize - 1 samples in order */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    sample = *pSrc++;
+    magnitude = (sample > 0) ? sample : ((sample == (q15_t) 0x8000) ? 0x7fff : -sample);
+    /* Strict comparison, so the earliest of equal minima is the one reported */
+    if (magnitude < smallest)
+    {
+      smallest = magnitude;
+      smallestIdx = blockSize - remaining;
+    }
+  }
+
+  /* Hand the minimum magnitude and its position back to the caller */
+  *pResult = smallest;
+  *pIndex = smallestIdx;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c
new file mode 100644
index 0000000..0f28026
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c
@@ -0,0 +1,273 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_q31.c
+ * Description:  Minimum value of absolute values of a Q31 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q31 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+    uint32_t        blkCnt;             /* full 4-lane vector count, then tail sample count */
+    q31x4_t       vecSrc;               /* magnitudes of the samples under test */
+    q31_t const   *pSrcVec;             /* read cursor into pSrc */
+    q31x4_t       curExtremValVec = vdupq_n_s32(Q31_ABSMAX); /* per-lane running minimum */
+    q31_t           minValue = Q31_ABSMAX;                   /* seed for the final reduction */
+    uint32_t        idx = blockSize;    /* index reduction seed; 32-bit like the index lanes */
+    uint32x4_t    indexVec;             /* sample indices of the lanes under test */
+    uint32x4_t    curExtremIdxVec;      /* index stored when a lane's minimum was last replaced */
+    uint32_t        startIdx = 0;       /* running start index handed to vidupq_wb_u32 */
+    mve_pred16_t    p0;                 /* lane predicate */
+
+    /* blkCnt and idx are 32-bit so that a blockSize above 65535 is not truncated */
+    indexVec = vidupq_wb_u32(&startIdx, 1);
+    curExtremIdxVec = vdupq_n_u32(0);
+
+    pSrcVec = (q31_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);
+        pSrcVec += 4;
+        vecSrc = vqabsq(vecSrc);
+        /*
+         * vqabsq saturates, so INT32_MIN becomes INT32_MAX instead of staying negative.
+         * Lanes whose new magnitude is <= the lane minimum take it and its index.
+         */
+        p0 = vcmpleq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = vidupq_wb_u32(&startIdx, 1);
+        /*
+         * One fewer full vector left to process
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the blockSize & 3 leftover samples. The load is predicated so
+     * that nothing beyond the end of the input vector is read.
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U)
+    {
+        p0 = vctp32q(blkCnt);
+        vecSrc = vldrwq_z_s32(pSrcVec, p0);
+        pSrcVec += 4;
+        vecSrc = vqabsq(vecSrc);
+
+        /*
+         * Same compare/select as the main loop, limited to the lanes
+         * enabled by vctp32q(blkCnt)
+         */
+        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Reduce the per-lane minima to a single scalar minimum
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    /*
+     * Lanes holding that minimum keep their index; all others get blockSize
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
+    /*
+     * The smallest surviving index is the one reported
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Write the index and the minimum to the caller's outputs
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+  q31_t candidate;       /* Magnitude of the sample under test */
+  q31_t smallest;        /* Smallest magnitude found so far */
+  uint32_t remaining;    /* Iterations left in the current loop */
+  uint32_t smallestIdx;  /* Position of 'smallest' in pSrc */
+  uint32_t base;         /* Position of the sample preceding the current group of four */
+
+  /*
+   * Linear scan for the smallest |x|. Only a strictly smaller magnitude
+   * replaces the running minimum, so the earliest of equal minima is kept.
+   */
+
+  /* The first sample seeds the search; non-positive inputs go through __QSUB(0, x) */
+  smallest = *pSrc++;
+  smallest = (smallest > 0) ? smallest : (q31_t)__QSUB(0, smallest);
+  smallestIdx = 0U;
+
+  /* No group of four has been consumed yet */
+  base = 0U;
+
+  /* Unrolled by four: each pass tests the samples at base + 1 .. base + 4 */
+  for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
+  {
+    /* Sample at base + 1 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q31_t)__QSUB(0, candidate);
+    if (candidate < smallest)
+    {
+      /* New minimum: remember the value and where it was found */
+      smallest = candidate;
+      smallestIdx = base + 1U;
+    }
+
+    /* Sample at base + 2 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q31_t)__QSUB(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 2U;
+    }
+
+    /* Sample at base + 3 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q31_t)__QSUB(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 3U;
+    }
+
+    /* Sample at base + 4 */
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q31_t)__QSUB(0, candidate);
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+      smallestIdx = base + 4U;
+    }
+
+    /* Advance to the next group of four */
+    base += 4U;
+  }
+
+  /* Tail: the (blockSize - 1) % 4 samples the unrolled loop did not reach */
+  for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
+  {
+    candidate = *pSrc++;
+    candidate = (candidate > 0) ? candidate : (q31_t)__QSUB(0, candidate);
+    if (candidate < smallest)
+    {
+      /* 'remaining' counts this sample too, so it sits at blockSize - remaining */
+      smallest = candidate;
+      smallestIdx = blockSize - remaining;
+    }
+  }
+
+  /* Hand the minimum magnitude and its position back to the caller */
+  *pResult = smallest;
+  *pIndex = smallestIdx;
+}
+#else
+void arm_absmin_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex)
+{
+  q31_t sample;          /* Raw input sample under test */
+  q31_t magnitude;       /* Saturated absolute value of 'sample' */
+  q31_t smallest;        /* Smallest magnitude found so far */
+  uint32_t smallestIdx;  /* Position of 'smallest' in pSrc */
+  uint32_t remaining;    /* Samples not yet examined */
+
+  /* Seed the search with |pSrc[0]|; INT32_MIN saturates to INT32_MAX */
+  sample = *pSrc++;
+  if (sample > 0)
+  {
+    smallest = sample;
+  }
+  else
+  {
+    smallest = (sample == INT32_MIN) ? INT32_MAX : -sample;
+  }
+  smallestIdx = 0U;
+
+  /* Examine the remaining blockSize - 1 samples in order */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    sample = *pSrc++;
+    magnitude = (sample > 0) ? sample : ((sample == INT32_MIN) ? INT32_MAX : -sample);
+    /* Strict comparison, so the earliest of equal minima is the one reported */
+    if (magnitude < smallest)
+    {
+      smallest = magnitude;
+      smallestIdx = blockSize - remaining;
+    }
+  }
+  /* Hand the minimum magnitude and its position back to the caller */
+  *pResult = smallest;
+  *pIndex = smallestIdx;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c
new file mode 100644
index 0000000..99bb473
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c
@@ -0,0 +1,326 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_q7.c
+ * Description:  Minimum value of absolute values of a Q7 vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a Q7 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include <stdint.h>
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+#define MAX_BLKSZ_S8  (UINT8_MAX+1)
+
+static void arm_small_blk_absmin_q7(
+    const q7_t        *pSrc,
+    uint32_t     blockSize,
+    q7_t        *pResult,
+    uint32_t    *pIndex)
+{
+    uint16_t        blkCnt;           /* full-vector iteration count, then the tail length */
+    q7x16_t       vecSrc;
+    q7_t const   *pSrcVec;
+    q7x16_t       curExtremValVec = vdupq_n_s8(Q7_ABSMAX);   /* per-lane running minimum */
+    q7_t           minValue = Q7_ABSMAX;
+    uint16_t       idx = blockSize - 1;                       /* starting value for the final index reduction */
+    uint8x16_t    indexVec;                                   /* 8-bit element indices of the vector being processed */
+    uint8x16_t    curExtremIdxVec;                            /* per-lane index of the running minimum */
+    uint32_t       startIdx = 0;
+    mve_pred16_t   p0;
+
+
+    indexVec = vidupq_wb_u8(&startIdx, 1);
+    curExtremIdxVec = vdupq_n_u8(0);
+
+    pSrcVec = (q7_t const *) pSrc;
+    blkCnt = blockSize >> 4;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 16;
+        vecSrc = vabsq(vecSrc);   /* NOTE(review): confirm how vabsq treats -128; the scalar variants saturate it to 0x7f */
+        /*
+         * Lanes whose |sample| is <= the stored per-lane minimum take the new
+         * value and its index (so an equal value overwrites the stored index)
+         */
+        p0 = vcmpleq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = vidupq_wb_u8(&startIdx, 1);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the blockSize & 0xF samples left after the full vectors
+     * (handled with a lane predicate instead of a scalar loop)
+     */
+    blkCnt = blockSize & 0xF;
+    if (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrcVec);  /* full 16-lane load; only the first blkCnt lanes are compared, see p0 below */
+        pSrcVec += 16;
+        vecSrc = vabsq(vecSrc);
+
+        p0 = vctp8q(blkCnt);
+        /*
+         * Same per-lane min/index update as in the main loop, restricted
+         * to the lanes enabled by the tail predicate
+         */
+        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Reduce the per-lane minima (and the initial minValue) to one scalar
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    /*
+     * Among the lanes holding that minimum, keep the smallest stored index
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    idx = vminvq_p_u8(idx, curExtremIdxVec, p0);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+
+void arm_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+     int32_t   totalSize = blockSize;   /* samples still to be processed */
+
+    if (totalSize <= MAX_BLKSZ_S8)     /* input fits in a single call to the small-block helper */
+    {
+        arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
+    }
+    else
+    {
+        uint32_t  curIdx = 0;            /* number of the chunk being processed */
+        q7_t      curBlkExtr = Q7_MAX;   /* smallest chunk result seen so far */
+        uint32_t  curBlkPos = 0;         /* position of that result inside its chunk */
+        uint32_t  curBlkIdx = 0;         /* number of the chunk it came from */
+        /*
+         * process full chunks of MAX_BLKSZ_S8 (UINT8_MAX+1) elements
+         */
+        while (totalSize >= MAX_BLKSZ_S8)
+        {
+            const q7_t     *curSrc = pSrc;
+
+            arm_small_blk_absmin_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
+            if (*pResult < curBlkExtr)
+            {
+                /*
+                 * strictly smaller chunk result: remember value, position and chunk
+                 */
+                curBlkExtr = *pResult;
+                curBlkPos = *pIndex;
+                curBlkIdx = curIdx;
+            }
+            curIdx++;
+            pSrc += MAX_BLKSZ_S8;
+            totalSize -= MAX_BLKSZ_S8;
+        }
+        /*
+         * remainder: the samples left after the last full chunk
+         */
+        arm_small_blk_absmin_q7(pSrc, totalSize, pResult, pIndex);
+        if (*pResult < curBlkExtr)
+        {
+            curBlkExtr = *pResult;
+            curBlkPos = *pIndex;
+            curBlkIdx = curIdx;
+        }
+        *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;   /* chunk-relative position -> index in the whole input */
+        *pResult = curBlkExtr;
+    }
+}
+
+#else
+#if defined(ARM_MATH_DSP)
+void arm_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+        q7_t cur_absmin, out;                     /* cur_absmin: |current sample|; out: smallest so far */\
+        uint32_t blkCnt, outIndex;                     /* loop counter; index of 'out' */                   \
+        uint32_t index;                                /* index of the last sample already examined */      \
+                                                                                                            \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Seed the reference with |pSrc[0]| (negated via __QSUB8 when not positive) */                           \
+  out = *pSrc++;                                                                                            \
+  out = (out > 0) ? out : (q7_t)__QSUB8(0, out);                                                                           \
+  /* Sample 0 has been examined. */                                                                         \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: examine 4 samples per iteration */                                                     \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Absolute value of the next sample */                                                                 \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
+    /* Strict comparison keeps the earliest index on ties */                                                \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and its index */                                                          \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                \
+    if (cur_absmin < out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                 \
+    if (cur_absmin < out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Handle the (blockSize - 1) % 4 samples left after the unrolled loop */                                 \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmin = *pSrc++;                                                                                     \
+    cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin);                                                                 \
+    if (cur_absmin < out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmin;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and its index into destination pointers */                                     \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmin_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex)
+{
+        q7_t minVal, out;                              /* minVal: |current sample|; out: smallest |sample| found so far */
+        uint32_t blkCnt, outIndex;                     /* blkCnt: samples left to scan; outIndex: index of 'out' */
+
+  /* The running minimum initially refers to element 0. */
+  outIndex = 0U;
+  /* Seed the running minimum with |pSrc[0]|; the value (q7_t) 0x80 maps to (q7_t) 0x7f instead of being negated */
+  out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+  pSrc++;
+
+  /* One sample is already consumed. NOTE: blockSize == 0 is not guarded (pSrc[0] was read above and blockSize - 1U wraps) */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Saturating absolute value of the next sample */
+    minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
+    pSrc++;
+
+    /* Strict comparison: on equal magnitudes the earlier index is kept */
+    if (out > minVal)
+    {
+      /* Record the new minimum; blockSize - blkCnt is the index of the sample just read */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum absolute value and its index through the output pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_DSP) */
+#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMin group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f16.c
new file mode 100644
index 0000000..71be5f1
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f16.c
@@ -0,0 +1,125 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f16.c
+ * Description:  accumulation value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Accumulation Accumulation functions
+ 
+ Calculates the accumulation (sum) of the input vector, i.e. the addition of all the elements in the vector.
+ The following algorithm is used:
+ 
+ <pre>
+ Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]);
+ </pre>
+ 
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         accumulate value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum of values in input vector.
+ @return        none
+ */
+
+void arm_accumulate_f16(
+                        const float16_t * pSrc,
+                        uint32_t blockSize,
+                        float16_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  float16_t sum = 0.0f16;                          /* Running sum (float16_t); stays 0 when blockSize is 0 */
+  
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  
+  /* Loop unrolling: add 4 samples per iteration */
+  blkCnt = blockSize >> 2U;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += (_Float16)*pSrc++;
+    
+    sum += (_Float16)*pSrc++;
+    
+    sum += (_Float16)*pSrc++;
+    
+    sum += (_Float16)*pSrc++;
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* Loop unrolling: the blockSize % 4 samples left are added by the loop below */
+  blkCnt = blockSize % 0x4U;
+  
+#else
+  
+  /* No unrolling: the loop below adds every sample */
+  blkCnt = blockSize;
+  
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += (_Float16)*pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+  /* Store result to destination */
+  *pResult = sum ;
+}
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
+
+/**
+ @} end of Accumulation group
+ */
+
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f32.c
new file mode 100644
index 0000000..353ab17
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f32.c
@@ -0,0 +1,213 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f32.c
+ * Description:  Sum value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum of values in input vector.
+ @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+    f32x4_t vecA;
+    f32x4_t vecSum;
+    uint32_t blkCnt; 
+    float32_t sum = 0.0f;  
+    vecSum = vdupq_n_f32(0.0f);
+
+    /* Add 4 samples per iteration, one per vector lane */
+    blkCnt = blockSize >> 2U;
+    while (blkCnt > 0U)
+    {
+        /*
+         * C = A[0] + A[1] + A[2] + ... + A[blockSize-1]
+         * Load four samples, advance the source pointer
+         * and add the samples lane-wise into vecSum
+         */
+        vecA = vld1q_f32(pSrc);
+        pSrc += 4;
+        
+        vecSum = vaddq_f32(vecSum, vecA);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt --;
+    }
+
+
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U)
+    {
+        /* Tail: only the first blkCnt lanes are added; the other lanes keep their vecSum value */
+
+        mve_pred16_t p0 = vctp32q(blkCnt);
+        vecA = vld1q(pSrc);
+        vecSum = vaddq_m(vecSum,vecSum, vecA, p0);
+    }
+
+    sum = vecAddAcrossF32Mve(vecSum);   /* presumably sums the four lanes of vecSum - confirm in arm_helium_utils.h */
+
+    /* Store result in destination buffer */
+    *pResult = sum;
+}
+
+#else
+
+#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* Scalar total: lane reduction plus tail samples */
+  float32x4_t sumV = vdupq_n_f32(0.0f);                          /* Four per-lane partial sums */
+  float32x2_t sumV2;
+  
+  uint32_t blkCnt;                               /* Loop counter */
+  
+  float32x4_t inV;
+  
+  blkCnt = blockSize >> 2U;
+  
+  /* Add 4 samples per iteration, one per lane.
+   ** a second loop below adds the remaining 1 to 3 samples. */
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    inV = vld1q_f32(pSrc);
+    sumV = vaddq_f32(sumV, inV);
+    
+    pSrc += 4;
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));   /* pairwise add: 4 partial sums -> 2 */
+  sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);     /* 2 -> 1 scalar */
+  
+  /* If the blockSize is not a multiple of 4, add the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize & 3;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store the result to the destination */
+  *pResult = sum;
+}
+
+#else
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  float32_t sum = 0.0f;                          /* Running sum; stays 0 when blockSize is 0 */
+  
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  
+  /* Loop unrolling: add 4 samples per iteration */
+  blkCnt = blockSize >> 2U;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    sum += *pSrc++;
+    
+    sum += *pSrc++;
+    
+    sum += *pSrc++;
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* Loop unrolling: the blockSize % 4 samples left are added by the loop below */
+  blkCnt = blockSize % 0x4U;
+  
+#else
+  
+  /* No unrolling: the loop below adds every sample */
+  blkCnt = blockSize;
+  
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store result to destination */
+  *pResult = sum ;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+#endif /* #if defined(ARM_MATH_MVEF) */
+/**
+ @} end of Accumulation group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f64.c
new file mode 100644
index 0000000..25420ac
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_accumulate_f64.c
@@ -0,0 +1,131 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f64.c
+ * Description:  Accumulation value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum of values in input vector.
+ @return        none
+ */
+#if defined(ARM_MATH_NEON) && defined(__aarch64__)
+void arm_accumulate_f64(
+                        const float64_t * pSrc,
+                        uint32_t blockSize,
+                        float64_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  
+  /* Neon registers: vSum holds two per-lane partial sums, afterLoad the two samples just loaded */
+  float64x2_t vSum = vdupq_n_f64(0.0);
+  float64x2_t afterLoad ;
+  
+  float64_t sum = 0.;                            /* Scalar total: lane reduction plus tail sample */
+  
+  /* Number of 2-sample vector iterations */
+  blkCnt = blockSize >> 1U;
+  
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    
+    afterLoad = vld1q_f64(pSrc);
+    vSum = vaddq_f64(vSum, afterLoad);
+    
+    /* Decrement loop counter */
+    blkCnt--;
+    
+    pSrc += 2;
+  }
+  sum = vaddvq_f64(vSum);   /* add the two lanes into one scalar */
+  
+  /* Tail: at most one sample is left when blockSize is odd */
+  blkCnt = blockSize & 1 ;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store result to destination */
+  *pResult = sum;
+}
+#else
+void arm_accumulate_f64(
+                        const float64_t * pSrc,
+                        uint32_t blockSize,
+                        float64_t *  pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  float64_t sum = 0.;                            /* Running sum; stays 0 when blockSize is 0 */
+  
+  /* Every sample is added by the scalar loop below */
+  blkCnt = blockSize;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store result to destination */
+  *pResult = sum;
+}
+
+#endif
+
+
+/**
+ @} end of Accumulation group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
index 9a5bf6a..4e223c7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f16.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -97,7 +99,7 @@ float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
     while(blkCnt > 0)
     {
        p = *pSrcA++;
-       accum += p * logf(p);
+       accum += p * (_Float16)logf((float32_t)p);
        
        blkCnt--;
     
@@ -122,7 +124,7 @@ float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
     while(blkCnt > 0)
     {
        p = *pIn++;
-       accum += p * logf(p);
+       accum += p * (_Float16)logf((float32_t)p);
        
        blkCnt--;
     
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c
index 163f8be..290e5c1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f32.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
index 5be9be9..5cb2ef5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f64.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,7 +55,7 @@ float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
     pIn = pSrcA;
     blkCnt = blockSize;
 
-    accum = 0.0f;
+    accum = 0.0;
 
     while(blkCnt > 0)
     {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
index 10e1528..6c291fe 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f16.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -81,7 +83,7 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSr
 
     accum = 0.0f16;
 
-    f16x8_t         vSum = vdupq_n_f16(0.0f);
+    f16x8_t         vSum = vdupq_n_f16(0.0f16);
     blkCnt = blockSize >> 3;
     while(blkCnt > 0)
     {
@@ -108,7 +110,7 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSr
     {
        pA = *pSrcA++;
        pB = *pSrcB++;
-       accum += pA * logf(pB / pA);
+       accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
        
        blkCnt--;
     
@@ -134,7 +136,7 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSr
     {
        pA = *pInA++;
        pB = *pInB++;
-       accum += pA * logf(pB / pA);
+       accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA);
        
        blkCnt--;
     
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
index 7193b4e..993e102 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f32.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
index 1eede11..8bde9c2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f64.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -55,7 +57,7 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pS
     pInB = pSrcB;
     blkCnt = blockSize;
 
-    accum = 0.0f;
+    accum = 0.0;
 
     while(blkCnt > 0)
     {
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c
index 28cb1df..08fb197 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f16.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c
index 95ae872..bb5d90f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f32.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
index 1b809f3..dc151f7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f16.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -111,12 +113,12 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
     while(blkCnt > 0)
     {
        tmp = *pIn++;
-       accum += expf(tmp - maxVal);
+       accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
        blkCnt--;
     
     }
 
-    accum = maxVal + logf(accum);
+    accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
 
     return (accum);
 }
@@ -154,11 +156,11 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
     while(blkCnt > 0)
     {
        tmp = *pIn++;
-       accum += expf(tmp - maxVal);
+       accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
        blkCnt--;
     
     }
-    accum = maxVal + logf(accum);
+    accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
 
     return(accum);
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
index 6156a1a..8f0cc74 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_logsumexp_f32.c
  * Description:  LogSumExp
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -113,7 +115,7 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
     
     }
 
-    accum = maxVal + log(accum);
+    accum = maxVal + logf(accum);
 
     return (accum);
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
index c405ae2..3fb0512 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_f16.c
  * Description:  Maximum value of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -122,7 +122,7 @@ void arm_max_f16(
       tmp = *pSrc++;
 
       /* compare for the maximum value */
-      if (maxValue < tmp)
+      if ((_Float16)maxValue < (_Float16)tmp)
       {
         /* Update the maximum value and it's index */
         maxValue = tmp;
@@ -173,7 +173,7 @@ void arm_max_f16(
     maxVal = *pSrc++;
 
     /* compare for the maximum value */
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       /* Update the maximum value and it's index */
       out = maxVal;
@@ -181,21 +181,21 @@ void arm_max_f16(
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 2U;
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 3U;
     }
 
     maxVal = *pSrc++;
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       out = maxVal;
       outIndex = index + 4U;
@@ -223,7 +223,7 @@ void arm_max_f16(
     maxVal = *pSrc++;
 
     /* compare for the maximum value */
-    if (out < maxVal)
+    if ((_Float16)out < (_Float16)maxVal)
     {
       /* Update the maximum value and it's index */
       out = maxVal;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
index 3ee95bb..4856c46 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_f32.c
  * Description:  Maximum value of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -167,7 +167,7 @@ void arm_max_f32(
   uint32x4_t countV;
   uint32x2_t countV2;
 
-  maxIdx = vdupq_n_u32(ULONG_MAX);
+  maxIdx = vdupq_n_u32(UINT_MAX);
   delta = vdupq_n_u32(4);
   index = vld1q_u32(indexInit);
   countV = vld1q_u32(countVInit);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c
new file mode 100644
index 0000000..66cfd34
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c
@@ -0,0 +1,94 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_f64.c
+ * Description:  Maximum value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a float64 vector and the index of its first occurrence.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector; must be at least 1 (pSrc[0] is read unconditionally)
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here (lowest index on ties)
+  @return        none
+ */
+void arm_max_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+        float64_t maxVal, out;                         /* Current sample and running maximum. */
+        uint32_t blkCnt, outIndex;                     /* Samples left to visit and index of the running maximum. */
+
+  /* The first sample is the initial maximum, so its index starts at zero. */
+  outIndex = 0U;
+
+  /* Load the first input value as the reference value for comparison */
+  out = *pSrc++;
+
+  /* Samples remaining after the first one; unsigned, so this wraps if blockSize is 0 */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Fetch the next consecutive sample */
+    maxVal = *pSrc++;
+
+    /* Strict comparison: an equal later sample does not replace the current maximum */
+    if (out < maxVal)
+    {
+      /* Update the maximum value and its index (blockSize - blkCnt is this sample's position) */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**
+  @} end of Max group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
index 5a7b514..a7232da 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_no_idx_f16.c
  * Description:  Maximum value of a floating-point vector without returning the index
  *
- * $Date:        16. October 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -97,7 +97,7 @@ void arm_max_no_idx_f16(
         newVal = *pSrc++;
 
         /* compare for the maximum value */
-        if (maxValue < newVal)
+        if ((_Float16)maxValue < (_Float16)newVal)
         {
             /* Update the maximum value and it's index */
             maxValue = newVal;
@@ -124,7 +124,7 @@ void arm_max_no_idx_f16(
        newVal = *pSrc++;
    
        /* compare for the maximum value */
-       if (maxValue < newVal)
+       if ((_Float16)maxValue < (_Float16)newVal)
        {
            /* Update the maximum value and it's index */
            maxValue = newVal;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c
index 3961416..c578e6b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_no_idx_f32.c
  * Description:  Maximum value of a floating-point vector without returning the index
  *
- * $Date:        16. October 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c
new file mode 100644
index 0000000..dcb7afb
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_f64.c
+ * Description:  Maximum value of a floating-point vector without returning the index
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a float64 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here (the F64_MIN seed when blockSize is 0)
+  @return        none
+ */
+void arm_max_no_idx_f64(
+    const float64_t *pSrc,
+    uint32_t   blockSize,
+    float64_t *pResult)
+{
+   float64_t   maxValue = F64_MIN;   /* Seed for the running maximum; F64_MIN is defined outside this file */
+   float64_t   newVal;               /* Sample under test */
+
+   while (blockSize > 0U)
+   {
+       newVal = *pSrc++;
+   
+       /* Keep the larger of the running maximum and the new sample */
+       if (maxValue < newVal)
+       {
+           /* Update the maximum value (no index is tracked in this variant) */
+           maxValue = newVal;
+       }
+   
+       blockSize --;
+   }
+    
+   *pResult = maxValue;
+}
+
+/**
+  @} end of Max group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c
new file mode 100644
index 0000000..063a5e3
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c
@@ -0,0 +1,146 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_q15.c
+ * Description:  Maximum value of a q15 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a q15 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_max_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    int32_t  blkCnt;           /* full-vector iteration count, then tail sample count */
+    q15x8_t       vecSrc;
+    q15_t const *pSrcVec;
+    q15x8_t       curExtremValVec = vdupq_n_s16(Q15_MIN);
+    q15_t           maxValue = Q15_MIN;
+    mve_pred16_t    p0;
+
+    /* MVE variant: keep a running per-lane maximum, 8 samples per vector, then reduce it to one value. */
+    pSrcVec = (q15_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 8;
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the vector loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the remaining (blockSize & 7) samples are merged through predication.
+     * NOTE(review): the vld1q below is not predicated and loads a full vector - confirm reading past the tail is safe.
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); 
+        pSrcVec += 8;
+        p0 = vctp16q(blkCnt);
+        /*
+         * Update the per-lane max only in the lanes enabled by p0
+         * (built from the tail count); no index is tracked in this variant
+         */
+         curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Reduce the per-lane maxima, together with the Q15_MIN seed, to one value
+     */
+    maxValue = vmaxvq(maxValue, curExtremValVec);
+    *pResult = maxValue;
+}
+
+#else
+void arm_max_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  q15_t maxVal1, out;       /* Current sample and running maximum. */
+  uint32_t blkCnt;              /* Samples left to visit after the first one */
+                                                                                   
+  /* Scalar variant: load the first input value as the reference value for comparison */
+  out = *pSrc++;                                                                   
+  /* Samples remaining after the first one; unsigned, so this wraps if blockSize is 0 */
+  blkCnt = (blockSize - 1U);                                                       
+                                                                                   
+                                                                                   
+  while (blkCnt > 0U)                                                              
+  {                                                                                
+    /* Fetch the next consecutive sample */
+    maxVal1 = *pSrc++;                                                             
+                                                                                   
+    /* Compare against the running maximum */
+    if (out < maxVal1)                                                             
+    {                                                                              
+      /* Update the maximum value */                                               
+      out = maxVal1;                                                               
+    }                                                                              
+                                                                                   
+    /* Decrement the loop counter */                                               
+    blkCnt--;                                                                      
+  }                                                                                
+                                                                                   
+  /* Store the maximum value into the destination pointer */
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Max group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c
new file mode 100644
index 0000000..cffdd13
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c
@@ -0,0 +1,146 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_q31.c
+ * Description:  Maximum value of a q31 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a q31 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_max_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    int32_t  blkCnt;           /* full-vector iteration count, then tail sample count */
+    q31x4_t       vecSrc;
+    q31_t const *pSrcVec;
+    q31x4_t       curExtremValVec = vdupq_n_s32(Q31_MIN);
+    q31_t           maxValue = Q31_MIN;
+    mve_pred16_t    p0;
+
+    /* MVE variant: keep a running per-lane maximum, 4 samples per vector, then reduce it to one value. */
+    pSrcVec = (q31_t const *) pSrc;
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  
+        pSrcVec += 4;
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the vector loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the remaining (blockSize & 3) samples are merged through predication.
+     * NOTE(review): the vldrwq_s32 below is not predicated and loads a full vector - confirm reading past the tail is safe.
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  
+        pSrcVec += 4;
+        p0 = vctp32q(blkCnt);
+        /*
+         * Update the per-lane max only in the lanes enabled by p0
+         * (built from the tail count); no index is tracked in this variant
+         */
+         curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Reduce the per-lane maxima, together with the Q31_MIN seed, to one value
+     */
+    maxValue = vmaxvq(maxValue, curExtremValVec);
+    *pResult = maxValue;
+}
+
+#else
+void arm_max_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  q31_t maxVal1, out;       /* Current sample and running maximum. */
+  uint32_t blkCnt;              /* Samples left to visit after the first one */
+                                                                                    
+  /* Scalar variant: load the first input value as the reference value for comparison */
+  out = *pSrc++;                                                                    
+  /* Samples remaining after the first one; unsigned, so this wraps if blockSize is 0 */
+  blkCnt = (blockSize - 1U);                                                        
+                                                                                    
+                                                                                    
+  while (blkCnt > 0U)                                                               
+  {                                                                                 
+    /* Fetch the next consecutive sample */
+    maxVal1 = *pSrc++;                                                              
+                                                                                    
+    /* Compare against the running maximum */
+    if (out < maxVal1)                                                              
+    {                                                                               
+      /* Update the maximum value */                                                
+      out = maxVal1;                                                                
+    }                                                                               
+                                                                                    
+    /* Decrement the loop counter */                                                
+    blkCnt--;                                                                       
+  }                                                                                 
+                                                                                    
+  /* Store the maximum value into the destination pointer */
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Max group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c
new file mode 100644
index 0000000..059acf5
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c
@@ -0,0 +1,147 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_q7.c
+ * Description:  Maximum value of a q7 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a q7 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_max_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counters */
+    q7x16_t       vecSrc;
+    q7_t const *pSrcVec;
+    q7x16_t       curExtremValVec = vdupq_n_s8(Q7_MIN);
+    q7_t           maxValue = Q7_MIN;
+    mve_pred16_t    p0;
+
+    /* Helium (MVE) path: keep 16 per-lane maxima, reduce them at the end. */
+    pSrcVec = (q7_t const *) pSrc;
+    blkCnt = blockSize >> 4;
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec);
+        pSrcVec += 16;
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (fewer than 16 samples remain; handled with a lane predicate)
+     */
+    blkCnt = blockSize & 0xF;
+    if (blkCnt > 0)
+    {
+        p0 = vctp8q(blkCnt);
+        /*
+         * Predicated (zeroing) load: only the blkCnt valid lanes are read,
+         * so nothing past the end of the input buffer is accessed.
+         */
+        vecSrc = vld1q_z_s8(pSrcVec, p0);
+        /* Merge only the active lanes into the running per-lane maxima. */
+        curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxvq(maxValue, curExtremValVec);
+    *pResult = maxValue;
+}
+
+#else
+
+void arm_max_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+  /*
+   * Portable scalar implementation, used when the Helium (MVE) path above
+   * is not compiled in.
+   *
+   * The running maximum is seeded with Q7_MIN, the identity element of
+   * max() for q7 data, rather than with pSrc[0]. This makes an empty
+   * input (blockSize == 0) safe: the loop body never runs, pSrc is never
+   * dereferenced, the "blockSize - 1U" unsigned wrap-around of a
+   * first-sample seed cannot occur, and the result is Q7_MIN, matching
+   * the MVE implementation. For blockSize >= 1 the result is unchanged.
+   */
+  q7_t out = Q7_MIN;            /* Largest sample seen so far */
+  q7_t maxVal1;                 /* Sample currently being examined */
+  uint32_t blkCnt = blockSize;  /* Samples still to examine */
+
+  while (blkCnt > 0U)
+  {
+    maxVal1 = *pSrc++;
+
+    /* Keep the larger of the running maximum and the current sample */
+    if (out < maxVal1)
+    {
+      out = maxVal1;
+    }
+    blkCnt--;
+  }
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Max group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
index 32663e3..9f30ece 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_q15.c
  * Description:  Maximum value of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -57,78 +57,49 @@ void arm_max_q15(
         q15_t * pResult,
         uint32_t * pIndex)
 {
-    uint32_t blkCnt;           /* loop counters */
-    q15x8_t vecSrc;
-    q15x8_t curExtremValVec = vdupq_n_s16(Q15_MIN);
-    q15_t maxValue = Q15_MIN, temp;
-    uint32_t  idx = blockSize;
-    uint16x8_t indexVec;
-    uint16x8_t curExtremIdxVec;
-    mve_pred16_t p0;
-
-
-    indexVec = vidupq_u16((uint32_t)0, 1);
-    curExtremIdxVec = vdupq_n_u16(0);
-
-    blkCnt = blockSize >> 3;
-    while (blkCnt > 0U)
-    {
-        vecSrc = vldrhq_s16(pSrc);  
-        pSrc += 8;
+    int32_t         blkCnt;     /* loop counters */
+    q15x8_t         extremValVec = vdupq_n_s16(Q15_MIN);
+    q15_t           maxValue = Q15_MIN;
+    uint16x8_t      indexVec;
+    uint16x8_t      extremIdxVec;
+    mve_pred16_t    p0;
+    uint16_t        extremIdxArr[8];
+
+    indexVec = vidupq_u16(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp16q(blkCnt);
+        q15x8_t         extremIdxVal = vld1q_z_s16(pSrc, p);
         /*
          * Get current max per lane and current index per lane
          * when a max is selected
          */
-        p0 = vcmpgeq(vecSrc, curExtremValVec);
-        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
-        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
 
-        indexVec = indexVec +  8;
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-   
-    /*
-     * Get max value across the vector
-     */
-    maxValue = vmaxvq(maxValue, curExtremValVec);
-    /*
-     * set index for lower values to max possible index
-     */
-    p0 = vcmpgeq(curExtremValVec, maxValue);
-    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
-    /*
-     * Get min index which is thus for a max value
-     */
-    idx = vminvq(idx, indexVec);
-
-    /* Tail */
-    blkCnt = blockSize & 0x7;
-    while (blkCnt > 0U)
-    {
-      /* Initialize temp to the next consecutive values one by one */
-      temp = *pSrc++;
-  
-      /* compare for the maximum value */
-      if (maxValue < temp)
-      {
-        /* Update the maximum value and it's index */
-        maxValue = temp;
-        idx = blockSize - blkCnt;
-      }
-  
-      /* Decrement loop counter */
-      blkCnt--;
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u16(extremIdxArr, indexVec, p0);
+
+        indexVec += 8;
+        pSrc += 8;
+        blkCnt -= 8;
     }
+    while (blkCnt > 0);
+
 
-    /*
-     * Save result
-     */
-    *pIndex = idx;
+    /* Get max value across the vector   */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* set index for lower values to max possible index   */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u16(extremIdxArr);
+
+    indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
     *pResult = maxValue;
 }
+
 #else
 void arm_max_q15(
   const q15_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
index 2b3288c..d0665a4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_q31.c
  * Description:  Maximum value of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -52,86 +52,54 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
 
 void arm_max_q31(
-  const q31_t * pSrc,
-        uint32_t blockSize,
-        q31_t * pResult,
-        uint32_t * pIndex)
+    const q31_t * pSrc,
+    uint32_t blockSize,
+    q31_t * pResult,
+    uint32_t * pIndex)
 {
-    uint32_t  blkCnt;           /* loop counters */
-    q31x4_t vecSrc;
-    q31x4_t curExtremValVec = vdupq_n_s32( Q31_MIN);
-    q31_t maxValue = Q31_MIN;
-    q31_t temp;
-    uint32_t  idx = blockSize;
-    uint32x4_t indexVec;
-    uint32x4_t curExtremIdxVec;
-    mve_pred16_t p0;
-
-
-    indexVec = vidupq_u32((uint32_t)0, 1);
-    curExtremIdxVec = vdupq_n_u32(0);
-
-    /* Compute 4 outputs at a time */
-    blkCnt = blockSize >> 2U;
-    while (blkCnt > 0U)
-    {
-        vecSrc = vldrwq_s32(pSrc);  
-        pSrc += 4;
+    int32_t         blkCnt;     /* loop counters */
+    q31x4_t         extremValVec = vdupq_n_s32(Q31_MIN);
+    q31_t           maxValue = Q31_MIN;
+    uint32x4_t      indexVec;
+    uint32x4_t      extremIdxVec;
+    mve_pred16_t    p0;
+    uint32_t        extremIdxArr[4];
+
+    indexVec = vidupq_u32(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp32q(blkCnt);
+        q31x4_t         extremIdxVal = vld1q_z_s32(pSrc, p);
         /*
          * Get current max per lane and current index per lane
          * when a max is selected
          */
-        p0 = vcmpgeq(vecSrc, curExtremValVec);
-        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
-        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
 
-        indexVec = indexVec +  4;
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-   
-    /*
-     * Get max value across the vector
-     */
-    maxValue = vmaxvq(maxValue, curExtremValVec);
-    /*
-     * set index for lower values to max possible index
-     */
-    p0 = vcmpgeq(curExtremValVec, maxValue);
-    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
-    /*
-     * Get min index which is thus for a max value
-     */
-    idx = vminvq(idx, indexVec);
-
-    /* Tail */
-    blkCnt = blockSize & 0x3;
-
-    while (blkCnt > 0U)
-    {
-       /* Initialize maxVal to the next consecutive values one by one */
-       temp = *pSrc++;
-   
-       /* compare for the maximum value */
-       if (maxValue < temp)
-       {
-         /* Update the maximum value and it's index */
-         maxValue = temp;
-         idx = blockSize - blkCnt;
-       }
-
-       /* Decrement loop counter */
-       blkCnt--;
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u32(extremIdxArr, indexVec, p0);
+
+        indexVec += 4;
+        pSrc += 4;
+        blkCnt -= 4;
     }
+    while (blkCnt > 0);
 
-    /*
-     * Save result
-     */
-    *pIndex = idx;
+
+    /* Get max value across the vector   */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* set index for lower values to max possible index   */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u32(extremIdxArr);
+
+    indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
     *pResult = maxValue;
 }
+
 #else
 void arm_max_q31(
   const q31_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
index 72fdf31..377db4a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_max_q7.c
  * Description:  Maximum value of a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -53,83 +53,50 @@
 
 static void arm_small_blk_max_q7(
     const q7_t * pSrc,
-    uint8_t blockSize,
+    uint16_t blockSize,
     q7_t * pResult,
     uint32_t * pIndex)
 {
-    uint32_t        blkCnt;           /* loop counters */
-    q7x16_t         vecSrc;
-    q7x16_t         curExtremValVec = vdupq_n_s8( Q7_MIN);
-    q7_t            maxValue = Q7_MIN, temp;
-    uint32_t        idx = blockSize;
-    uint8x16_t      indexVec;
-    uint8x16_t      curExtremIdxVec;
-    mve_pred16_t    p0;
-
-
-    indexVec = vidupq_u8((uint32_t)0, 1);
-    curExtremIdxVec = vdupq_n_u8(0);
-
-    blkCnt = blockSize >> 4;
-    while (blkCnt > 0U)
-    {
-        vecSrc = vldrbq_s8(pSrc);  
-        pSrc += 16;
+    int32_t        blkCnt;     /* loop counters */
+    q7x16_t        extremValVec = vdupq_n_s8(Q7_MIN);
+    q7_t           maxValue = Q7_MIN;
+    uint8x16_t     indexVec;
+    uint8x16_t     extremIdxVec;
+    mve_pred16_t   p0;
+    uint8_t        extremIdxArr[16];
+
+    indexVec = vidupq_u8(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp8q(blkCnt);
+        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);
         /*
          * Get current max per lane and current index per lane
          * when a max is selected
          */
-        p0 = vcmpgeq(vecSrc, curExtremValVec);
-        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
-        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
 
-        indexVec = indexVec +  16;
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-   
-    
-    /*
-     * Get max value across the vector
-     */
-    maxValue = vmaxvq(maxValue, curExtremValVec);
-    /*
-     * set index for lower values to max possible index
-     */
-    p0 = vcmpgeq(curExtremValVec, maxValue);
-    indexVec = vpselq(curExtremIdxVec, vdupq_n_u8(blockSize), p0);
-    /*
-     * Get min index which is thus for a max value
-     */
-    idx = vminvq(idx, indexVec);
-
-    /*
-     * tail
-     */
-    blkCnt = blockSize & 0xF;
-
-    while (blkCnt > 0U)
-    {
-      /* Initialize temp to the next consecutive values one by one */
-      temp = *pSrc++;
-  
-      /* compare for the maximum value */
-      if (maxValue < temp)
-      {
-        /* Update the maximum value and it's index */
-        maxValue = temp;
-        idx = blockSize - blkCnt;
-      }
-  
-      /* Decrement loop counter */
-      blkCnt--;
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u8(extremIdxArr, indexVec, p0);
+
+        indexVec += 16;
+        pSrc += 16;
+        blkCnt -= 16;
     }
-    /*
-     * Save result
-     */
-    *pIndex = idx;
+    while (blkCnt > 0);
+
+
+    /* Get max value across the vector   */
+    maxValue = vmaxvq(maxValue, extremValVec);
+
+    /* set index for lower values to max possible index   */
+    p0 = vcmpgeq(extremValVec, maxValue);
+    extremIdxVec = vld1q_u8(extremIdxArr);
+
+    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
+    *pIndex = vminvq_u8(blockSize - 1, indexVec);
     *pResult = maxValue;
 }
 
@@ -140,8 +107,9 @@ void arm_max_q7(
         uint32_t * pIndex)
 {
     int32_t   totalSize = blockSize;
+    const uint16_t sub_blk_sz = UINT8_MAX + 1;
 
-    if (totalSize <= UINT8_MAX)
+    if (totalSize <= sub_blk_sz)
     {
         arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
     }
@@ -154,11 +122,11 @@ void arm_max_q7(
         /*
          * process blocks of 255 elts
          */
-        while (totalSize >= UINT8_MAX)
+        while (totalSize >= sub_blk_sz)
         {
             const q7_t     *curSrc = pSrc;
 
-            arm_small_blk_max_q7(curSrc, UINT8_MAX, pResult, pIndex);
+            arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
             if (*pResult > curBlkExtr)
             {
                 /*
@@ -169,8 +137,8 @@ void arm_max_q7(
                 curBlkIdx = curIdx;
             }
             curIdx++;
-            pSrc += UINT8_MAX;
-            totalSize -= UINT8_MAX;
+            pSrc += sub_blk_sz;
+            totalSize -= sub_blk_sz;
         }
         /*
          * remainder
@@ -182,7 +150,7 @@ void arm_max_q7(
             curBlkPos = *pIndex;
             curBlkIdx = curIdx;
         }
-        *pIndex = curBlkIdx * UINT8_MAX + curBlkPos;
+        *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
         *pResult = curBlkExtr;
     }
 }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
index a2739ac..19ded7d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_mean_f16.c
  * Description:  Mean value of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -87,7 +87,7 @@ void arm_mean_f16(
     }
     while (blkCnt > 0);
 
-    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) blockSize;
+    *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) blockSize;
 }
 
 
@@ -109,13 +109,13 @@ void arm_mean_f16(
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
     /* Decrement the loop counter */
     blkCnt--;
@@ -134,7 +134,7 @@ void arm_mean_f16(
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
+    sum += (_Float16)*pSrc++;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -142,7 +142,7 @@ void arm_mean_f16(
 
   /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
   /* Store result to destination */
-  *pResult = (sum / (float16_t)blockSize);
+  *pResult = ((_Float16)sum / (_Float16)blockSize);
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
index 79bf476..99c6dbe 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_mean_f32.c
  * Description:  Mean value of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c
new file mode 100644
index 0000000..cb91116
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mean_f64.c
+ * Description:  Mean value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup mean
+  @{
+ */
+
+/**
+  @brief         Mean value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[in]     blockSize  number of samples in input vector.
+  @param[out]    pResult    mean value returned here.
+  @return        none
+ */
+void arm_mean_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  float64_t acc = 0.;   /* Running total of the samples */
+  uint32_t idx;         /* Position of the sample being added */
+
+  /*
+   * Accumulate A[0] + A[1] + ... + A[blockSize-1] in ascending index
+   * order, one sample per iteration.
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    acc += pSrc[idx];
+  }
+
+  /*
+   * mean = total / blockSize. The sample count is converted to
+   * float64_t for the division; a zero blockSize is not special-cased
+   * here.
+   */
+  *pResult = acc / (float64_t) blockSize;
+}
+
+/**
+  @} end of mean group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
index de20f9a..0eefbdb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_mean_q15.c
  * Description:  Mean value of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -116,11 +116,11 @@ void arm_mean_q15(
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     sum += ((in << 16U) >> 16U);
     sum +=  (in >> 16U);
 
-    in = read_q15x2_ia ((q15_t **) &pSrc);
+    in = read_q15x2_ia (&pSrc);
     sum += ((in << 16U) >> 16U);
     sum +=  (in >> 16U);
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
index 03e2327..1b95ce5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_mean_q31.c
  * Description:  Mean value of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -90,7 +90,7 @@ void arm_mean_q31(
       blkCnt --;
     }
 
-    *pResult = arm_div_q63_to_q31(sum, blockSize);
+    *pResult = arm_div_int64_to_int32(sum, blockSize);
 }
 #else
 void arm_mean_q31(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
index 44ca51d..5ac4517 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_mean_q7.c
  * Description:  Mean value of a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -115,7 +115,7 @@ void arm_mean_q7(
   while (blkCnt > 0U)
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    in = read_q7x4_ia ((q7_t **) &pSrc);
+    in = read_q7x4_ia (&pSrc);
     sum += ((in << 24U) >> 24U);
     sum += ((in << 16U) >> 24U);
     sum += ((in <<  8U) >> 24U);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
index 46ddb94..4e08799 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_min_f16.c
  * Description:  Minimum value of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -167,7 +167,7 @@ void arm_min_f16(
     minVal = *pSrc++;
 
     /* compare for the minimum value */
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       /* Update the minimum value and it's index */
       out = minVal;
@@ -175,21 +175,21 @@ void arm_min_f16(
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 2U;
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 3U;
     }
 
     minVal = *pSrc++;
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       out = minVal;
       outIndex = index + 4U;
@@ -217,7 +217,7 @@ void arm_min_f16(
     minVal = *pSrc++;
 
     /* compare for the minimum value */
-    if (out > minVal)
+    if ((_Float16)out > (_Float16)minVal)
     {
       /* Update the minimum value and it's index */
       out = minVal;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
index 6c49822..b581473 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_min_f32.c
  * Description:  Minimum value of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -168,7 +168,7 @@ void arm_min_f32(
   uint32x4_t countV;
   uint32x2_t countV2;
 
-  maxIdx = vdupq_n_u32(ULONG_MAX);
+  maxIdx = vdupq_n_u32(UINT_MAX);
   delta = vdupq_n_u32(4);
   index = vld1q_u32(indexInit);
   countV = vld1q_u32(countVInit);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c
new file mode 100644
index 0000000..525470f
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c
@@ -0,0 +1,94 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_f64.c
+ * Description:  Minimum value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+void arm_min_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  /* Smallest sample seen so far; seeded with the first element. */
+  float64_t smallest = *pSrc++;
+
+  /* Position of that sample within the input vector. */
+  uint32_t smallestPos = 0U;
+
+  /* Samples still to be examined after the first one. */
+  uint32_t remaining;
+
+  /*
+   * Walk the rest of the vector. blockSize - 1U wraps around when
+   * blockSize is 0, so callers should pass at least one sample.
+   */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    /* Next sample to test against the running minimum. */
+    const float64_t candidate = *pSrc++;
+
+    /* Only a strictly smaller sample replaces the current minimum,
+       so the first occurrence wins on ties. */
+    if (candidate < smallest)
+    {
+      smallest = candidate;
+
+      /* blockSize - remaining is the index of the sample just read. */
+      smallestPos = blockSize - remaining;
+    }
+  }
+
+  /* Report the minimum and where it was found. */
+  *pResult = smallest;
+  *pIndex = smallestPos;
+}
+
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c
new file mode 100644
index 0000000..a2a64db
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c
@@ -0,0 +1,148 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_f16.c
+ * Description:  Minimum value of a floating-point vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_min_no_idx_f16(
+    const float16_t *pSrc,
+    uint32_t   blockSize,
+    float16_t *pResult)
+{
+   f16x8_t     vecSrc;                                  /* 8 samples loaded from pSrc */
+   f16x8_t     curExtremValVec = vdupq_n_f16(F16_MAX);  /* per-lane running minimum, seeded with F16_MAX */
+   float16_t   minValue = F16_MAX;                      /* scalar running minimum, seeded with F16_MAX */
+   float16_t   newVal;                                  /* sample read by the scalar tail loop */
+   uint32_t    blkCnt;                                  /* loop counter shared by both loops */
+
+   /* Vector loop: one iteration per group of 8 samples (blockSize / 8 iterations) */
+   blkCnt = blockSize >> 3U;
+
+   while (blkCnt > 0U)
+   {
+
+        vecSrc = vldrhq_f16(pSrc);
+        /*
+         * Update the per-lane minimum with the 8 samples just loaded.
+         */
+        curExtremValVec = vminnmq(vecSrc, curExtremValVec);
+        /*
+         * Advance the source pointer past the 8 samples just consumed
+         * and decrement the vector-loop counter
+         */
+        pSrc += 8;
+        blkCnt --;
+    }
+    /*
+     * Reduce the per-lane minima, together with minValue, to one scalar
+     */
+    minValue = vminnmvq(minValue, curExtremValVec);
+
+    blkCnt = blockSize & 7;   /* 0..7 samples left over after the vector loop */
+
+    while (blkCnt > 0U)
+    {
+        newVal = *pSrc++;
+
+        /* Compare the leftover sample against the running minimum */
+        if ((_Float16)minValue > (_Float16)newVal)
+        {
+            /* Strictly smaller sample: it becomes the new minimum (no index is tracked) */
+            minValue = newVal;
+        }
+
+        blkCnt --;
+    }
+
+    *pResult = minValue;   /* store the minimum into the destination pointer */
+}
+
+#else
+
+void arm_min_no_idx_f16(
+    const float16_t *pSrc,
+    uint32_t   blockSize,
+    float16_t *pResult)
+{
+   /* Running minimum, seeded with F16_MAX (returned as-is for an empty input). */
+   float16_t   smallest = F16_MAX;
+   uint32_t    i;
+
+   /* Visit every sample exactly once, in order. */
+   for (i = 0U; i < blockSize; i++)
+   {
+       const float16_t sample = pSrc[i];
+
+       /* Compare in _Float16 arithmetic; only a strictly smaller sample wins. */
+       if ((_Float16)sample < (_Float16)smallest)
+       {
+           smallest = sample;
+       }
+   }
+
+   /* Store the minimum into the destination pointer. */
+   *pResult = smallest;
+}
+
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of Min group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c
new file mode 100644
index 0000000..eafae73
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c
@@ -0,0 +1,142 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_f32.c
+ * Description:  Minimum value of a floating-point vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_min_no_idx_f32(
+    const float32_t *pSrc,
+    uint32_t   blockSize,
+    float32_t *pResult)
+{
+   f32x4_t     vecSrc;                                  /* 4 samples loaded from pSrc */
+   f32x4_t     curExtremValVec = vdupq_n_f32(F32_MAX);  /* per-lane running minimum, seeded with F32_MAX */
+   float32_t   minValue = F32_MAX;                      /* scalar running minimum, seeded with F32_MAX */
+   float32_t   newVal;                                  /* sample read by the scalar tail loop */
+   uint32_t    blkCnt;                                  /* loop counter shared by both loops */
+
+   /* Vector loop: one iteration per group of 4 samples (blockSize / 4 iterations) */
+   blkCnt = blockSize >> 2U;
+
+   while (blkCnt > 0U)
+   {
+
+        vecSrc = vldrwq_f32(pSrc);
+        /*
+         * Update the per-lane minimum with the 4 samples just loaded.
+         */
+        curExtremValVec = vminnmq(vecSrc, curExtremValVec);
+        /*
+         * Advance the source pointer past the 4 samples just consumed
+         * and decrement the vector-loop counter
+         */
+        pSrc += 4;
+        blkCnt --;
+    }
+    /*
+     * Reduce the per-lane minima, together with minValue, to one scalar
+     */
+    minValue = vminnmvq(minValue, curExtremValVec);
+
+    blkCnt = blockSize & 3;   /* 0..3 samples left over after the vector loop */
+
+    while (blkCnt > 0U)
+    {
+        newVal = *pSrc++;
+
+        /* Compare the leftover sample against the running minimum */
+        if (minValue > newVal)
+        {
+            /* Strictly smaller sample: it becomes the new minimum (no index is tracked) */
+            minValue = newVal;
+        }
+
+        blkCnt --;
+    }
+
+    *pResult = minValue;   /* store the minimum into the destination pointer */
+}
+
+#else
+
+void arm_min_no_idx_f32(
+    const float32_t *pSrc,
+    uint32_t   blockSize,
+    float32_t *pResult)
+{
+   /* Running minimum, seeded with F32_MAX (returned as-is for an empty input). */
+   float32_t   smallest = F32_MAX;
+   uint32_t    i;
+
+   /* Visit every sample exactly once, in order. */
+   for (i = 0U; i < blockSize; i++)
+   {
+       const float32_t sample = pSrc[i];
+
+       /* Only a strictly smaller sample replaces the running minimum. */
+       if (sample < smallest)
+       {
+           smallest = sample;
+       }
+   }
+
+   /* Store the minimum into the destination pointer. */
+   *pResult = smallest;
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c
new file mode 100644
index 0000000..5e3317e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c
@@ -0,0 +1,79 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_f64.c
+ * Description:  Minimum value of a floating-point vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+void arm_min_no_idx_f64(
+    const float64_t *pSrc,
+    uint32_t   blockSize,
+    float64_t *pResult)
+{
+   /* Running minimum, seeded with F64_MAX (returned as-is for an empty input). */
+   float64_t   smallest = F64_MAX;
+   uint32_t    i;
+
+   /* Visit every sample exactly once, in order. */
+   for (i = 0U; i < blockSize; i++)
+   {
+       const float64_t sample = pSrc[i];
+
+       /* Only a strictly smaller sample replaces the running minimum. */
+       if (sample < smallest)
+       {
+           smallest = sample;
+       }
+   }
+
+   /* Store the minimum into the destination pointer. */
+   *pResult = smallest;
+}
+
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c
new file mode 100644
index 0000000..f588e70
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c
@@ -0,0 +1,146 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_q15.c
+ * Description:  Minimum value of a q15 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a q15 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_min_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counter: full vectors first, then leftover samples */
+    q15x8_t vecSrc; /* 8 samples loaded from the input */
+    q15_t const *pSrcVec; /* read cursor into the input vector */
+    q15x8_t curExtremValVec = vdupq_n_s16(Q15_MAX); /* per-lane running minimum, seeded with Q15_MAX */
+    q15_t minValue = Q15_MAX; /* scalar seed for the final reduction */
+    mve_pred16_t p0; /* predicate selecting the valid tail lanes */
+
+
+    pSrcVec = (q15_t const *) pSrc;
+    blkCnt = blockSize >> 3; /* number of complete 8-sample vectors */
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); /* load the next 8 samples */
+        pSrcVec += 8;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the vector-loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the 1..7 leftover samples are handled with one predicated
+     * vector operation instead of a scalar loop
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); /* NOTE(review): full-width load although only blkCnt samples remain - confirm the over-read is safe */
+        pSrcVec += 8;
+        p0 = vctp16q(blkCnt); /* predicate built from the number of leftover samples */
+        /*
+         * Update the minimum only in the lanes enabled by p0; the other
+         * lanes keep their previous value (no index is tracked here)
+         */
+         curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+void arm_min_no_idx_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  /*
+   * Scalar fallback. The running minimum is seeded with Q15_MAX rather than
+   * with the first sample: results for blockSize >= 1 are unchanged (no
+   * q15_t sample exceeds Q15_MAX), while blockSize == 0 no longer reads
+   * pSrc[0] or wraps the (blockSize - 1U) counter and yields Q15_MAX, as the
+   * Helium (ARM_MATH_MVEI) variant of this function does.
+   */
+  q15_t sample, out = Q15_MAX;  /* current sample and running minimum */
+  uint32_t blkCnt = blockSize;  /* samples left to examine */
+
+  while (blkCnt > 0U)
+  {
+    /* Read the next sample */
+    sample = *pSrc++;
+
+    /* Keep it only when it is strictly smaller than the running minimum */
+    if (out > sample)
+    {
+      out = sample;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value into destination pointer */
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c
new file mode 100644
index 0000000..b00a5ba
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c
@@ -0,0 +1,145 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_q31.c
+ * Description:  Minimum value of a q31 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a q31 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+void arm_min_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counter: full vectors first, then leftover samples */
+    q31x4_t vecSrc; /* 4 samples loaded from the input */
+    q31_t const *pSrcVec; /* read cursor into the input vector */
+    q31x4_t curExtremValVec = vdupq_n_s32(Q31_MAX); /* per-lane running minimum, seeded with Q31_MAX */
+    q31_t minValue = Q31_MAX; /* scalar seed for the final reduction */
+    mve_pred16_t p0; /* predicate selecting the valid tail lanes */
+
+
+    pSrcVec = (q31_t const *) pSrc;
+    blkCnt = blockSize >> 2; /* number of complete 4-sample vectors */
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  /* load the next 4 samples */
+        pSrcVec += 4;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the vector-loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the 1..3 leftover samples are handled with one predicated
+     * vector operation instead of a scalar loop
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0)
+    {
+        vecSrc = vldrwq_s32(pSrcVec);  /* NOTE(review): full-width load although only blkCnt samples remain - confirm the over-read is safe */
+        pSrcVec += 4;
+        p0 = vctp32q(blkCnt); /* predicate built from the number of leftover samples */
+        /*
+         * Update the minimum only in the lanes enabled by p0; the other
+         * lanes keep their previous value (no index is tracked here)
+         */
+         curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+void arm_min_no_idx_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  /*
+   * Scalar fallback. The running minimum is seeded with Q31_MAX rather than
+   * with the first sample: results for blockSize >= 1 are unchanged (no
+   * q31_t sample exceeds Q31_MAX), while blockSize == 0 no longer reads
+   * pSrc[0] or wraps the (blockSize - 1U) counter and yields Q31_MAX, as the
+   * Helium (ARM_MATH_MVEI) variant of this function does.
+   */
+  q31_t sample, out = Q31_MAX;  /* current sample and running minimum */
+  uint32_t blkCnt = blockSize;  /* samples left to examine */
+
+  while (blkCnt > 0U)
+  {
+    /* Read the next sample */
+    sample = *pSrc++;
+
+    /* Keep it only when it is strictly smaller than the running minimum */
+    if (out > sample)
+    {
+      out = sample;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value into destination pointer */
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c
new file mode 100644
index 0000000..e0a8396
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c
@@ -0,0 +1,145 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_no_idx_q7.c
+ * Description:  Minimum value of a q7 vector without returning the index
+ *
+ * $Date:        16 November 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a q7 vector without index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_min_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+    int32_t  blkCnt;           /* loop counter: full vectors first, then leftover samples */
+    q7x16_t vecSrc; /* 16 samples loaded from the input */
+    q7_t const *pSrcVec; /* read cursor into the input vector */
+    q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX); /* per-lane running minimum, seeded with Q7_MAX */
+    q7_t minValue = Q7_MAX; /* scalar seed for the final reduction */
+    mve_pred16_t p0; /* predicate selecting the valid tail lanes */
+
+
+    pSrcVec = (q7_t const *) pSrc;
+    blkCnt = blockSize >> 4; /* number of complete 16-sample vectors */
+    while (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); /* load the next 16 samples */
+        pSrcVec += 16;
+        /*
+         * update per-lane min.
+         */
+        curExtremValVec = vminq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the vector-loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: the 1..15 leftover samples are handled with one predicated
+     * vector operation instead of a scalar loop
+     */
+    blkCnt = blockSize & 0xF;
+    if (blkCnt > 0)
+    {
+        vecSrc = vld1q(pSrcVec); /* NOTE(review): full-width load although only blkCnt samples remain - confirm the over-read is safe */
+        pSrcVec += 16;
+        p0 = vctp8q(blkCnt); /* predicate built from the number of leftover samples */
+        /*
+         * Update the minimum only in the lanes enabled by p0; the other
+         * lanes keep their previous value (no index is tracked here)
+         */
+         curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminvq(minValue, curExtremValVec);
+    *pResult = minValue;
+}
+
+#else
+void arm_min_no_idx_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+  /*
+   * Scalar fallback. The running minimum is seeded with Q7_MAX rather than
+   * with the first sample: results for blockSize >= 1 are unchanged (no
+   * q7_t sample exceeds Q7_MAX), while blockSize == 0 no longer reads
+   * pSrc[0] or wraps the (blockSize - 1U) counter and yields Q7_MAX, as the
+   * Helium (ARM_MATH_MVEI) variant of this function does.
+   */
+  q7_t sample, out = Q7_MAX;    /* current sample and running minimum */
+  uint32_t blkCnt = blockSize;  /* samples left to examine */
+
+  while (blkCnt > 0U)
+  {
+    /* Read the next sample */
+    sample = *pSrc++;
+
+    /* Keep it only when it is strictly smaller than the running minimum */
+    if (out > sample)
+    {
+      out = sample;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value into destination pointer */
+  *pResult = out;
+}
+
+#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of Min group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
index 3f4a59f..3a4d99e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_min_q15.c
  * Description:  Minimum value of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,79 +58,48 @@ void arm_min_q15(
         q15_t * pResult,
         uint32_t * pIndex)
 {
-    uint32_t  blkCnt;           /* loop counters */
-    q15x8_t vecSrc;
-    q15x8_t curExtremValVec = vdupq_n_s16(Q15_MAX);
-    q15_t minValue = Q15_MAX,temp;
-    uint32_t  idx = blockSize;
-    uint16x8_t indexVec;
-    uint16x8_t curExtremIdxVec;
-    mve_pred16_t p0;
 
+    int32_t         blkCnt;     /* loop counters */
+    q15x8_t         extremValVec = vdupq_n_s16(Q15_MAX);
+    q15_t           minValue = Q15_MAX;
+    uint16x8_t      indexVec;
+    uint16x8_t      extremIdxVec;
+    mve_pred16_t    p0;
+    uint16_t        extremIdxArr[8];
 
-    indexVec = vidupq_u16((uint32_t)0, 1);
-    curExtremIdxVec = vdupq_n_u16(0);
+    indexVec = vidupq_u16(0U, 1);
 
-    blkCnt = blockSize >> 3;
-    while (blkCnt > 0U)
-    {
-        vecSrc = vldrhq_s16(pSrc);  
-        pSrc += 8;
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp16q(blkCnt);
+        q15x8_t         extremIdxVal = vld1q_z_s16(pSrc, p);
         /*
          * Get current min per lane and current index per lane
          * when a min is selected
          */
-        p0 = vcmpleq(vecSrc, curExtremValVec);
-        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
-        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+        p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
 
-        indexVec = indexVec +  8;
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-   
-    /*
-     * Get min value across the vector
-     */
-    minValue = vminvq(minValue, curExtremValVec);
-    /*
-     * set index for lower values to min possible index
-     */
-    p0 = vcmpleq(curExtremValVec, minValue);
-    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
-    /*
-     * Get min index which is thus for a min value
-     */
-    idx = vminvq(idx, indexVec);
-
-    /*
-     * tail
-    */
-    blkCnt = blockSize & 7;
-    while (blkCnt > 0U)
-    {
-      /* Initialize minVal to the next consecutive values one by one */
-      temp = *pSrc++;
-  
-      /* compare for the minimum value */
-      if (minValue > temp)
-      {
-        /* Update the minimum value and it's index */
-        minValue = temp;
-        idx = blockSize - blkCnt;
-      }
-  
-      /* Decrement loop counter */
-      blkCnt--;
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u16(extremIdxArr, indexVec, p0);
+
+        indexVec += 8;
+        pSrc += 8;
+        blkCnt -= 8;
     }
+    while (blkCnt > 0);
+
+    /* Get min value across the vector   */
+    minValue = vminvq(minValue, extremValVec);
+
+    /* set index for lower values to min possible index   */
+    p0 = vcmpleq(extremValVec, minValue);
+    extremIdxVec = vld1q_u16(extremIdxArr);
 
-    /*
-     * Save result
-     */
-    *pIndex = idx;
+    indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
     *pResult = minValue;
+ 
 }
 #else
 void arm_min_q15(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
index df96c95..7c889e5 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_min_q31.c
  * Description:  Minimum value of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -58,79 +58,49 @@ void arm_min_q31(
         q31_t * pResult,
         uint32_t * pIndex)
 {
-    uint32_t  blkCnt;           /* loop counters */
-    q31x4_t vecSrc;
-    q31x4_t curExtremValVec = vdupq_n_s32(Q31_MAX);
-    q31_t minValue = Q31_MAX, temp;
-    uint32_t  idx = blockSize;
-    uint32x4_t indexVec;
-    uint32x4_t curExtremIdxVec;
-    mve_pred16_t p0;
-
-
-    indexVec = vidupq_u32((uint32_t)0, 1);
-    curExtremIdxVec = vdupq_n_u32(0);
-
-    /* Compute 4 outputs at a time */
-    blkCnt = blockSize >> 2U;
-    while (blkCnt > 0U)
-    {
-        vecSrc = vldrwq_s32(pSrc);  
-        pSrc += 4;
+    int32_t         blkCnt;     /* loop counters */
+    q31x4_t         extremValVec = vdupq_n_s32(Q31_MAX);
+    q31_t           minValue = Q31_MAX;
+    uint32x4_t      indexVec;
+    uint32x4_t      extremIdxVec;
+    mve_pred16_t    p0;
+    uint32_t        extremIdxArr[4];
+
+    indexVec = vidupq_u32(0U, 1);
+
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp32q(blkCnt);
+        q31x4_t         extremIdxVal = vld1q_z_s32(pSrc, p);
         /*
          * Get current min per lane and current index per lane
          * when a min is selected
          */
-        p0 = vcmpleq(vecSrc, curExtremValVec);
-        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
-        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+        p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
 
-        indexVec = indexVec +  4;
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-    
-    /*
-     * Get min value across the vector
-     */
-    minValue = vminvq(minValue, curExtremValVec);
-    /*
-     * set index for lower values to min possible index
-     */
-    p0 = vcmpleq(curExtremValVec, minValue);
-    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
-    /*
-     * Get min index which is thus for a min value
-     */
-    idx = vminvq(idx, indexVec);
-
-
-    /* Tail */
-    blkCnt = blockSize & 0x3;
-    while (blkCnt > 0U)
-    {
-      /* Initialize temp to the next consecutive values one by one */
-      temp = *pSrc++;
-  
-      /* compare for the minimum value */
-      if (minValue > temp)
-      {
-        /* Update the minimum value and it's index */
-        minValue = temp;
-        idx = blockSize - blkCnt;
-      }
-  
-      /* Decrement loop counter */
-      blkCnt--;
+        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
+        /* store per-lane extrema indexes */
+        vst1q_p_u32(extremIdxArr, indexVec, p0);
+
+        indexVec += 4;
+        pSrc += 4;
+        blkCnt -= 4;
     }
-    /*
-     * Save result
-     */
-    *pIndex = idx;
+    while (blkCnt > 0);
+
+
+    /* Get min value across the vector   */
+    minValue = vminvq(minValue, extremValVec);
+
+    /* set index for lower values to min possible index   */
+    p0 = vcmpleq(extremValVec, minValue);
+    extremIdxVec = vld1q_u32(extremIdxArr);
+
+    indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
+    *pIndex = vminvq(blockSize - 1, indexVec);
     *pResult = minValue;
 }
+
 #else
 void arm_min_q31(
   const q31_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
index 25e607f..6d8451b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_min_q7.c
  * Description:  Minimum value of a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c
new file mode 100644
index 0000000..20c8083
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c
@@ -0,0 +1,207 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f16.c
+ * Description:  Half floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two half floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVE_FLOAT16)
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_mse_f16(
+    const float16_t * pSrcA,
+    const float16_t * pSrcB,
+    uint32_t    blockSize,
+    float16_t * result)
+/* Helium (MVE) version: *result = sum((pSrcA[i] - pSrcB[i])^2) / blockSize */
+{
+    float16x8_t vecA, vecB;
+    float16x8_t vecSum;                 /* per-lane partial sums */
+    uint32_t blkCnt;                    /* loop counter */
+    _Float16 sum = 0.0f16;
+    vecSum = vdupq_n_f16(0.0f16);
+
+    /* Main loop: 8 sample pairs per iteration */
+    blkCnt = (blockSize) >> 3;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q(pSrcA);
+        pSrcA += 8;
+
+        vecB = vld1q(pSrcB);
+        pSrcB += 8;
+
+        vecA = vsubq(vecA, vecB);
+
+        /* vecSum += (a - b)^2 in every lane */
+        vecSum = vfmaq(vecSum, vecA, vecA);
+        blkCnt --;
+    }
+
+    /* Tail: the remaining 1..7 sample pairs, selected by predicate p0 */
+    blkCnt = (blockSize) & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        /* Zeroing predicated loads: lanes past the end of the buffers are
+         * not read, so the tail does not touch memory beyond
+         * pSrcA[blockSize - 1] / pSrcB[blockSize - 1]. */
+        vecA = vld1q_z(pSrcA, p0);
+        vecB = vld1q_z(pSrcB, p0);
+
+        vecA = vsubq(vecA, vecB);
+        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
+    }
+
+    sum = vecAddAcrossF16Mve(vecSum);
+    /* Store the mean of the squared differences */
+    *result = (_Float16)sum / (_Float16)blockSize;
+}
+
+#endif
+
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+
+void arm_mse_f16(
+    const float16_t * pSrcA,
+    const float16_t * pSrcB,
+    uint32_t    blockSize,
+    float16_t * result)
+/* Scalar version: accumulates (pSrcA[i] - pSrcB[i])^2 over all samples and
+ * stores the sum divided by blockSize in *result. */
+{
+  uint32_t remaining;                            /* Passes left in the current loop */
+  _Float16 valA, valB;                           /* Current sample pair */
+  _Float16 diff;                                 /* valA - valB */
+  _Float16 acc = 0.0f16;                         /* Running sum of squared differences */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Unrolled part: eight sample pairs per pass; the statements are repeated
+   * verbatim so the accumulation order matches a sample-by-sample loop. */
+  for (remaining = (blockSize) >> 3; remaining > 0U; remaining--)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+  }
+
+  /* Samples left over after the unrolled part */
+  remaining = (blockSize) & 7;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  /* One sample pair per pass */
+  while (remaining > 0U)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = (_Float16)valA - (_Float16)valB;
+    acc += (_Float16)diff * (_Float16)diff;
+
+    /* One pair consumed */
+    remaining--;
+  }
+
+  /* Mean of the squared differences */
+  *result = (_Float16)acc / (_Float16)blockSize;
+}
+
+#endif /* end of test for vector instruction availability */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c
new file mode 100644
index 0000000..622abb5
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c
@@ -0,0 +1,251 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f32.c
+ * Description:  Floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVEF)
+#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
+
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+/* Helium (MVE) version: *result = sum((pSrcA[i] - pSrcB[i])^2) / blockSize */
+{
+    float32x4_t vecA, vecB;
+    float32x4_t vecSum;                 /* per-lane partial sums */
+    uint32_t blkCnt;                    /* loop counter */
+    float32_t sum = 0.0f;
+    vecSum = vdupq_n_f32(0.0f);
+
+    /* Main loop: 4 sample pairs per iteration */
+    blkCnt = (blockSize) >> 2;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q(pSrcA);
+        pSrcA += 4;
+
+        vecB = vld1q(pSrcB);
+        pSrcB += 4;
+
+        vecA = vsubq(vecA, vecB);
+
+        /* vecSum += (a - b)^2 in every lane */
+        vecSum = vfmaq(vecSum, vecA, vecA);
+
+        blkCnt --;
+    }
+
+    /* Tail: the remaining 1..3 sample pairs, selected by predicate p0 */
+    blkCnt = (blockSize) & 3;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp32q(blkCnt);
+        /* Zeroing predicated loads: lanes past the end of the buffers are
+         * not read, so the tail does not touch memory beyond
+         * pSrcA[blockSize - 1] / pSrcB[blockSize - 1]. */
+        vecA = vld1q_z(pSrcA, p0);
+        vecB = vld1q_z(pSrcB, p0);
+
+        vecA = vsubq(vecA, vecB);
+        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
+    }
+
+    sum = vecAddAcrossF32Mve(vecSum);
+    /* Store the mean of the squared differences */
+    *result = sum / blockSize;
+}
+
+#endif
+
+#if defined(ARM_MATH_NEON) 
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+/* NEON version: *result = sum((pSrcA[i] - pSrcB[i])^2) / blockSize */
+{
+    float32x4_t vecA, vecB;
+    float32x4_t vecSum;
+    uint32_t blkCnt; 
+    float32_t inA, inB;
+    float32_t sum = 0.0f;  
+    vecSum = vdupq_n_f32(0.0f);
+#if !defined(__aarch64__)
+    f32x2_t tmp = vdup_n_f32(0.0f); 
+#endif 
+
+    /* Vector loop: 4 sample pairs per iteration (blockSize / 4 iterations) */
+    blkCnt = (blockSize) >> 2;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q_f32(pSrcA);
+        pSrcA += 4;
+        
+        vecB = vld1q_f32(pSrcB);
+        pSrcB += 4;
+
+        vecA = vsubq_f32(vecA, vecB);
+        /* vecSum += vecA * vecA, lane by lane */
+        vecSum = vfmaq_f32(vecSum, vecA, vecA);
+        /*
+         * Decrement the vector-loop counter
+         */
+        blkCnt --;
+    }
+    /* Reduce the four lane sums in vecSum to the scalar sum */
+#if defined(__aarch64__)
+    sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
+#else
+    tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));
+    sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
+
+#endif 
+    /* Scalar tail: the remaining blockSize % 4 sample pairs */
+    blkCnt = (blockSize) & 3;
+    while (blkCnt > 0U)
+    {
+        /* Accumulate the squared difference of one sample pair */
+        inA = *pSrcA++; 
+        inB = *pSrcB++;
+        inA = inA - inB;
+        sum += inA * inA;
+    
+        /* Decrement loop counter */
+        blkCnt--;
+    }
+    
+    /* Store the mean of the squared differences; blockSize == 0 is not guarded here */
+    *result = sum / blockSize;
+
+}
+#endif
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+
+#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+/* Scalar version: accumulates (pSrcA[i] - pSrcB[i])^2 over all samples and
+ * stores the sum divided by blockSize in *result. */
+{
+  uint32_t remaining;                            /* Passes left in the current loop */
+  float32_t valA, valB;                          /* Current sample pair */
+  float32_t diff;                                /* valA - valB */
+  float32_t acc = 0.0f;                          /* Running sum of squared differences */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Unrolled part: four sample pairs per pass; the loop further down
+   * handles the 0 to 3 pairs that are left over. */
+  for (remaining = (blockSize) >> 2; remaining > 0U; remaining--)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+  }
+
+  /* Samples left over after the unrolled part */
+  remaining = (blockSize) & 3;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  /* One sample pair per pass: square the difference and add it to the
+   * running sum. */
+  while (remaining > 0U)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    /* One pair consumed */
+    remaining--;
+  }
+
+  /*
+   * Mean of the squared differences
+   */
+  *result = acc / blockSize;
+}
+
+#endif /* end of test for vector instruction availability */
+
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c
new file mode 100644
index 0000000..d63674b
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c
@@ -0,0 +1,114 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f64.c
+ * Description:  Double floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two double floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+
+
+
+
+void arm_mse_f64(
+    const float64_t * pSrcA,
+    const float64_t * pSrcB,
+    uint32_t    blockSize,
+    float64_t * result)
+/* Accumulates (pSrcA[i] - pSrcB[i])^2 over all samples and stores the sum
+ * divided by blockSize in *result. */
+{
+  uint32_t remaining;                            /* Passes left in the current loop */
+  float64_t valA, valB;                          /* Current sample pair */
+  float64_t diff;                                /* valA - valB */
+  float64_t acc = 0.0;                           /* Running sum of squared differences */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Unrolled part: two sample pairs per pass; the loop further down
+   * handles the single pair that may be left over. */
+  for (remaining = (blockSize) >> 1; remaining > 0U; remaining--)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+  }
+
+  /* Sample left over after the unrolled part */
+  remaining = (blockSize) & 1;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  /* One sample pair per pass */
+  while (remaining > 0U)
+  {
+    valA = *pSrcA++;
+    valB = *pSrcB++;
+    diff = valA - valB;
+    acc += diff * diff;
+
+    /* One pair consumed */
+    remaining--;
+  }
+
+  /*
+   * Mean of the squared differences
+   */
+  *result = acc / blockSize;
+}
+
+
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c
new file mode 100644
index 0000000..3412a4f
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c
@@ -0,0 +1,179 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_q15.c
+ * Description:  Mean square error between two Q15 vectors
+ *
+ * $Date:        04 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two Q15 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    pResult     mean square error
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    uint32_t  blkCnt;           /* loop counter */
+    q15x8_t vecSrcA,vecSrcB;
+    q63_t   sum = 0LL;          /* accumulated squared differences */
+
+    /* Empty input: store 0 instead of dividing by zero at the end */
+    if (blockSize == 0U)
+    {
+        *pResult = 0;
+        return;
+    }
+
+    /* Main loop: 8 sample pairs per iteration */
+    blkCnt = blockSize >> 3U;
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+        /* Halve both inputs, then subtract with saturation */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        /* Add the squared differences of all lanes to sum */
+        sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
+
+        blkCnt--;
+        pSrcA += 8;
+        pSrcB += 8;
+    }
+
+    /* Tail: the remaining 1..7 sample pairs, selected by predicate p0 */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        /* Zeroing predicated loads: lanes past the end are not read */
+        vecSrcA = vld1q_z(pSrcA, p0);
+        vecSrcB = vld1q_z(pSrcB, p0);
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
+    }
+
+    /* Mean of the accumulated squares, rescaled and saturated to q15 */
+    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
+}
+#else
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q63_t sum = 0;                                 /* Accumulated squared differences */
+        q15_t inA,inB;                                 /* Halved input samples; inA is reused for the difference */
+
+  /* Empty input: store 0 instead of dividing by zero at the end */
+  if (blockSize == 0U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+  while (blkCnt > 0U)
+  {
+    /* Each step: halve both samples, subtract with saturation, add the square */
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
+    sum += (q63_t)((q31_t) inA * inA);
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
+    sum += (q63_t)((q31_t) inA * inA);
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
+    sum += (q63_t)((q31_t) inA * inA);
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
+    sum += (q63_t)((q31_t) inA * inA);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+#else
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
+    sum += (q63_t)((q31_t) inA * inA);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the mean in q15 format (blockSize is non-zero here) */
+  *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c
new file mode 100644
index 0000000..f89a768
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c
@@ -0,0 +1,180 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_q31.c
+ * Description:  Mean square error between two Q31 vectors
+ *
+ * $Date:        04 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two Q31 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    uint32_t  blkCnt;           /* loop counter */
+    q31x4_t vecSrcA,vecSrcB;
+    q63_t   sum = 0LL;          /* accumulated squared differences */
+
+    /* Empty input: store 0 instead of dividing by zero at the end */
+    if (blockSize == 0U)
+    {
+        *pResult = 0;
+        return;
+    }
+
+    /* Main loop: 4 sample pairs per iteration */
+    blkCnt = blockSize >> 2U;
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+
+        /* Halve both inputs, then subtract with saturation */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+
+        /* Add the squared differences of all lanes to sum */
+        sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);
+
+        blkCnt--;
+        pSrcA += 4;
+        pSrcB += 4;
+    }
+
+    /* Tail: the remaining 1..3 sample pairs, selected by predicate p0 */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp32q(blkCnt);
+        /* Zeroing predicated loads: lanes past the end are not read */
+        vecSrcA = vld1q_z(pSrcA, p0);
+        vecSrcB = vld1q_z(pSrcB, p0);
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
+    }
+
+    /* Mean of the accumulated squares, rescaled to q31 */
+    *pResult = (q31_t) ((sum / blockSize)>>21);
+}
+#else
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q63_t sum = 0;                                 /* Accumulated squared differences */
+        q31_t inA32,inB32;                             /* Halved input samples; inA32 is reused for the difference */
+
+  /* Empty input: store 0 instead of dividing by zero at the end */
+  if (blockSize == 0U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+  while (blkCnt > 0U)
+  {
+    /* Each step: halve both samples, saturating subtract, add the square >> 14 */
+    inA32 = *pSrcA++ >> 1;
+    inB32 = *pSrcB++ >> 1;
+    inA32 = __QSUB(inA32, inB32);
+    sum += ((q63_t) inA32 * inA32) >> 14U;
+
+    inA32 = *pSrcA++ >> 1;
+    inB32 = *pSrcB++ >> 1;
+    inA32 = __QSUB(inA32, inB32);
+    sum += ((q63_t) inA32 * inA32) >> 14U;
+
+    inA32 = *pSrcA++ >> 1;
+    inB32 = *pSrcB++ >> 1;
+    inA32 = __QSUB(inA32, inB32);
+    sum += ((q63_t) inA32 * inA32) >> 14U;
+
+    inA32 = *pSrcA++ >> 1;
+    inB32 = *pSrcB++ >> 1;
+    inA32 = __QSUB(inA32, inB32);
+    sum += ((q63_t) inA32 * inA32) >> 14U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+#else
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  while (blkCnt > 0U)
+  {
+    inA32 = *pSrcA++ >> 1;
+    inB32 = *pSrcB++ >> 1;
+    inA32 = __QSUB(inA32, inB32);
+    sum += ((q63_t) inA32 * inA32) >> 14U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the mean in q31 format (blockSize is non-zero here) */
+  *pResult = (q31_t) ((sum / blockSize)>>15);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c
new file mode 100644
index 0000000..fb28d90
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c
@@ -0,0 +1,183 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_q7.c
+ * Description:  Mean square error between two Q7 vectors
+ *
+ * $Date:        04 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup MSE Mean Square Error
+
+  Calculates the mean square error between two vectors.
+
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two Q7 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    pResult     mean square error
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_mse_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+    uint32_t  blkCnt;           /* loop counter: full 16-sample blocks, then the leftover sample count */
+    q7x16_t vecSrcA,vecSrcB;    /* working vectors; vecSrcA is reused to hold the difference */
+    q31_t   sum = 0LL;          /* running sum of squared differences */
+
+   /* Compute 16 outputs at a time */
+    blkCnt = blockSize >> 4U;
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+        /* Shift both inputs right by one bit before taking their difference */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+        /* Saturating difference of the two shifted vectors, kept in vecSrcA */
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        /*
+         * sum lanes: accumulate the difference multiplied by itself into sum
+         */
+        sum = vmladavaq(sum, vecSrcA, vecSrcA);
+
+        blkCnt--;
+        pSrcA += 16;
+        pSrcB += 16;
+    }
+
+    /*
+     * tail: the remaining (blockSize mod 16) samples, if any
+     */
+    blkCnt = blockSize & 0xF;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp8q(blkCnt);   /* predicate built from the leftover count */
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+        /* NOTE(review): these loads are not predicated; presumably they read 16 bytes even for a shorter tail - confirm buffers allow it */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        /* Only the accumulation takes the predicate p0 */
+        sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
+    }
+    /* Mean = sum / blockSize, then >> 5 and saturation to 8 bits. NOTE(review): blockSize == 0 divides by zero here - confirm callers never pass 0 */
+    *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
+}
+#else
+void arm_mse_q7(
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        q31_t sum = 0;                                 /* Sum of the squared (halved) differences */
+        q7_t inA,inB;                                  /* Halved input samples; inA is reused for their difference */
+  /* Computes the mean of the squared differences between pSrcA and pSrcB.
+     An empty vector has no mean: report 0 instead of dividing by zero at the end. */
+  if (blockSize == 0U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    inA = *pSrcA++ >> 1;                               /* Halve both samples so their difference fits in 8 bits */
+    inB = *pSrcB++ >> 1;
+    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);  /* Difference, computed in 16 bits then saturated to 8 */
+    sum += ((q15_t) inA * inA);                        /* Accumulate the squared difference */
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
+    sum += ((q15_t) inA * inA);
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
+    sum += ((q15_t) inA * inA);
+
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
+    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
+    sum += ((q15_t) inA * inA);
+
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    inA = *pSrcA++ >> 1;                               /* Same per-sample steps as the unrolled loop above */
+    inB = *pSrcB++ >> 1;
+    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
+    sum += ((q15_t) inA * inA);
+    blkCnt--;
+  }
+
+  /* Store result in q7 format: mean of the squared half-differences, >> 5 to return to q7 scaling */
+  *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of MSE group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c
index 1a2d5b1..f9833c0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_power_f16.c
  * Description:  Sum of the squares of the elements of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
index 80ed5d9..ec07058 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_power_f32.c
  * Description:  Sum of the squares of the elements of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c
new file mode 100644
index 0000000..d2e1e03
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c
@@ -0,0 +1,81 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_power_f64.c
+ * Description:  Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup power
+  @{
+ */
+
+/**
+  @brief         Sum of the squares of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    sum of the squares value returned here
+  @return        none
+ */
+void arm_power_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+        uint32_t idx;                                  /* Index of the sample being processed */
+        float64_t acc = 0.;                            /* Running sum of squares */
+        float64_t sample;                              /* Current input sample */
+
+  /*
+   * Walk the vector front to back, squaring each sample and adding it to acc:
+   *
+   *   C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]
+   *
+   * The additions happen in index order, exactly one per sample.
+   * A blockSize of 0 skips the loop and leaves the result at 0.
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    /* Read the sample once, then square and accumulate it */
+    sample = pSrc[idx];
+    acc += sample * sample;
+  }
+
+  /* Hand the accumulated power back to the caller */
+  *pResult = acc;
+}
+
+/**
+  @} end of power group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
index 22c3afd..1cb3845 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_power_q15.c
  * Description:  Sum of the squares of the elements of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -124,10 +124,10 @@ void arm_power_q15(
 
     /* Compute Power and store result in a temporary variable, sum. */
 #if defined (ARM_MATH_DSP)
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sum = __SMLALD(in32, in32, sum);
 
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sum = __SMLALD(in32, in32, sum);
 #else
     in = *pSrc++;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
index 71ce6b5..db83d3b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_power_q31.c
  * Description:  Sum of the squares of the elements of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
index bdbc041..7f74aa2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_power_q7.c
  * Description:  Sum of the squares of the elements of a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -124,7 +124,7 @@ void arm_power_q7(
 
     /* Compute Power and store result in a temporary variable, sum. */
 #if defined (ARM_MATH_DSP)
-    in32 = read_q7x4_ia ((q7_t **) &pSrc);
+    in32 = read_q7x4_ia (&pSrc);
 
     in1 = __SXTB16(__ROR(in32, 8));
     in2 = __SXTB16(in32);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
index 3a98ffc..e2c878c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_rms_f16.c
  * Description:  Root mean square value of the elements of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,18 +37,7 @@
   @ingroup groupStats
  */
 
-/**
-  @defgroup RMS Root mean square (RMS)
-
-  Calculates the Root Mean Square of the elements in the input vector.
-  The underlying algorithm is used:
 
-  <pre>
-      Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
-  </pre>
-
-  There are separate functions for floating point, Q31, and Q15 data types.
- */
 
 /**
   @addtogroup RMS
@@ -75,7 +64,7 @@ void arm_rms_f16(
     arm_power_f16(pSrc, blockSize, &pow);
 
     /* Compute Rms and store the result in the destination */
-    arm_sqrt_f16(pow / (float16_t) blockSize, pResult);
+    arm_sqrt_f16((_Float16)pow / (_Float16) blockSize, pResult);
 }
 #else
 
@@ -137,7 +126,7 @@ void arm_rms_f16(
   }
 
   /* Compute Rms and store result in destination */
-  arm_sqrt_f16(sum / (float16_t) blockSize, pResult);
+  arm_sqrt_f16((_Float16)sum / (_Float16) blockSize, pResult);
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
index e86b7f2..7dadc34 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_rms_f32.c
  * Description:  Root mean square value of the elements of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
index 8e3dc55..2ed47f6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_rms_q15.c
  * Description:  Root Mean Square of the elements of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -95,10 +95,10 @@ void arm_rms_q15(
 
     /* Compute sum of squares and store result in a temporary variable. */
 #if defined (ARM_MATH_DSP)
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sum = __SMLALD(in32, in32, sum);
 
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sum = __SMLALD(in32, in32, sum);
 #else
     in = *pSrc++;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
index 93303a8..f334db8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_rms_q31.c
  * Description:  Root Mean Square of the elements of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c
index dc8f4a1..b941f24 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_std_f16.c
  * Description:  Standard deviation of the elements of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
index 245f27d..ea60d3c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_std_f32.c
  * Description:  Standard deviation of the elements of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c
new file mode 100644
index 0000000..a193f57
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c
@@ -0,0 +1,63 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_std_f64.c
+ * Description:  Standard deviation of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup STD
+  @{
+ */
+
+/**
+  @brief         Standard deviation of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    standard deviation value returned here
+  @return        none
+ */
+void arm_std_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  float64_t variance;                       /* Variance of the input, written by arm_var_f64 */
+  arm_var_f64(pSrc, blockSize, &variance);
+  *pResult = sqrt(variance);                /* Standard deviation is the square root of the variance */
+}
+
+/**
+  @} end of STD group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
index 08d205b..4e15a85 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_std_q15.c
  * Description:  Standard deviation of an array of Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -102,12 +102,12 @@ void arm_std_q15(
     /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
     /* Compute sum and store result in a temporary variable, sum. */
 #if defined (ARM_MATH_DSP)
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
     sum += ((in32 << 16U) >> 16U);
     sum +=  (in32 >> 16U);
 
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
     sum += ((in32 << 16U) >> 16U);
     sum +=  (in32 >> 16U);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
index 2248665..3036a2b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_std_q31.c
  * Description:  Standard deviation of the elements of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
index e2ffd47..8700428 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_var_f16.c
  * Description:  Variance of the elements of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -63,7 +63,7 @@ void arm_var_f16(
 {
     int32_t         blkCnt;     /* loop counters */
     f16x8_t         vecSrc;
-    f16x8_t         sumVec = vdupq_n_f16((float16_t) 0.0);
+    f16x8_t         sumVec = vdupq_n_f16(0.0f16);
     float16_t       fMean;
 
     if (blockSize <= 1U) {
@@ -74,15 +74,6 @@ void arm_var_f16(
 
     arm_mean_f16(pSrc, blockSize, &fMean);
 
-/* 6.14 bug */
-#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
-    __asm volatile(
-        "   vmov.i32                     %[acc], #0 \n"
-        : [acc] "+t"(sumVec)
-        : 
-        : );
-#endif
-
     blkCnt = blockSize;
     do {
         mve_pred16_t    p = vctp16q(blkCnt);
@@ -100,7 +91,7 @@ void arm_var_f16(
     while (blkCnt > 0);
     
     /* Variance */
-    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) (blockSize - 1.0f);
+    *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) (blockSize - 1.0f16);
 }
 #else
 
@@ -130,10 +121,10 @@ void arm_var_f16(
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
 
-    sum += *pInput++;
-    sum += *pInput++;
-    sum += *pInput++;
-    sum += *pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
+    sum += (_Float16)*pInput++;
 
 
     /* Decrement loop counter */
@@ -154,14 +145,14 @@ void arm_var_f16(
   {
     /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
 
-    sum += *pInput++;
+    sum += (_Float16)*pInput++;
 
     /* Decrement loop counter */
     blkCnt--;
   }
 
   /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
-  fMean = sum / (float16_t) blockSize;
+  fMean = (_Float16)sum / (_Float16) blockSize;
 
   pInput = pSrc;
 
@@ -172,17 +163,17 @@ void arm_var_f16(
 
   while (blkCnt > 0U)
   {
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
     /* Decrement loop counter */
     blkCnt--;
@@ -200,15 +191,15 @@ void arm_var_f16(
 
   while (blkCnt > 0U)
   {
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
+    fValue = (_Float16)*pInput++ - (_Float16)fMean;
+    fSum += (_Float16)fValue * (_Float16)fValue;
 
     /* Decrement loop counter */
     blkCnt--;
   }
 
   /* Variance */
-  *pResult = fSum / (float16_t)(blockSize - 1.0f);
+  *pResult = (_Float16)fSum / ((_Float16)blockSize - 1.0f16);
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
index b00b1ad..69ab060 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_var_f32.c
  * Description:  Variance of the elements of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c
new file mode 100644
index 0000000..229db63
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c
@@ -0,0 +1,104 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_var_f64.c
+ * Description:  Variance of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup variance
+  @{
+ */
+
+/**
+  @brief         Variance of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    variance value returned here
+  @return        none
+ */
+void arm_var_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+        uint32_t idx;                                  /* Sample index shared by both passes */
+        float64_t total = 0.;                          /* Sum of all samples (first pass) */
+        float64_t sqDevTotal = 0.;                     /* Sum of squared deviations from the mean (second pass) */
+        float64_t mean;                                /* Arithmetic mean of the input */
+        float64_t deviation;                           /* Current sample minus the mean */
+
+  /*
+   * Fewer than two samples: report a variance of 0. With a single sample the
+   * (blockSize - 1) divisor used at the end would be zero.
+   */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0.;
+    return;
+  }
+
+  /*
+   * First pass: add up every sample.
+   *
+   *   total = A[0] + A[1] + A[2] + ... + A[blockSize-1]
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    total += pSrc[idx];
+  }
+
+  /* mean = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+  mean = total / (float64_t) blockSize;
+
+  /*
+   * Second pass: measure how far each sample lies from the mean and
+   * accumulate the square of that distance.
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    deviation = pSrc[idx] - mean;
+    sqDevTotal += deviation * deviation;
+  }
+
+  /*
+   * Variance: the summed squared deviations divided by (blockSize - 1),
+   * with the divisor computed in floating point.
+   */
+  *pResult = sqDevTotal / ((float64_t) blockSize - 1.);
+}
+
+/**
+  @} end of variance group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
index a6be746..9c78d34 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_var_q15.c
  * Description:  Variance of an array of Q15 type
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -113,10 +113,10 @@ void arm_var_q15(
 
     /* Compute Mean of squares of the input samples
      * and then store the result in a temporary variable, meanOfSquares. */
-    meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U));
+    meanOfSquares = arm_div_int64_to_int32(sumOfSquares, (blockSize - 1U));
 
     /* Compute square of mean */
-    squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U)));
+    squareOfMean = arm_div_int64_to_int32((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U)));
 
     /* mean of the squares minus the square of the mean. */
     *pResult = (meanOfSquares - squareOfMean) >> 15;
@@ -156,12 +156,12 @@ void arm_var_q15(
     /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
     /* Compute sum and store result in a temporary variable, sum. */
 #if defined (ARM_MATH_DSP)
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
     sum += ((in32 << 16U) >> 16U);
     sum +=  (in32 >> 16U);
 
-    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    in32 = read_q15x2_ia (&pSrc);
     sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
     sum += ((in32 << 16U) >> 16U);
     sum +=  (in32 >> 16U);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
index 0da41b1..025cc56 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_var_q31.c
  * Description:  Variance of an array of Q31 type
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
index 161b6ec..9a1aa6a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_barycenter_f16.c
  * Description:  Barycenter
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -105,7 +107,7 @@ void arm_barycenter_f16(const float16_t *in,
         w2 = *pW++;
         w3 = *pW++;
         w4 = *pW++;
-        accum += w1 + w2 + w3 + w4;
+        accum += (_Float16)w1 + (_Float16)w2 + (_Float16)w3 + (_Float16)w4;
 
         blkCntSample = vecDim >> 3;
         while (blkCntSample > 0) {
@@ -131,10 +133,10 @@ void arm_barycenter_f16(const float16_t *in,
 
         blkCntSample = vecDim & 7;
         while (blkCntSample > 0) {
-            *pOut = *pOut + *pIn1++ * w1;
-            *pOut = *pOut + *pIn2++ * w2;
-            *pOut = *pOut + *pIn3++ * w3;
-            *pOut = *pOut + *pIn4++ * w4;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn1++ * (_Float16)w1;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn2++ * (_Float16)w2;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn3++ * (_Float16)w3;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn4++ * (_Float16)w4;
             pOut++;
             blkCntSample--;
         }
@@ -156,7 +158,7 @@ void arm_barycenter_f16(const float16_t *in,
 
         pOut = out;
         w = *pW++;
-        accum += w;
+        accum += (_Float16)w;
 
         blkCntSample = vecDim >> 3;
         while (blkCntSample > 0) 
@@ -174,7 +176,7 @@ void arm_barycenter_f16(const float16_t *in,
         blkCntSample = vecDim & 7;
         while (blkCntSample > 0) 
         {
-            *pOut = *pOut + *pIn++ * w;
+            *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w;
             pOut++;
             blkCntSample--;
         }
@@ -184,7 +186,7 @@ void arm_barycenter_f16(const float16_t *in,
 
     /* Normalize */
     pOut = out;
-    accum = 1.0f / accum;
+    accum = 1.0f16 / (_Float16)accum;
 
     blkCntSample = vecDim >> 3;
     while (blkCntSample > 0) 
@@ -201,7 +203,7 @@ void arm_barycenter_f16(const float16_t *in,
     blkCntSample = vecDim & 7;
     while (blkCntSample > 0) 
     {
-        *pOut = *pOut * accum;
+        *pOut = (_Float16)*pOut * (_Float16)accum;
         pOut++;
         blkCntSample--;
     }
@@ -218,7 +220,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
    blkCntVector = nbVectors;
    blkCntSample = vecDim;
 
-   accum = 0.0f;
+   accum = 0.0f16;
 
    pW = weights;
    pIn = in;
@@ -229,7 +231,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
 
    while(blkCntSample > 0)
    {
-         *pOut = 0.0f;
+         *pOut = 0.0f16;
          pOut++;
          blkCntSample--;
    }
@@ -239,12 +241,12 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
    {
       pOut = out;
       w = *pW++;
-      accum += w;
+      accum += (_Float16)w;
 
       blkCntSample = vecDim;
       while(blkCntSample > 0)
       {
-          *pOut = *pOut + *pIn++ * w;
+          *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w;
           pOut++;
           blkCntSample--;
       }
@@ -258,7 +260,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
 
    while(blkCntSample > 0)
    {
-         *pOut = *pOut / accum;
+         *pOut = (_Float16)*pOut / (_Float16)accum;
          pOut++;
          blkCntSample--;
    }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
index 9f41c07..e941cbd 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_barycenter_f32.c
  * Description:  Barycenter
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -33,6 +35,7 @@
 
 /**
   @ingroup barycenter
+  @{
  */
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c
index 131a5da..05edb29 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_bitonic_sort_f32.c
  * Description:  Floating point bitonic sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c
index f84c057..4044ed0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_bubble_sort_f32.c
  * Description:  Floating point bubble sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c
index a77f4e9..6d0003b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_copy_f16.c
  * Description:  Copies the elements of a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c
index 51c0b19..f6f2a33 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_copy_f32.c
  * Description:  Copies the elements of a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f64.c
new file mode 100644
index 0000000..05c21e1
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_f64.c
@@ -0,0 +1,75 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_copy_f64.c
+ * Description:  Copies the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h"
+
+/**
+  @ingroup groupSupport
+ */
+
+/**
+  @addtogroup copy
+  @{
+ */
+
+/**
+  @brief         Copies the elements of a float64_t vector, one sample per loop iteration.
+  @param[in]     pSrc       points to input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector; 0 copies nothing
+  @return        none
+ */
+void arm_copy_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to copy */
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A */
+
+    /* Copy one sample to the destination buffer and advance both pointers */
+    *pDst++ = *pSrc++;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+}
+
+/**
+  @} end of copy group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c
index 765b038..fcb7d22 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_copy_q15.c
  * Description:  Copies the elements of a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -97,8 +97,8 @@ void arm_copy_q15(
     /* C = A */
 
     /* read 2 times 2 samples at a time */
-    write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
-    write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
+    write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
+    write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c
index 07b9b22..1249380 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_copy_q31.c
  * Description:  Copies the elements of a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c
index 1eaa857..70f7b7f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_copy_q7.c
  * Description:  Copies the elements of a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -100,7 +100,7 @@ void arm_copy_q7(
     /* C = A */
 
     /* read 4 samples at a time */
-    write_q7x4_ia (&pDst, read_q7x4_ia ((q7_t **) &pSrc));
+    write_q7x4_ia (&pDst, read_q7x4_ia (&pSrc));
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c
index 87b1e46..cf4451f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q15.c
  * Description:  Converts the elements of the floating-point vector to Q15 vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -55,7 +55,11 @@
 
  */
 
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
+#pragma GCC warning "Scalar version of arm_f16_to_float built. Helium version has build issues with gcc."
+#endif 
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(__CMSIS_GCC_H)
 
 void arm_f16_to_float(
   const float16_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
index a454881..cdd714e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q15.c
  * Description:  Converts the elements of the floating-point vector to Q15 vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -138,7 +138,7 @@ void arm_f16_to_q15(
         /*
          * convert from float to Q31 and then store the results in the destination buffer
          */
-        *pDst++ = clip_q31_to_q15((q31_t) (*pIn++ * 32768.0));
+        *pDst++ = clip_q31_to_q15((q31_t) ((_Float16)*pIn++ * 32768.0f16));
 
 #endif                          /*      #ifdef ARM_MATH_ROUNDING        */
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c
index 6f46139..f52f505 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_fill_f16.c
  * Description:  Fills a constant value into a floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
index 14c851b..b06ceb3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_fill_f32.c
  * Description:  Fills a constant value into a floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f64.c
new file mode 100644
index 0000000..152cb4c
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_f64.c
@@ -0,0 +1,75 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fill_f64.c
+ * Description:  Fills a constant value into a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/support_functions.h"
+
+/**
+  @ingroup groupSupport
+ */
+
+/**
+  @addtogroup Fill
+  @{
+ */
+
+/**
+  @brief         Fills a constant value into a float64_t vector, one sample per loop iteration.
+  @param[in]     value      input value to be filled
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples to write; 0 writes nothing
+  @return        none
+ */
+void arm_fill_f64(
+  float64_t value,
+  float64_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* Number of samples still to write */
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = value */
+
+    /* Store value in the destination buffer and advance the pointer */
+    *pDst++ = value;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+}
+
+/**
+  @} end of Fill group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c
index c9b46c4..a45aae4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_fill_q15.c
  * Description:  Fills a constant value into a Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c
index 4a9a6c1..9a8b129 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_fill_q31.c
  * Description:  Fills a constant value into a Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c
index 2050480..1211436 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_fill_q7.c
  * Description:  Fills a constant value into a Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c
index 3a1e9a5..c726153 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q15.c
  * Description:  Converts the elements of the floating-point vector to Q15 vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -51,7 +51,11 @@
 
  */
 
-#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
+#pragma GCC warning "Scalar version of arm_float_to_f16 built. Helium version has build issues with gcc."
+#endif 
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(__CMSIS_GCC_H)
 
 void arm_float_to_f16(
   const float32_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c
index c6bd214..dd5bab1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q15.c
  * Description:  Converts the elements of the floating-point vector to Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70,7 +70,7 @@ void arm_float_to_q15(
     uint32_t         blkCnt;
     float32_t       maxQ = (float32_t) Q15_MAX;
     f32x4x2_t       tmp;
-    q15x8_t         vecDst;
+    q15x8_t         vecDst = { 0 };
 #ifdef ARM_MATH_ROUNDING
     float32_t in;
 #endif
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c
index 3cd44ad..76cd238 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q31.c
  * Description:  Converts the elements of the floating-point vector to Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c
index 613976c..f64e6d2 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_float_to_q7.c
  * Description:  Converts the elements of the floating-point vector to Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -69,8 +69,8 @@ void arm_float_to_q7(
     uint32_t         blkCnt;     /* loop counters */
     float32_t       maxQ = powf(2.0, 7);
     f32x4x4_t       tmp;
-    q15x8_t         evVec, oddVec;
-    q7x16_t         vecDst;
+    q15x8_t         evVec = { 0 }, oddVec = { 0 };
+    q7x16_t         vecDst = { 0 };
     float32_t const *pSrcVec;
 #ifdef ARM_MATH_ROUNDING
     float32_t in;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c
index 78985b7..aa52173 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_heap_sort_f32.c
  * Description:  Floating point heap sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_insertion_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_insertion_sort_f32.c
index 440b26e..386c5ce 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_insertion_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_insertion_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_insertion_sort_f32.c
  * Description:  Floating point insertion sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
index 13c7a33..a74a961 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_merge_sort_f32.c
  * Description:  Floating point merge sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c
index 901554a..73f916a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_merge_sort_init_f32.c
  * Description:  Floating point merge sort initialization function
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
index 8b95b12..a250b0b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_q15_to_float.c
  * Description:  Converts the elements of the Q15 vector to floating-point vector
  *
- * $Date:        18. March 2020
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -38,7 +38,7 @@
  */
 
 /**
- * @defgroup q15_to_x  Convert 16-bit Integer value
+ * @defgroup q15_to_x  Convert 16-bit fixed point value
  */
 
 /**
@@ -116,10 +116,10 @@ void arm_q15_to_f16(
     /* C = (float16_t) A / 32768 */
 
     /* Convert from q15 to float and store result in destination buffer */
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
-    *pDst++ = ((float16_t) * pIn++ / 32768.0f);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
+    *pDst++ = ((_Float16) * pIn++ / 32768.0f16);
 
     /* Decrement loop counter */
     blkCnt--;
@@ -140,7 +140,7 @@ void arm_q15_to_f16(
     /* C = (float16_t) A / 32768 */
 
     /* Convert from q15 to float and store result in destination buffer */
-    *pDst++ = ((float16_t) *pIn++ / 32768.0f);
+    *pDst++ = ((_Float16) *pIn++ / 32768.0f16);
 
     /* Decrement loop counter */
     blkCnt--;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
index 9f8dc33..1bc9729 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
@@ -5,13 +5,13 @@
  * Title:        arm_q15_to_float.c
  * Description:  Converts the elements of the Q15 vector to floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,7 +35,7 @@
  */
 
 /**
- * @defgroup q15_to_x  Convert 16-bit Integer value
+ * @defgroup q15_to_x  Convert 16-bit fixed point value
  */
 
 /**
@@ -67,16 +67,16 @@ void arm_q15_to_float(
 
   q15x8_t vecDst;
   q15_t const *pSrcVec;
-  
+
   pSrcVec = (q15_t const *) pSrc;
   blkCnt = blockSize >> 2;
   while (blkCnt > 0U)
   {
       /* C = (float32_t) A / 32768 */
       /* convert from q15 to float and then store the results in the destination buffer */
-      vecDst = vldrhq_s32(pSrcVec); 
+      vecDst = vldrhq_s32(pSrcVec);
       pSrcVec += 4;
-      vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 15));  
+      vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 15));
       pDst += 4;
       /*
        * Decrement the blockSize loop counter
@@ -131,7 +131,7 @@ void arm_q15_to_float(
     outV = vcvtq_n_f32_s32(inV1,15);
     vst1q_f32(pDst, outV);
     pDst += 4;
-  
+
     /* Decrement the loop counter */
     blkCnt--;
   }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c
index 3e59523..2a56392 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_q15_to_q31.c
  * Description:  Converts the elements of the Q15 vector to Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -119,8 +119,8 @@ void arm_q15_to_q31(
     /* C = (q31_t)A << 16 */
 
     /* Convert from q15 to q31 and store result in destination buffer */
-    in1 = read_q15x2_ia ((q15_t **) &pIn);
-    in2 = read_q15x2_ia ((q15_t **) &pIn);
+    in1 = read_q15x2_ia (&pIn);
+    in2 = read_q15x2_ia (&pIn);
 
 #ifndef ARM_MATH_BIG_ENDIAN
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c
index abb68b5..8a33729 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_q15_to_q7.c
  * Description:  Converts the elements of the Q15 vector to Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -62,7 +62,7 @@ void arm_q15_to_q7(
     uint32_t  blkCnt;           /* loop counters */
     q15x8x2_t tmp;
     q15_t const *pSrcVec;
-    q7x16_t vecDst;
+    q7x16_t vecDst = { 0 };
 
 
     pSrcVec = (q15_t const *) pSrc;
@@ -121,8 +121,8 @@ void arm_q15_to_q7(
     /* Convert from q15 to q7 and store result in destination buffer */
 #if defined (ARM_MATH_DSP)
 
-    in1 = read_q15x2_ia ((q15_t **) &pIn);
-    in2 = read_q15x2_ia ((q15_t **) &pIn);
+    in1 = read_q15x2_ia (&pIn);
+    in2 = read_q15x2_ia (&pIn);
 
 #ifndef ARM_MATH_BIG_ENDIAN
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c
index fc4b280..a478044 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c
@@ -5,13 +5,13 @@
  * Title:        arm_q31_to_float.c
  * Description:  Converts the elements of the Q31 vector to floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,7 +35,7 @@
  */
 
 /**
- * @defgroup q31_to_x  Convert 32-bit Integer value
+ * @defgroup q31_to_x  Convert 32-bit fixed point value
  */
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c
index 27e04b5..2d0c58a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_q31_to_q15.c
  * Description:  Converts the elements of the Q31 vector to Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,7 +60,7 @@ void arm_q31_to_q15(
 {
     uint32_t  blkCnt;           /* loop counters */
     q31x4x2_t tmp;
-    q15x8_t vecDst;
+    q15x8_t vecDst = { 0 };
     q31_t const *pSrcVec;
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c
index 14f25bc..27d1423 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c
@@ -5,13 +5,13 @@
  * Title:        arm_q31_to_q7.c
  * Description:  Converts the elements of the Q31 vector to Q7 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,8 +60,8 @@ void arm_q31_to_q7(
 {
     uint32_t  blkCnt;           /* loop counters */
     q31x4x4_t tmp;
-    q15x8_t evVec, oddVec;
-    q7x16_t  vecDst;
+    q15x8_t evVec = { 0 }, oddVec = { 0 };
+    q7x16_t  vecDst = { 0 };
     q31_t const *pSrcVec;
 
     pSrcVec = (q31_t const *) pSrc;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
index 6535dd0..f70206d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
@@ -5,13 +5,13 @@
  * Title:        arm_q7_to_float.c
  * Description:  Converts the elements of the Q7 vector to floating-point vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,7 +35,7 @@
  */
 
 /**
- * @defgroup q7_to_x  Convert 8-bit Integer value
+ * @defgroup q7_to_x  Convert 8-bit fixed point value
  */
 
 /**
@@ -74,7 +74,7 @@ void arm_q7_to_float(
         /* convert from q7 to float and then store the results in the destination buffer */
         vecDst = vldrbq_s32(pSrcVec);    
         pSrcVec += 4;
-        vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 7));   
+        vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 7));   
         pDst += 4;
         /*
          * Decrement the blockSize loop counter
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c
index 75bb856..b169fba 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_q7_to_q15.c
  * Description:  Converts the elements of the Q7 vector to Q15 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -123,7 +123,7 @@ void arm_q7_to_q15(
     /* Convert from q7 to q15 and store result in destination buffer */
 #if defined (ARM_MATH_DSP)
 
-    in = read_q7x4_ia ((q7_t **) &pIn);
+    in = read_q7x4_ia (&pIn);
 
     /* rotatate in by 8 and extend two q7_t values to q15_t values */
     in1 = __SXTB16(__ROR(in, 8));
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c
index a01894a..7867a08 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_q7_to_q31.c
  * Description:  Converts the elements of the Q7 vector to Q31 vector
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -115,7 +115,7 @@ void arm_q7_to_q31(
     /* C = (q31_t) A << 24 */
 
     /* Convert from q7 to q31 and store result in destination buffer */
-    in = read_q7x4_ia ((q7_t **) &pIn);
+    in = read_q7x4_ia (&pIn);
 
 #ifndef ARM_MATH_BIG_ENDIAN
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c
index 4723d13..6c0e638 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_quick_sort_f32.c
  * Description:  Floating point quick sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_selection_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_selection_sort_f32.c
index 7100f04..7fa49ae 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_selection_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_selection_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_selection_sort_f32.c
  * Description:  Floating point selection sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
index 3d3ecd8..931fc2d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_sort_f32.c
  * Description:  Floating point sort
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_init_f32.c
index 723db0b..7220b4c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_sort_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_sort_init_f32.c
  * Description:  Floating point sort initialization function
  *
- * $Date:        2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c
index d0b6f99..2c80545 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_weighted_sum_f16.c
  * Description:  Weighted Sum
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c
index 7f28207..243378d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c
@@ -5,11 +5,13 @@
  * Title:        arm_weighted_sum_f32.c
  * Description:  Weighted Sum
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
  * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c
index a1504f1..cc8e8b0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c
@@ -5,13 +5,13 @@
  * Title:        arm_bitreversal.c
  * Description:  Bitreversal functions
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c
index a22e8cb..e093aec 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c
@@ -5,10 +5,10 @@
  * Title:        arm_bitreversal2.c
  * Description:  Bitreversal functions
  *
- * $Date:        18. March 2019
- * $Revision:    V1.0.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
index 7809ea9..bd13013 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal_f16.c
@@ -5,8 +5,10 @@
  * Title:        arm_bitreversal_f16.c
  * Description:  Bitreversal functions
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
@@ -100,4 +102,5 @@ const uint16_t * pBitRevTab)
    }
 }
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
index 2d6d436..ee4f926 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_f32.c
  * Description:  Combined Radix Decimation in Frequency CFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -42,111 +42,51 @@
 static float16_t arm_inverse_fft_length_f16(uint16_t fftLen)
 {
   float16_t retValue=1.0;
-                                                      
-  switch (fftLen)                                     
-  {                                                   
-                                                      
-  case 4096U:                                         
-    retValue = (float16_t)0.000244140625f;                        
-    break;                                            
-                                                      
-  case 2048U:                                         
-    retValue = (float16_t)0.00048828125f;                         
-    break;                                            
-                                                      
-  case 1024U:                                         
-    retValue = (float16_t)0.0009765625f;                         
-    break;                                            
-                                                      
-  case 512U:                                          
-    retValue = (float16_t)0.001953125f;                           
-    break;                                            
-                                                      
-  case 256U:                                          
-    retValue = (float16_t)0.00390625f;                           
-    break;                                            
-                                                      
-  case 128U:                                          
-    retValue = (float16_t)0.0078125f;                             
-    break;                                            
-                                                      
-  case 64U:                                           
-    retValue = (float16_t)0.015625f;                             
-    break;                                            
-                                                      
-  case 32U:                                           
-    retValue = (float16_t)0.03125f;                               
-    break;                                            
-                                                      
-  case 16U:                                           
-    retValue = (float16_t)0.0625f;                               
-    break;                                            
-                                                      
-                                                      
-  default:                                            
-    break;                                            
-  }                                                   
-  return(retValue); 
-}
-
-
-static void arm_bitreversal_f16_inpl_mve(
-        uint16_t *pSrc,
-  const uint16_t bitRevLen,
-  const uint16_t *pBitRevTab)
-
-{
-    uint32_t       *src = (uint32_t *)pSrc;
-    uint32_t        blkCnt;     /* loop counters */
-    uint32x4_t      bitRevTabOff;
-    uint16x8_t      one = vdupq_n_u16(1);
-
-    blkCnt = (bitRevLen / 2) / 4;
-    while (blkCnt > 0U) {
-        bitRevTabOff = vldrhq_u16(pBitRevTab);
-        pBitRevTab += 8;
 
-        uint32x4_t      bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
-        uint32x4_t      bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
+  switch (fftLen)
+  {
 
-        bitRevOff1 = bitRevOff1 >> 3;
-        bitRevOff2 = bitRevOff2 >> 3;
+  case 4096U:
+    retValue = (float16_t)0.000244140625f;
+    break;
 
-        uint32x4_t      in1 = vldrwq_gather_shifted_offset_u32(src, bitRevOff1);
-        uint32x4_t      in2 = vldrwq_gather_shifted_offset_u32(src, bitRevOff2);
+  case 2048U:
+    retValue = (float16_t)0.00048828125f;
+    break;
 
-        vstrwq_scatter_shifted_offset_u32(src, bitRevOff1, in2);
-        vstrwq_scatter_shifted_offset_u32(src, bitRevOff2, in1);
+  case 1024U:
+    retValue = (float16_t)0.0009765625f;
+    break;
 
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
+  case 512U:
+    retValue = (float16_t)0.001953125f;
+    break;
 
+  case 256U:
+    retValue = (float16_t)0.00390625f;
+    break;
 
-    /*
-     * tail
-     * (will be merged thru tail predication)
-     */
-    blkCnt = bitRevLen & 7;
-    if (blkCnt > 0U) {
-        mve_pred16_t    p0 = vctp16q(blkCnt);
+  case 128U:
+    retValue = (float16_t)0.0078125f;
+    break;
 
-        bitRevTabOff = vldrhq_z_u16(pBitRevTab, p0);
+  case 64U:
+    retValue = (float16_t)0.015625f;
+    break;
 
-        uint32x4_t      bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
-        uint32x4_t      bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
+  case 32U:
+    retValue = (float16_t)0.03125f;
+    break;
 
-        bitRevOff1 = bitRevOff1 >> 3;
-        bitRevOff2 = bitRevOff2 >> 3;
+  case 16U:
+    retValue = (float16_t)0.0625f;
+    break;
 
-        uint32x4_t      in1 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff1, p0);
-        uint32x4_t      in2 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff2, p0);
 
-        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff1, in2, p0);
-        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff2, in1, p0);
-    }
+  default:
+    break;
+  }
+  return(retValue);
 }
 
 
@@ -159,39 +99,37 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] =
-       {(0 - 16) * sizeof(float16_t *)
-       , (4 - 16) * sizeof(float16_t *)
-       , (8 - 16) * sizeof(float16_t *)
-       , (12 - 16) * sizeof(float16_t *)};
+    static const int32_t strides[4] =
+       { ( 0 - 16) * (int32_t)sizeof(float16_t *)
+       , ( 4 - 16) * (int32_t)sizeof(float16_t *)
+       , ( 8 - 16) * (int32_t)sizeof(float16_t *)
+       , (12 - 16) * (int32_t)sizeof(float16_t *)};
 
     n2 = fftLen;
     n1 = n2;
     n2 >>= 2u;
     for (int k = fftLen / 4u; k > 1; k >>= 2)
     {
+        float16_t const     *p_rearranged_twiddle_tab_stride1 =
+                            &S->rearranged_twiddle_stride1[
+                            S->rearranged_twiddle_tab_stride1_arr[stage]];
+        float16_t const     *p_rearranged_twiddle_tab_stride2 =
+                            &S->rearranged_twiddle_stride2[
+                            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        float16_t const     *p_rearranged_twiddle_tab_stride3 =
+                            &S->rearranged_twiddle_stride3[
+                            S->rearranged_twiddle_tab_stride3_arr[stage]];
+        float16_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            float16_t const     *p_rearranged_twiddle_tab_stride1 =
-                                &S->rearranged_twiddle_stride1[
-                                S->rearranged_twiddle_tab_stride1_arr[stage]];
-            float16_t const     *p_rearranged_twiddle_tab_stride2 =
-                                &S->rearranged_twiddle_stride2[
-                                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            float16_t const     *p_rearranged_twiddle_tab_stride3 =
-                                &S->rearranged_twiddle_stride3[
-                                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            float16_t const    *pW1, *pW2, *pW3;
-            float16_t           *inA = pSrc + CMPLX_DIM * i * n1;
-            float16_t           *inB = inA + n2 * CMPLX_DIM;
-            float16_t           *inC = inB + n2 * CMPLX_DIM;
-            float16_t           *inD = inC + n2 * CMPLX_DIM;
-            f16x8_t            vecW;
-
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
+            float16_t    *inA = pBase;
+            float16_t    *inB = inA + n2 * CMPLX_DIM;
+            float16_t    *inC = inB + n2 * CMPLX_DIM;
+            float16_t    *inD = inC + n2 * CMPLX_DIM;
+            float16_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            float16_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            float16_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            f16x8_t       vecW;
 
             blkCnt = n2 / 4;
             /*
@@ -260,6 +198,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -270,7 +209,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /* load scheduling */
@@ -362,16 +301,15 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
     f16x8_t vecTmp0, vecTmp1;
     f16x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     f16x8_t vecA, vecB, vecC, vecD;
-    f16x8_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q31_t *),
-        (4 - 16) * sizeof(q31_t *),
-        (8 - 16) * sizeof(q31_t *),
-        (12 - 16) * sizeof(q31_t *)
+    static const int32_t strides[4] = {
+        ( 0 - 16) * (int32_t)sizeof(q31_t *),
+        ( 4 - 16) * (int32_t)sizeof(q31_t *),
+        ( 8 - 16) * (int32_t)sizeof(q31_t *),
+        (12 - 16) * (int32_t)sizeof(q31_t *)
     };
 
     n2 = fftLen;
@@ -379,26 +317,27 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
     n2 >>= 2u;
     for (int k = fftLen / 4; k > 1; k >>= 2)
     {
+        float16_t const *p_rearranged_twiddle_tab_stride1 =
+                &S->rearranged_twiddle_stride1[
+                S->rearranged_twiddle_tab_stride1_arr[stage]];
+        float16_t const *p_rearranged_twiddle_tab_stride2 =
+                &S->rearranged_twiddle_stride2[
+                S->rearranged_twiddle_tab_stride2_arr[stage]];
+        float16_t const *p_rearranged_twiddle_tab_stride3 =
+                &S->rearranged_twiddle_stride3[
+                S->rearranged_twiddle_tab_stride3_arr[stage]];
+
+        float16_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            float16_t const *p_rearranged_twiddle_tab_stride1 =
-                    &S->rearranged_twiddle_stride1[
-                    S->rearranged_twiddle_tab_stride1_arr[stage]];
-            float16_t const *p_rearranged_twiddle_tab_stride2 =
-                    &S->rearranged_twiddle_stride2[
-                    S->rearranged_twiddle_tab_stride2_arr[stage]];
-            float16_t const *p_rearranged_twiddle_tab_stride3 =
-                    &S->rearranged_twiddle_stride3[
-                    S->rearranged_twiddle_tab_stride3_arr[stage]];
-            float16_t const *pW1, *pW2, *pW3;
-            float16_t *inA = pSrc + CMPLX_DIM * i * n1;
-            float16_t *inB = inA + n2 * CMPLX_DIM;
-            float16_t *inC = inB + n2 * CMPLX_DIM;
-            float16_t *inD = inC + n2 * CMPLX_DIM;
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
+            float16_t    *inA = pBase;
+            float16_t    *inB = inA + n2 * CMPLX_DIM;
+            float16_t    *inC = inB + n2 * CMPLX_DIM;
+            float16_t    *inD = inC + n2 * CMPLX_DIM;
+            float16_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            float16_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            float16_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            f16x8_t       vecW;
 
             blkCnt = n2 / 4;
             /*
@@ -466,6 +405,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -476,7 +416,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -592,53 +532,53 @@ void arm_cfft_f16(
         float16_t * pSrc,
         uint8_t ifftFlag,
         uint8_t bitReverseFlag)
-{                                                                                
-        uint32_t fftLen = S->fftLen;     
-
-        if (ifftFlag == 1U) {                                                            
-                                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_inverse_f16_mve(S, pSrc, fftLen, arm_inverse_fft_length_f16(S->fftLen)); 
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_inverse_f16_mve(S, pSrc, fftLen);              
-                break;                                                                   
-            }  
-        } else {                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_f16_mve(S, pSrc, fftLen);         
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_f16_mve(S, pSrc, fftLen);                      
-                break;                                                                   
-            }                                                                            
-        }                                                                                
-                                                                                         
-                                                                                         
-        if (bitReverseFlag) 
-        {                                                            
-            
-            arm_bitreversal_f16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
-                    
-        } 
+{
+        uint32_t fftLen = S->fftLen;
+
+        if (ifftFlag == 1U) {
+
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_inverse_f16_mve(S, pSrc, fftLen, arm_inverse_fft_length_f16(S->fftLen));
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_inverse_f16_mve(S, pSrc, fftLen);
+                break;
+            }
+        } else {
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_f16_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_f16_mve(S, pSrc, fftLen);
+                break;
+            }
+        }
+
+
+        if (bitReverseFlag)
+        {
+
+            arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
+
+        }
 }
 
 #else
@@ -666,162 +606,6 @@ extern void arm_radix4_butterfly_f16(
   @ingroup groupTransforms
  */
 
-/**
-  @defgroup ComplexFFT Complex FFT Functions
- 
-  @par
-                   The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
-                   Discrete Fourier Transform (DFT).  The FFT can be orders of magnitude faster
-                   than the DFT, especially for long lengths.
-                   The algorithms described in this section
-                   operate on complex data.  A separate set of functions is devoted to handling
-                   of real sequences.
-  @par
-                   There are separate algorithms for handling floating-point, Q15, and Q31 data
-                   types.  The algorithms available for each data type are described next.
-  @par
-                   The FFT functions operate in-place.  That is, the array holding the input data
-                   will also be used to hold the corresponding result.  The input data is complex
-                   and contains <code>2*fftLen</code> interleaved values as shown below.
-                   <pre>{real[0], imag[0], real[1], imag[1], ...} </pre>
-                   The FFT result will be contained in the same array and the frequency domain
-                   values will have the same interleaving.
- 
-  @par Floating-point
-                   The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-8
-                   stages are performed along with a single radix-2 or radix-4 stage, as needed.
-                   The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
-                   a different twiddle factor table.
-  @par
-                   The function uses the standard FFT definition and output values may grow by a
-                   factor of <code>fftLen</code> when computing the forward transform.  The
-                   inverse transform includes a scale of <code>1/fftLen</code> as part of the
-                   calculation and this matches the textbook definition of the inverse FFT.
-  @par
-                   For the MVE version, the new arm_cfft_init_f32 initialization function is 
-                   <b>mandatory</b>. <b>Compilation flags are available to include only the required tables for the
-                   needed FFTs.</b> Other FFT versions can continue to be initialized as 
-                   explained below.
-  @par
-                   For not MVE versions, pre-initialized data structures containing twiddle factors 
-                   and bit reversal tables are provided and defined in <code>arm_const_structs.h</code>.  Include
-                   this header in your function and then pass one of the constant structures as
-                   an argument to arm_cfft_f32.  For example:
-  @par
-                   <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
-  @par
-                   computes a 64-point inverse complex FFT including bit reversal.
-                   The data structures are treated as constant data and not modified during the
-                   calculation.  The same data structure can be reused for multiple transforms
-                   including mixing forward and inverse transforms.
-  @par
-                   Earlier releases of the library provided separate radix-2 and radix-4
-                   algorithms that operated on floating-point data.  These functions are still
-                   provided but are deprecated.  The older functions are slower and less general
-                   than the new functions.
-  @par
-                   An example of initialization of the constants for the arm_cfft_f32 function follows:
-  @code
-                   const static arm_cfft_instance_f32 *S;
-                   ...
-                     switch (length) {
-                       case 16:
-                         S = &arm_cfft_sR_f32_len16;
-                         break;
-                       case 32:
-                         S = &arm_cfft_sR_f32_len32;
-                         break;
-                       case 64:
-                         S = &arm_cfft_sR_f32_len64;
-                         break;
-                       case 128:
-                         S = &arm_cfft_sR_f32_len128;
-                         break;
-                       case 256:
-                         S = &arm_cfft_sR_f32_len256;
-                         break;
-                       case 512:
-                         S = &arm_cfft_sR_f32_len512;
-                         break;
-                       case 1024:
-                         S = &arm_cfft_sR_f32_len1024;
-                         break;
-                       case 2048:
-                         S = &arm_cfft_sR_f32_len2048;
-                         break;
-                       case 4096:
-                         S = &arm_cfft_sR_f32_len4096;
-                         break;
-                     }
-  @endcode
-  @par
-                   The new arm_cfft_init_f32 can also be used.
-  @par Q15 and Q31
-                   The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-4
-                   stages are performed along with a single radix-2 stage, as needed.
-                   The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
-                   a different twiddle factor table.
-  @par
-                   The function uses the standard FFT definition and output values may grow by a
-                   factor of <code>fftLen</code> when computing the forward transform.  The
-                   inverse transform includes a scale of <code>1/fftLen</code> as part of the
-                   calculation and this matches the textbook definition of the inverse FFT.
-  @par
-                   Pre-initialized data structures containing twiddle factors and bit reversal
-                   tables are provided and defined in <code>arm_const_structs.h</code>.  Include
-                   this header in your function and then pass one of the constant structures as
-                   an argument to arm_cfft_q31. For example:
-  @par
-                   <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
-  @par
-                   computes a 64-point inverse complex FFT including bit reversal.
-                   The data structures are treated as constant data and not modified during the
-                   calculation.  The same data structure can be reused for multiple transforms
-                   including mixing forward and inverse transforms.
-  @par
-                   Earlier releases of the library provided separate radix-2 and radix-4
-                   algorithms that operated on floating-point data.  These functions are still
-                   provided but are deprecated.  The older functions are slower and less general
-                   than the new functions.
-  @par
-                   An example of initialization of the constants for the arm_cfft_q31 function follows:
-  @code
-                   const static arm_cfft_instance_q31 *S;
-                   ...
-                     switch (length) {
-                       case 16:
-                         S = &arm_cfft_sR_q31_len16;
-                         break;
-                       case 32:
-                         S = &arm_cfft_sR_q31_len32;
-                         break;
-                       case 64:
-                         S = &arm_cfft_sR_q31_len64;
-                         break;
-                       case 128:
-                         S = &arm_cfft_sR_q31_len128;
-                         break;
-                       case 256:
-                         S = &arm_cfft_sR_q31_len256;
-                         break;
-                       case 512:
-                         S = &arm_cfft_sR_q31_len512;
-                         break;
-                       case 1024:
-                         S = &arm_cfft_sR_q31_len1024;
-                         break;
-                       case 2048:
-                         S = &arm_cfft_sR_q31_len2048;
-                         break;
-                       case 4096:
-                         S = &arm_cfft_sR_q31_len4096;
-                         break;
-                     }
-  @endcode
- 
- */
-
-
 /**
   @addtogroup ComplexFFT
   @{
@@ -855,7 +639,7 @@ void arm_cfft_f16(
         pSrc = p1 + 1;
         for(l=0; l<L; l++)
         {
-            *pSrc = -*pSrc;
+            *pSrc = -(_Float16)*pSrc;
             pSrc += 2;
         }
     }
@@ -877,7 +661,7 @@ void arm_cfft_f16(
         case 2048:
         arm_cfft_radix4by2_f16  ( p1, L, (float16_t*)S->pTwiddle);
         break;
-   
+
     }
 
     if ( bitReverseFlag )
@@ -885,13 +669,13 @@ void arm_cfft_f16(
 
     if (ifftFlag == 1U)
     {
-        invL = 1.0f/(float16_t)L;
+        invL = 1.0f16/(_Float16)L;
         /*  Conjugate and scale output data */
         pSrc = p1;
         for(l=0; l<L; l++)
         {
-            *pSrc++ *=   invL ;
-            *pSrc  = -(*pSrc) * invL;
+            *pSrc++ *=   (_Float16)invL ;
+            *pSrc  = -(_Float16)(*pSrc) * (_Float16)invL;
             pSrc++;
         }
     }
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c
index 97a12f3..c7a5be6 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_f32.c
  * Description:  Combined Radix Decimation in Frequency CFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -41,101 +41,70 @@
 static float32_t arm_inverse_fft_length_f32(uint16_t fftLen)
 {
   float32_t retValue=1.0;
-                                                      
-  switch (fftLen)                                     
-  {                                                   
-                                                      
-  case 4096U:                                         
-    retValue = 0.000244140625;                        
-    break;                                            
-                                                      
-  case 2048U:                                         
-    retValue = 0.00048828125;                         
-    break;                                            
-                                                      
-  case 1024U:                                         
-    retValue = 0.0009765625f;                         
-    break;                                            
-                                                      
-  case 512U:                                          
-    retValue = 0.001953125;                           
-    break;                                            
-                                                      
-  case 256U:                                          
-    retValue = 0.00390625f;                           
-    break;                                            
-                                                      
-  case 128U:                                          
-    retValue = 0.0078125;                             
-    break;                                            
-                                                      
-  case 64U:                                           
-    retValue = 0.015625f;                             
-    break;                                            
-                                                      
-  case 32U:                                           
-    retValue = 0.03125;                               
-    break;                                            
-                                                      
-  case 16U:                                           
-    retValue = 0.0625f;                               
-    break;                                            
-                                                      
-                                                      
-  default:                                            
-    break;                                            
-  }                                                   
-  return(retValue); 
-}
 
+  switch (fftLen)
+  {
 
-static void arm_bitreversal_f32_inpl_mve(
-        uint32_t *pSrc,
-  const uint16_t  bitRevLen,
-  const uint16_t *pBitRevTab)
+  case 4096U:
+    retValue = 0.000244140625;
+    break;
 
-{
-    uint64_t       *src = (uint64_t *) pSrc;
-    uint32_t        blkCnt;     /* loop counters */
-    uint32x4_t      bitRevTabOff;
-    uint32x4_t      one = vdupq_n_u32(1);
+  case 2048U:
+    retValue = 0.00048828125;
+    break;
 
-    blkCnt = (bitRevLen / 2) / 2;
-    while (blkCnt > 0U) {
-        bitRevTabOff = vldrhq_u32(pBitRevTab);
-        pBitRevTab += 4;
+  case 1024U:
+    retValue = 0.0009765625f;
+    break;
 
-        uint64x2_t      bitRevOff1 = vmullbq_int_u32(bitRevTabOff, one);
-        uint64x2_t      bitRevOff2 = vmulltq_int_u32(bitRevTabOff, one);
+  case 512U:
+    retValue = 0.001953125;
+    break;
 
-        uint64x2_t      in1 = vldrdq_gather_offset_u64(src, bitRevOff1);
-        uint64x2_t      in2 = vldrdq_gather_offset_u64(src, bitRevOff2);
+  case 256U:
+    retValue = 0.00390625f;
+    break;
 
-        vstrdq_scatter_offset_u64(src, bitRevOff1, in2);
-        vstrdq_scatter_offset_u64(src, bitRevOff2, in1);
+  case 128U:
+    retValue = 0.0078125;
+    break;
 
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
+  case 64U:
+    retValue = 0.015625f;
+    break;
+
+  case 32U:
+    retValue = 0.03125;
+    break;
+
+  case 16U:
+    retValue = 0.0625f;
+    break;
+
+
+  default:
+    break;
+  }
+  return(retValue);
 }
 
 
+
+
 static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen)
 {
-    f32x4_t vecTmp0, vecTmp1;
-    f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
-    f32x4_t vecA, vecB, vecC, vecD;
-    uint32_t  blkCnt;
-    uint32_t  n1, n2;
-    uint32_t  stage = 0;
-    int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q31_t *),
-        (1 - 16) * sizeof(q31_t *),
-        (8 - 16) * sizeof(q31_t *),
-        (9 - 16) * sizeof(q31_t *)
+    f32x4_t     vecTmp0, vecTmp1;
+    f32x4_t     vecSum0, vecDiff0, vecSum1, vecDiff1;
+    f32x4_t     vecA, vecB, vecC, vecD;
+    uint32_t    blkCnt;
+    uint32_t    n1, n2;
+    uint32_t    stage = 0;
+    int32_t     iter = 1;
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q31_t *),
+        (1 - 16) * (int32_t)sizeof(q31_t *),
+        (8 - 16) * (int32_t)sizeof(q31_t *),
+        (9 - 16) * (int32_t)sizeof(q31_t *)
     };
 
     n2 = fftLen;
@@ -143,29 +112,28 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3
     n2 >>= 2u;
     for (int k = fftLen / 4u; k > 1; k >>= 2)
     {
+        float32_t const     *p_rearranged_twiddle_tab_stride1 =
+                            &S->rearranged_twiddle_stride1[
+                            S->rearranged_twiddle_tab_stride1_arr[stage]];
+        float32_t const     *p_rearranged_twiddle_tab_stride2 =
+                            &S->rearranged_twiddle_stride2[
+                            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        float32_t const     *p_rearranged_twiddle_tab_stride3 =
+                            &S->rearranged_twiddle_stride3[
+                            S->rearranged_twiddle_tab_stride3_arr[stage]];
+
+        float32_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            float32_t const     *p_rearranged_twiddle_tab_stride1 =
-                                &S->rearranged_twiddle_stride1[
-                                S->rearranged_twiddle_tab_stride1_arr[stage]];
-            float32_t const     *p_rearranged_twiddle_tab_stride2 =
-                                &S->rearranged_twiddle_stride2[
-                                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            float32_t const     *p_rearranged_twiddle_tab_stride3 =
-                                &S->rearranged_twiddle_stride3[
-                                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            float32_t const    *pW1, *pW2, *pW3;
-            float32_t           *inA = pSrc + CMPLX_DIM * i * n1;
-            float32_t           *inB = inA + n2 * CMPLX_DIM;
-            float32_t           *inC = inB + n2 * CMPLX_DIM;
-            float32_t           *inD = inC + n2 * CMPLX_DIM;
+            float32_t    *inA = pBase;
+            float32_t    *inB = inA + n2 * CMPLX_DIM;
+            float32_t    *inC = inB + n2 * CMPLX_DIM;
+            float32_t    *inD = inC + n2 * CMPLX_DIM;
+            float32_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            float32_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            float32_t const *pW3 = p_rearranged_twiddle_tab_stride3;
             f32x4_t            vecW;
 
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
-
             blkCnt = n2 / 2;
             /*
              * load 2 f32 complex pair
@@ -233,6 +201,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -243,7 +212,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /* load scheduling */
@@ -335,16 +304,15 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
     f32x4_t vecTmp0, vecTmp1;
     f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     f32x4_t vecA, vecB, vecC, vecD;
-    f32x4_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q31_t *),
-        (1 - 16) * sizeof(q31_t *),
-        (8 - 16) * sizeof(q31_t *),
-        (9 - 16) * sizeof(q31_t *)
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q31_t *),
+        (1 - 16) * (int32_t)sizeof(q31_t *),
+        (8 - 16) * (int32_t)sizeof(q31_t *),
+        (9 - 16) * (int32_t)sizeof(q31_t *)
     };
 
     n2 = fftLen;
@@ -352,26 +320,27 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
     n2 >>= 2u;
     for (int k = fftLen / 4; k > 1; k >>= 2)
     {
+        float32_t const *p_rearranged_twiddle_tab_stride1 =
+                &S->rearranged_twiddle_stride1[
+                S->rearranged_twiddle_tab_stride1_arr[stage]];
+        float32_t const *p_rearranged_twiddle_tab_stride2 =
+                &S->rearranged_twiddle_stride2[
+                S->rearranged_twiddle_tab_stride2_arr[stage]];
+        float32_t const *p_rearranged_twiddle_tab_stride3 =
+                &S->rearranged_twiddle_stride3[
+                S->rearranged_twiddle_tab_stride3_arr[stage]];
+
+        float32_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            float32_t const *p_rearranged_twiddle_tab_stride1 =
-                    &S->rearranged_twiddle_stride1[
-                    S->rearranged_twiddle_tab_stride1_arr[stage]];
-            float32_t const *p_rearranged_twiddle_tab_stride2 =
-                    &S->rearranged_twiddle_stride2[
-                    S->rearranged_twiddle_tab_stride2_arr[stage]];
-            float32_t const *p_rearranged_twiddle_tab_stride3 =
-                    &S->rearranged_twiddle_stride3[
-                    S->rearranged_twiddle_tab_stride3_arr[stage]];
-            float32_t const *pW1, *pW2, *pW3;
-            float32_t *inA = pSrc + CMPLX_DIM * i * n1;
-            float32_t *inB = inA + n2 * CMPLX_DIM;
-            float32_t *inC = inB + n2 * CMPLX_DIM;
-            float32_t *inD = inC + n2 * CMPLX_DIM;
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
+            float32_t    *inA = pBase;
+            float32_t    *inB = inA + n2 * CMPLX_DIM;
+            float32_t    *inC = inB + n2 * CMPLX_DIM;
+            float32_t    *inD = inC + n2 * CMPLX_DIM;
+            float32_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            float32_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            float32_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            f32x4_t       vecW;
 
             blkCnt = n2 / 2;
             /*
@@ -439,6 +408,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -449,7 +419,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -565,53 +535,53 @@ void arm_cfft_f32(
         float32_t * pSrc,
         uint8_t ifftFlag,
         uint8_t bitReverseFlag)
-{                                                                                
-        uint32_t fftLen = S->fftLen;     
-
-        if (ifftFlag == 1U) {                                                            
-                                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, fftLen, arm_inverse_fft_length_f32(S->fftLen)); 
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_inverse_f32_mve(S, pSrc, fftLen);              
-                break;                                                                   
-            }  
-        } else {                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_f32_mve(S, pSrc, fftLen);         
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_f32_mve(S, pSrc, fftLen);                      
-                break;                                                                   
-            }                                                                            
-        }                                                                                
-                                                                                         
-                                                                                         
-        if (bitReverseFlag) 
-        {                                                            
-            
-            arm_bitreversal_f32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
-                    
-        } 
+{
+        uint32_t fftLen = S->fftLen;
+
+        if (ifftFlag == 1U) {
+
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, fftLen, arm_inverse_fft_length_f32(S->fftLen));
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_inverse_f32_mve(S, pSrc, fftLen);
+                break;
+            }
+        } else {
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_f32_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_f32_mve(S, pSrc, fftLen);
+                break;
+            }
+        }
+
+
+        if (bitReverseFlag)
+        {
+
+            arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
+
+        }
 }
 
 
@@ -633,7 +603,7 @@ extern void arm_bitreversal_32(
 
 /**
   @defgroup ComplexFFT Complex FFT Functions
- 
+
   @par
                    The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
                    Discrete Fourier Transform (DFT).  The FFT can be orders of magnitude faster
@@ -651,7 +621,7 @@ extern void arm_bitreversal_32(
                    <pre>{real[0], imag[0], real[1], imag[1], ...} </pre>
                    The FFT result will be contained in the same array and the frequency domain
                    values will have the same interleaving.
- 
+
   @par Floating-point
                    The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-8
                    stages are performed along with a single radix-2 or radix-4 stage, as needed.
@@ -663,12 +633,12 @@ extern void arm_bitreversal_32(
                    inverse transform includes a scale of <code>1/fftLen</code> as part of the
                    calculation and this matches the textbook definition of the inverse FFT.
   @par
-                   For the MVE version, the new arm_cfft_init_f32 initialization function is 
+                   For the MVE version, the new arm_cfft_init_f32 initialization function is
                    <b>mandatory</b>. <b>Compilation flags are available to include only the required tables for the
-                   needed FFTs.</b> Other FFT versions can continue to be initialized as 
+                   needed FFTs.</b> Other FFT versions can continue to be initialized as
                    explained below.
   @par
-                   For not MVE versions, pre-initialized data structures containing twiddle factors 
+                   For non-MVE versions, pre-initialized data structures containing twiddle factors
                    and bit reversal tables are provided and defined in <code>arm_const_structs.h</code>.  Include
                    this header in your function and then pass one of the constant structures as
                    an argument to arm_cfft_f32.  For example:
@@ -689,36 +659,37 @@ extern void arm_bitreversal_32(
   @code
                    const static arm_cfft_instance_f32 *S;
                    ...
-                     switch (length) {
-                       case 16:
-                         S = &arm_cfft_sR_f32_len16;
-                         break;
-                       case 32:
-                         S = &arm_cfft_sR_f32_len32;
-                         break;
-                       case 64:
-                         S = &arm_cfft_sR_f32_len64;
-                         break;
-                       case 128:
-                         S = &arm_cfft_sR_f32_len128;
-                         break;
-                       case 256:
-                         S = &arm_cfft_sR_f32_len256;
-                         break;
-                       case 512:
-                         S = &arm_cfft_sR_f32_len512;
-                         break;
-                       case 1024:
-                         S = &arm_cfft_sR_f32_len1024;
-                         break;
-                       case 2048:
-                         S = &arm_cfft_sR_f32_len2048;
-                         break;
-                       case 4096:
-                         S = &arm_cfft_sR_f32_len4096;
-                         break;
-                     }
+                   switch (length) {
+                     case 16:
+                       S = &arm_cfft_sR_f32_len16;
+                       break;
+                     case 32:
+                       S = &arm_cfft_sR_f32_len32;
+                       break;
+                     case 64:
+                       S = &arm_cfft_sR_f32_len64;
+                       break;
+                     case 128:
+                       S = &arm_cfft_sR_f32_len128;
+                       break;
+                     case 256:
+                       S = &arm_cfft_sR_f32_len256;
+                       break;
+                     case 512:
+                       S = &arm_cfft_sR_f32_len512;
+                       break;
+                     case 1024:
+                       S = &arm_cfft_sR_f32_len1024;
+                       break;
+                     case 2048:
+                       S = &arm_cfft_sR_f32_len2048;
+                       break;
+                     case 4096:
+                       S = &arm_cfft_sR_f32_len4096;
+                       break;
+                   }
   @endcode
+
   @par
                    The new arm_cfft_init_f32 can also be used.
   @par Q15 and Q31
@@ -783,7 +754,7 @@ extern void arm_bitreversal_32(
                          break;
                      }
   @endcode
- 
+
  */
 
 void arm_cfft_radix8by2_f32 (arm_cfft_instance_f32 * S, float32_t * p1)
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c
index 3f5a91b..83b2cd3 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_f64.c
  * Description:  Combined Radix Decimation in Frequency CFFT Double Precision Floating point processing function
  *
- * $Date:        29. November 2019
- * $Revision:    V1.0.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -43,10 +43,6 @@ extern void arm_bitreversal_64(
   const uint16_t   bitRevLen,
   const uint16_t * pBitRevTable);
 
-/**
-* @} end of ComplexFFT group
-*/
-
 /* ----------------------------------------------------------------------
  * Internal helper function used by the FFTs
  * ---------------------------------------------------------------------- */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c
index 7dfaf62..ac8260a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_init_f16.c
  * Description:  Initialization function for cfft f16 instance
  *
- * $Date:        07. January 2020
- * $Revision:    V1.7.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c
index 98db754..b82f5ce 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_init_f32.c
  * Description:  Initialization function for cfft f32 instance
  *
- * $Date:        07. January 2020
- * $Revision:    V1.7.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c
index 05e691a..cb2dae8 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_init_f64.c
  * Description:  Initialization function for cfft f64 instance
  *
- * $Date:        23. January 2020
- * $Revision:    V1.7.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -72,7 +72,7 @@ arm_status arm_cfft_init_f64(
 
         /*  Initializations of Instance structure depending on the FFT length */
         switch (S->fftLen) {
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
             /*  Initializations of structure parameters for 4096 point FFT */
         case 4096U:
             /*  Initialise the bit reversal table modifier */
@@ -80,7 +80,7 @@ arm_status arm_cfft_init_f64(
             break;
 #endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
             /*  Initializations of structure parameters for 2048 point FFT */
         case 2048U:
             /*  Initialise the bit reversal table modifier */
@@ -89,7 +89,7 @@ arm_status arm_cfft_init_f64(
             break;
 #endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
             /*  Initializations of structure parameters for 1024 point FFT */
         case 1024U:
             /*  Initialise the bit reversal table modifier */
@@ -98,7 +98,7 @@ arm_status arm_cfft_init_f64(
             break;
 #endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
             /*  Initializations of structure parameters for 512 point FFT */
         case 512U:
             /*  Initialise the bit reversal table modifier */
@@ -106,31 +106,31 @@ arm_status arm_cfft_init_f64(
             break;
 #endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
         case 256U:
             FFTINIT(f64,256);
             break;
 #endif
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
         case 128U:
             FFTINIT(f64,128);
             break;
 #endif 
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
         case 64U:
             FFTINIT(f64,64);
             break;
 #endif 
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
         case 32U:
             FFTINIT(f64,32);
             break;
 #endif 
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_f64_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
         case 16U:
             /*  Initializations of structure parameters for 16 point FFT */
             FFTINIT(f64,16);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c
index d08b97e..a0f6356 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_init_q15.c
  * Description:  Initialization function for cfft q15 instance
  *
- * $Date:        07. January 2020
- * $Revision:    V1.7.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c
index 8b9c970..0877d2c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_init_q31.c
  * Description:  Initialization function for cfft q31 instance
  *
- * $Date:        07. January 2020
- * $Revision:    V1.7.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
index 1bebc2b..83ca024 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_q15.c
  * Description:  Combined Radix Decimation in Q15 Frequency CFFT processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,65 +35,6 @@
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h"
 
 
-static void arm_bitreversal_16_inpl_mve(
-        uint16_t *pSrc,
-  const uint16_t bitRevLen,
-  const uint16_t *pBitRevTab)
-
-{
-    uint32_t       *src = (uint32_t *)pSrc;
-    uint32_t        blkCnt;     /* loop counters */
-    uint32x4_t      bitRevTabOff;
-    uint16x8_t      one = vdupq_n_u16(1);
-
-    blkCnt = (bitRevLen / 2) / 4;
-    while (blkCnt > 0U) {
-        bitRevTabOff = vldrhq_u16(pBitRevTab);
-        pBitRevTab += 8;
-
-        uint32x4_t      bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
-        uint32x4_t      bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
-
-        bitRevOff1 = bitRevOff1 >> 3;
-        bitRevOff2 = bitRevOff2 >> 3;
-
-        uint32x4_t      in1 = vldrwq_gather_shifted_offset_u32(src, bitRevOff1);
-        uint32x4_t      in2 = vldrwq_gather_shifted_offset_u32(src, bitRevOff2);
-
-        vstrwq_scatter_shifted_offset_u32(src, bitRevOff1, in2);
-        vstrwq_scatter_shifted_offset_u32(src, bitRevOff2, in1);
-
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-
-
-    /*
-     * tail
-     * (will be merged thru tail predication)
-     */
-    blkCnt = bitRevLen & 7;
-    if (blkCnt > 0U) {
-        mve_pred16_t    p0 = vctp16q(blkCnt);
-
-        bitRevTabOff = vldrhq_z_u16(pBitRevTab, p0);
-
-        uint32x4_t      bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
-        uint32x4_t      bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);
-
-        bitRevOff1 = bitRevOff1 >> 3;
-        bitRevOff2 = bitRevOff2 >> 3;
-
-        uint32x4_t      in1 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff1, p0);
-        uint32x4_t      in2 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff2, p0);
-
-        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff1, in2, p0);
-        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff2, in1, p0);
-    }
-}
-
 static void _arm_radix4_butterfly_q15_mve(
     const arm_cfft_instance_q15 * S,
     q15_t   *pSrc,
@@ -102,14 +43,13 @@ static void _arm_radix4_butterfly_q15_mve(
     q15x8_t vecTmp0, vecTmp1;
     q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     q15x8_t vecA, vecB, vecC, vecD;
-    q15x8_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
-        (8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *),
+        (8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *)
     };
 
     /*
@@ -122,25 +62,26 @@ static void _arm_radix4_butterfly_q15_mve(
 
     for (int k = fftLen / 4u; k > 1; k >>= 2u)
     {
+        q15_t const *p_rearranged_twiddle_tab_stride2 =
+            &S->rearranged_twiddle_stride2[
+            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
+            S->rearranged_twiddle_tab_stride3_arr[stage]];
+        q15_t const *p_rearranged_twiddle_tab_stride1 =
+            &S->rearranged_twiddle_stride1[
+            S->rearranged_twiddle_tab_stride1_arr[stage]];
+
+        q15_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            q15_t const *p_rearranged_twiddle_tab_stride2 =
-                &S->rearranged_twiddle_stride2[
-                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
-                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            q15_t const *p_rearranged_twiddle_tab_stride1 =
-                &S->rearranged_twiddle_stride1[
-                S->rearranged_twiddle_tab_stride1_arr[stage]];
-            q15_t const *pW1, *pW2, *pW3;
-            q15_t    *inA = pSrc + CMPLX_DIM * i * n1;
+            q15_t    *inA = pBase;
             q15_t    *inB = inA + n2 * CMPLX_DIM;
             q15_t    *inC = inB + n2 * CMPLX_DIM;
             q15_t    *inD = inC + n2 * CMPLX_DIM;
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
+            q15_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            q15_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            q15_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            q15x8_t    vecW;
 
             blkCnt = n2 / 4;
             /*
@@ -173,7 +114,7 @@ static void _arm_radix4_butterfly_q15_mve(
                  */
                 vecW = vld1q(pW2);
                 pW2 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
 
                 vst1q(inB, vecTmp1);
                 inB += 8;
@@ -186,7 +127,7 @@ static void _arm_radix4_butterfly_q15_mve(
                  */
                 vecW = vld1q(pW1);
                 pW1 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
                 vst1q(inC, vecTmp1);
                 inC += 8;
 
@@ -199,7 +140,7 @@ static void _arm_radix4_butterfly_q15_mve(
                  */
                 vecW = vld1q(pW3);
                 pW3 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
                 vst1q(inD, vecTmp1);
                 inD += 8;
 
@@ -208,6 +149,7 @@ static void _arm_radix4_butterfly_q15_mve(
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -218,7 +160,7 @@ static void _arm_radix4_butterfly_q15_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -245,16 +187,16 @@ static void _arm_radix4_butterfly_q15_mve(
         vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
 
         vecTmp0 = vhaddq(vecSum0, vecSum1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0);
 
         vecTmp0 = vhsubq(vecSum0, vecSum1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0);
 
         vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0);
 
         vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0);
 
         blkCnt--;
     }
@@ -295,7 +237,7 @@ static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pS
         pCoefVec += 8;
 
         vecDiff = vhsubq(vecIn0, vecIn1);
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q15x8_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 8;
 
@@ -337,14 +279,13 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
     q15x8_t vecTmp0, vecTmp1;
     q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     q15x8_t vecA, vecB, vecC, vecD;
-    q15x8_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
-        (8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *),
+        (8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *)
     };
 
 
@@ -358,25 +299,27 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
 
     for (int k = fftLen / 4u; k > 1; k >>= 2u)
     {
+        q15_t const *p_rearranged_twiddle_tab_stride2 =
+            &S->rearranged_twiddle_stride2[
+            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
+            S->rearranged_twiddle_tab_stride3_arr[stage]];
+        q15_t const *p_rearranged_twiddle_tab_stride1 =
+            &S->rearranged_twiddle_stride1[
+            S->rearranged_twiddle_tab_stride1_arr[stage]];
+
+        q15_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            q15_t const *p_rearranged_twiddle_tab_stride2 =
-                &S->rearranged_twiddle_stride2[
-                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
-                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            q15_t const *p_rearranged_twiddle_tab_stride1 =
-                &S->rearranged_twiddle_stride1[
-                S->rearranged_twiddle_tab_stride1_arr[stage]];
-            q15_t const *pW1, *pW2, *pW3;
-            q15_t    *inA = pSrc + CMPLX_DIM * i * n1;
+            q15_t    *inA = pBase;
             q15_t    *inB = inA + n2 * CMPLX_DIM;
             q15_t    *inC = inB + n2 * CMPLX_DIM;
             q15_t    *inD = inC + n2 * CMPLX_DIM;
+            q15_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            q15_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            q15_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            q15x8_t    vecW;
 
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
 
             blkCnt = n2 / 4;
             /*
@@ -409,7 +352,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
                  */
                 vecW = vld1q(pW2);
                 pW2 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
 
                 vst1q(inB, vecTmp1);
                 inB += 8;
@@ -422,7 +365,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
                  */
                 vecW = vld1q(pW1);
                 pW1 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
                 vst1q(inC, vecTmp1);
                 inC += 8;
                 /*
@@ -434,7 +377,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
                  */
                 vecW = vld1q(pW3);
                 pW3 += 8;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
                 vst1q(inD, vecTmp1);
                 inD += 8;
 
@@ -443,6 +386,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -453,7 +397,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -480,16 +424,16 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
         vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
 
         vecTmp0 = vhaddq(vecSum0, vecSum1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0);
 
         vecTmp0 = vhsubq(vecSum0, vecSum1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0);
 
         vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0);
 
         vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
-        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
+        vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0);
 
         blkCnt--;
     }
@@ -594,53 +538,53 @@ void arm_cfft_q15(
         q15_t * pSrc,
         uint8_t ifftFlag,
         uint8_t bitReverseFlag)
-{                                                                             
-        uint32_t fftLen = S->fftLen;     
-
-        if (ifftFlag == 1U) {                                                            
-                                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen); 
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen);              
-                break;                                                                   
-            }  
-        } else {                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:    
-                _arm_radix4_butterfly_q15_mve(S, pSrc, fftLen);         
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen);                      
-                break;                                                                   
-            }                                                                            
-        }                                                                                
-                                                                                         
-                                                                                         
-        if (bitReverseFlag) 
-        {                                                            
-            
+{
+        uint32_t fftLen = S->fftLen;
+
+        if (ifftFlag == 1U) {
+
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen);
+                break;
+            }
+        } else {
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_q15_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen);
+                break;
+            }
+        }
+
+
+        if (bitReverseFlag)
+        {
+
             arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
-       
-        } 
+
+        }
 }
 
 #else
@@ -776,7 +720,7 @@ void arm_cfft_radix4by2_q15(
 
   for (i = n2; i > 0; i--)
   {
-      coeff = read_q15x2_ia ((q15_t **) &pC);
+      coeff = read_q15x2_ia (&pC);
 
       T = read_q15x2 (pSi);
       T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
@@ -875,7 +819,7 @@ void arm_cfft_radix4by2_inverse_q15(
 
   for (i = n2; i > 0; i--)
   {
-     coeff = read_q15x2_ia ((q15_t **) &pC);
+     coeff = read_q15x2_ia (&pC);
 
      T = read_q15x2 (pSi);
      T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c
index d0fb253..373e8a7 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_q31.c
  * Description:  Combined Radix Decimation in Frequency CFFT fixed point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -36,37 +36,6 @@
 
 #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_vec_fft.h"
 
-static void arm_bitreversal_32_inpl_mve(
-        uint32_t *pSrc,
-  const uint16_t  bitRevLen,
-  const uint16_t *pBitRevTab)
-
-{
-    uint64_t       *src = (uint64_t *) pSrc;
-    uint32_t        blkCnt;     /* loop counters */
-    uint32x4_t      bitRevTabOff;
-    uint32x4_t      one = vdupq_n_u32(1);
-
-    blkCnt = (bitRevLen / 2) / 2;
-    while (blkCnt > 0U) {
-        bitRevTabOff = vldrhq_u32(pBitRevTab);
-        pBitRevTab += 4;
-
-        uint64x2_t      bitRevOff1 = vmullbq_int_u32(bitRevTabOff, one);
-        uint64x2_t      bitRevOff2 = vmulltq_int_u32(bitRevTabOff, one);
-
-        uint64x2_t      in1 = vldrdq_gather_offset_u64(src, bitRevOff1);
-        uint64x2_t      in2 = vldrdq_gather_offset_u64(src, bitRevOff2);
-
-        vstrdq_scatter_offset_u64(src, bitRevOff1, in2);
-        vstrdq_scatter_offset_u64(src, bitRevOff2, in1);
-
-        /*
-         * Decrement the blockSize loop counter
-         */
-        blkCnt--;
-    }
-}
 
 static void _arm_radix4_butterfly_q31_mve(
     const arm_cfft_instance_q31 * S,
@@ -76,14 +45,13 @@ static void _arm_radix4_butterfly_q31_mve(
     q31x4_t vecTmp0, vecTmp1;
     q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     q31x4_t vecA, vecB, vecC, vecD;
-    q31x4_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q31_t *), (1 - 16) * sizeof(q31_t *),
-        (8 - 16) * sizeof(q31_t *), (9 - 16) * sizeof(q31_t *)
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *),
+        (8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *)
     };
 
 
@@ -97,25 +65,27 @@ static void _arm_radix4_butterfly_q31_mve(
 
     for (int k = fftLen / 4u; k > 1; k >>= 2u)
     {
+        q31_t const *p_rearranged_twiddle_tab_stride2 =
+            &S->rearranged_twiddle_stride2[
+            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
+            S->rearranged_twiddle_tab_stride3_arr[stage]];
+        q31_t const *p_rearranged_twiddle_tab_stride1 =
+            &S->rearranged_twiddle_stride1[
+            S->rearranged_twiddle_tab_stride1_arr[stage]];
+
+        q31_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            q31_t const *p_rearranged_twiddle_tab_stride2 =
-                &S->rearranged_twiddle_stride2[
-                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
-                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            q31_t const *p_rearranged_twiddle_tab_stride1 =
-                &S->rearranged_twiddle_stride1[
-                S->rearranged_twiddle_tab_stride1_arr[stage]];
-            q31_t const *pW1, *pW2, *pW3;
-            q31_t    *inA = pSrc + CMPLX_DIM * i * n1;
+            q31_t    *inA = pBase;
             q31_t    *inB = inA + n2 * CMPLX_DIM;
             q31_t    *inC = inB + n2 * CMPLX_DIM;
             q31_t    *inD = inC + n2 * CMPLX_DIM;
+            q31_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            q31_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            q31_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            q31x4_t    vecW;
 
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
 
             blkCnt = n2 / 2;
             /*
@@ -148,7 +118,7 @@ static void _arm_radix4_butterfly_q31_mve(
                  */
                 vecW = vld1q(pW2);
                 pW2 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
 
                 vst1q(inB, vecTmp1);
                 inB += 4;
@@ -161,7 +131,7 @@ static void _arm_radix4_butterfly_q31_mve(
                  */
                 vecW = vld1q(pW1);
                 pW1 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
                 vst1q(inC, vecTmp1);
                 inC += 4;
                 /*
@@ -173,7 +143,7 @@ static void _arm_radix4_butterfly_q31_mve(
                  */
                 vecW = vld1q(pW3);
                 pW3 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
                 vst1q(inD, vecTmp1);
                 inD += 4;
 
@@ -182,6 +152,7 @@ static void _arm_radix4_butterfly_q31_mve(
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -200,7 +171,7 @@ static void _arm_radix4_butterfly_q31_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -281,7 +252,7 @@ static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pS
         pCoef += 4;
         vecDiff = vhsubq(vecIn0, vecIn1);
 
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q31x4_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 4;
 
@@ -326,14 +297,13 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
     q31x4_t vecTmp0, vecTmp1;
     q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
     q31x4_t vecA, vecB, vecC, vecD;
-    q31x4_t vecW;
     uint32_t  blkCnt;
     uint32_t  n1, n2;
     uint32_t  stage = 0;
     int32_t  iter = 1;
-    static const uint32_t strides[4] = {
-        (0 - 16) * sizeof(q31_t *), (1 - 16) * sizeof(q31_t *),
-        (8 - 16) * sizeof(q31_t *), (9 - 16) * sizeof(q31_t *)
+    static const int32_t strides[4] = {
+        (0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *),
+        (8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *)
     };
 
     /*
@@ -346,26 +316,26 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
 
     for (int k = fftLen / 4u; k > 1; k >>= 2u)
     {
+        q31_t const *p_rearranged_twiddle_tab_stride2 =
+            &S->rearranged_twiddle_stride2[
+            S->rearranged_twiddle_tab_stride2_arr[stage]];
+        q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
+            S->rearranged_twiddle_tab_stride3_arr[stage]];
+        q31_t const *p_rearranged_twiddle_tab_stride1 =
+            &S->rearranged_twiddle_stride1[
+            S->rearranged_twiddle_tab_stride1_arr[stage]];
+
+        q31_t * pBase = pSrc;
         for (int i = 0; i < iter; i++)
         {
-            q31_t const *p_rearranged_twiddle_tab_stride2 =
-                &S->rearranged_twiddle_stride2[
-                S->rearranged_twiddle_tab_stride2_arr[stage]];
-            q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
-                S->rearranged_twiddle_tab_stride3_arr[stage]];
-            q31_t const *p_rearranged_twiddle_tab_stride1 =
-                &S->rearranged_twiddle_stride1[
-                S->rearranged_twiddle_tab_stride1_arr[stage]];
-
-            q31_t const *pW1, *pW2, *pW3;
-            q31_t    *inA = pSrc + CMPLX_DIM * i * n1;
+            q31_t    *inA = pBase;
             q31_t    *inB = inA + n2 * CMPLX_DIM;
             q31_t    *inC = inB + n2 * CMPLX_DIM;
             q31_t    *inD = inC + n2 * CMPLX_DIM;
-
-            pW1 = p_rearranged_twiddle_tab_stride1;
-            pW2 = p_rearranged_twiddle_tab_stride2;
-            pW3 = p_rearranged_twiddle_tab_stride3;
+            q31_t const *pW1 = p_rearranged_twiddle_tab_stride1;
+            q31_t const *pW2 = p_rearranged_twiddle_tab_stride2;
+            q31_t const *pW3 = p_rearranged_twiddle_tab_stride3;
+            q31x4_t    vecW;
 
             blkCnt = n2 / 2;
             /*
@@ -398,7 +368,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
                  */
                 vecW = vld1q(pW2);
                 pW2 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
 
                 vst1q(inB, vecTmp1);
                 inB += 4;
@@ -411,7 +381,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
                  */
                 vecW = vld1q(pW1);
                 pW1 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
                 vst1q(inC, vecTmp1);
                 inC += 4;
                 /*
@@ -423,7 +393,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
                  */
                 vecW = vld1q(pW3);
                 pW3 += 4;
-                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+                vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
                 vst1q(inD, vecTmp1);
                 inD += 4;
 
@@ -432,6 +402,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
 
                 blkCnt--;
             }
+            pBase +=  CMPLX_DIM * n1;
         }
         n1 = n2;
         n2 >>= 2u;
@@ -450,7 +421,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -536,7 +507,7 @@ static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q
         pCoef += 4;
         vecDiff = vhsubq(vecIn0, vecIn1);
 
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw, q31x4_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 4;
 
@@ -600,55 +571,55 @@ void arm_cfft_q31(
         q31_t * pSrc,
         uint8_t ifftFlag,
         uint8_t bitReverseFlag)
-{                                                                             
-        uint32_t fftLen = S->fftLen;     
-
-        if (ifftFlag == 1U) {                                                            
-                                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:                                                                   
-                _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, fftLen); 
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_inverse_q31_mve(S, pSrc, fftLen);              
-                break;                                                                   
-            }  
-        } else {                                                                         
-            switch (fftLen) {                                                            
-            case 16:                                                                     
-            case 64:                                                                     
-            case 256:                                                                    
-            case 1024:                                                                   
-            case 4096:    
-                _arm_radix4_butterfly_q31_mve(S, pSrc, fftLen);         
-                break;                                                                   
-                                                                                         
-            case 32:                                                                     
-            case 128:                                                                    
-            case 512:                                                                    
-            case 2048:                                                                   
-                arm_cfft_radix4by2_q31_mve(S, pSrc, fftLen);                      
-                break;                                                                   
-            }                                                                            
-        }                                                                                
-                                                                                         
-                                                                                         
-        if (bitReverseFlag) 
-        {                                                            
-            
+{
+        uint32_t fftLen = S->fftLen;
+
+        if (ifftFlag == 1U) {
+
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_inverse_q31_mve(S, pSrc, fftLen);
+                break;
+            }
+        } else {
+            switch (fftLen) {
+            case 16:
+            case 64:
+            case 256:
+            case 1024:
+            case 4096:
+                _arm_radix4_butterfly_q31_mve(S, pSrc, fftLen);
+                break;
+
+            case 32:
+            case 128:
+            case 512:
+            case 2048:
+                arm_cfft_radix4by2_q31_mve(S, pSrc, fftLen);
+                break;
+            }
+        }
+
+
+        if (bitReverseFlag)
+        {
+
             arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
-       
-        } 
+
+        }
 }
-#else 
+#else
 
 extern void arm_radix4_butterfly_q31(
         q31_t * pSrc,
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
index d45ec07..c95a01f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_f16.c
  * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -148,22 +148,22 @@ uint16_t twidCoefModifier)
       l = i + n2;
 
       /*  Butterfly implementation */
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-      p0 = xt * cosVal;
-      p1 = yt * sinVal;
-      p2 = yt * cosVal;
-      p3 = xt * sinVal;
+      p0 = (_Float16)xt * (_Float16)cosVal;
+      p1 = (_Float16)yt * (_Float16)sinVal;
+      p2 = (_Float16)yt * (_Float16)cosVal;
+      p3 = (_Float16)xt * (_Float16)sinVal;
 
       pSrc[2 * i]     = a0;
       pSrc[2 * i + 1] = a1;
 
-      pSrc[2 * l]     = p0 + p1;
-      pSrc[2 * l + 1] = p2 - p3;
+      pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+      pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
       i++;
    }                             // groups loop end
@@ -190,22 +190,22 @@ uint16_t twidCoefModifier)
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 + p1;
-            pSrc[2 * l + 1] = p2 - p3;
+            pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                        // butterfly loop end
@@ -217,11 +217,11 @@ uint16_t twidCoefModifier)
    // loop for butterfly
    for (i = 0; i < fftLen; i += 2)
    {
-      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
-      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
-      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
+      a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
 
       pSrc[2 * i] = a0;
       pSrc[2 * i + 1] = a1;
@@ -253,22 +253,22 @@ uint16_t twidCoefModifier)
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 + p1;
-            pSrc[2 * l + 1] = p2 - p3;
+            pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
             i += n1;
          } while (i < fftLen);
@@ -309,22 +309,22 @@ float16_t onebyfftLen)
       ia += twidCoefModifier;
 
       l = i + n2;
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-      p0 = xt * cosVal;
-      p1 = yt * sinVal;
-      p2 = yt * cosVal;
-      p3 = xt * sinVal;
+      p0 = (_Float16)xt * (_Float16)cosVal;
+      p1 = (_Float16)yt * (_Float16)sinVal;
+      p2 = (_Float16)yt * (_Float16)cosVal;
+      p3 = (_Float16)xt * (_Float16)sinVal;
 
       pSrc[2 * i] = a0;
       pSrc[2 * i + 1] = a1;
 
-      pSrc[2 * l]     = p0 - p1;
-      pSrc[2 * l + 1] = p2 + p3;
+      pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+      pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
    }                             // groups loop end
 
    twidCoefModifier <<= 1U;
@@ -349,22 +349,22 @@ float16_t onebyfftLen)
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 - p1;
-            pSrc[2 * l + 1] = p2 + p3;
+            pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                 // butterfly loop end
@@ -377,16 +377,16 @@ float16_t onebyfftLen)
    // loop for butterfly
    for (i = 0; i < fftLen; i += 2)
    {
-      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
-      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
 
-      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
-      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
 
-      p0 = a0 * onebyfftLen;
-      p2 = xt * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p3 = yt * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p2 = (_Float16)xt * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p3 = (_Float16)yt * (_Float16)onebyfftLen;
 
       pSrc[2 * i] = p0;
       pSrc[2 * i + 1] = p1;
@@ -418,22 +418,22 @@ float16_t onebyfftLen)
          do
          {
             l = i + n2;
-            a0 = pSrc[2 * i] + pSrc[2 * l];
-            xt = pSrc[2 * i] - pSrc[2 * l];
+            a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+            xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+            a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
 
-            p0 = xt * cosVal;
-            p1 = yt * sinVal;
-            p2 = yt * cosVal;
-            p3 = xt * sinVal;
+            p0 = (_Float16)xt * (_Float16)cosVal;
+            p1 = (_Float16)yt * (_Float16)sinVal;
+            p2 = (_Float16)yt * (_Float16)cosVal;
+            p3 = (_Float16)xt * (_Float16)sinVal;
 
             pSrc[2 * i] = a0;
             pSrc[2 * i + 1] = a1;
 
-            pSrc[2 * l]     = p0 - p1;
-            pSrc[2 * l + 1] = p2 + p3;
+            pSrc[2 * l]     = (_Float16)p0 - (_Float16)p1;
+            pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
 
             i += n1;
          } while ( i < fftLen );                    // butterfly loop end
@@ -451,16 +451,16 @@ float16_t onebyfftLen)
    {
       l = i + n2;
 
-      a0 = pSrc[2 * i] + pSrc[2 * l];
-      xt = pSrc[2 * i] - pSrc[2 * l];
+      a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+      xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
 
-      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
-      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
+      yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
 
-      p0 = a0 * onebyfftLen;
-      p2 = xt * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p3 = yt * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p2 = (_Float16)xt * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p3 = (_Float16)yt * (_Float16)onebyfftLen;
 
       pSrc[2 * i] = p0;
       pSrc[2U * l] = p2;
@@ -475,4 +475,5 @@ float16_t onebyfftLen)
 
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c
index bdad034..dba45f4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_f32.c
  * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f16.c
index 4671765..17e7c80 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f16.c
@@ -5,10 +5,13 @@
  * Title:        arm_cfft_radix2_init_f16.c
  * Description:  Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
  *
- * Target Processor: Cortex-M cores
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c
index db63a37..71fba78 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_init_f32.c
  * Description:  Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c
index 934cd54..f07cad4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_init_q15.c
  * Description:  Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -94,7 +94,7 @@ arm_status arm_cfft_radix2_init_q15(
   /*  Initialise the Flag for calculation Bit reversal or not */
   S->bitReverseFlag = bitReverseFlag;
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
 
   /*  Initializations of structure parameters depending on the FFT length */
   switch (S->fftLen)
@@ -107,7 +107,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 1U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) armBitRevIndexTable_fixed_4096;
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
 
     break;
 
@@ -119,7 +119,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 2U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[1];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
 
     break;
 
@@ -127,7 +127,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 1024 point FFT */
     S->twidCoefModifier = 4U;
     S->bitRevFactor = 4U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[3];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
 
     break;
 
@@ -135,7 +135,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 512 point FFT */
     S->twidCoefModifier = 8U;
     S->bitRevFactor = 8U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[7];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
 
     break;
 
@@ -143,7 +143,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 256 point FFT */
     S->twidCoefModifier = 16U;
     S->bitRevFactor = 16U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[15];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
 
     break;
 
@@ -151,7 +151,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 128 point FFT */
     S->twidCoefModifier = 32U;
     S->bitRevFactor = 32U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[31];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
 
     break;
 
@@ -159,7 +159,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 64 point FFT */
     S->twidCoefModifier = 64U;
     S->bitRevFactor = 64U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[63];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
 
     break;
 
@@ -167,7 +167,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 32 point FFT */
     S->twidCoefModifier = 128U;
     S->bitRevFactor = 128U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[127];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
 
     break;
 
@@ -175,7 +175,7 @@ arm_status arm_cfft_radix2_init_q15(
     /*  Initializations of structure parameters for 16 point FFT */
     S->twidCoefModifier = 256U;
     S->bitRevFactor = 256U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[255];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
 
     break;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c
index 8f171f7..5823559 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_init_q31.c
  * Description:  Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -95,7 +95,7 @@ arm_status arm_cfft_radix2_init_q31(
   /*  Initialise the Flag for calculation Bit reversal or not */
   S->bitReverseFlag = bitReverseFlag;
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
 
   /*  Initializations of Instance structure depending on the FFT length */
   switch (S->fftLen)
@@ -107,7 +107,7 @@ arm_status arm_cfft_radix2_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 1U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) armBitRevIndexTable_fixed_4096;
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
     break;
 
     /*  Initializations of structure parameters for 2048 point FFT */
@@ -117,7 +117,7 @@ arm_status arm_cfft_radix2_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 2U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[1];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
     break;
 
     /*  Initializations of structure parameters for 1024 point FFT */
@@ -127,7 +127,7 @@ arm_status arm_cfft_radix2_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 4U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[3];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
     break;
 
     /*  Initializations of structure parameters for 512 point FFT */
@@ -137,42 +137,42 @@ arm_status arm_cfft_radix2_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 8U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[7];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
     break;
 
   case 256U:
     /*  Initializations of structure parameters for 256 point FFT */
     S->twidCoefModifier = 16U;
     S->bitRevFactor = 16U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[15];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
     break;
 
   case 128U:
     /*  Initializations of structure parameters for 128 point FFT */
     S->twidCoefModifier = 32U;
     S->bitRevFactor = 32U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[31];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
     break;
 
   case 64U:
     /*  Initializations of structure parameters for 64 point FFT */
     S->twidCoefModifier = 64U;
     S->bitRevFactor = 64U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[63];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
     break;
 
   case 32U:
     /*  Initializations of structure parameters for 32 point FFT */
     S->twidCoefModifier = 128U;
     S->bitRevFactor = 128U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[127];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
     break;
 
   case 16U:
     /*  Initializations of structure parameters for 16 point FFT */
     S->twidCoefModifier = 256U;
     S->bitRevFactor = 256U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[255];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
     break;
 
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c
index 8d3347b..49f6d9d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_q15.c
  * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c
index d647396..6f36181 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix2_q31.c
  * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
index cbc0552..4c46bc1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_f16.c
  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -97,22 +97,22 @@ void arm_cfft_radix4by2_f16(
         l = i + n2;
        
         /*  Butterfly implementation */
-        a0 = pSrc[2 * i] + pSrc[2 * l];
-        xt = pSrc[2 * i] - pSrc[2 * l];
+        a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
+        xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
   
-        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
-        a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+        yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
+        a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
   
-        p0 = xt * cosVal;
-        p1 = yt * sinVal;
-        p2 = yt * cosVal;
-        p3 = xt * sinVal;
+        p0 = (_Float16)xt * (_Float16)cosVal;
+        p1 = (_Float16)yt * (_Float16)sinVal;
+        p2 = (_Float16)yt * (_Float16)cosVal;
+        p3 = (_Float16)xt * (_Float16)sinVal;
   
         pSrc[2 * i]     = a0;
         pSrc[2 * i + 1] = a1;
   
-        pSrc[2 * l]     = p0 + p1;
-        pSrc[2 * l + 1] = p2 - p3;
+        pSrc[2 * l]     = (_Float16)p0 + (_Float16)p1;
+        pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
 
     }
 
@@ -230,13 +230,13 @@ uint16_t twidCoefModifier)
       ydIn = pSrc[(2U * i3) + 1U];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /*  index calculation for the coefficients */
       ia2 = ia1 + ia1;
@@ -244,31 +244,31 @@ uint16_t twidCoefModifier)
       si2 = pCoef[(ia2 * 2U) + 1U];
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
       /* xb - xd */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
       /* yb - yd */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+      pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+      pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
       /* (xa - xc) + (yb - yd) */
-      Xb12C_out = (Xaminusc + Ybminusd);
+      Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yb12C_out = (Yaminusc - Xbminusd);
+      Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
       /* (xa + xc) - (xb + xd) */
-      Xc12C_out = (Xaplusc - Xbplusd);
+      Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* (ya + yc) - (yb + yd) */
-      Yc12C_out = (Yaplusc - Ybplusd);
+      Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* (xa - xc) - (yb - yd) */
-      Xd12C_out = (Xaminusc - Ybminusd);
+      Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yd12C_out = (Xbminusd + Yaminusc);
+      Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
       co1 = pCoef[ia1 * 2U];
       si1 = pCoef[(ia1 * 2U) + 1U];
@@ -278,38 +278,38 @@ uint16_t twidCoefModifier)
       co3 = pCoef[ia3 * 2U];
       si3 = pCoef[(ia3 * 2U) + 1U];
 
-      Xb12_out = Xb12C_out * co1;
-      Yb12_out = Yb12C_out * co1;
-      Xc12_out = Xc12C_out * co2;
-      Yc12_out = Yc12C_out * co2;
-      Xd12_out = Xd12C_out * co3;
-      Yd12_out = Yd12C_out * co3;
+      Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+      Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+      Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+      Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+      Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+      Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
       //Xb12_out -= Yb12C_out * si1;
-      p0 = Yb12C_out * si1;
+      p0 = (_Float16)Yb12C_out * (_Float16)si1;
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
       //Yb12_out += Xb12C_out * si1;
-      p1 = Xb12C_out * si1;
+      p1 = (_Float16)Xb12C_out * (_Float16)si1;
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       //Xc12_out -= Yc12C_out * si2;
-      p2 = Yc12C_out * si2;
+      p2 = (_Float16)Yc12C_out * (_Float16)si2;
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
       //Yc12_out += Xc12C_out * si2;
-      p3 = Xc12C_out * si2;
+      p3 = (_Float16)Xc12C_out * (_Float16)si2;
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
       //Xd12_out -= Yd12C_out * si3;
-      p4 = Yd12C_out * si3;
+      p4 = (_Float16)Yd12C_out * (_Float16)si3;
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
       //Yd12_out += Xd12C_out * si3;
-      p5 = Xd12C_out * si3;
+      p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-      Xb12_out += p0;
-      Yb12_out -= p1;
-      Xc12_out += p2;
-      Yc12_out -= p3;
-      Xd12_out += p4;
-      Yd12_out -= p5;
+      Xb12_out += (_Float16)p0;
+      Yb12_out -= (_Float16)p1;
+      Xc12_out += (_Float16)p2;
+      Yc12_out -= (_Float16)p3;
+      Xd12_out += (_Float16)p4;
+      Yd12_out -= (_Float16)p5;
 
       /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
       pSrc[2U * i1] = Xc12_out;
@@ -387,71 +387,71 @@ uint16_t twidCoefModifier)
             ydIn = pSrc[(2U * i3) + 1U];
 
             /* xa - xc */
-            Xaminusc = xaIn - xcIn;
+            Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
             /* (xb - xd) */
-            Xbminusd = xbIn - xdIn;
+            Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
             /* ya - yc */
-            Yaminusc = yaIn - ycIn;
+            Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
             /* (yb - yd) */
-            Ybminusd = ybIn - ydIn;
+            Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
             /* xa + xc */
-            Xaplusc = xaIn + xcIn;
+            Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
             /* xb + xd */
-            Xbplusd = xbIn + xdIn;
+            Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
             /* ya + yc */
-            Yaplusc = yaIn + ycIn;
+            Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
             /* yb + yd */
-            Ybplusd = ybIn + ydIn;
+            Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
             /* (xa - xc) + (yb - yd) */
-            Xb12C_out = (Xaminusc + Ybminusd);
+            Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
             /* (ya - yc) -  (xb - xd) */
-            Yb12C_out = (Yaminusc - Xbminusd);
+            Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
             /* xa + xc -(xb + xd) */
-            Xc12C_out = (Xaplusc - Xbplusd);
+            Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
             /* (ya + yc) - (yb + yd) */
-            Yc12C_out = (Yaplusc - Ybplusd);
+            Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
             /* (xa - xc) - (yb - yd) */
-            Xd12C_out = (Xaminusc - Ybminusd);
+            Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
             /* (ya - yc) +  (xb - xd) */
-            Yd12C_out = (Xbminusd + Yaminusc);
+            Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
-            pSrc[(2U * i0)] = Xaplusc + Xbplusd;
-            pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+            pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
+            pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
-            Xb12_out = Xb12C_out * co1;
-            Yb12_out = Yb12C_out * co1;
-            Xc12_out = Xc12C_out * co2;
-            Yc12_out = Yc12C_out * co2;
-            Xd12_out = Xd12C_out * co3;
-            Yd12_out = Yd12C_out * co3;
+            Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+            Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+            Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+            Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+            Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+            Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
             //Xb12_out -= Yb12C_out * si1;
-            p0 = Yb12C_out * si1;
+            p0 = (_Float16)Yb12C_out * (_Float16)si1;
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
             //Yb12_out += Xb12C_out * si1;
-            p1 = Xb12C_out * si1;
+            p1 = (_Float16)Xb12C_out * (_Float16)si1;
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             //Xc12_out -= Yc12C_out * si2;
-            p2 = Yc12C_out * si2;
+            p2 = (_Float16)Yc12C_out * (_Float16)si2;
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
             //Yc12_out += Xc12C_out * si2;
-            p3 = Xc12C_out * si2;
+            p3 = (_Float16)Xc12C_out * (_Float16)si2;
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
             //Xd12_out -= Yd12C_out * si3;
-            p4 = Yd12C_out * si3;
+            p4 = (_Float16)Yd12C_out * (_Float16)si3;
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
             //Yd12_out += Xd12C_out * si3;
-            p5 = Xd12C_out * si3;
+            p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-            Xb12_out += p0;
-            Yb12_out -= p1;
-            Xc12_out += p2;
-            Yc12_out -= p3;
-            Xd12_out += p4;
-            Yd12_out -= p5;
+            Xb12_out += (_Float16)p0;
+            Yb12_out -= (_Float16)p1;
+            Xc12_out += (_Float16)p2;
+            Yc12_out -= (_Float16)p3;
+            Xd12_out += (_Float16)p4;
+            Yd12_out -= (_Float16)p5;
 
             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
             pSrc[2U * i1] = Xc12_out;
@@ -494,45 +494,45 @@ uint16_t twidCoefModifier)
       ydIn = ptr1[7];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
 
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
 
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
 
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
 
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /* (xb-xd) */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
 
       /* (yb-yd) */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      a0 = (Xaplusc + Xbplusd);
+      a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
       /* ya' = ya + yb + yc + yd */
-      a1 = (Yaplusc + Ybplusd);
+      a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
       /* xc' = (xa-xb+xc-xd) */
-      a2 = (Xaplusc - Xbplusd);
+      a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* yc' = (ya-yb+yc-yd) */
-      a3 = (Yaplusc - Ybplusd);
+      a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* xb' = (xa+yb-xc-yd) */
-      a4 = (Xaminusc + Ybminusd);
+      a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* yb' = (ya-xb-yc+xd) */
-      a5 = (Yaminusc - Xbminusd);
+      a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
       /* xd' = (xa-yb-xc+yd)) */
-      a6 = (Xaminusc - Ybminusd);
+      a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* yd' = (ya+xb-yc-xd) */
-      a7 = (Xbminusd + Yaminusc);
+      a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);
 
       ptr1[0] = a0;
       ptr1[1] = a1;
@@ -590,70 +590,70 @@ uint16_t twidCoefModifier)
             i3 = i2 + n2;
 
             /* xa + xc */
-            r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+            r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
 
             /* xa - xc */
-            r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+            r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
 
             /* ya + yc */
-            s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+            s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
             /* ya - yc */
-            s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+            s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
             /* xb + xd */
-            t1 = pSrc[2U * i1] + pSrc[2U * i3];
+            t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
             /* xa' = xa + xb + xc + xd */
-            pSrc[2U * i0] = r1 + t1;
+            pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
 
             /* xa + xc -(xb + xd) */
-            r1 = r1 - t1;
+            r1 = (_Float16)r1 - (_Float16)t1;
 
             /* yb + yd */
-            t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+            t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
             /* ya' = ya + yb + yc + yd */
-            pSrc[(2U * i0) + 1U] = s1 + t2;
+            pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
 
             /* (ya + yc) - (yb + yd) */
-            s1 = s1 - t2;
+            s1 = (_Float16)s1 - (_Float16)t2;
 
             /* (yb - yd) */
-            t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+            t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
             /* (xb - xd) */
-            t2 = pSrc[2U * i1] - pSrc[2U * i3];
+            t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
             /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
-            pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
+            pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);
 
             /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
-            pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
+            pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);
 
             /* (xa - xc) + (yb - yd) */
-            r1 = r2 + t1;
+            r1 = (_Float16)r2 + (_Float16)t1;
 
             /* (xa - xc) - (yb - yd) */
-            r2 = r2 - t1;
+            r2 = (_Float16)r2 - (_Float16)t1;
 
             /* (ya - yc) -  (xb - xd) */
-            s1 = s2 - t2;
+            s1 = (_Float16)s2 - (_Float16)t2;
 
             /* (ya - yc) +  (xb - xd) */
-            s2 = s2 + t2;
+            s2 = (_Float16)s2 + (_Float16)t2;
 
             /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
-            pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
+            pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);
 
             /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
-            pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
+            pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);
 
             /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
-            pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
+            pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);
 
             /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
-            pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
+            pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);
 
             i0 += n1;
          } while ( i0 < fftLen);
@@ -734,13 +734,13 @@ float16_t onebyfftLen)
       ydIn = pSrc[(2U * i3) + 1U];
 
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /*  index calculation for the coefficients */
       ia2 = ia1 + ia1;
@@ -748,32 +748,32 @@ float16_t onebyfftLen)
       si2 = pCoef[(ia2 * 2U) + 1U];
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
       /* xb - xd */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
       /* yb - yd */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+      pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
 
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+      pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
       /* (xa - xc) - (yb - yd) */
-      Xb12C_out = (Xaminusc - Ybminusd);
+      Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* (ya - yc) + (xb - xd) */
-      Yb12C_out = (Yaminusc + Xbminusd);
+      Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
       /* (xa + xc) - (xb + xd) */
-      Xc12C_out = (Xaplusc - Xbplusd);
+      Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* (ya + yc) - (yb + yd) */
-      Yc12C_out = (Yaplusc - Ybplusd);
+      Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* (xa - xc) + (yb - yd) */
-      Xd12C_out = (Xaminusc + Ybminusd);
+      Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* (ya - yc) - (xb - xd) */
-      Yd12C_out = (Yaminusc - Xbminusd);
+      Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
       co1 = pCoef[ia1 * 2U];
       si1 = pCoef[(ia1 * 2U) + 1U];
@@ -783,38 +783,38 @@ float16_t onebyfftLen)
       co3 = pCoef[ia3 * 2U];
       si3 = pCoef[(ia3 * 2U) + 1U];
 
-      Xb12_out = Xb12C_out * co1;
-      Yb12_out = Yb12C_out * co1;
-      Xc12_out = Xc12C_out * co2;
-      Yc12_out = Yc12C_out * co2;
-      Xd12_out = Xd12C_out * co3;
-      Yd12_out = Yd12C_out * co3;
+      Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+      Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+      Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+      Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+      Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+      Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
       //Xb12_out -= Yb12C_out * si1;
-      p0 = Yb12C_out * si1;
+      p0 = (_Float16)Yb12C_out * (_Float16)si1;
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
       //Yb12_out += Xb12C_out * si1;
-      p1 = Xb12C_out * si1;
+      p1 = (_Float16)Xb12C_out * (_Float16)si1;
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       //Xc12_out -= Yc12C_out * si2;
-      p2 = Yc12C_out * si2;
+      p2 = (_Float16)Yc12C_out * (_Float16)si2;
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
       //Yc12_out += Xc12C_out * si2;
-      p3 = Xc12C_out * si2;
+      p3 = (_Float16)Xc12C_out * (_Float16)si2;
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
       //Xd12_out -= Yd12C_out * si3;
-      p4 = Yd12C_out * si3;
+      p4 = (_Float16)Yd12C_out * (_Float16)si3;
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
       //Yd12_out += Xd12C_out * si3;
-      p5 = Xd12C_out * si3;
+      p5 =(_Float16) Xd12C_out * (_Float16)si3;
 
-      Xb12_out -= p0;
-      Yb12_out += p1;
-      Xc12_out -= p2;
-      Yc12_out += p3;
-      Xd12_out -= p4;
-      Yd12_out += p5;
+      Xb12_out -= (_Float16)p0;
+      Yb12_out += (_Float16)p1;
+      Xc12_out -= (_Float16)p2;
+      Yc12_out += (_Float16)p3;
+      Xd12_out -= (_Float16)p4;
+      Yd12_out += (_Float16)p5;
 
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
       pSrc[2U * i1] = Xc12_out;
@@ -891,71 +891,71 @@ float16_t onebyfftLen)
             ydIn = pSrc[(2U * i3) + 1U];
 
             /* xa - xc */
-            Xaminusc = xaIn - xcIn;
+            Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
             /* (xb - xd) */
-            Xbminusd = xbIn - xdIn;
+            Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
             /* ya - yc */
-            Yaminusc = yaIn - ycIn;
+            Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
             /* (yb - yd) */
-            Ybminusd = ybIn - ydIn;
+            Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
             /* xa + xc */
-            Xaplusc = xaIn + xcIn;
+            Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
             /* xb + xd */
-            Xbplusd = xbIn + xdIn;
+            Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
             /* ya + yc */
-            Yaplusc = yaIn + ycIn;
+            Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
             /* yb + yd */
-            Ybplusd = ybIn + ydIn;
+            Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
             /* (xa - xc) - (yb - yd) */
-            Xb12C_out = (Xaminusc - Ybminusd);
+            Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
             /* (ya - yc) +  (xb - xd) */
-            Yb12C_out = (Yaminusc + Xbminusd);
+            Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
             /* xa + xc -(xb + xd) */
-            Xc12C_out = (Xaplusc - Xbplusd);
+            Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
             /* (ya + yc) - (yb + yd) */
-            Yc12C_out = (Yaplusc - Ybplusd);
+            Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
             /* (xa - xc) + (yb - yd) */
-            Xd12C_out = (Xaminusc + Ybminusd);
+            Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
             /* (ya - yc) -  (xb - xd) */
-            Yd12C_out = (Yaminusc - Xbminusd);
+            Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
-            pSrc[(2U * i0)] = Xaplusc + Xbplusd;
-            pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+            pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
+            pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
 
-            Xb12_out = Xb12C_out * co1;
-            Yb12_out = Yb12C_out * co1;
-            Xc12_out = Xc12C_out * co2;
-            Yc12_out = Yc12C_out * co2;
-            Xd12_out = Xd12C_out * co3;
-            Yd12_out = Yd12C_out * co3;
+            Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
+            Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
+            Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
+            Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
+            Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
+            Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
             //Xb12_out -= Yb12C_out * si1;
-            p0 = Yb12C_out * si1;
+            p0 = (_Float16)Yb12C_out * (_Float16)si1;
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
             //Yb12_out += Xb12C_out * si1;
-            p1 = Xb12C_out * si1;
+            p1 = (_Float16)Xb12C_out * (_Float16)si1;
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             //Xc12_out -= Yc12C_out * si2;
-            p2 = Yc12C_out * si2;
+            p2 = (_Float16)Yc12C_out * (_Float16)si2;
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
             //Yc12_out += Xc12C_out * si2;
-            p3 = Xc12C_out * si2;
+            p3 = (_Float16)Xc12C_out * (_Float16)si2;
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
             //Xd12_out -= Yd12C_out * si3;
-            p4 = Yd12C_out * si3;
+            p4 = (_Float16)Yd12C_out * (_Float16)si3;
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
             //Yd12_out += Xd12C_out * si3;
-            p5 = Xd12C_out * si3;
+            p5 = (_Float16)Xd12C_out * (_Float16)si3;
 
-            Xb12_out -= p0;
-            Yb12_out += p1;
-            Xc12_out -= p2;
-            Yc12_out += p3;
-            Xd12_out -= p4;
-            Yd12_out += p5;
+            Xb12_out -= (_Float16)p0;
+            Yb12_out += (_Float16)p1;
+            Xc12_out -= (_Float16)p2;
+            Yc12_out += (_Float16)p3;
+            Xd12_out -= (_Float16)p4;
+            Yd12_out += (_Float16)p5;
 
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
             pSrc[2U * i1] = Xc12_out;
@@ -1000,54 +1000,54 @@ float16_t onebyfftLen)
 
       /*  Butterfly implementation */
       /* xa + xc */
-      Xaplusc = xaIn + xcIn;
+      Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
 
       /* xa - xc */
-      Xaminusc = xaIn - xcIn;
+      Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
 
       /* ya + yc */
-      Yaplusc = yaIn + ycIn;
+      Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
 
       /* ya - yc */
-      Yaminusc = yaIn - ycIn;
+      Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
 
       /* xb + xd */
-      Xbplusd = xbIn + xdIn;
+      Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
 
       /* yb + yd */
-      Ybplusd = ybIn + ydIn;
+      Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
 
       /* (xb-xd) */
-      Xbminusd = xbIn - xdIn;
+      Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
 
       /* (yb-yd) */
-      Ybminusd = ybIn - ydIn;
+      Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
 
       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
-      a0 = (Xaplusc + Xbplusd);
+      a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
       /* ya' = (ya+yb+yc+yd) * onebyfftLen */
-      a1 = (Yaplusc + Ybplusd);
+      a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
       /* xc' = (xa-xb+xc-xd) * onebyfftLen */
-      a2 = (Xaplusc - Xbplusd);
+      a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
       /* yc' = (ya-yb+yc-yd) * onebyfftLen  */
-      a3 = (Yaplusc - Ybplusd);
+      a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
       /* xb' = (xa-yb-xc+yd) * onebyfftLen */
-      a4 = (Xaminusc - Ybminusd);
+      a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
       /* yb' = (ya+xb-yc-xd) * onebyfftLen */
-      a5 = (Yaminusc + Xbminusd);
+      a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
       /* xd' = (xa-yb-xc+yd) * onebyfftLen */
-      a6 = (Xaminusc + Ybminusd);
+      a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
       /* yd' = (ya-xb-yc+xd) * onebyfftLen */
-      a7 = (Yaminusc - Xbminusd);
+      a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
 
-      p0 = a0 * onebyfftLen;
-      p1 = a1 * onebyfftLen;
-      p2 = a2 * onebyfftLen;
-      p3 = a3 * onebyfftLen;
-      p4 = a4 * onebyfftLen;
-      p5 = a5 * onebyfftLen;
-      p6 = a6 * onebyfftLen;
-      p7 = a7 * onebyfftLen;
+      p0 = (_Float16)a0 * (_Float16)onebyfftLen;
+      p1 = (_Float16)a1 * (_Float16)onebyfftLen;
+      p2 = (_Float16)a2 * (_Float16)onebyfftLen;
+      p3 = (_Float16)a3 * (_Float16)onebyfftLen;
+      p4 = (_Float16)a4 * (_Float16)onebyfftLen;
+      p5 = (_Float16)a5 * (_Float16)onebyfftLen;
+      p6 = (_Float16)a6 * (_Float16)onebyfftLen;
+      p7 = (_Float16)a7 * (_Float16)onebyfftLen;
 
       /* xa' = (xa+xb+xc+xd) * onebyfftLen */
       ptr1[0] = p0;
@@ -1116,70 +1116,70 @@ float16_t onebyfftLen)
             i3 = i2 + n2;
 
             /* xa + xc */
-            r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+            r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
 
             /* xa - xc */
-            r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+            r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
 
             /* ya + yc */
-            s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+            s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
             /* ya - yc */
-            s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+            s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
             /* xb + xd */
-            t1 = pSrc[2U * i1] + pSrc[2U * i3];
+            t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
             /* xa' = xa + xb + xc + xd */
-            pSrc[2U * i0] = r1 + t1;
+            pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
 
             /* xa + xc -(xb + xd) */
-            r1 = r1 - t1;
+            r1 = (_Float16)r1 - (_Float16)t1;
 
             /* yb + yd */
-            t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+            t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
             /* ya' = ya + yb + yc + yd */
-            pSrc[(2U * i0) + 1U] = s1 + t2;
+            pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
 
             /* (ya + yc) - (yb + yd) */
-            s1 = s1 - t2;
+            s1 = (_Float16)s1 - (_Float16)t2;
 
             /* (yb - yd) */
-            t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+            t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
             /* (xb - xd) */
-            t2 = pSrc[2U * i1] - pSrc[2U * i3];
+            t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
             /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
-            pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
+            pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
 
             /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
-            pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
+            pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
 
             /* (xa - xc) - (yb - yd) */
-            r1 = r2 - t1;
+            r1 = (_Float16)r2 - (_Float16)t1;
 
             /* (xa - xc) + (yb - yd) */
-            r2 = r2 + t1;
+            r2 = (_Float16)r2 + (_Float16)t1;
 
             /* (ya - yc) +  (xb - xd) */
-            s1 = s2 + t2;
+            s1 = (_Float16)s2 + (_Float16)t2;
 
             /* (ya - yc) -  (xb - xd) */
-            s2 = s2 - t2;
+            s2 = (_Float16)s2 - (_Float16)t2;
 
             /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
-            pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
+            pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
 
             /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
-            pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
+            pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
 
             /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
-            pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
+            pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
 
             /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
-            pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
+            pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
 
             i0 += n1;
          } while ( i0 < fftLen);
@@ -1202,74 +1202,75 @@ float16_t onebyfftLen)
 
       /*  Butterfly implementation */
       /* xa + xc */
-      r1 = pSrc[2U * i0] + pSrc[2U * i2];
+      r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
 
       /* xa - xc */
-      r2 = pSrc[2U * i0] - pSrc[2U * i2];
+      r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
 
       /* ya + yc */
-      s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+      s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
 
       /* ya - yc */
-      s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+      s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
 
       /* xc + xd */
-      t1 = pSrc[2U * i1] + pSrc[2U * i3];
+      t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
 
       /* xa' = xa + xb + xc + xd */
-      pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
+      pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
 
       /* (xa + xb) - (xc + xd) */
-      r1 = r1 - t1;
+      r1 = (_Float16)r1 - (_Float16)t1;
 
       /* yb + yd */
-      t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+      t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
 
       /* ya' = ya + yb + yc + yd */
-      pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
+      pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
 
       /* (ya + yc) - (yb + yd) */
-      s1 = s1 - t2;
+      s1 = (_Float16)s1 - (_Float16)t2;
 
       /* (yb-yd) */
-      t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+      t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
 
       /* (xb-xd) */
-      t2 = pSrc[2U * i1] - pSrc[2U * i3];
+      t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
 
       /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
-      pSrc[2U * i1] = r1 * onebyfftLen;
+      pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
 
       /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
-      pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
+      pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
 
       /* (xa - xc) - (yb-yd) */
-      r1 = r2 - t1;
+      r1 = (_Float16)r2 - (_Float16)t1;
 
       /* (xa - xc) + (yb-yd) */
-      r2 = r2 + t1;
+      r2 = (_Float16)r2 + (_Float16)t1;
 
       /* (ya - yc) + (xb-xd) */
-      s1 = s2 + t2;
+      s1 = (_Float16)s2 + (_Float16)t2;
 
       /* (ya - yc) - (xb-xd) */
-      s2 = s2 - t2;
+      s2 = (_Float16)s2 - (_Float16)t2;
 
       /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
-      pSrc[2U * i2] = r1 * onebyfftLen;
+      pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
 
       /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
-      pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
+      pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
 
       /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
-      pSrc[2U * i3] = r2 * onebyfftLen;
+      pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
 
       /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
-      pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
+      pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
    }
 
 #endif /* #if defined (ARM_MATH_DSP) */
 }
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
index 1bc2f77..4c7020a 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_f32.c
  * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f16.c
index d83e138..152542d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f16.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_init_f16.c
  * Description:  Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c
index d218140..3d1a5ef 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_init_f32.c
  * Description:  Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c
index 49858ee..c4a024e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_init_q15.c
  * Description:  Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -93,7 +93,7 @@ arm_status arm_cfft_radix4_init_q15(
   /*  Initialise the Flag for calculation Bit reversal or not */
   S->bitReverseFlag = bitReverseFlag;
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
 
   /*  Initializations of structure parameters depending on the FFT length */
   switch (S->fftLen)
@@ -106,7 +106,7 @@ arm_status arm_cfft_radix4_init_q15(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 1U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) armBitRevIndexTable_fixed_4096;
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
 
     break;
 
@@ -114,7 +114,7 @@ arm_status arm_cfft_radix4_init_q15(
     /*  Initializations of structure parameters for 1024 point FFT */
     S->twidCoefModifier = 4U;
     S->bitRevFactor = 4U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[3];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
 
     break;
 
@@ -122,7 +122,7 @@ arm_status arm_cfft_radix4_init_q15(
     /*  Initializations of structure parameters for 256 point FFT */
     S->twidCoefModifier = 16U;
     S->bitRevFactor = 16U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[15];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
 
     break;
 
@@ -130,7 +130,7 @@ arm_status arm_cfft_radix4_init_q15(
     /*  Initializations of structure parameters for 64 point FFT */
     S->twidCoefModifier = 64U;
     S->bitRevFactor = 64U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[63];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
 
     break;
 
@@ -138,7 +138,7 @@ arm_status arm_cfft_radix4_init_q15(
     /*  Initializations of structure parameters for 16 point FFT */
     S->twidCoefModifier = 256U;
     S->bitRevFactor = 256U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[255];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
 
     break;
 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c
index 6cde656..9b6273f 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix4_init_q31.c
  * Description:  Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -80,7 +80,7 @@ arm_status arm_cfft_radix4_init_q31(
 
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
 
   /*  Initialise the default arm status */
   status = ARM_MATH_SUCCESS;
@@ -93,7 +93,7 @@ arm_status arm_cfft_radix4_init_q31(
   /*  Initialise the Flag for calculation Bit reversal or not */
   S->bitReverseFlag = bitReverseFlag;
 
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
 
   /*  Initializations of Instance structure depending on the FFT length */
   switch (S->fftLen)
@@ -105,7 +105,7 @@ arm_status arm_cfft_radix4_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 1U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) armBitRevIndexTable_fixed_4096;
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
     break;
 
     /*  Initializations of structure parameters for 1024 point FFT */
@@ -115,28 +115,28 @@ arm_status arm_cfft_radix4_init_q31(
     /*  Initialise the bit reversal table modifier */
     S->bitRevFactor = 4U;
     /*  Initialise the bit reversal table pointer */
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[3];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
     break;
 
   case 256U:
     /*  Initializations of structure parameters for 256 point FFT */
     S->twidCoefModifier = 16U;
     S->bitRevFactor = 16U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[15];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
     break;
 
   case 64U:
     /*  Initializations of structure parameters for 64 point FFT */
     S->twidCoefModifier = 64U;
     S->bitRevFactor = 64U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[63];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
     break;
 
   case 16U:
     /*  Initializations of structure parameters for 16 point FFT */
     S->twidCoefModifier = 256U;
     S->bitRevFactor = 256U;
-    S->pBitRevTable = (uint16_t *) & armBitRevIndexTable_fixed_4096[255];
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
     break;
 
   default:
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
index 33edbf1..33b5029 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
@@ -6,13 +6,13 @@
  * Description:  This file has function definition of Radix-4 FFT & IFFT function and
  *               In-place bit reversal using bit reversal table
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -72,8 +72,21 @@ void arm_bitreversal_q15(
                  Hence the output format is different for different FFT sizes.
                  The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
   @par
-                 \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
-                 \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
+                
+| CFFT Size | Input format  | Output format | Number of bits to upscale |
+| --------: | ------------: | ------------: | ------------------------: |
+| 16        | 1.15          | 5.11          | 4                         |
+| 64        | 1.15          | 7.9           | 6                         |
+| 256       | 1.15          | 9.7           | 8                         |
+| 1024      | 1.15          | 11.5          | 10                        |
+
+| CIFFT Size | Input format  | Output format | Number of bits to upscale |
+| ---------: | ------------: | ------------: | ------------------------: |
+| 16         | 1.15          | 5.11          | 0                         |
+| 64         | 1.15          | 7.9           | 0                         |
+| 256        | 1.15          | 9.7           | 0                         |
+| 1024       | 1.15          | 11.5          | 0                         |
+
  */
 
 void arm_cfft_radix4_q15(
@@ -497,16 +510,16 @@ void arm_radix4_butterfly_q15(
   do
   {
     /* Read xa (real), ya(imag) input */
-    xaya = read_q15x2_ia ((q15_t **) &ptr1);
+    xaya = read_q15x2_ia (&ptr1);
 
     /* Read xb (real), yb(imag) input */
-    xbyb = read_q15x2_ia ((q15_t **) &ptr1);
+    xbyb = read_q15x2_ia (&ptr1);
 
     /* Read xc (real), yc(imag) input */
-    xcyc = read_q15x2_ia ((q15_t **) &ptr1);
+    xcyc = read_q15x2_ia (&ptr1);
 
     /* Read xd (real), yd(imag) input */
-    xdyd = read_q15x2_ia ((q15_t **) &ptr1);
+    xdyd = read_q15x2_ia (&ptr1);
 
     /* R = packed((ya + yc), (xa + xc)) */
     R = __QADD16(xaya, xcyc);
@@ -1360,16 +1373,16 @@ void arm_radix4_butterfly_inverse_q15(
   do
   {
     /* Read xa (real), ya(imag) input */
-    xaya = read_q15x2_ia ((q15_t **) &ptr1);
+    xaya = read_q15x2_ia (&ptr1);
 
     /* Read xb (real), yb(imag) input */
-    xbyb = read_q15x2_ia ((q15_t **) &ptr1);
+    xbyb = read_q15x2_ia (&ptr1);
 
     /* Read xc (real), yc(imag) input */
-    xcyc = read_q15x2_ia ((q15_t **) &ptr1);
+    xcyc = read_q15x2_ia (&ptr1);
 
     /* Read xd (real), yd(imag) input */
-    xdyd = read_q15x2_ia ((q15_t **) &ptr1);
+    xdyd = read_q15x2_ia (&ptr1);
 
     /* R = packed((ya + yc), (xa + xc)) */
     R = __QADD16(xaya, xcyc);
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c
index 7e5d38b..bad1640 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c
@@ -6,13 +6,13 @@
  * Description:  This file has function definition of Radix-4 FFT & IFFT function and
  *               In-place bit reversal using bit reversal table
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -70,8 +70,21 @@ void arm_bitreversal_q31(
                  Hence the output format is different for different FFT sizes.
                  The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
   @par
-                 \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
-                 \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
+
+| CFFT Size | Input format  | Output format | Number of bits to upscale |
+| --------: | ------------: | ------------: | ------------------------: |
+| 16        | 1.31          | 5.27          | 4                         |
+| 64        | 1.31          | 7.25          | 6                         |
+| 256       | 1.31          | 9.23          | 8                         |
+| 1024      | 1.31          | 11.21         | 10                        |
+
+| CIFFT Size | Input format  | Output format | Number of bits to upscale |
+| ---------: | ------------: | ------------: | ------------------------: |
+| 16         | 1.31          | 5.27          | 0                         |
+| 64         | 1.31          | 7.25          | 0                         |
+| 256        | 1.31          | 9.23          | 0                         |
+| 1024       | 1.31          | 11.21         | 0                         |
+
  */
 
 void arm_cfft_radix4_q31(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
index d9582f5..77dfc5b 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_cfft_radix8_f16.c
  * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -61,7 +63,7 @@ void arm_radix8_butterfly_f16(
    float16_t p1, p2, p3, p4;
    float16_t co2, co3, co4, co5, co6, co7, co8;
    float16_t si2, si3, si4, si5, si6, si7, si8;
-   const float16_t C81 = 0.70710678118f;
+   const float16_t C81 = 0.70710678118f16;
 
    n2 = fftLen;
 
@@ -80,58 +82,58 @@ void arm_radix8_butterfly_f16(
          i6 = i5 + n2;
          i7 = i6 + n2;
          i8 = i7 + n2;
-         r1 = pSrc[2 * i1] + pSrc[2 * i5];
-         r5 = pSrc[2 * i1] - pSrc[2 * i5];
-         r2 = pSrc[2 * i2] + pSrc[2 * i6];
-         r6 = pSrc[2 * i2] - pSrc[2 * i6];
-         r3 = pSrc[2 * i3] + pSrc[2 * i7];
-         r7 = pSrc[2 * i3] - pSrc[2 * i7];
-         r4 = pSrc[2 * i4] + pSrc[2 * i8];
-         r8 = pSrc[2 * i4] - pSrc[2 * i8];
-         t1 = r1 - r3;
-         r1 = r1 + r3;
-         r3 = r2 - r4;
-         r2 = r2 + r4;
-         pSrc[2 * i1] = r1 + r2;
-         pSrc[2 * i5] = r1 - r2;
-         r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
-         s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
-         r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
-         s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
-         s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
-         s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
-         r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
-         s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
-         t2 = r1 - s3;
-         r1 = r1 + s3;
-         s3 = r2 - r4;
-         r2 = r2 + r4;
-         pSrc[2 * i1 + 1] = r1 + r2;
-         pSrc[2 * i5 + 1] = r1 - r2;
-         pSrc[2 * i3]     = t1 + s3;
-         pSrc[2 * i7]     = t1 - s3;
-         pSrc[2 * i3 + 1] = t2 - r3;
-         pSrc[2 * i7 + 1] = t2 + r3;
-         r1 = (r6 - r8) * C81;
-         r6 = (r6 + r8) * C81;
-         r2 = (s6 - s8) * C81;
-         s6 = (s6 + s8) * C81;
-         t1 = r5 - r1;
-         r5 = r5 + r1;
-         r8 = r7 - r6;
-         r7 = r7 + r6;
-         t2 = s5 - r2;
-         s5 = s5 + r2;
-         s8 = s7 - s6;
-         s7 = s7 + s6;
-         pSrc[2 * i2]     = r5 + s7;
-         pSrc[2 * i8]     = r5 - s7;
-         pSrc[2 * i6]     = t1 + s8;
-         pSrc[2 * i4]     = t1 - s8;
-         pSrc[2 * i2 + 1] = s5 - r7;
-         pSrc[2 * i8 + 1] = s5 + r7;
-         pSrc[2 * i6 + 1] = t2 - r8;
-         pSrc[2 * i4 + 1] = t2 + r8;
+         r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
+         r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
+         r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
+         r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
+         r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
+         r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
+         r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
+         r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
+         t1 = (_Float16)r1 - (_Float16)r3;
+         r1 = (_Float16)r1 + (_Float16)r3;
+         r3 = (_Float16)r2 - (_Float16)r4;
+         r2 = (_Float16)r2 + (_Float16)r4;
+         pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
+         pSrc[2 * i5] = (_Float16)r1 - (_Float16)r2;
+         r1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
+         s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
+         r2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
+         s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
+         s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
+         s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
+         r4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
+         s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
+         t2 = (_Float16)r1 - (_Float16)s3;
+         r1 = (_Float16)r1 + (_Float16)s3;
+         s3 = (_Float16)r2 - (_Float16)r4;
+         r2 = (_Float16)r2 + (_Float16)r4;
+         pSrc[2 * i1 + 1] = (_Float16)r1 + (_Float16)r2;
+         pSrc[2 * i5 + 1] = (_Float16)r1 - (_Float16)r2;
+         pSrc[2 * i3]     = (_Float16)t1 + (_Float16)s3;
+         pSrc[2 * i7]     = (_Float16)t1 - (_Float16)s3;
+         pSrc[2 * i3 + 1] = (_Float16)t2 - (_Float16)r3;
+         pSrc[2 * i7 + 1] = (_Float16)t2 + (_Float16)r3;
+         r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
+         r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
+         r2 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
+         s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
+         t1 = (_Float16)r5 - (_Float16)r1;
+         r5 = (_Float16)r5 + (_Float16)r1;
+         r8 = (_Float16)r7 - (_Float16)r6;
+         r7 = (_Float16)r7 + (_Float16)r6;
+         t2 = (_Float16)s5 - (_Float16)r2;
+         s5 = (_Float16)s5 + (_Float16)r2;
+         s8 = (_Float16)s7 - (_Float16)s6;
+         s7 = (_Float16)s7 + (_Float16)s6;
+         pSrc[2 * i2]     = (_Float16)r5 + (_Float16)s7;
+         pSrc[2 * i8]     = (_Float16)r5 - (_Float16)s7;
+         pSrc[2 * i6]     = (_Float16)t1 + (_Float16)s8;
+         pSrc[2 * i4]     = (_Float16)t1 - (_Float16)s8;
+         pSrc[2 * i2 + 1] = (_Float16)s5 - (_Float16)r7;
+         pSrc[2 * i8 + 1] = (_Float16)s5 + (_Float16)r7;
+         pSrc[2 * i6 + 1] = (_Float16)t2 - (_Float16)r8;
+         pSrc[2 * i4 + 1] = (_Float16)t2 + (_Float16)r8;
 
          i1 += n1;
       } while (i1 < fftLen);
@@ -181,100 +183,100 @@ void arm_radix8_butterfly_f16(
             i6 = i5 + n2;
             i7 = i6 + n2;
             i8 = i7 + n2;
-            r1 = pSrc[2 * i1] + pSrc[2 * i5];
-            r5 = pSrc[2 * i1] - pSrc[2 * i5];
-            r2 = pSrc[2 * i2] + pSrc[2 * i6];
-            r6 = pSrc[2 * i2] - pSrc[2 * i6];
-            r3 = pSrc[2 * i3] + pSrc[2 * i7];
-            r7 = pSrc[2 * i3] - pSrc[2 * i7];
-            r4 = pSrc[2 * i4] + pSrc[2 * i8];
-            r8 = pSrc[2 * i4] - pSrc[2 * i8];
-            t1 = r1 - r3;
-            r1 = r1 + r3;
-            r3 = r2 - r4;
-            r2 = r2 + r4;
-            pSrc[2 * i1] = r1 + r2;
-            r2 = r1 - r2;
-            s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
-            s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
-            s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
-            s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
-            s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
-            s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
-            s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
-            s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
-            t2 = s1 - s3;
-            s1 = s1 + s3;
-            s3 = s2 - s4;
-            s2 = s2 + s4;
-            r1 = t1 + s3;
-            t1 = t1 - s3;
-            pSrc[2 * i1 + 1] = s1 + s2;
-            s2 = s1 - s2;
-            s1 = t2 - r3;
-            t2 = t2 + r3;
-            p1 = co5 * r2;
-            p2 = si5 * s2;
-            p3 = co5 * s2;
-            p4 = si5 * r2;
-            pSrc[2 * i5]     = p1 + p2;
-            pSrc[2 * i5 + 1] = p3 - p4;
-            p1 = co3 * r1;
-            p2 = si3 * s1;
-            p3 = co3 * s1;
-            p4 = si3 * r1;
-            pSrc[2 * i3]     = p1 + p2;
-            pSrc[2 * i3 + 1] = p3 - p4;
-            p1 = co7 * t1;
-            p2 = si7 * t2;
-            p3 = co7 * t2;
-            p4 = si7 * t1;
-            pSrc[2 * i7]     = p1 + p2;
-            pSrc[2 * i7 + 1] = p3 - p4;
-            r1 = (r6 - r8) * C81;
-            r6 = (r6 + r8) * C81;
-            s1 = (s6 - s8) * C81;
-            s6 = (s6 + s8) * C81;
-            t1 = r5 - r1;
-            r5 = r5 + r1;
-            r8 = r7 - r6;
-            r7 = r7 + r6;
-            t2 = s5 - s1;
-            s5 = s5 + s1;
-            s8 = s7 - s6;
-            s7 = s7 + s6;
-            r1 = r5 + s7;
-            r5 = r5 - s7;
-            r6 = t1 + s8;
-            t1 = t1 - s8;
-            s1 = s5 - r7;
-            s5 = s5 + r7;
-            s6 = t2 - r8;
-            t2 = t2 + r8;
-            p1 = co2 * r1;
-            p2 = si2 * s1;
-            p3 = co2 * s1;
-            p4 = si2 * r1;
-            pSrc[2 * i2]     = p1 + p2;
-            pSrc[2 * i2 + 1] = p3 - p4;
-            p1 = co8 * r5;
-            p2 = si8 * s5;
-            p3 = co8 * s5;
-            p4 = si8 * r5;
-            pSrc[2 * i8]     = p1 + p2;
-            pSrc[2 * i8 + 1] = p3 - p4;
-            p1 = co6 * r6;
-            p2 = si6 * s6;
-            p3 = co6 * s6;
-            p4 = si6 * r6;
-            pSrc[2 * i6]     = p1 + p2;
-            pSrc[2 * i6 + 1] = p3 - p4;
-            p1 = co4 * t1;
-            p2 = si4 * t2;
-            p3 = co4 * t2;
-            p4 = si4 * t1;
-            pSrc[2 * i4]     = p1 + p2;
-            pSrc[2 * i4 + 1] = p3 - p4;
+            r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
+            r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
+            r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
+            r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
+            r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
+            r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
+            r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
+            r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
+            t1 = (_Float16)r1 - (_Float16)r3;
+            r1 = (_Float16)r1 + (_Float16)r3;
+            r3 = (_Float16)r2 - (_Float16)r4;
+            r2 = (_Float16)r2 + (_Float16)r4;
+            pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
+            r2 = (_Float16)r1 - (_Float16)r2;
+            s1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
+            s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
+            s2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
+            s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
+            s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
+            s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
+            s4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
+            s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
+            t2 = (_Float16)s1 - (_Float16)s3;
+            s1 = (_Float16)s1 + (_Float16)s3;
+            s3 = (_Float16)s2 - (_Float16)s4;
+            s2 = (_Float16)s2 + (_Float16)s4;
+            r1 = (_Float16)t1 + (_Float16)s3;
+            t1 = (_Float16)t1 - (_Float16)s3;
+            pSrc[2 * i1 + 1] = (_Float16)s1 + (_Float16)s2;
+            s2 = (_Float16)s1 - (_Float16)s2;
+            s1 = (_Float16)t2 - (_Float16)r3;
+            t2 = (_Float16)t2 + (_Float16)r3;
+            p1 = (_Float16)co5 * (_Float16)r2;
+            p2 = (_Float16)si5 * (_Float16)s2;
+            p3 = (_Float16)co5 * (_Float16)s2;
+            p4 = (_Float16)si5 * (_Float16)r2;
+            pSrc[2 * i5]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i5 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co3 * (_Float16)r1;
+            p2 = (_Float16)si3 * (_Float16)s1;
+            p3 = (_Float16)co3 * (_Float16)s1;
+            p4 = (_Float16)si3 * (_Float16)r1;
+            pSrc[2 * i3]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i3 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co7 * (_Float16)t1;
+            p2 = (_Float16)si7 * (_Float16)t2;
+            p3 = (_Float16)co7 * (_Float16)t2;
+            p4 = (_Float16)si7 * (_Float16)t1;
+            pSrc[2 * i7]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i7 + 1] = (_Float16)p3 - (_Float16)p4;
+            r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
+            r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
+            s1 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
+            s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
+            t1 = (_Float16)r5 - (_Float16)r1;
+            r5 = (_Float16)r5 + (_Float16)r1;
+            r8 = (_Float16)r7 - (_Float16)r6;
+            r7 = (_Float16)r7 + (_Float16)r6;
+            t2 = (_Float16)s5 - (_Float16)s1;
+            s5 = (_Float16)s5 + (_Float16)s1;
+            s8 = (_Float16)s7 - (_Float16)s6;
+            s7 = (_Float16)s7 + (_Float16)s6;
+            r1 = (_Float16)r5 + (_Float16)s7;
+            r5 = (_Float16)r5 - (_Float16)s7;
+            r6 = (_Float16)t1 + (_Float16)s8;
+            t1 = (_Float16)t1 - (_Float16)s8;
+            s1 = (_Float16)s5 - (_Float16)r7;
+            s5 = (_Float16)s5 + (_Float16)r7;
+            s6 = (_Float16)t2 - (_Float16)r8;
+            t2 = (_Float16)t2 + (_Float16)r8;
+            p1 = (_Float16)co2 * (_Float16)r1;
+            p2 = (_Float16)si2 * (_Float16)s1;
+            p3 = (_Float16)co2 * (_Float16)s1;
+            p4 = (_Float16)si2 * (_Float16)r1;
+            pSrc[2 * i2]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i2 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co8 * (_Float16)r5;
+            p2 = (_Float16)si8 * (_Float16)s5;
+            p3 = (_Float16)co8 * (_Float16)s5;
+            p4 = (_Float16)si8 * (_Float16)r5;
+            pSrc[2 * i8]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i8 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co6 * (_Float16)r6;
+            p2 = (_Float16)si6 * (_Float16)s6;
+            p3 = (_Float16)co6 * (_Float16)s6;
+            p4 = (_Float16)si6 * (_Float16)r6;
+            pSrc[2 * i6]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i6 + 1] = (_Float16)p3 - (_Float16)p4;
+            p1 = (_Float16)co4 * (_Float16)t1;
+            p2 = (_Float16)si4 * (_Float16)t2;
+            p3 = (_Float16)co4 * (_Float16)t2;
+            p4 = (_Float16)si4 * (_Float16)t1;
+            pSrc[2 * i4]     = (_Float16)p1 + (_Float16)p2;
+            pSrc[2 * i4 + 1] = (_Float16)p3 - (_Float16)p4;
 
             i1 += n1;
          } while (i1 < fftLen);
@@ -287,4 +289,5 @@ void arm_radix8_butterfly_f16(
 }
 
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c
index a37d50a..328a725 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_cfft_radix8_f32.c
  * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c
index 2214ca6..7367b11 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_f32.c
  * Description:  Processing function of DCT4 & IDCT4 F32
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -62,11 +62,15 @@
  
   @par           Algorithm
                    The N-point type-IV DCT is defined as a real, linear transformation by the formula:
-                   \image html DCT4Equation.gif
+                   \f[
+                   X_c(k) = \sqrt{\frac{2}{N}}\sum_{n=0}^{N-1} x(n)\cos\Big[\Big(n+\frac{1}{2}\Big)\Big(k+\frac{1}{2}\Big)\frac{\pi}{N}\Big]
+                   \f]
                    where <code>k = 0, 1, 2, ..., N-1</code>
   @par
                    Its inverse is defined as follows:
-                   \image html IDCT4Equation.gif
+                   \f[
+                   x(n) = \sqrt{\frac{2}{N}}\sum_{k=0}^{N-1} X_c(k)\cos\Big[\Big(n+\frac{1}{2}\Big)\Big(k+\frac{1}{2}\Big)\frac{\pi}{N}\Big]
+                   \f]
                    where <code>n = 0, 1, 2, ..., N-1</code>
   @par
                    The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
index adac8a4..957e01e 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_init_f32.c
  * Description:  Initialization function of DCT-4 & IDCT4 F32
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -56,7 +56,13 @@
                    The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                    Floating-point normalizing factors are mentioned in the table below for different DCT sizes:
 
-                   \image html dct4NormalizingF32Table.gif
+ 
+| DCT Size  | Normalizing factor value  | 
+| --------: | ------------------------: | 
+| 2048      | 0.03125                   | 
+| 512       | 0.0625                    | 
+| 128       | 0.125                     | 
+
  */
 
 arm_status arm_dct4_init_f32(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
index 20a2cd2..0cd18fb 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_init_q15.c
  * Description:  Initialization function of DCT-4 & IDCT4 Q15
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -56,7 +56,12 @@
                    The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                    Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes:
 
-                   \image html dct4NormalizingQ15Table.gif
+| DCT Size  | Normalizing factor value (hexadecimal)  | 
+| --------: | ---------------------------------------:| 
+| 2048      | 0x400                                   | 
+| 512       | 0x800                                   | 
+| 128       | 0x1000                                  | 
+
  */
 
 arm_status arm_dct4_init_q15(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
index 0ee5da3..1d7d2f1 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_init_q31.c
  * Description:  Initialization function of DCT-4 & IDCT4 Q31
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -55,8 +55,13 @@
   @par           Normalizing factor:
                    The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
                    Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes:
+ 
+| DCT Size  | Normalizing factor value (hexadecimal)  | 
+| --------: | ---------------------------------------:| 
+| 2048      | 0x4000000                               | 
+| 512       | 0x8000000                               | 
+| 128       | 0x10000000                              | 
 
-                   \image html dct4NormalizingQ31Table.gif
  */
 
 arm_status arm_dct4_init_q31(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c
index b590c38..a9d4e78 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_q15.c
  * Description:  Processing function of DCT4 & IDCT4 Q15
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -46,8 +46,14 @@
                    Internally inputs are downscaled in the RFFT process function to avoid overflows.
                    Number of bits downscaled, depends on the size of the transform. The input and output
                    formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
+ 
+| DCT Size  | Input format  | Output format | Number of bits to upscale |
+| --------: | ------------: | ------------: | ------------------------: |
+| 2048      | 1.15          | 11.5          | 10                        |
+| 512       | 1.15          | 9.7           | 8                         |
+| 128       | 1.15          | 7.9           | 6                         |
+
 
-                   \image html dct4FormatsQ15Table.gif
  */
 
 void arm_dct4_q15(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c
index 259dc9a..5976bd0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_dct4_q31.c
  * Description:  Processing function of DCT4 & IDCT4 Q31
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -50,7 +50,12 @@
                    The input and output formats for different DCT sizes and number of bits to upscale are
                    mentioned in the table below:
 
-                   \image html dct4FormatsQ31Table.gif
+| DCT Size  | Input format  | Output format | Number of bits to upscale |
+| --------: | ------------: | ------------: | ------------------------: |
+| 2048      | 2.30          | 12.20         | 11                        |
+| 512       | 2.30          | 10.22         | 9                         |
+| 128       | 2.30          | 8.24          | 7                         |
+
  */
 
 void arm_dct4_q31(
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f16.c
new file mode 100644
index 0000000..f9cf6fd
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f16.c
@@ -0,0 +1,165 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_f16.c
+ * Description:  MFCC function for the f16 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions_f16.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+/**
+  @defgroup MFCC MFCC
+
+  MFCC Transform
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+/**
+  @brief         MFCC F16
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values
+  @param[inout]     pTmp  points to a temporary buffer of complex
+
+  @return        none
+
+  @par           Description
+                   The number of input samples is the FFT length used
+                   when initializing the instance data structure.
+
+                   The temporary buffer has a 2*fft length size when MFCC
+                   is implemented with CFFT.
+                   It has length FFT Length + 2 when implemented with RFFT
+                   (default implementation).
+
+                   The source buffer is modified by this function.
+
+ */
+void arm_mfcc_f16(
+  const arm_mfcc_instance_f16 * S,
+  float16_t *pSrc,
+  float16_t *pDst,
+  float16_t *pTmp
+  )
+{
+  float16_t maxValue;
+  uint32_t  index;
+  uint32_t i;
+  float16_t result;
+  const float16_t *coefs=S->filterCoefs;
+  arm_matrix_instance_f16 pDctMat;
+
+  /* Normalize by the peak magnitude; skip when the frame is all zeros,
+     otherwise 1/maxValue is +inf and 0*inf fills the frame with NaN */
+  arm_absmax_f16(pSrc,S->fftLen,&maxValue,&index);
+  if ((_Float16)maxValue != 0.0f16)
+  {
+    arm_scale_f16(pSrc,1.0f16/(_Float16)maxValue,pSrc,S->fftLen);
+  }
+
+  /* Multiply by window */
+  arm_mult_f16(pSrc,S->windowCoefs,pSrc,S->fftLen);
+
+  /* Compute spectrum magnitude
+  */
+#if defined(ARM_MFCC_CFFT_BASED)
+  /* some HW accelerator for CMSIS-DSP used in some boards
+     are only providing acceleration for CFFT.
+     With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
+     will be accelerated on those boards.
+
+     The default is to use RFFT
+  */
+  /* Convert from real to complex */
+  for(i=0; i < S->fftLen ; i++)
+  {
+    pTmp[2*i] = pSrc[i];
+    pTmp[2*i+1] = 0.0f16;
+  }
+  arm_cfft_f16(&(S->cfft),pTmp,0,1);
+#else
+  /* Default RFFT based implementation */
+  arm_rfft_fast_f16(&(S->rfft),pSrc,pTmp,0);
+  /* Unpack real values: move pTmp[1] to its own bin at index fftLen */
+  pTmp[S->fftLen]=pTmp[1];
+  pTmp[S->fftLen+1]=0.0f16;
+  pTmp[1]=0.0f16;
+#endif
+  /* NOTE(review): fftLen complex bins are requested in both builds; confirm pTmp sizing for RFFT */
+  arm_cmplx_mag_f16(pTmp,pSrc,S->fftLen);
+
+  /* Apply MEL filters: one dot product per filter over its own bin window */
+  for(i=0; i<S->nbMelFilters; i++)
+  {
+      arm_dot_prod_f16(pSrc+S->filterPos[i],
+        coefs,
+        S->filterLengths[i],
+        &result);
+
+      coefs += S->filterLengths[i];
+      pTmp[i] = result;
+
+  }
+
+  /* Compute the log (a small offset is added first so the argument is never 0) */
+  arm_offset_f16(pTmp,1.0e-4f16,pTmp,S->nbMelFilters);
+  arm_vlog_f16(pTmp,pTmp,S->nbMelFilters);
+
+  /* Multiply with the DCT matrix (nbDctOutputs x nbMelFilters) */
+  pDctMat.numRows=S->nbDctOutputs;
+  pDctMat.numCols=S->nbMelFilters;
+  pDctMat.pData=(float16_t*)S->dctCoefs;
+
+  arm_mat_vec_mult_f16(&pDctMat, pTmp, pDst);
+}
+
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f32.c
new file mode 100644
index 0000000..544f717
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f32.c
@@ -0,0 +1,154 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_f32.c
+ * Description:  MFCC function for the f32 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+/**
+  @brief         MFCC F32
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples
+  @param[out]     pDst  points to the output MFCC values
+  @param[inout]     pTmp  points to a temporary buffer of complex
+
+  @return        none
+
+  @par           Description
+                   The number of input samples is the FFT length used
+                   when initializing the instance data structure.
+
+                   The temporary buffer has a 2*fft length size when MFCC
+                   is implemented with CFFT.
+                   It has length FFT Length + 2 when implemented with RFFT
+                   (default implementation).
+
+                   The source buffer is modified by this function.
+
+ */
+void arm_mfcc_f32(
+  const arm_mfcc_instance_f32 * S,
+  float32_t *pSrc,
+  float32_t *pDst,
+  float32_t *pTmp
+  )
+{
+  float32_t maxValue;
+  uint32_t  index;
+  uint32_t i;
+  float32_t result;
+  const float32_t *coefs=S->filterCoefs;
+  arm_matrix_instance_f32 pDctMat;
+
+  /* Normalize by the peak magnitude; skip when the frame is all zeros,
+     otherwise 1.0f/maxValue is +inf and 0*inf fills the frame with NaN */
+  arm_absmax_f32(pSrc,S->fftLen,&maxValue,&index);
+  if (maxValue != 0.0f)
+  {
+    arm_scale_f32(pSrc,1.0f/maxValue,pSrc,S->fftLen);
+  }
+
+  /* Multiply by window */
+  arm_mult_f32(pSrc,S->windowCoefs,pSrc,S->fftLen);
+
+  /* Compute spectrum magnitude
+  */
+#if defined(ARM_MFCC_CFFT_BASED)
+  /* some HW accelerator for CMSIS-DSP used in some boards
+     are only providing acceleration for CFFT.
+     With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
+     will be accelerated on those boards.
+
+     The default is to use RFFT
+  */
+  /* Convert from real to complex */
+  for(i=0; i < S->fftLen ; i++)
+  {
+    pTmp[2*i] = pSrc[i];
+    pTmp[2*i+1] = 0.0f;
+  }
+  arm_cfft_f32(&(S->cfft),pTmp,0,1);
+#else
+  /* Default RFFT based implementation */
+  arm_rfft_fast_f32(&(S->rfft),pSrc,pTmp,0);
+  /* Unpack real values: move pTmp[1] to its own bin at index fftLen */
+  pTmp[S->fftLen]=pTmp[1];
+  pTmp[S->fftLen+1]=0.0f;
+  pTmp[1]=0.0f;
+#endif
+  /* NOTE(review): fftLen complex bins are requested in both builds; confirm pTmp sizing for RFFT */
+  arm_cmplx_mag_f32(pTmp,pSrc,S->fftLen);
+
+  /* Apply MEL filters: one dot product per filter over its own bin window */
+  for(i=0; i<S->nbMelFilters; i++)
+  {
+      arm_dot_prod_f32(pSrc+S->filterPos[i],
+        coefs,
+        S->filterLengths[i],
+        &result);
+
+      coefs += S->filterLengths[i];
+      pTmp[i] = result;
+
+  }
+
+  /* Compute the log (a small offset is added first so the argument is never 0) */
+  arm_offset_f32(pTmp,1.0e-6f,pTmp,S->nbMelFilters);
+  arm_vlog_f32(pTmp,pTmp,S->nbMelFilters);
+
+  /* Multiply with the DCT matrix (nbDctOutputs x nbMelFilters) */
+  pDctMat.numRows=S->nbDctOutputs;
+  pDctMat.numCols=S->nbMelFilters;
+  pDctMat.pData=(float32_t*)S->dctCoefs;
+
+  arm_mat_vec_mult_f32(&pDctMat, pTmp, pDst);
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f16.c
new file mode 100644
index 0000000..74b6c3d
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f16.c
@@ -0,0 +1,114 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_init_f16.c
+ * Description:  MFCC initialization function for the f16 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+
+/**
+  @brief         Initialization of the MFCC F16 instance structure
+  @param[out]    S       points to the mfcc instance structure
+  @param[in]     fftLen  fft length
+  @param[in]     nbMelFilters  number of Mel filters
+  @param[in]     nbDctOutputs  number of Dct outputs
+  @param[in]     dctCoefs  points to an array of DCT coefficients
+  @param[in]     filterPos  points to the array of filter positions
+  @param[in]     filterLengths  points to the array of filter lengths
+  @param[in]     filterCoefs  points to the array of filter coefficients
+  @param[in]     windowCoefs  points to the array of window coefficients
+
+  @return        error status
+
+  @par           Description
+                   The matrix of Mel filter coefficients is sparse.
+                   Most of the coefficients are zero.
+                   To avoid multiplying the spectrogram by those zeros, the
+                   filter is applied only to a given position in the spectrogram
+                   and on a given number of FFT bins (the filter length).
+                   It is the reason for the arrays filterPos and filterLengths.
+
+                   window coefficients can describe (for instance) a Hamming window.
+                   The array has the same size as the FFT length.
+
+                   The folder Scripts is containing a Python script which can be used
+                   to generate the filter, dct and window arrays.
+ */
+
+arm_status arm_mfcc_init_f16(
+  arm_mfcc_instance_f16 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float16_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float16_t *filterCoefs,
+  const float16_t *windowCoefs
+  )
+{
+  /* Keep the caller's coefficient tables by pointer; nothing is copied */
+  S->dctCoefs      = dctCoefs;
+  S->filterCoefs   = filterCoefs;
+  S->windowCoefs   = windowCoefs;
+  S->filterPos     = filterPos;
+  S->filterLengths = filterLengths;
+
+  /* Record the transform dimensions */
+  S->fftLen        = fftLen;
+  S->nbMelFilters  = nbMelFilters;
+  S->nbDctOutputs  = nbDctOutputs;
+
+  /* The FFT flavour is chosen at build time; its init status is returned */
+#if defined(ARM_MFCC_CFFT_BASED)
+  return arm_cfft_init_f16(&(S->cfft), fftLen);
+#else
+  return arm_rfft_fast_init_f16(&(S->rfft), fftLen);
+#endif
+}
+
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f32.c
new file mode 100644
index 0000000..9e0bf0c
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f32.c
@@ -0,0 +1,111 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_init_f32.c
+ * Description:  MFCC initialization function for the f32 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+
+
+
+/**
+  @brief         Initialization of the MFCC F32 instance structure
+  @param[out]    S       points to the mfcc instance structure
+  @param[in]     fftLen  fft length
+  @param[in]     nbMelFilters  number of Mel filters
+  @param[in]     nbDctOutputs  number of Dct outputs
+  @param[in]     dctCoefs  points to an array of DCT coefficients
+  @param[in]     filterPos  points to the array of filter positions
+  @param[in]     filterLengths  points to the array of filter lengths
+  @param[in]     filterCoefs  points to the array of filter coefficients
+  @param[in]     windowCoefs  points to the array of window coefficients
+
+  @return        error status
+
+  @par           Description
+                   The matrix of Mel filter coefficients is sparse.
+                   Most of the coefficients are zero.
+                   To avoid multiplying the spectrogram by those zeros, the
+                   filter is applied only to a given position in the spectrogram
+                   and on a given number of FFT bins (the filter length).
+                   It is the reason for the arrays filterPos and filterLengths.
+
+                   window coefficients can describe (for instance) a Hamming window.
+                   The array has the same size as the FFT length.
+
+                   The folder Scripts is containing a Python script which can be used
+                   to generate the filter, dct and window arrays.
+ */
+
+arm_status arm_mfcc_init_f32(
+  arm_mfcc_instance_f32 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const float32_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const float32_t *filterCoefs,
+  const float32_t *windowCoefs
+  )
+{
+  /* Keep the caller's coefficient tables by pointer; nothing is copied */
+  S->dctCoefs      = dctCoefs;
+  S->filterCoefs   = filterCoefs;
+  S->windowCoefs   = windowCoefs;
+  S->filterPos     = filterPos;
+  S->filterLengths = filterLengths;
+
+  /* Record the transform dimensions */
+  S->fftLen        = fftLen;
+  S->nbMelFilters  = nbMelFilters;
+  S->nbDctOutputs  = nbDctOutputs;
+
+  /* The FFT flavour is chosen at build time; its init status is returned */
+#if defined(ARM_MFCC_CFFT_BASED)
+  return arm_cfft_init_f32(&(S->cfft), fftLen);
+#else
+  return arm_rfft_fast_init_f32(&(S->rfft), fftLen);
+#endif
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q15.c
new file mode 100644
index 0000000..ccd6da9
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q15.c
@@ -0,0 +1,111 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_init_q15.c
+ * Description:  MFCC initialization function for the q15 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+
+
+
+/**
+  @brief         Initialization of the MFCC Q15 instance structure
+  @param[out]    S       points to the mfcc instance structure
+  @param[in]     fftLen  fft length
+  @param[in]     nbMelFilters  number of Mel filters
+  @param[in]     nbDctOutputs  number of Dct outputs
+  @param[in]     dctCoefs  points to an array of DCT coefficients
+  @param[in]     filterPos  points to the array of filter positions
+  @param[in]     filterLengths  points to the array of filter lengths
+  @param[in]     filterCoefs  points to the array of filter coefficients
+  @param[in]     windowCoefs  points to the array of window coefficients
+
+  @return        error status
+
+  @par           Description
+                   The matrix of Mel filter coefficients is sparse.
+                   Most of the coefficients are zero.
+                   To avoid multiplying the spectrogram by those zeros, the
+                   filter is applied only to a given position in the spectrogram
+                   and on a given number of FFT bins (the filter length).
+                   It is the reason for the arrays filterPos and filterLengths.
+
+                   window coefficients can describe (for instance) a Hamming window.
+                   The array has the same size as the FFT length.
+
+                   The folder Scripts is containing a Python script which can be used
+                   to generate the filter, dct and window arrays.
+ */
+
+arm_status arm_mfcc_init_q15(
+  arm_mfcc_instance_q15 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q15_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q15_t *filterCoefs,
+  const q15_t *windowCoefs
+  )
+{
+  /* Keep the caller's coefficient tables by pointer; nothing is copied */
+  S->dctCoefs      = dctCoefs;
+  S->filterCoefs   = filterCoefs;
+  S->windowCoefs   = windowCoefs;
+  S->filterPos     = filterPos;
+  S->filterLengths = filterLengths;
+
+  /* Record the transform dimensions */
+  S->fftLen        = fftLen;
+  S->nbMelFilters  = nbMelFilters;
+  S->nbDctOutputs  = nbDctOutputs;
+
+  /* The FFT flavour is chosen at build time; its init status is returned */
+#if defined(ARM_MFCC_CFFT_BASED)
+  return arm_cfft_init_q15(&(S->cfft), fftLen);
+#else
+  return arm_rfft_init_q15(&(S->rfft), fftLen, 0, 1);
+#endif
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q31.c
new file mode 100644
index 0000000..5573b33
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q31.c
@@ -0,0 +1,111 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_init_q31.c
+ * Description:  MFCC initialization function for the q31 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+  @ingroup groupTransforms
+ */
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+
+
+
+/**
+  @brief         Initialization of the MFCC Q31 instance structure
+  @param[out]    S       points to the mfcc instance structure
+  @param[in]     fftLen  fft length
+  @param[in]     nbMelFilters  number of Mel filters
+  @param[in]     nbDctOutputs  number of Dct outputs
+  @param[in]     dctCoefs  points to an array of DCT coefficients
+  @param[in]     filterPos  points to the array of filter positions
+  @param[in]     filterLengths  points to the array of filter lengths
+  @param[in]     filterCoefs  points to the array of filter coefficients
+  @param[in]     windowCoefs  points to the array of window coefficients
+
+  @return        error status
+
+  @par           Description
+                   The matrix of Mel filter coefficients is sparse.
+                   Most of the coefficients are zero.
+                   To avoid multiplying the spectrogram by those zeros, the
+                   filter is applied only to a given position in the spectrogram
+                   and on a given number of FFT bins (the filter length).
+                   It is the reason for the arrays filterPos and filterLengths.
+
+                   window coefficients can describe (for instance) a Hamming window.
+                   The array has the same size as the FFT length.
+
+                   The folder Scripts is containing a Python script which can be used
+                   to generate the filter, dct and window arrays.
+ */
+
+arm_status arm_mfcc_init_q31(
+  arm_mfcc_instance_q31 * S,
+  uint32_t fftLen,
+  uint32_t nbMelFilters,
+  uint32_t nbDctOutputs,
+  const q31_t *dctCoefs,
+  const uint32_t *filterPos,
+  const uint32_t *filterLengths,
+  const q31_t *filterCoefs,
+  const q31_t *windowCoefs
+  )
+{
+  /* Keep the caller's coefficient tables by pointer; nothing is copied */
+  S->dctCoefs      = dctCoefs;
+  S->filterCoefs   = filterCoefs;
+  S->windowCoefs   = windowCoefs;
+  S->filterPos     = filterPos;
+  S->filterLengths = filterLengths;
+
+  /* Record the transform dimensions */
+  S->fftLen        = fftLen;
+  S->nbMelFilters  = nbMelFilters;
+  S->nbDctOutputs  = nbDctOutputs;
+
+  /* The FFT flavour is chosen at build time; its init status is returned */
+#if defined(ARM_MFCC_CFFT_BASED)
+  return arm_cfft_init_q31(&(S->cfft), fftLen);
+#else
+  return arm_rfft_init_q31(&(S->rfft), fftLen, 0, 1);
+#endif
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q15.c
new file mode 100644
index 0000000..9cbd447
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q15.c
@@ -0,0 +1,203 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_q15.c
+ * Description:  MFCC function for the q15 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+
+/* Constants for Q15 implementation */
+#define LOG2TOLOG_Q15 0x02C5C860 /* ~0.6931 (ln 2) when read with 26 fractional bits */
+#define MICRO_Q15 0x00000219 /* small bias added to each Mel filter sum before the log */
+#define SHIFT_MELFILTER_SATURATION_Q15 10 /* right shift applied to each Mel sum before saturating to 31 bits */
+/**
+  @ingroup groupTransforms
+ */
+
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+/**
+  @brief         MFCC Q15
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples in Q15
+  @param[out]     pDst  points to the output MFCC values in q8.7 format
+  @param[inout]     pTmp  points to a temporary buffer of complex
+
+  @return        none
+
+  @par           Description
+                   The number of input samples is the FFT length used
+                   when initializing the instance data structure.
+
+                   The temporary buffer has a 2*fft length.
+
+                   The source buffer is modified by this function.
+
+                   The function may saturate. If the FFT length is too
+                   big and the number of MEL filters too small then the fixed
+                   point computations may saturate.
+
+ */
+
+arm_status arm_mfcc_q15(
+  const arm_mfcc_instance_q15 * S,
+  q15_t *pSrc,
+  q15_t *pDst,
+  q31_t *pTmp
+  )
+{
+    q15_t m;
+    uint32_t index;
+    uint32_t fftShift=0;
+    q31_t logExponent;
+    q63_t result;
+    arm_matrix_instance_q15 pDctMat;
+    uint32_t i;
+    uint32_t coefsPos;
+    uint32_t filterLimit;
+    q15_t *pTmp2=(q15_t*)pTmp; /* q15 view of the q31 scratch buffer */
+
+    arm_status status = ARM_MATH_SUCCESS;
+
+    // q15: normalize by the peak magnitude (skipped for an all-zero frame)
+    arm_absmax_q15(pSrc,S->fftLen,&m,&index);
+
+    if (m !=0)
+    {
+       q15_t quotient;
+       int16_t shift;
+
+       status = arm_divide_q15(0x7FFF,m,&quotient,&shift);
+       if (status != ARM_MATH_SUCCESS)
+       {
+          return(status);
+       }
+
+       arm_scale_q15(pSrc,quotient,shift,pSrc,S->fftLen);
+    }
+
+    /* Multiply by window */
+    // q15
+    arm_mult_q15(pSrc,S->windowCoefs, pSrc, S->fftLen);
+
+
+    /* Compute spectrum magnitude
+    */
+    fftShift = 31 - __CLZ(S->fftLen);
+#if defined(ARM_MFCC_CFFT_BASED)
+    /* some HW accelerator for CMSIS-DSP used in some boards
+       are only providing acceleration for CFFT.
+       With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
+       will be accelerated on those boards.
+
+       The default is to use RFFT
+    */
+    /* Convert from real to complex */
+    for(i=0; i < S->fftLen ; i++)
+    {
+      pTmp2[2*i] = pSrc[i];
+      pTmp2[2*i+1] = 0;
+    }
+    arm_cfft_q15(&(S->cfft),pTmp2,0,1);
+#else
+    /* Default RFFT based implementation */
+    arm_rfft_q15(&(S->rfft),pSrc,pTmp2);
+#endif
+    filterLimit = 1 + (S->fftLen >> 1);
+
+    /* Magnitude is taken for bins 0..fftLen/2 only */
+    // q15 - fftShift
+    arm_cmplx_mag_q15(pTmp2,pSrc,filterLimit);
+    // q14 - fftShift
+
+    /* Apply MEL filters */
+    coefsPos = 0;
+    for(i=0; i<S->nbMelFilters; i++)
+    {
+      arm_dot_prod_q15(pSrc+S->filterPos[i],
+        &(S->filterCoefs[coefsPos]),
+        S->filterLengths[i],
+        &result);
+
+      coefsPos += S->filterLengths[i];
+
+      // q34.29 - fftShift
+      result += MICRO_Q15;
+      result >>= SHIFT_MELFILTER_SATURATION_Q15;
+      // q34.29 - fftShift - satShift
+      pTmp[i] = __SSAT(result,31) ;
+
+    }
+
+
+    // q34.29 - fftShift - satShift
+    /* Compute the log */
+    arm_vlog_q31(pTmp,pTmp,S->nbMelFilters);
+
+
+    // q5.26
+    /* Compensate the log for the power-of-two scaling: exponent * ln(2) */
+    logExponent = fftShift + 2 + SHIFT_MELFILTER_SATURATION_Q15;
+    logExponent = logExponent * LOG2TOLOG_Q15;
+
+
+    // q8.26
+    arm_offset_q31(pTmp,logExponent,pTmp,S->nbMelFilters);
+    arm_shift_q31(pTmp,-19,pTmp,S->nbMelFilters);
+    for(i=0; i<S->nbMelFilters; i++)
+    {
+      pSrc[i] = (q15_t)__SSAT(pTmp[i],16); /* saturate first, then narrow to q15 */
+    }
+
+    // q8.7
+
+    pDctMat.numRows=S->nbDctOutputs;
+    pDctMat.numCols=S->nbMelFilters;
+    pDctMat.pData=(q15_t*)S->dctCoefs;
+
+    arm_mat_vec_mult_q15(&pDctMat, pSrc, pDst);
+
+    return(status);
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q31.c
new file mode 100644
index 0000000..6993c55
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q31.c
@@ -0,0 +1,202 @@
+#include "edge-impulse-sdk/dsp/config.hpp"
+#if EIDSP_LOAD_CMSIS_DSP_SOURCES
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mfcc_q31.c
+ * Description:  MFCC function for the q31 version
+ *
+ * $Date:        07 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/transform_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/basic_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/complex_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/fast_math_functions.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/matrix_functions.h"
+
+/* Constants for Q31 implementation */
+#define LOG2TOLOG_Q31 0x02C5C860
+#define MICRO_Q31 0x08637BD0
+#define SHIFT_MELFILTER_SATURATION_Q31 10
+/**
+  @ingroup groupTransforms
+ */
+
+
+
+/**
+  @addtogroup MFCC
+  @{
+ */
+
+/**
+  @brief         MFCC Q31
+  @param[in]    S       points to the mfcc instance structure
+  @param[in]     pSrc points to the input samples in Q31
+  @param[out]     pDst  points to the output MFCC values in q8.23 format
+  @param[inout]     pTmp  points to a temporary buffer of complex values (2*fftLen q31 elements)
+
+  @return        execution status: ARM_MATH_SUCCESS, or the error reported by arm_divide_q31
+
+  @par           Description
+                   The number of input samples is the FFT length used
+                   when initializing the instance data structure.
+
+                   The temporary buffer has a 2*fft length.
+
+                   The source buffer is modified by this function.
+
+                   The function may saturate. If the FFT length is too
+                   big and the number of MEL filters too small then the fixed
+                   point computations may saturate.
+
+ */
+
+
+arm_status arm_mfcc_q31(
+  const arm_mfcc_instance_q31 * S,
+  q31_t *pSrc,
+  q31_t *pDst,
+  q31_t *pTmp
+  )
+{
+    q31_t m;                 /* largest absolute input sample */
+    uint32_t index;          /* position of that sample; written by arm_absmax_q31, not read afterwards */
+    uint32_t fftShift=0;
+    q31_t logExponent;
+    q63_t result;            /* 64-bit accumulator for one MEL filter output */
+    arm_matrix_instance_q31 pDctMat;
+    uint32_t i;
+    uint32_t coefsPos;       /* running offset into S->filterCoefs */
+    uint32_t filterLimit;    /* number of spectrum bins whose magnitude is computed */
+    q31_t *pTmp2=(q31_t*)pTmp; /* alias of pTmp: receives the FFT output */
+    /* Stages: normalize, window, FFT magnitude, MEL filters, log, DCT. */
+    arm_status status = ARM_MATH_SUCCESS;
+    /* Normalize the input: find max |x|, then scale pSrc by 0x7FFFFFFF / max. */
+    // q31
+    arm_absmax_q31(pSrc,S->fftLen,&m,&index);
+    /* An all-zero frame (m == 0) is left unscaled; m is the divisor below. */
+    if (m !=0)
+    {
+       q31_t quotient;
+       int16_t shift;
+
+       status = arm_divide_q31(0x7FFFFFFF,m,&quotient,&shift);
+       if (status != ARM_MATH_SUCCESS)
+       {
+          return(status);
+       }
+ 
+       arm_scale_q31(pSrc,quotient,shift,pSrc,S->fftLen);
+    }
+
+    /* Apply the window coefficients to the samples, in place. */
+    // q31
+    arm_mult_q31(pSrc,S->windowCoefs, pSrc, S->fftLen);
+
+
+    /* Compute spectrum magnitude 
+    */
+    fftShift = 31 - __CLZ(S->fftLen); /* bit index of the highest set bit of fftLen */
+#if defined(ARM_MFCC_CFFT_BASED)
+    /* some HW accelerator for CMSIS-DSP used in some boards
+       are only providing acceleration for CFFT.
+       With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
+       will be accelerated on those boards.
+ 
+       The default is to use RFFT
+    */
+    /* Convert from real to complex */
+    for(i=0; i < S->fftLen ; i++)
+    {
+      pTmp2[2*i] = pSrc[i];
+      pTmp2[2*i+1] = 0;
+    }
+    arm_cfft_q31(&(S->cfft),pTmp2,0,1);
+#else
+    /* Default RFFT based implementation */
+    arm_rfft_q31(&(S->rfft),pSrc,pTmp2);
+#endif
+    filterLimit = 1 + (S->fftLen >> 1);
+
+    /* Magnitudes of the first filterLimit bins overwrite the start of pSrc. */
+    // q31 - fftShift
+    arm_cmplx_mag_q31(pTmp2,pSrc,filterLimit);
+    // q30 - fftShift
+
+
+    /* Apply MEL filters */
+    coefsPos = 0;
+    for(i=0; i<S->nbMelFilters; i++)
+    {
+      arm_dot_prod_q31(pSrc+S->filterPos[i],
+        &(S->filterCoefs[coefsPos]),
+        S->filterLengths[i],
+        &result);
+      /* Coefficients of all filters are stored back to back in S->filterCoefs. */
+      coefsPos += S->filterLengths[i];
+      /* Offset by MICRO_Q31 (presumably so the log below never sees 0 - confirm), then rescale. */
+      // q16.48 - fftShift
+      result += MICRO_Q31;
+      result >>= (SHIFT_MELFILTER_SATURATION_Q31 + 18);
+      // q16.29 - fftShift - satShift
+      pTmp[i] = __SSAT(result,31) ;
+      /* pTmp is reused for the MEL energies; the FFT output it held is no longer read. */
+    }
+
+
+    // q16.29 - fftShift - satShift
+    /* Compute the log */
+    arm_vlog_q31(pTmp,pTmp,S->nbMelFilters);
+
+
+    // q5.26
+    /* Log-domain offset: (fftShift + 2 + satShift) * LOG2TOLOG_Q31 (0x02C5C860 / 2^26 ~= ln 2). */
+    logExponent = fftShift + 2 + SHIFT_MELFILTER_SATURATION_Q31;
+    logExponent = logExponent * LOG2TOLOG_Q31;
+
+    /* Add the offset to every log value, then shift by -3 (q5.26 -> q8.23). */
+    // q5.26
+    arm_offset_q31(pTmp,logExponent,pTmp,S->nbMelFilters);
+    arm_shift_q31(pTmp,-3,pTmp,S->nbMelFilters);
+
+    
+    // q8.23
+    /* DCT: multiply the nbDctOutputs x nbMelFilters matrix S->dctCoefs by the log-MEL vector. */
+    pDctMat.numRows=S->nbDctOutputs;
+    pDctMat.numCols=S->nbMelFilters;
+    pDctMat.pData=(q31_t*)S->dctCoefs;
+
+    arm_mat_vec_mult_q31(&pDctMat, pTmp, pDst);
+    /* status is ARM_MATH_SUCCESS here; a failing arm_divide_q31 already returned above. */
+    return(status);
+}
+
+/**
+  @} end of MFCC group
+ */
+
+#endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c
index 9d0a2a3..ea6d9df 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_f32.c
  * Description:  RFFT & RIFFT Floating point process function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
index d5b64a0..367b2ca 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_rfft_fast_f16.c
  * Description:  RFFT & RIFFT Floating point process function
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -72,15 +74,15 @@ void stage_rfft_f16(
    twI = *pCoeff++ ;
 
    // U1 = XA(1) + XB(1); % It is real
-   t1a = xBR + xAR  ;
+   t1a = (_Float16)xBR + (_Float16)xAR  ;
 
    // U2 = XB(1) - XA(1); % It is imaginary
-   t1b = xBI + xAI  ;
+   t1b = (_Float16)xBI + (_Float16)xAI  ;
 
    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-   *pOut++ = 0.5f * ( t1a + t1b );
-   *pOut++ = 0.5f * ( t1a - t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a + (_Float16)t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a - (_Float16)t1b );
 
    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
    pB  = p + 2*k - 14;
@@ -174,18 +176,18 @@ void stage_rfft_f16(
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xBR - xAR ;
-      t1b = xBI + xAI ;
+      t1a = (_Float16)xBR - (_Float16)xAR ;
+      t1b = (_Float16)xBI + (_Float16)xAI ;
 
       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-      p0 = twR * t1a;
-      p1 = twI * t1a;
-      p2 = twR * t1b;
-      p3 = twI * t1b;
+      p0 = (_Float16)twR * (_Float16)t1a;
+      p1 = (_Float16)twI * (_Float16)t1a;
+      p2 = (_Float16)twR * (_Float16)t1b;
+      p3 = (_Float16)twI * (_Float16)t1b;
 
-      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR + (_Float16)p0 + (_Float16)p3 ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)p1 - (_Float16)p2 ); //xAI
 
       pA += 2;
       pB -= 2;
@@ -223,8 +225,8 @@ void merge_rfft_f16(
 
    pCoeff += 2 ;
 
-   *pOut++ = 0.5f * ( xAR + xAI );
-   *pOut++ = 0.5f * ( xAR - xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR + (_Float16)xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR - (_Float16)xAI );
 
    pB  =  p + 2*k - 14;
    pA +=  2    ;
@@ -293,18 +295,18 @@ void merge_rfft_f16(
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xAR - xBR ;
-      t1b = xAI + xBI ;
+      t1a = (_Float16)xAR - (_Float16)xBR ;
+      t1b = (_Float16)xAI + (_Float16)xBI ;
 
-      r = twR * t1a;
-      s = twI * t1b;
-      t = twI * t1a;
-      u = twR * t1b;
+      r = (_Float16)twR * (_Float16)t1a;
+      s = (_Float16)twI * (_Float16)t1b;
+      t = (_Float16)twI * (_Float16)t1a;
+      u = (_Float16)twR * (_Float16)t1b;
 
       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
-      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR - (_Float16)r - (_Float16)s ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)t - (_Float16)u ); //xAI
 
       pA += 2;
       pB -= 2;
@@ -342,15 +344,15 @@ void stage_rfft_f16(
 
 
    // U1 = XA(1) + XB(1); % It is real
-   t1a = xBR + xAR  ;
+   t1a = (_Float16)xBR + (_Float16)xAR  ;
 
    // U2 = XB(1) - XA(1); % It is imaginary
-   t1b = xBI + xAI  ;
+   t1b = (_Float16)xBI + (_Float16)xAI  ;
 
    // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
    // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-   *pOut++ = 0.5f * ( t1a + t1b );
-   *pOut++ = 0.5f * ( t1a - t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a + (_Float16)t1b );
+   *pOut++ = 0.5f16 * ( (_Float16)t1a - (_Float16)t1b );
 
    // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
    pB  = p + 2*k;
@@ -381,18 +383,18 @@ void stage_rfft_f16(
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xBR - xAR ;
-      t1b = xBI + xAI ;
+      t1a = (_Float16)xBR - (_Float16)xAR ;
+      t1b = (_Float16)xBI + (_Float16)xAI ;
 
       // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
       // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
-      p0 = twR * t1a;
-      p1 = twI * t1a;
-      p2 = twR * t1b;
-      p3 = twI * t1b;
+      p0 = (_Float16)twR * (_Float16)t1a;
+      p1 = (_Float16)twI * (_Float16)t1a;
+      p2 = (_Float16)twR * (_Float16)t1b;
+      p3 = (_Float16)twI * (_Float16)t1b;
 
-      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR + (_Float16)p0 + (_Float16)p3 ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)p1 - (_Float16)p2 ); //xAI
 
 
       pA += 2;
@@ -422,8 +424,8 @@ void merge_rfft_f16(
 
    pCoeff += 2 ;
 
-   *pOut++ = 0.5f * ( xAR + xAI );
-   *pOut++ = 0.5f * ( xAR - xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR + (_Float16)xAI );
+   *pOut++ = 0.5f16 * ( (_Float16)xAR - (_Float16)xAI );
 
    pB  =  p + 2*k ;
    pA +=  2	   ;
@@ -441,18 +443,18 @@ void merge_rfft_f16(
       twR = *pCoeff++;
       twI = *pCoeff++;
 
-      t1a = xAR - xBR ;
-      t1b = xAI + xBI ;
+      t1a = (_Float16)xAR - (_Float16)xBR ;
+      t1b = (_Float16)xAI + (_Float16)xBI ;
 
-      r = twR * t1a;
-      s = twI * t1b;
-      t = twI * t1a;
-      u = twR * t1b;
+      r = (_Float16)twR * (_Float16)t1a;
+      s = (_Float16)twI * (_Float16)t1b;
+      t = (_Float16)twI * (_Float16)t1a;
+      u = (_Float16)twR * (_Float16)t1b;
 
       // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
       // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
-      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
-      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+      *pOut++ = 0.5f16 * ((_Float16)xAR + (_Float16)xBR - (_Float16)r - (_Float16)s ); //xAR
+      *pOut++ = 0.5f16 * ((_Float16)xAI - (_Float16)xBI + (_Float16)t - (_Float16)u ); //xAI
 
       pA += 2;
       pB -= 2;
@@ -467,99 +469,6 @@ void merge_rfft_f16(
   @ingroup groupTransforms
 */
 
-/**
-  @defgroup RealFFT Real FFT Functions
- 
-  @par
-                   The CMSIS DSP library includes specialized algorithms for computing the
-                   FFT of real data sequences.  The FFT is defined over complex data but
-                   in many applications the input is real.  Real FFT algorithms take advantage
-                   of the symmetry properties of the FFT and have a speed advantage over complex
-                   algorithms of the same length.
-  @par
-                   The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
-  @par
-                   The real length N forward FFT of a sequence is computed using the steps shown below.
-  @par
-                   \image html RFFT.gif "Real Fast Fourier Transform"
-  @par
-                   The real sequence is initially treated as if it were complex to perform a CFFT.
-                   Later, a processing stage reshapes the data to obtain half of the frequency spectrum
-                   in complex format. Except the first complex number that contains the two real numbers
-                   X[0] and X[N/2] all the data is complex. In other words, the first complex sample
-                   contains two real values packed.
-  @par
-                   The input for the inverse RFFT should keep the same format as the output of the
-                   forward RFFT. A first processing stage pre-process the data to later perform an
-                   inverse CFFT.
-  @par
-                   \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
-  @par
-                   The algorithms for floating-point, Q15, and Q31 data are slightly different
-                   and we describe each algorithm in turn.
-  @par           Floating-point
-                   The main functions are \ref arm_rfft_fast_f16() and \ref arm_rfft_fast_init_f16().
-                   
-  @par
-                   The FFT of a real N-point sequence has even symmetry in the frequency domain. 
-                   The second half of the data equals the conjugate of the first half flipped in frequency. 
-                   Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
-                   These are packed into the output array in alternating real and imaginary components:
-  @par
-                   X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
-                   real[(N/2)-1], imag[(N/2)-1 }
-  @par
-                   It happens that the first complex number (real[0], imag[0]) is actually
-                   all real. real[0] represents the DC offset, and imag[0] should be 0.
-                   (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
-                   the first harmonic and so on.
-  @par
-                   The real FFT functions pack the frequency domain data in this fashion.
-                   The forward transform outputs the data in this form and the inverse
-                   transform expects input data in this form. The function always performs
-                   the needed bitreversal so that the input and output data is always in
-                   normal order. The functions support lengths of [32, 64, 128, ..., 4096]
-                   samples.
-  @par           Q15 and Q31
-                   The real algorithms are defined in a similar manner and utilize N/2 complex
-                   transforms behind the scenes.
-  @par
-                   The complex transforms used internally include scaling to prevent fixed-point
-                   overflows.  The overall scaling equals 1/(fftLen/2).
-                   Due to the use of complex transform internally, the source buffer is
-                   modified by the rfft.
-  @par
-                   A separate instance structure must be defined for each transform used but
-                   twiddle factor and bit reversal tables can be reused.
-  @par
-                   There is also an associated initialization function for each data type.
-                   The initialization function performs the following operations:
-                    - Sets the values of the internal structure fields.
-                    - Initializes twiddle factor table and bit reversal table pointers.
-                    - Initializes the internal complex FFT data structure.
-  @par
-                   Use of the initialization function is optional **except for MVE versions where it is mandatory**.
-                   If you don't use the initialization functions, then the structures should be initialized with code
-                   similar to the one below:
-  <pre>
-      arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
-      arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
-  </pre>
-                   where <code>fftLenReal</code> is the length of the real transform;
-                   <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
-                   <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
-                   <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
-                   output (=1).
-                   <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
-                   The value is based on the FFT length;
-                   <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
-                   <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
-                   <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
-                   must also be initialized.  
-@par
-                   Note that with MVE versions you can't initialize instance structures directly and **must
-                   use the initialization function**.
- */
 
 /**
   @addtogroup RealFFT
@@ -610,4 +519,5 @@ void arm_rfft_fast_f16(
 */
 
 #endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c
index 7c58076..c93f6a0 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_fast_f32.c
  * Description:  RFFT & RIFFT Floating point process function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -473,7 +473,7 @@ void merge_rfft_f32(
                    of the symmetry properties of the FFT and have a speed advantage over complex
                    algorithms of the same length.
   @par
-                   The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
+                   The Fast RFFT algorithm relies on the mixed radix CFFT, which saves processor usage.
   @par
                    The real length N forward FFT of a sequence is computed using the steps shown below.
   @par
@@ -497,6 +497,8 @@ void merge_rfft_f32(
                    The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
                    The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
                    but are still documented.
+                   For f16, the functions are \ref arm_rfft_fast_f16() and \ref arm_rfft_fast_init_f16().
+                   For f64, the functions are \ref arm_rfft_fast_f64() and \ref arm_rfft_fast_init_f64().
   @par
                    The FFT of a real N-point sequence has even symmetry in the frequency domain. 
                    The second half of the data equals the conjugate of the first half flipped in frequency. 
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f64.c
index 01594b4..2b0ba10 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_fast_f64.c
  * Description:  RFFT & RIFFT Double precision Floating point process function
  *
- * $Date:        29. November 2019
- * $Revision:    V1.0.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
index 1d0dda6..1496b74 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f16.c
@@ -5,11 +5,13 @@
  * Title:        arm_rfft_fast_init_f16.c
  * Description:  Split Radix Decimation in Frequency CFFT Floating point processing function
  *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -342,7 +344,7 @@ arm_status arm_rfft_fast_init_f16(
     break;
 #endif
   default:
-    return ARM_MATH_ARGUMENT_ERROR;
+    break;
   }
 
   if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR;
@@ -355,4 +357,5 @@ arm_status arm_rfft_fast_init_f16(
  */
 
 #endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
+
 #endif // EIDSP_LOAD_CMSIS_DSP_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c
index bc181b3..f469ac4 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_fast_init_f32.c
  * Description:  Split Radix Decimation in Frequency CFFT Floating point processing function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -341,7 +341,7 @@ arm_status arm_rfft_fast_init_f32(
     break;
 #endif
   default:
-    return ARM_MATH_ARGUMENT_ERROR;
+    break;
   }
 
   if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f64.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f64.c
index 7423d9e..e653f86 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f64.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f64.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_fast_init_f64.c
  * Description:  Split Radix Decimation in Frequency CFFT Double Precision Floating point processing function
  *
- * $Date:        29. November 2019
- * $Revision:    V1.0.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -333,7 +333,7 @@ arm_status arm_rfft_fast_init_f64(
     break;
 #endif
   default:
-    return ARM_MATH_ARGUMENT_ERROR;
+    break;
   }
 
   if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c
index 66f8ede..e1b088d 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_init_f32.c
  * Description:  RFFT & RIFFT Floating point initialisation function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c
index 9408d49..79b0f4c 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_init_q15.c
  * Description:  RFFT & RIFFT Q15 initialisation function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c
index f9c5112..fa81090 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_init_q31.c
  * Description:  RFFT & RIFFT Q31 initialisation function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c
index 45307dc..ee8b613 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_q15.c
  * Description:  RFFT & RIFFT Q15 process function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -66,10 +66,34 @@ void arm_split_rifft_q15(
                    Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
                    Hence the output format is different for different RFFT sizes.
                    The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
-  @par
-                   \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
-  @par
-                   \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
+  @par             Input and Output formats for RFFT Q15
+
+| RFFT Size  | Input Format  | Output Format  | Number of bits to upscale |
+| ---------: | ------------: | -------------: | ------------------------: |
+| 32         | 1.15          | 5.11           | 5                         |
+| 64         | 1.15          | 6.10           | 6                         |
+| 128        | 1.15          | 7.9            | 7                         |
+| 256        | 1.15          | 8.8            | 8                         |
+| 512        | 1.15          | 9.7            | 9                         |
+| 1024       | 1.15          | 10.6           | 10                        |
+| 2048       | 1.15          | 11.5           | 11                        |
+| 4096       | 1.15          | 12.4           | 12                        |
+| 8192       | 1.15          | 13.3           | 13                        |
+             
+  @par             Input and Output formats for RIFFT Q15
+
+| RIFFT Size  | Input Format  | Output Format  | Number of bits to upscale |
+| ----------: | ------------: | -------------: | ------------------------: |
+| 32          | 1.15          | 5.11           | 0                         |
+| 64          | 1.15          | 6.10           | 0                         |
+| 128         | 1.15          | 7.9            | 0                         |
+| 256         | 1.15          | 8.8            | 0                         |
+| 512         | 1.15          | 9.7            | 0                         |
+| 1024        | 1.15          | 10.6           | 0                         |
+| 2048        | 1.15          | 11.5           | 0                         |
+| 4096        | 1.15          | 12.4           | 0                         |
+| 8192        | 1.15          | 13.3           | 0                         |
+  
   @par
                    If the input buffer is of length N, the output buffer must have length 2*N.
                    The input buffer is modified by this function.
@@ -190,8 +214,8 @@ void arm_split_rfft_q15(
         q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
                                      MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
 #else
-        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA),
-                                     MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t),
+                                         MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t));
 #endif
         vst1q_s16(pOut1, out);
         pOut1 += 8;
@@ -415,8 +439,8 @@ void arm_split_rifft_q15(
         q15x8_t         coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
 
         /* can we avoid the conjugate here ? */
-        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-                                     vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q15x8_t),
+                                         vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q15x8_t)));
 
         vst1q_s16(pDst, out);
         pDst += 8;
diff --git a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c
index 1741685..20d93cf 100644
--- a/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c
+++ b/edge-impulse-sdk/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c
@@ -5,13 +5,13 @@
  * Title:        arm_rfft_q31.c
  * Description:  FFT & RIFFT Q31 process function
  *
- * $Date:        18. March 2019
- * $Revision:    V1.6.0
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
  *
- * Target Processor: Cortex-M cores
+ * Target Processor: Cortex-M and Cortex-A cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -66,10 +66,34 @@ void arm_split_rifft_q31(
                    Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
                    Hence the output format is different for different RFFT sizes.
                    The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
-  @par
-                   \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
-  @par
-                   \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
+  @par             Input and Output formats for RFFT Q31
+
+| RFFT Size  | Input Format  | Output Format  | Number of bits to upscale |
+| ---------: | ------------: | -------------: | ------------------------: |
+| 32         | 1.31          | 5.27           | 5                         |
+| 64         | 1.31          | 6.26           | 6                         |
+| 128        | 1.31          | 7.25           | 7                         |
+| 256        | 1.31          | 8.24           | 8                         |
+| 512        | 1.31          | 9.23           | 9                         |
+| 1024       | 1.31          | 10.22          | 10                        |
+| 2048       | 1.31          | 11.21          | 11                        |
+| 4096       | 1.31          | 12.20          | 12                        |
+| 8192       | 1.31          | 13.19          | 13                        |
+             
+  @par             Input and Output formats for RIFFT Q31
+
+| RIFFT Size  | Input Format  | Output Format  | Number of bits to upscale |
+| ----------: | ------------: | -------------: | ------------------------: |
+| 32          | 1.31          | 5.27           | 0                         |
+| 64          | 1.31          | 6.26           | 0                         |
+| 128         | 1.31          | 7.25           | 0                         |
+| 256         | 1.31          | 8.24           | 0                         |
+| 512         | 1.31          | 9.23           | 0                         |
+| 1024        | 1.31          | 10.22          | 0                         |
+| 2048        | 1.31          | 11.21          | 0                         |
+| 4096        | 1.31          | 12.20          | 0                         |
+| 8192        | 1.31          | 13.19          | 0                         |
+
   @par
                    If the input buffer is of length N, the output buffer must have length 2*N.
                    The input buffer is modified by this function.
@@ -183,7 +207,8 @@ void arm_split_rfft_q31(
 #if defined(__CMSIS_GCC_H)
         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
 #else
-        q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+        q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t),
+                                         MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t));
 #endif
         vst1q(pOut1, out);
         pOut1 += 4;
@@ -342,8 +367,8 @@ void arm_split_rifft_q31(
         q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
                                      vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
 #else
-        q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-                                     vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+        q31x4_t         out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t),
+                                         vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t)));
 #endif
         vst1q_s32(pDst, out);
         pDst += 4;
diff --git a/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h
new file mode 100644
index 0000000..d650980
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h
@@ -0,0 +1,172 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * @file     arm_nn_math_types.h
+ * @brief    Compiler include and basic types
+ * @version  V1.2.0
+ * @date     20 June 2022
+ * Target Processor: Cortex-M
+ ******************************************************************************/
+
+/**
+   Copied from CMSIS/DSP/arm_math_types.h and modified
+*/
+
+#ifndef _ARM_NN_MATH_TYPES_H_
+
+#define _ARM_NN_MATH_TYPES_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Integer aliases */
+typedef int8_t q7_t;
+typedef int16_t q15_t;
+typedef int32_t q31_t;
+typedef int64_t q63_t;
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__CC_ARM)
+
+#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+
+#elif defined(__GNUC__)
+
+#elif defined(__ICCARM__)
+
+#elif defined(__TI_ARM__)
+
+#elif defined(__CSMC__)
+
+#elif defined(__TASKING__)
+
+#elif defined(_MSC_VER)
+
+#else
+#error Unknown compiler
+#endif
+
+/* Included for intrinsics definitions */
+#if defined(_MSC_VER)
+#ifndef __STATIC_FORCEINLINE
+#define __STATIC_FORCEINLINE static __forceinline
+#endif
+#ifndef __STATIC_INLINE
+#define __STATIC_INLINE static __inline
+#endif
+#ifndef __ALIGNED
+#define __ALIGNED(x) __declspec(align(x))
+#endif
+
+#elif defined(__GNUC_PYTHON__)
+#ifndef __ALIGNED
+#define __ALIGNED(x) __attribute__((aligned(x)))
+#endif
+#ifndef __STATIC_FORCEINLINE
+#define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
+#endif
+#ifndef __STATIC_INLINE
+#define __STATIC_INLINE static inline
+#endif
+
+#else
+#include "edge-impulse-sdk/CMSIS/Core/Include/cmsis_compiler.h"
+#endif
+
+/* evaluate ARM DSP feature */
+#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
+#ifndef ARM_MATH_DSP
+#define ARM_MATH_DSP 1
+#endif
+#endif
+
+#if __ARM_FEATURE_MVE
+#ifndef ARM_MATH_MVEI
+#define ARM_MATH_MVEI
+#endif
+#endif
+
+/* Compiler specific diagnostic adjustment */
+#if defined(__CC_ARM)
+
+#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+
+#elif defined(__GNUC__)
+// #pragma GCC diagnostic pop
+
+#elif defined(__ICCARM__)
+
+#elif defined(__TI_ARM__)
+
+#elif defined(__CSMC__)
+
+#elif defined(__TASKING__)
+
+#elif defined(_MSC_VER)
+
+#else
+#error Unknown compiler
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#if __ARM_FEATURE_MVE
+#include <arm_mve.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Minimum and maximum values of the q7_t, q15_t and q31_t integer types
+ */
+
+#define NN_Q31_MAX ((q31_t)(0x7FFFFFFFL))
+#define NN_Q15_MAX ((q15_t)(0x7FFF))
+#define NN_Q7_MAX ((q7_t)(0x7F))
+#define NN_Q31_MIN ((q31_t)(0x80000000L))
+#define NN_Q15_MIN ((q15_t)(0x8000))
+#define NN_Q7_MIN ((q7_t)(0x80))
+
+/**
+ * @brief Error status returned by some functions in the library.
+ */
+
+typedef enum
+{
+    ARM_CMSIS_NN_SUCCESS = 0,        /**< No error */
+    ARM_CMSIS_NN_ARG_ERROR = -1,     /**< One or more arguments are incorrect */
+    ARM_CMSIS_NN_NO_IMPL_ERROR = -2, /**<  No implementation available */
+} arm_cmsis_nn_status;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*ifndef _ARM_NN_MATH_TYPES_H_ */
diff --git a/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_tables.h b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_tables.h
index 3d2b534..85a7537 100644
--- a/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_tables.h
+++ b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_tables.h
@@ -3,8 +3,8 @@
  * Title:        arm_nn_tables.h
  * Description:  Extern declaration for NN tables
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        17. August 2021
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -29,12 +29,12 @@
 #ifndef _ARM_NN_TABLES_H
 #define _ARM_NN_TABLES_H
 
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h"
 
 /**
-* @brief tables for various activation functions
-*
-*/
+ * @brief tables for various activation functions
+ *
+ */
 
 extern const q15_t sigmoidTable_q15[256];
 extern const q7_t sigmoidTable_q7[256];
diff --git a/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h
index 206af07..6040d72 100644
--- a/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h
+++ b/edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -22,8 +22,8 @@
  * Description:  Public header file to contain the CMSIS-NN structs for the
  *               TensorFlowLite micro compliant functions
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.0
+ * $Date:        22. February 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -112,7 +112,7 @@ typedef struct
 typedef struct
 {
     int32_t input_offset;  /**< Zero value for the input tensor */
-    int32_t filter_offset; /**< Zero value for the filter tensor */
+    int32_t filter_offset; /**< Zero value for the filter tensor. Not used */
     int32_t output_offset; /**< Zero value for the output tensor */
     cmsis_nn_activation activation;
 } cmsis_nn_fc_params;
@@ -127,4 +127,11 @@ typedef struct
     cmsis_nn_activation output_activation;
 } cmsis_nn_svdf_params;
 
+/** CMSIS-NN object for Softmax s16 layer parameters */
+typedef struct
+{
+    const int16_t *exp_lut;
+    const int16_t *one_by_one_lut;
+} cmsis_nn_softmax_lut_s16;
+
 #endif // _ARM_NN_TYPES_H
diff --git a/edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h b/edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h
index f43c0de..1548a20 100644
--- a/edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        19 January 2021
- * $Revision:    V.6.5.3
+ * $Date:        7 Aug 2022
+ * $Revision:    V.10.1.2
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -51,6 +51,15 @@
    * kernels are included in the function description. The implementation details are also
    * described in this paper [1].
    *
+   * Supported Processors
+   * -------
+   * CMSIS-NN targets Cortex-M processors with typically three different implementations for each function. Each
+   * targets a different group of processors.
+   *  - Processors without SIMD capability (e.g. Cortex-M0)
+   *  - Processors with DSP extension (e.g. Cortex-M4)
+   *  - Processors with MVE extension (e.g. Cortex-M55)
+   * The right implementation is picked through feature flags and the user usually does not have to explicitly set it.
+   *
    * Function Classification
    * --------
    * The functions can be classified into two segments
@@ -103,7 +112,7 @@
    * Copyright Notice
    * ------------
    *
-   * Copyright (C) 2010-2019 Arm Limited. All rights reserved.
+   * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
    *
    * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601
    *
@@ -124,809 +133,620 @@
 #ifndef _ARM_NNFUNCTIONS_H
 #define _ARM_NNFUNCTIONS_H
 
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h"
 #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h"
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
-#include "arm_nn_types.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h"
 
 #define USE_INTRINSIC
 
 //#define ARM_NN_TRUNCATE /* This config the rounding model to floor or round to the nearest int */
 
 #ifdef __cplusplus
-extern "C"
-{
+extern "C" {
 #endif
 
-    /**
-     * @brief Struct for specifying activation function types
-     *
-     */
-    typedef enum
-    {
-        ARM_SIGMOID = 0,
-        /**< Sigmoid activation function */
-        ARM_TANH = 1,
-        /**< Tanh activation function */
-    } arm_nn_activation_type;
-
-    /**
-     * @defgroup NNConv Convolution Functions
-     *
-     * Collection of convolution, depthwise convolution functions and their variants.
-     *
-     * The convolution is implemented in 2 steps: im2col and GEMM
-     *
-     * im2col is a process of converting each patch of image data into
-     * a column. After im2col, the convolution is computed as matrix-matrix
-     * multiplication.
-     *
-     * To reduce the memory footprint, the im2col is performed partially.
-     * Each iteration, only a few column (i.e., patches) are generated and
-     * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
-     *
-     */
-
-    /**
-     * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
-     cmsis-nn
-     *        to perform the convolution.
-     *
-     * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
-                                      arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required
-     * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
-     *                                Range of conv_params->input_offset  : [-127, 128]
-     *                                Range of conv_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                                It contains the multiplier and shift values to be applied to each output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
-     *                                spatial filter dimensions
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
-     * @param[out]     output_data    Output data pointer. Data type: int8
-     *
-     * @return     The function returns either
-     *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail. or,
-     *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
-     *
-     */
-    arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
-                                       const cmsis_nn_conv_params *conv_params,
-                                       const cmsis_nn_per_channel_quant_params *quant_params,
-                                       const cmsis_nn_dims *input_dims,
-                                       const q7_t *input_data,
-                                       const cmsis_nn_dims *filter_dims,
-                                       const q7_t *filter_data,
-                                       const cmsis_nn_dims *bias_dims,
-                                       const int32_t *bias_data,
-                                       const cmsis_nn_dims *output_dims,
-                                       q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for arm_convolve_wrapper_s8
-     *
-     * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
-     *                                Range of conv_params->input_offset  : [-127, 128]
-     *                                Range of conv_params->output_offset : [-128, 127]
-     * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
-     * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
-     *                                filter dimensions
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
-     *
-     * @return         The function returns  required buffer size(bytes)
-     *
-     */
-    int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
-                                                    const cmsis_nn_dims *input_dims,
-                                                    const cmsis_nn_dims *filter_dims,
-                                                    const cmsis_nn_dims *output_dims);
-
-    /**
-     * @brief Basic s8 convolution function
-     * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
-                                      arm_convolve_s8_get_buffer_size will return the buffer_size if required
-     * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
-     *                                Range of conv_params->input_offset  : [-127, 128]
-     *                                Range of conv_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                                It contains the multiplier and shift values to be applied to each output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
-     *                                spatial filter dimensions
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Optional bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
-     * @param[out]     output_data    Output data pointer. Data type: int8
-
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     * @details
-     *    1. Supported framework: TensorFlow Lite micro
-     *    2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
-     *
-     */
-    arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
-                               const cmsis_nn_conv_params *conv_params,
-                               const cmsis_nn_per_channel_quant_params *quant_params,
-                               const cmsis_nn_dims *input_dims,
-                               const q7_t *input_data,
-                               const cmsis_nn_dims *filter_dims,
-                               const q7_t *filter_data,
-                               const cmsis_nn_dims *bias_dims,
-                               const int32_t *bias_data,
-                               const cmsis_nn_dims *output_dims,
-                               q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for s8 convolution function
-     *
-     * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
-     * are the spatial filter dimensions
-     * @return          The function returns  required buffer size(bytes)
-     *
-     */
-    int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
-
-    /**
-     * @brief Basic Q7 convolution function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-    arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
-                                         const uint16_t dim_im_in,
-                                         const uint16_t ch_im_in,
-                                         const q7_t *wt,
-                                         const uint16_t ch_im_out,
-                                         const uint16_t dim_kernel,
-                                         const uint16_t padding,
-                                         const uint16_t stride,
-                                         const q7_t *bias,
-                                         const uint16_t bias_shift,
-                                         const uint16_t out_shift,
-                                         q7_t *Im_out,
-                                         const uint16_t dim_im_out,
-                                         q15_t *bufferA,
-                                         q7_t *bufferB);
-
-    /**
-     * @brief Basic Q7 convolution function (non-square shape)
-     * @param[in]       Im_in        pointer to input tensor
-     * @param[in]       dim_im_in_x  input tensor dimension x
-     * @param[in]       dim_im_in_y  input tensor dimension y
-     * @param[in]       ch_im_in     number of input tensor channels
-     * @param[in]       wt           pointer to kernel weights
-     * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel_x filter kernel size x
-     * @param[in]       dim_kernel_y filter kernel size y
-     * @param[in]       padding_x    padding size x
-     * @param[in]       padding_y    padding size y
-     * @param[in]       stride_x     convolution stride x
-     * @param[in]       stride_y     convolution stride y
-     * @param[in]       bias         pointer to bias
-     * @param[in]       bias_shift   amount of left-shift for bias
-     * @param[in]       out_shift    amount of right-shift for output
-     * @param[in,out]   Im_out       pointer to output tensor
-     * @param[in]       dim_im_out_x output tensor dimension x
-     * @param[in]       dim_im_out_y output tensor dimension y
-     * @param[in,out]   bufferA      pointer to buffer space for input
-     * @param[in,out]   bufferB      pointer to buffer space for output
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     */
-    arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
-                                                   const uint16_t dim_im_in_x,
-                                                   const uint16_t dim_im_in_y,
-                                                   const uint16_t ch_im_in,
-                                                   const q7_t *wt,
-                                                   const uint16_t ch_im_out,
-                                                   const uint16_t dim_kernel_x,
-                                                   const uint16_t dim_kernel_y,
-                                                   const uint16_t padding_x,
-                                                   const uint16_t padding_y,
-                                                   const uint16_t stride_x,
-                                                   const uint16_t stride_y,
-                                                   const q7_t *bias,
-                                                   const uint16_t bias_shift,
-                                                   const uint16_t out_shift,
-                                                   q7_t *Im_out,
-                                                   const uint16_t dim_im_out_x,
-                                                   const uint16_t dim_im_out_y,
-                                                   q15_t *bufferA,
-                                                   q7_t *bufferB);
-
-    /**
-     * @brief Basic Q15 convolution function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-    arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
-                                          const uint16_t dim_im_in,
-                                          const uint16_t ch_im_in,
-                                          const q15_t *wt,
-                                          const uint16_t ch_im_out,
-                                          const uint16_t dim_kernel,
-                                          const uint16_t padding,
-                                          const uint16_t stride,
-                                          const q15_t *bias,
-                                          const uint16_t bias_shift,
-                                          const uint16_t out_shift,
-                                          q15_t *Im_out,
-                                          const uint16_t dim_im_out,
-                                          q15_t *bufferA,
-                                          q7_t *bufferB);
-
-    /**
-     * @brief Fast Q7 convolution function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 4
-     *   ch_im_out is multiple of 2
-     */
-    arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
-                                        const uint16_t dim_im_in,
-                                        const uint16_t ch_im_in,
-                                        const q7_t *wt,
-                                        const uint16_t ch_im_out,
-                                        const uint16_t dim_kernel,
-                                        const uint16_t padding,
-                                        const uint16_t stride,
-                                        const q7_t *bias,
-                                        const uint16_t bias_shift,
-                                        const uint16_t out_shift,
-                                        q7_t *Im_out,
-                                        const uint16_t dim_im_out,
-                                        q15_t *bufferA,
-                                        q7_t *bufferB);
-
-    /**
-     * @brief Fast Q7 convolution function (non-sqaure shape)
-     * @param[in]       Im_in        pointer to input tensor
-     * @param[in]       dim_im_in_x  input tensor dimension x
-     * @param[in]       dim_im_in_y  input tensor dimension y
-     * @param[in]       ch_im_in     number of input tensor channels
-     * @param[in]       wt           pointer to kernel weights
-     * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel_x filter kernel size x
-     * @param[in]       dim_kernel_y filter kernel size y
-     * @param[in]       padding_x    padding size x
-     * @param[in]       padding_y    padding size y
-     * @param[in]       stride_x     convolution stride x
-     * @param[in]       stride_y     convolution stride y
-     * @param[in]       bias         pointer to bias
-     * @param[in]       bias_shift   amount of left-shift for bias
-     * @param[in]       out_shift    amount of right-shift for output
-     * @param[in,out]   Im_out       pointer to output tensor
-     * @param[in]       dim_im_out_x output tensor dimension x
-     * @param[in]       dim_im_out_y output tensor dimension y
-     * @param[in,out]   bufferA      pointer to buffer space for input
-     * @param[in,out]   bufferB      pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 4
-     *   ch_im_out is multiple of 2
-     */
-
-    arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
-                                                  const uint16_t dim_im_in_x,
-                                                  const uint16_t dim_im_in_y,
-                                                  const uint16_t ch_im_in,
-                                                  const q7_t *wt,
-                                                  const uint16_t ch_im_out,
-                                                  const uint16_t dim_kernel_x,
-                                                  const uint16_t dim_kernel_y,
-                                                  const uint16_t padding_x,
-                                                  const uint16_t padding_y,
-                                                  const uint16_t stride_x,
-                                                  const uint16_t stride_y,
-                                                  const q7_t *bias,
-                                                  const uint16_t bias_shift,
-                                                  const uint16_t out_shift,
-                                                  q7_t *Im_out,
-                                                  const uint16_t dim_im_out_x,
-                                                  const uint16_t dim_im_out_y,
-                                                  q15_t *bufferA,
-                                                  q7_t *bufferB);
-
-    /**
-     * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape)
-     * @param[in]       Im_in        pointer to input tensor
-     * @param[in]       dim_im_in_x  input tensor dimension x
-     * @param[in]       dim_im_in_y  input tensor dimension y
-     * @param[in]       ch_im_in     number of input tensor channels
-     * @param[in]       wt           pointer to kernel weights
-     * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel_x filter kernel size x
-     * @param[in]       dim_kernel_y filter kernel size y
-     * @param[in]       padding_x    padding size x
-     * @param[in]       padding_y    padding size y
-     * @param[in]       stride_x     convolution stride x
-     * @param[in]       stride_y     convolution stride y
-     * @param[in]       bias         pointer to bias
-     * @param[in]       bias_shift   amount of left-shift for bias
-     * @param[in]       out_shift    amount of right-shift for output
-     * @param[in,out]   Im_out       pointer to output tensor
-     * @param[in]       dim_im_out_x output tensor dimension x
-     * @param[in]       dim_im_out_y output tensor dimension y
-     * @param[in,out]   bufferA      pointer to buffer space for input
-     * @param[in,out]   bufferB      pointer to buffer space for output
-     * @return     The function returns either
-     *                          <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail. or,
-     *                          <code>ARM_MATH_SUCCESS</code> on successful completion.
-     *
-     * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1
-     * and dim_kernel_y=1). It can be used for
-     * second half of MobileNets after depthwise separable convolution.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 4
-     *   ch_im_out is multiple of 2
-     */
-    arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
-                                                      const uint16_t dim_im_in_x,
-                                                      const uint16_t dim_im_in_y,
-                                                      const uint16_t ch_im_in,
-                                                      const q7_t *wt,
-                                                      const uint16_t ch_im_out,
-                                                      const uint16_t dim_kernel_x,
-                                                      const uint16_t dim_kernel_y,
-                                                      const uint16_t padding_x,
-                                                      const uint16_t padding_y,
-                                                      const uint16_t stride_x,
-                                                      const uint16_t stride_y,
-                                                      const q7_t *bias,
-                                                      const uint16_t bias_shift,
-                                                      const uint16_t out_shift,
-                                                      q7_t *Im_out,
-                                                      const uint16_t dim_im_out_x,
-                                                      const uint16_t dim_im_out_y,
-                                                      q15_t *bufferA,
-                                                      q7_t *bufferB);
-
-    /**
-     * @brief Fast s8 version for 1x1 convolution (non-square shape)
-     *
-     * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
-                                      arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required
-     * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
-     *                                Range of conv_params->input_offset  : [-127, 128]
-     *                                Range of conv_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                                It contains the multiplier and shift values to be applied to each output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Optional bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
-     * @param[out]     output_data    Output data pointer. Data type: int8
-     *
-     * @return     The function returns either
-     *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail. or,
-     *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
-     *
-     * @details
-     *   - Supported framework : TensorFlow Lite Micro
-     *   - The following constrains on the arguments apply
-     *      -# input_dims->c is a multiple of 4
-     *      -# conv_params->padding.w = conv_params->padding.h = 0
-     *      -# conv_params->stride.w = conv_params->stride.h = 1
-     *
-     */
-    arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
-                                        const cmsis_nn_conv_params *conv_params,
-                                        const cmsis_nn_per_channel_quant_params *quant_params,
-                                        const cmsis_nn_dims *input_dims,
-                                        const q7_t *input_data,
-                                        const cmsis_nn_dims *filter_dims,
-                                        const q7_t *filter_data,
-                                        const cmsis_nn_dims *bias_dims,
-                                        const int32_t *bias_data,
-                                        const cmsis_nn_dims *output_dims,
-                                        q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for arm_convolve_1x1_s8_fast
-     *
-     * @param[in]       input_dims            Input (activation) dimensions
-     * @return          The function returns the required buffer size in bytes
-     *
-     */
-    int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
-
-    /**
-     * @brief 1xn convolution
-     *
-     * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
-                                      arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required
-     * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
-     *                                Range of conv_params->input_offset  : [-127, 128]
-     *                                Range of conv_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                                It contains the multiplier and shift values to be applied to each output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal
-     *                                spatial filter dimension
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Optional bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
-     * @param[out]     output_data    Output data pointer. Data type: int8
-     *
-     * @return     The function returns either
-     *                  <code>ARM_MATH_SIZE_MISMATCH</code> if argument constraints fail. or,
-     *                  <code>ARM_MATH_SUCCESS</code> on successful completion.
-     *
-     * @details
-     *   - Supported framework : TensorFlow Lite Micro
-     *   - The following constrains on the arguments apply
-     *      -# input_dims->n equals 1
-     *      -# ouput_dims->w is a multiple of 4
-     *      -# Explicit constraints(since it is for 1xN convolution)
-     *      -## input_dims->h equals 1
-     *      -## output_dims->h equals 1
-     *      -## filter_dims->h equals 1
-     *@todo  Remove constraint on output_dims->w to make the function generic.
-     *
-     */
-    arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
+/**
+ * @brief Enum for specifying activation function types
+ *
+ */
+typedef enum
+{
+    ARM_SIGMOID = 0,
+    /**< Sigmoid activation function */
+    ARM_TANH = 1,
+    /**< Tanh activation function */
+} arm_nn_activation_type;
+
+/**
+ * @defgroup NNConv Convolution Functions
+ *
+ * Collection of convolution, depthwise convolution functions and their variants.
+ *
+ * The convolution is implemented in 2 steps: im2col and GEMM
+ *
+ * im2col is a process of converting each patch of image data into
+ * a column. After im2col, the convolution is computed as matrix-matrix
+ * multiplication.
+ *
+ * To reduce the memory footprint, the im2col is performed partially.
+ * Each iteration, only a few column (i.e., patches) are generated and
+ * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
+ *
+ */
+
+/**
+ * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ *        cmsis-nn  to perform the convolution.
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required.
+ *                                The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
+ *                  <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
+ *
+ */
+arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
+                                            const cmsis_nn_conv_params *conv_params,
+                                            const cmsis_nn_per_channel_quant_params *quant_params,
+                                            const cmsis_nn_dims *input_dims,
+                                            const q7_t *input_data,
+                                            const cmsis_nn_dims *filter_dims,
+                                            const q7_t *filter_data,
+                                            const cmsis_nn_dims *bias_dims,
+                                            const int32_t *bias_data,
+                                            const cmsis_nn_dims *output_dims,
+                                            q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_wrapper_s8
+ *
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
+ *                                filter dimensions
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ *
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                const cmsis_nn_dims *input_dims,
+                                                const cmsis_nn_dims *filter_dims,
+                                                const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ *        cmsis-nn to perform the convolution.
+ *
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required.
+ *                                The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns either
+ *                  <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
+ *                  <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
+ *
+ */
+arm_cmsis_nn_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
+                                             const cmsis_nn_conv_params *conv_params,
+                                             const cmsis_nn_per_channel_quant_params *quant_params,
+                                             const cmsis_nn_dims *input_dims,
+                                             const q15_t *input_data,
+                                             const cmsis_nn_dims *filter_dims,
+                                             const q7_t *filter_data,
+                                             const cmsis_nn_dims *bias_dims,
+                                             const int64_t *bias_data,
+                                             const cmsis_nn_dims *output_dims,
+                                             q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for arm_convolve_wrapper_s16
+ *
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      input_dims     Input (activation) dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      filter_dims    Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial
+ *                                filter dimensions
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ *
+ * @return         The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                 const cmsis_nn_dims *input_dims,
+                                                 const cmsis_nn_dims *filter_dims,
+                                                 const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief Basic s8 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_s8_get_buffer_size will return the buffer_size if required.
+ *                                The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                Range of conv_params->input_offset  : [-127, 128]
+ *                                Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int8
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *
+ */
+arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q7_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int32_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for s8 convolution function
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                        are the spatial filter dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic s16 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_s16_get_buffer_size will return the buffer_size if required.
+ *                                The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *
+ */
+arm_cmsis_nn_status arm_convolve_s16(const cmsis_nn_context *ctx,
                                      const cmsis_nn_conv_params *conv_params,
                                      const cmsis_nn_per_channel_quant_params *quant_params,
                                      const cmsis_nn_dims *input_dims,
-                                     const q7_t *input_data,
+                                     const q15_t *input_data,
                                      const cmsis_nn_dims *filter_dims,
                                      const q7_t *filter_data,
                                      const cmsis_nn_dims *bias_dims,
-                                     const int32_t *bias_data,
+                                     const int64_t *bias_data,
                                      const cmsis_nn_dims *output_dims,
-                                     q7_t *output_data);
-
-    /**
-     * @brief Get the required additional buffer size for 1xn convolution
-     *
-     * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the
-     *                                        horizontal spatial filter dimension
-     * @return          The function returns  required buffer size(bytes)
-     *
-     */
-    int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
-
-    /**
-     * @brief Q7 version of convolution for RGB image
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This kernel is written exclusively for convolution with ch_im_in
-     * equals 3. This applies on the first layer of CNNs which has input
-     * image with RGB format.
-     */
-
-    arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
-                                       const uint16_t dim_im_in,
-                                       const uint16_t ch_im_in,
-                                       const q7_t *wt,
-                                       const uint16_t ch_im_out,
-                                       const uint16_t dim_kernel,
-                                       const uint16_t padding,
-                                       const uint16_t stride,
-                                       const q7_t *bias,
-                                       const uint16_t bias_shift,
-                                       const uint16_t out_shift,
-                                       q7_t *Im_out,
-                                       const uint16_t dim_im_out,
-                                       q15_t *bufferA,
-                                       q7_t *bufferB);
-
-    /**
-     * @brief Fast Q15 convolution function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 2
-     *   ch_im_out is multiple of 2
-     */
-
-    arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
-                                         const uint16_t dim_im_in,
-                                         const uint16_t ch_im_in,
-                                         const q15_t *wt,
-                                         const uint16_t ch_im_out,
-                                         const uint16_t dim_kernel,
-                                         const uint16_t padding,
-                                         const uint16_t stride,
-                                         const q15_t *bias,
-                                         const uint16_t bias_shift,
-                                         const uint16_t out_shift,
-                                         q15_t *Im_out,
-                                         const uint16_t dim_im_out,
-                                         q15_t *bufferA,
-                                         q7_t *bufferB);
-
-    /**
-     * @brief Fast Q15 convolution function (non-sqaure shape)
-     * @param[in]       Im_in        pointer to input tensor
-     * @param[in]       dim_im_in_x  input tensor dimension x
-     * @param[in]       dim_im_in_y  input tensor dimension y
-     * @param[in]       ch_im_in     number of input tensor channels
-     * @param[in]       wt           pointer to kernel weights
-     * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel_x filter kernel size x
-     * @param[in]       dim_kernel_y filter kernel size y
-     * @param[in]       padding_x    padding size x
-     * @param[in]       padding_y    padding size y
-     * @param[in]       stride_x     convolution stride x
-     * @param[in]       stride_y     convolution stride y
-     * @param[in]       bias         pointer to bias
-     * @param[in]       bias_shift   amount of left-shift for bias
-     * @param[in]       out_shift    amount of right-shift for output
-     * @param[in,out]   Im_out       pointer to output tensor
-     * @param[in]       dim_im_out_x output tensor dimension x
-     * @param[in]       dim_im_out_y output tensor dimension y
-     * @param[in,out]   bufferA      pointer to buffer space for input
-     * @param[in,out]   bufferB      pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * @details
-     *
-     * <b>Buffer size:</b>
-     *
-     * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
-     *
-     * bufferB size: 0
-     *
-     * <b>Input dimension constraints:</b>
-     *
-     * ch_im_in is multiple of 2
-     *
-     * ch_im_out is multipe of 2
-     *
-     */
-
-    arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
-                                                   const uint16_t dim_im_in_x,
-                                                   const uint16_t dim_im_in_y,
-                                                   const uint16_t ch_im_in,
-                                                   const q15_t *wt,
-                                                   const uint16_t ch_im_out,
-                                                   const uint16_t dim_kernel_x,
-                                                   const uint16_t dim_kernel_y,
-                                                   const uint16_t padding_x,
-                                                   const uint16_t padding_y,
-                                                   const uint16_t stride_x,
-                                                   const uint16_t stride_y,
-                                                   const q15_t *bias,
-                                                   const uint16_t bias_shift,
-                                                   const uint16_t out_shift,
-                                                   q15_t *Im_out,
-                                                   const uint16_t dim_im_out_x,
-                                                   const uint16_t dim_im_out_y,
-                                                   q15_t *bufferA,
-                                                   q7_t *bufferB);
-
-    /**
-     * @brief Q7 depthwise separable convolution function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       wt          pointer to kernel weights
-     * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       bias        pointer to bias
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   bufferB     pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 2
-     *   ch_im_out is multiple of 2
-     */
-
-    arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
-                                                   const uint16_t dim_im_in,
-                                                   const uint16_t ch_im_in,
-                                                   const q7_t *wt,
-                                                   const uint16_t ch_im_out,
-                                                   const uint16_t dim_kernel,
-                                                   const uint16_t padding,
-                                                   const uint16_t stride,
-                                                   const q7_t *bias,
-                                                   const uint16_t bias_shift,
-                                                   const uint16_t out_shift,
-                                                   q7_t *Im_out,
-                                                   const uint16_t dim_im_out,
-                                                   q15_t *bufferA,
-                                                   q7_t *bufferB);
-
-    /**
-     * @brief Q7 depthwise separable convolution function (non-square shape)
-     * @param[in]       Im_in         pointer to input tensor
-     * @param[in]       dim_im_in_x   input tensor dimension x
-     * @param[in]       dim_im_in_y   input tensor dimension y
-     * @param[in]       ch_im_in      number of input tensor channels
-     * @param[in]       wt            pointer to kernel weights
-     * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
-     * @param[in]       dim_kernel_x  filter kernel size x
-     * @param[in]       dim_kernel_y  filter kernel size y
-     * @param[in]       padding_x     padding sizes x
-     * @param[in]       padding_y     padding sizes y
-     * @param[in]       stride_x      convolution stride x
-     * @param[in]       stride_y      convolution stride y
-     * @param[in]       bias          pointer to bias
-     * @param[in]       bias_shift    amount of left-shift for bias
-     * @param[in]       out_shift     amount of right-shift for output
-     * @param[in,out]   Im_out        pointer to output tensor
-     * @param[in]       dim_im_out_x  output tensor dimension x
-     * @param[in]       dim_im_out_y  output tensor dimension y
-     * @param[in,out]   bufferA       pointer to buffer space for input
-     * @param[in,out]   bufferB       pointer to buffer space for output
-     * @return     The function returns either
-     * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
-     *
-     * This function is the version with full list of optimization tricks, but with
-     * some contraints:
-     *   ch_im_in is multiple of 2
-     *   ch_im_out is multiple of 2
-     */
-    arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
-                                                             const uint16_t dim_im_in_x,
-                                                             const uint16_t dim_im_in_y,
-                                                             const uint16_t ch_im_in,
-                                                             const q7_t *wt,
-                                                             const uint16_t ch_im_out,
-                                                             const uint16_t dim_kernel_x,
-                                                             const uint16_t dim_kernel_y,
-                                                             const uint16_t padding_x,
-                                                             const uint16_t padding_y,
-                                                             const uint16_t stride_x,
-                                                             const uint16_t stride_y,
-                                                             const q7_t *bias,
-                                                             const uint16_t bias_shift,
-                                                             const uint16_t out_shift,
-                                                             q7_t *Im_out,
-                                                             const uint16_t dim_im_out_x,
-                                                             const uint16_t dim_im_out_y,
-                                                             q15_t *bufferA,
-                                                             q7_t *bufferB);
-
-    /**
-     * @brief Wrapper function to pick the right optimized s8 depthwise convolution function
-     *
-     * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
-     *                                definition file to see if an additional buffer is required.
-     *                                Optional function {API}_get_buffer_size() provides the buffer
-     *                                size if required.
-     * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
-     *                                dw_conv_params->dilation is not used.
-     *                                Range of dw_conv_params->input_offset : [-127, 128]
-     *                                Range of dw_conv_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                               It contains the multiplier and shift values to be applied to each
-     *                               output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
-     *                                Batch argument N is not used and assumed to be 1.
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
-     * @param[in, out] output_data    Output data pointer. Data type: int8
-     * @return     The function returns
-     *                <code>ARM_MATH_SUCCESS</code>   -  Successful completion.
-     *
-     * @details
-     *    - Supported framework: TensorFlow Lite
-     *    - Picks one of the the following functions
-     *        -# arm_depthwise_conv_s8()
-     *        -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only
-     *        -# arm_depthwise_conv_s8_opt()
-     *    - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     *    - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the
-     * boundary.
-     */
-    arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
-                                             const cmsis_nn_dw_conv_params *dw_conv_params,
+                                     q15_t *output_data);
+/**
+ * @brief Optimized s16 convolution function
+ * @param[in, out] ctx            Function context that contains the additional buffer if required by the function.
+ *                                arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required.
+ *                                The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params    Convolution parameters (e.g. strides, dilations, pads,...).
+ *                                conv_params->input_offset  : Not used
+ *                                conv_params->output_offset : Not used
+ * @param[in]      quant_params   Per-channel quantization info.
+ *                                It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data     Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims    Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
+ *                                spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not
+ *                                exceed 512
+ * @param[in]      filter_data    Filter data pointer. Data type: int8
+ * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data      Optional bias data pointer. Data type: int64
+ * @param[in]      output_dims    Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data    Output data pointer. Data type: int16
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7/q15 is used as data type even though it is s8/s16 data. It is done so to be consistent with existing APIs.
+ *    3. Additional memory is required for optimization. Refer to argument 'ctx' for details.
+ *    4. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
+ *
+ */
+
+arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_conv_params *conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q15_t *input_data,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *filter_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int64_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for s16 convolution function
+ *
+ * @param[in]       input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims   Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                are the spatial filter dimensions
+ * @return          The function returns the required buffer size (bytes)
+ *
+ */
+int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Get the required buffer size for fast s16 convolution function
+ *
+ * @param[in]       input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims   Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
+ *                                are the spatial filter dimensions
+ * @return          The function returns the required buffer size (bytes)
+ *
+ */
+int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic Q7 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+arm_cmsis_nn_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
+                                              const uint16_t dim_im_in,
+                                              const uint16_t ch_im_in,
+                                              const q7_t *wt,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t dim_kernel,
+                                              const uint16_t padding,
+                                              const uint16_t stride,
+                                              const q7_t *bias,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              q7_t *Im_out,
+                                              const uint16_t dim_im_out,
+                                              q15_t *bufferA,
+                                              q7_t *bufferB);
+
+/**
+ * @brief Basic Q7 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ */
+arm_cmsis_nn_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
+                                                        const uint16_t dim_im_in_x,
+                                                        const uint16_t dim_im_in_y,
+                                                        const uint16_t ch_im_in,
+                                                        const q7_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel_x,
+                                                        const uint16_t dim_kernel_y,
+                                                        const uint16_t padding_x,
+                                                        const uint16_t padding_y,
+                                                        const uint16_t stride_x,
+                                                        const uint16_t stride_y,
+                                                        const q7_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q7_t *Im_out,
+                                                        const uint16_t dim_im_out_x,
+                                                        const uint16_t dim_im_out_y,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB);
+
+/**
+ * @brief Basic Q15 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+arm_cmsis_nn_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
+                                               const uint16_t dim_im_in,
+                                               const uint16_t ch_im_in,
+                                               const q15_t *wt,
+                                               const uint16_t ch_im_out,
+                                               const uint16_t dim_kernel,
+                                               const uint16_t padding,
+                                               const uint16_t stride,
+                                               const q15_t *bias,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               q15_t *Im_out,
+                                               const uint16_t dim_im_out,
+                                               q15_t *bufferA,
+                                               q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is multiple of 4
+ *   ch_im_out is multiple of 2
+ */
+arm_cmsis_nn_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
+                                             const uint16_t dim_im_in,
+                                             const uint16_t ch_im_in,
+                                             const q7_t *wt,
+                                             const uint16_t ch_im_out,
+                                             const uint16_t dim_kernel,
+                                             const uint16_t padding,
+                                             const uint16_t stride,
+                                             const q7_t *bias,
+                                             const uint16_t bias_shift,
+                                             const uint16_t out_shift,
+                                             q7_t *Im_out,
+                                             const uint16_t dim_im_out,
+                                             q15_t *bufferA,
+                                             q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is multiple of 4
+ *   ch_im_out is multiple of 2
+ */
+
+arm_cmsis_nn_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                                       const uint16_t dim_im_in_x,
+                                                       const uint16_t dim_im_in_y,
+                                                       const uint16_t ch_im_in,
+                                                       const q7_t *wt,
+                                                       const uint16_t ch_im_out,
+                                                       const uint16_t dim_kernel_x,
+                                                       const uint16_t dim_kernel_y,
+                                                       const uint16_t padding_x,
+                                                       const uint16_t padding_y,
+                                                       const uint16_t stride_x,
+                                                       const uint16_t stride_y,
+                                                       const q7_t *bias,
+                                                       const uint16_t bias_shift,
+                                                       const uint16_t out_shift,
+                                                       q7_t *Im_out,
+                                                       const uint16_t dim_im_out_x,
+                                                       const uint16_t dim_im_out_y,
+                                                       q15_t *bufferA,
+                                                       q7_t *bufferB);
+
+/**
+ * @brief Fast Q7 version of 1x1 convolution (non-square shape)
+ * @param[in]       Im_in         pointer to input tensor
+ * @param[in]       dim_im_in_x   input tensor dimension x
+ * @param[in]       dim_im_in_y   input tensor dimension y
+ * @param[in]       ch_im_in      number of input tensor channels
+ * @param[in]       wt            pointer to kernel weights
+ * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x  filter kernel size x
+ * @param[in]       dim_kernel_y  filter kernel size y
+ * @param[in]       padding_x     padding size x
+ * @param[in]       padding_y     padding size y
+ * @param[in]       stride_x      convolution stride x
+ * @param[in]       stride_y      convolution stride y
+ * @param[in]       bias          pointer to bias
+ * @param[in]       bias_shift    amount of left-shift for bias
+ * @param[in]       out_shift     amount of right-shift for output
+ * @param[in,out]   Im_out        pointer to output tensor
+ * @param[in]       dim_im_out_x  output tensor dimension x
+ * @param[in]       dim_im_out_y  output tensor dimension y
+ * @param[in,out]   bufferA       pointer to buffer space for input
+ * @param[in,out]   bufferB       pointer to buffer space for output
+ * @return     The function returns either
+ *                          <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
+ *                          <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
+ *
+ * This function implements convolution with 1x1 kernel size (i.e., dim_kernel_x=1
+ * and dim_kernel_y=1). It can be used for
+ * second half of MobileNets after depthwise separable convolution.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is multiple of 4
+ *   ch_im_out is multiple of 2
+ */
+arm_cmsis_nn_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                                           const uint16_t dim_im_in_x,
+                                                           const uint16_t dim_im_in_y,
+                                                           const uint16_t ch_im_in,
+                                                           const q7_t *wt,
+                                                           const uint16_t ch_im_out,
+                                                           const uint16_t dim_kernel_x,
+                                                           const uint16_t dim_kernel_y,
+                                                           const uint16_t padding_x,
+                                                           const uint16_t padding_y,
+                                                           const uint16_t stride_x,
+                                                           const uint16_t stride_y,
+                                                           const q7_t *bias,
+                                                           const uint16_t bias_shift,
+                                                           const uint16_t out_shift,
+                                                           q7_t *Im_out,
+                                                           const uint16_t dim_im_out_x,
+                                                           const uint16_t dim_im_out_y,
+                                                           q15_t *bufferA,
+                                                           q7_t *bufferB);
+
+/**
+ * @brief Fast s8 version for 1x1 convolution (non-square shape)
+ *
+ * @param[in, out] ctx           Function context that contains the additional buffer if required by the function.
+ *                               arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required.
+ *                               The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params   Convolution parameters (e.g. strides, dilations, pads,...).
+ *                               Range of conv_params->input_offset  : [-127, 128]
+ *                               Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params  Per-channel quantization info.
+ *                               It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data    Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims   Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN]
+ * @param[in]      filter_data   Filter data pointer. Data type: int8
+ * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data     Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims   Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data   Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
+ *                  <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# input_dims->c is a multiple of 4
+ *      -# conv_params->padding.w = conv_params->padding.h = 0
+ *      -# conv_params->stride.w = conv_params->stride.h = 1
+ *
+ */
+arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
+                                             const cmsis_nn_conv_params *conv_params,
                                              const cmsis_nn_per_channel_quant_params *quant_params,
                                              const cmsis_nn_dims *input_dims,
                                              const q7_t *input_data,
@@ -937,480 +757,966 @@ extern "C"
                                              const cmsis_nn_dims *output_dims,
                                              q7_t *output_data);
 
-    /**
-     * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8()
-     *
-     * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
-     *                                dw_conv_params->dilation is not used.
-     *                                Range of dw_conv_params->input_offset : [-127, 128]
-     *                                Range of dw_conv_params->input_offset : [-128, 127]
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
-     *                                Batch argument N is not used and assumed to be 1.
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
-     * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
-     * @return                        Size of additional memory required for optimizations in bytes.
-     *
-     */
-    int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
-                                                          const cmsis_nn_dims *input_dims,
-                                                          const cmsis_nn_dims *filter_dims,
-                                                          const cmsis_nn_dims *output_dims);
-
-    /**
-     * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
-     *
-     * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
-     *                                definition file to see if an additional buffer is required.
-     *                                Optional function {API}_get_buffer_size() provides the buffer
-     *                                size if an additional buffer is required.
-     *                                exists if additional memory is.
-     * @param[in]      dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...)
-     *                                dw_conv_params->dilation is not used.
-     *                                Range of dw_conv_params->input_offset : [-127, 128]
-     *                                Range of dw_conv_params->input_offset : [-128, 127]
-     * @param[in]      quant_params   Per-channel quantization info.
-     *                               It contains the multiplier and shift values to be applied to each
-     *                               output channel
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
-     *                                Batch argument N is not used.
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     * @param[in]      bias_data      Bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [1, H, W, C_OUT]
-     * @param[in, out] output_data    Output data pointer. Data type: int8
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     * @details
-     *    - Supported framework: TensorFlow Lite
-     *    - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     */
-    arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
-                                     const cmsis_nn_dw_conv_params *dw_conv_params,
-                                     const cmsis_nn_per_channel_quant_params *quant_params,
-                                     const cmsis_nn_dims *input_dims,
-                                     const q7_t *input_data,
-                                     const cmsis_nn_dims *filter_dims,
-                                     const q7_t *filter_data,
-                                     const cmsis_nn_dims *bias_dims,
-                                     const int32_t *bias_data,
-                                     const cmsis_nn_dims *output_dims,
-                                     q7_t *output_data);
-
-    /**
-     * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on
-     *        the input arguments(documented below). Refer arm_depthwise_conv_s8() for function
-     *        argument details.
-     *
-     * @return     The function returns one of the following
-     *                <code>ARM_MATH_SIZE_MISMATCH</code> - Unsupported dimension of tensors
-     *                <code>ARM_MATH_ARGUMENT_ERROR</code> - Unsupported pad size along the x axis
-     *                <code>ARM_MATH_SUCCESS</code> - Successful operation
-     *
-     * @details
-     *   - Supported framework : TensorFlow Lite Micro
-     *   - The following constrains on the arguments apply
-     *      -# Number of input channel equals number of output channels
-     *      -# Filter height and width equals 3
-     *      -# Padding along x is either 0 or 1.
-     *
-     */
-    arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
-                                         const cmsis_nn_dw_conv_params *dw_conv_params,
-                                         const cmsis_nn_per_channel_quant_params *quant_params,
-                                         const cmsis_nn_dims *input_dims,
-                                         const q7_t *input_data,
-                                         const cmsis_nn_dims *filter_dims,
-                                         const q7_t *filter_data,
-                                         const cmsis_nn_dims *bias_dims,
-                                         const int32_t *bias_data,
-                                         const cmsis_nn_dims *output_dims,
-                                         q7_t *output_data);
-
-    /**
-     * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel.
-     *        Refer arm_depthwise_conv_s8() for function argument details.
-     *
-     * @return     The function returns one of the following
-     *                <code>ARM_MATH_SIZE_MISMATCH</code> - input channel != output channel or
-     *                                                      ch_mult != 1
-     *                <code>ARM_MATH_SUCCESS</code> - Successful operation
-     *
-     * @note       If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read out
-     *             for the following if MVE optimizations(Arm Helium Technology) are used.
-     *               - Output shift
-     *               - Output multiplier
-     *               - Output bias
-     *               - kernel
-     * @details
-     *    - Supported framework: TensorFlow Lite
-     *    - The following constrains on the arguments apply
-     *        -# Number of input channel equals number of output channels or ch_mult equals 1
-     *    - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     *    - Reccomended when number of channels is 4 or greater.
-     *
-     */
-    arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
-                                         const cmsis_nn_dw_conv_params *dw_conv_params,
-                                         const cmsis_nn_per_channel_quant_params *quant_params,
-                                         const cmsis_nn_dims *input_dims,
-                                         const q7_t *input_data,
-                                         const cmsis_nn_dims *filter_dims,
-                                         const q7_t *filter_data,
-                                         const cmsis_nn_dims *bias_dims,
-                                         const int32_t *bias_data,
-                                         const cmsis_nn_dims *output_dims,
-                                         q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for optimized s8 depthwise convolution
-     * function with constraint that in_channel equals out_channel.
-     * @param[in]       input_dims     Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
-     *                                 Batch argument N is not used.
-     * @param[in]       filter_dims    Filter tensor dimensions. Format: [1, H, W, C_OUT]
-     * @return          The function returns  required buffer size in bytes
-     *
-     */
-    int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims,
-                                                      const cmsis_nn_dims *filter_dims);
-
-    /**
-     * @defgroup FC Fully-connected Layer Functions
-     *
-     * Collection of fully-connected and matrix multiplication functions.
-     *
-     * Fully-connected layer is basically a matrix-vector multiplication
-     * with bias. The matrix is the weights and the input/output vectors
-     * are the activation values. Supported {weight, activation} precisions
-     * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.
-     *
-     * Here we have two types of kernel functions. The basic function
-     * implements the function using regular GEMV approach. The opt functions
-     * operates with weights in interleaved formats.
-     *
-     */
-
-    /**
-     *@brief Q7 basic fully-connected layer function
-     *@param[in]       pV          pointer to input vector
-     *@param[in]       pM          pointer to matrix weights
-     *@param[in]       dim_vec     length of the vector
-     *@param[in]       num_of_rows number of rows in weight matrix
-     *@param[in]       bias_shift  amount of left-shift for bias
-     *@param[in]       out_shift   amount of right-shift for output
-     *@param[in]       bias        pointer to bias
-     *@param[in,out]   pOut        pointer to output vector
-     *@param[in,out]   vec_buffer  pointer to buffer space for input
-     *@return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_q7(const q7_t *pV,
-                                      const q7_t *pM,
-                                      const uint16_t dim_vec,
-                                      const uint16_t num_of_rows,
-                                      const uint16_t bias_shift,
-                                      const uint16_t out_shift,
-                                      const q7_t *bias,
-                                      q7_t *pOut,
-                                      q15_t *vec_buffer);
-
-    /**
-     * @brief Basic s8 Fully Connected function.
-     *
-     * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
-     *                                definition file to see if an additional buffer is required.
-     *                                Optional function {API}_get_buffer_size() provides the buffer
-     *                                size if an additional buffer is required.
-     * @param[in]      fc_params      Fully Connected layer parameters (e.g. strides, dilations, pads,...)
-     *                                Range of fc_params->input_offset  : [-127, 128]
-     *                                Range of fc_params->filter_offset : [-127, 128]
-     *                                Range of fc_params->output_offset : [-128, 127]
-     * @param[in]      quant_params   Per-tensor quantization info.
-     *                                It contains the multiplier and shift values to be applied to the output tensor.
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
-     *                                Input dimension is taken as Nx(H * W * C_IN)
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Two dimensional filter dimensions. Format: [N, C]
-     *                                N : accumulation depth and equals (H * W * C_IN) from input_dims
-     *                                C : output depth and equals C_OUT in output_dims
-     *                                H & W : Not used
-     * @param[in]      filter_data    Filter data pointer. Data type: int8
-     * @param[in]      bias_dims      Bias tensor dimensions. Format: [C_OUT]
-     *                                N, H, W : Not used
-     * @param[in]      bias_data      Bias data pointer. Data type: int32
-     * @param[in]      output_dims    Output tensor dimensions. Format: [N, C_OUT]
-     *                                N : Batches
-     *                                C_OUT : Output depth
-     *                                H & W : Not used.
-     * @param[in, out] output_data    Output data pointer. Data type: int8
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     * @details
-     *    - Supported framework: TensorFlow Lite
-     *    - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     */
-    arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
-                                      const cmsis_nn_fc_params *fc_params,
-                                      const cmsis_nn_per_tensor_quant_params *quant_params,
-                                      const cmsis_nn_dims *input_dims,
-                                      const q7_t *input_data,
-                                      const cmsis_nn_dims *filter_dims,
-                                      const q7_t *filter_data,
-                                      const cmsis_nn_dims *bias_dims,
-                                      const int32_t *bias_data,
-                                      const cmsis_nn_dims *output_dims,
-                                      q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for S8 basic fully-connected and
-     * matrix multiplication layer function for TF Lite
-     * @param[in]      filter_dims             dimension of filter
-     * @return         The function returns    required buffer size in bytes
-     *
-     */
-    int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
-
-    /**
-     * @brief Q7 opt fully-connected layer function
-     * @param[in]       pV          pointer to input vector
-     * @param[in]       pM          pointer to matrix weights
-     * @param[in]       dim_vec     length of the vector
-     * @param[in]       num_of_rows number of rows in weight matrix
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        pointer to bias
-     * @param[in,out]   pOut        pointer to output vector
-     * @param[in,out]   vec_buffer  pointer to buffer space for input
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_q7_opt(const q7_t *pV,
-                                          const q7_t *pM,
-                                          const uint16_t dim_vec,
-                                          const uint16_t num_of_rows,
-                                          const uint16_t bias_shift,
-                                          const uint16_t out_shift,
-                                          const q7_t *bias,
-                                          q7_t *pOut,
-                                          q15_t *vec_buffer);
-
-    /**
-     * @brief Q15 basic fully-connected layer function
-     * @param[in]       pV          pointer to input vector
-     * @param[in]       pM          pointer to matrix weights
-     * @param[in]       dim_vec     length of the vector
-     * @param[in]       num_of_rows number of rows in weight matrix
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        pointer to bias
-     * @param[in,out]   pOut        pointer to output vector
-     * @param[in,out]   vec_buffer  pointer to buffer space for input
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_q15(const q15_t *pV,
-                                       const q15_t *pM,
-                                       const uint16_t dim_vec,
-                                       const uint16_t num_of_rows,
-                                       const uint16_t bias_shift,
-                                       const uint16_t out_shift,
-                                       const q15_t *bias,
-                                       q15_t *pOut,
-                                       q15_t *vec_buffer);
-
-    /**
-     * @brief Q15 opt fully-connected layer function
-     * @param[in]       pV          pointer to input vector
-     * @param[in]       pM          pointer to matrix weights
-     * @param[in]       dim_vec     length of the vector
-     * @param[in]       num_of_rows number of rows in weight matrix
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        pointer to bias
-     * @param[in,out]   pOut        pointer to output vector
-     * @param[in,out]   vec_buffer  pointer to buffer space for input
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_q15_opt(const q15_t *pV,
-                                           const q15_t *pM,
+/**
+ * @brief Get the required buffer size for arm_convolve_1x1_s8_fast()
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims);
+
+/**
+ * @brief 1xn convolution
+ *
+ * @param[in, out] ctx           Function context that contains the additional buffer if required by the function.
+ *                               arm_convolve_1_x_n_s8_get_buffer_size() returns the buffer size if one is required.
+ *                               The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      conv_params   Convolution parameters (e.g. strides, dilations, pads,...).
+ *                               Range of conv_params->input_offset  : [-127, 128]
+ *                               Range of conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params  Per-channel quantization info.
+ *                               It contains the multiplier and shift values to be applied to each output channel
+ * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]      input_data    Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims   Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal
+ *                               spatial filter dimension
+ * @param[in]      filter_data   Filter data pointer. Data type: int8
+ * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data     Optional bias data pointer. Data type: int32
+ * @param[in]      output_dims   Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[out]     output_data   Output data pointer. Data type: int8
+ *
+ * @return     The function returns either
+ *                  <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail, or
+ *                  <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# input_dims->n equals 1
+ *      -# output_dims->w is a multiple of 4
+ *      -# Explicit constraints (since it is for 1xN convolution)
+ *      -## input_dims->h equals 1
+ *      -## output_dims->h equals 1
+ *      -## filter_dims->h equals 1
+ * @todo  Remove constraint on output_dims->w to make the function generic.
+ *
+ */
+arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_conv_params *conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input_data,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *filter_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int32_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output_data);
+
+/**
+ * @brief Get the required additional buffer size for 1xn convolution
+ *
+ * @param[in]       input_dims            Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ * @param[in]       filter_dims           Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the
+ *                                        horizontal spatial filter dimension
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Q7 version of convolution for RGB image
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This kernel is written exclusively for convolution with ch_im_in
+ * equal to 3. This applies to the first layer of CNNs, whose input
+ * is an image in RGB format.
+ */
+
+arm_cmsis_nn_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
+                                            const uint16_t dim_im_in,
+                                            const uint16_t ch_im_in,
+                                            const q7_t *wt,
+                                            const uint16_t ch_im_out,
+                                            const uint16_t dim_kernel,
+                                            const uint16_t padding,
+                                            const uint16_t stride,
+                                            const q7_t *bias,
+                                            const uint16_t bias_shift,
+                                            const uint16_t out_shift,
+                                            q7_t *Im_out,
+                                            const uint16_t dim_im_out,
+                                            q15_t *bufferA,
+                                            q7_t *bufferB);
+
+/**
+ * @brief Fast Q15 convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 2
+ *   ch_im_out is a multiple of 2
+ *   dim_im_out is a multiple of 2
+ */
+
+arm_cmsis_nn_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
+                                              const uint16_t dim_im_in,
+                                              const uint16_t ch_im_in,
+                                              const q15_t *wt,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t dim_kernel,
+                                              const uint16_t padding,
+                                              const uint16_t stride,
+                                              const q15_t *bias,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              q15_t *Im_out,
+                                              const uint16_t dim_im_out,
+                                              q15_t *bufferA,
+                                              q7_t *bufferB);
+
+/**
+ * @brief Fast Q15 convolution function (non-square shape)
+ * @param[in]       Im_in        pointer to input tensor
+ * @param[in]       dim_im_in_x  input tensor dimension x
+ * @param[in]       dim_im_in_y  input tensor dimension y
+ * @param[in]       ch_im_in     number of input tensor channels
+ * @param[in]       wt           pointer to kernel weights
+ * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x filter kernel size x
+ * @param[in]       dim_kernel_y filter kernel size y
+ * @param[in]       padding_x    padding size x
+ * @param[in]       padding_y    padding size y
+ * @param[in]       stride_x     convolution stride x
+ * @param[in]       stride_y     convolution stride y
+ * @param[in]       bias         pointer to bias
+ * @param[in]       bias_shift   amount of left-shift for bias
+ * @param[in]       out_shift    amount of right-shift for output
+ * @param[in,out]   Im_out       pointer to output tensor
+ * @param[in]       dim_im_out_x output tensor dimension x
+ * @param[in]       dim_im_out_y output tensor dimension y
+ * @param[in,out]   bufferA      pointer to buffer space for input
+ * @param[in,out]   bufferB      pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * @details
+ *
+ * <b>Buffer size:</b>
+ *
+ * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel (NOTE(review): presumably dim_kernel_x*dim_kernel_y - confirm)
+ *
+ * bufferB size: 0
+ *
+ * <b>Input dimension constraints:</b>
+ *
+ * ch_im_in is a multiple of 2
+ *
+ * ch_im_out is a multiple of 2
+ *
+ */
+
+arm_cmsis_nn_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
+                                                        const uint16_t dim_im_in_x,
+                                                        const uint16_t dim_im_in_y,
+                                                        const uint16_t ch_im_in,
+                                                        const q15_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel_x,
+                                                        const uint16_t dim_kernel_y,
+                                                        const uint16_t padding_x,
+                                                        const uint16_t padding_y,
+                                                        const uint16_t stride_x,
+                                                        const uint16_t stride_y,
+                                                        const q15_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q15_t *Im_out,
+                                                        const uint16_t dim_im_out_x,
+                                                        const uint16_t dim_im_out_y,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB);
+
+/**
+ * @brief Q7 depthwise separable convolution function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       wt          pointer to kernel weights
+ * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      convolution stride
+ * @param[in]       bias        pointer to bias
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in,out]   Im_out      pointer to output tensor
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   bufferB     pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is a multiple of 2
+ *   ch_im_out is a multiple of 2
+ */
+
+arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
+                                                        const uint16_t dim_im_in,
+                                                        const uint16_t ch_im_in,
+                                                        const q7_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel,
+                                                        const uint16_t padding,
+                                                        const uint16_t stride,
+                                                        const q7_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q7_t *Im_out,
+                                                        const uint16_t dim_im_out,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB);
+
+/**
+ * @brief Q7 depthwise separable convolution function (non-square shape)
+ * @param[in]       Im_in         pointer to input tensor
+ * @param[in]       dim_im_in_x   input tensor dimension x
+ * @param[in]       dim_im_in_y   input tensor dimension y
+ * @param[in]       ch_im_in      number of input tensor channels
+ * @param[in]       wt            pointer to kernel weights
+ * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
+ * @param[in]       dim_kernel_x  filter kernel size x
+ * @param[in]       dim_kernel_y  filter kernel size y
+ * @param[in]       padding_x     padding sizes x
+ * @param[in]       padding_y     padding sizes y
+ * @param[in]       stride_x      convolution stride x
+ * @param[in]       stride_y      convolution stride y
+ * @param[in]       bias          pointer to bias
+ * @param[in]       bias_shift    amount of left-shift for bias
+ * @param[in]       out_shift     amount of right-shift for output
+ * @param[in,out]   Im_out        pointer to output tensor
+ * @param[in]       dim_im_out_x  output tensor dimension x
+ * @param[in]       dim_im_out_y  output tensor dimension y
+ * @param[in,out]   bufferA       pointer to buffer space for input
+ * @param[in,out]   bufferB       pointer to buffer space for output
+ * @return     The function returns either
+ * <code>ARM_CMSIS_NN_ARG_ERROR</code> or <code>ARM_CMSIS_NN_SUCCESS</code> based on the outcome of input arguments
+ * constraints checking.
+ *
+ * This function is the version with full list of optimization tricks, but with
+ * some constraints:
+ *   ch_im_in is multiple of 2
+ *   ch_im_out is multiple of 2
+ */
+arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
+                                                                  const uint16_t dim_im_in_x,
+                                                                  const uint16_t dim_im_in_y,
+                                                                  const uint16_t ch_im_in,
+                                                                  const q7_t *wt,
+                                                                  const uint16_t ch_im_out,
+                                                                  const uint16_t dim_kernel_x,
+                                                                  const uint16_t dim_kernel_y,
+                                                                  const uint16_t padding_x,
+                                                                  const uint16_t padding_y,
+                                                                  const uint16_t stride_x,
+                                                                  const uint16_t stride_y,
+                                                                  const q7_t *bias,
+                                                                  const uint16_t bias_shift,
+                                                                  const uint16_t out_shift,
+                                                                  q7_t *Im_out,
+                                                                  const uint16_t dim_im_out_x,
+                                                                  const uint16_t dim_im_out_y,
+                                                                  q15_t *bufferA,
+                                                                  q7_t *bufferB);
+
+/**
+ * @brief Wrapper function to pick the right optimized s8 depthwise convolution function
+ *
+ * @param[in, out] ctx             Function context (e.g. temporary buffer). Check the function
+ *                                 definition file to see if an additional buffer is required.
+ *                                 Optional function {API}_get_buffer_size() provides the buffer
+ *                                 size if required.
+ *                                 The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 dw_conv_params->dilation is not used.
+ *                                 Range of dw_conv_params->input_offset : [-127, 128]
+ *                                 Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params    Per-channel quantization info.
+ *                                 It contains the multiplier and shift values to be applied to each
+ *                                 output channel
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                 Batch argument N is not used and assumed to be 1.
+ * @param[in]      input_data      Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data     Filter data pointer. Data type: int8
+ * @param[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data       Bias data pointer. Data type: int32
+ * @param[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in, out] output_data     Output data pointer. Data type: int8
+ * @return     The function returns
+ *                <code>ARM_CMSIS_NN_SUCCESS</code>   -  Successful completion.
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - Picks one of the following functions
+ *        -# arm_depthwise_conv_s8()
+ *        -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only
+ *        -# arm_depthwise_conv_s8_opt()
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the
+ * boundary.
+ */
+arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
+                                                  const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                  const cmsis_nn_per_channel_quant_params *quant_params,
+                                                  const cmsis_nn_dims *input_dims,
+                                                  const q7_t *input_data,
+                                                  const cmsis_nn_dims *filter_dims,
+                                                  const q7_t *filter_data,
+                                                  const cmsis_nn_dims *bias_dims,
+                                                  const int32_t *bias_data,
+                                                  const cmsis_nn_dims *output_dims,
+                                                  q7_t *output_data);
+
+/**
+ * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8()
+ *
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 Range of dw_conv_params->input_offset : [-127, 128]
+ *                                 Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                 Batch argument N is not used and assumed to be 1.
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return                         Size of additional memory required for optimizations in bytes.
+ *
+ */
+int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                      const cmsis_nn_dims *input_dims,
+                                                      const cmsis_nn_dims *filter_dims,
+                                                      const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions.
+ *
+ * @param[in, out] ctx             Function context (e.g. temporary buffer). Check the function
+ *                                 definition file to see if an additional buffer is required.
+ *                                 Optional function {API}_get_buffer_size() provides the buffer
+ *                                 size if an additional buffer is required.
+ *                                 The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 dw_conv_params->dilation is not used.
+ *                                 Range of dw_conv_params->input_offset : [-127, 128]
+ *                                 Range of dw_conv_params->output_offset : [-128, 127]
+ * @param[in]      quant_params    Per-channel quantization info.
+ *                                 It contains the multiplier and shift values to be applied to each
+ *                                 output channel
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                                 Batch argument N is not used.
+ * @param[in]      input_data      Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data     Filter data pointer. Data type: int8
+ * @param[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data       Bias data pointer. Data type: int32
+ * @param[in]      output_dims     Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[in, out] output_data     Output data pointer. Data type: int8
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ */
+arm_cmsis_nn_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_dw_conv_params *dw_conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input_data,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *filter_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int32_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output_data);
+
+/**
+ * @brief Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions.
+ *
+ * @param[in, out] ctx             Function context (e.g. temporary buffer).
+ *                                 Check the function definition file to see if an
+ *                                 additional buffer is required. Optional function
+ *                                 {API}_get_buffer_size() provides the buffer size
+ *                                 if an additional buffer is required.
+ *                                 The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 dw_conv_params->input_offset  : Not used
+ *                                 dw_conv_params->output_offset : Not used
+ * @param[in]      quant_params    Per-channel quantization info.
+ *                                 It contains the multiplier and shift values to be applied to each
+ *                                 output channel
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                                 Batch argument N is not used.
+ * @param[in]      input_data      Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data     Filter data pointer. Data type: int8
+ * @param[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data       Bias data pointer. Data type: int64
+ * @param[in]      output_dims     Output tensor dimensions. Format: [N, H, W, C_OUT]
+ * @param[in, out] output_data     Output data pointer. Data type: int16
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q15 is used as data type even though it is s16 data. It is done so to be consistent with existing APIs.
+ */
+arm_cmsis_nn_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx,
+                                           const cmsis_nn_dw_conv_params *dw_conv_params,
+                                           const cmsis_nn_per_channel_quant_params *quant_params,
+                                           const cmsis_nn_dims *input_dims,
+                                           const q15_t *input_data,
+                                           const cmsis_nn_dims *filter_dims,
+                                           const q7_t *filter_data,
+                                           const cmsis_nn_dims *bias_dims,
+                                           const int64_t *bias_data,
+                                           const cmsis_nn_dims *output_dims,
+                                           q15_t *output_data);
+
+/**
+ * @brief Wrapper function to pick the right optimized s16 depthwise convolution function
+ *
+ * @param[in, out] ctx             Function context (e.g. temporary buffer). Check the function
+ *                                 definition file to see if an additional buffer is required.
+ *                                 Optional function {API}_get_buffer_size() provides the buffer
+ *                                 size if required.
+ *                                 The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 dw_conv_params->dilation is not used.
+ *                                 Range of dw_conv_params->input_offset : Not used
+ *                                 Range of dw_conv_params->output_offset : Not used
+ * @param[in]      quant_params    Per-channel quantization info.
+ *                                 It contains the multiplier and shift values to be applied to each
+ *                                 output channel
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                 Batch argument N is not used and assumed to be 1.
+ * @param[in]      input_data      Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      filter_data     Filter data pointer. Data type: int8
+ * @param[in]      bias_dims       Bias tensor dimensions. Format: [C_OUT]
+ * @param[in]      bias_data       Bias data pointer. Data type: int64
+ * @param[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in, out] output_data     Output data pointer. Data type: int16
+ * @return     The function returns
+ *                <code>ARM_CMSIS_NN_SUCCESS</code>   -  Successful completion.
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - Picks one of the following functions
+ *        -# arm_depthwise_conv_s16()
+ *        -# arm_depthwise_conv_fast_s16()  - Cortex-M CPUs with DSP extension only
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ */
+arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16(const cmsis_nn_context *ctx,
+                                                   const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                   const cmsis_nn_per_channel_quant_params *quant_params,
+                                                   const cmsis_nn_dims *input_dims,
+                                                   const q15_t *input_data,
+                                                   const cmsis_nn_dims *filter_dims,
+                                                   const q7_t *filter_data,
+                                                   const cmsis_nn_dims *bias_dims,
+                                                   const int64_t *bias_data,
+                                                   const cmsis_nn_dims *output_dims,
+                                                   q15_t *output_data);
+
+/**
+ * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s16()
+ *
+ * @param[in]      dw_conv_params  Depthwise convolution parameters (e.g. strides, dilations, pads,...)
+ *                                 Range of dw_conv_params->input_offset : Not used
+ *                                 Range of dw_conv_params->output_offset : Not used
+ * @param[in]      input_dims      Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                                 Batch argument N is not used and assumed to be 1.
+ * @param[in]      filter_dims     Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @param[in]      output_dims     Output tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return                         Size of additional memory required for optimizations in bytes.
+ *
+ */
+int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                       const cmsis_nn_dims *input_dims,
+                                                       const cmsis_nn_dims *filter_dims,
+                                                       const cmsis_nn_dims *output_dims);
+
+/**
+ * @brief Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel.
+ *        Refer arm_depthwise_conv_s16() for function argument details.
+ *
+ * @return     The function returns one of the following
+ *                <code>ARM_CMSIS_NN_ARG_ERROR</code> - ctx->buf == NULL and
+ *                                                      arm_depthwise_conv_fast_s16_get_buffer_size() > 0 or
+ *                                                      input channel != output channel or
+ *                                                      ch_mult != 1
+ *
+ *                <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - The following constraints on the arguments apply
+ *        -# Number of input channel equals number of output channels or ch_mult equals 1
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    - Recommended when number of channels is 4 or greater.
+ *
+ */
+arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
+                                                const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                const cmsis_nn_per_channel_quant_params *quant_params,
+                                                const cmsis_nn_dims *input_dims,
+                                                const q15_t *input_data,
+                                                const cmsis_nn_dims *filter_dims,
+                                                const q7_t *filter_data,
+                                                const cmsis_nn_dims *bias_dims,
+                                                const int64_t *bias_data,
+                                                const cmsis_nn_dims *output_dims,
+                                                q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for optimized s16 depthwise convolution
+ * function with constraint that in_channel equals out_channel.
+ * @param[in]       input_dims   Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
+ *                               Batch argument N is not used.
+ * @param[in]       filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_depthwise_conv_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on
+ *        the input arguments(documented below). Refer arm_depthwise_conv_s8() for function
+ *        argument details.
+ *
+ * @return     The function returns one of the following
+ *                <code>ARM_CMSIS_NN_ARG_ERROR</code> - Unsupported dimension of tensors
+ *                                                    - Unsupported pad size along the x axis
+ *                <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *   - Supported framework : TensorFlow Lite Micro
+ *   - The following constraints on the arguments apply
+ *      -# Number of input channel equals number of output channels
+ *      -# Filter height and width equals 3
+ *      -# Padding along x is either 0 or 1.
+ *
+ */
+arm_cmsis_nn_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
+                                              const cmsis_nn_dw_conv_params *dw_conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const cmsis_nn_dims *input_dims,
+                                              const q7_t *input_data,
+                                              const cmsis_nn_dims *filter_dims,
+                                              const q7_t *filter_data,
+                                              const cmsis_nn_dims *bias_dims,
+                                              const int32_t *bias_data,
+                                              const cmsis_nn_dims *output_dims,
+                                              q7_t *output_data);
+
+/**
+ * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel.
+ *        Refer arm_depthwise_conv_s8() for function argument details.
+ *
+ * @return     The function returns one of the following
+ *                <code>ARM_CMSIS_NN_ARG_ERROR</code> - input channel != output channel or
+ *                                                      ch_mult != 1
+ *                <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @note       If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read out
+ *             for the following if MVE optimizations(Arm Helium Technology) are used.
+ *               - Output shift
+ *               - Output multiplier
+ *               - Output bias
+ *               - kernel
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - The following constraints on the arguments apply
+ *        -# Number of input channel equals number of output channels or ch_mult equals 1
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *    - Recommended when number of channels is 4 or greater.
+ *
+ */
+arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
+                                              const cmsis_nn_dw_conv_params *dw_conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const cmsis_nn_dims *input_dims,
+                                              const q7_t *input_data,
+                                              const cmsis_nn_dims *filter_dims,
+                                              const q7_t *filter_data,
+                                              const cmsis_nn_dims *bias_dims,
+                                              const int32_t *bias_data,
+                                              const cmsis_nn_dims *output_dims,
+                                              q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for optimized s8 depthwise convolution
+ * function with constraint that in_channel equals out_channel.
+ * @param[in]       input_dims   Input (activation) tensor dimensions. Format: [1, H, W, C_IN]
+ *                               Batch argument N is not used.
+ * @param[in]       filter_dims  Filter tensor dimensions. Format: [1, H, W, C_OUT]
+ * @return          The function returns the required buffer size in bytes
+ *
+ */
+int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
+
+/**
+ * @defgroup FC Fully-connected Layer Functions
+ *
+ * Collection of fully-connected and matrix multiplication functions.
+ *
+ * Fully-connected layer is basically a matrix-vector multiplication
+ * with bias. The matrix is the weights and the input/output vectors
+ * are the activation values. Supported {weight, activation} precisions
+ * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.
+ *
+ * Here we have two types of kernel functions. The basic function
+ * implements the function using regular GEMV approach. The opt functions
+ * operate with weights in interleaved formats.
+ *
+ */
+
+/**
+ *@brief Q7 basic fully-connected layer function
+ *@param[in]       pV          pointer to input vector
+ *@param[in]       pM          pointer to matrix weights
+ *@param[in]       dim_vec     length of the vector
+ *@param[in]       num_of_rows number of rows in weight matrix
+ *@param[in]       bias_shift  amount of left-shift for bias
+ *@param[in]       out_shift   amount of right-shift for output
+ *@param[in]       bias        pointer to bias
+ *@param[in,out]   pOut        pointer to output vector
+ *@param[in,out]   vec_buffer  pointer to buffer space for input
+ *@return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_q7(const q7_t *pV,
+                                           const q7_t *pM,
                                            const uint16_t dim_vec,
                                            const uint16_t num_of_rows,
                                            const uint16_t bias_shift,
                                            const uint16_t out_shift,
-                                           const q15_t *bias,
-                                           q15_t *pOut,
+                                           const q7_t *bias,
+                                           q7_t *pOut,
                                            q15_t *vec_buffer);
 
-    /**
-     * @brief Mixed Q15-Q7 fully-connected layer function
-     * @param[in]       pV          pointer to input vector
-     * @param[in]       pM          pointer to matrix weights
-     * @param[in]       dim_vec     length of the vector
-     * @param[in]       num_of_rows number of rows in weight matrix
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        pointer to bias
-     * @param[in,out]   pOut        pointer to output vector
-     * @param[in,out]   vec_buffer  pointer to buffer space for input
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
-                                                  const q7_t *pM,
-                                                  const uint16_t dim_vec,
-                                                  const uint16_t num_of_rows,
-                                                  const uint16_t bias_shift,
-                                                  const uint16_t out_shift,
-                                                  const q7_t *bias,
-                                                  q15_t *pOut,
-                                                  q15_t *vec_buffer);
-
-    /**
-     * @brief Mixed Q15-Q7 opt fully-connected layer function
-     * @param[in]       pV          pointer to input vector
-     * @param[in]       pM          pointer to matrix weights
-     * @param[in]       dim_vec     length of the vector
-     * @param[in]       num_of_rows number of rows in weight matrix
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        pointer to bias
-     * @param[in,out]   pOut        pointer to output vector
-     * @param[in,out]   vec_buffer  pointer to buffer space for input
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     */
-
-    arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
-                                                      const q7_t *pM,
-                                                      const uint16_t dim_vec,
-                                                      const uint16_t num_of_rows,
-                                                      const uint16_t bias_shift,
-                                                      const uint16_t out_shift,
-                                                      const q7_t *bias,
-                                                      q15_t *pOut,
-                                                      q15_t *vec_buffer);
-
-    /**
-     * @brief Matrix-Multiplication Kernels for Convolution
-     *
-     * These functions are used within convolution layer functions for
-     * matrix multiplication.
-     *
-     * The implementation is similar to CMSIS-DSP arm_mat_mult functions
-     * with one Q7 and one Q15 operands. The Q15 operand is the im2col
-     * output which is always with 2 columns.
-     *
-     */
-
-    /**
-     * @brief Matrix-multiplication function for convolution
-     * @param[in]       pA          pointer to operand A
-     * @param[in]       pInBuffer   pointer to operand B, always conssists of 2 vectors
-     * @param[in]       ch_im_out   numRow of A
-     * @param[in]       numCol_A    numCol of A
-     * @param[in]       bias_shift  amount of left-shift for bias
-     * @param[in]       out_shift   amount of right-shift for output
-     * @param[in]       bias        the bias
-     * @param[in,out]   pOut        pointer to output
-     * @return     The function returns the incremented output pointer
-     */
-
-    q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA,
-                                        const q15_t *pInBuffer,
-                                        const uint16_t ch_im_out,
-                                        const uint16_t numCol_A,
-                                        const uint16_t bias_shift,
-                                        const uint16_t out_shift,
-                                        const q7_t *bias,
-                                        q7_t *pOut);
-    /**
-     * @brief Matrix-multiplication function for convolution with per-channel requantization.
-     * @param[in]       input_a     pointer to operand A
-     * @param[in]       input_b     pointer to operand B, always consists of 2 vectors.
-     * @param[in]       output_ch   number of rows of A
-     * @param[in]       out_shift  pointer to per output channel requantization shift parameter.
-     * @param[in]       out_mult   pointer to per output channel requantization multiplier parameter.
-     * @param[in]       out_offset      output tensor offset.
-     * @param[in]       activation_min   minimum value to clamp the output to. Range : int8
-     * @param[in]       activation_max   maximum value to clamp the output to. Range : int8
-     * @param[in]       num_col_a   number of columns of A
-     * @param[in]       output_bias per output channel bias. Range : int32
-     * @param[in,out]   out_0       pointer to output
-     * @return     The function returns one of the two
-     *              1. The incremented output pointer for a successful operation or
-     *              2. NULL if implementation is not available.
-     *
-     * @details   This function does the matrix multiplication of weight matrix for all output channels
-     *            with 2 columns from im2col and produces two elements/output_channel. The outputs are
-     *            clamped in the range provided by activation min and max.
-     *            Supported framework: TensorFlow Lite micro.
-     */
-    q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
-                                        const q15_t *input_b,
-                                        const uint16_t output_ch,
-                                        const int32_t *out_shift,
-                                        const int32_t *out_mult,
-                                        const int32_t out_offset,
-                                        const int16_t activation_min,
-                                        const int16_t activation_max,
-                                        const uint16_t num_col_a,
-                                        const int32_t *const output_bias,
-                                        q7_t *out_0);
-
-    /**
-     * @brief Matrix-multiplication of re-ordered input B with A.
-     *
-     * @details  For arguments, refer arm_nn_mat_mult_kernel_s8_s16. The re-ordering is a consequence
-     *           of sign extension done by the SXTB16 command on input_b. The outputs are clamped in the range
-     *           provided by activation min and max.
-     *   * @details
-     *   - Supported framework : TensorFlow Lite Micro
-     *   - The following constrains on the arguments apply
-     *      -# num_col_a is a multiple of 4
-     *      -# output_ch is a multiple of 2
-     *
-     */
-    q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
-                                                  const q15_t *input_b,
-                                                  const uint16_t output_ch,
-                                                  const int32_t *out_shift,
-                                                  const int32_t *out_mult,
-                                                  const int32_t out_offset,
-                                                  const int16_t activation_min,
-                                                  const int16_t activation_max,
-                                                  const uint16_t num_col_a,
-                                                  const int32_t *const output_bias,
-                                                  q7_t *out_0);
-
-    /**
-     *@brief Matrix-multiplication function for convolution with reordered columns
-     *@param[in]       pA          pointer to operand A
-     *@param[in]       pInBuffer   pointer to operand B, always conssists of 2 vectors
-     *@param[in]       ch_im_out   numRow of A
-     *@param[in]       numCol_A    numCol of A
-     *@param[in]       bias_shift  amount of left-shift for bias
-     *@param[in]       out_shift   amount of right-shift for output
-     *@param[in]       bias        the bias
-     *@param[in,out]   pOut        pointer to output
-     *@return     The function returns the incremented output pointer
-     *
-     *@details  This function assumes that data in pInBuffer are reordered
-     */
-    q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA,
-                                                  const q15_t *pInBuffer,
-                                                  const uint16_t ch_im_out,
-                                                  const uint16_t numCol_A,
-                                                  const uint16_t bias_shift,
-                                                  const uint16_t out_shift,
-                                                  const q7_t *bias,
-                                                  q7_t *pOut);
+/**
+ * @brief Basic s8 Fully Connected function.
+ *
+ * @param[in, out] ctx           Function context (e.g. temporary buffer). Check the function
+ *                               definition file to see if an additional buffer is required.
+ *                               Optional function {API}_get_buffer_size() provides the buffer
+ *                               size if an additional buffer is required.
+ *                               The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      fc_params     Fully Connected layer parameters.
+ *                               Range of fc_params->input_offset  : [-127, 128]
+ *                               fc_params->filter_offset : 0
+ *                               Range of fc_params->output_offset : [-128, 127]
+ * @param[in]      quant_params  Per-tensor quantization info.
+ *                               It contains the multiplier and shift values to be applied to the output tensor.
+ * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                               Input dimension is taken as Nx(H * W * C_IN)
+ * @param[in]      input_data    Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims   Two dimensional filter dimensions. Format: [N, C]
+ *                               N : accumulation depth and equals (H * W * C_IN) from input_dims
+ *                               C : output depth and equals C_OUT in output_dims
+ *                               H & W : Not used
+ * @param[in]      filter_data   Filter data pointer. Data type: int8
+ * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
+ *                               N, H, W : Not used
+ * @param[in]      bias_data     Bias data pointer. Data type: int32
+ * @param[in]      output_dims   Output tensor dimensions. Format: [N, C_OUT]
+ *                               N : Batches
+ *                               C_OUT : Output depth
+ *                               H & W : Not used.
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ */
+arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
+                                           const cmsis_nn_fc_params *fc_params,
+                                           const cmsis_nn_per_tensor_quant_params *quant_params,
+                                           const cmsis_nn_dims *input_dims,
+                                           const q7_t *input_data,
+                                           const cmsis_nn_dims *filter_dims,
+                                           const q7_t *filter_data,
+                                           const cmsis_nn_dims *bias_dims,
+                                           const int32_t *bias_data,
+                                           const cmsis_nn_dims *output_dims,
+                                           q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S8 basic fully-connected and
+ * matrix multiplication layer function for TF Lite
+ * @param[in]      filter_dims             dimension of filter
+ * @return         The function returns    required buffer size in bytes
+ *
+ */
+int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Basic s16 Fully Connected function.
+ *
+ * @param[in, out] ctx           Function context (e.g. temporary buffer). Check the function
+ *                               definition file to see if an additional buffer is required.
+ *                               Optional function {API}_get_buffer_size() provides the buffer
+ *                               size if an additional buffer is required.
+ *                               The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      fc_params     Fully Connected layer parameters.
+ *                               fc_params->input_offset  : 0
+ *                               fc_params->filter_offset : 0
+ *                               fc_params->output_offset : 0
+ * @param[in]      quant_params  Per-tensor quantization info.
+ *                               It contains the multiplier and shift values to be applied to the output tensor.
+ * @param[in]      input_dims    Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
+ *                               Input dimension is taken as Nx(H * W * C_IN)
+ * @param[in]      input_data    Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims   Two dimensional filter dimensions. Format: [N, C]
+ *                               N : accumulation depth and equals (H * W * C_IN) from input_dims
+ *                               C : output depth and equals C_OUT in output_dims
+ *                               H & W : Not used
+ * @param[in]      filter_data   Filter data pointer. Data type: int8
+ * @param[in]      bias_dims     Bias tensor dimensions. Format: [C_OUT]
+ *                               N, H, W : Not used
+ * @param[in]      bias_data     Bias data pointer. Data type: int64
+ * @param[in]      output_dims   Output tensor dimensions. Format: [N, C_OUT]
+ *                               N : Batches
+ *                               C_OUT : Output depth
+ *                               H & W : Not used.
+ * @param[in, out] output_data    Output data pointer. Data type: int16
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    - Supported framework: TensorFlow Lite
+ *    - q15 is used as data type even though it is s16 data. It is done so to be consistent with existing APIs.
+ */
+arm_cmsis_nn_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
+                                            const cmsis_nn_fc_params *fc_params,
+                                            const cmsis_nn_per_tensor_quant_params *quant_params,
+                                            const cmsis_nn_dims *input_dims,
+                                            const q15_t *input_data,
+                                            const cmsis_nn_dims *filter_dims,
+                                            const q7_t *filter_data,
+                                            const cmsis_nn_dims *bias_dims,
+                                            const int64_t *bias_data,
+                                            const cmsis_nn_dims *output_dims,
+                                            q15_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S16 basic fully-connected and
+ * matrix multiplication layer function for TF Lite
+ * @param[in]      filter_dims             dimension of filter
+ * @return         The function returns    required buffer size in bytes
+ *
+ */
+int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims);
+
+/**
+ * @brief Q7 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector
+ * @param[in]       pM          pointer to matrix weights
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias
+ * @param[in,out]   pOut        pointer to output vector
+ * @param[in,out]   vec_buffer  pointer to buffer space for input
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_q7_opt(const q7_t *pV,
+                                               const q7_t *pM,
+                                               const uint16_t dim_vec,
+                                               const uint16_t num_of_rows,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               const q7_t *bias,
+                                               q7_t *pOut,
+                                               q15_t *vec_buffer);
+
+/**
+ * @brief Q15 basic fully-connected layer function
+ * @param[in]       pV          pointer to input vector
+ * @param[in]       pM          pointer to matrix weights
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias
+ * @param[in,out]   pOut        pointer to output vector
+ * @param[in,out]   vec_buffer  pointer to buffer space for input
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_q15(const q15_t *pV,
+                                            const q15_t *pM,
+                                            const uint16_t dim_vec,
+                                            const uint16_t num_of_rows,
+                                            const uint16_t bias_shift,
+                                            const uint16_t out_shift,
+                                            const q15_t *bias,
+                                            q15_t *pOut,
+                                            q15_t *vec_buffer);
+
+/**
+ * @brief Q15 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector
+ * @param[in]       pM          pointer to matrix weights
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias
+ * @param[in,out]   pOut        pointer to output vector
+ * @param[in,out]   vec_buffer  pointer to buffer space for input
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_q15_opt(const q15_t *pV,
+                                                const q15_t *pM,
+                                                const uint16_t dim_vec,
+                                                const uint16_t num_of_rows,
+                                                const uint16_t bias_shift,
+                                                const uint16_t out_shift,
+                                                const q15_t *bias,
+                                                q15_t *pOut,
+                                                q15_t *vec_buffer);
+
+/**
+ * @brief Mixed Q15-Q7 fully-connected layer function
+ * @param[in]       pV          pointer to input vector
+ * @param[in]       pM          pointer to matrix weights
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias
+ * @param[in,out]   pOut        pointer to output vector
+ * @param[in,out]   vec_buffer  pointer to buffer space for input
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
+                                                       const q7_t *pM,
+                                                       const uint16_t dim_vec,
+                                                       const uint16_t num_of_rows,
+                                                       const uint16_t bias_shift,
+                                                       const uint16_t out_shift,
+                                                       const q7_t *bias,
+                                                       q15_t *pOut,
+                                                       q15_t *vec_buffer);
+
+/**
+ * @brief Mixed Q15-Q7 opt fully-connected layer function
+ * @param[in]       pV          pointer to input vector
+ * @param[in]       pM          pointer to matrix weights
+ * @param[in]       dim_vec     length of the vector
+ * @param[in]       num_of_rows number of rows in weight matrix
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        pointer to bias
+ * @param[in,out]   pOut        pointer to output vector
+ * @param[in,out]   vec_buffer  pointer to buffer space for input
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+
+arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
+                                                           const q7_t *pM,
+                                                           const uint16_t dim_vec,
+                                                           const uint16_t num_of_rows,
+                                                           const uint16_t bias_shift,
+                                                           const uint16_t out_shift,
+                                                           const q7_t *bias,
+                                                           q15_t *pOut,
+                                                           q15_t *vec_buffer);
+
+/**
+ * @brief Matrix-Multiplication Kernels for Convolution
+ *
+ * These functions are used within convolution layer functions for
+ * matrix multiplication.
+ *
+ * The implementation is similar to CMSIS-DSP arm_mat_mult functions
+ * with one Q7 and one Q15 operands. The Q15 operand is the im2col
+ * output which is always with 2 columns.
+ *
+ */
+
+/**
+ * @brief Matrix-multiplication function for convolution
+ * @param[in]       pA          pointer to operand A
+ * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
+ * @param[in]       ch_im_out   numRow of A
+ * @param[in]       numCol_A    numCol of A
+ * @param[in]       bias_shift  amount of left-shift for bias
+ * @param[in]       out_shift   amount of right-shift for output
+ * @param[in]       bias        the bias
+ * @param[in,out]   pOut        pointer to output
+ * @return     The function returns the incremented output pointer
+ */
+
+q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA,
+                                    const q15_t *pInBuffer,
+                                    const uint16_t ch_im_out,
+                                    const uint16_t numCol_A,
+                                    const uint16_t bias_shift,
+                                    const uint16_t out_shift,
+                                    const q7_t *bias,
+                                    q7_t *pOut);
 
 #ifdef __cplusplus
 }
@@ -1423,676 +1729,925 @@ extern "C"
  */
 
 #ifdef __cplusplus
-extern "C"
-{
+extern "C" {
 #endif
 
-    /**
-     * @defgroup BasicMath Basic math functions
-     *
-     * Element wise add and multiplication functions.
-     *
-     */
-
-    /**
-     * @brief s8 element wise add of two vectors
-     * @param[in]       input_1_vect            pointer to input vector 1
-     * @param[in]       input_2_vect            pointer to input vector 2
-     * @param[in]       input_1_offset          offset for input 1. Range: Range: -127 to 128
-     * @param[in]       input_1_mult            multiplier for input 1
-     * @param[in]       input_1_shift           shift for input 1
-     * @param[in]       input_2_offset          offset for input 2. Range: Range: -127 to 128
-     * @param[in]       input_2_mult            multiplier for input 2
-     * @param[in]       input_2_shift           shift for input 2
-     * @param[in]       left_shift              input left shift
-     * @param[in,out]   output                  pointer to output vector
-     * @param[in]       out_offset              output offset
-     * @param[in]       out_mult                output multiplier
-     * @param[in]       out_shift               output shift
-     * @param[in]       out_activation_min      minimum value to clamp output to
-     * @param[in]       out_activation_max      maximum value to clamp output to
-     * @param[in]       block_size              number of samples
-     * @return          The function returns    ARM_MATH_SUCCESS
-     */
-    arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
-                                      const int8_t *input_2_vect,
-                                      const int32_t input_1_offset,
-                                      const int32_t input_1_mult,
-                                      const int32_t input_1_shift,
-                                      const int32_t input_2_offset,
-                                      const int32_t input_2_mult,
-                                      const int32_t input_2_shift,
-                                      const int32_t left_shift,
-                                      int8_t *output,
-                                      const int32_t out_offset,
-                                      const int32_t out_mult,
-                                      const int32_t out_shift,
-                                      const int32_t out_activation_min,
-                                      const int32_t out_activation_max,
-                                      const uint32_t block_size);
-
-    /**
-     * @brief s8 element wise multiplication
-     * @param[in]       input_1_vect            pointer to input vector 1
-     * @param[in]       input_2_vect            pointer to input vector 2
-     * @param[in]       input_1_offset          offset for input 1. Range: Range: -127 to 128
-     * @param[in]       input_2_offset          offset for input 2. Range: Range: -127 to 128
-     * @param[in,out]   output                  pointer to output vector
-     * @param[in]       out_offset              output offset
-     * @param[in]       out_mult                output multiplier
-     * @param[in]       out_shift               output shift
-     * @param[in]       out_activation_min      minimum value to clamp output to
-     * @param[in]       out_activation_max      maximum value to clamp output to
-     * @param[in]       block_size              number of samples
-     * @return          The function returns    ARM_MATH_SUCCESS
-     *
-     * @details   Supported framework: TensorFlow Lite micro
-     */
-    arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
-                                      const int8_t *input_2_vect,
-                                      const int32_t input_1_offset,
-                                      const int32_t input_2_offset,
-                                      int8_t *output,
-                                      const int32_t out_offset,
-                                      const int32_t out_mult,
-                                      const int32_t out_shift,
-                                      const int32_t out_activation_min,
-                                      const int32_t out_activation_max,
-                                      const uint32_t block_size);
-    /**
-     * @defgroup Acti Activation Functions
-     *
-     * Perform activation layers, including ReLU (Rectified Linear Unit),
-     * sigmoid and tanh
-     *
-     */
-
-    /**
-     * @brief Q7 RELU function
-     * @param[in,out]   data        pointer to input
-     * @param[in]       size        number of elements
-     * @return none.
-     */
-
-    void arm_relu_q7(q7_t *data, uint16_t size);
-
-    /**
-     * @brief s8 ReLU6 function
-     * @param[in,out]   data        pointer to input
-     * @param[in]       size        number of elements
-     */
-
-    void arm_relu6_s8(q7_t *data, uint16_t size);
-
-    /**
-     * @brief Q15 RELU function
-     * @param[in,out]   data        pointer to input
-     * @param[in]       size        number of elements
-     * @return none.
-     */
-
-    void arm_relu_q15(q15_t *data, uint16_t size);
-
-    /**
-     * @brief Q7 neural network activation function using direct table look-up
-     * @param[in,out]   data        pointer to input
-     * @param[in]       size        number of elements
-     * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
-     * @param[in]       type        type of activation functions
-     * @return none.
-     */
-
-    void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
-
-    /**
-     * @brief Q15 neural network activation function using direct table look-up
-     * @param[in,out]   data        pointer to input
-     * @param[in]       size        number of elements
-     * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
-     * @param[in]       type        type of activation functions
-     * @return none.
-     *
-     * @details
-     *
-     * This is the direct table look-up approach.
-     *
-     * Assume here the integer part of the fixed-point is <= 3.
-     * More than 3 just not making much sense, makes no difference with
-     * saturation followed by any of these activation functions.
-     */
-
-    void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
-
-    /**
-     * @defgroup Pooling Pooling Functions
-     *
-     * Perform pooling functions, including max pooling and average pooling
-     *
-     */
-
-    /**
-     * @brief Q7 max pooling function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @return none.
-     *
-     */
-
-    void arm_maxpool_q7_HWC(q7_t *Im_in,
-                            const uint16_t dim_im_in,
-                            const uint16_t ch_im_in,
-                            const uint16_t dim_kernel,
-                            const uint16_t padding,
-                            const uint16_t stride,
-                            const uint16_t dim_im_out,
-                            q7_t *bufferA,
-                            q7_t *Im_out);
-
-    /**
-     * @brief Q7 average pooling function
-     * @param[in]       Im_in       pointer to input tensor
-     * @param[in]       dim_im_in   input tensor dimension
-     * @param[in]       ch_im_in    number of input tensor channels
-     * @param[in]       dim_kernel  filter kernel size
-     * @param[in]       padding     padding sizes
-     * @param[in]       stride      convolution stride
-     * @param[in]       dim_im_out  output tensor dimension
-     * @param[in,out]   bufferA     pointer to buffer space for input
-     * @param[in,out]   Im_out      pointer to output tensor
-     * @return none.
-     *
-     */
-
-    void arm_avepool_q7_HWC(q7_t *Im_in,
-                            const uint16_t dim_im_in,
-                            const uint16_t ch_im_in,
-                            const uint16_t dim_kernel,
-                            const uint16_t padding,
-                            const uint16_t stride,
-                            const uint16_t dim_im_out,
-                            q7_t *bufferA,
-                            q7_t *Im_out);
-
-    /**
-     * @brief s8 average pooling function.
-     *
-     * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
-     *                                definition file to see if an additional buffer is required.
-     *                                Optional function {API}_get_buffer_size() provides the buffer
-     *                                size if an additional buffer is required.
-     * @param[in]      pool_params    Pooling parameters
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
-     *                                Argument 'N' is not used.
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [H, W]
-     *                                Argument N and C are not used.
-     * @param[in]      output_dims    Output tensor dimensions. Format: [H, W, C_OUT]
-     *                                Argument N is not used.
-     *                                C_OUT equals C_IN.
-     * @param[in, out] output_data    Output data pointer. Data type: int8
-     * @return                        The function returns
-     *                                    <code>ARM_MATH_SUCCESS</code> - Successful operation
-     *
-     * @details
-     *    - Supported Framework: TensorFlow Lite
-     *
-     */
-    arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
-                              const cmsis_nn_pool_params *pool_params,
-                              const cmsis_nn_dims *input_dims,
-                              const q7_t *input_data,
-                              const cmsis_nn_dims *filter_dims,
-                              const cmsis_nn_dims *output_dims,
-                              q7_t *output_data);
-
-    /**
-     * @brief Get the required buffer size for S8 average pooling function
-     * @param[in]       dim_dst_width         output tensor dimension
-     * @param[in]       ch_src                number of input tensor channels
-     * @return          The function returns  required buffer size in bytes
-     *
-     */
-    int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src);
-
-    /**
-     * @brief s8 max pooling function.
-     *
-     * @param[in, out] ctx            Function context (e.g. temporary buffer). Check the function
-     *                                definition file to see if an additional buffer is required.
-     *                                Optional function {API}_get_buffer_size() provides the buffer
-     *                                size if an additional buffer is required.
-     * @param[in]      pool_params    Pooling parameters
-     * @param[in]      input_dims     Input (activation) tensor dimensions. Format: [H, W, C_IN]
-     *                                Argument 'N' is not used.
-     * @param[in]      input_data     Input (activation) data pointer. Data type: int8
-     * @param[in]      filter_dims    Filter tensor dimensions. Format: [H, W]
-     *                                Argument N and C are not used.
-     * @param[in]      output_dims    Output tensor dimensions. Format: [H, W, C_OUT]
-     *                                Argument N is not used.
-     *                                C_OUT equals C_IN.
-     * @param[in, out] output_data    Output data pointer. Data type: int8
-     * @return                        The function returns
-     *                                    <code>ARM_MATH_SUCCESS</code> - Successful operation
-     *
-     * @details
-     *    - Supported Framework: TensorFlow Lite
-     *
-     */
-    arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
-                               const cmsis_nn_pool_params *pool_params,
-                               const cmsis_nn_dims *input_dims,
-                               const q7_t *input_data,
-                               const cmsis_nn_dims *filter_dims,
-                               const cmsis_nn_dims *output_dims,
-                               q7_t *output_data);
-    /**
-     * @defgroup Softmax Softmax Functions
-     *
-     * EXP(2) based softmax functions.
-     *
-     */
-
-    /**
-     * @brief Q7 softmax function
-     * @param[in]       vec_in      pointer to input vector
-     * @param[in]       dim_vec     input vector dimension
-     * @param[out]      p_out       pointer to output vector
-     *
-     * @note This function is an optimized version which is not bit-accurate with
-     *       TensorFlow Lite's kernel
-     *
-     */
-
-    void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out);
-
-    /**
-     * @brief Q7 softmax function with batch parameter
-     * @param[in]       vec_in      pointer to input vector
-     * @param[in]       nb_batches  number of batches
-     * @param[in]       dim_vec     input vector dimension
-     * @param[out]      p_out       pointer to output vector
-     * @return none.
-     *
-     * @note This function is an optimized version which is not bit-accurate with
-     *       TensorFlow Lite's kernel
-     *
-     */
-
-    void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out);
-    /**
-     * @brief Q15 softmax function
-     * @param[in]       vec_in      pointer to input vector
-     * @param[in]       dim_vec     input vector dimension
-     * @param[out]      p_out       pointer to output vector
-     * @return none.
-     *
-     * @note This function is an optimized version which is not bit-accurate with
-     *       TensorFlow Lite's kernel
-     *
-     */
-
-    void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out);
-
-    /**
-     * @brief S8 softmax function
-     * @param[in]  input     Pointer to the input tensor
-     * @param[in]  num_rows  Number of rows in the input tensor
-     * @param[in]  row_size  Number of elements in each input row
-     * @param[in]  mult      Input quantization multiplier
-     * @param[in]  shift     Input quantization shift within the range [0, 31]
-     * @param[in]  diff_min  Minimum difference with max in row. Used to check if
-     *                       the quantized exponential operation can be performed
-     * @param[out] output    Pointer to the output tensor
-     *
-     * @note Supported framework: TensorFlow Lite micro (bit-accurate)
-     *
-     */
-
-    void arm_softmax_s8(const int8_t *input,
-                        const int32_t num_rows,
-                        const int32_t row_size,
-                        const int32_t mult,
-                        const int32_t shift,
-                        const int32_t diff_min,
-                        int8_t *output);
-
-    /**
-     * @brief U8 softmax function
-     * @param[in]  input     Pointer to the input tensor
-     * @param[in]  num_rows  Number of rows in the input tensor
-     * @param[in]  row_size  Number of elements in each input row
-     * @param[in]  mult      Input quantization multiplier
-     * @param[in]  shift     Input quantization shift within the range [0, 31]
-     * @param[in]  diff_min  Minimum difference with max in row. Used to check if
-     *                       the quantized exponential operation can be performed
-     * @param[out] output    Pointer to the output tensor
-     *
-     * @note Supported framework: TensorFlow Lite micro (bit-accurate)
-     *
-     */
-
-    void arm_softmax_u8(const uint8_t *input,
+/**
+ * @defgroup BasicMath Basic math functions
+ *
+ * Elementwise add and multiplication functions.
+ *
+ */
+
+/**
+ * @brief s8 elementwise add of two vectors
+ * @param[in]       input_1_vect        pointer to input vector 1
+ * @param[in]       input_2_vect        pointer to input vector 2
+ * @param[in]       input_1_offset      offset for input 1. Range: -127 to 128
+ * @param[in]       input_1_mult        multiplier for input 1
+ * @param[in]       input_1_shift       shift for input 1
+ * @param[in]       input_2_offset      offset for input 2. Range: -127 to 128
+ * @param[in]       input_2_mult        multiplier for input 2
+ * @param[in]       input_2_shift       shift for input 2
+ * @param[in]       left_shift          input left shift
+ * @param[in,out]   output              pointer to output vector
+ * @param[in]       out_offset          output offset.  Range: -128 to 127
+ * @param[in]       out_mult            output multiplier
+ * @param[in]       out_shift           output shift
+ * @param[in]       out_activation_min  minimum value to clamp output to. Min: -128
+ * @param[in]       out_activation_max  maximum value to clamp output to. Max: 127
+ * @param[in]       block_size          number of samples
+ * @return          The function returns    ARM_CMSIS_NN_SUCCESS
+ */
+arm_cmsis_nn_status arm_elementwise_add_s8(const int8_t *input_1_vect,
+                                           const int8_t *input_2_vect,
+                                           const int32_t input_1_offset,
+                                           const int32_t input_1_mult,
+                                           const int32_t input_1_shift,
+                                           const int32_t input_2_offset,
+                                           const int32_t input_2_mult,
+                                           const int32_t input_2_shift,
+                                           const int32_t left_shift,
+                                           int8_t *output,
+                                           const int32_t out_offset,
+                                           const int32_t out_mult,
+                                           const int32_t out_shift,
+                                           const int32_t out_activation_min,
+                                           const int32_t out_activation_max,
+                                           const int32_t block_size);
+
+/**
+ * @brief s16 elementwise add of two vectors
+ * @param[in]       input_1_vect        pointer to input vector 1
+ * @param[in]       input_2_vect        pointer to input vector 2
+ * @param[in]       input_1_offset      offset for input 1. Not used.
+ * @param[in]       input_1_mult        multiplier for input 1
+ * @param[in]       input_1_shift       shift for input 1
+ * @param[in]       input_2_offset      offset for input 2. Not used.
+ * @param[in]       input_2_mult        multiplier for input 2
+ * @param[in]       input_2_shift       shift for input 2
+ * @param[in]       left_shift          input left shift
+ * @param[in,out]   output              pointer to output vector
+ * @param[in]       out_offset          output offset. Not used.
+ * @param[in]       out_mult            output multiplier
+ * @param[in]       out_shift           output shift
+ * @param[in]       out_activation_min  minimum value to clamp output to. Min: -32768
+ * @param[in]       out_activation_max  maximum value to clamp output to. Max: 32767
+ * @param[in]       block_size          number of samples
+ * @return          The function returns  ARM_CMSIS_NN_SUCCESS
+ */
+arm_cmsis_nn_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+                                            const int16_t *input_2_vect,
+                                            const int32_t input_1_offset,
+                                            const int32_t input_1_mult,
+                                            const int32_t input_1_shift,
+                                            const int32_t input_2_offset,
+                                            const int32_t input_2_mult,
+                                            const int32_t input_2_shift,
+                                            const int32_t left_shift,
+                                            int16_t *output,
+                                            const int32_t out_offset,
+                                            const int32_t out_mult,
+                                            const int32_t out_shift,
+                                            const int32_t out_activation_min,
+                                            const int32_t out_activation_max,
+                                            const int32_t block_size);
+
+/**
+ * @brief s8 elementwise multiplication
+ * @param[in]       input_1_vect        pointer to input vector 1
+ * @param[in]       input_2_vect        pointer to input vector 2
+ * @param[in]       input_1_offset      offset for input 1. Range: -127 to 128
+ * @param[in]       input_2_offset      offset for input 2. Range: -127 to 128
+ * @param[in,out]   output              pointer to output vector
+ * @param[in]       out_offset          output offset. Range: -128 to 127
+ * @param[in]       out_mult            output multiplier
+ * @param[in]       out_shift           output shift
+ * @param[in]       out_activation_min  minimum value to clamp output to. Min: -128
+ * @param[in]       out_activation_max  maximum value to clamp output to. Max: 127
+ * @param[in]       block_size          number of samples
+ * @return          The function returns ARM_CMSIS_NN_SUCCESS
+ *
+ * @details   Supported framework: TensorFlow Lite micro
+ */
+arm_cmsis_nn_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
+                                           const int8_t *input_2_vect,
+                                           const int32_t input_1_offset,
+                                           const int32_t input_2_offset,
+                                           int8_t *output,
+                                           const int32_t out_offset,
+                                           const int32_t out_mult,
+                                           const int32_t out_shift,
+                                           const int32_t out_activation_min,
+                                           const int32_t out_activation_max,
+                                           const int32_t block_size);
+
+/**
+ * @brief s16 elementwise multiplication
+ * @param[in]       input_1_vect        pointer to input vector 1
+ * @param[in]       input_2_vect        pointer to input vector 2
+ * @param[in]       input_1_offset      offset for input 1. Not used.
+ * @param[in]       input_2_offset      offset for input 2. Not used.
+ * @param[in,out]   output              pointer to output vector
+ * @param[in]       out_offset          output offset. Not used.
+ * @param[in]       out_mult            output multiplier
+ * @param[in]       out_shift           output shift
+ * @param[in]       out_activation_min  minimum value to clamp output to. Min: -32768
+ * @param[in]       out_activation_max  maximum value to clamp output to. Max: 32767
+ * @param[in]       block_size          number of samples
+ * @return          The function returns ARM_CMSIS_NN_SUCCESS
+ *
+ * @details   Supported framework: TensorFlow Lite micro
+ */
+arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+                                            const int16_t *input_2_vect,
+                                            const int32_t input_1_offset,
+                                            const int32_t input_2_offset,
+                                            int16_t *output,
+                                            const int32_t out_offset,
+                                            const int32_t out_mult,
+                                            const int32_t out_shift,
+                                            const int32_t out_activation_min,
+                                            const int32_t out_activation_max,
+                                            const int32_t block_size);
+
+/**
+ * @defgroup Acti Activation Functions
+ *
+ * Perform activation layers, including ReLU (Rectified Linear Unit),
+ * sigmoid and tanh
+ *
+ */
+
+/**
+ * @brief Q7 RELU function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ */
+
+void arm_relu_q7(q7_t *data, uint16_t size);
+
+/**
+ * @brief s8 ReLU6 function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ */
+
+void arm_relu6_s8(q7_t *data, uint16_t size);
+
+/**
+ * @brief Q15 RELU function
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ */
+
+void arm_relu_q15(q15_t *data, uint16_t size);
+
+/**
+ * @brief Q7 neural network activation function using direct table look-up
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @param[in]       int_width   bit-width of the integer part, assumed to be smaller than 3
+ * @param[in]       type        type of activation functions
+ */
+
+void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
+
+/**
+ * @brief Q15 neural network activation function using direct table look-up
+ * @param[in,out]   data        pointer to input
+ * @param[in]       size        number of elements
+ * @param[in]       int_width   bit-width of the integer part, assumed to be smaller than 3
+ * @param[in]       type        type of activation functions
+ *
+ * @details
+ *
+ * This is the direct table look-up approach.
+ *
+ * The bit-width of the integer part of the fixed-point input is assumed to be <= 3.
+ * A wider integer part makes little sense, because the result would be no different
+ * from saturation followed by any of these activation functions.
+ */
+
+void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type);
+
+/**
+ * @defgroup Pooling Pooling Functions
+ *
+ * Perform pooling functions, including max pooling and average pooling
+ *
+ */
+
+/**
+ * @brief Q7 max pooling function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      pooling stride
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   Im_out      pointer to output tensor
+ *
+ */
+
+void arm_maxpool_q7_HWC(q7_t *Im_in,
+                        const uint16_t dim_im_in,
+                        const uint16_t ch_im_in,
+                        const uint16_t dim_kernel,
+                        const uint16_t padding,
+                        const uint16_t stride,
+                        const uint16_t dim_im_out,
+                        q7_t *bufferA,
+                        q7_t *Im_out);
+
+/**
+ * @brief Q7 average pooling function
+ * @param[in]       Im_in       pointer to input tensor
+ * @param[in]       dim_im_in   input tensor dimension
+ * @param[in]       ch_im_in    number of input tensor channels
+ * @param[in]       dim_kernel  filter kernel size
+ * @param[in]       padding     padding sizes
+ * @param[in]       stride      pooling stride
+ * @param[in]       dim_im_out  output tensor dimension
+ * @param[in,out]   bufferA     pointer to buffer space for input
+ * @param[in,out]   Im_out      pointer to output tensor
+ *
+ */
+
+void arm_avepool_q7_HWC(q7_t *Im_in,
+                        const uint16_t dim_im_in,
+                        const uint16_t ch_im_in,
+                        const uint16_t dim_kernel,
+                        const uint16_t padding,
+                        const uint16_t stride,
+                        const uint16_t dim_im_out,
+                        q7_t *bufferA,
+                        q7_t *Im_out);
+
+/**
+ * @brief s8 average pooling function.
+ *
+ * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
+ *                              definition file to see if an additional buffer is required.
+ *                              Optional function {API}_get_buffer_size() provides the buffer
+ *                              size if an additional buffer is required.
+ *                              The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      pool_params  Pooling parameters
+ * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                              Argument 'N' is not used.
+ * @param[in]      input_data   Input (activation) data pointer. Data type: int8
+ * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
+ *                              Argument N and C are not used.
+ * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
+ *                              Argument N is not used.
+ *                              C_OUT equals C_IN.
+ * @param[in, out] output_data Output data pointer. Data type: int8
+ * @return                     The function returns
+ *                             <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_cmsis_nn_status arm_avgpool_s8(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_pool_params *pool_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q7_t *input_data,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const cmsis_nn_dims *output_dims,
+                                   q7_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S8 average pooling function
+ * @param[in]       dim_dst_width         output tensor dimension
+ * @param[in]       ch_src                number of input tensor channels
+ * @return          The function returns  required buffer size in bytes
+ *
+ */
+int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src);
+
+/**
+ * @brief s16 average pooling function.
+ *
+ * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
+ *                              definition file to see if an additional buffer is required.
+ *                              Optional function {API}_get_buffer_size() provides the buffer
+ *                              size if an additional buffer is required.
+ *                              The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      pool_params  Pooling parameters
+ * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                              Argument 'N' is not used.
+ * @param[in]      input_data   Input (activation) data pointer. Data type: int16
+ * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
+ *                              Argument N and C are not used.
+ * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
+ *                              Argument N is not used.
+ *                              C_OUT equals C_IN.
+ * @param[in, out] output_data  Output data pointer. Data type: int16
+ * @return                        The function returns
+ *                                    <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *                                    <code>ARM_CMSIS_NN_ARG_ERROR</code> - In case of invalid arguments
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_cmsis_nn_status arm_avgpool_s16(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_pool_params *pool_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const int16_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const cmsis_nn_dims *output_dims,
+                                    int16_t *output_data);
+
+/**
+ * @brief Get the required buffer size for S16 average pooling function
+ * @param[in]       dim_dst_width         output tensor dimension
+ * @param[in]       ch_src                number of input tensor channels
+ * @return          The function returns  required buffer size in bytes
+ *
+ */
+int32_t arm_avgpool_s16_get_buffer_size(const int dim_dst_width, const int ch_src);
+
+/**
+ * @brief s8 max pooling function.
+ *
+ * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
+ *                              definition file to see if an additional buffer is required.
+ *                              Optional function {API}_get_buffer_size() provides the buffer
+ *                              size if an additional buffer is required.
+ *                              The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      pool_params  Pooling parameters
+ * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                              Argument 'N' is not used.
+ * @param[in]      input_data   Input (activation) data pointer. The input tensor must not
+ *                              overlap with the output tensor. Data type: int8
+ * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
+ *                              Argument N and C are not used.
+ * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
+ *                              Argument N is not used.
+ *                              C_OUT equals C_IN.
+ * @param[in, out] output_data    Output data pointer. Data type: int8
+ * @return                        The function returns
+ *                                    <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_cmsis_nn_status arm_max_pool_s8(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_pool_params *pool_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q7_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const cmsis_nn_dims *output_dims,
+                                    q7_t *output_data);
+
+/**
+ * @brief s16 max pooling function.
+ *
+ * @param[in, out] ctx          Function context (e.g. temporary buffer). Check the function
+ *                              definition file to see if an additional buffer is required.
+ *                              Optional function {API}_get_buffer_size() provides the buffer
+ *                              size if an additional buffer is required.
+ *                              The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]      pool_params  Pooling parameters
+ * @param[in]      input_dims   Input (activation) tensor dimensions. Format: [H, W, C_IN]
+ *                              Argument 'N' is not used.
+ * @param[in]      src          Input (activation) data pointer. The input tensor must not
+ *                              overlap with the output tensor. Data type: int16
+ * @param[in]      filter_dims  Filter tensor dimensions. Format: [H, W]
+ *                              Argument N and C are not used.
+ * @param[in]      output_dims  Output tensor dimensions. Format: [H, W, C_OUT]
+ *                              Argument N is not used.
+ *                              C_OUT equals C_IN.
+ * @param[in, out] dst          Output data pointer. Data type: int16
+ * @return                        The function returns
+ *                                    <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @details
+ *    - Supported Framework: TensorFlow Lite
+ *
+ */
+arm_cmsis_nn_status arm_max_pool_s16(const cmsis_nn_context *ctx,
+                                     const cmsis_nn_pool_params *pool_params,
+                                     const cmsis_nn_dims *input_dims,
+                                     const int16_t *src,
+                                     const cmsis_nn_dims *filter_dims,
+                                     const cmsis_nn_dims *output_dims,
+                                     int16_t *dst);
+
+/**
+ * @defgroup Softmax Softmax Functions
+ *
+ * EXP(2) based softmax functions.
+ *
+ */
+
+/**
+ * @brief Q7 softmax function
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out);
+
+/**
+ * @brief Q7 softmax function with batch parameter
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       nb_batches  number of batches
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out);
+/**
+ * @brief Q15 softmax function
+ * @param[in]       vec_in      pointer to input vector
+ * @param[in]       dim_vec     input vector dimension
+ * @param[out]      p_out       pointer to output vector
+ *
+ * @note This function is an optimized version which is not bit-accurate with
+ *       TensorFlow Lite's kernel
+ *
+ */
+
+void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out);
+
+/**
+ * @brief S8 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+void arm_softmax_s8(const int8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    int8_t *output);
+
+/**
+ * @brief S8 to s16 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+void arm_softmax_s8_s16(const int8_t *input,
                         const int32_t num_rows,
                         const int32_t row_size,
                         const int32_t mult,
                         const int32_t shift,
                         const int32_t diff_min,
-                        uint8_t *output);
-
-    /**
-     * @brief uint8 depthwise convolution function with asymmetric quantization
-     *        Unless specified otherwise, arguments are mandatory.
-     *
-     * @param[in]     input     Pointer to input tensor
-     * @param[in]     input_x   Width of input tensor
-     * @param[in]     input_y   Height of input tensor
-     * @param[in]     input_ch  Channels in input tensor
-     * @param[in]     kernel    Pointer to kernel weights
-     * @param[in]     kernel_x  Width of kernel
-     * @param[in]     kernel_y  Height of kernel
-     * @param[in]     ch_mult   Number of channel multiplier
-     * @param[in]     pad_x     Padding sizes x
-     * @param[in]     pad_y     Padding sizes y
-     * @param[in]     stride_x  stride along the width
-     * @param[in]     stride_y  stride along the height
-     * @param[in]     dilation_x Dilation along width. Not used and intended for future enhancement.
-     * @param[in]     dilation_y Dilation along height. Not used and intended for future enhancement.
-     * @param[in]     bias       Pointer to optional bias values. If no bias is
-     *                           availble, NULL is expected
-     * @param[in]     input_offset  Input tensor zero offset
-     * @param[in]     filter_offset Kernel tensor zero offset
-     * @param[in]     output_offset Output tensor zero offset
-     * @param[in,out] output        Pointer to output tensor
-     * @param[in]     output_x  Width of output tensor
-     * @param[in]     output_y  Height of output tensor
-     * @param[in]     output_activation_min   Minimum value to clamp the output to. Range : {0, 255}
-     * @param[in]     output_activation_max   Minimum value to clamp the output to. Range : {0, 255}
-     * @param[in]     out_shift  Amount of right-shift for output
-     * @param[in]     out_mult   Output multiplier for requantization
-     * @return        The function returns the following
-     *                <code>ARM_MATH_SUCCESS</code> - Successful operation
-     *
-     */
-    arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
-                                                const uint16_t input_x,
-                                                const uint16_t input_y,
-                                                const uint16_t input_ch,
-                                                const uint8_t *kernel,
-                                                const uint16_t kernel_x,
-                                                const uint16_t kernel_y,
-                                                const int16_t ch_mult,
-                                                const int16_t pad_x,
-                                                const int16_t pad_y,
-                                                const int16_t stride_x,
-                                                const int16_t stride_y,
-                                                const int16_t dilation_x,
-                                                const int16_t dilation_y,
-                                                const int32_t *bias,
-                                                const int32_t input_offset,
-                                                const int32_t filter_offset,
-                                                const int32_t output_offset,
-                                                uint8_t *output,
-                                                const uint16_t output_x,
-                                                const uint16_t output_y,
-                                                const int32_t output_activation_min,
-                                                const int32_t output_activation_max,
-                                                const int32_t out_shift,
-                                                const int32_t out_mult);
-
-    /**
-     * @defgroup Reshape Reshape Functions
-     *
-     */
-
-    /**
-     * @brief Reshape a s8 vector into another with different shape
-     * @param[in]  input      points to the s8 input vector
-     * @param[out] output     points to the s8 output vector
-     * @param[in]  total_size total size of the input and output vectors in bytes
-     *
-     * @note The output is expected to be in a memory area that does not overlap with the input's
-     *
-     */
-    void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size);
-
-    /**
-     * @defgroup Concatenation Concatenation Functions
-     *
-     */
-
-    /**
-     * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
-     *        This function should be called for each input tensor to concatenate. The argument offset_x
-     *        will be used to store the input tensor in the correct position in the output tensor
-     *
-     *        i.e.    offset_x = 0
-     *                for(i = 0 i < num_input_tensors; ++i)
-     *                {
-     *                    arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x)
-     *                    offset_x += input_x[i]
-     *                }
-     *
-     *        This function assumes that the output tensor has:
-     *        -# The same height of the input tensor
-     *        -# The same number of channels of the input tensor
-     *        -# The same batch size of the input tensor
-     *
-     *        Unless specified otherwise, arguments are mandatory.
-     *
-     * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
-     *      does not involve any arithmetic operation
-     *
-     * @param[in]  input    Pointer to input tensor
-     * @param[in]  input_x  Width of input tensor
-     * @param[in]  input_y  Height of input tensor
-     * @param[in]  input_z  Channels in input tensor
-     * @param[in]  input_w  Batch size in input tensor
-     * @param[out] output   Pointer to output tensor
-     * @param[in]  output_x Width of output tensor
-     * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
-     *                      It is user responsibility to provide the correct value
-     *
-     * <b> Input constraints</b>
-     * offset_x is less than output_x
-     *
-     */
-    void arm_concatenation_s8_x(const int8_t *input,
-                                const uint16_t input_x,
-                                const uint16_t input_y,
-                                const uint16_t input_z,
-                                const uint16_t input_w,
-                                int8_t *output,
-                                const uint16_t output_x,
-                                const uint32_t offset_x);
-
-    /**
-     * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
-     *        This function should be called for each input tensor to concatenate. The argument offset_y
-     *        will be used to store the input tensor in the correct position in the output tensor
-     *
-     *        i.e.    offset_y = 0
-     *                for(i = 0 i < num_input_tensors; ++i)
-     *                {
-     *                    arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y)
-     *                    offset_y += input_y[i]
-     *                }
-     *
-     *        This function assumes that the output tensor has:
-     *        -# The same width of the input tensor
-     *        -# The same number of channels of the input tensor
-     *        -# The same batch size of the input tensor
-     *
-     *        Unless specified otherwise, arguments are mandatory.
-     *
-     * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
-     *       does not involve any arithmetic operation
-     *
-     * @param[in]  input    Pointer to input tensor
-     * @param[in]  input_x  Width of input tensor
-     * @param[in]  input_y  Height of input tensor
-     * @param[in]  input_z  Channels in input tensor
-     * @param[in]  input_w  Batch size in input tensor
-     * @param[out] output   Pointer to output tensor
-     * @param[in]  output_y Height of output tensor
-     * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
-     *                      It is user responsibility to provide the correct value
-     *
-     * <b> Input constraints</b>
-     * offset_y is less than output_y
-     *
-     */
-    void arm_concatenation_s8_y(const int8_t *input,
-                                const uint16_t input_x,
-                                const uint16_t input_y,
-                                const uint16_t input_z,
-                                const uint16_t input_w,
-                                int8_t *output,
-                                const uint16_t output_y,
-                                const uint32_t offset_y);
-
-    /**
-     * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
-     *        This function should be called for each input tensor to concatenate. The argument offset_z
-     *        will be used to store the input tensor in the correct position in the output tensor
-     *
-     *        i.e.    offset_z = 0
-     *                for(i = 0 i < num_input_tensors; ++i)
-     *                {
-     *                    arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z)
-     *                    offset_z += input_z[i]
-     *                }
-     *
-     *        This function assumes that the output tensor has:
-     *        -# The same width of the input tensor
-     *        -# The same height of the input tensor
-     *        -# The same batch size of the input tensor
-     *
-     *        Unless specified otherwise, arguments are mandatory.
-     *
-     * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
-     *       does not involve any arithmetic operation
-     *
-     * @param[in]  input    Pointer to input tensor
-     * @param[in]  input_x  Width of input tensor
-     * @param[in]  input_y  Height of input tensor
-     * @param[in]  input_z  Channels in input tensor
-     * @param[in]  input_w  Batch size in input tensor
-     * @param[out] output   Pointer to output tensor
-     * @param[in]  output_z Channels in output tensor
-     * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
-     *                      It is user responsibility to provide the correct value
-     *
-     * <b> Input constraints</b>
-     * offset_z is less than output_z
-     *
-     */
-    void arm_concatenation_s8_z(const int8_t *input,
-                                const uint16_t input_x,
-                                const uint16_t input_y,
-                                const uint16_t input_z,
-                                const uint16_t input_w,
-                                int8_t *output,
-                                const uint16_t output_z,
-                                const uint32_t offset_z);
-
-    /**
-     * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
-     *        This function should be called for each input tensor to concatenate. The argument offset_w
-     *        will be used to store the input tensor in the correct position in the output tensor
-     *
-     *        i.e.    offset_w = 0
-     *                for(i = 0 i < num_input_tensors; ++i)
-     *                {
-     *                    arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
-     *                    offset_w += input_w[i]
-     *                }
-     *
-     *        This function assumes that the output tensor has:
-     *        -# The same width of the input tensor
-     *        -# The same height of the input tensor
-     *        -# The same number o channels of the input tensor
-     *
-     *        Unless specified otherwise, arguments are mandatory.
-     *
-     * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
-     *       does not involve any arithmetic operation
-     *
-     * @param[in]  input    Pointer to input tensor
-     * @param[in]  input_x  Width of input tensor
-     * @param[in]  input_y  Height of input tensor
-     * @param[in]  input_z  Channels in input tensor
-     * @param[in]  input_w  Batch size in input tensor
-     * @param[out] output   Pointer to output tensor
-     * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
-     *                      It is user responsibility to provide the correct value
-     *
-     */
-    void arm_concatenation_s8_w(const int8_t *input,
-                                const uint16_t input_x,
-                                const uint16_t input_y,
-                                const uint16_t input_z,
-                                const uint16_t input_w,
-                                int8_t *output,
-                                const uint32_t offset_w);
-    /**
-     * @defgroup SVDF SVDF Layer Functions
-     *
-     */
-
-    /**
-     * @brief s8 SVDF function
-     *
-     * @param[in]   input_ctx Temporary scratch buffer
-     * @param[in]   output_ctx Temporary output scratch buffer
-     * @param[in]   svdf_params SVDF Parameters
-     *              Range of svdf_params->input_offset  : [-128, 127]
-     *              Range of svdf_params->output_offset  : [-128, 127]
-     * @param[in]   input_quant_params Input quantization parameters
-     * @param[in]   output_quant_params Output quantization parameters
-     * @param[in]   input_dims Input tensor dimensions
-     * @param[in]   input_data Pointer to input tensor
-     * @param[in]   state_dims State tensor dimensions
-     * @param[in]   state_data Pointer to state tensor
-     * @param[in]   weights_feature_dims Weights (feature) tensor dimensions
-     * @param[in]   weights_feature_data Pointer to the weights (feature) tensor
-     * @param[in]   weights_time_dims Weights (time) tensor dimensions
-     * @param[in]   weights_time_data Pointer to the weights (time) tensor
-     * @param[in]   bias_dims Bias tensor dimensions
-     * @param[in]   bias_data Pointer to bias tensor
-     * @param[in]   output_dims Output tensor dimensions
-     * @param[out]  output_data Pointer to the output tensor
-     *
-     * @return     The function returns <code>ARM_MATH_SUCCESS</code>
-     *
-     * @details
-     *    1. Supported framework: TensorFlow Lite micro
-     *    2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs.
-     *
-     */
-    arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
-                           const cmsis_nn_context *output_ctx,
-                           const cmsis_nn_svdf_params *svdf_params,
-                           const cmsis_nn_per_tensor_quant_params *input_quant_params,
-                           const cmsis_nn_per_tensor_quant_params *output_quant_params,
-                           const cmsis_nn_dims *input_dims,
-                           const q7_t *input_data,
-                           const cmsis_nn_dims *state_dims,
-                           q15_t *state_data,
-                           const cmsis_nn_dims *weights_feature_dims,
-                           const q7_t *weights_feature_data,
-                           const cmsis_nn_dims *weights_time_dims,
-                           const q15_t *weights_time_data,
-                           const cmsis_nn_dims *bias_dims,
-                           const q31_t *bias_data,
-                           const cmsis_nn_dims *output_dims,
-                           q7_t *output_data);
+                        int16_t *output);
+
+/**
+ * @brief S16 softmax function
+ * @param[in]  input           Pointer to the input tensor
+ * @param[in]  num_rows        Number of rows in the input tensor
+ * @param[in]  row_size        Number of elements in each input row
+ * @param[in]  mult            Input quantization multiplier
+ * @param[in]  shift           Input quantization shift within the range [0, 31]
+ * @param[in]  softmax_params  Softmax s16 layer parameters with two pointers to LUTs specified below.
+ *                             For indexing the high 9 bits are used and 7 remaining for interpolation.
+ *                             That means 512 entries for the 9-bit indexing and 1 extra for interpolation, i.e. 513
+ *                             values for each LUT.
+ *                             - Lookup table for exp(x), where x is uniformly distributed over [-10.0, 0.0]
+ *                             - Lookup table for 1 / (1 + x), where x is uniformly distributed over [0.0, 1.0]
+ * @param[out] output          Pointer to the output tensor
+ * @return                        The function returns
+ *                                    <code>ARM_CMSIS_NN_ARG_ERROR</code> - Argument error check failed
+ *                                    <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+arm_cmsis_nn_status arm_softmax_s16(const int16_t *input,
+                                    const int32_t num_rows,
+                                    const int32_t row_size,
+                                    const int32_t mult,
+                                    const int32_t shift,
+                                    const cmsis_nn_softmax_lut_s16 *softmax_params,
+                                    int16_t *output);
+
+/**
+ * @brief U8 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+
+void arm_softmax_u8(const uint8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    uint8_t *output);
+
+/**
+ * @brief uint8 depthwise convolution function with asymmetric quantization
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @param[in]     input     Pointer to input tensor
+ * @param[in]     input_x   Width of input tensor
+ * @param[in]     input_y   Height of input tensor
+ * @param[in]     input_ch  Channels in input tensor
+ * @param[in]     kernel    Pointer to kernel weights
+ * @param[in]     kernel_x  Width of kernel
+ * @param[in]     kernel_y  Height of kernel
+ * @param[in]     ch_mult   Number of channel multiplier
+ * @param[in]     pad_x     Padding sizes x
+ * @param[in]     pad_y     Padding sizes y
+ * @param[in]     stride_x  stride along the width
+ * @param[in]     stride_y  stride along the height
+ * @param[in]     dilation_x Dilation along width. Not used and intended for future enhancement.
+ * @param[in]     dilation_y Dilation along height. Not used and intended for future enhancement.
+ * @param[in]     bias       Pointer to optional bias values. If no bias is
+ *                           available, NULL is expected
+ * @param[in]     input_offset  Input tensor zero offset
+ * @param[in]     filter_offset Kernel tensor zero offset
+ * @param[in]     output_offset Output tensor zero offset
+ * @param[in,out] output        Pointer to output tensor
+ * @param[in]     output_x  Width of output tensor
+ * @param[in]     output_y  Height of output tensor
+ * @param[in]     output_activation_min   Minimum value to clamp the output to. Range : {0, 255}
+ * @param[in]     output_activation_max   Maximum value to clamp the output to. Range : {0, 255}
+ * @param[in]     out_shift  Amount of right-shift for output
+ * @param[in]     out_mult   Output multiplier for requantization
+ * @return        The function returns the following
+ *                <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
+ *
+ */
+arm_cmsis_nn_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
+                                                     const uint16_t input_x,
+                                                     const uint16_t input_y,
+                                                     const uint16_t input_ch,
+                                                     const uint8_t *kernel,
+                                                     const uint16_t kernel_x,
+                                                     const uint16_t kernel_y,
+                                                     const int16_t ch_mult,
+                                                     const int16_t pad_x,
+                                                     const int16_t pad_y,
+                                                     const int16_t stride_x,
+                                                     const int16_t stride_y,
+                                                     const int16_t dilation_x,
+                                                     const int16_t dilation_y,
+                                                     const int32_t *bias,
+                                                     const int32_t input_offset,
+                                                     const int32_t filter_offset,
+                                                     const int32_t output_offset,
+                                                     uint8_t *output,
+                                                     const uint16_t output_x,
+                                                     const uint16_t output_y,
+                                                     const int32_t output_activation_min,
+                                                     const int32_t output_activation_max,
+                                                     const int32_t out_shift,
+                                                     const int32_t out_mult);
+
+/**
+ * @defgroup Reshape Reshape Functions
+ *
+ */
+
+/**
+ * @brief Reshape a s8 vector into another with different shape
+ * @param[in]  input      points to the s8 input vector
+ * @param[out] output     points to the s8 output vector
+ * @param[in]  total_size total size of the input and output vectors in bytes
+ *
+ * @note The output is expected to be in a memory area that does not overlap with the input's
+ *
+ */
+void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size);
+
+/**
+ * @defgroup Concatenation Concatenation Functions
+ *
+ */
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_x
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_x = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x)
+ *                    offset_x += input_x[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same height of the input tensor
+ *        -# The same number of channels of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *      does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with the output tensor.
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor. Expected to be at least
+ *                          (input_x * input_y * input_z * input_w) + offset_x
+ *                      bytes.
+ * @param[in]  output_x Width of output tensor
+ * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
+ *                      It is the user's responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_x is less than output_x
+ *
+ */
+void arm_concatenation_s8_x(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_x,
+                            const uint32_t offset_x);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_y
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_y = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y)
+ *                    offset_y += input_y[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same number of channels of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with the output tensor.
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor. Expected to be at least
+ *                          (input_z * input_w * input_x * input_y) + offset_y
+ *                      bytes.
+ * @param[in]  output_y Height of output tensor
+ * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
+ *                      It is the user's responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_y is less than output_y
+ *
+ */
+void arm_concatenation_s8_y(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_y,
+                            const uint32_t offset_y);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
+ *        This function should be called for each input tensor to concatenate. The argument offset_z
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_z = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z)
+ *                    offset_z += input_z[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same height of the input tensor
+ *        -# The same batch size of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor. Input tensor must not overlap with output tensor.
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor. Expected to be at least
+ *                          (input_x * input_y * input_z * input_w) + offset_z
+ *                      bytes.
+ * @param[in]  output_z Channels in output tensor
+ * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
+ *                      It is the user's responsibility to provide the correct value
+ *
+ * <b> Input constraints</b>
+ * offset_z is less than output_z
+ *
+ */
+void arm_concatenation_s8_z(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint16_t output_z,
+                            const uint32_t offset_z);
+
+/**
+ * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
+ *        This function should be called for each input tensor to concatenate. The argument offset_w
+ *        will be used to store the input tensor in the correct position in the output tensor
+ *
+ *        i.e.    offset_w = 0
+ *                for(i = 0; i < num_input_tensors; ++i)
+ *                {
+ *                    arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
+ *                    offset_w += input_w[i]
+ *                }
+ *
+ *        This function assumes that the output tensor has:
+ *        -# The same width of the input tensor
+ *        -# The same height of the input tensor
+ *        -# The same number of channels of the input tensor
+ *
+ *        Unless specified otherwise, arguments are mandatory.
+ *
+ * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it
+ *       does not involve any arithmetic operation
+ *
+ * @param[in]  input    Pointer to input tensor
+ * @param[in]  input_x  Width of input tensor
+ * @param[in]  input_y  Height of input tensor
+ * @param[in]  input_z  Channels in input tensor
+ * @param[in]  input_w  Batch size in input tensor
+ * @param[out] output   Pointer to output tensor. Expected to be at least
+ *                          input_x * input_y * input_z * input_w
+ *                      bytes.
+ * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
+ *                      It is the user's responsibility to provide the correct value
+ *
+ */
+void arm_concatenation_s8_w(const int8_t *input,
+                            const uint16_t input_x,
+                            const uint16_t input_y,
+                            const uint16_t input_z,
+                            const uint16_t input_w,
+                            int8_t *output,
+                            const uint32_t offset_w);
+/**
+ * @defgroup SVDF SVDF Layer Functions
+ *
+ */
+
+/**
+ * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights
+ *
+ * @param[in]   input_ctx             Temporary scratch buffer
+ *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]   output_ctx            Temporary output scratch buffer
+ *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]   svdf_params           SVDF Parameters
+ *                                    Range of svdf_params->input_offset  : [-128, 127]
+ *                                    Range of svdf_params->output_offset  : [-128, 127]
+ * @param[in]   input_quant_params    Input quantization parameters
+ * @param[in]   output_quant_params   Output quantization parameters
+ * @param[in]   input_dims            Input tensor dimensions
+ * @param[in]   input_data            Pointer to input tensor
+ * @param[in]   state_dims            State tensor dimensions
+ * @param[in]   state_data            Pointer to state tensor
+ * @param[in]   weights_feature_dims  Weights (feature) tensor dimensions
+ * @param[in]   weights_feature_data  Pointer to the weights (feature) tensor
+ * @param[in]   weights_time_dims     Weights (time) tensor dimensions
+ * @param[in]   weights_time_data     Pointer to the weights (time) tensor
+ * @param[in]   bias_dims             Bias tensor dimensions
+ * @param[in]   bias_data             Pointer to bias tensor
+ * @param[in]   output_dims           Output tensor dimensions
+ * @param[out]  output_data           Pointer to the output tensor
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *
+ */
+arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
+                                const cmsis_nn_context *output_ctx,
+                                const cmsis_nn_svdf_params *svdf_params,
+                                const cmsis_nn_per_tensor_quant_params *input_quant_params,
+                                const cmsis_nn_per_tensor_quant_params *output_quant_params,
+                                const cmsis_nn_dims *input_dims,
+                                const q7_t *input_data,
+                                const cmsis_nn_dims *state_dims,
+                                q7_t *state_data,
+                                const cmsis_nn_dims *weights_feature_dims,
+                                const q7_t *weights_feature_data,
+                                const cmsis_nn_dims *weights_time_dims,
+                                const q7_t *weights_time_data,
+                                const cmsis_nn_dims *bias_dims,
+                                const q31_t *bias_data,
+                                const cmsis_nn_dims *output_dims,
+                                q7_t *output_data);
+
+/**
+ * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights
+ *
+ * @param[in]   input_ctx             Temporary scratch buffer
+ *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]   output_ctx            Temporary output scratch buffer
+ *                                    The caller is expected to clear the buffer, if applicable, for security reasons.
+ * @param[in]   svdf_params           SVDF Parameters
+ *                                    Range of svdf_params->input_offset  : [-128, 127]
+ *                                    Range of svdf_params->output_offset  : [-128, 127]
+ * @param[in]   input_quant_params    Input quantization parameters
+ * @param[in]   output_quant_params   Output quantization parameters
+ * @param[in]   input_dims            Input tensor dimensions
+ * @param[in]   input_data            Pointer to input tensor
+ * @param[in]   state_dims            State tensor dimensions
+ * @param[in]   state_data            Pointer to state tensor
+ * @param[in]   weights_feature_dims  Weights (feature) tensor dimensions
+ * @param[in]   weights_feature_data  Pointer to the weights (feature) tensor
+ * @param[in]   weights_time_dims     Weights (time) tensor dimensions
+ * @param[in]   weights_time_data     Pointer to the weights (time) tensor
+ * @param[in]   bias_dims             Bias tensor dimensions
+ * @param[in]   bias_data             Pointer to bias tensor
+ * @param[in]   output_dims           Output tensor dimensions
+ * @param[out]  output_data           Pointer to the output tensor
+ *
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ * @details
+ *    1. Supported framework: TensorFlow Lite micro
+ *    2. q7 is used as data type even though it is s8 data. It is done so to be consistent with existing APIs.
+ *
+ */
+arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx,
+                                          const cmsis_nn_context *output_ctx,
+                                          const cmsis_nn_svdf_params *svdf_params,
+                                          const cmsis_nn_per_tensor_quant_params *input_quant_params,
+                                          const cmsis_nn_per_tensor_quant_params *output_quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input_data,
+                                          const cmsis_nn_dims *state_dims,
+                                          q15_t *state_data,
+                                          const cmsis_nn_dims *weights_feature_dims,
+                                          const q7_t *weights_feature_data,
+                                          const cmsis_nn_dims *weights_time_dims,
+                                          const q15_t *weights_time_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const q31_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output_data);
 
 #ifdef __cplusplus
 }
diff --git a/edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h b/edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h
index 3e2f941..232aa61 100644
--- a/edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        09. October 2020
- * $Revision:    V.4.5.5
+ * $Date:        8 August 2022
+ * $Revision:    V.10.0.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -30,8 +30,11 @@
 #ifndef _ARM_NNSUPPORTFUNCTIONS_H_
 #define _ARM_NNSUPPORTFUNCTIONS_H_
 
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_common_tables.h"
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_math_types.h"
+#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/none.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h"
+
+#include <stdbool.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -46,6 +49,27 @@ extern "C" {
 #define MAX(A, B) ((A) > (B) ? (A) : (B))
 #define MIN(A, B) ((A) < (B) ? (A) : (B))
 #define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
+#define REDUCE_MULTIPLIER(_mult) (((_mult) < 0x7FFF0000) ? (((_mult) + (1 << 15)) >> 16) : 0x7FFF)
+
+// Number of channels processed in a block for DW Conv(MVE)
+// Requirement: Greater than 0 & less than 128
+// This can be fine tuned to match number of input channels for best performance.
+// A layer with lower number of channels than CH_IN_BLOCK_MVE will result in higher
+// scratch buffer usage and a layer with higher number of channels than CH_IN_BLOCK_MVE
+// will result in lower scratch buffer usage.
+#define CH_IN_BLOCK_MVE (124)
+
+/**
+ * @brief definition to pack four 8 bit values into a 32 bit word (v0 in bits 0-7, v3 in bits 24-31).
+ */
+#define PACK_Q7x4_32x1(v0, v1, v2, v3)                                                                                 \
+    ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) |                     \
+     (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000))
+
+/**
+ * @brief definition to pack two 16 bit values into a 32 bit word (v0 in bits 0-15, v1 in bits 16-31).
+ */
+#define PACK_Q15x2_32x1(v0, v1) (((int32_t)(v0) & (int32_t)0xFFFF) | ((int32_t)(v1) << 16))
 
 /**
  * @brief Union for SIMD access of q31/q15/q7 types
@@ -114,7 +138,6 @@ void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size);
  * @param[in]       *pSrc points to the q7 input vector
  * @param[out]      *pDst points to the q15 output vector
  * @param[in]       blockSize length of the input vector
- * @return none.
  *
  */
 void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
@@ -143,7 +166,6 @@ void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size,
  * @param[out]      dst        pointer to the q15 output vector
  * @param[in]       block_size length of the input vector
  * @param[in]       offset     offset to be added to each input vector element.
- * @return none.
  *
  * @details  This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
  *           the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its
@@ -236,58 +258,101 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                          const uint16_t row_len,
                          const int32_t *const bias,
                          q7_t *out);
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution.
+ * @param[in]       input_a     pointer to operand A
+ * @param[in]       input_b     pointer to operand B, always consists of 2 vectors.
+ * @param[in]       output_ch   number of rows of A
+ * @param[in]       out_shift  pointer to per output channel requantization shift parameter.
+ * @param[in]       out_mult   pointer to per output channel requantization multiplier parameter.
+ * @param[in]       activation_min   minimum value to clamp the output to. Range : int16
+ * @param[in]       activation_max   maximum value to clamp the output to. Range : int16
+ * @param[in]       num_col_a   number of columns of A
+ * @param[in]       output_bias per output channel bias. Range : int64
+ * @param[in,out]   out_0       pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details   This function does the matrix multiplication of weight matrix for all output channels
+ *            with 2 columns from im2col and produces two elements/output_channel. The outputs are
+ *            clamped in the range provided by activation min and max.
+ *            Supported framework: TensorFlow Lite micro.
+ */
+q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a,
+                                  const q15_t *input_b,
+                                  const int32_t output_ch,
+                                  const int32_t *out_shift,
+                                  const int32_t *out_mult,
+                                  const int16_t activation_min,
+                                  const int16_t activation_max,
+                                  const int32_t num_col_a,
+                                  const int64_t *const output_bias,
+                                  q15_t *out_0);
 
 /**
- * @brief General Matrix-multiplication without requantization for one row & one column
- * @param[in]       row_elements  number of row elements
- * @param[in]       row_base      pointer to row operand
- * @param[in]       col_base      pointer to col operand
- * @param[out]      sum_col       pointer to store sum of column elements
- * @param[out]      output        pointer to store result of multiply-accumulate
- * @return     The function returns the multiply-accumulated result of the row by column.
+ * @brief General Vector by Matrix multiplication with requantization and storage of result.
+ * @param[in]       row_elements          number of row elements
+ * @param[in]       skipped_row_elements  number of row elements skipped due to padding.
+ *                                        row_elements + skipped_row_elements = (kernel_x * kernel_y) * input_ch
+ * @param[in]       row_base_ref          pointer to row operand
+ * @param[in]       col_base_ref          pointer to col operand
+ * @param[in]       out_ch                Number of output channels
+ * @param[in]       conv_params           Pointer to convolution parameters like offsets and activation values
+ * @param[in]       quant_params          Pointer to per-channel quantization parameters
+ * @param[in]       bias                  Pointer to optional per-channel bias
+ * @param[out]      output                Pointer to output where int8 results are stored.
+ * @return     The function performs matrix(row_base_ref) multiplication with vector(col_base_ref) and
+ *             scaled result is stored in memory.
  *
  * @details Pseudo-code
  *      *output = 0
  *      sum_col = 0
+ *      for (j = 0; j < out_ch; j++)
  *      for (i = 0; i < row_elements; i++)
- *          *output += row_base[i] * col_base[i]
- *          sum_col += col_base[i]
+ *          *output += row_base_ref[i] * col_base_ref[i]
+ *          sum_col += col_base_ref[i]
+ *      scale sum_col using quant_params and bias
+ *      store result in 'output'
+ *
  *
  */
-arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
-                                     const int8_t *row_base,
-                                     const int8_t *col_base,
-                                     int32_t *const sum_col,
-                                     int32_t *const output);
+arm_cmsis_nn_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
+                                              const int32_t skipped_row_elements,
+                                              const int8_t *row_base_ref,
+                                              const int8_t *col_base_ref,
+                                              const int32_t out_ch,
+                                              const cmsis_nn_conv_params *conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const int32_t *bias,
+                                              int8_t *output);
 
 /**
- * @brief General Matrix-multiplication without requantization for four rows and one column
+ * @brief Matrix-multiplication with requantization & activation function for four rows and one column
  * @param[in]       row_elements  number of row elements
  * @param[in]       offset        offset between rows. Can be the same as row_elements.
  *                                For e.g, in a 1x1 conv scenario with stride as 1.
  * @param[in]       row_base      pointer to row operand
  * @param[in]       col_base      pointer to col operand
- * @param[out]      sum_col       pointer to store sum of column elements
- * @param[out]      output        pointer to store result(4 int32's) of multiply-accumulate
- * @return     The function returns the multiply-accumulated result of the row by column
+ * @param[in]       out_ch        Number of output channels
+ * @param[in]       conv_params   Pointer to convolution parameters like offsets and activation values
+ * @param[in]       quant_params  Pointer to per-channel quantization parameters
+ * @param[in]       bias          Pointer to per-channel bias
+ * @param[out]      output        Pointer to output where int8 results are stored.
  *
- * @details Pseudo-code
- *      output[0] = 0
- *         ..
- *      output[3] = 0
- *      sum_col = 0
- *      for (i = 0; i < row_elements; i++)
- *          output[0] += row_base[i] * col_base[i]
- *                ..
- *          output[3] += row_base[i + (row_elements * 3)] * col_base[i]
- *          sum_col += col_base[i]
+ * @return     The function returns the updated output pointer or NULL if implementation is not available.
+ *
+ * @details Compliant to TFLM int8 specification. MVE implementation only
  */
-arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
-                                     const int32_t offset,
-                                     const int8_t *row_base,
-                                     const int8_t *col_base,
-                                     int32_t *const sum_col,
-                                     int32_t *const output);
+int8_t *arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
+                                  const int32_t offset,
+                                  const int8_t *row_base,
+                                  const int8_t *col_base,
+                                  const int32_t out_ch,
+                                  const cmsis_nn_conv_params *conv_params,
+                                  const cmsis_nn_per_channel_quant_params *quant_params,
+                                  const int32_t *bias,
+                                  int8_t *output);
 
 /**
  * @brief General Matrix-multiplication function with per-channel requantization.
@@ -315,22 +380,22 @@ arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
  * @param[in]  activation_min     Minimum value to clamp down the output. Range : int8
  * @param[in]  activation_max     Maximum value to clamp up the output. Range : int8
  *
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+ * @return     The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  *
  */
-arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
-                                   const q7_t *rhs,
-                                   const q31_t *bias,
-                                   q7_t *dst,
-                                   const int32_t *dst_multipliers,
-                                   const int32_t *dst_shifts,
-                                   const int32_t lhs_rows,
-                                   const int32_t rhs_rows,
-                                   const int32_t rhs_cols,
-                                   const int32_t lhs_offset,
-                                   const int32_t dst_offset,
-                                   const int32_t activation_min,
-                                   const int32_t activation_max);
+arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
+                                            const q7_t *rhs,
+                                            const q31_t *bias,
+                                            q7_t *dst,
+                                            const int32_t *dst_multipliers,
+                                            const int32_t *dst_shifts,
+                                            const int32_t lhs_rows,
+                                            const int32_t rhs_rows,
+                                            const int32_t rhs_cols,
+                                            const int32_t lhs_offset,
+                                            const int32_t dst_offset,
+                                            const int32_t activation_min,
+                                            const int32_t activation_max);
 
 /**
  * @brief s8 Vector by Matrix (transposed) multiplication
@@ -341,8 +406,7 @@ arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
  * @param[out]     dst             Output vector
  * @param[in]      lhs_offset      Offset to be added to the input values of the left-hand side vector.
  *                                 Range: -127 to 128
- * @param[in]      rhs_offset      Offset to be added to the input values of the right-hand side matrix.
- *                                 Range: -127 to 128
+ * @param[in]      rhs_offset      Not used
  * @param[in]      dst_offset      Offset to be added to the output values. Range: -127 to 128
  * @param[in]      dst_multiplier  Output multiplier
  * @param[in]      dst_shift       Output shift
@@ -350,23 +414,88 @@ arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
  * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
  * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
  * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
+ * @param[in]      address_offset  Memory position offset for dst. First output is stored at 'dst', the
+ *                                 second at 'dst + address_offset' and so on. Default value is typically 1.
  *
- * @return         The function returns <code>ARM_MATH_SUCCESS</code>
+ * @return         The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
  *
  */
-arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
-                                    const q7_t *rhs,
-                                    const q31_t *bias,
-                                    q7_t *dst,
-                                    const int32_t lhs_offset,
-                                    const int32_t rhs_offset,
-                                    const int32_t dst_offset,
-                                    const int32_t dst_multiplier,
-                                    const int32_t dst_shift,
-                                    const int32_t rhs_cols,
-                                    const int32_t rhs_rows,
-                                    const int32_t activation_min,
-                                    const int32_t activation_max);
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
+                                             const q7_t *rhs,
+                                             const q31_t *bias,
+                                             q7_t *dst,
+                                             const int32_t lhs_offset,
+                                             const int32_t rhs_offset,
+                                             const int32_t dst_offset,
+                                             const int32_t dst_multiplier,
+                                             const int32_t dst_shift,
+                                             const int32_t rhs_cols,
+                                             const int32_t rhs_rows,
+                                             const int32_t activation_min,
+                                             const int32_t activation_max,
+                                             const int32_t address_offset);
+
+/**
+ * @brief s16 Vector by Matrix (transposed) multiplication
+ *
+ * @param[in]      lhs             Input left-hand side vector
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      bias            Input bias
+ * @param[out]     dst             Output vector
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int16
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int16
+ *
+ * @return         The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
+                                              const q7_t *rhs,
+                                              const q63_t *bias,
+                                              q15_t *dst,
+                                              const int32_t dst_multiplier,
+                                              const int32_t dst_shift,
+                                              const int32_t rhs_cols,
+                                              const int32_t rhs_rows,
+                                              const int32_t activation_min,
+                                              const int32_t activation_max);
+
+/**
+ * @brief s8 Vector by Matrix (transposed) multiplication with s16 output
+ *
+ * @param[in]      lhs             Input left-hand side vector
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[out]     dst             Output vector
+ * @param[in]      lhs_offset      Offset to be added to the input values of the left-hand side
+ *                                 vector. Range: -127 to 128
+ * @param[in]      rhs_offset      Not used
+ * @param[in]      scatter_offset  Address offset for dst. First output is stored at 'dst', the
+ *                                 second at 'dst + scatter_offset' and so on.
+ * @param[in]      dst_multiplier  Output multiplier
+ * @param[in]      dst_shift       Output shift
+ * @param[in]      rhs_cols        Number of columns in the right-hand side input matrix
+ * @param[in]      rhs_rows        Number of rows in the right-hand side input matrix
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int16
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int16
+ *
+ * @return         The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
+ *
+ */
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs,
+                                                  const q7_t *rhs,
+                                                  q15_t *dst,
+                                                  const int32_t lhs_offset,
+                                                  const int32_t rhs_offset,
+                                                  const int32_t scatter_offset,
+                                                  const int32_t dst_multiplier,
+                                                  const int32_t dst_shift,
+                                                  const int32_t rhs_cols,
+                                                  const int32_t rhs_rows,
+                                                  const int32_t activation_min,
+                                                  const int32_t activation_max);
 
 /**
  * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where
@@ -375,7 +504,8 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
  * @param[in]      lhs             Input left-hand side matrix
  * @param[in]      rhs             Input right-hand side matrix (transposed)
  * @param[in]      lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
- * @param[in]      num_ch          Number of channels in LHS/RHS
+ * @param[in]      active_ch       Subset of total_ch processed
+ * @param[in]      total_ch        Number of channels in LHS/RHS
  * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels
  * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels
  * @param[in]      out_offset      Offset to be added to the output values. Range: -127 to 128
@@ -386,7 +516,7 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
  * @param[in]      out             Output pointer
  *
  * @return         The function returns one of the two
- *                  - Updated output pointer if an implementaiton is available
+ *                  - Updated output pointer if an implementation is available
  *                  - NULL if no implementation is available.
  *
  * @note           If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read
@@ -396,18 +526,19 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
  *                  - Output bias
  *                  - rhs
  */
-q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
-                                           const q7_t *rhs,
-                                           const int32_t lhs_offset,
-                                           const uint16_t num_ch,
-                                           const int32_t *out_shift,
-                                           const int32_t *out_mult,
-                                           const int32_t out_offset,
-                                           const int32_t activation_min,
-                                           const int32_t activation_max,
-                                           const uint16_t row_x_col,
-                                           const int32_t *const output_bias,
-                                           q7_t *out);
+arm_cmsis_nn_status arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
+                                                         const q7_t *rhs,
+                                                         const int32_t lhs_offset,
+                                                         const int32_t active_ch,
+                                                         const int32_t total_ch,
+                                                         const int32_t *out_shift,
+                                                         const int32_t *out_mult,
+                                                         const int32_t out_offset,
+                                                         const int32_t activation_min,
+                                                         const int32_t activation_max,
+                                                         const uint16_t row_x_col,
+                                                         const int32_t *const output_bias,
+                                                         q7_t *out);
 
 /**
  * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
@@ -416,7 +547,8 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
  * @param[in]      lhs             Input left-hand side matrix
  * @param[in]      rhs             Input right-hand side matrix (transposed)
  * @param[in]      lhs_offset      LHS matrix offset(input offset). Range: -127 to 128
- * @param[in]      num_ch          Number of channels in LHS/RHS
+ * @param[in]      active_ch       Subset of total_ch processed
+ * @param[in]      total_ch        Number of channels in LHS/RHS
  * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels.
  * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels.
  * @param[in]      out_offset      Offset to be added to the output values. Range: -127 to 128
@@ -427,7 +559,7 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
  * @param[in]      out             Output pointer
  *
  * @return         The function returns one of the two
- *                  - Updated output pointer if an implementaiton is available
+ *                  - Updated output pointer if an implementation is available
  *                  - NULL if no implementation is available.
  *
  * @note           If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read
@@ -437,18 +569,79 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
  *                  - Output bias
  *                  - rhs
  */
-q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
-                                    const q7_t *rhs,
-                                    const int32_t lhs_offset,
-                                    const uint16_t num_ch,
-                                    const int32_t *out_shift,
-                                    const int32_t *out_mult,
-                                    const int32_t out_offset,
-                                    const int32_t activation_min,
-                                    const int32_t activation_max,
-                                    const uint16_t row_x_col,
-                                    const int32_t *const output_bias,
-                                    q7_t *out);
+arm_cmsis_nn_status arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
+                                                  const q7_t *rhs,
+                                                  const int32_t lhs_offset,
+                                                  const int32_t active_ch,
+                                                  const int32_t total_ch,
+                                                  const int32_t *out_shift,
+                                                  const int32_t *out_mult,
+                                                  const int32_t out_offset,
+                                                  const int32_t activation_min,
+                                                  const int32_t activation_max,
+                                                  const uint16_t row_x_col,
+                                                  const int32_t *const output_bias,
+                                                  q7_t *out);
+
+/**
+ * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
+ *        Dimensions are the same for lhs and rhs.
+ *
+ * @param[in]      lhs             Input left-hand side matrix
+ * @param[in]      rhs             Input right-hand side matrix (transposed)
+ * @param[in]      num_ch          Number of channels in LHS/RHS
+ * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels.
+ * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels.
+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
+ * @param[in]       row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
+ * @param[in]      output_bias     Per channel output bias. Length of vector is equal to number of channels.
+ * @param[in]      out             Output pointer
+ *
+ * @return         The function returns one of the two
+ *                  - Updated output pointer if an implementation is available
+ *                  - NULL if no implementation is available.
+ *
+ * @note           If number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read
+ * out for the following.
+ *                  - Output shift
+ *                  - Output multiplier
+ *                  - Output bias
+ *                  - rhs
+ */
+int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
+                                        const q7_t *rhs,
+                                        const uint16_t num_ch,
+                                        const int32_t *out_shift,
+                                        const int32_t *out_mult,
+                                        const int32_t activation_min,
+                                        const int32_t activation_max,
+                                        const uint16_t row_x_col,
+                                        const int64_t *const output_bias,
+                                        int16_t *out);
+
+/**
+ *@brief Matrix-multiplication function for convolution with reordered columns
+ *@param[in]       pA          pointer to operand A
+ *@param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
+ *@param[in]       ch_im_out   numRow of A
+ *@param[in]       numCol_A    numCol of A
+ *@param[in]       bias_shift  amount of left-shift for bias
+ *@param[in]       out_shift   amount of right-shift for output
+ *@param[in]       bias        the bias
+ *@param[in,out]   pOut        pointer to output
+ *@return     The function returns the incremented output pointer
+ *
+ *@details  This function assumes that data in pInBuffer are reordered
+ */
+q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA,
+                                              const q15_t *pInBuffer,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t numCol_A,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              const q7_t *bias,
+                                              q7_t *pOut);
 
 /**
   @brief         Read 2 q15 elements and post increment pointer.
@@ -505,6 +698,17 @@ __STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7)
     return (val);
 }
 
+/**
+  @brief         Copy the four bytes of a 32-bit word to a q7 buffer and advance the buffer pointer by four.
+  @param[in,out] in       Address of the destination pointer; the pointer is moved past the four written bytes
+  @param[in]     value    32-bit word whose four bytes are copied with memcpy
+ */
+__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia(q7_t **in, q31_t value)
+{
+    memcpy(*in, &value, 4);
+    *in += 4;
+}
+
 /**
  * @brief           memset optimized for MVE
  * @param[in, out]  dst         Destination pointer
@@ -518,11 +722,11 @@ __STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t bloc
     __asm volatile("   vdup.8                  q0, %[set_val]             \n"
                    "   wlstp.8                 lr, %[cnt], 1f             \n"
                    "2:                                                    \n"
-                   "   vstrb.8                 q0, [%[in]], 16            \n"
+                   "   vstrb.8                 q0, [%[in]], #16            \n"
                    "   letp                    lr, 2b                     \n"
                    "1:                                                    \n"
-                   : [ in ] "+r"(dst)
-                   : [ cnt ] "r"(block_size), [ set_val ] "r"(val)
+                   : [in] "+r"(dst)
+                   : [cnt] "r"(block_size), [set_val] "r"(val)
                    : "q0", "memory", "r14");
 #else
     memset(dst, val, block_size);
@@ -538,7 +742,7 @@ __STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t bloc
 __STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
 {
     q31_t inA = arm_nn_read_q7x4_ia(&source);
-    q31_t inAbuf1 = __SXTB16(__ROR((uint32_t)inA, 8));
+    q31_t inAbuf1 = __SXTB16_RORn((uint32_t)inA, 8);
     q31_t inAbuf2 = __SXTB16(inA);
 
 #ifndef ARM_MATH_BIG_ENDIAN
@@ -607,7 +811,6 @@ read_and_pad_reordered_with_offset(const q7_t *source, q31_t *out1, q31_t *out2,
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
- * @return none.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
@@ -624,7 +827,6 @@ void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
- * @return none.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
@@ -634,11 +836,69 @@ void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out
 
 void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize);
 
+/**
+ * @brief Matrix-multiplication function for convolution with per-channel requantization.
+ * @param[in]       input_a     pointer to operand A
+ * @param[in]       input_b     pointer to operand B, always consists of 2 vectors.
+ * @param[in]       output_ch   number of rows of A
+ * @param[in]       out_shift  pointer to per output channel requantization shift parameter.
+ * @param[in]       out_mult   pointer to per output channel requantization multiplier parameter.
+ * @param[in]       out_offset      output tensor offset.
+ * @param[in]       activation_min   minimum value to clamp the output to. Range : int8
+ * @param[in]       activation_max   maximum value to clamp the output to. Range : int8
+ * @param[in]       num_col_a   number of columns of A
+ * @param[in]       output_bias per output channel bias. Range : int32
+ * @param[in,out]   out_0       pointer to output
+ * @return     The function returns one of the two
+ *              1. The incremented output pointer for a successful operation or
+ *              2. NULL if implementation is not available.
+ *
+ * @details   This function does the matrix multiplication of weight matrix for all output channels
+ *            with 2 columns from im2col and produces two elements/output_channel. The outputs are
+ *            clamped in the range provided by activation min and max.
+ *            Supported framework: TensorFlow Lite micro.
+ */
+q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
+                                    const q15_t *input_b,
+                                    const uint16_t output_ch,
+                                    const int32_t *out_shift,
+                                    const int32_t *out_mult,
+                                    const int32_t out_offset,
+                                    const int16_t activation_min,
+                                    const int16_t activation_max,
+                                    const uint16_t num_col_a,
+                                    const int32_t *const output_bias,
+                                    q7_t *out_0);
+
+/**
+ * @brief Common softmax function for s8 input and s8 or s16 output
+ * @param[in]  input          Pointer to the input tensor
+ * @param[in]  num_rows       Number of rows in the input tensor
+ * @param[in]  row_size       Number of elements in each input row
+ * @param[in]  mult           Input quantization multiplier
+ * @param[in]  shift          Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min       Minimum difference with max in row. Used to check if
+ *                            the quantized exponential operation can be performed
+ * @param[in]  int16_output   Indicating s8 output if 0 else s16 output
+ * @param[out] output         Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+void arm_nn_softmax_common_s8(const int8_t *input,
+                              const int32_t num_rows,
+                              const int32_t row_size,
+                              const int32_t mult,
+                              const int32_t shift,
+                              const int32_t diff_min,
+                              const bool int16_output,
+                              void *output);
+
 /**
  * @brief macro for adding rounding offset
  */
 #ifndef ARM_NN_TRUNCATE
-#define NN_ROUND(out_shift) ((0x1u << out_shift) >> 1)
+#define NN_ROUND(out_shift) ((0x1 << out_shift) >> 1)
 #else
 #define NN_ROUND(out_shift) 0
 #endif
@@ -663,8 +923,8 @@ void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shi
 /**
  * @brief           Saturating doubling high multiply. Result matches
  *                  NEON instruction VQRDMULH.
- * @param[in]       m1        Multiplicand. Range: {Q31_MIN, Q31_MAX}
- * @param[in]       m2        Multiplier. Range: {Q31_MIN, Q31_MAX}
+ * @param[in]       m1        Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @param[in]       m2        Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX}
  * @return          Result of multiplication.
  *
  */
@@ -685,9 +945,9 @@ __STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t
     // as well.
     result = (int32_t)(mult / (1ll << 31));
 
-    if ((m1 == m2) && (m1 == (int32_t)Q31_MIN))
+    if ((m1 == m2) && (m1 == (int32_t)NN_Q31_MIN))
     {
-        result = Q31_MAX;
+        result = NN_Q31_MAX;
     }
     return result;
 }
@@ -696,13 +956,13 @@ __STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t
  * @brief           Doubling high multiply without saturation. This is intended
  *                  for requantization where the scale is a positive integer
  *
- * @param[in]       m1        Multiplicand. Range: {Q31_MIN, Q31_MAX}
- * @param[in]       m2        Multiplier Range: {Q31_MIN, Q31_MAX}
+ * @param[in]       m1        Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
+ * @param[in]       m2        Multiplier Range: {NN_Q31_MIN, NN_Q31_MAX}
  * @return          Result of multiplication.
  * @note            The result of this matches that of neon instruction
- *                  VQRDMULH for m1 in range {Q31_MIN, Q31_MAX} and m2 in
- *                  range {Q31_MIN + 1, Q31_MAX}. Saturation occurs when
- *                  m1 equals m2 equals Q31_MIN and that is not handled by
+ *                  VQRDMULH for m1 in range {NN_Q31_MIN, NN_Q31_MAX} and m2 in
+ *                  range {NN_Q31_MIN + 1, NN_Q31_MAX}. Saturation occurs when
+ *                  m1 equals m2 equals NN_Q31_MIN and that is not handled by
  *                  this function.
  *
  */
@@ -759,7 +1019,7 @@ __STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, c
 /**
  * @brief           Requantize a given value.
  * @param[in]       val         Value to be requantized
- * @param[in]       multiplier  multiplier. Range {Q31_MIN + 1, Q32_MAX}
+ * @param[in]       multiplier  multiplier. Range {NN_Q31_MIN + 1, Q32_MAX}
  * @param[in]       shift       left or right shift for 'val * multiplier'
  *
  * @return          Returns (val * multiplier)/(2 ^ shift)
@@ -767,8 +1027,38 @@ __STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, c
  */
 __STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
 {
+#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
+    const int64_t total_shift = 31 - shift;            // overall right shift applied to the 64-bit product
+    const int64_t new_val = val * (int64_t)multiplier; // widen before multiplying to keep the full product
+    /* Shift by one bit less than total_shift, then add 1 and shift once more: rounds once, at the last bit. */
+    int32_t result = new_val >> (total_shift - 1);
+    result = (result + 1) >> 1;
+
+    return result;
+#else
     return arm_nn_divide_by_power_of_two(arm_nn_doubling_high_mult_no_sat(val * (1 << LEFT_SHIFT(shift)), multiplier),
                                          RIGHT_SHIFT(shift));
+#endif
+}
+
+/**
+ * @brief           Requantize a given 64 bit value.
+ * @param[in]       val                 Value to be requantized in the range {-(1<<47)} to {(1<<47) - 1}
+ * @param[in]       reduced_multiplier  Reduced multiplier in the range {NN_Q31_MIN + 1, Q32_MAX} to
+ *                                      {Q16_MIN + 1, Q16_MAX}
+ * @param[in]       shift               Left or right shift for 'val * reduced_multiplier' in the range {-31} to {7}
+ *
+ * @return          (val * reduced_multiplier) shifted right by (15 - shift) bits, rounded at the final bit
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64(const q63_t val, const q31_t reduced_multiplier, const q31_t shift)
+{
+    const q63_t new_val = val * reduced_multiplier;
+
+    q31_t result = new_val >> (14 - shift); // 64->32 bit reduction, keeping one extra low bit for rounding
+    result = (result + 1) >> 1;             // Add 1 to that extra bit, then apply the last shift position
+
+    return result;
 }
 
 /**
@@ -783,18 +1073,30 @@ __STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, const q7_t *__REST
 #if defined(ARM_MATH_MVEI)
     __asm volatile("   wlstp.8                 lr, %[cnt], 1f             \n"
                    "2:                                                    \n"
-                   "   vldrb.8                 q0, [%[in]], 16            \n"
-                   "   vstrb.8                 q0, [%[out]], 16           \n"
+                   "   vldrb.8                 q0, [%[in]], #16            \n"
+                   "   vstrb.8                 q0, [%[out]], #16           \n"
                    "   letp                    lr, 2b                     \n"
                    "1:                                                    \n"
-                   : [ in ] "+r"(src), [ out ] "+r"(dst)
-                   : [ cnt ] "r"(block_size)
+                   : [in] "+r"(src), [out] "+r"(dst)
+                   : [cnt] "r"(block_size)
                    : "q0", "memory", "r14");
 #else
     memcpy(dst, src, block_size);
 #endif
 }
 
+/**
+ * @brief           memcpy wrapper for q15 (int16) buffers
+ * @param[in, out]  dst         Destination pointer
+ * @param[in]       src         Source pointer.
+ * @param[in]       block_size  Number of bytes to copy (passed unchanged to memcpy, so not an element count).
+ * @note            Plain memcpy on every target; dst and src are declared __RESTRICT.
+ */
+__STATIC_FORCEINLINE void arm_memcpy_q15(q15_t *__RESTRICT dst, const q15_t *__RESTRICT src, uint32_t block_size)
+{
+    memcpy(dst, src, block_size);
+}
+
 #if defined(ARM_MATH_MVEI)
 /**
  * @brief           Vector saturating doubling high multiply returning high half.
@@ -835,8 +1137,21 @@ __STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t di
  */
 __STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift)
 {
+#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
+    const int right_shift = MIN(-1, shift);     /* always <= -1: at least one bit is left for the rounding shift */
+    const int left_shift = shift - right_shift; /* always >= 0: whatever part of shift exceeds right_shift */
+    /* Broadcast both shift amounts to every lane. */
+    const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
+    const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
+    /* Shift left, multiply by the scalar multiplier, then apply the (negative) right_shift with vrshlq_s32. */
+    int32x4_t result = vqdmulhq_n_s32(vshlq_s32(val, left_shift_dup), multiplier);
+    result = vrshlq_s32(result, right_shift_dup);
+
+    return result;
+#else
     return arm_divide_by_power_of_two_mve(
         arm_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier), RIGHT_SHIFT(shift));
+#endif
 }
 
 __STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2)
@@ -856,6 +1171,15 @@ __STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val,
                                                        const int32x4_t multiplier,
                                                        const int32x4_t shift)
 {
+#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
+    const int32x4_t right_shift = vminq_s32(vdupq_n_s32(-1), shift);
+    const int32x4_t left_shift = vqsubq_s32(shift, right_shift);
+
+    int32x4_t result = vqdmulhq_s32(vshlq_s32(val, left_shift), multiplier);
+    result = vrshlq_s32(result, right_shift);
+
+    return result;
+#else
     const int32x4_t zz = vdupq_n_s32(0);
     const mve_pred16_t p = vcmpgtq_n_s32(shift, 0);
 
@@ -864,6 +1188,7 @@ __STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val,
 
     return arm_divide_by_power_of_two_mve_32x4(arm_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier),
                                                right_shift);
+#endif
 }
 #endif
 
@@ -899,21 +1224,21 @@ __STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val)
 #undef SELECT_IF_NON_ZERO
 
     mask = MASK_IF_ZERO(val);
-    return SELECT_USING_MASK(mask, Q31_MAX, result);
+    return SELECT_USING_MASK(mask, NN_Q31_MAX, result);
 }
 
 __STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp)
 {
     const int32_t thresh = ((1 << (31 - exp)) - 1);
     int32_t result = val << exp;
-    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), Q31_MAX, result);
-    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), Q31_MIN, result);
+    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), NN_Q31_MAX, result);
+    result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), NN_Q31_MIN, result);
     return result;
 }
 
 __STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
 {
-    const int64_t sum = (int64_t)val + (int64_t)Q31_MAX;
+    const int64_t sum = (int64_t)val + (int64_t)NN_Q31_MAX;
     const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L);
     int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540);
 
@@ -929,7 +1254,6 @@ __STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val
   @brief         Write 2 q15 elements and post increment pointer.
   @param[in]     dest_q15  Pointer to pointer that holds address of destination.
   @param[in]     src_q31   Input value to be written.
-  @return        none
  */
 __STATIC_FORCEINLINE void arm_nn_write_q15x2_ia(q15_t **dest_q15, q31_t src_q31)
 {
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
index aedf55b..874f766 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_activations_q7.c
  * Description:  Q7 neural network activation function using direct table look-up
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,20 +42,11 @@
  * @{
  */
 
-/**
- * @brief Q7 neural network activation function using direct table look-up
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
- * @param[in]       int_width   bit-width of the integer part, assume to be smaller than 3
- * @param[in]       type        type of activation functions
- *
- * @details
+/*
+ * Q7 neural network activation function using direct table look-up
  *
- * This is the direct table look-up approach.
+ * Refer header file for details.
  *
- * Assume here the integer part of the fixed-point is <= 3.
- * More than 3 just not making much sense, makes no difference with
- * saturation followed by any of these activation functions.
  */
 
 void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
index b408d2d..93ff722 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_relu_q15.c
  * Description:  Q15 version of ReLU
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,21 +42,17 @@
  * @{
  */
 
-/**
- * @brief Q15 RELU function
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
- *
- * @details
+/*
+ * Q15 ReLu function
  *
- * Optimized relu with QSUB instructions.
+ * Refer header file for details.
  *
  */
 
 void arm_relu_q15(q15_t *data, uint16_t size)
 {
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for M cores with DSP extension */
 
     uint16_t i = size >> 1;
@@ -68,7 +64,7 @@ void arm_relu_q15(q15_t *data, uint16_t size)
 
     while (i)
     {
-        in = read_q15x2_ia(&input);
+        in = arm_nn_read_q15x2_ia((const q15_t **)&input);
 
         /* extract the first bit */
         buf = __ROR(in & 0x80008000, 15);
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
index beb00fd..029b39a 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_relu_q7.c
  * Description:  Q7 version of ReLU
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.3
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.4
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,21 +42,17 @@
  * @{
  */
 
-/**
- * @brief Q7 RELU function
- * @param[in,out]   data        pointer to input
- * @param[in]       size        number of elements
- *
- * @details
+/*
+ * Q7 ReLu function
  *
- * Optimized relu with QSUB instructions.
+ * Refer header file for details.
  *
  */
 
 void arm_relu_q7(q7_t *data, uint16_t size)
 {
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for M cores with DSP extension */
 
     uint16_t i = size >> 2;
@@ -68,7 +64,7 @@ void arm_relu_q7(q7_t *data, uint16_t size)
 
     while (i)
     {
-        in = read_q7x4_ia(&input);
+        in = arm_nn_read_q7x4_ia((const q7_t **)&input);
 
         /* extract the first bit */
         buf = (int32_t)__ROR((uint32_t)in & 0x80808080, 7);
@@ -76,7 +72,7 @@ void arm_relu_q7(q7_t *data, uint16_t size)
         /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
         mask = __QSUB8(0x00000000, buf);
 
-        write_q7x4_ia(&output, in & (~mask));
+        arm_nn_write_q7x4_ia(&output, in & (~mask));
 
         i--;
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c
new file mode 100644
index 0000000..7fbb104
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c
@@ -0,0 +1,140 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_elementwise_add_s16
+ * Description:  Elementwise add
+ *
+ * $Date:        10 May 2022
+ * $Revision:    V.2.1.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/*
+ * s16 elementwise add
+ *
+ * Refer header file for details.
+ *
+ */
+
+/* Note: the input shifts (named __SHIFT in the s8 SAT_INPUT macros) are expected to be <=0 - TODO confirm */
+
+arm_cmsis_nn_status arm_elementwise_add_s16(const int16_t *input_1_vect,
+                                            const int16_t *input_2_vect,
+                                            const int32_t input_1_offset,
+                                            const int32_t input_1_mult,
+                                            const int32_t input_1_shift,
+                                            const int32_t input_2_offset,
+                                            const int32_t input_2_mult,
+                                            const int32_t input_2_shift,
+                                            const int32_t left_shift,
+                                            int16_t *output,
+                                            const int32_t out_offset,
+                                            const int32_t out_mult,
+                                            const int32_t out_shift,
+                                            const int32_t out_activation_min,
+                                            const int32_t out_activation_max,
+                                            const int32_t block_size)
+{
+    (void)input_1_offset; /* The three offsets are cast to void: this function does not use them. */
+    (void)input_2_offset;
+    (void)out_offset;
+    int32_t input_1;
+    int32_t input_2;
+    int32_t sum;
+    int32_t two_halfword_1, two_halfword_2;
+    int16_t sum_1, sum_2;
+    int32_t loop_count = block_size / 2;
+    /* Main loop: two int16 values per iteration, read and written as one packed 32-bit word. */
+    while (loop_count > 0)
+    {
+        two_halfword_1 = arm_nn_read_q15x2_ia(&input_1_vect);
+        two_halfword_2 = arm_nn_read_q15x2_ia(&input_2_vect);
+        /* First value: low 16 bits of each packed word. */
+        input_1 = (int16_t)(two_halfword_1 & 0xFFFF) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = (int16_t)(two_halfword_2 & 0xFFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
+        sum = MAX(sum, out_activation_min);
+        sum = MIN(sum, out_activation_max);
+        sum_1 = (int16_t)sum;
+        /* Second value: high 16 bits of each packed word. */
+        input_1 = (int16_t)(two_halfword_1 >> 16) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = (int16_t)(two_halfword_2 >> 16) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
+        sum = MAX(sum, out_activation_min);
+        sum = MIN(sum, out_activation_max);
+        sum_2 = (int16_t)sum;
+
+        arm_nn_write_q15x2_ia(&output, PACK_Q15x2_32x1(sum_1, sum_2));
+
+        loop_count--;
+    }
+    loop_count = block_size & 0x1; /* 1 when the low bit of block_size is set (odd count), else 0 */
+
+    while (loop_count > 0)
+    {
+        /* C = A + B: shift left, requantize each input, add, requantize the sum, clamp to the activation range */
+        input_1 = *input_1_vect++ << left_shift;
+        input_2 = *input_2_vect++ << left_shift;
+
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
+
+        sum = MAX(sum, out_activation_min);
+        sum = MIN(sum, out_activation_max);
+
+        *output++ = (int16_t)sum;
+
+        /* Decrement loop counter */
+        loop_count--;
+    }
+
+    return (ARM_CMSIS_NN_SUCCESS);
+}
+
+/**
+ * @} end of BasicMath group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
index 7f51fc8..9ff0311 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,10 +21,10 @@
 /* ----------------------------------------------------------------------
  * Project:      CMSIS NN Library
  * Title:        arm_elementwise_add_s8
- * Description:  Element wise add
+ * Description:  Elementwise add
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.5.2
+ * $Date:        19 April 2022
+ * $Revision:    V.3.0.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -32,24 +32,6 @@
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
-#if defined(ARM_MATH_MVEI)
-#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_helium_utils.h"
-#endif
-
-#if defined(ARM_MATH_MVEI)
-#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT)                                                                     \
-    __INPUT_V = arm_doubling_high_mult_mve(__INPUT_V, __MULT);                                                         \
-    __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT);
-#endif
-
-/**
- * @note The *_no_sat API does not mean that the input not saturated, Since
- *       __MULT is a positive integer, it is saturated. The API definition
- *       has more info about it.
- */
-#define SAT_INPUT(__INPUT, __MULT, __SHIFT)                                                                            \
-    __INPUT = arm_nn_doubling_high_mult_no_sat(__INPUT, __MULT);                                                       \
-    __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT);
 
 /**
  *  @ingroup groupNN
@@ -61,7 +43,7 @@
  */
 
 /*
- * s8 element wise add
+ * s8 elementwise add
  *
  * Refer header file for details.
  *
@@ -69,25 +51,25 @@
 
 /* Note: __SHIFT is expected to be <=0 */
 
-arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
-                                  const int8_t *input_2_vect,
-                                  const int32_t input_1_offset,
-                                  const int32_t input_1_mult,
-                                  const int32_t input_1_shift,
-                                  const int32_t input_2_offset,
-                                  const int32_t input_2_mult,
-                                  const int32_t input_2_shift,
-                                  const int32_t left_shift,
-                                  int8_t *output,
-                                  const int32_t out_offset,
-                                  const int32_t out_mult,
-                                  const int32_t out_shift,
-                                  const int32_t out_activation_min,
-                                  const int32_t out_activation_max,
-                                  const uint32_t block_size)
+arm_cmsis_nn_status arm_elementwise_add_s8(const int8_t *input_1_vect,
+                                           const int8_t *input_2_vect,
+                                           const int32_t input_1_offset,
+                                           const int32_t input_1_mult,
+                                           const int32_t input_1_shift,
+                                           const int32_t input_2_offset,
+                                           const int32_t input_2_mult,
+                                           const int32_t input_2_shift,
+                                           const int32_t left_shift,
+                                           int8_t *output,
+                                           const int32_t out_offset,
+                                           const int32_t out_mult,
+                                           const int32_t out_shift,
+                                           const int32_t out_activation_min,
+                                           const int32_t out_activation_max,
+                                           const int32_t block_size)
 {
 #if defined(ARM_MATH_MVEI)
-    int32_t count = (int32_t)block_size;
+    int32_t count = block_size;
 
     while (count > 0)
     {
@@ -105,11 +87,11 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
         vect_1 = vshlq_r_s32(vect_1, left_shift);
         vect_2 = vshlq_r_s32(vect_2, left_shift);
 
-        SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift);
-        SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift);
+        vect_1 = arm_requantize_mve(vect_1, input_1_mult, input_1_shift);
+        vect_2 = arm_requantize_mve(vect_2, input_2_mult, input_2_shift);
 
         vect_1 = vaddq_s32(vect_1, vect_2);
-        SAT_INPUT_VECT(vect_1, out_mult, out_shift);
+        vect_1 = arm_requantize_mve(vect_1, out_mult, out_shift);
 
         vect_1 = vaddq_n_s32(vect_1, out_offset);
 
@@ -124,7 +106,7 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
         count -= 4;
     }
 #else
-    uint32_t loop_count;
+    int32_t loop_count;
     int32_t input_1;
     int32_t input_2;
     int32_t sum;
@@ -141,7 +123,7 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
 
     loop_count = block_size >> 2;
 
-    while (loop_count > 0U)
+    while (loop_count > 0)
     {
         /* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension
            intrinsic */
@@ -155,62 +137,63 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
         b_2 = __SADD16(b_2, offset_2_packed);
 
         /* Sum 1 */
-        input_1 = (int16_t)(b_1 & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = (b_1 & 0x0FFFF) << left_shift;
 
-        input_2 = (int16_t)(b_2 & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+
+        input_2 = (b_2 & 0x0FFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
         r1 = (q7_t)sum;
 
         /* Sum 3 */
-        input_1 = (int16_t)((b_1 >> 16) & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = ((b_1 >> 16) & 0x0FFFF) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
-        input_2 = (int16_t)((b_2 >> 16) & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = ((b_2 >> 16) & 0x0FFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
         r3 = (q7_t)sum;
 
         /* Sum 2 */
-        input_1 = (int16_t)(a_1 & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = (a_1 & 0x0FFFF) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
-        input_2 = (int16_t)(a_2 & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = (a_2 & 0x0FFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
         r2 = (q7_t)sum;
 
         /* Sum 4 */
-        input_1 = (int16_t)((a_1 >> 16) & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_1, input_1_mult, input_1_shift);
+        input_1 = ((a_1 >> 16) & 0x0FFFF) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
 
-        input_2 = (int16_t)((a_2 >> 16) & 0x0FFFFL) << left_shift;
-        SAT_INPUT(input_2, input_2_mult, input_2_shift);
+        input_2 = ((a_2 >> 16) & 0x0FFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
         sum = MAX(sum, out_activation_min);
         sum = MIN(sum, out_activation_max);
         r4 = (q7_t)sum;
 
-        write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4));
+        arm_nn_write_q7x4_ia(&output, PACK_Q7x4_32x1(r1, r2, r3, r4));
 
         loop_count--;
     }
@@ -220,21 +203,18 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
     loop_count = block_size;
 #endif
 
-    while (loop_count > 0U)
+    while (loop_count > 0)
     {
         /* C = A + B */
 
         input_1 = (*input_1_vect++ + input_1_offset) << left_shift;
         input_2 = (*input_2_vect++ + input_2_offset) << left_shift;
 
-        input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult);
-        input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift);
-
-        input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult);
-        input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift);
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
 
         sum = input_1 + input_2;
-        SAT_INPUT(sum, out_mult, out_shift);
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
         sum += out_offset;
 
         sum = MAX(sum, out_activation_min);
@@ -248,7 +228,7 @@ arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
 
 #endif /* ARM_MATH_MVEI */
 
-    return (ARM_MATH_SUCCESS);
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c
new file mode 100644
index 0000000..5d53550
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c
@@ -0,0 +1,126 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_elementwise_mul_s16
+ * Description:  Element wise multiplication
+ *
+ * $Date:        10 May 2022
+ * $Revision:    V.2.1.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup BasicMath
+ * @{
+ */
+
+/**
+ * @brief s16 element wise multiplication of two vectors
+ *        (each product is requantized, clamped to the activation range and stored as int16_t)
+ * @note   Refer header file for details.
+ *         The three offset arguments are not used here; the return value is always ARM_CMSIS_NN_SUCCESS.
+ */
+arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
+                                            const int16_t *input_2_vect,
+                                            const int32_t input_1_offset,
+                                            const int32_t input_2_offset,
+                                            int16_t *output,
+                                            const int32_t out_offset,
+                                            const int32_t out_mult,
+                                            const int32_t out_shift,
+                                            const int32_t out_activation_min,
+                                            const int32_t out_activation_max,
+                                            const int32_t block_size)
+{
+    (void)input_1_offset; /* Offsets are accepted but never read by this implementation */
+    (void)input_2_offset;
+    (void)out_offset;
+    int32_t input_1;
+    int32_t input_2;
+    int32_t mul_res;
+    int32_t two_halfword_1, two_halfword_2;
+    int16_t mul_1, mul_2;
+    int32_t loop_count = block_size / 2; /* Number of complete element pairs */
+    /* Main loop: two outputs are calculated per iteration */
+    while (loop_count > 0)
+    {
+        two_halfword_1 = arm_nn_read_q15x2_ia(&input_1_vect); /* NOTE(review): presumably loads two int16 values and advances the pointer - confirm */
+        two_halfword_2 = arm_nn_read_q15x2_ia(&input_2_vect);
+        /* First element of the pair: low halfword, sign-extended by the int16_t cast */
+        input_1 = (int16_t)(two_halfword_1 & 0xFFFF);
+        input_2 = (int16_t)(two_halfword_2 & 0xFFFF);
+        mul_res = input_1 * input_2;
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift); /* Requantize the product with out_mult/out_shift */
+        mul_res = MAX(mul_res, out_activation_min);                /* Clamp to [out_activation_min, out_activation_max] */
+        mul_res = MIN(mul_res, out_activation_max);
+        mul_1 = (int16_t)mul_res;
+        /* Second element of the pair: high halfword, same steps as above */
+        input_1 = (int16_t)(two_halfword_1 >> 16);
+        input_2 = (int16_t)(two_halfword_2 >> 16);
+        mul_res = input_1 * input_2;
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift);
+        mul_res = MAX(mul_res, out_activation_min);
+        mul_res = MIN(mul_res, out_activation_max);
+        mul_2 = (int16_t)mul_res;
+        /* Both results are handed to the q15x2 write helper as one packed value */
+        arm_nn_write_q15x2_ia(&output, PACK_Q15x2_32x1(mul_1, mul_2));
+
+        loop_count--;
+    }
+    loop_count = block_size & 0x1; /* One leftover element when block_size is odd, otherwise none */
+
+    while (loop_count > 0)
+    {
+        /* C = A * B (scalar tail: same multiply, requantize and clamp as the paired loop) */
+
+        input_1 = *input_1_vect++;
+        input_2 = *input_2_vect++;
+
+        mul_res = input_1 * input_2;
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift);
+
+        mul_res = MAX(mul_res, out_activation_min);
+        mul_res = MIN(mul_res, out_activation_max);
+
+        *output++ = (int16_t)mul_res;
+
+        /* Decrement loop counter */
+        loop_count--;
+    }
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of BasicMath group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
index f38d024..663112a 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_elementwise_mul_s8
  * Description:  Element wise multiplication
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.5
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,24 +42,24 @@
  * @{
  */
 
-/**
- * @brief s8 element wise multiplication of two vectors
+/*
+ * s8 element wise multiplication of two vectors
  *
- * @note   Refer header file for details.
+ * Refer header file for details.
  *
  */
 
-arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
-                                  const int8_t *input_2_vect,
-                                  const int32_t input_1_offset,
-                                  const int32_t input_2_offset,
-                                  int8_t *output,
-                                  const int32_t out_offset,
-                                  const int32_t out_mult,
-                                  const int32_t out_shift,
-                                  const int32_t out_activation_min,
-                                  const int32_t out_activation_max,
-                                  const uint32_t block_size)
+arm_cmsis_nn_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
+                                           const int8_t *input_2_vect,
+                                           const int32_t input_1_offset,
+                                           const int32_t input_2_offset,
+                                           int8_t *output,
+                                           const int32_t out_offset,
+                                           const int32_t out_mult,
+                                           const int32_t out_shift,
+                                           const int32_t out_activation_min,
+                                           const int32_t out_activation_max,
+                                           const int32_t block_size)
 {
 
     int32_t loop_count;
@@ -165,7 +165,7 @@ arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
         mul_res = MIN(mul_res, out_activation_max);
         r4 = (q7_t)mul_res;
 
-        write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4));
+        arm_nn_write_q7x4_ia(&output, PACK_Q7x4_32x1(r1, r2, r3, r4));
 
         loop_count--;
     }
@@ -194,7 +194,7 @@ arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
         loop_count--;
     }
 #endif
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
index b36c1a1..442a497 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,7 @@
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /**
  *  @ingroup groupNN
@@ -59,7 +60,7 @@ void arm_concatenation_s8_w(const int8_t *input,
 
     output += offset_w * (input_x * input_y * input_z);
 
-    memcpy(output, input, input_copy_size);
+    arm_memcpy_q7(output, input, input_copy_size);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
index 0e11558..bcc0d38 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,7 @@
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /**
  *  @ingroup groupNN
@@ -65,7 +66,7 @@ void arm_concatenation_s8_x(const int8_t *input,
     // Copy per row
     for (i = 0; i < num_iterations; ++i)
     {
-        memcpy(output, input, input_x);
+        arm_memcpy_q7(output, input, input_x);
         input += input_x;
         output += output_x;
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
index 55dbe27..b0f7f43 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,7 @@
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /**
  *  @ingroup groupNN
@@ -66,7 +67,7 @@ void arm_concatenation_s8_y(const int8_t *input,
     // Copy per tile
     for (i = 0; i < num_iterations; ++i)
     {
-        memcpy(output, input, input_copy_size);
+        arm_memcpy_q7(output, input, input_copy_size);
         input += input_copy_size;
         output += output_stride;
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
index 2a82910..4ba99f5 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -31,6 +31,7 @@
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /**
  *  @ingroup groupNN
@@ -65,7 +66,7 @@ void arm_concatenation_s8_z(const int8_t *input,
 
     for (i = 0; i < input_w; ++i)
     {
-        memcpy(output, input, input_copy_size);
+        arm_memcpy_q7(output, input, input_copy_size);
         input += input_copy_size;
         output += output_stride;
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
index ef0c5f0..64a24d6 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_1_x_n_s8.c
  * Description:  s8 version of 1xN convolution using symmetric quantization.
  *
- * $Date:        January 26, 2021
- * $Revision:    V.2.0.3
+ * $Date:        20 June 2022
+ * $Revision:    V.3.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -49,23 +49,24 @@
  *
  */
 
-arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
-                                 const cmsis_nn_conv_params *conv_params,
-                                 const cmsis_nn_per_channel_quant_params *quant_params,
-                                 const cmsis_nn_dims *input_dims,
-                                 const q7_t *input_data,
-                                 const cmsis_nn_dims *filter_dims,
-                                 const q7_t *filter_data,
-                                 const cmsis_nn_dims *bias_dims,
-                                 const int32_t *bias_data,
-                                 const cmsis_nn_dims *output_dims,
-                                 q7_t *output_data)
+arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_conv_params *conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input_data,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *filter_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int32_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output_data)
 {
     (void)bias_dims;
-    arm_status status = ARM_MATH_SUCCESS;
-    if (output_dims->w % 4 != 0)
+    arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS;
+    /* The wrapper API is the ultimate reference for argument check */
+    if ((input_dims->h != 1) || (output_dims->w % 4 != 0) || conv_params->dilation.w != 1)
     {
-        status = ARM_MATH_SIZE_MISMATCH;
+        status = ARM_CMSIS_NN_ARG_ERROR;
         goto out;
     }
 
@@ -80,94 +81,55 @@ arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
     const uint16_t pad_x = conv_params->padding.w;
     const uint16_t stride_x = conv_params->stride.w;
 
-    const int32_t input_offset = conv_params->input_offset;
-    const int32_t out_offset = conv_params->output_offset;
-    const int32_t out_activation_min = conv_params->activation.min;
-    const int32_t out_activation_max = conv_params->activation.max;
-    int32_t *output_mult = quant_params->multiplier;
-    int32_t *output_shift = quant_params->shift;
-
-    for (int i_out_x = 0; i_out_x <= (output_x - 4); i_out_x += 4)
+    int i_batch;
+    for (i_batch = 0; i_batch < input_dims->n; i_batch++)
     {
-        int32_t input_begin_idx[4];
-        int32_t ker_begin_idx[4];
-        int32_t ker_end_idx[4];
-
-        for (int i = 0; i < 4; i++)
+        for (int i_out_x = 0; i_out_x <= (output_x - 4); i_out_x += 4)
         {
-            const int32_t est_input_x_idx = stride_x * (i_out_x + i) - pad_x;
-            input_begin_idx[i] = MAX(0, est_input_x_idx);
-            ker_begin_idx[i] = MAX(0, -est_input_x_idx);
-            ker_end_idx[i] = MIN(kernel_x, input_x - est_input_x_idx);
-        }
+            int32_t input_begin_idx[4];
+            int32_t ker_begin_idx[4];
+            int32_t ker_end_idx[4];
 
-        for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
-        {
-            int32x4_t s_offset;
-            int32_t acc[4];
-            if ((ker_begin_idx[0] != 0) || (ker_end_idx[3] != kernel_x))
+            for (int i = 0; i < 4; i++)
             {
-                int32_t sum_row[4];
-
-                (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[0] - ker_begin_idx[0]) * input_ch,
-                                                input_data + input_begin_idx[0] * input_ch,
-                                                filter_data + (input_ch * kernel_x * i_out_ch) +
-                                                    (ker_begin_idx[0] * input_ch),
-                                                &sum_row[0],
-                                                &acc[0]);
-                (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[1] - ker_begin_idx[1]) * input_ch,
-                                                input_data + input_begin_idx[1] * input_ch,
-                                                filter_data + (input_ch * kernel_x * i_out_ch) +
-                                                    (ker_begin_idx[1] * input_ch),
-                                                &sum_row[1],
-                                                &acc[1]);
-
-                (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[2] - ker_begin_idx[2]) * input_ch,
-                                                input_data + input_begin_idx[2] * input_ch,
-                                                filter_data + (input_ch * kernel_x * i_out_ch) +
-                                                    (ker_begin_idx[2] * input_ch),
-                                                &sum_row[2],
-                                                &acc[2]);
-
-                (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[3] - ker_begin_idx[3]) * input_ch,
-                                                input_data + input_begin_idx[3] * input_ch,
-                                                filter_data + (input_ch * kernel_x * i_out_ch) +
-                                                    (ker_begin_idx[3] * input_ch),
-                                                &sum_row[3],
-                                                &acc[3]);
-
-                s_offset = vldrwq_s32(sum_row);
+                const int32_t est_input_x_idx = stride_x * (i_out_x + i) - pad_x;
+                input_begin_idx[i] = MAX(0, est_input_x_idx);
+                ker_begin_idx[i] = MAX(0, -est_input_x_idx);
+                ker_end_idx[i] = MIN(kernel_x, input_x - est_input_x_idx);
             }
-            else
+
+            if ((ker_begin_idx[0] != 0) || (ker_end_idx[3] != kernel_x))
             {
-                int32_t sum_row;
-                (void)arm_nn_mat_mul_core_4x_s8(kernel_x * input_ch,
-                                                stride_x * input_ch,
-                                                input_data + input_begin_idx[0] * input_ch,
-                                                filter_data + (input_ch * kernel_x * i_out_ch),
-                                                &sum_row,
-                                                acc);
-
-                s_offset = vdupq_n_s32(sum_row);
+                for (int i = 0; i < 4; i++)
+                {
+                    const int32_t actual_kernel_len = ker_end_idx[i] - ker_begin_idx[i];
+                    arm_nn_mat_mul_core_1x_s8(actual_kernel_len * input_ch,
+                                              (kernel_x - actual_kernel_len) * input_ch,
+                                              input_data + input_begin_idx[i] * input_ch,
+                                              filter_data + (ker_begin_idx[i] * input_ch),
+                                              output_ch,
+                                              conv_params,
+                                              quant_params,
+                                              bias_data,
+                                              output_data);
+                    output_data += output_ch;
+                }
             }
-            int32x4_t res = vldrwq_s32(acc);
-            s_offset = vmulq_n_s32(s_offset, input_offset);
-            res = vaddq_s32(res, s_offset);
-            if (bias_data)
+            else
             {
-                res = vaddq_n_s32(res, bias_data[i_out_ch]);
+                output_data = arm_nn_mat_mul_core_4x_s8(kernel_x * input_ch,
+                                                        stride_x * input_ch,
+                                                        input_data + input_begin_idx[0] * input_ch,
+                                                        filter_data,
+                                                        output_ch,
+                                                        conv_params,
+                                                        quant_params,
+                                                        bias_data,
+                                                        output_data);
             }
-            res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]);
-            res = vaddq_n_s32(res, out_offset);
-
-            res = vmaxq_s32(res, vdupq_n_s32(out_activation_min));
-            res = vminq_s32(res, vdupq_n_s32(out_activation_max));
-
-            const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3};
-            vstrbq_scatter_offset_s32(output_data, scatter_offset, res);
-            output_data++;
         }
-        output_data += (3 * output_ch);
+        /* Advance to the next batch */
+        input_data += (input_x * input_ch);
     }
 
 #else
@@ -191,8 +153,8 @@ arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
 
 int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 {
-#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
-    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t);
+#if !defined(ARM_MATH_MVEI)
+    return arm_convolve_s8_get_buffer_size(input_dims, filter_dims);
 #else
     (void)input_dims;
     (void)filter_dims;
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
index e2a360b..d0abf21 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_1x1_HWC_q7_fast_nonsquare.c
  * Description:  Fast Q7 version of 1x1 convolution (non-square shape)
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,67 +42,35 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1
- * and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise
- * separable convolution.
- *
- * This function is the version with full list of optimization tricks, but with
- * some contraints:
- *   ch_im_in is multiple of 4
- *   ch_im_out is multiple of 2
+/*
+ * Fast Q7 version of 1x1 convolution (non-square shape)
+ * Refer to the function header for details
  *
- * [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications
- * https://arxiv.org/abs/1704.04861
  */
 
-arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
-                                                  const uint16_t dim_im_in_x,
-                                                  const uint16_t dim_im_in_y,
-                                                  const uint16_t ch_im_in,
-                                                  const q7_t *wt,
-                                                  const uint16_t ch_im_out,
-                                                  const uint16_t dim_kernel_x,
-                                                  const uint16_t dim_kernel_y,
-                                                  const uint16_t padding_x,
-                                                  const uint16_t padding_y,
-                                                  const uint16_t stride_x,
-                                                  const uint16_t stride_y,
-                                                  const q7_t *bias,
-                                                  const uint16_t bias_shift,
-                                                  const uint16_t out_shift,
-                                                  q7_t *Im_out,
-                                                  const uint16_t dim_im_out_x,
-                                                  const uint16_t dim_im_out_y,
-                                                  q15_t *bufferA,
-                                                  q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                                           const uint16_t dim_im_in_x,
+                                                           const uint16_t dim_im_in_y,
+                                                           const uint16_t ch_im_in,
+                                                           const q7_t *wt,
+                                                           const uint16_t ch_im_out,
+                                                           const uint16_t dim_kernel_x,
+                                                           const uint16_t dim_kernel_y,
+                                                           const uint16_t padding_x,
+                                                           const uint16_t padding_y,
+                                                           const uint16_t stride_x,
+                                                           const uint16_t stride_y,
+                                                           const q7_t *bias,
+                                                           const uint16_t bias_shift,
+                                                           const uint16_t out_shift,
+                                                           q7_t *Im_out,
+                                                           const uint16_t dim_im_out_x,
+                                                           const uint16_t dim_im_out_y,
+                                                           q15_t *bufferA,
+                                                           q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
     (void)dim_im_in_y;
     int16_t i_out_y, i_out_x;
@@ -120,7 +88,7 @@ arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
         padding_y != 0 || stride_x != 1 || stride_y != 1)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
@@ -193,7 +161,7 @@ arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
         padding_y != 0 || stride_x != 1 || stride_y != 1)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -229,7 +197,7 @@ arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
index 06c6f0a..98eb524 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,18 +23,16 @@
  * Title:        arm_convolve_1x1_s8_fast.c
  * Description:  Fast q7 version of 1x1 convolution (non-square shape)
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.3
+ * $Date:        20 june 2022
+ * $Revision:    V.3.0.1
  *
- * Target Processor:  Cortex-M cores
+ * Target Processor:  Cortex-M Processors
  *
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
-
-#define DIM_KER_X (1U)
-#define DIM_KER_Y (1U)
+#include <stdio.h>
 
 /**
  *  @ingroup groupNN
@@ -52,22 +50,22 @@
  *
  */
 
-arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
-                                    const cmsis_nn_conv_params *conv_params,
-                                    const cmsis_nn_per_channel_quant_params *quant_params,
-                                    const cmsis_nn_dims *input_dims,
-                                    const q7_t *input_data,
-                                    const cmsis_nn_dims *filter_dims,
-                                    const q7_t *filter_data,
-                                    const cmsis_nn_dims *bias_dims,
-                                    const int32_t *bias_data,
-                                    const cmsis_nn_dims *output_dims,
-                                    q7_t *output_data)
+arm_cmsis_nn_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
+                                             const cmsis_nn_conv_params *conv_params,
+                                             const cmsis_nn_per_channel_quant_params *quant_params,
+                                             const cmsis_nn_dims *input_dims,
+                                             const q7_t *input_data,
+                                             const cmsis_nn_dims *filter_dims,
+                                             const q7_t *filter_data,
+                                             const cmsis_nn_dims *bias_dims,
+                                             const int32_t *bias_data,
+                                             const cmsis_nn_dims *output_dims,
+                                             q7_t *output_data)
 {
-    if (input_dims->c % 4 != 0 || conv_params->padding.w != 0 || conv_params->padding.h != 0 ||
-        conv_params->stride.w != 1 || conv_params->stride.h != 1)
+    if (conv_params->padding.w != 0 || conv_params->padding.h != 0 || conv_params->stride.w != 1 ||
+        conv_params->stride.h != 1)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     (void)ctx;
@@ -79,70 +77,33 @@ arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
     const int32_t col_len = input_dims->w * input_dims->h * input_dims->n;
     const int32_t output_ch = output_dims->c;
     const int32_t input_ch = input_dims->c;
-    const int32_t input_offset = conv_params->input_offset;
-    const int32_t out_offset = conv_params->output_offset;
-    const int32_t out_activation_min = conv_params->activation.min;
-    const int32_t out_activation_max = conv_params->activation.max;
-    int32_t *output_mult = quant_params->multiplier;
-    int32_t *output_shift = quant_params->shift;
 
     for (int i_items = 0; i_items <= (col_len - 4); i_items += 4)
     {
-        for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
-        {
-            int32_t sum_row = 0;
-            int32_t temp_out[4];
-
-            (void)arm_nn_mat_mul_core_4x_s8(input_ch,
-                                            input_ch,
-                                            input_data + i_items * input_ch,
-                                            filter_data + i_out_ch * input_ch,
-                                            &sum_row,
-                                            temp_out);
-            int32x4_t res = vldrwq_s32(temp_out);
-            if (bias_data)
-            {
-                res = vaddq_n_s32(res, bias_data[i_out_ch]);
-            }
-            sum_row = sum_row * input_offset;
-            res = vaddq_n_s32(res, sum_row);
-            res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]);
-            res = vaddq_n_s32(res, out_offset);
-
-            res = vmaxq_s32(res, vdupq_n_s32(out_activation_min));
-            res = vminq_s32(res, vdupq_n_s32(out_activation_max));
-
-            const uint32x4_t scatter_offset = {
-                0, (uint32_t)output_ch, (uint32_t)output_ch * 2, (uint32_t)output_ch * 3};
-            vstrbq_scatter_offset_s32(output_data, scatter_offset, res);
-            output_data++;
-        }
-        output_data += (3 * output_ch);
+        output_data = arm_nn_mat_mul_core_4x_s8(input_ch,
+                                                input_ch,
+                                                input_data + i_items * input_ch,
+                                                filter_data,
+                                                output_ch,
+                                                conv_params,
+                                                quant_params,
+                                                bias_data,
+                                                output_data);
     }
 
     /* Handle left over elements */
     for (int i_items = (col_len & ~0x3); i_items < col_len; i_items++)
     {
-        for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
-        {
-            int32_t sum_row = 0;
-
-            int32_t acc;
-            (void)arm_nn_mat_mul_core_1x_s8(
-                input_ch, input_data + i_items * input_ch, filter_data + i_out_ch * input_ch, &sum_row, &acc);
-            if (bias_data)
-            {
-                acc += bias_data[i_out_ch];
-            }
-            sum_row = (sum_row * input_offset);
-            acc += sum_row;
-            acc = arm_nn_requantize(acc, output_mult[i_out_ch], output_shift[i_out_ch]);
-            acc += out_offset;
-
-            acc = MAX(acc, out_activation_min);
-            acc = MIN(acc, out_activation_max);
-            *output_data++ = acc;
-        }
+        arm_nn_mat_mul_core_1x_s8(input_ch,
+                                  0,
+                                  input_data + i_items * input_ch,
+                                  filter_data,
+                                  output_ch,
+                                  conv_params,
+                                  quant_params,
+                                  bias_data,
+                                  output_data);
+        output_data += output_ch;
     }
 
 #else
@@ -169,7 +130,7 @@ arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
 #endif
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims)
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
index 11fd1d3..fe642d8 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q15_basic.c
  * Description:  Q15 version of convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,55 +42,29 @@
  * @{
  */
 
-/**
- * @brief Basic Q15 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * This basic version is designed to work for any input tensor and weight
- * dimension.
+/*
+ * Basic Q15 convolution function
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
-                                      const uint16_t dim_im_in,
-                                      const uint16_t ch_im_in,
-                                      const q15_t *wt,
-                                      const uint16_t ch_im_out,
-                                      const uint16_t dim_kernel,
-                                      const uint16_t padding,
-                                      const uint16_t stride,
-                                      const q15_t *bias,
-                                      const uint16_t bias_shift,
-                                      const uint16_t out_shift,
-                                      q15_t *Im_out,
-                                      const uint16_t dim_im_out,
-                                      q15_t *bufferA,
-                                      q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
+                                               const uint16_t dim_im_in,
+                                               const uint16_t ch_im_in,
+                                               const q15_t *wt,
+                                               const uint16_t ch_im_out,
+                                               const uint16_t dim_kernel,
+                                               const uint16_t padding,
+                                               const uint16_t stride,
+                                               const q15_t *bias,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               q15_t *Im_out,
+                                               const uint16_t dim_im_out,
+                                               q15_t *bufferA,
+                                               q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
@@ -203,7 +177,7 @@ arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
index 8dd880b..a0bbd22 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q15_fast.c
  * Description:  Fast Q15 version of convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,70 +42,39 @@
  * @{
  */
 
-/**
- * @brief Fast Q15 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 2
- *
- * ch_im_out is multipe of 2
- *
+/*
+ * Fast Q15 convolution function
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
-                                     const uint16_t dim_im_in,
-                                     const uint16_t ch_im_in,
-                                     const q15_t *wt,
-                                     const uint16_t ch_im_out,
-                                     const uint16_t dim_kernel,
-                                     const uint16_t padding,
-                                     const uint16_t stride,
-                                     const q15_t *bias,
-                                     const uint16_t bias_shift,
-                                     const uint16_t out_shift,
-                                     q15_t *Im_out,
-                                     const uint16_t dim_im_out,
-                                     q15_t *bufferA,
-                                     q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
+                                              const uint16_t dim_im_in,
+                                              const uint16_t ch_im_in,
+                                              const q15_t *wt,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t dim_kernel,
+                                              const uint16_t padding,
+                                              const uint16_t stride,
+                                              const q15_t *bias,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              q15_t *Im_out,
+                                              const uint16_t dim_im_out,
+                                              q15_t *bufferA,
+                                              q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
 
     q15_t *pBuffer = bufferA;
     q15_t *im_buffer = bufferA;
     q15_t *pOut = Im_out;
 
-    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
+    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0 || dim_im_out & 0x1)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     /* Run the following code for Cortex-M4 and Cortex-M7 */
@@ -217,7 +186,7 @@ arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
     if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -251,7 +220,7 @@ arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
index e24dd1c..7d62293 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q15_fast.c
  * Description:  Fast Q15 version of convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,70 +42,34 @@
  * @{
  */
 
-/**
- * @brief Fast Q15 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 2
- *
- * ch_im_out is multipe of 2
- *
+/*
+ * Fast Q15 convolution function (non-square shape)
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
-                                               const uint16_t dim_im_in_x,
-                                               const uint16_t dim_im_in_y,
-                                               const uint16_t ch_im_in,
-                                               const q15_t *wt,
-                                               const uint16_t ch_im_out,
-                                               const uint16_t dim_kernel_x,
-                                               const uint16_t dim_kernel_y,
-                                               const uint16_t padding_x,
-                                               const uint16_t padding_y,
-                                               const uint16_t stride_x,
-                                               const uint16_t stride_y,
-                                               const q15_t *bias,
-                                               const uint16_t bias_shift,
-                                               const uint16_t out_shift,
-                                               q15_t *Im_out,
-                                               const uint16_t dim_im_out_x,
-                                               const uint16_t dim_im_out_y,
-                                               q15_t *bufferA,
-                                               q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
+                                                        const uint16_t dim_im_in_x,
+                                                        const uint16_t dim_im_in_y,
+                                                        const uint16_t ch_im_in,
+                                                        const q15_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel_x,
+                                                        const uint16_t dim_kernel_y,
+                                                        const uint16_t padding_x,
+                                                        const uint16_t padding_y,
+                                                        const uint16_t stride_x,
+                                                        const uint16_t stride_y,
+                                                        const q15_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q15_t *Im_out,
+                                                        const uint16_t dim_im_out_x,
+                                                        const uint16_t dim_im_out_y,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
 
     q15_t *pBuffer = bufferA;
@@ -115,7 +79,7 @@ arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
     if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     /* Run the following code for Cortex-M4 and Cortex-M7 */
@@ -229,7 +193,7 @@ arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
     if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -264,7 +228,7 @@ arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
index 01ef762..ed388a5 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q7_RGB.c
  * Description:  Q7 version of convolution for RGB image
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,61 +42,29 @@
  * @{
  */
 
-/**
- * @brief Q7 convolution function for RGB image
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in equals 3
- *
- * This kernel is written exclusively for convolution with ch_im_in
- * equals 3. This applies on the first layer of CNNs which has input
- * image with RGB format.
+/*
+ * Q7 convolution function for RGB image
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
-                                   const uint16_t dim_im_in,
-                                   const uint16_t ch_im_in,
-                                   const q7_t *wt,
-                                   const uint16_t ch_im_out,
-                                   const uint16_t dim_kernel,
-                                   const uint16_t padding,
-                                   const uint16_t stride,
-                                   const q7_t *bias,
-                                   const uint16_t bias_shift,
-                                   const uint16_t out_shift,
-                                   q7_t *Im_out,
-                                   const uint16_t dim_im_out,
-                                   q15_t *bufferA,
-                                   q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
+                                            const uint16_t dim_im_in,
+                                            const uint16_t ch_im_in,
+                                            const q7_t *wt,
+                                            const uint16_t ch_im_out,
+                                            const uint16_t dim_kernel,
+                                            const uint16_t padding,
+                                            const uint16_t stride,
+                                            const q7_t *bias,
+                                            const uint16_t bias_shift,
+                                            const uint16_t out_shift,
+                                            q7_t *Im_out,
+                                            const uint16_t dim_im_out,
+                                            q15_t *bufferA,
+                                            q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
 
@@ -110,7 +78,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
     // check if number of input channels is 3
     if (ch_im_in != 3)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
     // This part implements the im2col function
     for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
@@ -124,8 +92,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
                     if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                     {
                         /* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */
-                        *__SIMD32(pBuffer) = 0x0;
-                        *(pBuffer + 2) = 0;
+                        arm_memset_q7((q7_t *)pBuffer, (q7_t)0, 3 * sizeof(q15_t));
                         pBuffer += 3;
                     }
                     else
@@ -157,7 +124,8 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
                          *  version 2, no weight shuffling required
                          */
                         *pBuffer++ = top.half_words[0];
-                        *__SIMD32(pBuffer) = __PKHBT(bottom.word, top.word, 0);
+                        int32_t packed_word = __PKHBT(bottom.word, top.word, 0);
+                        arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4);
 #else
                         /*
                          *  big-endian,    | 1st  | 2nd  | 3rd  | omit |
@@ -171,7 +139,8 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
                          *  version 2, no weight shuffling required
                          */
                         *pBuffer++ = bottom.half_words[0];
-                        *__SIMD32(pBuffer) = __PKHTB(top.word, bottom.word, 0);
+                        int32_t packed_word = __PKHTB(top.word, bottom.word, 0);
+                        arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4);
 #endif
                         pBuffer += 2;
                     }
@@ -238,7 +207,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
     // check if number of input channels is 3
     if (ch_im_in != 3)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -273,7 +242,7 @@ arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return (ARM_MATH_SUCCESS);
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
index ba9ebd7..a74a1a7 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q7_basic.c
  * Description:	 Q7 version of convolution
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,55 +42,29 @@
  * @{
  */
 
-/**
- * @brief Basic Q7 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * This basic version is designed to work for any input tensor and weight
- * dimension.
+/*
+ * Basic Q7 convolution function
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
-                                     const uint16_t dim_im_in,
-                                     const uint16_t ch_im_in,
-                                     const q7_t *wt,
-                                     const uint16_t ch_im_out,
-                                     const uint16_t dim_kernel,
-                                     const uint16_t padding,
-                                     const uint16_t stride,
-                                     const q7_t *bias,
-                                     const uint16_t bias_shift,
-                                     const uint16_t out_shift,
-                                     q7_t *Im_out,
-                                     const uint16_t dim_im_out,
-                                     q15_t *bufferA,
-                                     q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
+                                              const uint16_t dim_im_in,
+                                              const uint16_t ch_im_in,
+                                              const q7_t *wt,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t dim_kernel,
+                                              const uint16_t padding,
+                                              const uint16_t stride,
+                                              const q7_t *bias,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              q7_t *Im_out,
+                                              const uint16_t dim_im_out,
+                                              q15_t *bufferA,
+                                              q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
@@ -184,7 +158,7 @@ arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
     }
 #else
     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
-
+    (void)bufferA;
     int i, j, k, l, m, n;
     int conv_out;
     int in_row, in_col;
@@ -221,7 +195,7 @@ arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
index 0c1cf7c..9079695 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q7_basic.c
  * Description:	 Q7 version of convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,54 +42,35 @@
  * @{
  */
 
-/**
- * @brief Basic Q7 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
+/*
+ * Basic Q7 convolution function (non-square shape)
+ * Refer to the function header for details
+ *
  */
 
-arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
-                                               const uint16_t dim_im_in_x,
-                                               const uint16_t dim_im_in_y,
-                                               const uint16_t ch_im_in,
-                                               const q7_t *wt,
-                                               const uint16_t ch_im_out,
-                                               const uint16_t dim_kernel_x,
-                                               const uint16_t dim_kernel_y,
-                                               const uint16_t padding_x,
-                                               const uint16_t padding_y,
-                                               const uint16_t stride_x,
-                                               const uint16_t stride_y,
-                                               const q7_t *bias,
-                                               const uint16_t bias_shift,
-                                               const uint16_t out_shift,
-                                               q7_t *Im_out,
-                                               const uint16_t dim_im_out_x,
-                                               const uint16_t dim_im_out_y,
-                                               q15_t *bufferA,
-                                               q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
+                                                        const uint16_t dim_im_in_x,
+                                                        const uint16_t dim_im_in_y,
+                                                        const uint16_t ch_im_in,
+                                                        const q7_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel_x,
+                                                        const uint16_t dim_kernel_y,
+                                                        const uint16_t padding_x,
+                                                        const uint16_t padding_y,
+                                                        const uint16_t stride_x,
+                                                        const uint16_t stride_y,
+                                                        const q7_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q7_t *Im_out,
+                                                        const uint16_t dim_im_out_x,
+                                                        const uint16_t dim_im_out_y,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
@@ -223,7 +204,7 @@ arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
index 1792844..8f28bd6 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q7_fast.c
  * Description:  Fast Q7 version of convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,72 +42,29 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimention
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in is multiple of 4    ( because of the SIMD32 read and swap )
- *
- * ch_im_out is multipe of 2    ( bacause 2x2 mat_mult kernel )
- *
- * The im2col converts the Q7 tensor input into Q15 column, which is stored in
- * bufferA. There is reordering happenning during this im2col process with
- * arm_q7_to_q15_reordered_no_shift. For every four elements, the second and
- * third elements are swapped.
- *
- * The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the
- * GEMM computation with the reordered columns.
- *
- * To speed-up the determination of the padding condition, we split the
- * computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}.
- * This reduces the total number of boundary condition checks and improves
- * the data copying performance.
+/*
+ * Fast Q7 convolution function
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
-                                    const uint16_t dim_im_in,
-                                    const uint16_t ch_im_in,
-                                    const q7_t *wt,
-                                    const uint16_t ch_im_out,
-                                    const uint16_t dim_kernel,
-                                    const uint16_t padding,
-                                    const uint16_t stride,
-                                    const q7_t *bias,
-                                    const uint16_t bias_shift,
-                                    const uint16_t out_shift,
-                                    q7_t *Im_out,
-                                    const uint16_t dim_im_out,
-                                    q15_t *bufferA,
-                                    q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
+                                             const uint16_t dim_im_in,
+                                             const uint16_t ch_im_in,
+                                             const q7_t *wt,
+                                             const uint16_t ch_im_out,
+                                             const uint16_t dim_kernel,
+                                             const uint16_t padding,
+                                             const uint16_t stride,
+                                             const q7_t *bias,
+                                             const uint16_t bias_shift,
+                                             const uint16_t out_shift,
+                                             q7_t *Im_out,
+                                             const uint16_t dim_im_out,
+                                             q15_t *bufferA,
+                                             q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
@@ -123,7 +80,7 @@ arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
     if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     /*
@@ -339,7 +296,7 @@ arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
     if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -374,7 +331,7 @@ arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
index 4507d15..a091be3 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_HWC_q7_fast_nonsquare.c
  * Description:  Fast Q7 version of convolution (non-sqaure shape)
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,60 +42,34 @@
  * @{
  */
 
-/**
- * @brief Fast Q7 convolution function (non-sqaure shape)
- * @param[in]       Im_in        pointer to input tensor
- * @param[in]       dim_im_in_x  input tensor dimention x
- * @param[in]       dim_im_in_y  input tensor dimention y
- * @param[in]       ch_im_in     number of input tensor channels
- * @param[in]       wt           pointer to kernel weights
- * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x filter kernel size x
- * @param[in]       dim_kernel_y filter kernel size y
- * @param[in]       padding_x    padding size x
- * @param[in]       padding_y    padding size y
- * @param[in]       stride_x     convolution stride x
- * @param[in]       stride_y     convolution stride y
- * @param[in]       bias         pointer to bias
- * @param[in]       bias_shift   amount of left-shift for bias
- * @param[in]       out_shift    amount of right-shift for output
- * @param[in,out]   Im_out       pointer to output tensor
- * @param[in]       dim_im_out_x output tensor dimension x
- * @param[in]       dim_im_out_y output tensor dimension y
- * @param[in,out]   bufferA      pointer to buffer space for input
- * @param[in,out]   bufferB      pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * This function is the version with full list of optimization tricks, but with
- * some contraints:
- *   ch_im_in is multiple of 4
- *   ch_im_out is multiple of 2
+/*
+ * Fast Q7 convolution function (non-square shape)
+ * Refer to the function header for details
  */
 
-arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
-                                              const uint16_t dim_im_in_x,
-                                              const uint16_t dim_im_in_y,
-                                              const uint16_t ch_im_in,
-                                              const q7_t *wt,
-                                              const uint16_t ch_im_out,
-                                              const uint16_t dim_kernel_x,
-                                              const uint16_t dim_kernel_y,
-                                              const uint16_t padding_x,
-                                              const uint16_t padding_y,
-                                              const uint16_t stride_x,
-                                              const uint16_t stride_y,
-                                              const q7_t *bias,
-                                              const uint16_t bias_shift,
-                                              const uint16_t out_shift,
-                                              q7_t *Im_out,
-                                              const uint16_t dim_im_out_x,
-                                              const uint16_t dim_im_out_y,
-                                              q15_t *bufferA,
-                                              q7_t *bufferB)
+arm_cmsis_nn_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
+                                                       const uint16_t dim_im_in_x,
+                                                       const uint16_t dim_im_in_y,
+                                                       const uint16_t ch_im_in,
+                                                       const q7_t *wt,
+                                                       const uint16_t ch_im_out,
+                                                       const uint16_t dim_kernel_x,
+                                                       const uint16_t dim_kernel_y,
+                                                       const uint16_t padding_x,
+                                                       const uint16_t padding_y,
+                                                       const uint16_t stride_x,
+                                                       const uint16_t stride_y,
+                                                       const q7_t *bias,
+                                                       const uint16_t bias_shift,
+                                                       const uint16_t out_shift,
+                                                       q7_t *Im_out,
+                                                       const uint16_t dim_im_out_x,
+                                                       const uint16_t dim_im_out_y,
+                                                       q15_t *bufferA,
+                                                       q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
@@ -111,7 +85,7 @@ arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
     if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     /*
@@ -336,7 +310,7 @@ arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
     if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
     {
         /* check if the input dimension meets the constraints */
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i = 0; i < ch_im_out; i++)
@@ -372,7 +346,7 @@ arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c
new file mode 100644
index 0000000..26c64fa
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c
@@ -0,0 +1,245 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_convolve_fast_s16.c
+ * Description:  Optimized s16 version of convolution.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * Optimized (fast) s16 convolution function.
+ *
+ * Refer to the header file for details. Optimal use case for the DSP/MVE implementation is when input and output
+ * channels are multiples of 4 or at least greater than 4.
+ *
+ */
+
+arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_conv_params *conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q15_t *input_data,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *filter_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int64_t *bias_data,
+                                          const cmsis_nn_dims *output_dims,
+                                          q15_t *output_data)
+{
+    (void)bias_dims;
+    if (filter_dims->w * filter_dims->h * input_dims->c >= 512)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR; /* Rejected: filter w * h * input channels must be below 512 */
+    }
+    /* A scratch buffer is mandatory whenever this kernel's own size query reports a non-zero size */
+    if (ctx->buf == NULL && arm_convolve_fast_s16_get_buffer_size(input_dims, filter_dims) > 0)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+    q15_t *buffer_a = (q15_t *)ctx->buf;
+
+    const int32_t input_batches = input_dims->n;
+    const int32_t input_x = input_dims->w;
+    const int32_t input_y = input_dims->h;
+    const int32_t input_ch = input_dims->c;
+    const int32_t kernel_x = filter_dims->w;
+    const int32_t kernel_y = filter_dims->h;
+    const int32_t output_x = output_dims->w;
+    const int32_t output_y = output_dims->h;
+    const int32_t output_ch = output_dims->c;
+
+    const int32_t pad_x = conv_params->padding.w;
+    const int32_t pad_y = conv_params->padding.h;
+    const int32_t stride_x = conv_params->stride.w;
+    const int32_t stride_y = conv_params->stride.h;
+
+    const int16_t out_activation_min = conv_params->activation.min;
+    const int16_t out_activation_max = conv_params->activation.max;
+    int32_t *output_mult = quant_params->multiplier;
+    int32_t *output_shift = quant_params->shift;
+
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+        /* Generate two columns from the input tensor for a GEMM computation */
+        q15_t *two_column_buf = buffer_a;
+        q15_t *out = output_data;
+        /* This part implements the im2col function */
+        for (int32_t i_out_y = 0; i_out_y < output_y; i_out_y++)
+        {
+            for (int32_t i_out_x = 0; i_out_x < output_x; i_out_x++)
+            {
+                for (int32_t i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y;
+                     i_ker_y++)
+                {
+                    for (int32_t i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x;
+                         i_ker_x++)
+                    {
+                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                        {
+                            /* Filling 0 for out-of-bound paddings (size is given in bytes) */
+                            arm_memset_q7((q7_t *)two_column_buf, 0, sizeof(q15_t) * input_ch);
+                        }
+                        else
+                        {
+                            arm_memcpy_q7((q7_t *)two_column_buf,
+                                          (const q7_t *)(input_data + (i_ker_y * input_x + i_ker_x) * input_ch),
+                                          input_ch * sizeof(q15_t));
+                        }
+                        two_column_buf += input_ch;
+                    }
+                }
+                /* Computation is performed once two columns have been gathered */
+                if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x)
+                {
+                    out = arm_nn_mat_mult_kernel_s16(filter_data,
+                                                     buffer_a,
+                                                     output_ch,
+                                                     output_shift,
+                                                     output_mult,
+                                                     out_activation_min,
+                                                     out_activation_max,
+                                                     (input_ch * kernel_y * kernel_x),
+                                                     bias_data,
+                                                     out);
+
+                    /* Counter reset */
+                    two_column_buf = buffer_a;
+                }
+            }
+        }
+
+        /* One column is left over when the number of output pixels is odd */
+        if (two_column_buf != buffer_a)
+        {
+            const q7_t *ker_a = filter_data;
+            int i;
+
+            for (i = 0; i < output_ch; i++)
+            {
+                /* Init the accumulator */
+                q31_t sum = 0;
+
+                /* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
+                const q15_t *ip_as_col = buffer_a;
+
+                /* 4 multiply and accumulates are done in one loop. */
+                uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
+
+                while (col_count)
+                {
+                    q31_t ker_a1, ker_a2;
+                    q31_t ip_b1, ip_b2;
+
+                    ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2);
+
+                    ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
+                    sum = __SMLAD(ker_a1, ip_b1, sum);
+                    ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
+                    sum = __SMLAD(ker_a2, ip_b2, sum);
+
+                    col_count--;
+                }
+                /* Handle the left-over MACs (column length modulo 4) */
+                col_count = input_ch * kernel_y * kernel_x & 0x3;
+                while (col_count)
+                {
+                    q7_t ker_a1 = *ker_a++;
+                    q15_t ip_b1 = *ip_as_col++;
+                    sum += ker_a1 * ip_b1;
+                    col_count--;
+                }
+                if (bias_data)
+                {
+                    q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i]);
+                    q63_t acc_64 = sum + bias_data[i]; /* the 64-bit bias is added in a 64-bit accumulator */
+                    sum = arm_nn_requantize_s64(acc_64, reduced_multiplier, output_shift[i]);
+                }
+                else
+                {
+                    sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]);
+                }
+                sum = MAX(sum, out_activation_min);
+                sum = MIN(sum, out_activation_max);
+                *out++ = (q15_t)sum;
+            }
+        }
+#else /* !ARM_MATH_DSP || ARM_MATH_MVEI: nothing is implemented for this configuration, report an error */
+        (void)input_data;
+        (void)output_data;
+        (void)bias_data;
+        (void)filter_data;
+        (void)buffer_a;
+        (void)kernel_x;
+        (void)kernel_y;
+        (void)pad_x;
+        (void)pad_y;
+        (void)stride_x;
+        (void)stride_y;
+        (void)out_activation_min;
+        (void)out_activation_max;
+        (void)output_mult;
+        (void)output_shift;
+        return ARM_CMSIS_NN_ARG_ERROR;
+#endif /* defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) */
+        /* Advance to the next batch */
+        input_data += (input_x * input_y * input_ch);
+        output_data += (output_x * output_y * output_ch);
+    }
+
+    /* Return to application */
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
+{
+#if !defined(ARM_MATH_DSP) || defined(ARM_MATH_MVEI)
+    (void)filter_dims; /* No scratch memory is requested for this configuration */
+    (void)input_dims;
+    return 0;
+#else /* Bytes for two columns of int16 values, each input channels * filter width * filter height long */
+    return (int32_t)sizeof(int16_t) * (2 * input_dims->c * filter_dims->w * filter_dims->h);
+#endif
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
new file mode 100644
index 0000000..7d8d14f
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c
@@ -0,0 +1,160 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_convolve_s16.c
+ * Description:  s16 version of convolution using symmetric quantization.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * Basic s16 convolution function.
+ *
+ * Refer to the header file for details. Optimal use case for the DSP/MVE implementation is when input and output
+ * channels are multiples of 4 or at least greater than 4.
+ *
+ */
+
+arm_cmsis_nn_status arm_convolve_s16(const cmsis_nn_context *ctx,
+                                     const cmsis_nn_conv_params *conv_params,
+                                     const cmsis_nn_per_channel_quant_params *quant_params,
+                                     const cmsis_nn_dims *input_dims,
+                                     const q15_t *input_data,
+                                     const cmsis_nn_dims *filter_dims,
+                                     const q7_t *filter_data,
+                                     const cmsis_nn_dims *bias_dims,
+                                     const int64_t *bias_data,
+                                     const cmsis_nn_dims *output_dims,
+                                     q15_t *output_data)
+{
+    (void)ctx; /* not used by this implementation */
+    (void)bias_dims;
+
+    const int32_t batches = input_dims->n;
+    const int32_t in_w = input_dims->w;
+    const int32_t in_h = input_dims->h;
+    const int32_t in_c = input_dims->c;
+    const int32_t ker_w = filter_dims->w;
+    const int32_t ker_h = filter_dims->h;
+    const int32_t out_w = output_dims->w;
+    const int32_t out_h = output_dims->h;
+    const int32_t out_c = output_dims->c;
+
+    const int32_t pad_w = conv_params->padding.w;
+    const int32_t pad_h = conv_params->padding.h;
+    const int32_t step_w = conv_params->stride.w;
+    const int32_t step_h = conv_params->stride.h;
+    const int32_t dil_w = conv_params->dilation.w;
+    const int32_t dil_h = conv_params->dilation.h;
+    const int32_t act_lo = conv_params->activation.min;
+    const int32_t act_hi = conv_params->activation.max;
+    const int32_t *multipliers = quant_params->multiplier;
+    const int32_t *shifts = quant_params->shift;
+
+    for (int32_t batch = 0; batch < batches; batch++)
+    {
+        /* Reference implementation for Cortex-M0 and Cortex-M3 */
+        for (int32_t oc = 0; oc < out_c; oc++)
+        {
+            const q31_t mult_reduced = REDUCE_MULTIPLIER(multipliers[oc]);
+            const q7_t *kernel = filter_data + oc * in_c * ker_h * ker_w; /* weights of this output channel */
+            for (int32_t oy = 0; oy < out_h; oy++)
+            {
+                const int32_t origin_y = oy * step_h - pad_h;
+                for (int32_t ox = 0; ox < out_w; ox++)
+                {
+                    const int32_t origin_x = ox * step_w - pad_w;
+                    /* Range [first, last) of kernel taps per axis whose dilated position falls inside the input */
+                    const int32_t lead_y = (-origin_y + dil_h - 1) / dil_h;
+                    const int32_t lead_x = (-origin_x + dil_w - 1) / dil_w;
+                    const int32_t tail_y = (in_h - origin_y + dil_h - 1) / dil_h;
+                    const int32_t tail_x = (in_w - origin_x + dil_w - 1) / dil_w;
+                    const int32_t ky_first = (lead_y > 0) ? lead_y : 0;
+                    const int32_t kx_first = (lead_x > 0) ? lead_x : 0;
+                    const int32_t ky_last = (ker_h < tail_y) ? ker_h : tail_y;
+                    const int32_t kx_last = (ker_w < tail_x) ? ker_w : tail_x;
+                    int64_t acc = 0;
+
+                    for (int32_t ky = ky_first; ky < ky_last; ky++)
+                    {
+                        const int32_t row = origin_y + dil_h * ky;
+                        for (int32_t kx = kx_first; kx < kx_last; kx++)
+                        {
+                            const int32_t col = origin_x + dil_w * kx;
+                            const q15_t *pixel = input_data + (row * in_w + col) * in_c;
+                            const q7_t *taps = kernel + (ky * ker_w + kx) * in_c;
+                            for (int32_t ic = 0; ic < in_c; ic++)
+                            {
+                                acc += pixel[ic] * taps[ic];
+                            }
+                        }
+                    }
+                    if (bias_data)
+                    {
+                        acc += bias_data[oc];
+                    }
+
+                    /* Requantize the 64-bit sum, then clamp it to the activation range before the int16 store */
+                    int32_t result = arm_nn_requantize_s64(acc, mult_reduced, shifts[oc]);
+                    result = (result > act_lo) ? result : act_lo;
+                    result = (result < act_hi) ? result : act_hi;
+                    output_data[oc + (oy * out_w + ox) * out_c] = (int16_t)result;
+                }
+            }
+        }
+        /* Step both tensors forward by one batch */
+        input_data += (in_w * in_h * in_c);
+        output_data += (out_w * out_h * out_c);
+    }
+
+    /* Return to application */
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
+{
+    (void)filter_dims; /* the dimensions do not influence the result */
+    (void)input_dims;
+    return 0; /* always zero bytes */
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
index ab5dbf5..2782521 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_convolve_s8.c
  * Description:  s8 version of convolution using symmetric quantization.
  *
- * $Date:        January 26, 2021
- * $Revision:    V.2.0.4
+ * $Date:        19 April 2022
+ * $Revision:    V.3.0.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -50,22 +50,27 @@
  *
  */
 
-arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
-                           const cmsis_nn_conv_params *conv_params,
-                           const cmsis_nn_per_channel_quant_params *quant_params,
-                           const cmsis_nn_dims *input_dims,
-                           const q7_t *input_data,
-                           const cmsis_nn_dims *filter_dims,
-                           const q7_t *filter_data,
-                           const cmsis_nn_dims *bias_dims,
-                           const int32_t *bias_data,
-                           const cmsis_nn_dims *output_dims,
-                           q7_t *output_data)
+arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_conv_params *conv_params,
+                                    const cmsis_nn_per_channel_quant_params *quant_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q7_t *input_data,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const q7_t *filter_data,
+                                    const cmsis_nn_dims *bias_dims,
+                                    const int32_t *bias_data,
+                                    const cmsis_nn_dims *output_dims,
+                                    q7_t *output_data)
 {
     (void)bias_dims;
+
+    if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
     q15_t *buffer_a = (q15_t *)ctx->buf;
 
-    const uint16_t input_batches = input_dims->n;
+    const int32_t input_batches = input_dims->n;
     const uint16_t input_x = input_dims->w;
     const uint16_t input_y = input_dims->h;
     const uint16_t input_ch = input_dims->c;
@@ -97,26 +102,32 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
         int32_t buffer_fill_cnt = 0;
         int32_t padded = 0;
         const int32_t num_elem = kernel_x * kernel_y * input_ch;
+        const int32_t dilation_x = conv_params->dilation.w;
+        const int32_t dilation_y = conv_params->dilation.h;
 
         /* This part implements the im2col function */
         for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
         {
             for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
             {
-                for (int i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y;
-                     i_ker_y++)
+                const int32_t base_idx_x = stride_x * i_out_x - pad_x;
+                const int32_t base_idx_y = stride_y * i_out_y - pad_y;
+
+                for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
                 {
-                    for (int i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x;
-                         i_ker_x++)
+                    for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
                     {
-                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                        const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
+                        const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+
+                        if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
                         {
                             memset(im2col_buf, (int8_t)-input_offset, sizeof(q7_t) * input_ch);
                             padded = 1;
                         }
                         else
                         {
-                            arm_memcpy_q7(im2col_buf, input_data + (i_ker_y * input_x + i_ker_x) * input_ch, input_ch);
+                            arm_memcpy_q7(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch);
                         }
                         im2col_buf += input_ch;
                     }
@@ -128,33 +139,15 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 if (buffer_fill_cnt == 4 && (padded == 0))
                 {
                     buffer_fill_cnt = 0;
-                    for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
-                    {
-                        int32_t sum_row;
-                        int32_t acc[4];
-
-                        (void)arm_nn_mat_mul_core_4x_s8(
-                            num_elem, num_elem, (q7_t *)buffer_a, filter_data + num_elem * i_out_ch, &sum_row, acc);
-                        int32x4_t s_offset = vdupq_n_s32(sum_row);
-
-                        int32x4_t res = vldrwq_s32(acc);
-                        s_offset = vmulq_n_s32(s_offset, input_offset);
-                        if (bias_data)
-                        {
-                            res = vaddq_n_s32(res, bias_data[i_out_ch]);
-                        }
-                        res = vaddq_s32(res, s_offset);
-                        res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]);
-                        res = vaddq_n_s32(res, out_offset);
-
-                        res = vmaxq_s32(res, vdupq_n_s32(out_activation_min));
-                        res = vminq_s32(res, vdupq_n_s32(out_activation_max));
-
-                        const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3};
-                        vstrbq_scatter_offset_s32(out, scatter_offset, res);
-                        out++;
-                    }
-                    out += (3 * output_ch);
+                    out = arm_nn_mat_mul_core_4x_s8(num_elem,
+                                                    num_elem,
+                                                    (q7_t *)buffer_a,
+                                                    filter_data,
+                                                    output_ch,
+                                                    conv_params,
+                                                    quant_params,
+                                                    bias_data,
+                                                    out);
                     im2col_buf = (q7_t *)buffer_a;
                 }
                 else if (buffer_fill_cnt == 4 && (padded != 0))
@@ -198,8 +191,10 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                                      bias_data,
                                      out);
         }
+#else // #if defined(ARM_MATH_MVEI)
+        const uint16_t dilation_x = conv_params->dilation.w;
+        const uint16_t dilation_y = conv_params->dilation.h;
 
-#elif defined(ARM_MATH_DSP)
         int32_t i_out_y, i_out_x, i_ker_y, i_ker_x;
 
         /* Generate two columns from the input tensor a GEMM computation */
@@ -211,12 +206,17 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
         {
             for (i_out_x = 0; i_out_x < output_x; i_out_x++)
             {
-                for (i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y; i_ker_y++)
+                const int32_t base_idx_y = stride_y * i_out_y - pad_y;
+                const int32_t base_idx_x = stride_x * i_out_x - pad_x;
+
+                for (i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
                 {
-                    for (i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x;
-                         i_ker_x++)
+                    for (i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
                     {
-                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                        const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
+                        const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
+
+                        if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
                         {
                             /* Filling 0 for out-of-bound paddings */
                             memset(two_column_buf, 0, sizeof(q15_t) * input_ch);
@@ -224,10 +224,8 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                         else
                         {
                             /* Copying the pixel data to column */
-                            arm_q7_to_q15_with_offset(input_data + (i_ker_y * input_x + i_ker_x) * input_ch,
-                                                      two_column_buf,
-                                                      input_ch,
-                                                      input_offset);
+                            arm_q7_to_q15_with_offset(
+                                input_data + (k_y * input_x + k_x) * input_ch, two_column_buf, input_ch, input_offset);
                         }
                         two_column_buf += input_ch;
                     }
@@ -273,6 +271,7 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 const q15_t *ip_as_col = buffer_a;
 
                 /* 4 multiply and accumulates are done in one loop. */
+#if defined(ARM_MATH_DSP)
                 uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
 
                 while (col_count)
@@ -291,6 +290,9 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 }
                 /* Handle left over mac */
                 col_count = input_ch * kernel_y * kernel_x & 0x3;
+#else
+                uint16_t col_count = input_ch * kernel_y * kernel_x;
+#endif
                 while (col_count)
                 {
                     q7_t ker_a1 = *ker_a++;
@@ -306,74 +308,27 @@ arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 *out++ = (q7_t)sum;
             }
         }
-#else
-        /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
-        (void)buffer_a;
-        int32_t i_out_ch, i_out_y, i_out_x, i_input_ch, i_ker_y, i_ker_x;
-        int32_t conv_out;
-
-        for (i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
-        {
-            for (i_out_y = 0; i_out_y < output_y; i_out_y++)
-            {
-                for (i_out_x = 0; i_out_x < output_x; i_out_x++)
-                {
-                    conv_out = 0;
-
-                    const int32_t base_idx_y = stride_y * i_out_y - pad_y;
-                    const int32_t base_idx_x = stride_x * i_out_x - pad_x;
-
-                    const int32_t ker_y_start = MAX(0, -base_idx_y);
-                    const int32_t ker_x_start = MAX(0, -base_idx_x);
-
-                    const int32_t ker_y_end = MIN(kernel_y, input_y - base_idx_y);
-                    const int32_t ker_x_end = MIN(kernel_x, input_x - base_idx_x);
-
-                    for (i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
-                    {
-                        for (i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
-                        {
-                            const int32_t in_row = base_idx_y + i_ker_y;
-                            const int32_t in_col = base_idx_x + i_ker_x;
-                            for (i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
-                            {
-                                conv_out +=
-                                    (input_data[(in_row * input_x + in_col) * input_ch + i_input_ch] + input_offset) *
-                                    filter_data[i_out_ch * input_ch * kernel_y * kernel_x +
-                                                (i_ker_y * kernel_x + i_ker_x) * input_ch + i_input_ch];
-                            }
-                        }
-                    }
-                    if (bias_data)
-                    {
-                        conv_out += bias_data[i_out_ch];
-                    }
-                    conv_out = arm_nn_requantize(conv_out, output_mult[i_out_ch], output_shift[i_out_ch]);
-                    conv_out += out_offset;
-                    conv_out = MAX(conv_out, out_activation_min);
-                    conv_out = MIN(conv_out, out_activation_max);
-                    output_data[i_out_ch + (i_out_y * output_x + i_out_x) * output_ch] = (int8_t)conv_out;
-                }
-            }
-        }
-#endif
+#endif // #if defined(ARM_MATH_MVEI)
         /* Advance to the next batch */
         input_data += (input_x * input_y * input_ch);
         output_data += (output_x * output_y * output_ch);
     }
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 {
-#if defined(ARM_MATH_DSP)
-    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t);
+#if defined(ARM_MATH_MVEI)
+    int32_t col_length = input_dims->c * filter_dims->w * filter_dims->h;
+    // Round col_length up to the number of complete 8-element groups (int16 lanes). This depends on the
+    // implementation of arm_nn_mat_mult_s8
+    col_length = (col_length + 7) / 8;
+    // 4 -> number of im2col buffers, 8 -> 8 elements per Q register
+    return 4 * col_length * 8 * (int32_t)sizeof(int8_t);
 #else
-    (void)input_dims;
-    (void)filter_dims;
-    return 0;
+    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t); // two int16 columns
 #endif
 }
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c
new file mode 100644
index 0000000..efdbc41
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c
@@ -0,0 +1,134 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2021-2022 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_convolve_wrapper_s16.c
+ * Description:  s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
+ * cmsis-nn to perform the convolution.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * s16 convolution wrapper: forwards every argument, unchanged, to one s16 convolution kernel and returns its status.
+ * When ARM_MATH_DSP is defined and ARM_MATH_MVEI is not, arm_convolve_fast_s16 is chosen if
+ * filter w * filter h * input channels < 512 and both dilations are 1; arm_convolve_s16 is used in all other cases.
+ * Refer header file for details.
+ */
+
+arm_cmsis_nn_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
+                                             const cmsis_nn_conv_params *conv_params,
+                                             const cmsis_nn_per_channel_quant_params *quant_params,
+                                             const cmsis_nn_dims *input_dims,
+                                             const q15_t *input_data,
+                                             const cmsis_nn_dims *filter_dims,
+                                             const q7_t *filter_data,
+                                             const cmsis_nn_dims *bias_dims,
+                                             const int64_t *bias_data,
+                                             const cmsis_nn_dims *output_dims,
+                                             q15_t *output_data)
+{
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    if (filter_dims->w * filter_dims->h * input_dims->c < 512 &&
+        (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
+    {
+        return arm_convolve_fast_s16(ctx,
+                                     conv_params,
+                                     quant_params,
+                                     input_dims,
+                                     input_data,
+                                     filter_dims,
+                                     filter_data,
+                                     bias_dims,
+                                     bias_data,
+                                     output_dims,
+                                     output_data);
+    }
+    else
+    {
+        return arm_convolve_s16(ctx,
+                                conv_params,
+                                quant_params,
+                                input_dims,
+                                input_data,
+                                filter_dims,
+                                filter_data,
+                                bias_dims,
+                                bias_data,
+                                output_dims,
+                                output_data);
+    }
+#else /* ARM_MATH_DSP not defined, or ARM_MATH_MVEI defined: only arm_convolve_s16 is used */
+    return arm_convolve_s16(ctx,
+                            conv_params,
+                            quant_params,
+                            input_dims,
+                            input_data,
+                            filter_dims,
+                            filter_data,
+                            bias_dims,
+                            bias_data,
+                            output_dims,
+                            output_data);
+#endif /* defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) */
+}
+/* Returns the size reported by the *_get_buffer_size function of the kernel arm_convolve_wrapper_s16 selects. */
+int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
+                                                 const cmsis_nn_dims *input_dims,
+                                                 const cmsis_nn_dims *filter_dims,
+                                                 const cmsis_nn_dims *output_dims)
+{
+    (void)conv_params; /* only read inside the DSP-without-MVEI branch below */
+    (void)output_dims; /* not read by this function */
+
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    if (filter_dims->w * filter_dims->h * input_dims->c < 512 &&
+        (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
+    {
+        return arm_convolve_fast_s16_get_buffer_size(input_dims, filter_dims);
+    }
+
+    return arm_convolve_s16_get_buffer_size(input_dims, filter_dims);
+#else /* ARM_MATH_DSP not defined, or ARM_MATH_MVEI defined */
+    return arm_convolve_s16_get_buffer_size(input_dims, filter_dims);
+#endif
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
index 662b427..9cd898e 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -24,8 +24,8 @@
  * Description:  s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
  * cmsis-nn to perform the convolution.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 August 2022
+ * $Revision:    V.2.1.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -49,20 +49,21 @@
  *
  */
 
-arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
-                                   const cmsis_nn_conv_params *conv_params,
-                                   const cmsis_nn_per_channel_quant_params *quant_params,
-                                   const cmsis_nn_dims *input_dims,
-                                   const q7_t *input_data,
-                                   const cmsis_nn_dims *filter_dims,
-                                   const q7_t *filter_data,
-                                   const cmsis_nn_dims *bias_dims,
-                                   const int32_t *bias_data,
-                                   const cmsis_nn_dims *output_dims,
-                                   q7_t *output_data)
+arm_cmsis_nn_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
+                                            const cmsis_nn_conv_params *conv_params,
+                                            const cmsis_nn_per_channel_quant_params *quant_params,
+                                            const cmsis_nn_dims *input_dims,
+                                            const q7_t *input_data,
+                                            const cmsis_nn_dims *filter_dims,
+                                            const q7_t *filter_data,
+                                            const cmsis_nn_dims *bias_dims,
+                                            const int32_t *bias_data,
+                                            const cmsis_nn_dims *output_dims,
+                                            q7_t *output_data)
 {
-    if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) &&
-        (conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) && (filter_dims->h == 1))
+    if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (conv_params->stride.w == 1) &&
+        (conv_params->stride.h == 1) && (filter_dims->w == 1) && (filter_dims->h == 1) &&
+        (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
     {
         return arm_convolve_1x1_s8_fast(ctx,
                                         conv_params,
@@ -76,8 +77,7 @@ arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
                                         output_dims,
                                         output_data);
     }
-    else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) &&
-             (input_dims->n == 1))
+    else if ((input_dims->h == 1) && (output_dims->w % 4 == 0) && conv_params->dilation.w == 1 && (filter_dims->h == 1))
     {
         return arm_convolve_1_x_n_s8(ctx,
                                      conv_params,
@@ -112,13 +112,14 @@ int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv
                                                 const cmsis_nn_dims *filter_dims,
                                                 const cmsis_nn_dims *output_dims)
 {
-    if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) &&
-        (conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) && (filter_dims->h == 1))
+    if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (conv_params->stride.w == 1) &&
+        (conv_params->stride.h == 1) && (filter_dims->w == 1) && (filter_dims->h == 1) &&
+        (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
     {
         return arm_convolve_1x1_s8_fast_get_buffer_size(input_dims);
     }
-    else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) &&
-             (input_dims->n == 1))
+    else if ((input_dims->h == 1) && (output_dims->w % 4 == 0) && (conv_params->dilation.w == 1) &&
+             (filter_dims->h == 1))
     {
         return arm_convolve_1_x_n_s8_get_buffer_size(input_dims, filter_dims);
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c
index bd0fbf5..def3b47 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -24,8 +24,8 @@
  * Description:  Optimized s8 depthwise convolution function for channel
  *               multiplier of 1 and 3x3 kernel size.
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.1
+ * $Date:        19 July 2022
+ * $Revision:    V.3.1.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -51,17 +51,17 @@
  *
  */
 
-arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
-                                     const cmsis_nn_dw_conv_params *dw_conv_params,
-                                     const cmsis_nn_per_channel_quant_params *quant_params,
-                                     const cmsis_nn_dims *input_dims,
-                                     const q7_t *input,
-                                     const cmsis_nn_dims *filter_dims,
-                                     const q7_t *kernel,
-                                     const cmsis_nn_dims *bias_dims,
-                                     const int32_t *bias,
-                                     const cmsis_nn_dims *output_dims,
-                                     q7_t *output)
+arm_cmsis_nn_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
+                                              const cmsis_nn_dw_conv_params *dw_conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const cmsis_nn_dims *input_dims,
+                                              const q7_t *input,
+                                              const cmsis_nn_dims *filter_dims,
+                                              const q7_t *kernel,
+                                              const cmsis_nn_dims *bias_dims,
+                                              const int32_t *bias,
+                                              const cmsis_nn_dims *output_dims,
+                                              q7_t *output)
 {
     (void)ctx;
     (void)bias_dims;
@@ -86,14 +86,14 @@ arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
     /* Check input constraints input_ch == output_ch */
     if (input_ch != output_ch)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
     /* Check input constraints pad_x <= 1 */
     if (pad_x > 1 || filter_dims->w != 3 || filter_dims->h != 3)
     {
-        return ARM_MATH_ARGUMENT_ERROR;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
-
+    const int32_t *bias_base = bias;
     for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h)
     {
         for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w)
@@ -101,12 +101,20 @@ arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
             int32_t in_ch = 0;
             int32_t ker_w_start = MAX(0, -in_w);
 
+            bias = bias_base;
             for (; in_ch <= (input_ch - 4); in_ch += 4)
             {
-                int32_t out_buff0 = bias[in_ch + 0];
-                int32_t out_buff1 = bias[in_ch + 1];
-                int32_t out_buff2 = bias[in_ch + 2];
-                int32_t out_buff3 = bias[in_ch + 3];
+                int32_t out_buff0 = 0;
+                int32_t out_buff1 = 0;
+                int32_t out_buff2 = 0;
+                int32_t out_buff3 = 0;
+                if (bias)
+                {
+                    out_buff0 = *bias++;
+                    out_buff1 = *bias++;
+                    out_buff2 = *bias++;
+                    out_buff3 = *bias++;
+                }
 
                 const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch;
                 const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch;
@@ -174,7 +182,11 @@ arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
             // Leftover
             for (; in_ch < input_ch; ++in_ch)
             {
-                int32_t out_buff = bias[in_ch];
+                int32_t out_buff = 0;
+                if (bias)
+                {
+                    out_buff = *bias++;
+                }
 
                 const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch;
                 const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch;
@@ -206,7 +218,7 @@ arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
     }
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_fast_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_fast_s16.c
new file mode 100644
index 0000000..20201b9
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_fast_s16.c
@@ -0,0 +1,471 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_depthwise_conv_fast_s16.c
+ * Description:  Optimized s16 depthwise separable convolution function for
+ *               channel multiplier of 1.
+ *
+ * $Date:        6 July 2022
+ * $Revision:    V.1.1.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+
+/*
+ * Optimized s16 depthwise convolution. Returns ARM_CMSIS_NN_ARG_ERROR unless input and output channel counts match,
+ * filter w * h < 512, and ctx->buf is non-NULL whenever the required buffer size is > 0. MVEI and DSP builds copy
+ * each output position's input patch into ctx->buf before multiplying; other builds call arm_depthwise_conv_s16.
+ * Refer prototype header file for details.
+ */
+
+arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
+                                                const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                const cmsis_nn_per_channel_quant_params *quant_params,
+                                                const cmsis_nn_dims *input_dims,
+                                                const q15_t *input,
+                                                const cmsis_nn_dims *filter_dims,
+                                                const q7_t *kernel,
+                                                const cmsis_nn_dims *bias_dims,
+                                                const int64_t *bias,
+                                                const cmsis_nn_dims *output_dims,
+                                                q15_t *output)
+{
+    const int32_t input_ch = input_dims->c;
+    const int32_t output_ch = output_dims->c;
+
+    /* Check input constraints input_ch == output_ch */
+    if (input_ch != output_ch)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    if (filter_dims->w * filter_dims->h >= 512) /* NOTE(review): presumably bounds the 32-bit sums - confirm */
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    if (ctx->buf == NULL && arm_depthwise_conv_fast_s16_get_buffer_size(input_dims, filter_dims) > 0)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR; /* a scratch buffer is needed but none was supplied */
+    }
+
+#if defined(ARM_MATH_DSP)
+    (void)bias_dims;
+    const int32_t input_x = input_dims->w;
+    const int32_t input_y = input_dims->h;
+    const int32_t input_batches = input_dims->n;
+    const int32_t kernel_x = filter_dims->w;
+    const int32_t kernel_y = filter_dims->h;
+    const int32_t pad_x = dw_conv_params->padding.w;
+    const int32_t pad_y = dw_conv_params->padding.h;
+    const int32_t stride_x = dw_conv_params->stride.w;
+    const int32_t stride_y = dw_conv_params->stride.h;
+    const int32_t *output_shift = quant_params->shift;
+    const int32_t *output_mult = quant_params->multiplier;
+    const int32_t output_x = output_dims->w;
+    const int32_t output_y = output_dims->h;
+    const int32_t output_activation_min = dw_conv_params->activation.min;
+    const int32_t output_activation_max = dw_conv_params->activation.max;
+    q15_t *buffer_a = (q15_t *)ctx->buf;
+
+#if defined(ARM_MATH_MVEI)
+    int16_t *lhs_buffer = buffer_a;
+    int16_t *out = output;
+    int buffer_count = 0;
+    const int32_t kernel_size = kernel_x * kernel_y;
+
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+        /* This part implements the im2col function: one kernel_x * kernel_y input patch per output position */
+        for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
+        {
+            for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
+            {
+                for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++)
+                {
+                    for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++)
+                    {
+                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                        {
+                            memset(lhs_buffer, (int16_t)0, (uint32_t)(input_ch * sizeof(int16_t)));
+                        }
+                        else
+                        {
+                            arm_memcpy_q15(lhs_buffer,
+                                           (int16_t *)(input + (i_ker_y * input_x + i_ker_x) * input_ch),
+                                           (uint32_t)(input_ch * sizeof(int16_t)));
+                        }
+                        lhs_buffer += input_ch;
+                    }
+                }
+                buffer_count++;
+                if (buffer_count == 4) /* four patches gathered: process them in one kernel call */
+                {
+                    lhs_buffer = buffer_a;
+
+                    out = arm_nn_depthwise_conv_nt_t_s16(lhs_buffer,
+                                                         kernel,
+                                                         input_ch,
+                                                         output_shift,
+                                                         output_mult,
+                                                         output_activation_min,
+                                                         output_activation_max,
+                                                         kernel_size,
+                                                         bias,
+                                                         out);
+                    buffer_count = 0;
+                }
+            }
+        }
+        input += input_x * input_y * input_ch;
+    }
+
+    /* Handle left over buffers (fewer than four patches still pending in buffer_a) */
+    lhs_buffer = buffer_a;
+    for (int i_buf = 0; i_buf < buffer_count; i_buf++)
+    {
+        int32_t loop_count = (input_ch + 3) / 4; /* channels are handled in groups of four */
+        int32_t num_ch_to_process = input_ch;
+
+        for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
+        {
+            const int8_t *row_0 = kernel + offset;
+            const int16_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset;
+
+            int32x4_t out_0 = vdupq_n_s32(0);
+
+            for (int i_ker = 0; i_ker < kernel_size; i_ker++)
+            {
+                const int32x4_t ker_0 = vldrbq_s32(row_0);
+
+                int32x4_t ip_0 = vldrhq_s32(col_0);
+                out_0 += vmulq_s32(ip_0, ker_0);
+
+                col_0 += input_ch;
+                row_0 += input_ch;
+            }
+
+            int64_t in_requantize_0 = (int64_t)out_0[0];
+            int64_t in_requantize_1 = (int64_t)out_0[1];
+            int64_t in_requantize_2 = (int64_t)out_0[2];
+            int64_t in_requantize_3 = (int64_t)out_0[3];
+
+            if (bias)
+            {
+                in_requantize_0 += bias[offset];
+                in_requantize_1 += bias[offset + 1];
+                in_requantize_2 += bias[offset + 2];
+                in_requantize_3 += bias[offset + 3];
+            }
+
+            int32_t reduced_multiplier_0 = REDUCE_MULTIPLIER(output_mult[offset]);
+            int32_t reduced_multiplier_1 = REDUCE_MULTIPLIER(output_mult[offset + 1]);
+            int32_t reduced_multiplier_2 = REDUCE_MULTIPLIER(output_mult[offset + 2]);
+            int32_t reduced_multiplier_3 = REDUCE_MULTIPLIER(output_mult[offset + 3]);
+
+            out_0[0] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier_0, output_shift[offset]);
+            out_0[1] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier_1, output_shift[offset + 1]);
+            out_0[2] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier_2, output_shift[offset + 2]);
+            out_0[3] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier_3, output_shift[offset + 3]);
+
+            out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
+            out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
+
+            mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process);
+            vstrhq_p_s32(out, out_0, p);
+
+            out += 4;
+        }
+
+        const int tail_ch = input_ch & 0x3;
+        if (tail_ch != 0)
+        {
+            out -= (4 - tail_ch); /* out moved by 4 for the last group; step back over its unused slots */
+        }
+    }
+
+#else // ARM_MATH_DSP
+
+    /* Run the following code in cores using DSP extension */
+    q15_t *const col_buffer_start = buffer_a;
+    q15_t *col_buffer = col_buffer_start;
+    const int64_t *const bias_start_pos = bias;
+    const int32_t *const out_mult_start_pos = output_mult;
+    const int32_t *const out_shift_start_pos = output_shift;
+    uint16_t row_count;
+    uint16_t row_shift;
+    int32_t result;
+
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+        for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
+        {
+            const int16_t base_idx_y = (i_out_y * stride_y) - pad_y;
+            for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
+            {
+                const int16_t base_idx_x = (i_out_x * stride_x) - pad_x;
+
+                /* Out of bounds is only considered for the y axis as it provides a more contiguous zero'ing
+                   opportunity than along the x axis */
+                const int ker_y_start = MAX(0, -base_idx_y);
+                /* Condition for kernel end dimension: (base_idx_y + ker_y_end) < input_y */
+                const int ker_y_end = MIN(kernel_y, input_y - base_idx_y);
+
+                int32_t index = 0;
+                if (ker_y_start != 0)
+                {
+                    memset(&col_buffer[index], 0, (kernel_x * input_ch) * ker_y_start * sizeof(q15_t));
+                    index += (kernel_x * input_ch) * ker_y_start;
+                }
+
+                for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
+                {
+                    const int32_t idx_y = base_idx_y + i_ker_y;
+
+                    for (int i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
+                    {
+                        const int32_t idx_x = base_idx_x + i_ker_x;
+
+                        if (idx_x < 0 || idx_x >= input_x)
+                        {
+                            memset(&col_buffer[index], 0, input_ch * sizeof(q15_t));
+                        }
+                        else
+                        {
+                            arm_memcpy_q15(&col_buffer[index],
+                                           input + (idx_y * input_x + idx_x) * input_ch,
+                                           input_ch * sizeof(q15_t));
+                        }
+                        index += input_ch;
+                    }
+                }
+
+                const int diff = kernel_y - ker_y_end;
+                if (diff != 0)
+                {
+                    memset(&col_buffer[index], 0, (kernel_x * input_ch) * diff * sizeof(q15_t));
+                }
+
+                row_count = output_ch / 4; /* four output channels per iteration */
+                row_shift = 0;
+                bias = bias_start_pos;
+                output_mult = out_mult_start_pos;
+                output_shift = out_shift_start_pos;
+
+                while (row_count)
+                {
+                    q31_t sum_1 = 0;
+                    q31_t sum_2 = 0;
+                    q31_t sum_3 = 0;
+                    q31_t sum_4 = 0;
+
+                    int32_t output_mult_1 = REDUCE_MULTIPLIER(output_mult[0]);
+                    int32_t output_mult_2 = REDUCE_MULTIPLIER(output_mult[1]);
+                    int32_t output_mult_3 = REDUCE_MULTIPLIER(output_mult[2]);
+                    int32_t output_mult_4 = REDUCE_MULTIPLIER(output_mult[3]);
+                    output_mult += 4;
+
+                    uint16_t col_count = (kernel_x * kernel_y) / 2; /* two kernel taps per iteration */
+                    q15_t *col_pos = col_buffer_start + row_shift;
+                    const q7_t *row_pos = kernel + row_shift;
+                    row_shift += 4;
+
+                    while (col_count)
+                    {
+                        /* General idea is to read 4 + 4 (input, kernel) pair and re-arrange them in the right order to
+                        use in a SMLAD instruction. One run of this loop produces 4 partial outputs with 8 MACs. */
+                        q31_t row_a1, row_a2, row_b1, row_b2, col_a, row_c, col_b, col_c;
+
+                        /* Read 4 weights */
+                        row_b1 = arm_nn_read_q7x4(row_pos);
+                        row_a1 = arm_nn_read_q7x4(row_pos + input_ch);
+                        col_a = arm_nn_read_q15x2(col_pos);
+                        col_b = arm_nn_read_q15x2(col_pos + input_ch);
+
+                        row_a2 = __SXTB16(row_b1);
+                        row_b1 = __SXTB16(__ROR(row_b1, 8));
+
+                        row_b2 = __SXTB16(row_a1);
+                        row_a1 = __SXTB16(__ROR(row_a1, 8));
+
+                        col_c = __PKHBT(col_b, col_a, 16);
+                        col_a = __PKHTB(col_b, col_a, 16);
+                        row_c = __PKHBT(row_b2, row_a2, 16);
+                        sum_1 = __SMLAD(col_c, row_c, sum_1);
+
+                        row_c = __PKHBT(row_b1, row_a1, 16);
+                        sum_2 = __SMLAD(col_a, row_c, sum_2);
+
+                        col_a = arm_nn_read_q15x2(col_pos + 2);
+                        col_b = arm_nn_read_q15x2(col_pos + input_ch + 2);
+
+                        col_c = __PKHBT(col_b, col_a, 16);
+                        col_a = __PKHTB(col_b, col_a, 16);
+                        row_c = __PKHTB(row_a2, row_b2, 16);
+                        sum_3 = __SMLAD(col_c, row_c, sum_3);
+
+                        row_c = __PKHTB(row_a1, row_b1, 16);
+                        sum_4 = __SMLAD(col_a, row_c, sum_4);
+
+                        row_pos += input_ch << 1;
+                        col_pos += input_ch << 1;
+                        col_count--;
+                    }
+
+                    col_count = (kernel_x * kernel_y) & 0x1; /* one tap left over when the tap count is odd */
+                    while (col_count)
+                    {
+                        sum_1 += row_pos[0] * col_pos[0];
+                        sum_2 += row_pos[1] * col_pos[1];
+                        sum_3 += row_pos[2] * col_pos[2];
+                        sum_4 += row_pos[3] * col_pos[3];
+
+                        row_pos += input_ch;
+                        col_pos += input_ch;
+
+                        col_count--;
+                    }
+
+                    int64_t acc_1 = sum_1;
+                    int64_t acc_2 = sum_2;
+                    int64_t acc_3 = sum_3;
+                    int64_t acc_4 = sum_4;
+
+                    if (bias)
+                    {
+                        acc_1 += *bias++;
+                        acc_2 += *bias++;
+                        acc_3 += *bias++;
+                        acc_4 += *bias++;
+                    }
+
+                    result = arm_nn_requantize_s64(acc_1, output_mult_1, *output_shift++);
+                    result = MAX(result, output_activation_min);
+                    result = MIN(result, output_activation_max);
+                    *output++ = (q15_t)result;
+
+                    result = arm_nn_requantize_s64(acc_2, output_mult_2, *output_shift++);
+                    result = MAX(result, output_activation_min);
+                    result = MIN(result, output_activation_max);
+                    *output++ = (q15_t)result;
+
+                    result = arm_nn_requantize_s64(acc_3, output_mult_3, *output_shift++);
+                    result = MAX(result, output_activation_min);
+                    result = MIN(result, output_activation_max);
+                    *output++ = (q15_t)result;
+
+                    result = arm_nn_requantize_s64(acc_4, output_mult_4, *output_shift++);
+                    result = MAX(result, output_activation_min);
+                    result = MIN(result, output_activation_max);
+                    *output++ = (q15_t)result;
+
+                    row_count--;
+                }
+
+                row_count = output_ch & 0x3; /* remaining channels (up to three), one at a time */
+                while (row_count)
+                {
+                    q15_t *col_pos = col_buffer_start + row_shift;
+                    const q7_t *row_pos = kernel + row_shift;
+                    q31_t sum = 0;
+                    const uint16_t col_count = (kernel_x * kernel_y);
+                    row_shift += 1;
+
+                    for (int i = 0; i < col_count; i++)
+                    {
+                        sum += row_pos[i * input_ch] * col_pos[i * input_ch];
+                    }
+                    int64_t acc = sum;
+                    if (bias)
+                    {
+                        acc += *bias++;
+                    }
+                    result = arm_nn_requantize_s64(acc, REDUCE_MULTIPLIER(*output_mult), *output_shift++);
+                    output_mult++;
+                    result = MAX(result, output_activation_min);
+                    result = MIN(result, output_activation_max);
+                    *output++ = (q15_t)result;
+
+                    row_count--;
+                }
+                // clear counter and pointers
+                col_buffer = col_buffer_start;
+            }
+        }
+
+        /* Advance to the next batch */
+        input += (input_x * input_y * input_ch);
+    }
+#endif
+#else
+    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
+    return arm_depthwise_conv_s16(ctx,
+                                  dw_conv_params,
+                                  quant_params,
+                                  input_dims,
+                                  input,
+                                  filter_dims,
+                                  kernel,
+                                  bias_dims,
+                                  bias,
+                                  output_dims,
+                                  output);
+#endif /* ARM_MATH_MVEI | ARM_MATH_DSP */
+
+    /* Return to application */
+    return ARM_CMSIS_NN_SUCCESS;
+}
+/* Scratch size in bytes for arm_depthwise_conv_fast_s16: four c * w * h int16 patches + 8 (MVEI), one (DSP), else 0. */
+int32_t arm_depthwise_conv_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
+{
+#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_MVEI)
+    /* The + 8 accounts for a worst case out of bounds read of the lhs buffers in the *_nt_t_* function.  */
+    return 4 * input_dims->c * filter_dims->w * filter_dims->h * sizeof(int16_t) + 8;
+#else // ARM_MATH_DSP without ARM_MATH_MVEI
+    return input_dims->c * filter_dims->w * filter_dims->h * sizeof(int16_t);
+#endif
+#else
+    (void)input_dims;
+    (void)filter_dims;
+    return 0;
+#endif
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c
new file mode 100644
index 0000000..e0e39ca
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c
@@ -0,0 +1,296 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_depthwise_conv_s16.c
+ * Description:  s16 version of depthwise convolution.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+/* s16 depthwise kernel producing 4 outputs per channel-multiplier tile; not called in this file (hence unused). */
+static void __attribute__((unused)) depthwise_conv_s16_mult_4_s16(const int16_t *input,
+                                                                  const int32_t input_x,
+                                                                  const int32_t input_y,
+                                                                  const int32_t input_ch,
+                                                                  const int8_t *kernel,
+                                                                  const int32_t output_ch,
+                                                                  const int32_t ch_mult,
+                                                                  const int32_t kernel_x,
+                                                                  const int32_t kernel_y,
+                                                                  const int32_t pad_x,
+                                                                  const int32_t pad_y,
+                                                                  const int32_t stride_x,
+                                                                  const int32_t stride_y,
+                                                                  const int64_t *bias, /* may be NULL */
+                                                                  int16_t *output,
+                                                                  const int32_t *output_shift,
+                                                                  const int32_t *output_mult,
+                                                                  const int32_t output_x,
+                                                                  const int32_t output_y,
+                                                                  const int32_t output_activation_min,
+                                                                  const int32_t output_activation_max)
+{
+    for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h)
+    {
+        for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w)
+        {
+            for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch;
+                 ++in_ch, out_ch += ch_mult)
+            {
+                for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) /* 4 per pass: needs ch_mult % 4 == 0 */
+                {
+                    int32_t out_buff32[4] = {REDUCE_MULTIPLIER(output_mult[out_ch + 0 + mult_tile]),
+                                             REDUCE_MULTIPLIER(output_mult[out_ch + 1 + mult_tile]),
+                                             REDUCE_MULTIPLIER(output_mult[out_ch + 2 + mult_tile]),
+                                             REDUCE_MULTIPLIER(output_mult[out_ch + 3 + mult_tile])};
+                    /* out_buff32 starts as the reduced multipliers and is later reused for the requantized results. */
+                    int64_t out_buff[4] = {0, 0, 0, 0};
+                    /* The 64-bit accumulators start at the bias when one is supplied, otherwise at zero. */
+                    if (bias)
+                    {
+                        out_buff[0] = bias[out_ch + 0 + mult_tile];
+                        out_buff[1] = bias[out_ch + 1 + mult_tile];
+                        out_buff[2] = bias[out_ch + 2 + mult_tile];
+                        out_buff[3] = bias[out_ch + 3 + mult_tile];
+                    }
+                    /* Loop bounds skip kernel taps that would fall outside the input (the padded border). */
+                    for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h)
+                    {
+                        int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch;
+                        int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch;
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+#pragma clang loop unroll(disable)
+#endif
+                        for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w);
+                             ++ker_w, ker_idx += output_ch)
+                        {
+                            // TODO: Unroll of 4 with 64 bit accumulator will probably result in too much register
+                            // spills. Try with unroll of 2 when enabling this.
+                            int32_t in_val = input[in_idx + ker_w * input_ch];
+                            out_buff[0] += in_val * kernel[ker_idx + 0 + mult_tile];
+                            out_buff[1] += in_val * kernel[ker_idx + 1 + mult_tile];
+                            out_buff[2] += in_val * kernel[ker_idx + 2 + mult_tile];
+                            out_buff[3] += in_val * kernel[ker_idx + 3 + mult_tile];
+                        }
+                    }
+                    /* Reduce each 64-bit sum to 32 bits using its per-channel multiplier and shift. */
+                    out_buff32[0] =
+                        arm_nn_requantize_s64(out_buff[0], out_buff32[0], output_shift[out_ch + 0 + mult_tile]);
+                    out_buff32[1] =
+                        arm_nn_requantize_s64(out_buff[1], out_buff32[1], output_shift[out_ch + 1 + mult_tile]);
+                    out_buff32[2] =
+                        arm_nn_requantize_s64(out_buff[2], out_buff32[2], output_shift[out_ch + 2 + mult_tile]);
+                    out_buff32[3] =
+                        arm_nn_requantize_s64(out_buff[3], out_buff32[3], output_shift[out_ch + 3 + mult_tile]);
+                    /* Clamp to the activation range. */
+                    out_buff32[0] = MIN(MAX(out_buff32[0], output_activation_min), output_activation_max);
+                    out_buff32[1] = MIN(MAX(out_buff32[1], output_activation_min), output_activation_max);
+                    out_buff32[2] = MIN(MAX(out_buff32[2], output_activation_min), output_activation_max);
+                    out_buff32[3] = MIN(MAX(out_buff32[3], output_activation_min), output_activation_max);
+                    /* Results are stored contiguously in loop order (row, column, input channel, multiplier). */
+                    output[out_idx++] = (int16_t)out_buff32[0];
+                    output[out_idx++] = (int16_t)out_buff32[1];
+                    output[out_idx++] = (int16_t)out_buff32[2];
+                    output[out_idx++] = (int16_t)out_buff32[3];
+                }
+            }
+        }
+    }
+}
+/* Generic s16 depthwise convolution: loops over batch, output position, input channel and channel multiplier. */
+static void depthwise_conv_s16_generic_s16(const int16_t *input,
+                                           const uint16_t input_batches,
+                                           const uint16_t input_x,
+                                           const uint16_t input_y,
+                                           const uint16_t input_ch,
+                                           const int8_t *kernel,
+                                           const uint16_t ch_mult,
+                                           const uint16_t kernel_x,
+                                           const uint16_t kernel_y,
+                                           const uint16_t pad_x,
+                                           const uint16_t pad_y,
+                                           const uint16_t stride_x,
+                                           const uint16_t stride_y,
+                                           const int64_t *bias, /* may be NULL */
+                                           int16_t *output,
+                                           const int32_t *output_shift,
+                                           const int32_t *output_mult,
+                                           const uint16_t output_x,
+                                           const uint16_t output_y,
+                                           const int32_t output_activation_min,
+                                           const int32_t output_activation_max,
+                                           const uint16_t dilation_x,
+                                           const uint16_t dilation_y)
+
+{
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+        for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
+        {
+            const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; /* top input row of the window; may be < 0 */
+            for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
+            {
+                const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; /* left input column; may be < 0 */
+                for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
+                {
+                    for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
+                    {
+                        const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
+                        /* Per-output-channel multiplier, passed through REDUCE_MULTIPLIER before requantization. */
+                        const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[idx_out_ch]);
+                        int64_t acc_0 = 0;
+                        /* Half-open kernel tap ranges [start, end) restricted to taps that land inside the input. */
+                        int ker_y_start;
+                        int ker_x_start;
+                        int ker_y_end;
+                        int ker_x_end;
+                        /* Dilated case: bounds are divided by the dilation (rounding up) before clipping. */
+                        if (dilation_x > 1)
+                        {
+                            const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
+                            ker_x_start = MAX(0, start_x_max);
+                            const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
+                            ker_x_end = MIN(kernel_x, end_min_x);
+                        }
+                        else
+                        {
+                            ker_x_start = MAX(0, -base_idx_x);
+                            ker_x_end = MIN(kernel_x, input_x - base_idx_x);
+                        }
+                        /* Same clipping for the vertical direction. */
+                        if (dilation_y > 1)
+                        {
+                            const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
+                            ker_y_start = MAX(0, start_y_max);
+                            const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
+                            ker_y_end = MIN(kernel_y, end_min_y);
+                        }
+                        else
+                        {
+                            ker_y_start = MAX(0, -base_idx_y);
+                            ker_y_end = MIN(kernel_y, input_y - base_idx_y);
+                        }
+                        /* Seed the accumulator with the bias when one is supplied. */
+                        if (bias)
+                        {
+                            acc_0 = bias[idx_out_ch];
+                        }
+
+                        for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
+                        {
+                            const int32_t idx_y = base_idx_y + dilation_y * i_ker_y;
+                            for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
+                            {
+                                const int32_t idx_x = base_idx_x + dilation_x * i_ker_x;
+                                int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
+                                int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;
+                                /* Input is indexed [y][x][ch]; kernel is indexed [ker_y][ker_x][out_ch]. */
+                                acc_0 += input[idx_0] * kernel[ker_idx_0];
+                            }
+                        }
+
+                        /* Requantize and clamp output to provided range */
+                        int32_t result = arm_nn_requantize_s64(acc_0, reduced_multiplier, output_shift[idx_out_ch]);
+                        result = MAX(result, output_activation_min);
+                        result = MIN(result, output_activation_max);
+                        *output++ = (int16_t)result; /* output advances one element per result, across batches */
+                    }
+                }
+            }
+        }
+        /* Advance to the next batch */
+        input += (input_x * input_y * input_ch);
+    }
+}
+
+/*
+ *  Basic s16 depthwise convolution function.
+ *
+ *  Refer to the header file for details.
+ *
+ */
+arm_cmsis_nn_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx,
+                                           const cmsis_nn_dw_conv_params *dw_conv_params,
+                                           const cmsis_nn_per_channel_quant_params *quant_params,
+                                           const cmsis_nn_dims *input_dims,
+                                           const q15_t *input,
+                                           const cmsis_nn_dims *filter_dims,
+                                           const q7_t *kernel,
+                                           const cmsis_nn_dims *bias_dims,
+                                           const int64_t *bias,
+                                           const cmsis_nn_dims *output_dims,
+                                           q15_t *output)
+{
+    /* ctx and bias_dims are not used by this implementation. */
+    (void)ctx;
+    (void)bias_dims;
+
+    /* Flatten the descriptor structs into the scalar argument list of the static kernel; */
+    /* its dimension, padding, stride and dilation parameters are declared as uint16_t. */
+    depthwise_conv_s16_generic_s16(input,
+                                   input_dims->n,
+                                   input_dims->w,
+                                   input_dims->h,
+                                   input_dims->c,
+                                   kernel,
+                                   dw_conv_params->ch_mult,
+                                   filter_dims->w,
+                                   filter_dims->h,
+                                   dw_conv_params->padding.w,
+                                   dw_conv_params->padding.h,
+                                   dw_conv_params->stride.w,
+                                   dw_conv_params->stride.h,
+                                   bias,
+                                   output,
+                                   quant_params->shift,
+                                   quant_params->multiplier,
+                                   output_dims->w,
+                                   output_dims->h,
+                                   dw_conv_params->activation.min,
+                                   dw_conv_params->activation.max,
+                                   dw_conv_params->dilation.w,
+                                   dw_conv_params->dilation.h);
+
+    /* The kernel returns nothing, so success is reported unconditionally. */
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
index 4f452c1..862e87f 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,10 +21,10 @@
 /* ----------------------------------------------------------------------
  * Project:      CMSIS NN Library
  * Title:        arm_depthwise_conv_s8.c
- * Description:	 s8 version of depthwise convolution.
+ * Description:  s8 version of depthwise convolution.
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.1
+ * $Date:        29 July 2022
+ * $Revision:    V.3.0.3
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -42,67 +42,83 @@
  * @{
  */
 
-static void depthwise_conv_s8_mult_4(const int8_t *input,
-                                     const int32_t input_x,
-                                     const int32_t input_y,
-                                     const int32_t input_ch,
-                                     const int8_t *kernel,
-                                     const int32_t output_ch,
-                                     const int32_t ch_mult,
-                                     const int32_t kernel_x,
-                                     const int32_t kernel_y,
-                                     const int32_t pad_x,
-                                     const int32_t pad_y,
-                                     const int32_t stride_x,
-                                     const int32_t stride_y,
-                                     const int32_t *bias,
-                                     int8_t *output,
-                                     const int32_t *output_shift,
-                                     const int32_t *output_mult,
-                                     const int32_t output_x,
-                                     const int32_t output_y,
-                                     const int32_t output_offset,
-                                     const int32_t input_offset,
-                                     const int32_t output_activation_min,
-                                     const int32_t output_activation_max)
+#if !defined(__ARMCC_VERSION)
+__attribute__((optimize("no-unroll-loops")))
+#endif
+static void
+depthwise_conv_s8_mult_4(const int8_t *input,
+                         const int32_t input_x,
+                         const int32_t input_y,
+                         const int32_t input_ch,
+                         const int8_t *kernel,
+                         const int32_t output_ch,
+                         const int32_t ch_mult,
+                         const int32_t kernel_x,
+                         const int32_t kernel_y,
+                         const int32_t pad_x,
+                         const int32_t pad_y,
+                         const int32_t stride_x,
+                         const int32_t stride_y,
+                         const int32_t *bias,
+                         int8_t *output,
+                         const int32_t *output_shift,
+                         const int32_t *output_mult,
+                         const int32_t output_x,
+                         const int32_t output_y,
+                         const int32_t output_offset,
+                         const int32_t input_offset,
+                         const int32_t output_activation_min,
+                         const int32_t output_activation_max)
 {
-    for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h)
+    const int32_t *bias_base = bias;
+    const int32_t *mult_base = output_mult;
+    const int32_t *shift_base = output_shift;
+    const int8_t *kernel_base = kernel;
+
+    for (int32_t in_h = -pad_y, out_h = 0; out_h < output_y; in_h += stride_y, ++out_h)
     {
         for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w)
         {
+            bias = bias_base;
+            output_mult = mult_base;
+            output_shift = shift_base;
             for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch;
                  ++in_ch, out_ch += ch_mult)
             {
                 for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4)
                 {
-                    int32_t out_buff[4];
-
-                    out_buff[0] = bias[out_ch + 0 + mult_tile];
-                    out_buff[1] = bias[out_ch + 1 + mult_tile];
-                    out_buff[2] = bias[out_ch + 2 + mult_tile];
-                    out_buff[3] = bias[out_ch + 3 + mult_tile];
+                    int32_t out_buff[4] = {0, 0, 0, 0};
+                    if (bias)
+                    {
+                        out_buff[0] = *bias++;
+                        out_buff[1] = *bias++;
+                        out_buff[2] = *bias++;
+                        out_buff[3] = *bias++;
+                    }
 
                     for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h)
                     {
                         int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch;
+                        kernel = kernel_base + mult_tile + ker_idx;
                         int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch;
-
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+#pragma clang loop unroll(disable)
+#endif
                         for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w);
-                             ++ker_w, ker_idx += output_ch)
+                             ++ker_w, kernel += output_ch)
                         {
                             int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset;
-                            out_buff[0] += in_val * kernel[ker_idx + 0 + mult_tile];
-                            out_buff[1] += in_val * kernel[ker_idx + 1 + mult_tile];
-                            out_buff[2] += in_val * kernel[ker_idx + 2 + mult_tile];
-                            out_buff[3] += in_val * kernel[ker_idx + 3 + mult_tile];
+                            out_buff[0] += in_val * kernel[0];
+                            out_buff[1] += in_val * kernel[1];
+                            out_buff[2] += in_val * kernel[2];
+                            out_buff[3] += in_val * kernel[3];
                         }
                     }
 #if defined(ARM_MATH_MVEI)
-                    (void)out_idx;
                     int32x4_t res = vldrwq_s32(out_buff);
-                    res = arm_requantize_mve_32x4(res,
-                                                  vldrwq_s32(&output_mult[out_ch + mult_tile]),
-                                                  vldrwq_s32(&output_shift[out_ch + mult_tile]));
+                    res = arm_requantize_mve_32x4(res, vldrwq_s32(output_mult), vldrwq_s32(output_shift));
+                    output_mult += 4;
+                    output_shift += 4;
                     res = vaddq_n_s32(res, output_offset);
 
                     res = vmaxq_s32(res, vdupq_n_s32(output_activation_min));
@@ -110,14 +126,10 @@ static void depthwise_conv_s8_mult_4(const int8_t *input,
                     vstrbq_s32(output, res);
                     output += 4;
 #else
-                    out_buff[0] = arm_nn_requantize(
-                        out_buff[0], output_mult[out_ch + 0 + mult_tile], output_shift[out_ch + 0 + mult_tile]);
-                    out_buff[1] = arm_nn_requantize(
-                        out_buff[1], output_mult[out_ch + 1 + mult_tile], output_shift[out_ch + 1 + mult_tile]);
-                    out_buff[2] = arm_nn_requantize(
-                        out_buff[2], output_mult[out_ch + 2 + mult_tile], output_shift[out_ch + 2 + mult_tile]);
-                    out_buff[3] = arm_nn_requantize(
-                        out_buff[3], output_mult[out_ch + 3 + mult_tile], output_shift[out_ch + 3 + mult_tile]);
+                    out_buff[0] = arm_nn_requantize(out_buff[0], *output_mult++, *output_shift++);
+                    out_buff[1] = arm_nn_requantize(out_buff[1], *output_mult++, *output_shift++);
+                    out_buff[2] = arm_nn_requantize(out_buff[2], *output_mult++, *output_shift++);
+                    out_buff[3] = arm_nn_requantize(out_buff[3], *output_mult++, *output_shift++);
 
                     out_buff[0] += output_offset;
                     out_buff[1] += output_offset;
@@ -129,10 +141,10 @@ static void depthwise_conv_s8_mult_4(const int8_t *input,
                     out_buff[2] = MIN(MAX(out_buff[2], output_activation_min), output_activation_max);
                     out_buff[3] = MIN(MAX(out_buff[3], output_activation_min), output_activation_max);
 
-                    output[out_idx++] = (int8_t)out_buff[0];
-                    output[out_idx++] = (int8_t)out_buff[1];
-                    output[out_idx++] = (int8_t)out_buff[2];
-                    output[out_idx++] = (int8_t)out_buff[3];
+                    *output++ = (int8_t)out_buff[0];
+                    *output++ = (int8_t)out_buff[1];
+                    *output++ = (int8_t)out_buff[2];
+                    *output++ = (int8_t)out_buff[3];
 
 #endif
                 }
@@ -142,6 +154,7 @@ static void depthwise_conv_s8_mult_4(const int8_t *input,
 }
 
 static void depthwise_conv_s8_generic(const q7_t *input,
+                                      const uint16_t input_batches,
                                       const uint16_t input_x,
                                       const uint16_t input_y,
                                       const uint16_t input_ch,
@@ -163,53 +176,92 @@ static void depthwise_conv_s8_generic(const q7_t *input,
                                       const int32_t output_offset,
                                       const int32_t input_offset,
                                       const int32_t output_activation_min,
-                                      const int32_t output_activation_max)
+                                      const int32_t output_activation_max,
+                                      const uint16_t dilation_x,
+                                      const uint16_t dilation_y)
+
 {
     (void)output_ch;
     int i_out = 0;
-    for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
+    int i_batch;
+
+    for (i_batch = 0; i_batch < input_batches; i_batch++)
     {
-        const int16_t base_idx_y = (i_out_y * stride_y) - pad_y;
-        for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
+        for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
         {
-            const int16_t base_idx_x = (i_out_x * stride_x) - pad_x;
-            for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
+            const int16_t base_idx_y = (i_out_y * stride_y) - pad_y;
+            for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
             {
-                for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
+                const int16_t base_idx_x = (i_out_x * stride_x) - pad_x;
+                for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
                 {
-                    const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
-                    int32_t acc_0;
-                    /* Condition for kernel start dimension: (base_idx_<x,y> + ker_<x,y>_start) >= 0 */
-                    const int ker_y_start = MAX(0, -base_idx_y);
-                    const int ker_x_start = MAX(0, -base_idx_x);
-                    /* Condition for kernel end dimension: (base_idx_<x,y> + ker_<x,y>_end) < input_<x,y> */
-                    const int ker_y_end = MIN(kernel_y, input_y - base_idx_y);
-                    const int ker_x_end = MIN(kernel_x, input_x - base_idx_x);
-                    acc_0 = bias[idx_out_ch];
-
-                    for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
+                    for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
                     {
-                        const int32_t idx_y = base_idx_y + i_ker_y;
-                        for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
+                        const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
+                        int32_t acc_0 = 0;
+
+                        int ker_y_start;
+                        int ker_x_start;
+                        int ker_y_end;
+                        int ker_x_end;
+
+                        if (dilation_x > 1)
+                        {
+                            const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
+                            ker_x_start = MAX(0, start_x_max);
+                            const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
+                            ker_x_end = MIN(kernel_x, end_min_x);
+                        }
+                        else
                         {
-                            const int32_t idx_x = base_idx_x + i_ker_x;
-                            int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
-                            int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;
+                            ker_x_start = MAX(0, -base_idx_x);
+                            ker_x_end = MIN(kernel_x, input_x - base_idx_x);
+                        }
 
-                            acc_0 += (input[idx_0] + input_offset) * kernel[ker_idx_0];
+                        if (dilation_y > 1)
+                        {
+                            const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
+                            ker_y_start = MAX(0, start_y_max);
+                            const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
+                            ker_y_end = MIN(kernel_y, end_min_y);
                         }
-                    }
+                        else
+                        {
+                            ker_y_start = MAX(0, -base_idx_y);
+                            ker_y_end = MIN(kernel_y, input_y - base_idx_y);
+                        }
+
+                        if (bias)
+                        {
+                            acc_0 = bias[idx_out_ch];
+                        }
+
+                        for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
+                        {
+                            const int32_t idx_y = base_idx_y + dilation_y * i_ker_y;
+                            for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
+                            {
+                                const int32_t idx_x = base_idx_x + dilation_x * i_ker_x;
+                                int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
+                                int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;
 
-                    /* Requantize and clamp output to provided range */
-                    acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]);
-                    acc_0 += output_offset;
-                    acc_0 = MAX(acc_0, output_activation_min);
-                    acc_0 = MIN(acc_0, output_activation_max);
+                                acc_0 += (input[idx_0] + input_offset) * kernel[ker_idx_0];
+                            }
+                        }
 
-                    output[i_out++] = acc_0;
+                        /* Requantize and clamp output to provided range */
+                        acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]);
+                        acc_0 += output_offset;
+                        acc_0 = MAX(acc_0, output_activation_min);
+                        acc_0 = MIN(acc_0, output_activation_max);
+
+                        output[i_out++] = acc_0;
+                    }
                 }
             }
         }
+        /* Advance to the next batch */
+        input += (input_x * input_y * input_ch);
     }
 }
 
@@ -220,23 +272,26 @@ static void depthwise_conv_s8_generic(const q7_t *input,
  *  Optimization using DSP extension is not available for the generic case where channel multiplier is > 1.
  *
  */
-arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
-                                 const cmsis_nn_dw_conv_params *dw_conv_params,
-                                 const cmsis_nn_per_channel_quant_params *quant_params,
-                                 const cmsis_nn_dims *input_dims,
-                                 const q7_t *input,
-                                 const cmsis_nn_dims *filter_dims,
-                                 const q7_t *kernel,
-                                 const cmsis_nn_dims *bias_dims,
-                                 const int32_t *bias,
-                                 const cmsis_nn_dims *output_dims,
-                                 q7_t *output)
+arm_cmsis_nn_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
+                                          const cmsis_nn_dw_conv_params *dw_conv_params,
+                                          const cmsis_nn_per_channel_quant_params *quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input,
+                                          const cmsis_nn_dims *filter_dims,
+                                          const q7_t *kernel,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const int32_t *bias,
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output)
 {
-    (void)dw_conv_params->dilation;
+    const uint16_t dilation_x = dw_conv_params->dilation.w;
+    const uint16_t dilation_y = dw_conv_params->dilation.h;
+
     (void)bias_dims;
     (void)ctx;
 
-    if (dw_conv_params->ch_mult % 4 == 0)
+    if (dw_conv_params->ch_mult % 4 == 0 && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
         depthwise_conv_s8_mult_4(input,
                                  input_dims->w,
@@ -265,6 +320,7 @@ arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
     else
     {
         depthwise_conv_s8_generic(input,
+                                  input_dims->n,
                                   input_dims->w,
                                   input_dims->h,
                                   input_dims->c,
@@ -286,11 +342,13 @@ arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
                                   dw_conv_params->output_offset,
                                   dw_conv_params->input_offset,
                                   dw_conv_params->activation.min,
-                                  dw_conv_params->activation.max);
+                                  dw_conv_params->activation.max,
+                                  dilation_x,
+                                  dilation_y);
     }
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
index b81d7ca..fc12e72 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -24,8 +24,8 @@
  * Description:  Optimized s8 depthwise separable convolution function for
  *               channel multiplier of 1.
  *
- * $Date:        January 26, 2021
- * $Revision:    V.2.0.3
+ * $Date:        27 July 2022
+ * $Revision:    V.3.1.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -50,28 +50,34 @@
  *
  */
 
-arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
-                                     const cmsis_nn_dw_conv_params *dw_conv_params,
-                                     const cmsis_nn_per_channel_quant_params *quant_params,
-                                     const cmsis_nn_dims *input_dims,
-                                     const q7_t *input,
-                                     const cmsis_nn_dims *filter_dims,
-                                     const q7_t *kernel,
-                                     const cmsis_nn_dims *bias_dims,
-                                     const int32_t *bias,
-                                     const cmsis_nn_dims *output_dims,
-                                     q7_t *output)
+arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
+                                              const cmsis_nn_dw_conv_params *dw_conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const cmsis_nn_dims *input_dims,
+                                              const q7_t *input,
+                                              const cmsis_nn_dims *filter_dims,
+                                              const q7_t *kernel,
+                                              const cmsis_nn_dims *bias_dims,
+                                              const int32_t *bias,
+                                              const cmsis_nn_dims *output_dims,
+                                              q7_t *output)
 {
 
     const int32_t input_ch = input_dims->c;
     const int32_t output_ch = output_dims->c;
 
-    /* Check input constraints input_ch == output_ch */
+    /* Check depth multiplier is 1 */
     if (input_ch != output_ch)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    if (ctx->buf == NULL && arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims) > 0)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 #ifdef ARM_MATH_DSP
+    (void)bias_dims;
     const int32_t input_x = input_dims->w;
     const int32_t input_y = input_dims->h;
     const int32_t kernel_x = filter_dims->w;
@@ -91,7 +97,6 @@ arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
     q15_t *buffer_a = (q15_t *)ctx->buf;
 
 #ifdef ARM_MATH_MVEI
-    (void)bias_dims;
     /* Generate two columns from the input tensor */
     q7_t *lhs_buffer = (q7_t *)buffer_a;
     q7_t *out = output;
@@ -99,116 +104,133 @@ arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
     int buffer_count = 0;
     const int32_t kernel_size = kernel_x * kernel_y;
 
-    /* This part implements the im2col function */
-    for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
+    const int32_t ch_loop = (input_ch + (CH_IN_BLOCK_MVE - 1)) / CH_IN_BLOCK_MVE;
+    int32_t remaining_ch = output_ch;
+    int32_t active_ch = MIN(CH_IN_BLOCK_MVE, remaining_ch);
+    remaining_ch -= CH_IN_BLOCK_MVE;
+
+    for (int i_ch = 0; i_ch < ch_loop; i_ch++)
     {
-        for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
+        out = output + i_ch * CH_IN_BLOCK_MVE;
+        const int8_t *input_slice = input + (i_ch * CH_IN_BLOCK_MVE);
+
+        for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
         {
-            for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++)
+            for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
             {
-                for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++)
+                for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++)
+                {
+                    for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++)
+                    {
+                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                        {
+                            arm_memset_q7(lhs_buffer, (int8_t)-input_offset, (uint32_t)active_ch);
+                            padded = 1;
+                        }
+                        else
+                        {
+                            arm_memcpy_q7(lhs_buffer,
+                                          input_slice + (i_ker_y * input_x + i_ker_x) * input_ch,
+                                          (uint32_t)active_ch);
+                        }
+                        lhs_buffer += CH_IN_BLOCK_MVE;
+                    }
+                }
+                buffer_count++;
+
+                if (buffer_count == 4)
                 {
-                    if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
+                    const int32_t block_offset = i_ch * CH_IN_BLOCK_MVE;
+                    lhs_buffer = (q7_t *)buffer_a;
+                    if (padded == 0)
                     {
-                        arm_memset_q7(lhs_buffer, (int8_t)-input_offset, (uint32_t)input_ch);
-                        padded = 1;
+                        arm_nn_depthwise_conv_nt_t_s8(lhs_buffer,
+                                                      kernel + block_offset,
+                                                      input_offset,
+                                                      active_ch,
+                                                      input_ch,
+                                                      output_shift + block_offset,
+                                                      output_mult + block_offset,
+                                                      output_offset,
+                                                      output_activation_min,
+                                                      output_activation_max,
+                                                      kernel_size,
+                                                      bias + block_offset,
+                                                      out);
                     }
                     else
                     {
-                        arm_memcpy_q7(lhs_buffer, input + (i_ker_y * input_x + i_ker_x) * input_ch, (uint32_t)input_ch);
+                        arm_nn_depthwise_conv_nt_t_padded_s8(lhs_buffer,
+                                                             kernel + block_offset,
+                                                             input_offset,
+                                                             active_ch,
+                                                             input_ch,
+                                                             output_shift + block_offset,
+                                                             output_mult + block_offset,
+                                                             output_offset,
+                                                             output_activation_min,
+                                                             output_activation_max,
+                                                             kernel_size,
+                                                             bias + block_offset,
+                                                             out);
+                        padded = 0;
                     }
-                    lhs_buffer += input_ch;
+                    out += (4 * input_ch);
+                    buffer_count = 0;
                 }
             }
-            buffer_count++;
+        }
+        /* Handle left over buffers */
+        lhs_buffer = (q7_t *)buffer_a;
 
-            if (buffer_count == 4)
+        int8_t *out_base = out;
+        for (int i_buf = 0; i_buf < buffer_count; i_buf++)
+        {
+            int32_t loop_count = (active_ch + 3) / 4;
+            int32_t num_ch_to_process = active_ch;
+            out = out_base + (i_buf * input_ch);
+            for (int i_loop_cnt = 0, offset = i_ch * CH_IN_BLOCK_MVE; i_loop_cnt < loop_count;
+                 num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
             {
-                lhs_buffer = (q7_t *)buffer_a;
-                if (padded == 0)
+                const int8_t *col_0 = lhs_buffer + (kernel_size * CH_IN_BLOCK_MVE * i_buf) + (i_loop_cnt * 4);
+                const int8_t *row_0 = kernel + offset;
+                int32x4_t out_0 = vdupq_n_s32(0);
+                if (bias)
                 {
-                    out = arm_nn_depthwise_conv_nt_t_s8(lhs_buffer,
-                                                        kernel,
-                                                        input_offset,
-                                                        input_ch,
-                                                        output_shift,
-                                                        output_mult,
-                                                        output_offset,
-                                                        output_activation_min,
-                                                        output_activation_max,
-                                                        kernel_size,
-                                                        bias,
-                                                        out);
+                    out_0 = vldrwq_s32(&bias[offset]);
                 }
-                else
-                {
-                    out = arm_nn_depthwise_conv_nt_t_padded_s8(lhs_buffer,
-                                                               kernel,
-                                                               input_offset,
-                                                               input_ch,
-                                                               output_shift,
-                                                               output_mult,
-                                                               output_offset,
-                                                               output_activation_min,
-                                                               output_activation_max,
-                                                               kernel_size,
-                                                               bias,
-                                                               out);
-                    padded = 0;
-                }
-                buffer_count = 0;
-            }
-        }
-    }
-
-    /* Handle left over buffers */
-    lhs_buffer = (q7_t *)buffer_a;
 
-    for (int i_buf = 0; i_buf < buffer_count; i_buf++)
-    {
-        int32_t loop_count = (input_ch + 3) / 4;
+                for (int i_ker = 0; i_ker < kernel_size; i_ker++)
+                {
+                    const int32x4_t ker_0 = vldrbq_s32(row_0);
+                    int32x4_t ip_0 = vldrbq_s32(col_0);
+                    ip_0 = vaddq_n_s32(ip_0, input_offset);
+                    out_0 += vmulq_s32(ip_0, ker_0);
 
-        int32_t num_ch_to_process = input_ch;
-        for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
-        {
-            const int8_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset;
-            const int8_t *row_0 = kernel + offset;
-            int32x4_t out_0 = vldrwq_s32(&bias[offset]);
+                    col_0 += CH_IN_BLOCK_MVE;
+                    row_0 += input_ch;
+                }
 
-            for (int i_ker = 0; i_ker < kernel_size; i_ker++)
-            {
-                const int32x4_t ker_0 = vldrbq_s32(row_0);
+                const int32x4_t mult = vldrwq_s32(&output_mult[offset]);
+                const int32x4_t shift = vldrwq_s32(&output_shift[offset]);
 
-                int32x4_t ip_0 = vldrbq_s32(col_0);
-                ip_0 = vaddq_n_s32(ip_0, input_offset);
-                out_0 += vmulq_s32(ip_0, ker_0);
+                out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
+                out_0 = vaddq_n_s32(out_0, output_offset);
+                out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
+                out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
+                mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process);
+                vstrbq_p_s32(out, out_0, p);
 
-                col_0 += input_ch;
-                row_0 += input_ch;
+                out += 4;
             }
-
-            const int32x4_t mult = vldrwq_s32(&output_mult[offset]);
-            const int32x4_t shift = vldrwq_s32(&output_shift[offset]);
-
-            out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
-            out_0 = vaddq_n_s32(out_0, output_offset);
-            out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
-            out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
-            mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process);
-            vstrbq_p_s32(out, out_0, p);
-
-            out += 4;
         }
+        buffer_count = 0;
 
-        const int tail_ch = input_ch & 0x3;
-        if (tail_ch != 0)
-        {
-            out -= (4 - tail_ch);
-        }
+        active_ch = MIN(CH_IN_BLOCK_MVE, remaining_ch);
+        remaining_ch -= CH_IN_BLOCK_MVE;
     }
 
 #else // ARM_MATH_DSP
-    (void)bias_dims;
     /* Run the following code in cores using DSP extension */
     q15_t *const col_buffer_start = buffer_a;
     q15_t *col_buffer = col_buffer_start;
@@ -274,10 +296,17 @@ arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 
             while (row_count)
             {
-                q31_t sum = *bias++;
-                q31_t sum_2 = *bias++;
-                q31_t sum_3 = *bias++;
-                q31_t sum_4 = *bias++;
+                q31_t sum = 0;
+                q31_t sum_2 = 0;
+                q31_t sum_3 = 0;
+                q31_t sum_4 = 0;
+                if (bias)
+                {
+                    sum = *bias++;
+                    sum_2 = *bias++;
+                    sum_3 = *bias++;
+                    sum_4 = *bias++;
+                }
 
                 uint16_t col_count = (kernel_x * kernel_y) / 2;
                 q15_t *col_pos = col_buffer_start + row_shift;
@@ -370,7 +399,11 @@ arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
             {
                 q15_t *col_pos = col_buffer_start + row_shift;
                 const q7_t *row_pos = kernel + row_shift;
-                q31_t sum = *bias++;
+                q31_t sum = 0;
+                if (bias)
+                {
+                    sum = *bias++;
+                }
                 const uint16_t col_count = (kernel_x * kernel_y);
                 row_shift += 1;
 
@@ -408,14 +441,14 @@ arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 #endif /* ARM_MATH_MVEI | ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
 {
 #if defined(ARM_MATH_MVEI)
-    /* The + 4 accounts for out of bounds read of the lhs buffers in the *_nt_t_* functions.  */
-    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t) + 4;
+    (void)input_dims;
+    return (4 * CH_IN_BLOCK_MVE * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int8_t);
 #elif defined(ARM_MATH_DSP)
     return (input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t);
 #else
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c
index 5daa300..0404276 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_depthwise_conv_u8_basic_ver1.c
  * Description:  u8 depthwise convolution function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.1.1
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
  *
  * Target :  Cortex-M CPUs
  *
@@ -226,7 +226,7 @@ static void depthwise_conv_u8_generic(const uint8_t *input,
  * @param[in]     dilation_x Dilation along width. Not used and intended for future enhancement.
  * @param[in]     dilation_y Dilation along height. Not used and intended for future enhancement.
  * @param[in]     bias       Pointer to optional bias values. If no bias is
- *                           availble, NULL is expected
+ *                           available, NULL is expected
  * @param[in]     input_offset  Input tensor zero offset
  * @param[in]     filter_offset Kernel tensor zero offset
  * @param[in]     output_offset Output tensor zero offset
@@ -238,38 +238,35 @@ static void depthwise_conv_u8_generic(const uint8_t *input,
  * @param[in]     output_shift  Amount of right-shift for output
  * @param[in]     output_mult   Output multiplier for requantization
  * @return        The function returns one of the following
- *                <code>ARM_MATH_SIZE_MISMATCH</code> - Not supported dimension of tensors
- *                <code>ARM_MATH_SUCCESS</code> - Successful operation
- *                <code>ARM_MATH_ARGUMENT_ERROR</code> - Implementation not available
- *
+ *                <code>ARM_CMSIS_NN_SUCCESS</code> - Successful operation
  *
  */
 
-arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
-                                            const uint16_t input_x,
-                                            const uint16_t input_y,
-                                            const uint16_t input_ch,
-                                            const uint8_t *kernel,
-                                            const uint16_t kernel_x,
-                                            const uint16_t kernel_y,
-                                            const int16_t ch_mult,
-                                            const int16_t pad_x,
-                                            const int16_t pad_y,
-                                            const int16_t stride_x,
-                                            const int16_t stride_y,
-                                            const int16_t dilation_x,
-                                            const int16_t dilation_y,
-                                            const int32_t *bias,
-                                            const int32_t input_offset,
-                                            const int32_t filter_offset,
-                                            const int32_t output_offset,
-                                            uint8_t *output,
-                                            const uint16_t output_x,
-                                            const uint16_t output_y,
-                                            const int32_t output_activation_min,
-                                            const int32_t output_activation_max,
-                                            const int32_t output_shift,
-                                            const int32_t output_mult)
+arm_cmsis_nn_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
+                                                     const uint16_t input_x,
+                                                     const uint16_t input_y,
+                                                     const uint16_t input_ch,
+                                                     const uint8_t *kernel,
+                                                     const uint16_t kernel_x,
+                                                     const uint16_t kernel_y,
+                                                     const int16_t ch_mult,
+                                                     const int16_t pad_x,
+                                                     const int16_t pad_y,
+                                                     const int16_t stride_x,
+                                                     const int16_t stride_y,
+                                                     const int16_t dilation_x,
+                                                     const int16_t dilation_y,
+                                                     const int32_t *bias,
+                                                     const int32_t input_offset,
+                                                     const int32_t filter_offset,
+                                                     const int32_t output_offset,
+                                                     uint8_t *output,
+                                                     const uint16_t output_x,
+                                                     const uint16_t output_y,
+                                                     const int32_t output_activation_min,
+                                                     const int32_t output_activation_max,
+                                                     const int32_t output_shift,
+                                                     const int32_t output_mult)
 {
     (void)dilation_x;
     (void)dilation_y;
@@ -330,7 +327,7 @@ arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
     }
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s16.c
new file mode 100644
index 0000000..072e7ea
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s16.c
@@ -0,0 +1,125 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_depthwise_conv_wrapper_s16.c
+ * Description:  Wrapper API to select appropriate depthwise conv API based
+ *               on dimensions.
+ *
+ * $Date:        6 July 2022
+ * $Revision:    V.1.0.1
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup NNConv
+ * @{
+ */
+/* Fast-path test: ch_mult == 1, dilation 1x1, and filter w * h * input c < 512. Args expand unparenthesized. */
+#define USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims)                                             \
+    (dw_conv_params->ch_mult == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1 &&             \
+     filter_dims->w * filter_dims->h * input_dims->c < 512)
+
+/*
+ *  s16 Depthwise conv wrapper function
+ *  Forwards all arguments unchanged to arm_depthwise_conv_fast_s16() when USE_FAST_DW_CONV_FUNCTION
+ *  holds, otherwise to arm_depthwise_conv_s16(), and returns the status of the function it called.
+ *  Refer header file for details.
+ */
+arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16(const cmsis_nn_context *ctx,
+                                                   const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                   const cmsis_nn_per_channel_quant_params *quant_params,
+                                                   const cmsis_nn_dims *input_dims,
+                                                   const q15_t *input,
+                                                   const cmsis_nn_dims *filter_dims,
+                                                   const q7_t *filter,
+                                                   const cmsis_nn_dims *bias_dims,
+                                                   const int64_t *bias,
+                                                   const cmsis_nn_dims *output_dims,
+                                                   q15_t *output)
+{
+    arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS; /* always overwritten by one of the branches below */
+
+    if (USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims))
+    {
+        status = arm_depthwise_conv_fast_s16(ctx,
+                                             dw_conv_params,
+                                             quant_params,
+                                             input_dims,
+                                             input,
+                                             filter_dims,
+                                             filter,
+                                             bias_dims,
+                                             bias,
+                                             output_dims,
+                                             output);
+    }
+    else /* ch_mult != 1, a dilation != 1, or filter w * h * input c >= 512 */
+    {
+        status = arm_depthwise_conv_s16(ctx,
+                                        dw_conv_params,
+                                        quant_params,
+                                        input_dims,
+                                        input,
+                                        filter_dims,
+                                        filter,
+                                        bias_dims,
+                                        bias,
+                                        output_dims,
+                                        output);
+    }
+
+    /* Return to application */
+    return status;
+}
+
+int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                       const cmsis_nn_dims *input_dims,
+                                                       const cmsis_nn_dims *filter_dims,
+                                                       const cmsis_nn_dims *output_dims)
+{
+    (void)dw_conv_params; /* redundant cast: read by USE_FAST_DW_CONV_FUNCTION below */
+    (void)input_dims;     /* redundant cast: read by the macro and passed to the fast size query */
+    (void)filter_dims;    /* redundant cast: read by the macro and passed to the fast size query */
+    (void)output_dims;    /* the only parameter this function never reads */
+    int32_t size = 0;
+    /* Size is queried from the fast kernel only when the wrapper would pick it; otherwise 0 is returned. */
+    if (USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims))
+    {
+        size = arm_depthwise_conv_fast_s16_get_buffer_size(input_dims, filter_dims);
+    }
+
+    return size;
+}
+
+/**
+ * @} end of NNConv group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
index 89913c9..df2bb64 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -24,8 +24,8 @@
  * Description:  Wrapper API to select appropriate depthwise conv API based
  *               on dimensions.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -48,23 +48,25 @@
  *  Refer header file for details.
  *
  */
-arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
-                                         const cmsis_nn_dw_conv_params *dw_conv_params,
-                                         const cmsis_nn_per_channel_quant_params *quant_params,
-                                         const cmsis_nn_dims *input_dims,
-                                         const q7_t *input,
-                                         const cmsis_nn_dims *filter_dims,
-                                         const q7_t *filter,
-                                         const cmsis_nn_dims *bias_dims,
-                                         const int32_t *bias,
-                                         const cmsis_nn_dims *output_dims,
-                                         q7_t *output)
+arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
+                                                  const cmsis_nn_dw_conv_params *dw_conv_params,
+                                                  const cmsis_nn_per_channel_quant_params *quant_params,
+                                                  const cmsis_nn_dims *input_dims,
+                                                  const q7_t *input,
+                                                  const cmsis_nn_dims *filter_dims,
+                                                  const q7_t *filter,
+                                                  const cmsis_nn_dims *bias_dims,
+                                                  const int32_t *bias,
+                                                  const cmsis_nn_dims *output_dims,
+                                                  q7_t *output)
 {
-    arm_status status = ARM_MATH_SUCCESS;
-    if (1 == dw_conv_params->ch_mult)
+    arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS;
+    if (1 == dw_conv_params->ch_mult && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
 #if !defined(ARM_MATH_MVEI)
-        if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1))
+        if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1) &&
+            (dw_conv_params->padding.w <= 1))
         {
             status = arm_depthwise_conv_3x3_s8(ctx,
                                                dw_conv_params,
@@ -121,7 +123,8 @@ int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_par
     (void)dw_conv_params;
     int32_t size = 0;
 
-    if (input_dims->c == output_dims->c)
+    if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
+        dw_conv_params->dilation.h == 1)
     {
         size = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
     }
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
index f0526a2..0a91889 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_depthwise_separable_conv_HWC_q7.c
  * Description:  Q7 depthwise separable convolution function
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,63 +42,29 @@
  * @{
  */
 
-/**
- * @brief Q7 depthwise separable convolution function
- * @param[in]       Im_in       pointer to input tensor
- * @param[in]       dim_im_in   input tensor dimension
- * @param[in]       ch_im_in    number of input tensor channels
- * @param[in]       wt          pointer to kernel weights
- * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel  filter kernel size
- * @param[in]       padding     padding sizes
- * @param[in]       stride      convolution stride
- * @param[in]       bias        pointer to bias
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in,out]   Im_out      pointer to output tensor
- * @param[in]       dim_im_out  output tensor dimension
- * @param[in,out]   bufferA     pointer to buffer space for input
- * @param[in,out]   bufferB     pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
- *
- * bufferB size: 0
- *
- * <b>Input dimension constraints:</b>
- *
- * ch_im_in equals ch_im_out
- *
- * Implementation:
- * There are 3 nested loop here:
- * Inner loop: calculate each output value with MAC instruction over an accumulator
- * Mid   loop: loop over different output channel
- * Outer loop: loop over different output (x, y)
+/*
+ * Q7 depthwise separable convolution function
+ * Refer function header for details
  */
 
-arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
-                                               const uint16_t dim_im_in,
-                                               const uint16_t ch_im_in,
-                                               const q7_t *wt,
-                                               const uint16_t ch_im_out,
-                                               const uint16_t dim_kernel,
-                                               const uint16_t padding,
-                                               const uint16_t stride,
-                                               const q7_t *bias,
-                                               const uint16_t bias_shift,
-                                               const uint16_t out_shift,
-                                               q7_t *Im_out,
-                                               const uint16_t dim_im_out,
-                                               q15_t *bufferA,
-                                               q7_t *bufferB)
+arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
+                                                        const uint16_t dim_im_in,
+                                                        const uint16_t ch_im_in,
+                                                        const q7_t *wt,
+                                                        const uint16_t ch_im_out,
+                                                        const uint16_t dim_kernel,
+                                                        const uint16_t padding,
+                                                        const uint16_t stride,
+                                                        const q7_t *bias,
+                                                        const uint16_t bias_shift,
+                                                        const uint16_t out_shift,
+                                                        q7_t *Im_out,
+                                                        const uint16_t dim_im_out,
+                                                        q15_t *bufferA,
+                                                        q7_t *bufferB)
 {
     (void)bufferB;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_out_y, i_out_x;
@@ -113,7 +79,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
     /* do some checking here, basically ch_im_in == ch_im_out */
     if (ch_im_in != ch_im_out)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
@@ -265,13 +231,13 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
                              "smlad %[sum4], r4, r5, %[sum4]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP_%=\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #else
                 /*
@@ -309,13 +275,13 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
                              "smlad %[sum3], r4, r5, %[sum3]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP_%=\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 
 #endif /* ARM_MATH_BIG_ENDIAN */
@@ -383,7 +349,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
     /* do some checking here, basically ch_im_in == ch_im_out */
     if (ch_im_in != ch_im_out)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
@@ -416,7 +382,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
index 0c5d420..e85b01b 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_depthwise_separable_conv_HWC_q7_nonsquare.c
  * Description:  Q7 depthwise separable convolution function (non-square shape)
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,62 +42,36 @@
  * @{
  */
 
-/**
- * @brief Q7 depthwise separable convolution function (non-square shape)
- * @param[in]       Im_in         pointer to input tensor
- * @param[in]       dim_im_in_x   input tensor dimension x
- * @param[in]       dim_im_in_y   input tensor dimension y
- * @param[in]       ch_im_in      number of input tensor channels
- * @param[in]       wt            pointer to kernel weights
- * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
- * @param[in]       dim_kernel_x  filter kernel size x
- * @param[in]       dim_kernel_y  filter kernel size y
- * @param[in]       padding_x     padding sizes x
- * @param[in]       padding_y     padding sizes y
- * @param[in]       stride_x      convolution stride x
- * @param[in]       stride_y      convolution stride y
- * @param[in]       bias          pointer to bias
- * @param[in]       bias_shift    amount of left-shift for bias
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in,out]   Im_out        pointer to output tensor
- * @param[in]       dim_im_out_x  output tensor dimension x
- * @param[in]       dim_im_out_y  output tensor dimension y
- * @param[in,out]   bufferA       pointer to buffer space for input
- * @param[in,out]   bufferB       pointer to buffer space for output
- * @return     The function returns either
- * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
- *
- * This function is the version with full list of optimization tricks, but with
- * some contraints:
- *   ch_im_in is equal to ch_im_out
- *
+/*
+ * Q7 depthwise separable convolution function (non-square shape)
+ * Refer function header for details
  */
 
-arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
-                                                         const uint16_t dim_im_in_x,
-                                                         const uint16_t dim_im_in_y,
-                                                         const uint16_t ch_im_in,
-                                                         const q7_t *wt,
-                                                         const uint16_t ch_im_out,
-                                                         const uint16_t dim_kernel_x,
-                                                         const uint16_t dim_kernel_y,
-                                                         const uint16_t padding_x,
-                                                         const uint16_t padding_y,
-                                                         const uint16_t stride_x,
-                                                         const uint16_t stride_y,
-                                                         const q7_t *bias,
-                                                         const uint16_t bias_shift,
-                                                         const uint16_t out_shift,
-                                                         q7_t *Im_out,
-                                                         const uint16_t dim_im_out_x,
-                                                         const uint16_t dim_im_out_y,
-                                                         q15_t *bufferA,
-                                                         q7_t *bufferB)
+arm_cmsis_nn_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
+                                                                  const uint16_t dim_im_in_x,
+                                                                  const uint16_t dim_im_in_y,
+                                                                  const uint16_t ch_im_in,
+                                                                  const q7_t *wt,
+                                                                  const uint16_t ch_im_out,
+                                                                  const uint16_t dim_kernel_x,
+                                                                  const uint16_t dim_kernel_y,
+                                                                  const uint16_t padding_x,
+                                                                  const uint16_t padding_y,
+                                                                  const uint16_t stride_x,
+                                                                  const uint16_t stride_y,
+                                                                  const q7_t *bias,
+                                                                  const uint16_t bias_shift,
+                                                                  const uint16_t out_shift,
+                                                                  q7_t *Im_out,
+                                                                  const uint16_t dim_im_out_x,
+                                                                  const uint16_t dim_im_out_y,
+                                                                  q15_t *bufferA,
+                                                                  q7_t *bufferB)
 {
 
     (void)bufferB;
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     /*
@@ -121,7 +95,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
     /* do some checking here, basically ch_im_in == ch_im_out */
     if (ch_im_in != ch_im_out)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
@@ -272,13 +246,13 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
                              "smlad %[sum4], r4, r5, %[sum4]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #else
                 //  r0    r1    r2    r3    r4   r5
@@ -314,13 +288,13 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
                              "smlad %[sum3], r4, r5, %[sum3]\n"
                              "subs %[colCnt], #1\n"
                              "bne COL_LOOP\n"
-                             : [ sum ] "+r"(sum),
-                               [ sum2 ] "+r"(sum2),
-                               [ sum3 ] "+r"(sum3),
-                               [ sum4 ] "+r"(sum4),
-                               [ pB ] "+r"(pB),
-                               [ pA ] "+r"(pA)
-                             : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
+                             : [sum] "+r"(sum),
+                               [sum2] "+r"(sum2),
+                               [sum3] "+r"(sum3),
+                               [sum4] "+r"(sum4),
+                               [pB] "+r"(pB),
+                               [pA] "+r"(pA)
+                             : [colCnt] "r"(colCnt), [ch_im_in] "r"(ch_im_in)
                              : "r0", "r1", "r2", "r3", "r4", "r5");
 #endif /*ARM_MATH_BIG_ENDIAN */
 
@@ -388,7 +362,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
     /* do some checking here, basically ch_im_in == ch_im_out */
     if (ch_im_in != ch_im_out)
     {
-        return ARM_MATH_SIZE_MISMATCH;
+        return ARM_CMSIS_NN_ARG_ERROR;
     }
 
     for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
@@ -421,7 +395,7 @@ arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
index 17e6dc9..5c95485 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mult_kernel_q7_q15.c
  * Description:  Matrix-multiplication function for convolution
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -32,10 +32,10 @@
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
-/**
- * @brief Matrix-multiplication function for convolution.
+/*
+ * Matrix-multiplication function for convolution.
  *
- * @details Refer to header file for details.
+ * Refer to header file for details.
  *
  */
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
index 1217c11..29043c8 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mult_kernel_q7_q15_reordered.c
  * Description:  Matrix-multiplication function for convolution with reordered columns
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -32,10 +32,10 @@
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
-/**
- * @brief Matrix-multiplication function for convolution with re-ordered input.
+/*
+ * Matrix-multiplication function for convolution with re-ordered input.
  *
- * @details Refer to header file for details.
+ * Refer to header file for details.
  *
  */
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
index 8b37b34..62ee822 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mult_kernel_s8_s16.c
  * Description:  Matrix-multiplication function for convolution
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.3
+ * $Date:        14. December 2021
+ * $Revision:    V.1.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -51,174 +51,7 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
                                     const int32_t *const output_bias,
                                     q7_t *out_0)
 {
-#if defined(ARM_MATH_MVEI)
-#define ROW_PER_LOOP (4)
-#define COL_PER_LOOP (8)
-
-    const q7_t *ip_a0_s8 = input_a;
-    q7_t *out_1 = out_0 + output_ch;
-
-    const int32_t *bias = output_bias;
-
-    int32_t row_count = output_ch / ROW_PER_LOOP;
-
-    while (row_count)
-    {
-        const q15_t *ip_b0_s16 = input_b;
-        const q15_t *ip_b1_s16 = input_b + num_col_a;
-
-        const q7_t *ip_a1_s8 = ip_a0_s8 + num_col_a;
-        const q7_t *ip_a2_s8 = ip_a0_s8 + num_col_a * 2;
-        const q7_t *ip_a3_s8 = ip_a0_s8 + num_col_a * 3;
-
-        q31_t ch_0_out_n = bias[0];
-        q31_t ch_1_out_n = bias[1];
-        q31_t ch_2_out_n = bias[2];
-        q31_t ch_3_out_n = bias[3];
-
-        q31_t ch_0_out_n1 = ch_0_out_n;
-        q31_t ch_1_out_n1 = ch_1_out_n;
-        q31_t ch_2_out_n1 = ch_2_out_n;
-        q31_t ch_3_out_n1 = ch_3_out_n;
-        bias += 4;
-
-        int32_t col_count = num_col_a / COL_PER_LOOP;
-
-        while (col_count)
-        {
-            // Load inputs
-            const int16x8_t ip_b0 = vld1q_s16(ip_b0_s16);
-            ip_b0_s16 += COL_PER_LOOP;
-            const int16x8_t ip_b1 = vld1q_s16(ip_b1_s16);
-            ip_b1_s16 += COL_PER_LOOP;
-
-            // Load filters
-            const int16x8_t ip_a0 = vldrbq_s16(ip_a0_s8);
-            ip_a0_s8 += COL_PER_LOOP;
-            const int16x8_t ip_a1 = vldrbq_s16(ip_a1_s8);
-            ip_a1_s8 += COL_PER_LOOP;
-            const int16x8_t ip_a2 = vldrbq_s16(ip_a2_s8);
-            ip_a2_s8 += COL_PER_LOOP;
-            const int16x8_t ip_a3 = vldrbq_s16(ip_a3_s8);
-            ip_a3_s8 += COL_PER_LOOP;
-
-            // MAC
-            ch_0_out_n += vmladavq_s16(ip_b0, ip_a0);
-            ch_1_out_n += vmladavq_s16(ip_b0, ip_a1);
-            ch_2_out_n += vmladavq_s16(ip_b0, ip_a2);
-            ch_3_out_n += vmladavq_s16(ip_b0, ip_a3);
-            ch_0_out_n1 += vmladavq_s16(ip_b1, ip_a0);
-            ch_1_out_n1 += vmladavq_s16(ip_b1, ip_a1);
-            ch_2_out_n1 += vmladavq_s16(ip_b1, ip_a2);
-            ch_3_out_n1 += vmladavq_s16(ip_b1, ip_a3);
-
-            col_count--;
-        }
-
-        /* Handle tail */
-        col_count = (num_col_a & (COL_PER_LOOP - 1)) - 1;
-        while (col_count >= 0)
-        {
-            const int32_t b0 = ip_b0_s16[col_count];
-            const int32_t b1 = ip_b1_s16[col_count];
-
-            ch_0_out_n += b0 * ip_a0_s8[col_count];
-            ch_1_out_n += b0 * ip_a1_s8[col_count];
-            ch_2_out_n += b0 * ip_a2_s8[col_count];
-            ch_3_out_n += b0 * ip_a3_s8[col_count];
-
-            ch_0_out_n1 += b1 * ip_a0_s8[col_count];
-            ch_1_out_n1 += b1 * ip_a1_s8[col_count];
-            ch_2_out_n1 += b1 * ip_a2_s8[col_count];
-            ch_3_out_n1 += b1 * ip_a3_s8[col_count];
-            col_count--;
-        }
-        ip_a0_s8 += (num_col_a & (COL_PER_LOOP - 1));
-
-        int32x4_t out_vec_0;
-        int32x4_t out_vec_1;
-        out_vec_0[0] = ch_0_out_n;
-        out_vec_0[1] = ch_1_out_n;
-        out_vec_0[2] = ch_2_out_n;
-        out_vec_0[3] = ch_3_out_n;
-
-        out_vec_1[0] = ch_0_out_n1;
-        out_vec_1[1] = ch_1_out_n1;
-        out_vec_1[2] = ch_2_out_n1;
-        out_vec_1[3] = ch_3_out_n1;
-
-        int32x4_t mult = vldrwq_s32(out_mult);
-        int32x4_t shift = vldrwq_s32(out_shift);
-        out_mult += ROW_PER_LOOP;
-        out_shift += ROW_PER_LOOP;
-
-        out_vec_0 = arm_requantize_mve_32x4(out_vec_0, mult, shift);
-        out_vec_1 = arm_requantize_mve_32x4(out_vec_1, mult, shift);
-
-        out_vec_0 = vaddq_n_s32(out_vec_0, out_offset);
-        out_vec_0 = vmaxq_s32(out_vec_0, vdupq_n_s32(activation_min));
-        out_vec_0 = vminq_s32(out_vec_0, vdupq_n_s32(activation_max));
-        vstrbq_s32(out_0, out_vec_0);
-        out_0 += ROW_PER_LOOP;
-
-        out_vec_1 = vaddq_n_s32(out_vec_1, out_offset);
-        out_vec_1 = vmaxq_s32(out_vec_1, vdupq_n_s32(activation_min));
-        out_vec_1 = vminq_s32(out_vec_1, vdupq_n_s32(activation_max));
-        vstrbq_s32(out_1, out_vec_1);
-        out_1 += ROW_PER_LOOP;
-        row_count--;
-        ip_a0_s8 += (num_col_a * 3);
-    }
-
-    row_count = output_ch & (ROW_PER_LOOP - 1);
-
-    if (row_count)
-    {
-        ip_a0_s8 = input_a + num_col_a * (output_ch & ~3);
-        const mve_pred16_t p = vctp32q((uint32_t)row_count);
-        int32x4_t out_vec_0 = vdupq_n_s32(0);
-        int32x4_t out_vec_1 = vdupq_n_s32(0);
-        int32x4_t mult_tail;
-        int32x4_t shift_tail;
-
-        for (int i_ch = 0; i_ch < row_count; i_ch++)
-        {
-            int32_t output_0 = bias[i_ch];
-            int32_t output_1 = bias[i_ch];
-            const q15_t *ip_b0_s16 = input_b;
-            const q15_t *ip_b1_s16 = input_b + num_col_a;
-
-            for (int i_idx = 0; i_idx < num_col_a; i_idx++)
-            {
-                output_0 += ip_b0_s16[i_idx] * ip_a0_s8[i_idx];
-                output_1 += ip_b1_s16[i_idx] * ip_a0_s8[i_idx];
-            }
-
-            ip_a0_s8 += num_col_a;
-            out_vec_0[i_ch] = output_0;
-            out_vec_1[i_ch] = output_1;
-            mult_tail[i_ch] = out_mult[i_ch];
-            shift_tail[i_ch] = out_shift[i_ch];
-        }
-        out_vec_0 = arm_requantize_mve_32x4(out_vec_0, mult_tail, shift_tail);
-        out_vec_1 = arm_requantize_mve_32x4(out_vec_1, mult_tail, shift_tail);
-
-        out_vec_0 = vaddq_n_s32(out_vec_0, out_offset);
-        out_vec_0 = vmaxq_s32(out_vec_0, vdupq_n_s32(activation_min));
-        out_vec_0 = vminq_s32(out_vec_0, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out_0, out_vec_0, p);
-
-        out_vec_1 = vaddq_n_s32(out_vec_1, out_offset);
-        out_vec_1 = vmaxq_s32(out_vec_1, vdupq_n_s32(activation_min));
-        out_vec_1 = vminq_s32(out_vec_1, vdupq_n_s32(activation_max));
-
-        vstrbq_p_s32(out_1, out_vec_1, p);
-        out_1 += row_count;
-    }
-
-    return out_1;
-
-#elif defined(ARM_MATH_DSP)
+#if !defined(ARM_MATH_MVEI)
     /* set up the second output pointers */
     q7_t *out_1 = out_0 + output_ch;
     const int32_t *bias = output_bias;
@@ -235,12 +68,20 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
         /* align the second pointer for A */
         const q7_t *ip_a1 = ip_a0 + num_col_a;
 
+        q31_t ch_0_out_0 = 0;
+        q31_t ch_0_out_1 = 0;
+        q31_t ch_1_out_0 = 0;
+        q31_t ch_1_out_1 = 0;
         /* Init accumulator with bias for channel N and N + 1 */
-        q31_t ch_0_out_0 = *bias;
-        q31_t ch_0_out_1 = *bias++;
-        q31_t ch_1_out_0 = *bias;
-        q31_t ch_1_out_1 = *bias++;
+        if (bias)
+        {
+            ch_0_out_0 = *bias;
+            ch_0_out_1 = *bias++;
+            ch_1_out_0 = *bias;
+            ch_1_out_1 = *bias++;
+        }
 
+#if defined(ARM_MATH_DSP)
         uint16_t col_count = num_col_a / 4;
         /* accumulate over the vector */
         while (col_count)
@@ -268,6 +109,9 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
             col_count--;
         } /* while over col_count */
         col_count = num_col_a & 0x3;
+#else
+        uint16_t col_count = num_col_a;
+#endif
         while (col_count)
         {
             q7_t a0 = *ip_a0++;
@@ -322,10 +166,17 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
         const q15_t *ip_b0 = input_b;
         const q15_t *ip_b1 = ip_b0 + num_col_a;
 
+        q31_t ch_0_out_0 = 0;
+        q31_t ch_0_out_1 = 0;
+
         /* load the bias */
-        q31_t ch_0_out_0 = *bias;
-        q31_t ch_0_out_1 = *bias++;
+        if (bias)
+        {
+            ch_0_out_0 = *bias;
+            ch_0_out_1 = *bias++;
+        }
 
+#if defined(ARM_MATH_DSP)
         uint16_t col_count = num_col_a >> 2;
         while (col_count)
         {
@@ -346,6 +197,9 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
             col_count--;
         }
         col_count = num_col_a & 0x3;
+#else
+        uint16_t col_count = num_col_a;
+#endif
         while (col_count)
         {
             q7_t a0 = *ip_a0++;
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
index 0f0ddbe..9eed28f 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mult_s8.c
  * Description:  General Matrix-multiplication function
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.5
+ * $Date:        16 August 2022
+ * $Revision:    V.2.0.7
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -75,27 +75,27 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
             for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
             {
                 mve_pred16_t p = vctp16q((uint32_t)row_len_tmp);
-                const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
+                const int16x8_t offset = vdupq_x_n_s16(col_offset, p);
                 row_len_tmp -= 8;
 
-                int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
-                ip_r0 += 8;
-
-                int16x8_t c0 = vldrbq_z_s16(ip_c0, p);
+                int16x8_t c0 = vldrbq_s16(ip_c0);
                 ip_c0 += 8;
-                c0 = vaddq_m_s16(vuninitializedq_s16(), c0, offset, p);
+                c0 = vaddq_s16(c0, offset);
 
-                int16x8_t c1 = vldrbq_z_s16(ip_c1, p);
+                int16x8_t c1 = vldrbq_s16(ip_c1);
                 ip_c1 += 8;
-                c1 = vaddq_m_s16(vuninitializedq_s16(), c1, offset, p);
+                c1 = vaddq_s16(c1, offset);
 
-                int16x8_t c2 = vldrbq_z_s16(ip_c2, p);
+                int16x8_t c2 = vldrbq_s16(ip_c2);
                 ip_c2 += 8;
-                c2 = vaddq_m_s16(vuninitializedq_s16(), c2, offset, p);
+                c2 = vaddq_s16(c2, offset);
 
-                int16x8_t c3 = vldrbq_z_s16(ip_c3, p);
+                int16x8_t c3 = vldrbq_s16(ip_c3);
                 ip_c3 += 8;
-                c3 = vaddq_m_s16(vuninitializedq_s16(), c3, offset, p);
+                c3 = vaddq_s16(c3, offset);
+
+                int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
+                ip_r0 += 8;
 
                 acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p);
                 acc_1 = vmladavaq_p_s16(acc_1, r0, c1, p);
@@ -135,15 +135,15 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                 for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
                 {
                     const mve_pred16_t p = vctp16q((uint32_t)row_len_tmp);
-                    const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
+                    const int16x8_t offset = vdupq_x_n_s16(col_offset, p);
                     row_len_tmp -= 8;
 
-                    int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
-                    ip_r0 += 8;
-                    int16x8_t c0 = vldrbq_z_s16(ip_c0, p);
+                    int16x8_t c0 = vldrbq_s16(ip_c0);
                     ip_c0 += 8;
+                    c0 = vaddq_s16(c0, offset);
 
-                    c0 = vaddq_m_s16(vuninitializedq_s16(), c0, offset, p);
+                    int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
+                    ip_r0 += 8;
                     acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p);
                 }
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
index 8720a9e..0987a31 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_mat_q7_vec_q15.c
  * Description:  Mixed Q15-Q7 fully-connected layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,43 +42,23 @@
  * @{
  */
 
-/**
+/*
  * @brief Mixed Q15-Q7 fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Q7_Q15 version of the fully connected layer
- *
- *  Weights are in q7_t and Activations are in q15_t
- *
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
-                                              const q7_t *pM,
-                                              const uint16_t dim_vec,
-                                              const uint16_t num_of_rows,
-                                              const uint16_t bias_shift,
-                                              const uint16_t out_shift,
-                                              const q7_t *bias,
-                                              q15_t *pOut,
-                                              q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
+                                                       const q7_t *pM,
+                                                       const uint16_t dim_vec,
+                                                       const uint16_t num_of_rows,
+                                                       const uint16_t bias_shift,
+                                                       const uint16_t out_shift,
+                                                       const q7_t *bias,
+                                                       q15_t *pOut,
+                                                       q15_t *vec_buffer)
 {
     (void)vec_buffer;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q7_t *pB = pM;
@@ -190,8 +170,8 @@ arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV,
 
 #endif /* ARM_MATH_DSP */
 
-    /* Return to ARM_MATH_SUCCESS */
-    return (ARM_MATH_SUCCESS);
+    /* Return ARM_CMSIS_NN_SUCCESS */
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
index f59825b..f4872c1 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_mat_q7_vec_q15_opt.c
  * Description:  Mixed Q15-Q7 opt fully-connected layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,92 +42,24 @@
  * @{
  */
 
-/**
- * @brief Mixed Q15-Q7 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Q7_Q15 version of the fully connected layer
- *
- *  Weights are in q7_t and Activations are in q15_t
- *
- *  Limitation: x4 version requires weight reordering to work
- *
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original q7_t matrix looks like this:
- *
- *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
- *
- *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
- *
- *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
- *
- *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
- *
- *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
- *
- *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 |
- *
- *  | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 |
- *
- *  | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 |
- *
- *  The column left over will be in-order.
- *  which is:
- *  | a17 | a27 | a37 | a47 |
- *
- *  For the left-over rows, we do 1x1 computation, so the data remains
- *  as its original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *  | a11 | a21 | a12 | a22 | a31 | a41 |
- *
- *  | a32 | a42 | a13 | a23 | a14 | a24 |
- *
- *  | a33 | a43 | a34 | a44 | a15 | a25 |
- *
- *  | a16 | a26 | a35 | a45 | a36 | a46 |
- *
- *  | a17 | a27 | a37 | a47 | a51 | a52 |
- *
- *  | a53 | a54 | a55 | a56 | a57 | a61 |
- *
- *  | a62 | a63 | a64 | a65 | a66 | a67 |
- *
+/*
+ * Mixed Q15-Q7 opt fully-connected layer function
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
-                                                  const q7_t *pM,
-                                                  const uint16_t dim_vec,
-                                                  const uint16_t num_of_rows,
-                                                  const uint16_t bias_shift,
-                                                  const uint16_t out_shift,
-                                                  const q7_t *bias,
-                                                  q15_t *pOut,
-                                                  q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
+                                                           const q7_t *pM,
+                                                           const uint16_t dim_vec,
+                                                           const uint16_t num_of_rows,
+                                                           const uint16_t bias_shift,
+                                                           const uint16_t out_shift,
+                                                           const q7_t *bias,
+                                                           q15_t *pOut,
+                                                           q15_t *vec_buffer)
 {
 
     (void)vec_buffer;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q7_t *pB = pM;
@@ -206,55 +138,47 @@ arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
          */
 
 #ifndef ARM_MATH_BIG_ENDIAN
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r1, [%[pB]], #8\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r1, [%[pB]], #8\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #else
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r1, [%[pB]], #8\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r1, [%[pB]], #8\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #endif /* ARM_MATH_BIG_ENDIAN */
 
 #endif /* USE_INTRINSIC */
@@ -410,8 +334,8 @@ arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV,
 
 #endif /* ARM_MATH_DSP */
 
-    /* Return to ARM_MATH_SUCCESS */
-    return (ARM_MATH_SUCCESS);
+    /* Return ARM_CMSIS_NN_SUCCESS */
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
index a41299d..6ea0b27 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_q15.c
  * Description:  Q15 basic fully-connected layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,40 +42,23 @@
  * @{
  */
 
-/**
- * @brief Q15 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
+/*
+ * Q15 basic fully-connected layer function
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_q15(const q15_t *pV,
-                                   const q15_t *pM,
-                                   const uint16_t dim_vec,
-                                   const uint16_t num_of_rows,
-                                   const uint16_t bias_shift,
-                                   const uint16_t out_shift,
-                                   const q15_t *bias,
-                                   q15_t *pOut,
-                                   q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_q15(const q15_t *pV,
+                                            const q15_t *pM,
+                                            const uint16_t dim_vec,
+                                            const uint16_t num_of_rows,
+                                            const uint16_t bias_shift,
+                                            const uint16_t out_shift,
+                                            const q15_t *bias,
+                                            q15_t *pOut,
+                                            q15_t *vec_buffer)
 {
     (void)vec_buffer;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q15_t *pB = pM;
@@ -189,7 +172,7 @@ arm_status arm_fully_connected_q15(const q15_t *pV,
 #endif /* ARM_MATH_DSP */
 
     /* Return to application */
-    return (ARM_MATH_SUCCESS);
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
index 76738f3..82887fa 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_q15_opt.c
  * Description:  Q15 opt fully-connected layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,71 +42,23 @@
  * @{
  */
 
-/**
+/*
  * @brief Q15 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: 0
- *
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original matrix looks like this:
- *
- *  | a11 | a12 | a13 |
- *
- *  | a21 | a22 | a23 |
- *
- *  | a31 | a32 | a33 |
- *
- *  | a41 | a42 | a43 |
- *
- *  | a51 | a52 | a53 |
- *
- *  | a61 | a62 | a63 |
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
- *
- *  | a13 | a23 | a33 | a43 |
- *
- *  Remaining rows are kept the same original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *
- *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
- *
- *  | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 |
- *
- *  | a62 | a63 |
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_q15_opt(const q15_t *pV,
-                                       const q15_t *pM,
-                                       const uint16_t dim_vec,
-                                       const uint16_t num_of_rows,
-                                       const uint16_t bias_shift,
-                                       const uint16_t out_shift,
-                                       const q15_t *bias,
-                                       q15_t *pOut,
-                                       q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_q15_opt(const q15_t *pV,
+                                                const q15_t *pM,
+                                                const uint16_t dim_vec,
+                                                const uint16_t num_of_rows,
+                                                const uint16_t bias_shift,
+                                                const uint16_t out_shift,
+                                                const q15_t *bias,
+                                                q15_t *pOut,
+                                                q15_t *vec_buffer)
 {
     (void)vec_buffer;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q15_t *pB = pM;
@@ -157,26 +109,22 @@ arm_status arm_fully_connected_q15_opt(const q15_t *pV,
          * activation data: inV
          */
 
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #4\n"
-                     "ldr.w r0, [%[pB]], #16\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "ldr.w r1, [%[pB] , #-12]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r2, [%[pB] , #-8]\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "ldr.w r3, [%[pB] , #-4]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #4\n"
+            "ldr.w r0, [%[pB]], #16\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "ldr.w r1, [%[pB] , #-12]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r2, [%[pB] , #-8]\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "ldr.w r3, [%[pB] , #-4]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 
 #endif /* USE_INTRINSIC */
 
@@ -329,8 +277,8 @@ arm_status arm_fully_connected_q15_opt(const q15_t *pV,
 
 #endif /* ARM_MATH_DSP */
 
-    /* Return to ARM_MATH_SUCCESS */
-    return (ARM_MATH_SUCCESS);
+    /* Return ARM_CMSIS_NN_SUCCESS */
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
index 7de8246..de67bb2 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_q7.c
  * Description:  Q7 basic fully-connected layer function
  *
- * $Date:        January 26, 2021
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,42 +42,23 @@
  * @{
  */
 
-/**
- * @brief Q7 basic fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: dim_vec
- *
- * This basic function is designed to work with regular weight
- * matrix without interleaving.
- *
+/*
+ * Q7 basic fully-connected layer function
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_q7(const q7_t *pV,
-                                  const q7_t *pM,
-                                  const uint16_t dim_vec,
-                                  const uint16_t num_of_rows,
-                                  const uint16_t bias_shift,
-                                  const uint16_t out_shift,
-                                  const q7_t *bias,
-                                  q7_t *pOut,
-                                  q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_q7(const q7_t *pV,
+                                           const q7_t *pM,
+                                           const uint16_t dim_vec,
+                                           const uint16_t num_of_rows,
+                                           const uint16_t bias_shift,
+                                           const uint16_t out_shift,
+                                           const q7_t *bias,
+                                           q7_t *pOut,
+                                           q15_t *vec_buffer)
 {
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q7_t *pB = pM;
@@ -193,8 +174,8 @@ arm_status arm_fully_connected_q7(const q7_t *pV,
 
 #endif /* ARM_MATH_DSP */
 
-    /* Return to ARM_MATH_SUCCESS */
-    return (ARM_MATH_SUCCESS);
+    /* Return ARM_CMSIS_NN_SUCCESS */
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
index f5fb5e0..0c8eae6 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_q7_opt.c
  * Description:  Q7 basic fully-connected layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,103 +42,23 @@
  * @{
  */
 
-/**
- * @brief Q7 opt fully-connected layer function
- * @param[in]       pV          pointer to input vector
- * @param[in]       pM          pointer to matrix weights
- * @param[in]       dim_vec     length of the vector
- * @param[in]       num_of_rows number of rows in weight matrix
- * @param[in]       bias_shift  amount of left-shift for bias
- * @param[in]       out_shift   amount of right-shift for output
- * @param[in]       bias        pointer to bias
- * @param[in,out]   pOut        pointer to output vector
- * @param[in,out]   vec_buffer  pointer to buffer space for input
- * @return     The function returns <code>ARM_MATH_SUCCESS</code>
- *
- * @details
- *
- * <b>Buffer size:</b>
- *
- * vec_buffer size: dim_vec
- *
- * This opt function is designed to work with interleaved weight
- * matrix. The vector input is assumed in q7_t format, we call
- *  arm_q7_to_q15_no_shift_shuffle function to expand into
- *  q15_t format with certain weight re-ordering, refer to the function
- *  comments for more details.
- *  Here we use only one pointer to read 4 rows in the weight
- *  matrix. So if the original q7_t matrix looks like this:
- *
- *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
- *
- *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
- *
- *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
- *
- *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
- *
- *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
- *
- *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *
- *  We operates on multiple-of-4 rows, so the first four rows becomes
- *
- *  | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 |
- *
- *  | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 |
- *
- *  | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 |
- *
- *  So within the kernel, we first read the re-ordered vector in as:
- *
- *  | b1  | b3  | and | b2  | b4  |
- *
- *  the four q31_t weights will look like
- *
- *  | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 |
- *
- *  | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 |
- *
- *  The column left over will be in-order.
- *  which is:
- *
- *  | a17 | a27 | a37 | a47 |
- *
- *  For the left-over rows, we do 1x1 computation, so the data remains
- *  as its original order.
- *
- *  So the stored weight matrix looks like this:
- *
- *  | a11 | a21 | a13 | a23 | a31 | a41 |
- *
- *  | a33 | a43 | a12 | a22 | a14 | a24 |
- *
- *  | a32 | a42 | a34 | a44 | a15 | a25 |
- *
- *  | a35 | a45 | a16 | a26 | a36 | a46 |
- *
- *  | a17 | a27 | a37 | a47 | a51 | a52 |
- *
- *  | a53 | a54 | a55 | a56 | a57 | a61 |
- *
- *  | a62 | a63 | a64 | a65 | a66 | a67 |
- *
- *
+/*
+ * Q7 opt fully-connected layer function
+ * Refer to the function header for details
  */
 
-arm_status arm_fully_connected_q7_opt(const q7_t *pV,
-                                      const q7_t *pM,
-                                      const uint16_t dim_vec,
-                                      const uint16_t num_of_rows,
-                                      const uint16_t bias_shift,
-                                      const uint16_t out_shift,
-                                      const q7_t *bias,
-                                      q7_t *pOut,
-                                      q15_t *vec_buffer)
+arm_cmsis_nn_status arm_fully_connected_q7_opt(const q7_t *pV,
+                                               const q7_t *pM,
+                                               const uint16_t dim_vec,
+                                               const uint16_t num_of_rows,
+                                               const uint16_t bias_shift,
+                                               const uint16_t out_shift,
+                                               const q7_t *bias,
+                                               q7_t *pOut,
+                                               q15_t *vec_buffer)
 {
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     const q7_t *pB = pM;
@@ -239,81 +159,73 @@ arm_status arm_fully_connected_q7_opt(const q7_t *pV,
          */
 
 #ifndef ARM_MATH_BIG_ENDIAN
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #8\n"
-                     "ldr.w r1, [%[pB]], #16\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-12]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "ldr.w r4, [%[pA], #-4]\n"
-                     "ldr.w r1, [%[pB], #-8]\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r1, %[sum]\n"
-                     "smlad %[sum2], r4, r0, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r3, %[sum3]\n"
-                     "smlad %[sum4], r4, r2, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #8\n"
+            "ldr.w r1, [%[pB]], #16\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-12]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "ldr.w r4, [%[pA], #-4]\n"
+            "ldr.w r1, [%[pB], #-8]\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r1, %[sum]\n"
+            "smlad %[sum2], r4, r0, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r3, %[sum3]\n"
+            "smlad %[sum4], r4, r2, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #else
-        asm volatile("COL_LOOP_%=:\n"
-                     "ldr.w r4, [%[pA]], #8\n"
-                     "ldr.w r1, [%[pB]], #16\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-12]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "ldr.w r4, [%[pA], #-4]\n"
-                     "ldr.w r1, [%[pB], #-8]\n"
-                     "mov.w r0, r1, ror #8\n"
-                     "sxtb16 r0, r0\n"
-                     "sxtb16 r1, r1\n"
-                     "smlad %[sum], r4, r0, %[sum]\n"
-                     "smlad %[sum2], r4, r1, %[sum2]\n"
-                     "ldr.w r3, [%[pB], #-4]\n"
-                     "mov.w r2, r3, ror #8\n"
-                     "sxtb16 r2, r2\n"
-                     "sxtb16 r3, r3\n"
-                     "smlad %[sum3], r4, r2, %[sum3]\n"
-                     "smlad %[sum4], r4, r3, %[sum4]\n"
-                     "subs %[colCnt], #1\n"
-                     "bne COL_LOOP_%=\n"
-                     : [ sum ] "+r"(sum),
-                       [ sum2 ] "+r"(sum2),
-                       [ sum3 ] "+r"(sum3),
-                       [ sum4 ] "+r"(sum4),
-                       [ pB ] "+r"(pB),
-                       [ pA ] "+r"(pA)
-                     : [ colCnt ] "r"(colCnt)
-                     : "r0", "r1", "r2", "r3", "r4");
+        asm volatile(
+            "COL_LOOP_%=:\n"
+            "ldr.w r4, [%[pA]], #8\n"
+            "ldr.w r1, [%[pB]], #16\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-12]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "ldr.w r4, [%[pA], #-4]\n"
+            "ldr.w r1, [%[pB], #-8]\n"
+            "mov.w r0, r1, ror #8\n"
+            "sxtb16 r0, r0\n"
+            "sxtb16 r1, r1\n"
+            "smlad %[sum], r4, r0, %[sum]\n"
+            "smlad %[sum2], r4, r1, %[sum2]\n"
+            "ldr.w r3, [%[pB], #-4]\n"
+            "mov.w r2, r3, ror #8\n"
+            "sxtb16 r2, r2\n"
+            "sxtb16 r3, r3\n"
+            "smlad %[sum3], r4, r2, %[sum3]\n"
+            "smlad %[sum4], r4, r3, %[sum4]\n"
+            "subs %[colCnt], #1\n"
+            "bne COL_LOOP_%=\n"
+            : [sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA)
+            : [colCnt] "r"(colCnt)
+            : "r0", "r1", "r2", "r3", "r4");
 #endif /* ARM_MATH_BIG_ENDIAN */
 
 #endif /* USE_INTRINSIC */
@@ -384,6 +296,7 @@ arm_status arm_fully_connected_q7_opt(const q7_t *pV,
 
 #else
     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
+    (void)vec_buffer;
     uint16_t rowCnt = num_of_rows >> 2;
     const q7_t *pB = pM;
     const q7_t *pA;
@@ -487,8 +400,8 @@ arm_status arm_fully_connected_q7_opt(const q7_t *pV,
 
 #endif /* ARM_MATH_DSP */
 
-    /* Return to ARM_MATH_SUCCESS */
-    return (ARM_MATH_SUCCESS);
+    /* Return ARM_CMSIS_NN_SUCCESS */
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c
new file mode 100644
index 0000000..8e43b71
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c
@@ -0,0 +1,101 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_fully_connected_s16
+ * Description:  Fully connected function compatible with TF Lite.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M and Cortex-A cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+/*
+ * S16 basic fully-connected and matrix multiplication layer function for TensorFlow Lite
+ *
+ * Refer to the header file for details.
+ *
+ */
+arm_cmsis_nn_status arm_fully_connected_s16(const cmsis_nn_context *ctx,
+                                            const cmsis_nn_fc_params *fc_params,
+                                            const cmsis_nn_per_tensor_quant_params *quant_params,
+                                            const cmsis_nn_dims *input_dims,
+                                            const q15_t *input,
+                                            const cmsis_nn_dims *filter_dims,
+                                            const q7_t *kernel,
+                                            const cmsis_nn_dims *bias_dims,
+                                            const int64_t *bias,
+                                            const cmsis_nn_dims *output_dims,
+                                            q15_t *output)
+{
+    (void)bias_dims; /* not read by this implementation */
+    (void)ctx;       /* not read by this implementation */
+    (void)fc_params->filter_offset; /* field is discarded; it is not passed to the kernel below */
+    /* One vector-matrix multiplication is issued per batch; the loop counter starts at input_dims->n. */
+    int32_t batch_cnt = input_dims->n;
+    /* The multiplier goes through REDUCE_MULTIPLIER once, before the loop, and is reused for every batch. */
+    const q31_t reduced_multiplier = REDUCE_MULTIPLIER(quant_params->multiplier);
+
+    while (batch_cnt) /* NOTE(review): stops only at exactly 0 - assumes input_dims->n >= 0, confirm with callers */
+    {
+        arm_nn_vec_mat_mult_t_s16(input,
+                                  kernel,
+                                  bias,
+                                  output,
+                                  reduced_multiplier,
+                                  quant_params->shift,
+                                  filter_dims->n, /* col_dim or accum_depth */
+                                  output_dims->c, /* row_dim or output_depth */
+                                  fc_params->activation.min,
+                                  fc_params->activation.max);
+        input += filter_dims->n;  /* step the input pointer by one row of filter_dims->n elements */
+        output += output_dims->c; /* step the output pointer by one row of output_dims->c elements */
+        batch_cnt--;
+    }
+    /* The helper's result is not inspected, so success is reported unconditionally. */
+    return (ARM_CMSIS_NN_SUCCESS);
+}
+
+int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims)
+{
+    (void)filter_dims; /* unused: the result does not depend on the filter dimensions */
+    return 0;          /* constant result for every input */
+}
+
+/**
+ * @} end of FC group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
index e91039b..08f100a 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_fully_connected_s8
  * Description:  Fully connected function compatible with TF Lite.
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.1
+ * $Date:        19 April 2022
+ * $Revision:    V.4.0.0
  *
  * Target Processor:  Cortex-M and Cortex-A cores
  *
@@ -49,20 +49,22 @@
  *
  */
 
-arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
-                                  const cmsis_nn_fc_params *fc_params,
-                                  const cmsis_nn_per_tensor_quant_params *quant_params,
-                                  const cmsis_nn_dims *input_dims,
-                                  const q7_t *input,
-                                  const cmsis_nn_dims *filter_dims,
-                                  const q7_t *kernel,
-                                  const cmsis_nn_dims *bias_dims,
-                                  const int32_t *bias,
-                                  const cmsis_nn_dims *output_dims,
-                                  q7_t *output)
+arm_cmsis_nn_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
+                                           const cmsis_nn_fc_params *fc_params,
+                                           const cmsis_nn_per_tensor_quant_params *quant_params,
+                                           const cmsis_nn_dims *input_dims,
+                                           const q7_t *input,
+                                           const cmsis_nn_dims *filter_dims,
+                                           const q7_t *kernel,
+                                           const cmsis_nn_dims *bias_dims,
+                                           const int32_t *bias,
+                                           const cmsis_nn_dims *output_dims,
+                                           q7_t *output)
 {
     (void)bias_dims;
     (void)ctx;
+    (void)fc_params->filter_offset;
+
     int32_t batch_cnt = input_dims->n;
 
     while (batch_cnt)
@@ -72,19 +74,20 @@ arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx,
                                  bias,
                                  output,
                                  fc_params->input_offset,
-                                 fc_params->filter_offset,
+                                 0,
                                  fc_params->output_offset,
                                  quant_params->multiplier,
                                  quant_params->shift,
                                  filter_dims->n, /* col_dim or accum_depth */
                                  output_dims->c, /* row_dim or output_depth */
                                  fc_params->activation.min,
-                                 fc_params->activation.max);
+                                 fc_params->activation.max,
+                                 1L);
         input += filter_dims->n;
         output += output_dims->c;
         batch_cnt--;
     }
-    return (ARM_MATH_SUCCESS);
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims)
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c
index e2bba44..7875682 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_accumulate_q7_to_q15.c
  * Description:  Accumulate q7 vector into q15 one.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        20 July 2021
+ * $Revision:    V.1.1.2
  *
  * pSrc Processor:  Cortex-M CPUs
  *
@@ -46,11 +46,13 @@ void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)
 {
     q15_t *pCnt = pDst;
     const q7_t *pV = pSrc;
+    int32_t count = length;
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     q31_t v1, v2, vo1, vo2;
-    int32_t cnt = length >> 2;
+    count = length >> 2;
     q31_t in;
 
-    while (cnt > 0l)
+    while (count > 0l)
     {
         q31_t value = arm_nn_read_q7x4_ia(&pV);
         v1 = __SXTB16(__ROR((uint32_t)value, 8));
@@ -69,13 +71,14 @@ void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)
         in = arm_nn_read_q15x2(pCnt);
         arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in));
 
-        cnt--;
+        count--;
     }
-    cnt = length & 0x3;
-    while (cnt > 0l)
+    count = length & 0x3;
+#endif
+    while (count > 0l)
     {
         *pCnt++ += *pV++;
-        cnt--;
+        count--;
     }
 }
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c
index 9df8a83..7ff743d 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c
@@ -23,8 +23,8 @@
  * Title:        arm_nn_add_q7.c
  * Description:  Non saturating addition of elements of a q7 vector.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        20. July 2021
+ * $Revision:    V.1.1.1
  *
  * Target Processor:  Cortex-M cores
  *
@@ -46,7 +46,7 @@ void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size)
 {
     uint32_t block_count;
     q31_t result = 0;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Loop unrolling: Compute 4 outputs at a time */
     block_count = block_size >> 2U;
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
index f5725d6..7d12144 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_depthwise_conv_nt_t_padded_s8.c
  * Description:  Depthwise convolution with padded matrices.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        27. July 2022
+ * $Revision:    V.2.0.0
  *
  * Target Processor:  Cortex-M processors with MVE extension
  * -------------------------------------------------------------------- */
@@ -48,38 +48,43 @@
  *
  */
 
-q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
-                                           const q7_t *rhs,
-                                           const int32_t input_offset,
-                                           const uint16_t num_ch,
-                                           const int32_t *out_shift,
-                                           const int32_t *out_mult,
-                                           const int32_t out_offset,
-                                           const int32_t activation_min,
-                                           const int32_t activation_max,
-                                           const uint16_t row_x_col,
-                                           const int32_t *const output_bias,
-                                           q7_t *out)
+arm_cmsis_nn_status arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
+                                                         const q7_t *rhs,
+                                                         const int32_t input_offset,
+                                                         const int32_t active_ch,
+                                                         const int32_t total_ch,
+                                                         const int32_t *out_shift,
+                                                         const int32_t *out_mult,
+                                                         const int32_t out_offset,
+                                                         const int32_t activation_min,
+                                                         const int32_t activation_max,
+                                                         const uint16_t row_x_col,
+                                                         const int32_t *const output_bias,
+                                                         q7_t *out)
 {
 #if defined(ARM_MATH_MVEI)
-    int32_t loop_count = (num_ch + 3) / 4;
+    int32_t loop_count = (active_ch + 3) / 4;
     const int32_t *bias = output_bias;
-    uint32_t num_ch_to_process = num_ch;
+    uint32_t num_ch_to_process = active_ch;
 
     for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count;
          num_ch_to_process -= 4, out += 4, offset += 4, i_loop_cnt++)
     {
-        int32x4_t out_0 = vldrwq_s32(bias);
+        int32x4_t out_0 = vdupq_n_s32(0);
+        if (bias)
+        {
+            out_0 = vldrwq_s32(bias);
+            bias += 4;
+        }
         int32x4_t out_1 = out_0;
         int32x4_t out_2 = out_0;
         int32x4_t out_3 = out_0;
-        bias += 4;
 
         const int8_t *rhs_0 = rhs + offset;
         const int8_t *lhs_0 = lhs + offset;
-        const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset;
-        const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset;
-        const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset;
+        const int8_t *lhs_1 = lhs + row_x_col * CH_IN_BLOCK_MVE + offset;
+        const int8_t *lhs_2 = lhs + (row_x_col * CH_IN_BLOCK_MVE * 2) + offset;
+        const int8_t *lhs_3 = lhs + (row_x_col * CH_IN_BLOCK_MVE * 3) + offset;
 
         for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++)
         {
@@ -102,12 +107,12 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
 
             out_3 += vmulq_s32(ip_3, ker_0);
 
-            lhs_0 += num_ch;
-            lhs_1 += num_ch;
-            lhs_2 += num_ch;
-            lhs_3 += num_ch;
+            lhs_0 += CH_IN_BLOCK_MVE;
+            lhs_1 += CH_IN_BLOCK_MVE;
+            lhs_2 += CH_IN_BLOCK_MVE;
+            lhs_3 += CH_IN_BLOCK_MVE;
 
-            rhs_0 += num_ch;
+            rhs_0 += total_ch;
         }
 
         const int32x4_t mult = vldrwq_s32(out_mult);
@@ -126,33 +131,29 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
         out_1 = vaddq_n_s32(out_1, out_offset);
         out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
         out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + num_ch, out_1, p);
+        vstrbq_p_s32(out + total_ch, out_1, p);
 
         out_2 = arm_requantize_mve_32x4(out_2, mult, shift);
         out_2 = vaddq_n_s32(out_2, out_offset);
         out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min));
         out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + 2 * num_ch, out_2, p);
+        vstrbq_p_s32(out + 2 * total_ch, out_2, p);
 
         out_3 = arm_requantize_mve_32x4(out_3, mult, shift);
         out_3 = vaddq_n_s32(out_3, out_offset);
         out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min));
         out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + 3 * num_ch, out_3, p);
+        vstrbq_p_s32(out + 3 * total_ch, out_3, p);
     }
 
-    const int tail_ch = num_ch & 0x3;
-    if (tail_ch != 0)
-    {
-        out -= (4 - tail_ch);
-    }
-    return out + (3 * num_ch);
+    return ARM_CMSIS_NN_SUCCESS;
 
 #else
     (void)lhs;
     (void)rhs;
     (void)input_offset;
-    (void)num_ch;
+    (void)active_ch;
+    (void)total_ch;
     (void)out_shift;
     (void)out_mult;
     (void)out_offset;
@@ -161,7 +162,7 @@ q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
     (void)row_x_col;
     (void)output_bias;
     (void)out;
-    return NULL;
+    return ARM_CMSIS_NN_NO_IMPL_ERROR;
 #endif
 }
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c
new file mode 100644
index 0000000..503aa64
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c
@@ -0,0 +1,175 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_depthwise_conv_nt_t_s16.c
+ * Description:  Depthwise convolution on matrices with no padding.
+ *
+ * $Date:        6 July 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M processors with MVE extension
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup NNBasicMath
+ * @{
+ */
+
+/*
+ * Depthwise convolution of rhs matrix with 4 lhs matrices with no padding. Dimensions are the same for lhs and rhs.
+ * Channels are processed in groups of 4 and results are written to 4 output rows that lie num_ch elements apart.
+ * Refer header file for details.
+ * Returns out + 4 * num_ch; without ARM_MATH_MVEI nothing is computed and NULL is returned.
+ */
+int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
+                                        const q7_t *rhs,
+                                        const uint16_t num_ch,
+                                        const int32_t *out_shift,
+                                        const int32_t *out_mult,
+                                        const int32_t activation_min,
+                                        const int32_t activation_max,
+                                        const uint16_t row_x_col,
+                                        const int64_t *const output_bias,
+                                        int16_t *out)
+{
+#if defined(ARM_MATH_MVEI)
+    /* One loop iteration covers a group of up to 4 channels; loop_count rounds num_ch up to whole groups. */
+    const int64_t *bias = output_bias;
+    int32_t loop_count = (num_ch + 3) / 4;
+    uint32_t num_ch_to_process = num_ch;
+
+    for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count;
+         num_ch_to_process -= 4, offset += 4, out += 4, i_loop_cnt++)
+    {
+        const int8_t *rhs_0 = rhs + offset;
+        const int16_t *lhs_0 = lhs + offset;
+        const int16_t *lhs_1 = lhs + row_x_col * num_ch + offset; /* lhs matrices are row_x_col * num_ch apart */
+        const int16_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset;
+        const int16_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset;
+        /* One 32-bit accumulator vector (4 channels) per lhs matrix. */
+        int32x4_t out_0 = vdupq_n_s32(0);
+        int32x4_t out_1 = vdupq_n_s32(0);
+        int32x4_t out_2 = vdupq_n_s32(0);
+        int32x4_t out_3 = vdupq_n_s32(0);
+
+        for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++)
+        {
+            const int32x4_t ker_0 = vldrbq_s32(rhs_0); /* 4 kernel bytes widened to 32-bit lanes */
+
+            int32x4_t ip_0 = vldrhq_s32(lhs_0); /* 4 int16 inputs widened to 32-bit lanes */
+            out_0 += vmulq_s32(ip_0, ker_0);
+
+            int32x4_t ip_1 = vldrhq_s32(lhs_1);
+            out_1 += vmulq_s32(ip_1, ker_0);
+
+            int32x4_t ip_2 = vldrhq_s32(lhs_2);
+            out_2 += vmulq_s32(ip_2, ker_0);
+
+            int32x4_t ip_3 = vldrhq_s32(lhs_3);
+            out_3 += vmulq_s32(ip_3, ker_0);
+            /* Step to the same 4 channels of the next element; consecutive elements are num_ch apart. */
+            lhs_0 += num_ch;
+            lhs_1 += num_ch;
+            lhs_2 += num_ch;
+            lhs_3 += num_ch;
+
+            rhs_0 += num_ch;
+        }
+        /* Scalar, per-lane requantization in 64 bits. All 4 lanes are visited even for a partial tail group. */
+        for (int i_requantize = 0; i_requantize < 4; i_requantize++)
+        { /* NOTE(review): for a tail group this reads out_mult/out_shift/bias past num_ch - confirm sizing. */
+            int32_t reduced_multiplier = REDUCE_MULTIPLIER(out_mult[i_requantize]);
+            int32_t shift = out_shift[i_requantize];
+            int64_t in_requantize_0 = (int64_t)out_0[i_requantize];
+            int64_t in_requantize_1 = (int64_t)out_1[i_requantize];
+            int64_t in_requantize_2 = (int64_t)out_2[i_requantize];
+            int64_t in_requantize_3 = (int64_t)out_3[i_requantize];
+            /* Bias is optional (NULL allowed); the same per-channel bias is added to all 4 accumulators. */
+            if (bias)
+            {
+                in_requantize_0 += *bias;
+                in_requantize_1 += *bias;
+                in_requantize_2 += *bias;
+                in_requantize_3 += *bias;
+                bias++;
+            }
+
+            out_0[i_requantize] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier, shift);
+            out_1[i_requantize] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier, shift);
+            out_2[i_requantize] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier, shift);
+            out_3[i_requantize] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier, shift);
+        }
+        /* Predicate enabling only the lanes whose index is below the number of channels still remaining. */
+        mve_pred16_t p = vctp32q(num_ch_to_process);
+        /* Clamp to the activation range, then store the enabled lanes as 16-bit values, one row per lhs matrix. */
+        out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
+        out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
+        vstrhq_p_s32(out, out_0, p);
+
+        out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
+        out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
+        vstrhq_p_s32(out + num_ch, out_1, p);
+
+        out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min));
+        out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max));
+        vstrhq_p_s32(out + 2 * num_ch, out_2, p);
+
+        out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min));
+        out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max));
+        vstrhq_p_s32(out + 3 * num_ch, out_3, p);
+        /* Advance the per-channel quantization parameters to the next group of 4 channels. */
+        out_mult += 4;
+        out_shift += 4;
+    }
+    const int tail_ch = num_ch & 0x3;
+    if (tail_ch != 0)
+    {
+        out -= (4 - tail_ch);
+    }
+    /* out advanced by 4 per group; the step back above corrects the overshoot of a partial last group. */
+    return out + (3 * num_ch);
+#else
+    (void)lhs;
+    (void)rhs;
+    (void)num_ch;
+    (void)out_shift;
+    (void)out_mult;
+    (void)activation_min;
+    (void)activation_max;
+    (void)row_x_col;
+    (void)output_bias;
+    (void)out;
+    return NULL;
+#endif
+}
+
+/**
+ * @} end of NNBasicMath group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
index 66beef8..b8d0871 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_depthwise_conv_nt_t_s8.c
  * Description:  Depthwise convolution on matrices with no padding.
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        27. July 2022
+ * $Revision:    V.2.0.0
  *
  * Target Processor:  Cortex-M processors with MVE extension.
  * -------------------------------------------------------------------- */
@@ -46,39 +46,43 @@
  * Refer header file for details.
  *
  */
-
-q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
-                                    const q7_t *rhs,
-                                    const int32_t input_offset,
-                                    const uint16_t num_ch,
-                                    const int32_t *out_shift,
-                                    const int32_t *out_mult,
-                                    const int32_t out_offset,
-                                    const int32_t activation_min,
-                                    const int32_t activation_max,
-                                    const uint16_t row_x_col,
-                                    const int32_t *const output_bias,
-                                    q7_t *out)
+arm_cmsis_nn_status arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
+                                                  const q7_t *rhs,
+                                                  const int32_t input_offset,
+                                                  const int32_t active_ch,
+                                                  const int32_t total_ch,
+                                                  const int32_t *out_shift,
+                                                  const int32_t *out_mult,
+                                                  const int32_t out_offset,
+                                                  const int32_t activation_min,
+                                                  const int32_t activation_max,
+                                                  const uint16_t row_x_col,
+                                                  const int32_t *const output_bias,
+                                                  q7_t *out)
 {
 #if defined(ARM_MATH_MVEI)
     const int32_t *bias = output_bias;
-    int32_t loop_count = (num_ch + 3) / 4;
-    uint32_t num_ch_to_process = num_ch;
+    int32_t loop_count = (active_ch + 3) / 4;
+    uint32_t num_ch_to_process = active_ch;
 
     for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count;
          num_ch_to_process -= 4, offset += 4, out += 4, i_loop_cnt++)
     {
-        int32x4_t out_0 = vldrwq_s32(bias);
+        int32x4_t out_0 = vdupq_n_s32(0);
+        if (bias)
+        {
+            out_0 = vldrwq_s32(bias);
+            bias += 4;
+        }
         int32x4_t out_1 = out_0;
         int32x4_t out_2 = out_0;
         int32x4_t out_3 = out_0;
-        bias += 4;
 
         const int8_t *rhs_0 = rhs + offset;
         const int8_t *lhs_0 = lhs + offset;
-        const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset;
-        const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset;
-        const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset;
+        const int8_t *lhs_1 = lhs + row_x_col * CH_IN_BLOCK_MVE + offset;
+        const int8_t *lhs_2 = lhs + (row_x_col * CH_IN_BLOCK_MVE * 2) + offset;
+        const int8_t *lhs_3 = lhs + (row_x_col * CH_IN_BLOCK_MVE * 3) + offset;
         int32x4_t ker_sum = vdupq_n_s32(0);
 
         for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++)
@@ -98,12 +102,12 @@ q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
             int32x4_t ip_3 = vldrbq_s32(lhs_3);
             out_3 += vmulq_s32(ip_3, ker_0);
 
-            lhs_0 += num_ch;
-            lhs_1 += num_ch;
-            lhs_2 += num_ch;
-            lhs_3 += num_ch;
+            lhs_0 += CH_IN_BLOCK_MVE;
+            lhs_1 += CH_IN_BLOCK_MVE;
+            lhs_2 += CH_IN_BLOCK_MVE;
+            lhs_3 += CH_IN_BLOCK_MVE;
 
-            rhs_0 += num_ch;
+            rhs_0 += total_ch;
         }
 
         ker_sum = vmulq_n_s32(ker_sum, input_offset);
@@ -128,33 +132,28 @@ q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
         out_1 = vaddq_n_s32(out_1, out_offset);
         out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
         out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + num_ch, out_1, p);
+        vstrbq_p_s32(out + total_ch, out_1, p);
 
         out_2 = arm_requantize_mve_32x4(out_2, mult, shift);
         out_2 = vaddq_n_s32(out_2, out_offset);
         out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min));
         out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + 2 * num_ch, out_2, p);
+        vstrbq_p_s32(out + 2 * total_ch, out_2, p);
 
         out_3 = arm_requantize_mve_32x4(out_3, mult, shift);
         out_3 = vaddq_n_s32(out_3, out_offset);
         out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min));
         out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(out + 3 * num_ch, out_3, p);
-    }
-
-    const int tail_ch = num_ch & 0x3;
-    if (tail_ch != 0)
-    {
-        out -= (4 - tail_ch);
+        vstrbq_p_s32(out + 3 * total_ch, out_3, p);
     }
 
-    return out + (3 * num_ch);
+    return ARM_CMSIS_NN_SUCCESS;
 #else
     (void)lhs;
     (void)rhs;
     (void)input_offset;
-    (void)num_ch;
+    (void)active_ch;
+    (void)total_ch;
     (void)out_shift;
     (void)out_mult;
     (void)out_offset;
@@ -163,7 +162,7 @@ q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
     (void)row_x_col;
     (void)output_bias;
     (void)out;
-    return NULL;
+    return ARM_CMSIS_NN_NO_IMPL_ERROR;
 #endif
 }
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c
index 9b96f86..67685df 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mul_core_1x_s8.c
  * Description:  General Matrix-multiplication function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        22 Aug 2022
+ * $Revision:    V.3.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -46,41 +46,106 @@
  * Refer header file for details.
  *
  */
-
-arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
-                                     const int8_t *row_base,
-                                     const int8_t *col_base,
-                                     int32_t *const sum_col,
-                                     int32_t *const output)
+arm_cmsis_nn_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
+                                              const int32_t skipped_row_elements,
+                                              const int8_t *row_base_ref,
+                                              const int8_t *col_base_ref,
+                                              const int32_t out_ch,
+                                              const cmsis_nn_conv_params *conv_params,
+                                              const cmsis_nn_per_channel_quant_params *quant_params,
+                                              const int32_t *bias,
+                                              int8_t *output)
 {
-    int32_t acc_n0 = 0;
-    int32_t sum_tmp = 0;
+#if defined(ARM_MATH_MVEI)
+    const int8_t *col_base = col_base_ref;
+    int32_t *output_mult = quant_params->multiplier;
+    int32_t *output_shift = quant_params->shift;
+    const int32_t out_offset = conv_params->output_offset;
+    const int32_t out_activation_min = conv_params->activation.min;
+    const int32_t out_activation_max = conv_params->activation.max;
+    /* acc[] collects the results of up to 4 consecutive output channels so they can be requantized as one vector. */
+    int32_t acc[4];
+    for (int i = 0; i < out_ch; i++)
+    {
+        int32_t acc_n0 = 0;
+        const int8_t *row_base = row_base_ref;
 
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+        int32_t sum_tmp = 0;
 
-    __ASM volatile("   vldrb.8         q0, [%[col]], 16     \n"
-                   "   wlstp.8         lr, %[cnt], 1f       \n"
-                   "2:                                      \n"
-                   "   vaddva.s8      %[sum], q0            \n"
-                   "   vldrb.8         q1, [%[row0]], 16    \n"
-                   "   vmladava.s8    %[out0], q0, q1       \n"
-                   "   vldrb.8         q0, [%[col]], 16     \n"
-                   "   letp            lr, 2b               \n"
-                   "1:                                      \n"
-                   : [col] "+r"(col_base), [sum] "+Te"(sum_tmp), [row0] "+r"(row_base), [out0] "+Te"(acc_n0)
-                   : [cnt] "r"(row_elements)
-                   : "q0", "q1", "memory", "r14");
+#if defined(ARM_MATH_AUTOVECTORIZE)
+        for (int j = 0; j < row_elements; j++)
+        {
+            int32_t col = col_base[j];
+            sum_tmp += col;
+            acc_n0 += row_base[j] * col;
+        }
 #else
-    for (int i = 0; i < row_elements; i++)
+        __ASM volatile("   vldrb.8         q0, [%[col]], #16     \n"
+                       "   wlstp.8         lr, %[cnt], 1f       \n"
+                       "2:                                      \n"
+                       "   vaddva.s8      %[sum], q0            \n"
+                       "   vldrb.8         q1, [%[row0]], #16    \n"
+                       "   vmladava.s8    %[out0], q0, q1       \n"
+                       "   vldrb.8         q0, [%[col]], #16     \n"
+                       "   letp            lr, 2b               \n"
+                       "1:                                      \n"
+                       : [col] "+r"(col_base), [sum] "+Te"(sum_tmp), [row0] "+r"(row_base), [out0] "+Te"(acc_n0)
+                       : [cnt] "r"(row_elements)
+                       : "q0", "q1", "memory", "r14");
+#endif
+        /* Result so far: acc_n0 = sum(row * col), sum_tmp = sum(col). Below: acc_n0 += input_offset * sum(col). */
+        sum_tmp *= conv_params->input_offset;
+        acc_n0 += sum_tmp;
+
+        const int32_t index = i & 0x3;
+        acc[index] = acc_n0;
+        /* Once 4 channels are collected: add bias, requantize, add output offset, clamp and store them as 4 bytes. */
+        if (index == 3)
+        {
+            int32x4_t res = vldrwq_s32(acc);
+            if (bias)
+            {
+                res = vaddq_s32(res, vldrwq_s32(bias));
+                bias += 4;
+            }
+            res = arm_requantize_mve_32x4(res, vldrwq_s32(output_mult), vldrwq_s32(output_shift));
+            output_mult += 4;
+            output_shift += 4;
+            res = vaddq_n_s32(res, out_offset);
+            res = vmaxq_s32(res, vdupq_n_s32(out_activation_min));
+            res = vminq_s32(res, vdupq_n_s32(out_activation_max));
+            vstrbq_s32(output, res);
+            output += 4;
+        }
+        col_base = col_base_ref + (i + 1) * (row_elements + skipped_row_elements); /* column data for i + 1 */
+    }
+    // Handle left over elements (out_ch not a multiple of 4) still held in acc[], using scalar requantization
+    for (int i = 0; i < (out_ch & 0x3); i++)
     {
-        sum_tmp += col_base[i];
-        acc_n0 += row_base[i] * col_base[i];
+        int32_t acc_n0 = acc[i];
+        if (bias)
+        {
+            acc_n0 += bias[i];
+        }
+        acc_n0 = arm_nn_requantize(acc_n0, output_mult[i], output_shift[i]);
+        acc_n0 += conv_params->output_offset;
+        acc_n0 = MAX(acc_n0, conv_params->activation.min);
+        acc_n0 = MIN(acc_n0, conv_params->activation.max);
+        *output++ = (q7_t)acc_n0;
     }
-#endif
 
-    *sum_col = sum_tmp;
-    *output = acc_n0;
-    return ARM_MATH_SUCCESS;
+#else
+    (void)row_elements;
+    (void)skipped_row_elements;
+    (void)row_base_ref;
+    (void)col_base_ref;
+    (void)out_ch;
+    (void)conv_params;
+    (void)quant_params;
+    (void)bias;
+    (void)output;
+#endif
+    return ARM_CMSIS_NN_SUCCESS; /* NOTE(review): also returned by the non-MVE build, which computes nothing */
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c
index e4ec2b2..b0ea228 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,14 +23,13 @@
  * Title:        arm_nn_mat_mul_core_4x_s8.c
  * Description:  General matrix multiplication function for MVE extension
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.1
+ * $Date:        22. Aug 2022
+ * $Revision:    V.3.1.0
  *
- * Target Processor:  Cortex-M cores
+ * Target Processor:  Cortex-M processors
  * -------------------------------------------------------------------- */
-
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
-
 /**
  * @ingroup groupSupport
  */
@@ -46,71 +45,105 @@
  * Refer header file for details.
  *
  */
-arm_status arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
-                                     const int32_t offset,
-                                     const int8_t *row_base,
-                                     const int8_t *col_base,
-                                     int32_t *const sum_col,
-                                     int32_t *const output)
+/* For each output channel, 4 input rows are multiplied with one column set; results go to 4 output rows. */
+int8_t *arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
+                                  const int32_t offset,
+                                  const int8_t *row_base,
+                                  const int8_t *col_base_ref,
+                                  const int32_t out_ch,
+                                  const cmsis_nn_conv_params *conv_params,
+                                  const cmsis_nn_per_channel_quant_params *quant_params,
+                                  const int32_t *bias,
+                                  int8_t *output)
 {
-    int32_t acc_n0 = 0;
-    int32_t acc_n1 = 0;
-    int32_t acc_n2 = 0;
-    int32_t acc_n3 = 0;
 
-    const int8_t *ip_row_0 = row_base;
-    const int8_t *ip_row_1 = row_base + offset;
-    const int8_t *ip_row_2 = row_base + (2 * offset);
-    const int8_t *ip_row_3 = row_base + (3 * offset);
-    int32_t sum_tmp = 0;
+#if defined(ARM_MATH_MVEI)
+    for (int i = 0; i < out_ch; i++)
+    {
+        int32_t acc_n0 = 0;
+        int32_t acc_n1 = 0;
+        int32_t acc_n2 = 0;
+        int32_t acc_n3 = 0;
 
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-    __ASM volatile("   vldrb.8         q0, [%[col]], 16     \n"
-                   "   wlstp.8         lr, %[cnt], 1f       \n"
-                   "2:                                      \n"
-                   "   vaddva.s8      %[sum], q0            \n"
-                   "   vldrb.8         q1, [%[row0]], 16    \n"
-                   "   vmladava.s8    %[out0], q0, q1       \n"
-                   "   vldrb.8         q2, [%[row1]], 16    \n"
-                   "   vmladava.s8     %[out1], q0, q2      \n"
-                   "   vldrb.8         q3, [%[row2]], 16    \n"
-                   "   vmladava.s8     %[out2], q0, q3      \n"
-                   "   vldrb.8         q4, [%[row3]], 16    \n"
-                   "   vmladava.s8     %[out3], q0, q4      \n"
-                   "   vldrb.8         q0, [%[col]], 16     \n"
-                   "   letp            lr, 2b               \n"
-                   "1:                                      \n"
-                   : [col] "+r"(col_base),
-                     [sum] "+Te"(sum_tmp),
-                     [row0] "+r"(ip_row_0),
-                     [row1] "+r"(ip_row_1),
-                     [row2] "+r"(ip_row_2),
-                     [row3] "+r"(ip_row_3),
-                     [out0] "+Te"(acc_n0),
-                     [out1] "+Te"(acc_n1),
-                     [out2] "+Te"(acc_n2),
-                     [out3] "+Te"(acc_n3)
-                   : [cnt] "r"(row_elements)
-                   : "q0", "q1", "q2", "q3", "q4", "memory", "r14");
+        const int8_t *ip_row_0 = row_base;
+        const int8_t *ip_row_1 = row_base + offset;
+        const int8_t *ip_row_2 = row_base + (2 * offset);
+        const int8_t *ip_row_3 = row_base + (3 * offset);
+        const int8_t *col_base = col_base_ref + i * row_elements;
+        int32_t sum_tmp = 0;
+        /* Accumulate the 4 row x column dot products and, in sum_tmp, the plain sum of the column values. */
+#if defined(ARM_MATH_AUTOVECTORIZE)
+        for (int j = 0; j < row_elements; j++)
+        {
+            int32_t col = col_base[j];
+            sum_tmp += col;
+            acc_n0 += ip_row_0[j] * col;
+            acc_n1 += ip_row_1[j] * col;
+            acc_n2 += ip_row_2[j] * col;
+            acc_n3 += ip_row_3[j] * col;
+        }
 #else
-    for (int i = 0; i < row_elements; i++)
-    {
-        int32_t col = col_base[i];
-        sum_tmp += col;
-        acc_n0 += ip_row_0[i] * col;
-        acc_n1 += ip_row_1[i] * col;
-        acc_n2 += ip_row_2[i] * col;
-        acc_n3 += ip_row_3[i] * col;
-    }
+        __ASM volatile("   vldrb.8         q0, [%[col]], #16     \n"
+                       "   wlstp.8         lr, %[cnt], 1f       \n"
+                       "2:                                      \n"
+                       "   vaddva.s8      %[sum], q0            \n"
+                       "   vldrb.8         q1, [%[row0]], #16    \n"
+                       "   vmladava.s8    %[out0], q0, q1       \n"
+                       "   vldrb.8         q2, [%[row1]], #16    \n"
+                       "   vmladava.s8     %[out1], q0, q2      \n"
+                       "   vldrb.8         q3, [%[row2]], #16    \n"
+                       "   vmladava.s8     %[out2], q0, q3      \n"
+                       "   vldrb.8         q4, [%[row3]], #16    \n"
+                       "   vmladava.s8     %[out3], q0, q4      \n"
+                       "   vldrb.8         q0, [%[col]], #16     \n"
+                       "   letp            lr, 2b               \n"
+                       "1:                                      \n"
+                       : [col] "+r"(col_base),
+                         [sum] "+Te"(sum_tmp),
+                         [row0] "+r"(ip_row_0),
+                         [row1] "+r"(ip_row_1),
+                         [row2] "+r"(ip_row_2),
+                         [row3] "+r"(ip_row_3),
+                         [out0] "+Te"(acc_n0),
+                         [out1] "+Te"(acc_n1),
+                         [out2] "+Te"(acc_n2),
+                         [out3] "+Te"(acc_n3)
+                       : [cnt] "r"(row_elements)
+                       : "q0", "q1", "q2", "q3", "q4", "memory", "r14");
 #endif
-    output[0] = acc_n0;
-    output[1] = acc_n1;
-    output[2] = acc_n2;
-    output[3] = acc_n3;
 
-    *sum_col = sum_tmp;
+        int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3};
+        sum_tmp *= conv_params->input_offset;
+        if (bias)
+        {
+            sum_tmp += bias[i];
+        }
+        res = vaddq_n_s32(res, sum_tmp); /* same input_offset * sum(col) term (plus bias) for all 4 rows */
 
-    return ARM_MATH_SUCCESS;
+        res = arm_requantize_mve(res, quant_params->multiplier[i], quant_params->shift[i]);
+        res = vaddq_n_s32(res, conv_params->output_offset);
+
+        res = vmaxq_s32(res, vdupq_n_s32(conv_params->activation.min));
+        res = vminq_s32(res, vdupq_n_s32(conv_params->activation.max));
+        /* The 4 lanes are written as bytes at output + 0, out_ch, 2 * out_ch and 3 * out_ch. */
+        const uint32x4_t scatter_offset = {0, (uint32_t)out_ch, (uint32_t)out_ch * 2, (uint32_t)out_ch * 3};
+        vstrbq_scatter_offset_s32(output, scatter_offset, res);
+        output++;
+    }
+    /* output has advanced by out_ch in the loop; adding 3 * out_ch yields the original output + 4 * out_ch. */
+    return output + (3 * out_ch);
+#else
+    (void)row_elements;
+    (void)offset;
+    (void)row_base;
+    (void)col_base_ref;
+    (void)out_ch;
+    (void)conv_params;
+    (void)quant_params;
+    (void)bias;
+    (void)output;
+    return NULL; /* nothing is computed without ARM_MATH_MVEI */
+#endif
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c
similarity index 50%
rename from edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
rename to edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c
index 4a3e2eb..b93e078 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c
@@ -20,11 +20,11 @@
 
 /* ----------------------------------------------------------------------
  * Project:      CMSIS NN Library
- * Title:        arm_nn_mat_mult_kernel_s8_s16_reordered.c
- * Description:  Matrix-multiplication function for convolution with reordered columns
+ * Title:        arm_nn_mat_mul_kernel_s16.c
+ * Description:  Matrix-multiplication function for convolution
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.3
+ * $Date:        12 August 2021
+ * $Revision:    V.1.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -33,34 +33,31 @@
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /*
- * Matrix-multiplication with re-ordered input and bias inputs for convolution with per-channel
- *        requantization. The re-ordering is a consequence of sign extension is done by the SXTB16 command.
+ * Matrix-multiplication function for convolution with per-channel requantization.
  *
- * Refer header file for details. This function differs from arm_nn_mat_mult_kernel_s8_s16(), in that it uses
- *        read_and_pad_reordered() instead of arm_nn_mat_mult_kernel_s8_s16(). Investigating the cycles impact and
- *        unifying these two functions is a potential future improvement.
+ * Refer header file for details.
  *
  */
 
-q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
-                                              const q15_t *input_b,
-                                              const uint16_t output_ch,
-                                              const int32_t *out_shift,
-                                              const int32_t *out_mult,
-                                              const int32_t out_offset,
-                                              const int16_t activation_min,
-                                              const int16_t activation_max,
-                                              const uint16_t num_col_a,
-                                              const int32_t *const output_bias,
-                                              q7_t *out_0)
+q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a,
+                                  const q15_t *input_b,
+                                  const int32_t output_ch,
+                                  const int32_t *out_shift,
+                                  const int32_t *out_mult,
+                                  const int16_t activation_min,
+                                  const int16_t activation_max,
+                                  const int32_t num_col_a,
+                                  const int64_t *const output_bias,
+                                  q15_t *out_0)
 {
-#if defined(ARM_MATH_DSP)
-    /* set up the second output pointers */
-    q7_t *out_1 = out_0 + output_ch;
-    const int32_t *bias = output_bias;
 
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    /* set up the second output pointers */
+    q15_t *out_1 = out_0 + output_ch;
+    const int64_t *bias = output_bias;
     uint16_t row_count = output_ch / 2;
     const q7_t *ip_a0 = input_a;
+
     /* this loop over rows in A */
     while (row_count)
     {
@@ -71,11 +68,11 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
         /* align the second pointer for A */
         const q7_t *ip_a1 = ip_a0 + num_col_a;
 
-        /* Init accumulator with bias for channel N and N + 1 */
-        q31_t ch_0_out_0 = *bias;
-        q31_t ch_0_out_1 = *bias++;
-        q31_t ch_1_out_0 = *bias;
-        q31_t ch_1_out_1 = *bias++;
+        /* Init accumulator for channel N and N + 1 */
+        q31_t ch_0_out_0 = 0;
+        q31_t ch_0_out_1 = 0;
+        q31_t ch_1_out_0 = 0;
+        q31_t ch_1_out_1 = 0;
 
         uint16_t col_count = num_col_a / 4;
         /* accumulate over the vector */
@@ -85,8 +82,8 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
             q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
             q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
 
-            ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
-            ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12);
+            ip_a0 = read_and_pad(ip_a0, &a01, &a02);
+            ip_a1 = read_and_pad(ip_a1, &a11, &a12);
 
             ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
             ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
@@ -103,33 +100,66 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
 
             col_count--;
         } /* while over col_count */
-
-        ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
-        ch_0_out_0 += out_offset;
+        col_count = num_col_a & 0x3;
+        while (col_count)
+        {
+            q7_t a0 = *ip_a0++;
+            q15_t b0 = *ip_b0++;
+            q7_t a1 = *ip_a1++;
+            q15_t b1 = *ip_b1++;
+
+            ch_0_out_0 += a0 * b0;
+            ch_0_out_1 += a0 * b1;
+            ch_1_out_0 += a1 * b0;
+            ch_1_out_1 += a1 * b1;
+            col_count--;
+        } /* while over col_count */
+        if (bias)
+        {
+            q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult);
+            q63_t acc_64 = ch_0_out_0 + *bias;
+            ch_0_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+            acc_64 = ch_0_out_1 + *bias++;
+            ch_0_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+            out_mult++;
+        }
+        else
+        {
+            ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+            ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+            out_mult++;
+        }
         ch_0_out_0 = MAX(ch_0_out_0, activation_min);
         ch_0_out_0 = MIN(ch_0_out_0, activation_max);
-        *out_0++ = (q7_t)ch_0_out_0;
+        *out_0++ = (q15_t)ch_0_out_0;
 
-        ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
-        ch_0_out_1 += out_offset;
         ch_0_out_1 = MAX(ch_0_out_1, activation_min);
         ch_0_out_1 = MIN(ch_0_out_1, activation_max);
-        *out_1++ = (q7_t)ch_0_out_1;
-        out_mult++;
+        *out_1++ = (q15_t)ch_0_out_1;
         out_shift++;
 
-        ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
-        ch_1_out_0 += out_offset;
+        if (bias)
+        {
+            q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult);
+            q63_t acc_64 = ch_1_out_0 + *bias;
+            ch_1_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+            acc_64 = ch_1_out_1 + *bias++;
+            ch_1_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+            out_mult++;
+        }
+        else
+        {
+            ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
+            ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
+            out_mult++;
+        }
         ch_1_out_0 = MAX(ch_1_out_0, activation_min);
         ch_1_out_0 = MIN(ch_1_out_0, activation_max);
-        *out_0++ = (q7_t)ch_1_out_0;
+        *out_0++ = (q15_t)ch_1_out_0;
 
-        ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
-        ch_1_out_1 += out_offset;
         ch_1_out_1 = MAX(ch_1_out_1, activation_min);
         ch_1_out_1 = MIN(ch_1_out_1, activation_max);
-        *out_1++ = (q7_t)ch_1_out_1;
-        out_mult++;
+        *out_1++ = (q15_t)ch_1_out_1;
         out_shift++;
 
         /* skip row */
@@ -137,48 +167,68 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
         row_count--;
     }
 
-    if (output_ch & 1)
+    /* compute the last odd numbered row if any */
+    if (output_ch & 0x1)
     {
         /* setup pointers for B */
         const q15_t *ip_b0 = input_b;
         const q15_t *ip_b1 = ip_b0 + num_col_a;
 
-        /* Init accumulator with bias for channel N + 1 */
-        q31_t ch_0_out_0 = *bias;
-        q31_t ch_0_out_1 = ch_0_out_0;
+        q31_t ch_0_out_0 = 0;
+        q31_t ch_0_out_1 = 0;
 
-        int32_t col_count = num_col_a / 4;
+        uint16_t col_count = num_col_a >> 2;
         while (col_count)
         {
             q31_t a01, a02;
             q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
             q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
 
-            ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+            ip_a0 = read_and_pad(ip_a0, &a01, &a02);
 
             ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
             ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
 
             b0 = arm_nn_read_q15x2_ia(&ip_b0);
             b1 = arm_nn_read_q15x2_ia(&ip_b1);
-
             ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
             ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
 
             col_count--;
-        } /* while over col_count */
+        }
+        col_count = num_col_a & 0x3;
+        while (col_count)
+        {
+            q7_t a0 = *ip_a0++;
+            q15_t b0 = *ip_b0++;
+            q15_t b1 = *ip_b1++;
 
-        ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
-        ch_0_out_0 += out_offset;
+            ch_0_out_0 += a0 * b0;
+            ch_0_out_1 += a0 * b1;
+            col_count--;
+        }
+        if (bias)
+        {
+            q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult);
+            q63_t acc_64 = ch_0_out_0 + *bias;
+            ch_0_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+            acc_64 = ch_0_out_1 + *bias++;
+            ch_0_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift);
+        }
+        else
+        {
+            ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
+            ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
+        }
         ch_0_out_0 = MAX(ch_0_out_0, activation_min);
         ch_0_out_0 = MIN(ch_0_out_0, activation_max);
-        *out_0++ = (q7_t)ch_0_out_0;
+        *out_0++ = (q15_t)ch_0_out_0;
 
-        ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
-        ch_0_out_1 += out_offset;
         ch_0_out_1 = MAX(ch_0_out_1, activation_min);
         ch_0_out_1 = MIN(ch_0_out_1, activation_max);
-        *out_1++ = (q7_t)ch_0_out_1;
+        *out_1++ = (q15_t)ch_0_out_1;
+        out_mult++;
+        out_shift++;
     }
 
     out_0 += output_ch;
@@ -191,7 +241,6 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
     (void)output_ch;
     (void)out_shift;
     (void)out_mult;
-    (void)out_offset;
     (void)activation_min;
     (void)activation_max;
     (void)num_col_a;
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
index 71d0b6d..552a4e1 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2020-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mat_mult_s8_nt_t_s8
  * Description:  Matrix multiplication support function with the right-hand-side (rhs) matrix transposed
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.3
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
  *
  * Target Processor:  Cortex-M
  *
@@ -47,19 +47,19 @@
  * Refer header file for details.
  *
  */
-arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
-                                   const q7_t *rhs,
-                                   const q31_t *bias,
-                                   q7_t *dst,
-                                   const int32_t *dst_multipliers,
-                                   const int32_t *dst_shifts,
-                                   const int32_t lhs_rows,
-                                   const int32_t rhs_rows,
-                                   const int32_t rhs_cols,
-                                   const int32_t lhs_offset,
-                                   const int32_t dst_offset,
-                                   const int32_t activation_min,
-                                   const int32_t activation_max)
+arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
+                                            const q7_t *rhs,
+                                            const q31_t *bias,
+                                            q7_t *dst,
+                                            const int32_t *dst_multipliers,
+                                            const int32_t *dst_shifts,
+                                            const int32_t lhs_rows,
+                                            const int32_t rhs_rows,
+                                            const int32_t rhs_cols,
+                                            const int32_t lhs_offset,
+                                            const int32_t dst_offset,
+                                            const int32_t activation_min,
+                                            const int32_t activation_max)
 {
 #if defined(ARM_MATH_DSP)
     const int32_t off0 = rhs_cols - 4;
@@ -576,7 +576,7 @@ arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
         }
     }
 #endif
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
index bc2d868..ec58c86 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mult_q15.c
  * Description:  Q15 vector multiplication with variable output shifts
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,88 +41,15 @@
  * @{
  */
 
-/**
- * @brief           Q7 vector multiplication with variable output shifts
- * @param[in]       *pSrcA        pointer to the first input vector
- * @param[in]       *pSrcB        pointer to the second input vector
- * @param[out]      *pDst         pointer to the output vector
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in]       blockSize     number of samples in each vector
+/*
+ * Q15 vector multiplication with variable output shifts
+ * Refer function header for details
  *
- * <b>Scaling and Overflow Behavior:</b>
- * \par
- * The function uses saturating arithmetic.
- * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
  */
 
 void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
 {
-    uint32_t blkCnt; /* loop counters */
-
-#if defined(ARM_MATH_DSP)
-
-    /* Run the below code for Cortex-M4 and Cortex-M3 */
-    q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
-    q15_t out1, out2, out3, out4; /* temporary output variables */
-    q31_t mul1, mul2, mul3, mul4; /* temporary variables */
-
-    /* loop Unrolling */
-    blkCnt = blockSize >> 2U;
-
-    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
-     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0U)
-    {
-        /* read two samples at a time from sourceA */
-        inA1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA);
-        /* read two samples at a time from sourceB */
-        inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB);
-        /* read two samples at a time from sourceA */
-        inA2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcA);
-        /* read two samples at a time from sourceB */
-        inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pSrcB);
-
-        /* multiply mul = sourceA * sourceB */
-        mul1 = (q31_t)((q15_t)(inA1 >> 16) * (q15_t)(inB1 >> 16));
-        mul2 = (q31_t)((q15_t)inA1 * (q15_t)inB1);
-        mul3 = (q31_t)((q15_t)(inA2 >> 16) * (q15_t)(inB2 >> 16));
-        mul4 = (q31_t)((q15_t)inA2 * (q15_t)inB2);
-
-        /* saturate result to 16 bit */
-        out1 = (q15_t)__SSAT((q31_t)(mul1 + NN_ROUND(out_shift)) >> out_shift, 16);
-        out2 = (q15_t)__SSAT((q31_t)(mul2 + NN_ROUND(out_shift)) >> out_shift, 16);
-        out3 = (q15_t)__SSAT((q31_t)(mul3 + NN_ROUND(out_shift)) >> out_shift, 16);
-        out4 = (q15_t)__SSAT((q31_t)(mul4 + NN_ROUND(out_shift)) >> out_shift, 16);
-
-        /* store the result */
-#ifndef ARM_MATH_BIG_ENDIAN
-
-        *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
-        *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
-
-#else
-
-        *__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
-        *__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
-
-#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
-
-        /* Decrement the blockSize loop counter */
-        blkCnt--;
-    }
-
-    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
-     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4U;
-
-#else
-
-    /* Run the below code for Cortex-M0 */
-
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_DSP) */
+    uint32_t blkCnt = blockSize; /* loop counters */
 
     while (blkCnt > 0U)
     {
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
index 07aa7af..0d02f9a 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_mult_q7.c
  * Description:  Q7 vector multiplication with variable output shifts
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,62 +41,14 @@
  * @{
  */
 
-/**
- * @brief           Q7 vector multiplication with variable output shifts
- * @param[in]       *pSrcA        pointer to the first input vector
- * @param[in]       *pSrcB        pointer to the second input vector
- * @param[out]      *pDst         pointer to the output vector
- * @param[in]       out_shift     amount of right-shift for output
- * @param[in]       blockSize     number of samples in each vector
- *
- * <b>Scaling and Overflow Behavior:</b>
- * \par
- * The function uses saturating arithmetic.
- * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
+/*
+ * Q7 vector multiplication with variable output shifts
+ * Refer function header for details
  */
 
 void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
 {
-    uint32_t blkCnt; /* loop counters */
-
-#if defined(ARM_MATH_DSP)
-
-    /* Run the below code for Cortex-M4 and Cortex-M3 */
-    q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
-
-    /* loop Unrolling */
-    blkCnt = blockSize >> 2U;
-
-    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
-     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0U)
-    {
-        /* C = A * B */
-        /* Multiply the inputs and store the results in temporary variables */
-        out1 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
-        out2 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
-        out3 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
-        out4 = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
-
-        /* Store the results of 4 inputs in the destination buffer in single cycle by packing */
-        *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
-
-        /* Decrement the blockSize loop counter */
-        blkCnt--;
-    }
-
-    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
-     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4U;
-
-#else
-
-    /* Run the below code for Cortex-M0 */
-
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_DSP) */
+    uint32_t blkCnt = blockSize; /* loop counters */
 
     while (blkCnt > 0U)
     {
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c
new file mode 100644
index 0000000..54f5403
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c
@@ -0,0 +1,372 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_vec_mat_mult_t_s16
+ * Description:  s16 vector by matrix (transposed) multiplication
+ *
+ * $Date:        11 August 2022
+ * $Revision:    V.2.1.0
+ *
+ * Target Processor:  Cortex-M
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+#define MAX_COL_COUNT (512)
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup NNBasicMath
+ * @{
+ */
+
+/*
+ * s16 vector (lhs) by matrix (transposed) multiplication: one clamped q15 output per rhs row.
+ * Each output is the lhs/rhs-row dot product plus the optional 64-bit bias, requantized via dst_multiplier/dst_shift.
+ * Refer header file for details.
+ * Always returns ARM_CMSIS_NN_SUCCESS.
+ */
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
+                                              const q7_t *rhs,
+                                              const q63_t *bias,
+                                              q15_t *dst,
+                                              const int32_t dst_multiplier,
+                                              const int32_t dst_shift,
+                                              const int32_t rhs_cols,
+                                              const int32_t rhs_rows,
+                                              const int32_t activation_min,
+                                              const int32_t activation_max)
+{
+#if defined(ARM_MATH_DSP)
+    /* The 32-bit accumulating fast loops below only cover the first MAX_COL_COUNT columns. */
+    int32_t rhs_cols_fast = rhs_cols;
+
+    if (rhs_cols > MAX_COL_COUNT)
+    {
+        rhs_cols_fast = MAX_COL_COUNT;
+    }
+
+#if defined(ARM_MATH_MVEI)
+    int32_t row_loop_cnt = rhs_rows / 4;
+    int32_t col_loop_cnt = (rhs_cols_fast + 7) / 8;
+
+    for (int32_t i_row_loop_count = 0; i_row_loop_count < row_loop_cnt; i_row_loop_count++)
+    {
+        int32_t col_cnt = rhs_cols_fast;
+
+        const int16_t *lhs_ptr = lhs;
+        const int8_t *rhs_ptr_0 = rhs;
+        const int8_t *rhs_ptr_1 = rhs + rhs_cols;
+        const int8_t *rhs_ptr_2 = rhs + rhs_cols * 2;
+        const int8_t *rhs_ptr_3 = rhs + rhs_cols * 3;
+
+        int32_t result_0 = 0;
+        int32_t result_1 = 0;
+        int32_t result_2 = 0;
+        int32_t result_3 = 0;
+
+        for (int i_col_loop_cnt = 0; i_col_loop_cnt < col_loop_cnt; i_col_loop_cnt++)
+        {
+            mve_pred16_t pred = vctp16q(col_cnt);
+            col_cnt -= 8;
+
+            int16x8_t lhs_input = vldrhq_z_s16(lhs_ptr, pred);
+
+            int16x8_t rhs_input_0 = vldrbq_z_s16(rhs_ptr_0, pred);
+            int16x8_t rhs_input_1 = vldrbq_z_s16(rhs_ptr_1, pred);
+            int16x8_t rhs_input_2 = vldrbq_z_s16(rhs_ptr_2, pred);
+            int16x8_t rhs_input_3 = vldrbq_z_s16(rhs_ptr_3, pred);
+
+            result_0 = vmladavaq_s16(result_0, lhs_input, rhs_input_0);
+            result_1 = vmladavaq_s16(result_1, lhs_input, rhs_input_1);
+            result_2 = vmladavaq_s16(result_2, lhs_input, rhs_input_2);
+            result_3 = vmladavaq_s16(result_3, lhs_input, rhs_input_3);
+
+            lhs_ptr += 8;
+
+            rhs_ptr_0 += 8;
+            rhs_ptr_1 += 8;
+            rhs_ptr_2 += 8;
+            rhs_ptr_3 += 8;
+        }
+        /* Widen to 64 bits before adding the columns past MAX_COL_COUNT and the bias. */
+        int64_t result_64_0 = result_0;
+        int64_t result_64_1 = result_1;
+        int64_t result_64_2 = result_2;
+        int64_t result_64_3 = result_3;
+
+        if (rhs_cols > MAX_COL_COUNT)
+        {
+            for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++)
+            {
+                const int16_t lhs_temp = *lhs_ptr++;
+
+                result_64_0 += *rhs_ptr_0++ * lhs_temp;
+                result_64_1 += *rhs_ptr_1++ * lhs_temp;
+                result_64_2 += *rhs_ptr_2++ * lhs_temp;
+                result_64_3 += *rhs_ptr_3++ * lhs_temp;
+            }
+        }
+
+        if (bias)
+        {
+            result_64_0 += *bias++;
+            result_64_1 += *bias++;
+            result_64_2 += *bias++;
+            result_64_3 += *bias++;
+        }
+
+        int32_t tmp;
+        tmp = arm_nn_requantize_s64(result_64_0, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        tmp = 0;
+        tmp = arm_nn_requantize_s64(result_64_1, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        tmp = 0;
+        tmp = arm_nn_requantize_s64(result_64_2, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        tmp = 0;
+        tmp = arm_nn_requantize_s64(result_64_3, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        rhs += 4 * rhs_cols;
+    }
+    /* Rows left over after the 4-row blocks (rhs_rows & 0x3) are handled one at a time. */
+    for (int8_t rows_left = rhs_rows & 0x3; rows_left > 0; rows_left--)
+    {
+        int32_t result = 0;
+
+        col_loop_cnt = (rhs_cols_fast + 7) / 8;
+
+        const int16_t *lhs_ptr = lhs;
+        const int8_t *rhs_ptr = rhs;
+
+        int32_t col_cnt = (int32_t)rhs_cols_fast;
+
+        for (int i_col_loop_cnt = 0; i_col_loop_cnt < col_loop_cnt; i_col_loop_cnt++)
+        {
+            mve_pred16_t pred = vctp16q(col_cnt);
+            col_cnt -= 8;
+
+            int16x8_t lhs_input = vldrhq_z_s16(lhs_ptr, pred);
+            int16x8_t rhs_input = vldrbq_z_s16(rhs_ptr, pred);
+
+            result = vmladavaq_p_s16(result, lhs_input, rhs_input, pred);
+
+            lhs_ptr += 8;
+            rhs_ptr += 8;
+        }
+
+        int64_t result_64 = result;
+
+        if (bias)
+        {
+            result_64 += *bias++;
+        }
+
+        if (rhs_cols > MAX_COL_COUNT)
+        {
+            for (int i_rhs_cols = MAX_COL_COUNT; i_rhs_cols < rhs_cols; i_rhs_cols++)
+            {
+                const int16_t lhs_temp = *lhs_ptr++;
+
+                result_64 += *rhs_ptr++ * lhs_temp;
+            }
+        }
+
+        int32_t tmp = 0;
+        tmp = arm_nn_requantize_s64(result_64, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        rhs += rhs_cols;
+    }
+
+#else // ARM_MATH_MVEI
+    /* ARM_MATH_DSP without ARM_MATH_MVEI: two rhs rows are processed per iteration. */
+    const int32_t row_loop_cnt = rhs_rows / 2;
+
+    for (int32_t i = 0; i < row_loop_cnt; i++)
+    {
+
+        q63_t acc_64_0 = 0;
+        q63_t acc_64_1 = 0;
+        int32_t acc_0 = 0;
+        int32_t acc_1 = 0;
+
+        const int32_t col_loop_cnt = rhs_cols_fast / 4;
+
+        const int16_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        const int8_t *rhs_1 = rhs + rhs_cols;
+        rhs += 2 * rhs_cols;
+
+        for (int j = col_loop_cnt; j != 0; j--)
+        {
+            int32_t ker_0, ker_1, vec_part_0, vec_part_1;
+
+            vec_part_0 = arm_nn_read_q15x2_ia(&lhs_vec);
+            vec_part_1 = arm_nn_read_q15x2_ia(&lhs_vec);
+
+            rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);
+
+            acc_0 = __SMLAD(ker_0, vec_part_0, acc_0);
+            acc_0 = __SMLAD(ker_1, vec_part_1, acc_0);
+
+            rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1);
+
+            acc_1 = __SMLAD(ker_0, vec_part_0, acc_1);
+            acc_1 = __SMLAD(ker_1, vec_part_1, acc_1);
+        }
+
+        acc_64_0 += acc_0;
+        acc_64_1 += acc_1;
+        /* Scalar tail: columns not covered by the 4-wide loop are accumulated in 64 bits. */
+        for (int k = col_loop_cnt * 4; k < rhs_cols; k++)
+        {
+            const int32_t lhs_temp = (*lhs_vec);
+            lhs_vec++;
+            acc_64_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
+            acc_64_1 += lhs_temp * (*rhs_1);
+            rhs_1++;
+        }
+
+        if (bias)
+        {
+            acc_64_0 += *bias++;
+            acc_64_1 += *bias++;
+        }
+        q31_t tmp;
+
+        tmp = arm_nn_requantize_s64(acc_64_0, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+
+        tmp = arm_nn_requantize_s64(acc_64_1, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+    }
+    /* Odd row count: the final rhs row is processed on its own. */
+    if (rhs_rows & 0x1)
+    {
+        q63_t acc_64_0 = 0;
+        int32_t acc_0 = 0;
+        const int32_t col_loop_cnt = rhs_cols_fast / 4;
+
+        const int16_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+
+        for (int i = col_loop_cnt; i != 0; i--)
+        {
+            int32_t ker_0, ker_1, vec;
+            rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1);
+
+            vec = arm_nn_read_q15x2_ia(&lhs_vec);
+            acc_0 = __SMLAD(ker_0, vec, acc_0);
+
+            vec = arm_nn_read_q15x2_ia(&lhs_vec);
+            acc_0 = __SMLAD(ker_1, vec, acc_0);
+        }
+
+        acc_64_0 += acc_0;
+
+        for (int j = col_loop_cnt * 4; j < rhs_cols; j++)
+        {
+            const int32_t lhs_temp = (*lhs_vec);
+            lhs_vec++;
+            acc_64_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
+        }
+
+        if (bias)
+        {
+            acc_64_0 += *bias++;
+        }
+        q31_t tmp;
+        tmp = arm_nn_requantize_s64(acc_64_0, dst_multiplier, dst_shift);
+        tmp = MAX(tmp, activation_min);
+        tmp = MIN(tmp, activation_max);
+        *dst++ = (q15_t)tmp;
+    }
+
+#endif // ARM_MATH_MVEI
+#else  // ARM_MATH_DSP
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < rhs_rows; i_row_loop_cnt++)
+    {
+        const q15_t *lhs_ptr = lhs;
+        const q7_t *rhs_ptr_0 = &rhs[0];
+
+        q63_t result = 0;
+
+        for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        {
+            const q63_t rhs_value0 = (int8_t)*rhs_ptr_0;
+            const q63_t lhs_value = *lhs_ptr;
+
+            result += lhs_value * rhs_value0;
+
+            ++rhs_ptr_0;
+            ++lhs_ptr;
+        }
+
+        if (bias)
+        {
+            result += *bias++;
+        }
+        // Quantize down
+        result = arm_nn_requantize_s64(result, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        result = ((result) > (activation_min) ? (result) : (activation_min));
+        result = ((result) < (activation_max) ? (result) : (activation_max));
+
+        *dst++ = (q15_t)result;
+        rhs += rhs_cols;
+    }
+#endif // ARM_MATH_DSP
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of NNBasicMath group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
index e3e3a33..7663bb6 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_nn_vec_mat_mult_t_s8
  * Description:  s8 vector by matrix (transposed) multiplication
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.5.1
+ * $Date:        16 Aug 2022
+ * $Revision:    V.4.0.2
  *
  * Target Processor:  Cortex-M
  *
@@ -47,38 +47,25 @@
  * Refer header file for details.
  *
  */
-arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
-                                    const q7_t *rhs,
-                                    const q31_t *bias,
-                                    q7_t *dst,
-                                    const int32_t lhs_offset,
-                                    const int32_t rhs_offset,
-                                    const int32_t dst_offset,
-                                    const int32_t dst_multiplier,
-                                    const int32_t dst_shift,
-                                    const int32_t rhs_cols,
-                                    const int32_t rhs_rows,
-                                    const int32_t activation_min,
-                                    const int32_t activation_max)
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
+                                             const q7_t *rhs,
+                                             const q31_t *bias,
+                                             q7_t *dst,
+                                             const int32_t lhs_offset,
+                                             const int32_t rhs_offset,
+                                             const int32_t dst_offset,
+                                             const int32_t dst_multiplier,
+                                             const int32_t dst_shift,
+                                             const int32_t rhs_cols,
+                                             const int32_t rhs_rows,
+                                             const int32_t activation_min,
+                                             const int32_t activation_max,
+                                             const int32_t address_offset)
 {
+    (void)rhs_offset;
 #if defined(ARM_MATH_MVEI)
-    int32_t row_loop_cnt = rhs_rows / 3;
-
-    int32_t lhs_sum = 0;
-    {
-        const int32_t col_loop_cnt = (rhs_cols + 15) / 16;
-        uint32_t col_cnt = (uint32_t)rhs_cols;
-        const int8_t *lhs_vec = lhs;
-        for (int i = 0; i < col_loop_cnt; i++)
-        {
-            mve_pred16_t p = vctp8q(col_cnt);
-            col_cnt -= 16;
-
-            const int8x16_t input = vldrbq_z_s8(lhs_vec, p);
-            lhs_sum = vaddvaq_p_s8(lhs_sum, input, p);
-            lhs_vec += 16;
-        }
-    }
+    const int32_t row_loop_cnt = rhs_rows / 3;
+    const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3};
 
     for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
     {
@@ -130,21 +117,26 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
         if (bias)
         {
             int32x4_t b = vldrwq_z_s32(bias, p);
-            acc = vaddq_m_s32(vuninitializedq_s32(), acc, b, p);
+            acc = vaddq_x_s32(acc, b, p);
             bias += 3;
         }
         const int32x4_t rhs_sum = {rhs_sum_0, rhs_sum_1, rhs_sum_2, 0};
-
         acc += vdupq_n_s32(lhs_offset) * rhs_sum;
-        acc += vdupq_n_s32(rhs_offset * lhs_sum);
-        acc += vdupq_n_s32(lhs_offset * rhs_offset * rhs_cols);
 
         acc = arm_requantize_mve(acc, dst_multiplier, dst_shift);
         acc = vaddq_s32(acc, vdupq_n_s32(dst_offset));
         acc = vmaxq_s32(acc, vdupq_n_s32(activation_min));
         acc = vminq_s32(acc, vdupq_n_s32(activation_max));
-        vstrbq_p_s32(dst, acc, p);
-        dst += 3;
+
+        if (address_offset > 1L)
+        {
+            vstrbq_scatter_offset_s32(dst, address_offset_array, acc);
+        }
+        else
+        {
+            vstrbq_p_s32(dst, acc, p);
+        }
+        dst += 3 * address_offset;
     }
 
     const int loop_cnt = rhs_rows % 3;
@@ -177,8 +169,7 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
             acc_0 += *bias;
             bias++;
         }
-        const int32_t offsets =
-            (rhs_sum_0 * lhs_offset) + (lhs_sum * rhs_offset) + (lhs_offset * rhs_offset * rhs_cols);
+        const int32_t offsets = rhs_sum_0 * lhs_offset;
         acc_0 += offsets;
         acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
         acc_0 += dst_offset;
@@ -186,279 +177,189 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
         // Clamp the result
         acc_0 = MAX(acc_0, activation_min);
         *dst = MIN(acc_0, activation_max);
-        dst++;
+        dst += address_offset;
     }
 
 #elif defined(ARM_MATH_DSP)
-    const int32_t off0 = rhs_cols - 4;
-    const int16_t lhs_offset_s16 = lhs_offset;
-    const int16_t rhs_offset_s16 = rhs_offset;
-
+    const int32_t row_loop_cnt = rhs_rows / 2;
+    const int16_t lhs_offset_s16 = (int16_t)lhs_offset;
     const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16);
-    const uint32_t rhs_offset_s16x2 = __PKHBT(rhs_offset_s16, rhs_offset_s16, 16);
 
-    for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2)
+    for (int32_t i = 0; i < row_loop_cnt; i++)
     {
-        const q7_t *lhs_ptr = &lhs[0];
-        const q7_t *rhs_ptr = &rhs[0];
-
-        q31_t res00 = 0;
-        q31_t res01 = 0;
+        int32_t acc_0 = 0;
+        int32_t acc_1 = 0;
         if (bias)
         {
-            res00 = *bias++;
-            res01 = *bias++;
+            acc_0 = *bias++;
+            acc_1 = *bias++;
         }
 
-        int32_t rhs_cols_idx = 0;
+        const int32_t col_loop_cnt = rhs_cols / 4;
 
-        q31_t val0, val1, val2, val3, val4, val5;
-        for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16)
-        {
-            // Read 4 x int8 values from the RHS matrix
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            val2 = __SXTAB16(rhs_offset_s16x2, val0);
-            // Read 4 x int8 values from the LHS vector
-            val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val1);
-            // Read 4 x int8 values from the RHS matrix
-            val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0);
-            val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8));
-
-            // Perform the accumulations
-            res00 = __SMLAD(val3, val2, res00);
-            val5 = __SXTAB16(rhs_offset_s16x2, val4);
-            res00 = __SMLAD(val1, val0, res00);
-            val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8));
-            // Read 4 x int8 values from the RHS matrix
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            res01 = __SMLAD(val3, val5, res01);
-            res01 = __SMLAD(val1, val4, res01);
-
-            val2 = __SXTAB16(rhs_offset_s16x2, val0);
-            // Read 4 x int8 values from the LHS vector
-            val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val1);
-            // Read 4 x int8 values from the RHS matrix
-            val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0);
-            val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8));
-
-            // Perform the accumulations
-            res00 = __SMLAD(val3, val2, res00);
-            val5 = __SXTAB16(rhs_offset_s16x2, val4);
-            res00 = __SMLAD(val1, val0, res00);
-            val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8));
-            // Read 4 x int8 values from the RHS matrix
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            res01 = __SMLAD(val3, val5, res01);
-            res01 = __SMLAD(val1, val4, res01);
-
-            val2 = __SXTAB16(rhs_offset_s16x2, val0);
-            // Read 4 x int8 values from the LHS vector
-            val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val1);
-            // Read 4 x int8 values from the RHS matrix
-            val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0);
-            val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8));
-
-            // Perform the accumulations
-            res00 = __SMLAD(val3, val2, res00);
-            val5 = __SXTAB16(rhs_offset_s16x2, val4);
-            res00 = __SMLAD(val1, val0, res00);
-            val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8));
-            // Read 4 x int8 values from the RHS matrix
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            res01 = __SMLAD(val3, val5, res01);
-            res01 = __SMLAD(val1, val4, res01);
-
-            val2 = __SXTAB16(rhs_offset_s16x2, val0);
-            // Read 4 x int8 values from the LHS vector
-            val1 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val1);
-            // Read 4 x int8 values from the RHS matrix
-            val4 = arm_nn_read_q7x4((const q7_t *)rhs_ptr + off0);
-            val1 = __SXTAB16(lhs_offset_s16x2, __ROR(val1, 8));
-
-            // Perform the accumulations
-            res00 = __SMLAD(val3, val2, res00);
-            val5 = __SXTAB16(rhs_offset_s16x2, val4);
-            res00 = __SMLAD(val1, val0, res00);
-            val4 = __SXTAB16(rhs_offset_s16x2, __ROR(val4, 8));
-            res01 = __SMLAD(val3, val5, res01);
-            res01 = __SMLAD(val1, val4, res01);
-        }
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        const int8_t *rhs_1 = rhs + rhs_cols;
+        rhs += 2 * rhs_cols;
 
-        for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        for (int j = col_loop_cnt; j != 0; j--)
         {
-            q31_t rhs_value0 = rhs_ptr[0] + rhs_offset;
-            q31_t rhs_value1 = rhs_ptr[rhs_cols] + rhs_offset;
-            q31_t lhs_value = lhs_ptr[0] + lhs_offset;
+            int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec);
+            int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8);
 
-            res00 += lhs_value * rhs_value0;
-            res01 += lhs_value * rhs_value1;
+            vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0);
 
-            ++rhs_ptr;
-            ++lhs_ptr;
-        }
+            int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0);
+            int32_t ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8);
+            ker_0 = __SXTB16(ker_0);
 
-        // Quantize down
-        res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift);
-        res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift);
+            acc_0 = __SMLAD(ker_1, vec_1, acc_0);
+            acc_0 = __SMLAD(ker_0, vec_0, acc_0);
 
-        // Add offset
-        res00 += dst_offset;
-        res01 += dst_offset;
+            ker_0 = arm_nn_read_q7x4_ia(&rhs_1);
+            ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8);
+            ker_0 = __SXTB16(ker_0);
 
-        // Clamp the result
-        res00 = MAX(res00, activation_min);
-        res00 = MIN(res00, activation_max);
-        res01 = MAX(res01, activation_min);
-        res01 = MIN(res01, activation_max);
+            acc_1 = __SMLAD(ker_1, vec_1, acc_1);
+            acc_1 = __SMLAD(ker_0, vec_0, acc_1);
+        }
 
-        *dst++ = (q7_t)res00;
-        *dst++ = (q7_t)res01;
+        for (int k = col_loop_cnt * 4; k < rhs_cols; k++)
+        {
+            const int32_t lhs_temp = (*lhs_vec + lhs_offset);
+            lhs_vec++;
+            acc_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
+            acc_1 += lhs_temp * (*rhs_1);
+            rhs_1++;
+        }
 
-        rhs += 2 * rhs_cols;
+        acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
+        acc_1 = arm_nn_requantize(acc_1, dst_multiplier, dst_shift);
+
+        // Add offset
+        acc_0 += dst_offset;
+        acc_1 += dst_offset;
+        // Clamp the result
+        acc_0 = MAX(acc_0, activation_min);
+        acc_0 = MIN(acc_0, activation_max);
+        acc_1 = MAX(acc_1, activation_min);
+        acc_1 = MIN(acc_1, activation_max);
+        *dst = (int8_t)acc_0;
+        *(dst + address_offset) = (int8_t)acc_1;
+        dst += 2 * address_offset;
     }
 
-    if (rhs_rows % 2)
+    if (rhs_rows & 0x1)
     {
-        const q7_t *lhs_ptr = &lhs[0];
-        const q7_t *rhs_ptr = &rhs[0];
-
-        q31_t res00 = 0;
+        int32_t acc_0 = 0;
         if (bias)
         {
-            res00 = *bias++;
+            acc_0 = *bias++;
         }
+        const int32_t col_loop_cnt = rhs_cols / 4;
 
-        int32_t rhs_cols_idx = 0;
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
 
-        q31_t val0, val1, val2, val3;
-        for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16)
+        for (int i = col_loop_cnt; i != 0; i--)
         {
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            val1 = __SXTAB16(rhs_offset_s16x2, val0);
-            val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val2);
-            val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8));
-
-            // Partial accumulations
-            res00 = __SMLAD(val3, val1, res00);
-            res00 = __SMLAD(val2, val0, res00);
-
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            val1 = __SXTAB16(rhs_offset_s16x2, val0);
-            val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val2);
-            val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8));
-
-            // Partial accumulations
-            res00 = __SMLAD(val3, val1, res00);
-            res00 = __SMLAD(val2, val0, res00);
-
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            val1 = __SXTAB16(rhs_offset_s16x2, val0);
-            val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val2);
-            val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8));
-
-            // Partial accumulations
-            res00 = __SMLAD(val3, val1, res00);
-            res00 = __SMLAD(val2, val0, res00);
-
-            val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr);
-            val1 = __SXTAB16(rhs_offset_s16x2, val0);
-            val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr);
-            val0 = __SXTAB16(rhs_offset_s16x2, __ROR(val0, 8));
-            val3 = __SXTAB16(lhs_offset_s16x2, val2);
-            val2 = __SXTAB16(lhs_offset_s16x2, __ROR(val2, 8));
-
-            // Partial accumulations
-            res00 = __SMLAD(val3, val1, res00);
-            res00 = __SMLAD(val2, val0, res00);
-        }
+            int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec);
+            int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8);
+            vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0);
 
-        for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
-        {
-            q31_t rhs_value0 = rhs_ptr[0] + rhs_offset;
-            q31_t lhs_value = lhs_ptr[0] + lhs_offset;
+            int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0);
+            int32_t ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8);
+            ker_0 = __SXTB16(ker_0);
 
-            res00 += lhs_value * rhs_value0;
+            acc_0 = __SMLAD(ker_1, vec_1, acc_0);
+            acc_0 = __SMLAD(ker_0, vec_0, acc_0);
+        }
 
-            ++rhs_ptr;
-            ++lhs_ptr;
+        for (int j = col_loop_cnt * 4; j < rhs_cols; j++)
+        {
+            const int32_t lhs_temp = (*lhs_vec + lhs_offset);
+            lhs_vec++;
+            acc_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
         }
 
-        // Quantize down
-        res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift);
+        acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
 
         // Add offset
-        res00 += dst_offset;
-
+        acc_0 += dst_offset;
         // Clamp the result
-        res00 = MAX(res00, activation_min);
-        res00 = MIN(res00, activation_max);
-
-        *dst = (q7_t)res00;
+        acc_0 = MAX(acc_0, activation_min);
+        acc_0 = MIN(acc_0, activation_max);
+        *dst = (int8_t)acc_0;
+        dst += address_offset;
     }
 
 #else
 
-    for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2)
+    const int32_t row_loop_cnt = rhs_rows / 3;
+
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
     {
-        const q7_t *lhs_ptr = &lhs[0];
-        const q7_t *rhs_ptr = &rhs[0];
+        const q7_t *lhs_ptr = lhs;
+        const q7_t *rhs_ptr_0 = &rhs[0];
+        const q7_t *rhs_ptr_1 = &rhs[rhs_cols];
+        const q7_t *rhs_ptr_2 = &rhs[rhs_cols * 2];
 
         q31_t res00 = 0;
         q31_t res01 = 0;
+        q31_t res02 = 0;
         if (bias)
         {
             res00 = *bias++;
             res01 = *bias++;
+            res02 = *bias++;
         }
-
         for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
         {
-            q31_t rhs_value0 = rhs_ptr[0] + rhs_offset;
-            q31_t rhs_value1 = rhs_ptr[rhs_cols] + rhs_offset;
-            q31_t lhs_value = lhs_ptr[0] + lhs_offset;
+            const q31_t rhs_value0 = (int8_t)*rhs_ptr_0;
+            const q31_t rhs_value1 = (int8_t)*rhs_ptr_1;
+            const q31_t rhs_value2 = (int8_t)*rhs_ptr_2;
+            const q31_t lhs_value = (int8_t)*lhs_ptr + lhs_offset;
 
             res00 += lhs_value * rhs_value0;
             res01 += lhs_value * rhs_value1;
+            res02 += lhs_value * rhs_value2;
 
-            ++rhs_ptr;
+            ++rhs_ptr_0;
+            ++rhs_ptr_1;
+            ++rhs_ptr_2;
             ++lhs_ptr;
         }
-
         // Quantize down
         res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift);
         res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift);
+        res02 = arm_nn_requantize(res02, dst_multiplier, dst_shift);
 
         // Add offset
         res00 += dst_offset;
         res01 += dst_offset;
+        res02 += dst_offset;
 
         // Clamp the result
         res00 = MAX(res00, activation_min);
         res00 = MIN(res00, activation_max);
         res01 = MAX(res01, activation_min);
         res01 = MIN(res01, activation_max);
+        res02 = MAX(res02, activation_min);
+        res02 = MIN(res02, activation_max);
 
-        *dst++ = (q7_t)res00;
-        *dst++ = (q7_t)res01;
+        *dst = (q7_t)res00;
+        *(dst + address_offset) = (q7_t)res01;
+        *(dst + 2 * address_offset) = (q7_t)res02;
+        dst += 3 * address_offset;
 
-        rhs += 2 * rhs_cols;
+        rhs += 3 * rhs_cols;
     }
 
-    if (rhs_rows % 2)
+    const int loop_cnt = rhs_rows % 3;
+
+    for (int i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++)
     {
         const q7_t *lhs_ptr = &lhs[0];
         const q7_t *rhs_ptr = &rhs[0];
@@ -471,8 +372,8 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
 
         for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
         {
-            q31_t rhs_value0 = rhs_ptr[0] + rhs_offset;
-            q31_t lhs_value = lhs_ptr[0] + lhs_offset;
+            q31_t rhs_value0 = (int8_t)rhs_ptr[0];
+            q31_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset;
 
             res00 += lhs_value * rhs_value0;
 
@@ -490,11 +391,12 @@ arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
         res00 = MAX(res00, activation_min);
         res00 = MIN(res00, activation_max);
 
-        *dst = (q7_t)res00;
+        *dst = (int8_t)res00;
+        dst += address_offset;
+        rhs += rhs_cols;
     }
 #endif
-
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c
new file mode 100644
index 0000000..293edb2
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c
@@ -0,0 +1,345 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2021-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_vec_mat_mult_t_svdf_s8
+ * Description:  s8 vector by matrix (transposed) multiplication with
+ *               s16 output. Targeted at the SVDF operator.
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup NNBasicMath
+ * @{
+ */
+
+/*
+ * s8 vector (lhs) by transposed matrix (rhs) multiplication with s16 output.
+ * Every row's dot product of (lhs + lhs_offset) with rhs is requantized, clamped to
+ * [activation_min, activation_max] and stored dst_offset elements after the previous one.
+ * Returns ARM_CMSIS_NN_ARG_ERROR when rhs_cols < 0, rhs_cols > NN_Q31_MAX - 16 or
+ * dst_offset < 0, otherwise ARM_CMSIS_NN_SUCCESS. Refer header file for details.
+ */
+arm_cmsis_nn_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, // rhs_cols input values
+                                                  const q7_t *rhs, // rhs_rows rows of rhs_cols values each
+                                                  q15_t *dst, // one output per row, dst_offset apart
+                                                  const int32_t lhs_offset, // added to every lhs value
+                                                  const int32_t rhs_offset, // ignored on every code path
+                                                  const int32_t dst_offset, // output stride in elements, >= 0
+                                                  const int32_t dst_multiplier, // passed to the requantizer
+                                                  const int32_t dst_shift, // passed to the requantizer
+                                                  const int32_t rhs_cols,
+                                                  const int32_t rhs_rows,
+                                                  const int32_t activation_min, // lower clamp bound
+                                                  const int32_t activation_max) // upper clamp bound
+{
+    (void)rhs_offset; // Intentionally unused: no code path applies an offset to rhs.
+    // The upper bound presumably protects the (rhs_cols + 15) rounding used by the MVE path.
+    if (rhs_cols < 0 || (NN_Q31_MAX - rhs_cols) < 16 || dst_offset < 0)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+#if defined(ARM_MATH_MVEI)
+    int32_t row_loop_cnt = rhs_rows / 3; // Three rows are handled per iteration.
+
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
+    {
+        int32_t acc_0 = 0;
+        int32_t acc_1 = 0;
+        int32_t acc_2 = 0;
+
+        const int32_t col_loop_cnt = (rhs_cols + 15) / 16; // 16 int8 lanes per vector, rounded up.
+
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        const int8_t *rhs_1 = rhs + rhs_cols;
+        const int8_t *rhs_2 = rhs + 2 * rhs_cols;
+
+        int32_t rhs_sum_0 = 0;
+        int32_t rhs_sum_1 = 0;
+        int32_t rhs_sum_2 = 0;
+
+        uint32_t col_cnt = (uint32_t)rhs_cols;
+
+        for (int i = 0; i < col_loop_cnt; i++)
+        {
+            mve_pred16_t p = vctp8q(col_cnt); // Predicate limited to the remaining columns.
+            col_cnt -= 16;
+
+            const int8x16_t input = vldrbq_z_s8(lhs_vec, p);
+
+            const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);
+            rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p);
+            acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p);
+
+            const int8x16_t ker_1 = vldrbq_z_s8(rhs_1, p);
+            rhs_sum_1 = vaddvaq_p_s8(rhs_sum_1, ker_1, p);
+            acc_1 = vmladavaq_p_s8(acc_1, ker_1, input, p);
+
+            const int8x16_t ker_2 = vldrbq_z_s8(rhs_2, p);
+            rhs_sum_2 = vaddvaq_p_s8(rhs_sum_2, ker_2, p);
+            acc_2 = vmladavaq_p_s8(acc_2, ker_2, input, p);
+
+            lhs_vec += 16;
+            rhs_0 += 16;
+            rhs_1 += 16;
+            rhs_2 += 16;
+        }
+        rhs += 3 * rhs_cols;
+
+        int32x4_t acc = {acc_0, acc_1, acc_2, 0};
+        const int32x4_t rhs_sum = {rhs_sum_0, rhs_sum_1, rhs_sum_2, 0};
+        acc += vdupq_n_s32(lhs_offset) * rhs_sum; // sum((lhs + off) * rhs) == sum(lhs * rhs) + off * sum(rhs)
+
+        acc = arm_requantize_mve(acc, dst_multiplier, dst_shift);
+        acc = vmaxq_s32(acc, vdupq_n_s32(activation_min));
+        acc = vminq_s32(acc, vdupq_n_s32(activation_max));
+        *(dst) = (int16_t)acc[0];
+        *(dst + dst_offset) = (int16_t)acc[1];
+        *(dst + 2 * dst_offset) = (int16_t)acc[2];
+        dst += 3 * dst_offset;
+    }
+
+    const int loop_cnt = rhs_rows % 3; // Rows left over after the 3-row loop.
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++)
+    {
+        int32_t acc_0 = 0;
+        const int32_t col_loop_cnt = (rhs_cols + 15) / 16;
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        int32_t rhs_sum_0 = 0;
+        uint32_t col_cnt = (uint32_t)rhs_cols;
+
+        for (int i = 0; i < col_loop_cnt; i++)
+        {
+            mve_pred16_t p = vctp8q(col_cnt);
+            col_cnt -= 16;
+            const int8x16_t input = vldrbq_z_s8(lhs_vec, p);
+
+            const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p);
+            rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p);
+            acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p);
+
+            lhs_vec += 16;
+            rhs_0 += 16;
+        }
+        rhs += rhs_cols;
+
+        const int32_t offsets = rhs_sum_0 * lhs_offset;
+        acc_0 = __QADD(acc_0, offsets);
+        acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        acc_0 = MAX(acc_0, activation_min);
+        *dst = (q15_t)MIN(acc_0, activation_max);
+        dst += dst_offset;
+    }
+
+#elif defined(ARM_MATH_DSP)
+    int32_t row_loop_cnt = rhs_rows / 2; // Two rows are handled per iteration.
+
+    // lhs_offset is packed into both halfwords so __SXTAB16 adds it to two lanes at once.
+    const int16_t lhs_offset_s16 = (int16_t)lhs_offset;
+    const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16);
+
+    for (int32_t i = 0; i < row_loop_cnt; i++)
+    {
+        int32_t acc_0 = 0;
+        int32_t acc_1 = 0;
+
+        const int32_t col_loop_cnt = rhs_cols / 4; // Four int8 values are consumed per iteration.
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        const int8_t *rhs_1 = rhs + rhs_cols;
+        rhs += 2 * rhs_cols;
+        for (int j = col_loop_cnt; j != 0; j--)
+        {
+            int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec);
+            int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); // bytes 1 and 3 + offset
+            vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0);                           // bytes 0 and 2 + offset
+            int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0);
+            int32_t ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8); // Weights are only sign-extended, no offset.
+            ker_0 = __SXTB16(ker_0);
+            acc_0 = __SMLAD(ker_1, vec_1, acc_0);
+            acc_0 = __SMLAD(ker_0, vec_0, acc_0);
+            ker_0 = arm_nn_read_q7x4_ia(&rhs_1);
+            ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8);
+            ker_0 = __SXTB16(ker_0);
+            acc_1 = __SMLAD(ker_1, vec_1, acc_1);
+            acc_1 = __SMLAD(ker_0, vec_0, acc_1);
+        }
+        for (int k = col_loop_cnt * 4; k < rhs_cols; k++) // Scalar remainder (rhs_cols % 4 columns).
+        {
+            const int32_t lhs_temp = (*lhs_vec + lhs_offset);
+            lhs_vec++;
+            acc_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
+            acc_1 += lhs_temp * (*rhs_1);
+            rhs_1++;
+        }
+        acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
+        acc_1 = arm_nn_requantize(acc_1, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        acc_0 = MAX(acc_0, activation_min);
+        acc_0 = MIN(acc_0, activation_max);
+        acc_1 = MAX(acc_1, activation_min);
+        acc_1 = MIN(acc_1, activation_max);
+        *dst = (q15_t)acc_0;
+        *(dst + dst_offset) = (q15_t)acc_1;
+        dst += 2 * dst_offset;
+    }
+    if (rhs_rows & 0x1) // One row is left over when rhs_rows is odd.
+    {
+        int32_t acc_0 = 0;
+        const int32_t col_loop_cnt = rhs_cols / 4;
+        const int8_t *lhs_vec = lhs;
+        const int8_t *rhs_0 = rhs;
+        for (int i = col_loop_cnt; i != 0; i--)
+        {
+            int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec);
+            int32_t vec_1 = __SXTAB16(lhs_offset_s16x2, __ROR((uint32_t)vec_0, 8));
+            vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0);
+            int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0);
+            int32_t ker_1 = __SXTB16(__ROR((uint32_t)ker_0, 8)); // Weights are only sign-extended, no offset.
+            ker_0 = __SXTB16(ker_0);
+            acc_0 = __SMLAD(ker_1, vec_1, acc_0);
+            acc_0 = __SMLAD(ker_0, vec_0, acc_0);
+        }
+        for (int j = col_loop_cnt * 4; j < rhs_cols; j++)
+        {
+            const int32_t lhs_temp = (*lhs_vec + lhs_offset);
+            lhs_vec++;
+            acc_0 += lhs_temp * (*rhs_0);
+            rhs_0++;
+        }
+        acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        acc_0 = MAX(acc_0, activation_min);
+        acc_0 = MIN(acc_0, activation_max);
+        *dst = (q15_t)acc_0;
+        dst += dst_offset;
+    }
+
+#else
+
+    int32_t row_loop_cnt = rhs_rows / 3; // Three rows are handled per iteration.
+
+    for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++)
+    {
+        const q7_t *lhs_ptr = lhs;
+        const q7_t *rhs_ptr_0 = &rhs[0];
+        const q7_t *rhs_ptr_1 = &rhs[rhs_cols];
+        const q7_t *rhs_ptr_2 = &rhs[rhs_cols * 2];
+
+        q31_t res00 = 0;
+        q31_t res01 = 0;
+        q31_t res02 = 0;
+        for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        {
+            const q31_t rhs_value0 = (int8_t)*rhs_ptr_0;
+            const q31_t rhs_value1 = (int8_t)*rhs_ptr_1;
+            const q31_t rhs_value2 = (int8_t)*rhs_ptr_2;
+            const q31_t lhs_value = (int8_t)*lhs_ptr + lhs_offset;
+
+            res00 += lhs_value * rhs_value0;
+            res01 += lhs_value * rhs_value1;
+            res02 += lhs_value * rhs_value2;
+
+            ++rhs_ptr_0;
+            ++rhs_ptr_1;
+            ++rhs_ptr_2;
+            ++lhs_ptr;
+        }
+        // Quantize down
+        res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift);
+        res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift);
+        res02 = arm_nn_requantize(res02, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        res00 = MAX(res00, activation_min);
+        res00 = MIN(res00, activation_max);
+        res01 = MAX(res01, activation_min);
+        res01 = MIN(res01, activation_max);
+        res02 = MAX(res02, activation_min);
+        res02 = MIN(res02, activation_max);
+
+        *dst = (q15_t)res00;
+        *(dst + dst_offset) = (q15_t)res01;
+        *(dst + 2 * dst_offset) = (q15_t)res02;
+        dst += 3 * dst_offset;
+        rhs += 3 * rhs_cols;
+    }
+
+    const int loop_cnt = rhs_rows % 3; // Rows left over after the 3-row loop.
+
+    for (int i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++)
+    {
+        const q7_t *lhs_ptr = &lhs[0];
+        const q7_t *rhs_ptr = &rhs[0];
+
+        q31_t res00 = 0;
+
+        for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx)
+        {
+            q31_t rhs_value0 = (int8_t)rhs_ptr[0]; // No rhs_offset, exactly as in the 3-row loop above.
+            q31_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset;
+
+            res00 += lhs_value * rhs_value0;
+
+            ++rhs_ptr;
+            ++lhs_ptr;
+        }
+
+        // Quantize down
+        res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift);
+
+        // Clamp the result
+        res00 = MAX(res00, activation_min);
+        res00 = MIN(res00, activation_max);
+
+        *dst = (q15_t)res00;
+        dst += dst_offset;
+        rhs += rhs_cols;
+    }
+#endif
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of NNBasicMath group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
index 5478451..110a93b 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_q7_to_q15_no_shift.c
  * Description:  Converts the elements of the Q7 vector to Q15 vector without left-shift
  *
- * $Date:        May 29, 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,20 +41,9 @@
  * @{
  */
 
-/**
+/*
  * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
- * @param[in]       *pSrc points to the Q7 input vector
- * @param[out]      *pDst points to the Q15 output vector
- * @param[in]       blockSize length of the input vector
- *
- * \par Description:
- *
- * The equation used for the conversion process is:
- *
- * <pre>
- * 	pDst[n] = (q15_t) pSrc[n];   0 <= n < blockSize.
- * </pre>
- *
+ * Refer to function header for details
  */
 
 void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize)
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
index 5f58691..c7ee063 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_q7_to_q15_reordered_no_shift.c
  * Description:  Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
  *
- * $Date:        May 29, 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,13 +41,10 @@
  * @{
  */
 
-/**
- * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
- * @param[in]       *pSrc points to the Q7 input vector
- * @param[out]      *pDst points to the Q15 output vector
- * @param[in]       blockSize length of the input vector
+/*
+ * Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
  *
- * @details
+ * Refer to header for details
  *
  * This function does the q7 to q15 expansion with re-ordering
  *
@@ -81,7 +78,7 @@ void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t bl
     const q7_t *pIn = pSrc; /* Src pointer */
     uint32_t blkCnt;        /* loop counter */
 
-#ifndef ARM_MATH_CM0_FAMILY
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     q31_t in;
     q31_t in1, in2;
 
@@ -105,11 +102,11 @@ void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t bl
         in2 = __SXTB16(in);
 
 #ifndef ARM_MATH_BIG_ENDIAN
-        *__SIMD32(pDst)++ = in2;
-        *__SIMD32(pDst)++ = in1;
+        arm_nn_write_q7x4_ia((q7_t **)&pDst, in2);
+        arm_nn_write_q7x4_ia((q7_t **)&pDst, in1);
 #else
-        *__SIMD32(pDst)++ = in1;
-        *__SIMD32(pDst)++ = in2;
+        arm_nn_write_q7x4_ia((q7_t **)&pDst, in1);
+        arm_nn_write_q7x4_ia((q7_t **)&pDst, in2);
 #endif
 
         /* Decrement the loop counter */
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
index d547c42..572c7bc 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -24,8 +24,8 @@
  * Description:  Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. The re-ordering
  *               is a signature of sign extension intrinsic(DSP extension).
  *
- * $Date:        May 29, 2020
- * $Revision:    V.2.0.3
+ * $Date:        4 Aug 2022
+ * $Revision:    V.2.0.4
  *
  * Target Processor:  Cortex-M cores
  *
@@ -42,10 +42,10 @@
  * @{
  */
 
-/**
- * @brief Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset.
+/*
+ * Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset.
  *
- * @note  Refer header file for details.
+ * Refer to header file for details.
  *
  */
 
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c
new file mode 100644
index 0000000..be5b7f0
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c
@@ -0,0 +1,311 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_avgpool_s16.c
+ * Description:  Pooling function implementations
+ *
+ * $Date:        27 July 2022
+ * $Revision:    V.2.2.0
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+
+static void scale_q31_to_q15_and_clamp(const q31_t *buffer,
+                                       q15_t *target,
+                                       int32_t length,
+                                       const int32_t count,
+                                       const int act_min,
+                                       const int act_max)
+{
+    const int half_count = count / 2; // rounding term applied before the truncating division
+    // For each of the `length` sums: divide by count with rounding, clamp to [act_min, act_max], store as q15.
+    for (int i = 0; i < length; i++)
+    {
+        int32_t sum = buffer[i] > 0 ? (buffer[i] + half_count) : (buffer[i] - half_count);
+        sum = sum / count;
+        sum = MAX(sum, act_min);
+        sum = MIN(sum, act_max);
+
+        target[i] = (q15_t)sum;
+    }
+}
+#endif
+
+/**
+ *  @ingroup groupNN
+
+ */
+
+/**
+ * @addtogroup Pooling
+ * @{
+ */
+
+/*
+ * s16 average pooling function. Refer to header file for details.
+ *
+ * Returns ARM_CMSIS_NN_ARG_ERROR when a pooling window covers no input element or, on the DSP (non-MVE)
+ * path, when ctx->buf is NULL; returns ARM_CMSIS_NN_SUCCESS otherwise.
+ */
+arm_cmsis_nn_status arm_avgpool_s16(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_pool_params *pool_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q15_t *src,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const cmsis_nn_dims *output_dims,
+                                    q15_t *dst)
+{
+    const int32_t input_y = input_dims->h;
+    const int32_t input_x = input_dims->w;
+    const int32_t output_y = output_dims->h;
+    const int32_t output_x = output_dims->w;
+    const int32_t stride_y = pool_params->stride.h;
+    const int32_t stride_x = pool_params->stride.w;
+    const int32_t kernel_y = filter_dims->h;
+    const int32_t kernel_x = filter_dims->w;
+    const int32_t pad_y = pool_params->padding.h;
+    const int32_t pad_x = pool_params->padding.w;
+    const int32_t act_min = pool_params->activation.min;
+    const int32_t act_max = pool_params->activation.max;
+    const int32_t ch_src = input_dims->c;
+#if defined(ARM_MATH_MVEI)
+    (void)ctx;
+    for (int i_y = 0; i_y < output_y; i_y++)
+    {
+        for (int i_x = 0; i_x < output_x; i_x++)
+        {
+            const int32_t k_y_start = MAX(0, i_y * stride_y - pad_y);
+            const int32_t k_y_end = MIN(i_y * stride_y - pad_y + kernel_y, input_y);
+
+            const int32_t k_x_start = MAX(0, i_x * stride_x - pad_x);
+            const int32_t k_x_end = MIN(i_x * stride_x - pad_x + kernel_x, input_x);
+
+            const int16_t *src_base = src;
+            int16_t *out = &dst[ch_src * (i_x + i_y * output_x)];
+
+            int32_t ch_count = (ch_src + 7) / 8; // vector iterations needed, 8 channels each (rounded up)
+            int32_t channels = ch_src;
+
+            while (ch_count > 0)
+            {
+                int32_t count = 0;
+
+                int32x4_t sum_1 = vdupq_n_s32(0);
+                int32x4_t sum_2 = vdupq_n_s32(0);
+                // Load store tail predicate
+                const mve_pred16_t ld_st_p = vctp16q(channels);
+                channels -= 8;
+
+                for (int k_y = k_y_start; k_y < k_y_end; k_y++)
+                {
+                    for (int k_x = k_x_start; k_x < k_x_end; k_x++)
+                    {
+                        const int16_t *src_inner = src_base + (ch_src * (k_x + k_y * input_x));
+                        const int16x8_t temp = vldrhq_z_s16(src_inner, ld_st_p);
+
+                        const int32x4_t temp_lo = vmovlbq_s16(temp);
+                        const int32x4_t temp_hi = vmovltq_s16(temp);
+
+                        sum_1 = vaddq_s32(sum_1, temp_lo);
+                        sum_2 = vaddq_s32(sum_2, temp_hi);
+
+                        count++;
+                    }
+                }
+
+                // Prevent static code issue DIVIDE_BY_ZERO.
+                if (count == 0)
+                {
+                    return ARM_CMSIS_NN_ARG_ERROR;
+                }
+
+                // Perform the following operation
+                // sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count;
+                const int32_t half_count = count / 2;
+                // Predicate for 'sum > 0' operation
+                mve_pred16_t p = vcmpgtq_n_s32(sum_1, 0);
+                sum_1 = vaddq_m_n_s32(sum_1, sum_1, half_count, p);
+                sum_1 = vsubq_m_n_s32(sum_1, sum_1, half_count, ~p);
+
+                p = vcmpgtq_n_s32(sum_2, 0);
+                sum_2 = vaddq_m_n_s32(sum_2, sum_2, half_count, p);
+                sum_2 = vsubq_m_n_s32(sum_2, sum_2, half_count, ~p);
+
+                for (int i = 0; i < 4; i++) // divide each of the four 32-bit lanes by count
+                {
+                    sum_1[i] = sum_1[i] / count;
+                    sum_2[i] = sum_2[i] / count;
+                }
+
+                sum_1 = vmaxq_s32(sum_1, vdupq_n_s32(act_min));
+                sum_1 = vminq_s32(sum_1, vdupq_n_s32(act_max));
+
+                sum_2 = vmaxq_s32(sum_2, vdupq_n_s32(act_min));
+                sum_2 = vminq_s32(sum_2, vdupq_n_s32(act_max));
+
+                int16x8_t temp = vdupq_n_s16(0);
+                temp = vmovnbq_s32(temp, sum_1);
+                temp = vmovntq_s32(temp, sum_2);
+
+                vstrhq_p_s16(out, temp, ld_st_p);
+
+                out += 8;
+                ch_count--;
+                src_base += 8;
+            }
+        }
+    }
+#elif defined(ARM_MATH_DSP)
+
+    q31_t *buffer = (q31_t *)ctx->buf;
+
+    if (buffer == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+
+    /* Run the following code for CPU's with DSP extension
+     */
+    for (int i_y = 0, idx_y = -pad_y; i_y < output_y; idx_y += stride_y, i_y++)
+    {
+        for (int i_x = 0, idx_x = -pad_x; i_x < output_x; idx_x += stride_x, i_x++)
+        {
+            /* Condition for kernel start dimension:
+                      (base_idx_<x,y> + kernel_<x,y>_start) >= 0 */
+            const int32_t kernel_y_start = MAX(0, -idx_y);
+            const int32_t kernel_x_start = MAX(0, -idx_x);
+
+            /* Condition for kernel end dimension:
+                   (base_idx_<x,y> + kernel_<x,y>_end) < dim_src_<width,height> */
+            const int32_t kernel_y_end = MIN(kernel_y, input_y - idx_y);
+            const int32_t kernel_x_end = MIN(kernel_x, input_x - idx_x);
+
+            int count = 0;
+
+            for (int k_y = kernel_y_start; k_y < kernel_y_end; k_y++)
+            {
+                for (int k_x = kernel_x_start; k_x < kernel_x_end; k_x++)
+                {
+                    const q15_t *start = src + ch_src * (k_x + idx_x + (k_y + idx_y) * input_x);
+
+                    if (count == 0) // first window element initialises the per-channel accumulators
+                    {
+                        for (int i = 0; i < ch_src; i++)
+                        {
+                            buffer[i] = start[i];
+                        }
+                    }
+                    else
+                    {
+                        for (int i = 0; i < ch_src; i++)
+                        {
+                            buffer[i] = __QADD(start[i], buffer[i]);
+                        }
+                    }
+                    count++;
+                }
+            }
+
+            // Prevent static code issue DIVIDE_BY_ZERO.
+            if (count == 0)
+            {
+                return ARM_CMSIS_NN_ARG_ERROR;
+            }
+
+            scale_q31_to_q15_and_clamp(buffer, dst, ch_src, count, act_min, act_max);
+            dst += ch_src;
+        }
+    }
+
+#else
+    /* Reference C code adapted from CMSIS-NN arm_avgpool_s8.c.
+     */
+
+    (void)ctx;
+
+    for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++)
+    {
+        for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++)
+        {
+            /* Condition for kernel start dimension: (base_idx_<x,y> + kernel_<x,y>_start) >= 0 */
+            const int32_t ker_y_start = MAX(0, -base_idx_y);
+            const int32_t ker_x_start = MAX(0, -base_idx_x);
+
+            /* Condition for kernel end dimension: (base_idx_<x,y> + kernel_<x,y>_end) < dim_src_<width,height> */
+            const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y);
+            const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x);
+
+            for (int i_ch_in = 0; i_ch_in < ch_src; i_ch_in++)
+            {
+                int sum = 0;
+                int count = 0;
+
+                for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++)
+                {
+                    for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++)
+                    {
+                        sum += src[i_ch_in + ch_src * (k_x + base_idx_x + (k_y + base_idx_y) * input_x)];
+                        count++;
+                    }
+                }
+
+                // Prevent static code issue DIVIDE_BY_ZERO.
+                if (count == 0)
+                {
+                    return ARM_CMSIS_NN_ARG_ERROR;
+                }
+
+                sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count;
+                sum = MAX(sum, act_min);
+                sum = MIN(sum, act_max);
+
+                dst[i_ch_in + ch_src * (i_x + i_y * output_x)] = sum;
+            }
+        }
+    }
+#endif
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+int32_t arm_avgpool_s16_get_buffer_size(const int output_x, const int ch_src)
+{
+    (void)output_x; // unused: the scratch size computed below depends only on ch_src
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+    return (ch_src * (int32_t)sizeof(int32_t)); // bytes for one 32-bit accumulator per channel
+#else
+    (void)ch_src;
+#endif
+    return 0; // the MVE and plain C paths of arm_avgpool_s16 do not use ctx->buf
+}
+
+/**
+ * @} end of Pooling group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
index 0b41118..05c284f 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_avgpool_s8.c
  * Description:  Pooling function implementations
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.3
+ * $Date:        7 July 2022
+ * $Revision:    V.3.0.2
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -34,7 +34,6 @@
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 #if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
-
 static void scale_q31_to_q7_and_clamp(const q31_t *buffer,
                                       q7_t *target,
                                       int32_t length,
@@ -43,6 +42,7 @@ static void scale_q31_to_q7_and_clamp(const q31_t *buffer,
                                       const int act_max)
 {
     const int half_count = count / 2;
+
     for (int i = 0; i < length; i++)
     {
         int32_t sum = buffer[i] > 0 ? (buffer[i] + half_count) : (buffer[i] - half_count);
@@ -73,13 +73,13 @@ static void scale_q31_to_q7_and_clamp(const q31_t *buffer,
 
 #if defined(ARM_MATH_MVEI)
 
-arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
-                          const cmsis_nn_pool_params *pool_params,
-                          const cmsis_nn_dims *input_dims,
-                          const q7_t *src,
-                          const cmsis_nn_dims *filter_dims,
-                          const cmsis_nn_dims *output_dims,
-                          q7_t *dst)
+arm_cmsis_nn_status arm_avgpool_s8(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_pool_params *pool_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q7_t *src,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const cmsis_nn_dims *output_dims,
+                                   q7_t *dst)
 {
     (void)ctx;
     const int32_t input_y = input_dims->h;
@@ -96,153 +96,136 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
     const int32_t act_max = pool_params->activation.max;
     const int32_t ch_src = input_dims->c;
 
-    int32_t i_x, i_y;
-    int32_t k_x, k_y;
-
-    for (i_y = 0; i_y < output_y; i_y++)
+    for (int i_y = 0; i_y < output_y; i_y++)
     {
-        for (i_x = 0; i_x < output_x; i_x++)
+        for (int i_x = 0; i_x < output_x; i_x++)
         {
+            const int32_t k_y_start = MAX(0, i_y * stride_y - pad_y);
+            const int32_t k_y_end = MIN(i_y * stride_y - pad_y + kernel_y, input_y);
 
-            int32_t k_y_start, k_y_end;
-            int32_t k_x_start, k_x_end;
-            int32_t chCnt;
-            const int8_t *pTmp, *pTmpInner;
-            int8_t *pDst;
+            const int32_t k_x_start = MAX(0, i_x * stride_x - pad_x);
+            const int32_t k_x_end = MIN(i_x * stride_x - pad_x + kernel_x, input_x);
 
-            k_y_start = MAX(0, i_y * stride_y - pad_y);
-            k_y_end = MIN(i_y * stride_y - pad_y + kernel_y, input_y);
+            const int8_t *src_base = src;
+            int8_t *out = &dst[ch_src * (i_x + i_y * output_x)];
 
-            k_x_start = MAX(0, i_x * stride_x - pad_x);
-            k_x_end = MIN(i_x * stride_x - pad_x + kernel_x, input_x);
+            int32_t ch_count = (ch_src + 15) / 16;
+            int32_t channels = ch_src;
 
-            pTmp = src;
-            pDst = &dst[ch_src * (i_x + i_y * output_x)];
-
-            chCnt = ch_src >> 4;
-            while (chCnt > 0)
+            while (ch_count > 0)
             {
-                int32x4_t sumV1, sumV2, sumV3, sumV4;
-
-                int8x16_t tempV;
-                int16x8_t tempVLO, tempVHI;
-                int32x4_t tempVLOLO, tempVLOHI, tempVHILO, tempVHIHI;
+                int8x16_t temp;
+                int16x8_t temp_lo, temp_hi;
+                int32x4_t temp_lo_lo, temp_lo_hi, temp_hi_lo, temp_hi_hi;
                 int32_t count = 0;
 
-                sumV1 = vdupq_n_s32(0);
-                sumV2 = vdupq_n_s32(0);
-                sumV3 = vdupq_n_s32(0);
-                sumV4 = vdupq_n_s32(0);
+                int32x4_t sum_1 = vdupq_n_s32(0);
+                int32x4_t sum_2 = vdupq_n_s32(0);
+                int32x4_t sum_3 = vdupq_n_s32(0);
+                int32x4_t sum_4 = vdupq_n_s32(0);
+                // Load store tail predicate
+                const mve_pred16_t ld_st_p = vctp8q(channels);
+                channels -= 16;
 
-                for (k_y = k_y_start; k_y < k_y_end; k_y++)
+                for (int k_y = k_y_start; k_y < k_y_end; k_y++)
                 {
-                    for (k_x = k_x_start; k_x < k_x_end; k_x++)
+                    for (int k_x = k_x_start; k_x < k_x_end; k_x++)
                     {
-                        pTmpInner = pTmp + (ch_src * (k_x + k_y * input_x));
-                        tempV = vldrbq_s8(pTmpInner);
+                        const int8_t *src_inner = src_base + (ch_src * (k_x + k_y * input_x));
+                        temp = vldrbq_z_s8(src_inner, ld_st_p);
 
-                        tempVLO = vmovlbq_s8(tempV);
-                        tempVHI = vmovltq_s8(tempV);
+                        temp_lo = vmovlbq_s8(temp);
+                        temp_hi = vmovltq_s8(temp);
 
-                        tempVLOLO = vmovlbq_s16(tempVLO);
-                        tempVLOHI = vmovltq_s16(tempVLO);
+                        temp_lo_lo = vmovlbq_s16(temp_lo);
+                        temp_lo_hi = vmovltq_s16(temp_lo);
 
-                        tempVHILO = vmovlbq_s16(tempVHI);
-                        tempVHIHI = vmovltq_s16(tempVHI);
+                        temp_hi_lo = vmovlbq_s16(temp_hi);
+                        temp_hi_hi = vmovltq_s16(temp_hi);
 
-                        sumV1 = vaddq_s32(sumV1, tempVLOLO);
-                        sumV2 = vaddq_s32(sumV2, tempVLOHI);
-                        sumV3 = vaddq_s32(sumV3, tempVHILO);
-                        sumV4 = vaddq_s32(sumV4, tempVHIHI);
+                        sum_1 = vaddq_s32(sum_1, temp_lo_lo);
+                        sum_2 = vaddq_s32(sum_2, temp_lo_hi);
+                        sum_3 = vaddq_s32(sum_3, temp_hi_lo);
+                        sum_4 = vaddq_s32(sum_4, temp_hi_hi);
 
                         count++;
                     }
                 }
 
-                sumV1[0] = sumV1[0] > 0 ? (sumV1[0] + count / 2) / count : (sumV1[0] - count / 2) / count;
-                sumV1[1] = sumV1[1] > 0 ? (sumV1[1] + count / 2) / count : (sumV1[1] - count / 2) / count;
-                sumV1[2] = sumV1[2] > 0 ? (sumV1[2] + count / 2) / count : (sumV1[2] - count / 2) / count;
-                sumV1[3] = sumV1[3] > 0 ? (sumV1[3] + count / 2) / count : (sumV1[3] - count / 2) / count;
+                // Prevent static code issue DIVIDE_BY_ZERO.
+                if (count == 0)
+                {
+                    return ARM_CMSIS_NN_ARG_ERROR;
+                }
 
-                sumV2[0] = sumV2[0] > 0 ? (sumV2[0] + count / 2) / count : (sumV2[0] - count / 2) / count;
-                sumV2[1] = sumV2[1] > 0 ? (sumV2[1] + count / 2) / count : (sumV2[1] - count / 2) / count;
-                sumV2[2] = sumV2[2] > 0 ? (sumV2[2] + count / 2) / count : (sumV2[2] - count / 2) / count;
-                sumV2[3] = sumV2[3] > 0 ? (sumV2[3] + count / 2) / count : (sumV2[3] - count / 2) / count;
+                // Perform the following operation
+                // sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count;
+                const int32_t half_count = count / 2;
+                // Predicate for 'sum > 0' operation
+                mve_pred16_t p = vcmpgtq_n_s32(sum_1, 0);
+                sum_1 = vaddq_m_n_s32(sum_1, sum_1, half_count, p);
+                sum_1 = vsubq_m_n_s32(sum_1, sum_1, half_count, ~p);
 
-                sumV3[0] = sumV3[0] > 0 ? (sumV3[0] + count / 2) / count : (sumV3[0] - count / 2) / count;
-                sumV3[1] = sumV3[1] > 0 ? (sumV3[1] + count / 2) / count : (sumV3[1] - count / 2) / count;
-                sumV3[2] = sumV3[2] > 0 ? (sumV3[2] + count / 2) / count : (sumV3[2] - count / 2) / count;
-                sumV3[3] = sumV3[3] > 0 ? (sumV3[3] + count / 2) / count : (sumV3[3] - count / 2) / count;
+                p = vcmpgtq_n_s32(sum_2, 0);
+                sum_2 = vaddq_m_n_s32(sum_2, sum_2, half_count, p);
+                sum_2 = vsubq_m_n_s32(sum_2, sum_2, half_count, ~p);
 
-                sumV4[0] = sumV4[0] > 0 ? (sumV4[0] + count / 2) / count : (sumV4[0] - count / 2) / count;
-                sumV4[1] = sumV4[1] > 0 ? (sumV4[1] + count / 2) / count : (sumV4[1] - count / 2) / count;
-                sumV4[2] = sumV4[2] > 0 ? (sumV4[2] + count / 2) / count : (sumV4[2] - count / 2) / count;
-                sumV4[3] = sumV4[3] > 0 ? (sumV4[3] + count / 2) / count : (sumV4[3] - count / 2) / count;
+                p = vcmpgtq_n_s32(sum_3, 0);
+                sum_3 = vaddq_m_n_s32(sum_3, sum_3, half_count, p);
+                sum_3 = vsubq_m_n_s32(sum_3, sum_3, half_count, ~p);
 
-                sumV1 = vmaxq_s32(sumV1, vdupq_n_s32(act_min));
-                sumV1 = vminq_s32(sumV1, vdupq_n_s32(act_max));
+                p = vcmpgtq_n_s32(sum_4, 0);
+                sum_4 = vaddq_m_n_s32(sum_4, sum_4, half_count, p);
+                sum_4 = vsubq_m_n_s32(sum_4, sum_4, half_count, ~p);
 
-                sumV2 = vmaxq_s32(sumV2, vdupq_n_s32(act_min));
-                sumV2 = vminq_s32(sumV2, vdupq_n_s32(act_max));
+                for (int i = 0; i < 4; i++)
+                {
+                    sum_1[i] = sum_1[i] / count;
+                    sum_2[i] = sum_2[i] / count;
+                    sum_3[i] = sum_3[i] / count;
+                    sum_4[i] = sum_4[i] / count;
+                }
 
-                sumV3 = vmaxq_s32(sumV3, vdupq_n_s32(act_min));
-                sumV3 = vminq_s32(sumV3, vdupq_n_s32(act_max));
+                sum_1 = vmaxq_s32(sum_1, vdupq_n_s32(act_min));
+                sum_1 = vminq_s32(sum_1, vdupq_n_s32(act_max));
 
-                sumV4 = vmaxq_s32(sumV4, vdupq_n_s32(act_min));
-                sumV4 = vminq_s32(sumV4, vdupq_n_s32(act_max));
+                sum_2 = vmaxq_s32(sum_2, vdupq_n_s32(act_min));
+                sum_2 = vminq_s32(sum_2, vdupq_n_s32(act_max));
 
-                tempVLO = vmovnbq_s32(tempVLO, sumV1);
-                tempVLO = vmovntq_s32(tempVLO, sumV2);
+                sum_3 = vmaxq_s32(sum_3, vdupq_n_s32(act_min));
+                sum_3 = vminq_s32(sum_3, vdupq_n_s32(act_max));
 
-                tempVHI = vmovnbq_s32(tempVHI, sumV3);
-                tempVHI = vmovntq_s32(tempVHI, sumV4);
+                sum_4 = vmaxq_s32(sum_4, vdupq_n_s32(act_min));
+                sum_4 = vminq_s32(sum_4, vdupq_n_s32(act_max));
 
-                tempV = vmovnbq_s16(tempV, tempVLO);
-                tempV = vmovntq_s16(tempV, tempVHI);
+                temp_lo = vmovnbq_s32(temp_lo, sum_1);
+                temp_lo = vmovntq_s32(temp_lo, sum_2);
 
-                vstrbq_s8(pDst, tempV);
-                pDst += 16;
+                temp_hi = vmovnbq_s32(temp_hi, sum_3);
+                temp_hi = vmovntq_s32(temp_hi, sum_4);
 
-                chCnt--;
-                pTmp += 16;
-            }
+                temp = vmovnbq_s16(temp, temp_lo);
+                temp = vmovntq_s16(temp, temp_hi);
 
-            chCnt = ch_src & 0xF;
-            while (chCnt > 0)
-            {
-                int32_t sum = 0;
-                int32_t count = 0;
+                vstrbq_p_s8(out, temp, ld_st_p);
+                out += 16;
 
-                for (k_y = k_y_start; k_y < k_y_end; k_y++)
-                {
-                    for (k_x = k_x_start; k_x < k_x_end; k_x++)
-                    {
-                        sum += pTmp[ch_src * (k_x + k_y * input_x)];
-                        count++;
-                    }
-                }
-                sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count;
-                sum = MAX(sum, act_min);
-                sum = MIN(sum, act_max);
-
-                *pDst++ = sum;
-
-                chCnt--;
-                pTmp++;
+                ch_count--;
+                src_base += 16;
             }
         }
     }
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 #else
-arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
-                          const cmsis_nn_pool_params *pool_params,
-                          const cmsis_nn_dims *input_dims,
-                          const q7_t *src,
-                          const cmsis_nn_dims *filter_dims,
-                          const cmsis_nn_dims *output_dims,
-                          q7_t *dst)
+arm_cmsis_nn_status arm_avgpool_s8(const cmsis_nn_context *ctx,
+                                   const cmsis_nn_pool_params *pool_params,
+                                   const cmsis_nn_dims *input_dims,
+                                   const q7_t *src,
+                                   const cmsis_nn_dims *filter_dims,
+                                   const cmsis_nn_dims *output_dims,
+                                   q7_t *dst)
 {
     const int32_t input_y = input_dims->h;
     const int32_t input_x = input_dims->w;
@@ -257,6 +240,11 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
     const int32_t act_min = pool_params->activation.min;
     const int32_t act_max = pool_params->activation.max;
     const int32_t ch_src = input_dims->c;
+
+    if (ctx->buf == NULL && arm_avgpool_s8_get_buffer_size(output_dims->w, input_dims->c))
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
     q31_t *buffer = (q31_t *)ctx->buf;
 
 #if defined(ARM_MATH_DSP)
@@ -302,6 +290,13 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
                     count++;
                 }
             }
+
+            // Prevent static code issue DIVIDE_BY_ZERO.
+            if (count == 0)
+            {
+                return ARM_CMSIS_NN_ARG_ERROR;
+            }
+
             scale_q31_to_q7_and_clamp(buffer, dst, ch_src, count, act_min, act_max);
             dst += ch_src;
         }
@@ -311,20 +306,18 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
     /* Reference C code adapted from CMSIS-NN arm_avepool_q7_HWC.
      */
     (void)buffer;
-    int16_t i_ch_in, i_x, i_y;
-    int16_t k_x, k_y;
 
-    for (i_y = 0; i_y < output_y; i_y++)
+    for (int i_y = 0; i_y < output_y; i_y++)
     {
-        for (i_x = 0; i_x < output_x; i_x++)
+        for (int i_x = 0; i_x < output_x; i_x++)
         {
-            for (i_ch_in = 0; i_ch_in < ch_src; i_ch_in++)
+            for (int i_ch_in = 0; i_ch_in < ch_src; i_ch_in++)
             {
                 int sum = 0;
                 int count = 0;
-                for (k_y = i_y * stride_y - pad_y; k_y < i_y * stride_y - pad_y + kernel_y; k_y++)
+                for (int k_y = i_y * stride_y - pad_y; k_y < i_y * stride_y - pad_y + kernel_y; k_y++)
                 {
-                    for (k_x = i_x * stride_x - pad_x; k_x < i_x * stride_x - pad_x + kernel_x; k_x++)
+                    for (int k_x = i_x * stride_x - pad_x; k_x < i_x * stride_x - pad_x + kernel_x; k_x++)
                     {
                         if (k_y >= 0 && k_x >= 0 && k_y < input_y && k_x < input_x)
                         {
@@ -333,6 +326,13 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
                         }
                     }
                 }
+
+                // Prevent static code issue DIVIDE_BY_ZERO.
+                if (count == 0)
+                {
+                    return ARM_CMSIS_NN_ARG_ERROR;
+                }
+
                 sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count;
                 sum = MAX(sum, act_min);
                 sum = MIN(sum, act_max);
@@ -343,7 +343,7 @@ arm_status arm_avgpool_s8(const cmsis_nn_context *ctx,
     }
 
 #endif
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 #endif /* ARM_MATH_MVEI */
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c
new file mode 100644
index 0000000..0b39d5e
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c
@@ -0,0 +1,216 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_max_pool_s16.c
+ * Description:  Pooling function implementations
+ *
+ * $Date:        16 August 2022
+ * $Revision:    V.2.1.1
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+static void compare_and_replace_if_larger(int16_t *base, const int16_t *target, int32_t length)
+{ /* In place: base[i] = max(base[i], target[i]) over the first 'length' elements. */
+#if defined(ARM_MATH_MVEI) /* vector path: 8 x int16 per iteration */
+    int32_t loop_count = (length + 7) / 8; /* ceil(length / 8) */
+    for (int i = 0; i < loop_count; i++)
+    {
+        mve_pred16_t p = vctp16q((uint32_t)length); /* predicate built from the remaining element count */
+        const int16x8_t op_1 = vldrhq_z_s16(base, p);
+        const int16x8_t op_2 = vldrhq_z_s16(target, p);
+        const int16x8_t max = vmaxq_s16(op_1, op_2);
+        vstrhq_p_s16(base, max, p); /* predicated store of the result back into base */
+        base += 8;
+        target += 8;
+        length -= 8;
+    }
+#else /* scalar path: two int16 values per iteration */
+    q15_t *dst = base;
+    const q15_t *src = target;
+    union arm_nnword ref_max;
+    union arm_nnword comp_max;
+    int32_t cnt = length >> 1; /* number of complete pairs */
+
+    while (cnt > 0l)
+    {
+        ref_max.word = arm_nn_read_q15x2(dst);
+        comp_max.word = arm_nn_read_q15x2_ia(&src); /* presumably also advances src by two - TODO confirm */
+
+        if (comp_max.half_words[0] > ref_max.half_words[0])
+        {
+            ref_max.half_words[0] = comp_max.half_words[0];
+        }
+        if (comp_max.half_words[1] > ref_max.half_words[1])
+        {
+            ref_max.half_words[1] = comp_max.half_words[1];
+        }
+
+        arm_nn_write_q15x2_ia(&dst, ref_max.word); /* presumably also advances dst by two - TODO confirm */
+
+        cnt--;
+    }
+
+    if (length & 0x1) /* odd length: handle the single element left after the pairs */
+    {
+        if (*src > *dst)
+        {
+            *dst = *src;
+        }
+    }
+#endif
+}
+
+static void clamp_output(int16_t *source, int32_t length, const int16_t act_min, const int16_t act_max)
+{ /* In place: limits each of the first 'length' elements of source to [act_min, act_max]. */
+#if defined(ARM_MATH_MVEI) /* vector path: 8 x int16 per iteration */
+    const int16x8_t min = vdupq_n_s16((int16_t)act_min); /* lower bound as a vector operand */
+    const int16x8_t max = vdupq_n_s16((int16_t)act_max); /* upper bound as a vector operand */
+
+    int32_t loop_count = (length + 7) / 8; /* ceil(length / 8) */
+    for (int i = 0; i < loop_count; i++)
+    {
+        mve_pred16_t p = vctp16q((uint32_t)length); /* predicate built from the remaining element count */
+        length -= 8;
+        const int16x8_t src = vldrhq_z_s16(source, p);
+        int16x8_t res = vmaxq_x_s16(src, min, p); /* apply the lower bound first */
+        res = vminq_x_s16(res, max, p);           /* then the upper bound */
+        vstrhq_p_s16(source, res, p);
+        source += 8;
+    }
+#else /* scalar path: two int16 values per iteration */
+    union arm_nnword in;
+    int32_t cnt = length >> 1; /* number of complete pairs */
+
+    while (cnt > 0l)
+    {
+        in.word = arm_nn_read_q15x2(source);
+
+        in.half_words[0] = MAX(in.half_words[0], act_min);
+        in.half_words[0] = MIN(in.half_words[0], act_max);
+        in.half_words[1] = MAX(in.half_words[1], act_min);
+        in.half_words[1] = MIN(in.half_words[1], act_max);
+
+        arm_nn_write_q15x2_ia(&source, in.word); /* presumably also advances source by two - TODO confirm */
+        cnt--;
+    }
+
+    if (length & 0x1) /* odd length: clamp the single element left after the pairs */
+    {
+        int16_t comp = *source;
+        comp = MAX(comp, act_min);
+        comp = MIN(comp, act_max);
+        *source = comp;
+    }
+#endif
+}
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Pooling
+ * @{
+ */
+
+/*
+ * Optimized s16 max pooling function
+ * For each output position, takes the per-channel maximum over the part of the
+ * kernel window that lies inside the input, then clamps the whole output once.
+ * Refer to header file for details.
+ */
+
+arm_cmsis_nn_status arm_max_pool_s16(const cmsis_nn_context *ctx,
+                                     const cmsis_nn_pool_params *pool_params,
+                                     const cmsis_nn_dims *input_dims,
+                                     const int16_t *src,
+                                     const cmsis_nn_dims *filter_dims,
+                                     const cmsis_nn_dims *output_dims,
+                                     int16_t *dst)
+{
+    const int32_t input_y = input_dims->h;
+    const int32_t input_x = input_dims->w;
+    const int32_t output_y = output_dims->h;
+    const int32_t output_x = output_dims->w;
+    const int32_t stride_y = pool_params->stride.h;
+    const int32_t stride_x = pool_params->stride.w;
+    const int32_t kernel_y = filter_dims->h;
+    const int32_t kernel_x = filter_dims->w;
+    const int32_t pad_y = pool_params->padding.h;
+    const int32_t pad_x = pool_params->padding.w;
+    const int16_t act_min = pool_params->activation.min;
+    const int16_t act_max = pool_params->activation.max;
+    const int32_t channel_in = input_dims->c; /* input_dims->n is not read by this function */
+    (void)ctx; /* ctx is not used by this function */
+    int16_t *dst_base = dst; /* start of the output, kept for the final clamp pass */
+
+    for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++)
+    {
+        for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++)
+        {
+            /* Condition for kernel start dimension: (base_idx_<x,y> + kernel_<x,y>_start) >= 0 */
+            const int32_t ker_y_start = MAX(0, -base_idx_y);
+            const int32_t ker_x_start = MAX(0, -base_idx_x);
+
+            /* Condition for kernel end dimension: (base_idx_<x,y> + kernel_<x,y>_end) < dim_src_<width,height> */
+            const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y);
+            const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x);
+
+            int count = 0; /* becomes non-zero once dst has been seeded for this output position */
+
+            for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++)
+            {
+                for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++)
+                {
+                    const int16_t *start = src + channel_in * (k_x + base_idx_x + (k_y + base_idx_y) * input_x);
+
+                    if (count == 0) /* first in-bounds position: seed dst with its channel vector */
+                    {
+                        memcpy(dst, start, channel_in * sizeof(int16_t));
+                        count++;
+                    }
+                    else /* later positions: element-wise max into dst */
+                    {
+                        compare_and_replace_if_larger(dst, start, channel_in);
+                    }
+                }
+            }
+            /* 'count' is expected to be non-zero here; if it is zero, nothing was written to dst above. */
+            dst += channel_in; /* advance to the next output position */
+        }
+    }
+
+    clamp_output(dst_base, output_x * output_y * channel_in, act_min, act_max); /* one pass over all outputs */
+
+    return ARM_CMSIS_NN_SUCCESS; /* no error path in this function */
+}
+
+/**
+ * @} end of Pooling group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c
index 9442df0..581a8c6 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_max_pool_s8.c
  * Description:  Pooling function implementations
  *
- * $Date:        19. Februari 2021
- * $Revision:    V.2.0.2
+ * $Date:        16 August 2022
+ * $Revision:    V.3.0.1
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -42,7 +42,7 @@ static void compare_and_replace_if_larger_q7(q7_t *base, const q7_t *target, int
         mve_pred16_t p = vctp8q((uint32_t)length);
         const int8x16_t op_1 = vldrbq_z_s8(base, p);
         const int8x16_t op_2 = vldrbq_z_s8(target, p);
-        const int8x16_t max = vmaxq_m_s8(vuninitializedq_s8(), op_1, op_2, p);
+        const int8x16_t max = vmaxq_x_s8(op_1, op_2, p);
         vstrbq_p_s8(base, max, p);
         base += 16;
         target += 16;
@@ -77,7 +77,7 @@ static void compare_and_replace_if_larger_q7(q7_t *base, const q7_t *target, int
             ref_max.bytes[3] = comp_max.bytes[3];
         }
 
-        write_q7x4_ia(&dst, ref_max.word);
+        arm_nn_write_q7x4_ia(&dst, ref_max.word);
 
         cnt--;
     }
@@ -100,15 +100,16 @@ static void clamp_output(q7_t *source, int32_t length, const int32_t act_min, co
 {
 #if defined(ARM_MATH_MVEI)
     int32_t loop_count = (length + 15) / 16;
+    const int8x16_t vmin = vdupq_n_s8((int8_t)act_min);
+    const int8x16_t vmax = vdupq_n_s8((int8_t)act_max);
+
     for (int i = 0; i < loop_count; i++)
     {
         mve_pred16_t p = vctp8q((uint32_t)length);
         length -= 16;
         const int8x16_t src = vldrbq_z_s8(source, p);
-        const int8x16_t predicated_min = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_min, p);
-        const int8x16_t predicated_max = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_max, p);
-        int8x16_t res = vmaxq_m_s8(vuninitializedq_s8(), src, predicated_min, p);
-        res = vminq_m_s8(vuninitializedq_s8(), res, predicated_max, p);
+        int8x16_t res = vmaxq_x_s8(src, vmin, p);
+        res = vminq_x_s8(res, vmax, p);
         vstrbq_p_s8(source, res, p);
         source += 16;
     }
@@ -129,7 +130,7 @@ static void clamp_output(q7_t *source, int32_t length, const int32_t act_min, co
         in.bytes[3] = MAX(in.bytes[3], act_min);
         in.bytes[3] = MIN(in.bytes[3], act_max);
 
-        write_q7x4_ia(&source, in.word);
+        arm_nn_write_q7x4_ia(&source, in.word);
         cnt--;
     }
 
@@ -161,13 +162,13 @@ static void clamp_output(q7_t *source, int32_t length, const int32_t act_min, co
  *
  */
 
-arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
-                           const cmsis_nn_pool_params *pool_params,
-                           const cmsis_nn_dims *input_dims,
-                           const q7_t *src,
-                           const cmsis_nn_dims *filter_dims,
-                           const cmsis_nn_dims *output_dims,
-                           q7_t *dst)
+arm_cmsis_nn_status arm_max_pool_s8(const cmsis_nn_context *ctx,
+                                    const cmsis_nn_pool_params *pool_params,
+                                    const cmsis_nn_dims *input_dims,
+                                    const q7_t *src,
+                                    const cmsis_nn_dims *filter_dims,
+                                    const cmsis_nn_dims *output_dims,
+                                    q7_t *dst)
 {
     const int32_t input_y = input_dims->h;
     const int32_t input_x = input_dims->w;
@@ -207,7 +208,7 @@ arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
 
                     if (count == 0)
                     {
-                        memcpy(dst, start, channel_in);
+                        arm_memcpy_q7(dst, start, channel_in);
                         count++;
                     }
                     else
@@ -223,7 +224,7 @@ arm_status arm_max_pool_s8(const cmsis_nn_context *ctx,
 
     clamp_output(dst_base, output_x * output_y * channel_in, act_min, act_max);
 
-    return ARM_MATH_SUCCESS;
+    return ARM_CMSIS_NN_SUCCESS;
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
index 5590fc8..c88fc24 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_pool_q7_HWC.c
  * Description:  Pooling function implementations
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.1.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -33,10 +33,10 @@
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
 
-/**
- * @brief A few utility functions used by pooling functions
+/*
+ * A few utility functions used by pooling functions
  *
  *
  */
@@ -77,7 +77,7 @@ static void compare_and_replace_if_larger_q7(q7_t *base,           // base data
         if (com.bytes[3] > in.bytes[3])
             in.bytes[3] = com.bytes[3];
 
-        *__SIMD32(pIn)++ = in.word;
+        arm_nn_write_q7x4_ia(&pIn, in.word);
 
         cnt--;
     }
@@ -121,10 +121,10 @@ static void accumulate_q7_to_q15(q15_t *base, q7_t *target, const uint16_t lengt
 #endif
 
         in = arm_nn_read_q15x2(pCnt);
-        *__SIMD32(pCnt)++ = __QADD16(vo1, in);
+        arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo1, in));
 
         in = arm_nn_read_q15x2(pCnt);
-        *__SIMD32(pCnt)++ = __QADD16(vo2, in);
+        arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in));
 
         cnt--;
     }
@@ -180,7 +180,7 @@ void arm_maxpool_q7_HWC(q7_t *Im_in,
                         q7_t *Im_out)
 {
     (void)bufferA;
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     int16_t i_x, i_y;
@@ -336,7 +336,7 @@ void arm_avepool_q7_HWC(q7_t *Im_in,
                         q7_t *Im_out)
 {
 
-#if defined(ARM_MATH_DSP)
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
 
     q15_t *buffer = (q15_t *)bufferA;
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
index 7751b4e..0b1892b 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,14 +23,15 @@
  * Title:        arm_reshape_s8.c
  * Description:  Reshape a s8 vector
  *
- * $Date:        September 2019
- * $Revision:    V.1.0.0
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.1
  *
  * Target Processor:  Cortex-M cores
  *
  * -------------------------------------------------------------------- */
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
 
 /**
  *  @ingroup groupNN
@@ -41,7 +42,7 @@
  * @{
  */
 
-/**
+/*
  * Basic s8 reshape function.
  *
  * Refer header file for details.
@@ -50,10 +51,11 @@
 
 void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size)
 {
-    memcpy(output, input, total_size);
+    arm_memcpy_q7(output, input, total_size);
 }
 
 /**
  * @} end of Reshape group
  */
+
 #endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
index f3763ee..3d386e8 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_svdf_s8.c
  * Description:  S8 basic SVDF layer function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 May 2022
+ * $Revision:    V.4.0.1
  *
  * Target Processor:  Cortex-M processors
  *
@@ -43,29 +43,29 @@
  */
 
 /*
- * S8 SVDF layer function for TensorFlow Lite
+ * S8 SVDF layer function for TensorFlow Lite with 8 bit state tensor
  *
  * Refer to header file for details.
  *
  */
 
-arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
-                       const cmsis_nn_context *output_ctx,
-                       const cmsis_nn_svdf_params *svdf_params,
-                       const cmsis_nn_per_tensor_quant_params *input_quant_params,
-                       const cmsis_nn_per_tensor_quant_params *output_quant_params,
-                       const cmsis_nn_dims *input_dims,
-                       const q7_t *input_data,
-                       const cmsis_nn_dims *state_dims,
-                       q15_t *state_data,
-                       const cmsis_nn_dims *weights_feature_dims,
-                       const q7_t *weights_feature_data,
-                       const cmsis_nn_dims *weights_time_dims,
-                       const q15_t *weights_time_data,
-                       const cmsis_nn_dims *bias_dims,
-                       const q31_t *bias_data,
-                       const cmsis_nn_dims *output_dims,
-                       q7_t *output_data)
+arm_cmsis_nn_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
+                                const cmsis_nn_context *output_ctx,
+                                const cmsis_nn_svdf_params *svdf_params,
+                                const cmsis_nn_per_tensor_quant_params *input_quant_params,
+                                const cmsis_nn_per_tensor_quant_params *output_quant_params,
+                                const cmsis_nn_dims *input_dims,
+                                const q7_t *input_data,
+                                const cmsis_nn_dims *state_dims,
+                                q7_t *state_data,
+                                const cmsis_nn_dims *weights_feature_dims,
+                                const q7_t *weights_feature_data,
+                                const cmsis_nn_dims *weights_time_dims,
+                                const q7_t *weights_time_data,
+                                const cmsis_nn_dims *bias_dims,
+                                const q31_t *bias_data,
+                                const cmsis_nn_dims *output_dims,
+                                q7_t *output_data)
 {
     (void)bias_dims;
     (void)state_dims;
@@ -83,139 +83,189 @@ arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx,
     const int32_t out_activation_max = svdf_params->output_activation.max;
     const int16_t rank = svdf_params->rank;
 
-    int32_t zp_32 = (-zp_in & 0xffff) | ((-zp_in & 0xffff) << 16);
-
     const int32_t input_batches = input_dims->n;
     const int32_t input_height = input_dims->h;
     const int32_t feature_batches = weights_feature_dims->n;
     const int32_t time_batches = weights_time_dims->h;
     const int32_t unit_count = feature_batches / rank;
 
+    if (input_ctx->buf == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
     q31_t *buffer_a = (q31_t *)input_ctx->buf;
+
+    if (output_ctx->buf == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
     q31_t *buffer_b = (q31_t *)output_ctx->buf;
 
-    memmove((q15_t *)state_data,
-            (q15_t *)state_data + 1,
-            (size_t)(input_batches * feature_batches * time_batches * (int32_t)sizeof(int16_t)));
+    // Left shift state
+    memmove((int8_t *)state_data,
+            (int8_t *)state_data + 1,
+            (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int8_t)));
 
-    q15_t *res_ptr = state_data + (time_batches - 1);
+    // Matrix multiplication input * feature weight
     for (int i_batch = 0; i_batch < input_batches; i_batch++)
     {
-        const q7_t *buffer_1 = weights_feature_data;
-        for (int r = 0; r < feature_batches; r++)
-        {
-            q31_t dot_prod = 0;
+        q7_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1);
+        const q7_t *weight = weights_feature_data;
+        const q7_t *input = input_data + i_batch * input_height;
 
-            const q7_t *buffer_2 = input_data + i_batch * input_height;
+        arm_cmsis_nn_status res = arm_nn_vec_mat_mult_t_s8(input,
+                                                           weight,
+                                                           NULL,
+                                                           res_ptr,
+                                                           -zp_in,
+                                                           0,
+                                                           0,
+                                                           multiplier_in,
+                                                           shift_in,
+                                                           input_height,
+                                                           feature_batches,
+                                                           in_activation_min,
+                                                           in_activation_max,
+                                                           time_batches);
 
-#if defined(ARM_MATH_DSP)
-            int c = 0;
-            int32_t block_count = input_height >> 2;
-            for (int i = 0; i < block_count; i++)
-            {
-                c += 4;
+        if (res != ARM_CMSIS_NN_SUCCESS)
+        {
+            return res;
+        }
+    }
 
-                q31_t r1 = arm_nn_read_q7x4_ia(&buffer_1);
-                q31_t r1_a = __SXTB16(r1);
-                q31_t r1_b = __SXTB16(__ROR((uint32_t)r1, 8));
+    // Matrix multiplication time weight * state tensors
+    {
+        q31_t *ptr_a = buffer_a;
+        const int8_t *v2 = state_data;
+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
+        {
+            const int8_t *v1 = weights_time_data;
 
-                q31_t r2 = arm_nn_read_q7x4_ia(&buffer_2);
-                q31_t r2_a = __SXTAB16(zp_32, r2);
-                q31_t r2_b = __SXTAB16(zp_32, __ROR((uint32_t)r2, 8));
+            for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++)
+            {
+                *ptr_a = 0;
+                int32_t sum = 0;
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+                // Perform matrix multiplication in blocks of four
+                int j = 0;
+                int32_t block_count = time_batches >> 2;
+                for (int i = 0; i < block_count; i++)
+                {
+                    j += 4;
 
-                dot_prod = __SMLAD(r1_a, r2_a, dot_prod);
-                dot_prod = __SMLAD(r1_b, r2_b, dot_prod);
-            }
+                    q31_t r1_1, r1_2, r2_1, r2_2;
+                    v1 = read_and_pad_reordered(v1, &r1_1, &r1_2);
+                    v2 = read_and_pad_reordered(v2, &r2_1, &r2_2);
+                    sum = __SMLAD(r1_1, r2_1, sum);
+                    sum = __SMLAD(r1_2, r2_2, sum);
+                }
 
-            for (; c < input_height; c++)
-            {
-                dot_prod += *buffer_1 * (*buffer_2 - zp_in);
-                buffer_1++;
-                buffer_2++;
-            }
+                // Process the remaining data
+                for (; j < time_batches; j++)
+                {
+                    sum += *v1 * *v2;
+                    v1++;
+                    v2++;
+                }
 #else
-            for (int c = 0; c < input_height; c++)
-            {
-                dot_prod += *buffer_1 * (*buffer_2 - zp_in);
-                buffer_1++;
-                buffer_2++;
-            }
+                for (int j = 0; j < time_batches; j++)
+                {
+                    sum += *v1 * *v2;
+                    v1++;
+                    v2++;
+                }
 #endif
 
-            dot_prod = arm_nn_requantize(dot_prod, multiplier_in, shift_in);
-            dot_prod = CLAMP(dot_prod, in_activation_max, in_activation_min);
-            *res_ptr = dot_prod;
-            res_ptr += time_batches;
+                *ptr_a = sum;
+                ptr_a++;
+            }
         }
     }
 
-    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    if (bias_data)
     {
-        q31_t *ptr_a = buffer_a + i_batch * feature_batches;
-
-        const q15_t *v1 = weights_time_data;
-        const q15_t *v2 = state_data + i_batch * time_batches * feature_batches;
-        for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++)
+        if (unit_count == feature_batches)
         {
-            *ptr_a = 0;
-
-            int32_t sum = 0;
-#if defined(ARM_MATH_DSP)
-            int j = 0;
-            int32_t block_count = time_batches >> 1;
-            for (int i = 0; i < block_count; i++)
+            for (int i = 0; i < input_batches; i++)
             {
-                j += 2;
-                q31_t r1 = arm_nn_read_q15x2_ia(&v1);
-                q31_t r2 = arm_nn_read_q15x2_ia(&v2);
+                q31_t *output_temp = buffer_b + i * feature_batches;
+                const q31_t *ptr_a = buffer_a + i * feature_batches;
 
-                sum = __SMLAD(r1, r2, sum);
-            }
-
-            // Process the remaining data
-            for (; j < time_batches; j++)
-            {
-                sum += *v1 * *v2;
-                v1++;
-                v2++;
+                const int32_t *bi = bias_data;
+                for (int j = 0; j < feature_batches; j++)
+                {
+                    output_temp[j] = ptr_a[j] + bi[j];
+                }
             }
-#else
-            for (int j = 0; j < time_batches; j++)
+        }
+        else
+        {
+            for (int i_batch = 0; i_batch < input_batches; i_batch++)
             {
-                sum += *v1 * *v2;
-                v1++;
-                v2++;
-            }
-#endif
+                q31_t *output_data_temp = buffer_b + i_batch * unit_count;
+                q31_t *ptr_a = buffer_a + i_batch * feature_batches;
 
-            *ptr_a = sum;
-            ptr_a++;
+                for (int i = 0; i < unit_count; i++)
+                {
+                    int32_t sum = bias_data[i];
+                    for (int j = 0; j < rank; j++)
+                    {
+                        sum += *ptr_a;
+                        ptr_a++;
+                    }
+                    output_data_temp[i] = sum;
+                }
+            }
         }
     }
-
-    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    else
     {
-        q31_t *output_data_temp = buffer_b + i_batch * unit_count;
-        q31_t *ptr_a = buffer_a + i_batch * feature_batches;
-
-        for (int i = 0; i < unit_count; i++)
+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
         {
-            output_data_temp[i] = bias_data[i];
-            for (int j = 0; j < rank; j++)
+            q31_t *output_data_temp = buffer_b + i_batch * unit_count;
+            q31_t *ptr_a = buffer_a + i_batch * feature_batches;
+
+            for (int i = 0; i < unit_count; i++)
             {
-                output_data_temp[i] += *ptr_a;
-                ptr_a++;
+                int32_t sum = 0;
+                for (int j = 0; j < rank; j++)
+                {
+                    sum += *ptr_a;
+                    ptr_a++;
+                }
+                output_data_temp[i] = sum;
             }
         }
     }
 
+#if defined(ARM_MATH_MVEI)
+    int32_t num_elements = input_batches * unit_count;
+    const int32_t loop_count = (num_elements + 3) / 4;
+    for (int i_op = 0; i_op < loop_count; i_op++)
+    {
+        mve_pred16_t p = vctp32q((uint32_t)num_elements);
+        int32x4_t op = vldrwq_z_s32(buffer_b, p);
+        op = arm_requantize_mve(op, multiplier_out, shift_2);
+        op = vaddq_n_s32(op, zp_out);
+        const int32x4_t min_vec = vdupq_n_s32((int8_t)out_activation_min);
+        const int32x4_t max_vec = vdupq_n_s32((int8_t)out_activation_max);
+        op = vmaxq_s32(op, min_vec);
+        op = vminq_s32(op, max_vec);
+        vstrbq_p_s32(output_data, op, p);
+        output_data += 4;
+        buffer_b += 4;
+        num_elements -= 4;
+    }
+#else
     for (int i = 0; i < input_batches * unit_count; i++)
     {
         output_data[i] = (q7_t)CLAMP(
             arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, out_activation_max, out_activation_min);
     }
+#endif
 
-    return (ARM_MATH_SUCCESS);
+    return (ARM_CMSIS_NN_SUCCESS);
 }
 
 /**
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c
new file mode 100644
index 0000000..d804121
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c
@@ -0,0 +1,271 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_svdf_state_s16_s8.c
+ * Description:  S8 basic SVDF layer function with s16 state tensor
+ *
+ * $Date:        4 May 2022
+ * $Revision:    V.2.0.1
+ *
+ * Target Processor:  Cortex-M processors
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup SVDF
+ * @{
+ */
+
+/*
+ * S8 SVDF layer function for TensorFlow Lite with 16 bit state tensor
+ *
+ * Refer to header file for details.
+ *
+ */
+
+arm_cmsis_nn_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx, // buf: q31 scratch for the time-weighted sums; NULL is rejected
+                                          const cmsis_nn_context *output_ctx, // buf: q31 scratch for the per-unit sums; NULL is rejected
+                                          const cmsis_nn_svdf_params *svdf_params,
+                                          const cmsis_nn_per_tensor_quant_params *input_quant_params,
+                                          const cmsis_nn_per_tensor_quant_params *output_quant_params,
+                                          const cmsis_nn_dims *input_dims,
+                                          const q7_t *input_data,
+                                          const cmsis_nn_dims *state_dims,
+                                          q15_t *state_data, // s16 state tensor, updated in place
+                                          const cmsis_nn_dims *weights_feature_dims,
+                                          const q7_t *weights_feature_data,
+                                          const cmsis_nn_dims *weights_time_dims,
+                                          const q15_t *weights_time_data,
+                                          const cmsis_nn_dims *bias_dims,
+                                          const q31_t *bias_data, // optional: a NULL pointer skips the bias add
+                                          const cmsis_nn_dims *output_dims,
+                                          q7_t *output_data)
+{
+    (void)bias_dims; // these three dims arguments are not read by this implementation
+    (void)state_dims;
+    (void)output_dims;
+
+    const q31_t multiplier_in = input_quant_params->multiplier;
+    const q31_t shift_in = input_quant_params->shift;
+    const q31_t multiplier_out = output_quant_params->multiplier;
+    const q31_t shift_2 = output_quant_params->shift;
+    const int32_t zp_in = svdf_params->input_offset;
+    const int32_t zp_out = svdf_params->output_offset;
+    const int32_t in_activation_min = svdf_params->input_activation.min;
+    const int32_t in_activation_max = svdf_params->input_activation.max;
+    const int32_t out_activation_min = svdf_params->output_activation.min;
+    const int32_t out_activation_max = svdf_params->output_activation.max;
+    const int16_t rank = svdf_params->rank;
+
+    const int32_t input_batches = input_dims->n;
+    const int32_t input_height = input_dims->h;
+    const int32_t feature_batches = weights_feature_dims->n;
+    const int32_t time_batches = weights_time_dims->h;
+    const int32_t unit_count = feature_batches / rank; // integer division; rank is not checked for zero here
+
+    if (input_ctx->buf == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+    q31_t *buffer_a = (q31_t *)input_ctx->buf; // receives one q31 sum per (batch, feature) below
+
+    if (output_ctx->buf == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR;
+    }
+    q31_t *buffer_b = (q31_t *)output_ctx->buf; // receives one q31 value per (batch, unit) below
+
+    // Left shift state: move the whole s16 state tensor down by one element
+    memmove((q15_t *)state_data,
+            (q15_t *)state_data + 1,
+            (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t)));
+
+    // Matrix multiplication input * feature weight
+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
+    {
+        q15_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1); // last time slot of this batch's first feature row
+        const q7_t *weight = weights_feature_data;
+        const q7_t *input = input_data + i_batch * input_height;
+
+        arm_cmsis_nn_status res = arm_nn_vec_mat_mult_t_svdf_s8(input,
+                                                                weight,
+                                                                res_ptr,
+                                                                -zp_in,
+                                                                0,
+                                                                time_batches,
+                                                                multiplier_in,
+                                                                shift_in,
+                                                                input_height,
+                                                                feature_batches,
+                                                                in_activation_min,
+                                                                in_activation_max);
+
+        if (res != ARM_CMSIS_NN_SUCCESS)
+        {
+            return res; // propagate the helper's error; state_data has already been shifted at this point
+        }
+    }
+
+    {
+        // Matrix multiplication time weight * state tensors
+        q31_t *ptr_a = buffer_a;
+        const q15_t *v2 = state_data; // walks the state linearly across all batches and features
+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
+        {
+            const q15_t *v1 = weights_time_data; // time weights restart from the beginning for every batch
+
+            for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++)
+            {
+                *ptr_a = 0; // overwritten with the final sum below
+                int32_t sum = 0;
+#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
+                // Perform matrix multiplication in blocks of two
+                int j = 0;
+                int32_t block_count = time_batches >> 1;
+                for (int i = 0; i < block_count; i++)
+                {
+                    j += 2; // count consumed elements so the tail loop resumes at the right index
+                    q31_t r1 = arm_nn_read_q15x2_ia(&v1);
+                    q31_t r2 = arm_nn_read_q15x2_ia(&v2);
+
+                    sum = __SMLAD(r1, r2, sum);
+                }
+
+                // Process the remaining data
+                for (; j < time_batches; j++)
+                {
+                    sum += *v1 * *v2;
+                    v1++;
+                    v2++;
+                }
+#else
+                for (int j = 0; j < time_batches; j++)
+                {
+                    sum += *v1 * *v2;
+                    v1++;
+                    v2++;
+                }
+#endif
+
+                *ptr_a = sum;
+                ptr_a++;
+            }
+        }
+    }
+
+    if (bias_data)
+    {
+        if (unit_count == feature_batches) // holds when rank is 1: one feature per unit, nothing to reduce
+        {
+            for (int i = 0; i < input_batches; i++)
+            {
+                q31_t *output_temp = buffer_b + i * feature_batches;
+                const q31_t *ptr_a = buffer_a + i * feature_batches;
+
+                const int32_t *bi = bias_data;
+                for (int j = 0; j < feature_batches; j++)
+                {
+                    output_temp[j] = ptr_a[j] + bi[j];
+                }
+            }
+        }
+        else
+        {
+            for (int i_batch = 0; i_batch < input_batches; i_batch++)
+            {
+                q31_t *output_data_temp = buffer_b + i_batch * unit_count;
+                q31_t *ptr_a = buffer_a + i_batch * feature_batches;
+
+                for (int i = 0; i < unit_count; i++)
+                {
+                    int32_t sum = bias_data[i]; // start from the unit's bias, then add its rank consecutive sums
+                    for (int j = 0; j < rank; j++)
+                    {
+                        sum += *ptr_a;
+                        ptr_a++;
+                    }
+                    output_data_temp[i] = sum;
+                }
+            }
+        }
+    }
+    else
+    {
+        for (int i_batch = 0; i_batch < input_batches; i_batch++)
+        {
+            q31_t *output_data_temp = buffer_b + i_batch * unit_count;
+            q31_t *ptr_a = buffer_a + i_batch * feature_batches;
+
+            for (int i = 0; i < unit_count; i++)
+            {
+                int32_t sum = 0; // no bias supplied: plain sum of rank consecutive entries
+                for (int j = 0; j < rank; j++)
+                {
+                    sum += *ptr_a;
+                    ptr_a++;
+                }
+                output_data_temp[i] = sum;
+            }
+        }
+    }
+
+#if defined(ARM_MATH_MVEI)
+    int32_t num_elements = input_batches * unit_count;
+    const int32_t loop_count = (num_elements + 3) / 4; // four 32-bit lanes per iteration, rounded up
+    for (int i_op = 0; i_op < loop_count; i_op++)
+    {
+        mve_pred16_t p = vctp32q((uint32_t)num_elements); // tail predicate built from the remaining element count
+        int32x4_t op = vldrwq_z_s32(buffer_b, p);
+        op = arm_requantize_mve(op, multiplier_out, shift_2);
+        op = vaddq_n_s32(op, zp_out);
+        const int32x4_t min_vec = vdupq_n_s32((int8_t)out_activation_min); // activation bounds are narrowed to int8 before broadcasting
+        const int32x4_t max_vec = vdupq_n_s32((int8_t)out_activation_max);
+        op = vmaxq_s32(op, min_vec);
+        op = vminq_s32(op, max_vec);
+        vstrbq_p_s32(output_data, op, p); // predicated byte store of the four 32-bit lanes
+        output_data += 4;
+        buffer_b += 4;
+        num_elements -= 4;
+    }
+#else
+    for (int i = 0; i < input_batches * unit_count; i++)
+    {
+        output_data[i] = (q7_t)CLAMP(
+            arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, out_activation_max, out_activation_min); // CLAMP is called as (value, max, min)
+    }
+#endif
+
+    return (ARM_CMSIS_NN_SUCCESS);
+}
+
+/**
+ * @} end of SVDF group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c
new file mode 100644
index 0000000..5328340
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c
@@ -0,0 +1,151 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_softmax_common_s8.c
+ * Description:  Softmax with s8 input and output of s8 or s16.
+ *
+ * $Date:        17 March 2022
+ * $Revision:    V.1.0.1
+ *
+ * Target Processor:  Cortex-M processors
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+#define ACCUM_BITS 12
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/*
+ * Softmax function with s8 input and output of s8 or s16.
+ *
+ * Refer to header file for details.
+ *
+ */
+void arm_nn_softmax_common_s8(const int8_t *input, // read row by row: num_rows rows of row_size values
+                              const int32_t num_rows,
+                              const int32_t row_size,
+                              const int32_t mult,
+                              const int32_t shift,
+                              const int32_t diff_min, // entries with (value - row max) below this are excluded from the sum and get the minimum output
+                              const bool int16_output, // true: output is written as int16_t; false: as int8_t
+                              void *output)
+{
+    const int32_t mask = (1 << shift); // each diff is multiplied by 2^shift before MUL_SAT with mult
+
+    int32_t col = 0;
+    int32_t row_idx;
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int8_t max = *input;
+
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max; // always <= 0 because max is the row maximum
+            if (diff >= diff_min)
+            {
+                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
+            }
+        }
+
+        const int32_t headroom = __CLZ(sum);
+        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31)); // a non-positive sum is replaced by 0 instead of being shifted
+        int32_t bits_over_unit;
+
+        if (int16_output)
+        {
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+      return; // s16 output path disabled by this flag: returns before anything is written to output
+#endif
+            int16_t *output_s16 = (int16_t *)output + row_idx * row_size; // output row is re-derived from row_idx each iteration
+
+            bits_over_unit = ACCUM_BITS - headroom + 15;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q15_MIN;
+                    output_s16[col] = (int16_t)CLAMP(res, (int32_t)NN_Q15_MAX, (int32_t)NN_Q15_MIN);
+                }
+                else
+                {
+                    output_s16[col] = NN_Q15_MIN; // below diff_min: emit the minimum s16 value
+                }
+            }
+        }
+        else
+        {
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+      return; // s8 output path disabled by this flag: returns before anything is written to output
+#endif
+            int8_t *output_s8 = (int8_t *)output + row_idx * row_size; // output row is re-derived from row_idx each iteration
+
+            bits_over_unit = ACCUM_BITS - headroom + 23;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q7_MIN;
+                    output_s8[col] = (int8_t)CLAMP(res, (int32_t)NN_Q7_MAX, (int32_t)NN_Q7_MIN);
+                }
+                else
+                {
+                    output_s8[col] = NN_Q7_MIN; // below diff_min: emit the minimum s8 value
+                }
+            }
+        }
+
+        input += row_size; // advance to the next input row; the output pointer itself is never advanced
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
index 559ca5f..550c111 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2018, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_softmax_q15.c
  * Description:  Q15 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,13 +41,9 @@
  * @{
  */
 
-/**
- * @brief Q15 softmax function
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
+/*
+ * Q15 softmax function
  *
- * @details
  *
  *  Here, instead of typical e based softmax, we use
  *  2-based softmax, i.e.,:
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
index 7894d47..bb37660 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_softmax_q7.c
  * Description:  Q7 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.2
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.3
  *
  * Target Processor:  Cortex-M cores
  *
@@ -41,13 +41,8 @@
  * @{
  */
 
-/**
- * @brief Q7 softmax function
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
- *
- * @details
+/*
+ * Q7 softmax function
  *
  *  Here, instead of typical natural logarithm e based softmax, we use
  *  2-based softmax here, i.e.,:
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c
new file mode 100644
index 0000000..be45eae
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c
@@ -0,0 +1,126 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s16.c
+ * Description:  S16 softmax function
+ *
+ * $Date:        19 April 2022
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+arm_cmsis_nn_status arm_softmax_s16(const int16_t *input, // read row by row: num_rows rows of row_size values
+                                    const int32_t num_rows,
+                                    const int32_t row_size,
+                                    const int32_t mult,
+                                    const int32_t shift,
+                                    const cmsis_nn_softmax_lut_s16 *softmax_params, // exp_lut and one_by_one_lut must both be non-NULL
+                                    int16_t *output) // also used as scratch for the intermediate exp values of each row
+{
+    int32_t col = 0;
+    int32_t row_idx;
+
+    if (softmax_params->exp_lut == NULL || softmax_params->one_by_one_lut == NULL)
+    {
+        return ARM_CMSIS_NN_ARG_ERROR; // rejected before any output is written
+    }
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int16_t max = *input;
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+        int16_t *cached_exp_results = output; // aliases the current output row; overwritten with final results in the last loop
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max; // always <= 0 because max is the row maximum
+            const int32_t scaled_diff = arm_nn_requantize(diff, mult, shift);
+            const int32_t symmetric_scaled_diff = scaled_diff + NN_Q15_MAX;
+            const int16_t saturated_symmetric_scaled_diff = MIN(MAX(symmetric_scaled_diff, NN_Q15_MIN), NN_Q15_MAX);
+
+            // Lookup from exp table and cache result for next step
+            const int16_t index = (256 + (saturated_symmetric_scaled_diff >> 7)); // upper bits select the table entry
+            const int16_t offset = saturated_symmetric_scaled_diff & 0x7f; // low 7 bits are the interpolation fraction
+            const int16_t base = softmax_params->exp_lut[index];
+            const int16_t slope = softmax_params->exp_lut[index + 1] - softmax_params->exp_lut[index];
+            const int16_t delta = (slope * offset + 64) >> 7; // linear interpolation between adjacent entries, +64 rounds before the shift
+            const int16_t result = (base + delta);
+            cached_exp_results[col] = result;
+
+            sum += cached_exp_results[col];
+        }
+
+        const int32_t headroom = __CLZ(sum);
+
+        // Compute the reciprocal 1/sum
+        const int32_t shifted_sum = (((sum) << (headroom - 1)) + (1 << 13)) >> 14; // normalise by headroom - 1 bits, then rounded shift right by 14
+
+        // Since LUT computes 1/(1 + x), compute x = (sum - 1) => -65536
+        // Since LUT expects a symmetrical input, recenter from [UINT16_MIN, UINT16_MAX] to [INT16_MIN, INT16_MAX] =>
+        // -32768 ==> So in total -65536 -32768 => -98304
+        const int16_t symmetric_shifted_sum = shifted_sum - 98304;
+
+        // Lookup from one by one table
+        const int16_t index = (256 + (symmetric_shifted_sum >> 7)); // same index/fraction split as the exp lookup above
+        const int16_t offset = symmetric_shifted_sum & 0x7f;
+        const int16_t base = softmax_params->one_by_one_lut[index];
+        const int16_t slope = softmax_params->one_by_one_lut[index + 1] - softmax_params->one_by_one_lut[index];
+        const int16_t delta = (slope * offset + 64) >> 7;
+        const int16_t one_by_one_result = (base + delta);
+
+        for (col = 0; col < row_size; ++col)
+        {
+            const int16_t right_shift = 30 - headroom; // loop-invariant; depends only on this row's sum
+            int32_t result = (cached_exp_results[col] * one_by_one_result) >> right_shift;
+            result = (result + 1) >> 1; // Last shift position and insert round
+            output[col] = (int16_t)result; // replaces the cached exp value stored in the same slot
+        }
+
+        output += row_size;
+        input += row_size;
+    }
+
+    return ARM_CMSIS_NN_SUCCESS;
+}
+
+/**
+ * @} end of Softmax group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
index d017d1b..2de8707 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_softmax_s8.c
  * Description:  S8 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.2.0.1
+ * $Date:        9 March 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -71,7 +71,7 @@ static int32x4_t arm_exp_on_negative_values_mve_32x4(int32x4_t val)
     mve_pred16_t p = vcmpeqq_n_s32(val, 0);
     mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p);
 
-    result = SELECT_USING_MASK(mask, vdupq_n_s32(Q31_MAX), result);
+    result = SELECT_USING_MASK(mask, vdupq_n_s32(NN_Q31_MAX), result);
     return result;
 }
 #endif
@@ -95,8 +95,8 @@ void arm_softmax_s8(const int8_t *input,
 {
 #ifdef ARM_MATH_MVEI
 
-#define ACT_MIN ((int8_t)Q7_MIN)
-#define ACT_MAX ((int8_t)Q7_MAX)
+#define ACT_MIN ((int8_t)NN_Q7_MIN)
+#define ACT_MAX ((int8_t)NN_Q7_MAX)
 
     const int32_t mask = (1 << shift);
 
@@ -151,7 +151,7 @@ void arm_softmax_s8(const int8_t *input,
 
         const int32_t headroom = __CLZ((uint32_t)sum);
         const int32_t bits_over_unit = ACCUM_BITS - headroom + 23;
-        const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31));
+        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31));
 
         vec_count = row_size / 4;
         idx = 0;
@@ -194,7 +194,8 @@ void arm_softmax_s8(const int8_t *input,
             if (diff >= diff_min)
             {
                 const int32_t res =
-                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128;
+                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                    NN_Q7_MIN;
                 output[tail_idx + i] = (int8_t)CLAMP(res, (int32_t)ACT_MAX, (int32_t)ACT_MIN);
             }
             else
@@ -207,57 +208,10 @@ void arm_softmax_s8(const int8_t *input,
         output += row_size;
     }
 #else
-    const int32_t mask = (1 << shift);
-
-    int32_t col = 0;
-    int32_t row_idx;
-
-    for (row_idx = 0; row_idx < num_rows; ++row_idx)
-    {
-        // Find the maximum value in order to ensure numerical stability
-        int8_t max = *input;
-
-        for (col = 1; col < row_size; ++col)
-        {
-            max = MAX(max, input[col]);
-        }
-
-        int32_t diff = 0;
-        int32_t sum = 0;
-
-        for (col = 0; col < row_size; ++col)
-        {
-            diff = input[col] - max;
-            if (diff >= diff_min)
-            {
-                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
-            }
-        }
-
-        const int32_t headroom = __CLZ(sum);
-        const int32_t bits_over_unit = ACCUM_BITS - headroom + 23;
-        const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31));
-
-        for (col = 0; col < row_size; ++col)
-        {
-            diff = input[col] - max;
-            if (diff >= diff_min)
-            {
-                const int32_t res =
-                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128;
-                output[col] = (int8_t)CLAMP(res, (int32_t)127, (int32_t)-128);
-            }
-            else
-            {
-                output[col] = -128;
-            }
-        }
-        input += row_size;
-        output += row_size;
-    }
-
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, false, (void *)output);
 #endif
 }
+
 /**
  * @} end of Softmax group
  */
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c
new file mode 100644
index 0000000..a6eb67a
--- /dev/null
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c
@@ -0,0 +1,59 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s8_s16.c
+ * Description:  S8 to s16 softmax function
+ *
+ * $Date:        7 January 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
+#include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+void arm_softmax_s8_s16(const int8_t *input, // s8 input; all arguments are forwarded unchanged to the common s8 softmax
+                        const int32_t num_rows,
+                        const int32_t row_size,
+                        const int32_t mult,
+                        const int32_t shift,
+                        const int32_t diff_min,
+                        int16_t *output) // s16 output buffer
+{
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, true, (void *)output); // int16_output = true selects the s16 output path
+}
+/**
+ * @} end of Softmax group
+ */
+
+#endif // EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
diff --git a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
index e6691e5..25220fe 100644
--- a/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
+++ b/edge-impulse-sdk/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
@@ -1,7 +1,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_LOAD_CMSIS_NN_SOURCES
 /*
- * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2010-2019, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -23,8 +23,8 @@
  * Title:        arm_softmax_with_batch_q7.c
  * Description:  Q7 softmax function
  *
- * $Date:        09. October 2020
- * $Revision:    V.1.0.1
+ * $Date:        4 Aug 2022
+ * $Revision:    V.1.0.2
  *
  * Target Processor:  Cortex-M and Cortex-A cores
  *
@@ -41,14 +41,10 @@
  * @{
  */
 
-/**
- * @brief Q7 softmax function with batch parameter
- * @param[in]       vec_in      pointer to input vector
- * @param[in]       nb_batches  number of batches
- * @param[in]       dim_vec     input vector dimention
- * @param[out]      p_out       pointer to output vector
+/*
+ * Q7 softmax function with batch parameter
  *
- * @details
+ * details
  *
  *  Here, instead of typical natural logarithm e based softmax, we use
  *  2-based softmax here, i.e.,:
diff --git a/edge-impulse-sdk/Doxyfile b/edge-impulse-sdk/Doxyfile
new file mode 100644
index 0000000..98b3852
--- /dev/null
+++ b/edge-impulse-sdk/Doxyfile
@@ -0,0 +1,2582 @@
+# Doxyfile 1.8.17
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "Edge Impulse C++ SDK"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc/doxygen/
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES    = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description.
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity): The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before file names in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used.
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\), this can lead to conflicts with the
+# commands \{ and \} for these it is advised to use the version @{ and @} or use
+# a double escape (\\{ and \\})
+
+ALIASES                =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is
+# Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 5
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = NO
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespaces
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# (including Cygwin) and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command "<command> <input-file>", where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output is used as
+# the file version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = classifier/ \
+                         dsp/ \
+                         porting/
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested: *.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
+# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f, *.for, *.tcl, *.vhd,
+# *.vhdl, *.ucf, *.qsf and *.ice.
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.idl \
+                         *.ddl \
+                         *.odl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.cs \
+                         *.d \
+                         *.php \
+                         *.php4 \
+                         *.php5 \
+                         *.phtml \
+                         *.inc \
+                         *.m \
+                         *.markdown \
+                         *.md \
+                         *.mm \
+                         *.dox \
+                         *.doc \
+                         *.txt \
+                         *.py \
+                         *.pyw \
+                         *.f90 \
+                         *.f95 \
+                         *.f03 \
+                         *.f08 \
+                         *.f \
+                         *.for \
+                         *.tcl \
+                         *.vhd \
+                         *.vhdl \
+                         *.ucf \
+                         *.qsf \
+                         *.ice
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# entity all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see https://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS          =
+
+# If clang assisted parsing is enabled you can provide the clang parser with the
+# path to the compilation database (see:
+# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
+# were built. This is equivalent to specifying the "-p" option to a clang tool,
+# such as clang-check. These options will then be passed to the parser.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+
+CLANG_DATABASE_PATH    =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output.
+# The default value is: YES.
+
+GENERATE_HTML          = NO
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depends on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: https://developer.apple.com/xcode/), introduced with OSX
+# 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See
+# https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using JavaScript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when USE_PDFLATEX is not enabled the default is latex; when
+# USE_PDFLATEX is enabled the default is pdflatex, and if latex is chosen in
+# the latter case it is overwritten by pdflatex. For specific output languages
+# the default may have been set differently; this depends on the implementation
+# of the output language.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         =
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify :
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE        = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR             =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = YES
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) in the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             = __DOXYGEN__ \
+                        "__attribute__()="
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: YES.
+
+HAVE_DOT               = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,
+# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,
+# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH      =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES
diff --git a/edge-impulse-sdk/LICENSE-apache-2.0.txt b/edge-impulse-sdk/LICENSE-apache-2.0.txt
index 59cd3f8..0cdd12c 100644
--- a/edge-impulse-sdk/LICENSE-apache-2.0.txt
+++ b/edge-impulse-sdk/LICENSE-apache-2.0.txt
@@ -162,4 +162,4 @@ in accepting such obligations, You may act only on Your own behalf and on Your
 sole responsibility, not on behalf of any other Contributor, and only if You
 agree to indemnify, defend, and hold each Contributor harmless for any liability
 incurred by, or claims asserted against, such Contributor by reason of your
-accepting any such warranty or additional liability.
+accepting any such warranty or additional liability.
\ No newline at end of file
diff --git a/edge-impulse-sdk/classifier/ei_classifier_config.h b/edge-impulse-sdk/classifier/ei_classifier_config.h
index 69d9ec8..8865a85 100644
--- a/edge-impulse-sdk/classifier/ei_classifier_config.h
+++ b/edge-impulse-sdk/classifier/ei_classifier_config.h
@@ -75,8 +75,15 @@
 
 #ifndef EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN
     #if defined(ESP32)
+        #include "sdkconfig.h"
         #define EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN      1
+        #define ESP_NN                                  1
     #endif // ESP32 check
+    #if defined(CONFIG_IDF_TARGET_ESP32S3)
+        #define EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3      1
+    #endif // ESP32S3 check
+#else
+    #define ESP_NN                                  1
 #endif
 
 // no include checks in the compiler? then just include metadata and then ops_define (optional if on EON model)
diff --git a/edge-impulse-sdk/classifier/ei_classifier_smooth.h b/edge-impulse-sdk/classifier/ei_classifier_smooth.h
index 31be582..4f7e039 100644
--- a/edge-impulse-sdk/classifier/ei_classifier_smooth.h
+++ b/edge-impulse-sdk/classifier/ei_classifier_smooth.h
@@ -79,7 +79,7 @@ const char* ei_classifier_smooth_update(ei_classifier_smooth_t *smooth, ei_impul
             reading = (int)ix;
         }
     }
-#if EI_CLASSIFIER_HAS_ANOMALY == 1
+#if EI_CLASSIFIER_HAS_ANOMALY
     if (result->anomaly >= smooth->anomaly_confidence) {
         reading = -2; // anomaly
     }
diff --git a/edge-impulse-sdk/classifier/ei_classifier_types.h b/edge-impulse-sdk/classifier/ei_classifier_types.h
index c1807a9..b9404ea 100644
--- a/edge-impulse-sdk/classifier/ei_classifier_types.h
+++ b/edge-impulse-sdk/classifier/ei_classifier_types.h
@@ -26,40 +26,264 @@
 #define EI_CLASSIFIER_MAX_OBJECT_DETECTION_COUNT 10
 #endif
 
-#ifndef EI_CLASSIFIER_MAX_LABELS_COUNT
-#define EI_CLASSIFIER_MAX_LABELS_COUNT 25
-#endif
+/**
+ * @defgroup ei_structs Structs
+ * 
+ * Public-facing structs for Edge Impulse C++ SDK.
+ * 
+ * @addtogroup ei_structs
+ * @{
+ */
 
+/**
+ * @brief Holds the classification result (label and score) for a single label.
+ * 
+ * `ei_impulse_result_t` holds the output of `run_classifier()`. If object detection is
+ * enabled, then the output results is a
+ * pointer to an array of bounding boxes of size `bounding_boxes_count`, as given by
+ * [ei_impulse_result_bounding_box_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_bounding_box_t).
+ * Otherwise, results are stored as an array of classification scores, as given by
+ * [ei_impulse_result_classification_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_classification_t).
+ * 
+ * If anomaly detection is enabled (e.g. `EI_CLASSIFIER_HAS_ANOMALY == 1`), then the
+ * anomaly score will be stored as a floating point value in `anomaly`.
+ * 
+ * Timing information is stored in an 
+ * [ei_impulse_result_timing_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_timing_t)
+ * struct.
+ * 
+ * **Source**: [classifier/ei_classifier_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_classifier_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+ */
 typedef struct {
+    /**
+     * Label of the detected object
+     */
     const char *label;
+
+    /**
+     * Value of the detected object
+     */
     float value;
 } ei_impulse_result_classification_t;
 
+/**
+ * @brief Holds the output of visual anomaly detection (FOMO-AD)
+ * 
+ * If visual anomaly detection is enabled (e.g. `EI_CLASSIFIER_HAS_VISUAL_ANOMALY ==
+ * 1`), then the output results will be a pointer to an array of grid cells of size 
+ * `visual_ad_count`, as given by 
+ * [ei_impulse_result_bounding_box_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_bounding_box_t).
+ * 
+ * The visual anomaly detection result is stored in `visual_ad_result`, which contains the mean and max values of the grid cells.
+ * 
+ * **Source**: [classifier/ei_classifier_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_classifier_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+*/
 typedef struct {
+    /**
+     * Mean value of the grid cells
+     */
+    float mean_value;
+
+    /**
+     * Max value of the grid cells
+     */
+    float max_value;
+} ei_impulse_visual_ad_result_t;
+
+/**
+ * @brief Holds information for a single bounding box.
+ * 
+ * If object detection is enabled (i.e. `EI_CLASSIFIER_OBJECT_DETECTION == 1`), then
+ * inference results will be one or more bounding boxes. The bounding boxes with the
+ * highest confidence scores (assuming those scores are equal to or greater than
+ * `EI_CLASSIFIER_OBJECT_DETECTION_THRESHOLD`), given by the `value` member, are
+ * returned from inference. The total number of bounding boxes returned will be at
+ * least `EI_CLASSIFIER_OBJECT_DETECTION_COUNT`. The exact number of bounding boxes
+ * is stored in the `bounding_boxes_count` field of
+ * `ei_impulse_result_t`.
+ * 
+ * A bounding box is a rectangle that ideally surrounds the identified object. The 
+ * (`x`, `y`) coordinates in the struct identify the top-left corner of the box.
+ * `label` is the predicted class with the highest confidence score. `value` is the
+ * confidence score between [0.0..1.0] of the given `label`.
+ * 
+ * **Source**: [classifier/ei_classifier_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_classifier_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+*/
+typedef struct {
+    /**
+     * Pointer to a character array describing the associated class of the given 
+     * bounding box. Taken from one of the elements of 
+     * `ei_classifier_inferencing_categories[]`.
+     */
     const char *label;
+
+    /**
+     * x coordinate of the top-left corner of the bounding box
+     */
     uint32_t x;
+
+    /**
+     * y coordinate of the top-left corner of the bounding box
+     */
     uint32_t y;
+
+    /**
+     * Width of the bounding box
+     */
     uint32_t width;
+
+    /**
+     * Height of the bounding box
+     */
     uint32_t height;
+
+    /**
+     * Confidence score of the label describing the bounding box
+     */
     float value;
 } ei_impulse_result_bounding_box_t;
 
+/**
+ * @brief Holds timing information about the processing (DSP) and inference blocks.
+ * 
+ * Records timing information during the execution of the preprocessing (DSP) and
+ * inference blocks. Can be used to determine if inference will meet timing requirements
+ * on your particular platform.
+ * 
+ * **Source**: [classifier/ei_classifier_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_classifier_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+ */
 typedef struct {
+    /**
+     * If using `run_impulse()` to perform sampling and inference, it is the amount of
+     * time (in milliseconds) it took to fetch raw samples. Not used for 
+     * `run_classifier()`.
+     */
     int sampling;
+
+    /**
+     * Amount of time (in milliseconds) it took to run the preprocessing (DSP) block
+     */
     int dsp;
+
+    /**
+     * Amount of time (in milliseconds) it took to run the inference block
+     */
     int classification;
+
+    /**
+     * Amount of time (in milliseconds) it took to run anomaly detection. Valid only if
+     * `EI_CLASSIFIER_HAS_ANOMALY == 1`.
+     */
     int anomaly;
+
+    /**
+     * Amount of time (in microseconds) it took to run the preprocessing (DSP) block
+     */
     int64_t dsp_us;
+
+    /**
+     * Amount of time (in microseconds) it took to run the inference block
+     */
     int64_t classification_us;
+
+    /**
+     * Amount of time (in microseconds) it took to run anomaly detection. Valid only if
+     * `EI_CLASSIFIER_HAS_ANOMALY == 1`.
+     */
     int64_t anomaly_us;
 } ei_impulse_result_timing_t;
 
+/**
+ * @brief Holds the output of inference, anomaly results, and timing information.
+ * 
+ * `ei_impulse_result_t` holds the output of `run_classifier()`. If object detection is
+ * enabled (e.g. `EI_CLASSIFIER_OBJECT_DETECTION == 1`), then the output results is a
+ * pointer to an array of bounding boxes of size `bounding_boxes_count`, as given by
+ * [ei_impulse_result_bounding_box_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_bounding_box_t). 
+ * Otherwise, results are stored as an array of classification scores, as given by
+ * [ei_impulse_result_classification_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_classification_t).
+ * 
+ * If anomaly detection is enabled (e.g. `EI_CLASSIFIER_HAS_ANOMALY == 1`), then the
+ * anomaly score will be stored as a floating point value in `anomaly`.
+ * 
+ * Timing information is stored in an 
+ * [ei_impulse_result_timing_t](https://docs.edgeimpulse.com/reference/ei_impulse_result_timing_t) 
+ * struct.
+ * 
+ * **Source**: [classifier/ei_classifier_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_classifier_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+ */
 typedef struct {
+    /**
+     * Array of bounding boxes of the detected objects, if object detection is enabled.
+     */
     ei_impulse_result_bounding_box_t *bounding_boxes;
+
+    /**
+     * Number of bounding boxes detected. If object detection is not enabled, this will
+     * be 0.
+     */
     uint32_t bounding_boxes_count;
-    ei_impulse_result_classification_t classification[EI_CLASSIFIER_MAX_LABELS_COUNT];
+
+    /**
+     * Array of classification results. If object detection is enabled, this will be
+     * empty.
+     */
+#if EI_CLASSIFIER_LABEL_COUNT == 0
+    // EI_CLASSIFIER_LABEL_COUNT can be 0 for anomaly only models
+    // to prevent compiler warnings/errors, we need to have at least one element
+    ei_impulse_result_classification_t classification[1];
+#else
+    ei_impulse_result_classification_t classification[EI_CLASSIFIER_LABEL_COUNT];
+#endif
+
+    /**
+     * Anomaly score. If anomaly detection is not enabled, this will be 0. A higher
+     * anomaly score indicates greater likelihood of an anomalous sample (e.g. it is
+     * farther away from its cluster).
+     */
     float anomaly;
+
+    /**
+     * Timing information for the processing (DSP) and inference blocks.
+     */
     ei_impulse_result_timing_t timing;
+
+    /**
+     * Copy the output data to a buffer. If set to false, the output data will be
+     * returned as a pointer to the internal buffer. If set to true, the output data
+     * will be copied to the buffer provided in `ei_impulse_output_t`.
+     */
+    bool copy_output;
+#if EI_CLASSIFIER_HAS_VISUAL_ANOMALY || __DOXYGEN__
+    /**
+     * Array of grid cells of the detected visual anomalies, if visual anomaly detection
+     * is enabled.
+     */
+    ei_impulse_result_bounding_box_t *visual_ad_grid_cells;
+
+    /**
+     * Number of grid cells detected as visual anomalies, if visual anomaly detection is
+     * enabled.
+     */
+    uint32_t visual_ad_count;
+
+    /**
+     * Visual anomaly detection result, if visual anomaly detection is enabled.
+     */
+    ei_impulse_visual_ad_result_t visual_ad_result;
+#endif // EI_CLASSIFIER_HAS_VISUAL_ANOMALY
 } ei_impulse_result_t;
 
+/** @} */
+
 #endif // _EDGE_IMPULSE_RUN_CLASSIFIER_TYPES_H_
diff --git a/edge-impulse-sdk/classifier/ei_fill_result_struct.h b/edge-impulse-sdk/classifier/ei_fill_result_struct.h
index c5bf6af..e16a589 100644
--- a/edge-impulse-sdk/classifier/ei_fill_result_struct.h
+++ b/edge-impulse-sdk/classifier/ei_fill_result_struct.h
@@ -27,6 +27,7 @@ using namespace ei;
 #include "edge-impulse-sdk/classifier/ei_model_types.h"
 #include "edge-impulse-sdk/classifier/ei_classifier_types.h"
 #include "edge-impulse-sdk/classifier/ei_nms.h"
+#include "edge-impulse-sdk/dsp/ei_vector.h"
 
 #ifndef EI_HAS_OBJECT_DETECTION
     #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_SSD)
@@ -44,8 +45,27 @@ using namespace ei;
     #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV7)
     #define EI_HAS_YOLOV7 1
     #endif
+    #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_SSD)
+    #define EI_HAS_TAO_DECODE_DETECTIONS 1
+    #endif
+    #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4)
+    #define EI_HAS_TAO_YOLO 1
+    #endif
+    #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3)
+    #define EI_HAS_TAO_YOLOV3 1
+    #endif
+    #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4)
+    #define EI_HAS_TAO_YOLOV4 1
+    #endif
+    #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV2)
+    #define EI_HAS_YOLOV2 1
+    #endif
 #endif
 
+__attribute__((unused)) inline float sigmoid(float a) { // logistic function: 1 / (1 + e^(-a))
+    return 1.0f / (1.0f + exp(-a));
+}
+
 #ifdef EI_HAS_FOMO
 typedef struct cube {
     size_t x;
@@ -184,6 +204,7 @@ __attribute__((unused)) static void fill_result_struct_from_cubes(ei_impulse_res
 #endif
 
 __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_fomo(const ei_impulse_t *impulse,
+                                                                            const ei_learning_block_config_tflite_graph_t *block_config,
                                                                             ei_impulse_result_t *result,
                                                                             float *data,
                                                                             int out_width,
@@ -201,7 +222,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_fomo(cons
             for (size_t ix = 1; ix < impulse->label_count + 1; ix++) {
                 float vf = data[loc+ix];
 
-                ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], impulse->object_detection_threshold);
+                ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], block_config->threshold);
             }
         }
     }
@@ -215,6 +236,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_fomo(cons
 }
 
 __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_i8_fomo(const ei_impulse_t *impulse,
+                                                                           const ei_learning_block_config_tflite_graph_t *block_config,
                                                                            ei_impulse_result_t *result,
                                                                            int8_t *data,
                                                                            float zero_point,
@@ -235,7 +257,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_i8_fomo(const
                 int8_t v = data[loc+ix];
                 float vf = static_cast<float>(v - zero_point) * scale;
 
-                ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], impulse->object_detection_threshold);
+                ei_handle_cube(&cubes, x, y, vf, impulse->categories[ix - 1], block_config->threshold);
             }
         }
     }
@@ -253,6 +275,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_i8_fomo(const
  * (we don't support quantized here a.t.m.)
  */
 __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_object_detection(const ei_impulse_t *impulse,
+                                                                                        const ei_learning_block_config_tflite_graph_t *block_config,
                                                                                         ei_impulse_result_t *result,
                                                                                         float *data,
                                                                                         float *scores,
@@ -267,7 +290,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_object_de
         float score = scores[ix];
         float label = labels[ix];
 
-        if (score >= impulse->object_detection_threshold) {
+        if (score >= block_config->threshold) {
             float ystart = data[(ix * 4) + 0];
             float xstart = data[(ix * 4) + 1];
             float yend = data[(ix * 4) + 2];
@@ -366,14 +389,80 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32(const ei_
     return EI_IMPULSE_OK;
 }
 
+/**
+ * Fill the visual anomaly result structures from an unquantized output tensor: the mean and max grid score, plus one "anomaly" box per grid cell whose score is >= threshold. Always returns EI_IMPULSE_OK.
+ */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_visual_ad_struct_f32(const ei_impulse_t *impulse,
+                                                                       ei_impulse_result_t *result,
+                                                                       float *data,
+                                                                       float threshold,
+                                                                       bool debug) {
+#if EI_CLASSIFIER_HAS_VISUAL_ANOMALY
+    float max_val = 0; // starts at 0, so the reported max is never negative
+    float sum_val = 0;
+    // the feature extractor output will be 1/8 of input
+    // due to the cut-off layer chosen in MobileNetV2
+    uint32_t grid_size_x = (impulse->input_width / 8) / 2 - 1; // NOTE(review): becomes 0 or wraps around for inputs narrower than 32 px - confirm such inputs cannot occur
+    uint32_t grid_size_y = (impulse->input_height / 8) / 2 - 1;
+    // first pass: accumulate the sum and track the maximum over every grid cell
+    for (uint32_t ix = 0; ix < grid_size_x * grid_size_y; ix++) {
+        float value = data[ix];
+        sum_val += value;
+        if (value > max_val) {
+            max_val = value;
+        }
+    }
+
+    result->visual_ad_result.mean_value = sum_val / (grid_size_x * grid_size_y);
+    result->visual_ad_result.max_value = max_val;
+
+    static ei_vector<ei_impulse_result_bounding_box_t> results; // static: result->visual_ad_grid_cells points into this vector after return; contents are replaced on the next call
+
+    int added_boxes_count = 0;
+    results.clear();
+    // second pass: emit one box for every cell at or above the threshold
+    for (uint32_t x = 0; x <= grid_size_x - 1; x++) {
+        for (uint32_t y = 0; y <= grid_size_y - 1; y++) {
+            if (data[x * grid_size_x + y] >= threshold) { // NOTE(review): stride is grid_size_x but the inner index is bounded by grid_size_y - identical only for square grids; confirm intended layout
+                ei_impulse_result_bounding_box_t tmp = {
+                    .label = "anomaly",
+                    .x = static_cast<uint32_t>(y * (static_cast<float>(impulse->input_height) / grid_size_y)),
+                    .y = static_cast<uint32_t>(x * (static_cast<float>(impulse->input_width) / grid_size_x)),
+                    .width = (impulse->input_width / grid_size_x),
+                    .height = (impulse->input_height / grid_size_y),
+                    .value = data[x * grid_size_x + y]
+                };
+
+                results.push_back(tmp);
+                added_boxes_count++;
+            }
+        }
+    }
+
+    // if we didn't detect min required objects, fill the rest with fixed value
+    if (added_boxes_count < impulse->object_detection_count) {
+        results.resize(impulse->object_detection_count);
+        for (size_t ix = added_boxes_count; ix < impulse->object_detection_count; ix++) {
+            results[ix].value = 0.0f;
+        }
+    }
+    // publish the (possibly padded) list through the result struct
+    result->visual_ad_grid_cells = results.data();
+    result->visual_ad_count = results.size();
+#endif // EI_CLASSIFIER_HAS_VISUAL_ANOMALY
+    return EI_IMPULSE_OK;
+}
+
 /**
   * Fill the result structure from an unquantized output tensor
   */
 __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov5(const ei_impulse_t *impulse,
+                                                                              const ei_learning_block_config_tflite_graph_t *block_config,
                                                                               ei_impulse_result_t *result,
                                                                               int version,
                                                                               float *data,
-                                                                              size_t output_features_count) {
+                                                                              size_t output_features_count,
+                                                                              bool debug = false) {
 #ifdef EI_HAS_YOLOV5
     static std::vector<ei_impulse_result_bounding_box_t> results;
     results.clear();
@@ -417,7 +506,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov5(co
             }
         }
 
-        if (score >= impulse->object_detection_threshold && score <= 1.0f) {
+        if (score >= block_config->threshold && score <= 1.0f) {
             ei_impulse_result_bounding_box_t r;
             r.label = impulse->categories[label];
 
@@ -437,7 +526,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov5(co
         }
     }
 
-    EI_IMPULSE_ERROR nms_res = ei_run_nms(&results);
+    EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &results, debug);
     if (nms_res != EI_IMPULSE_OK) {
         return nms_res;
     }
@@ -466,12 +555,14 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov5(co
 */
 template<typename T>
 __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_yolov5(const ei_impulse_t *impulse,
+                                                                                    const ei_learning_block_config_tflite_graph_t *block_config,
                                                                                     ei_impulse_result_t *result,
                                                                                     int version,
                                                                                     T *data,
                                                                                     float zero_point,
                                                                                     float scale,
-                                                                                    size_t output_features_count) {
+                                                                                    size_t output_features_count,
+                                                                                    bool debug = false) {
 #ifdef EI_HAS_YOLOV5
     static std::vector<ei_impulse_result_bounding_box_t> results;
     results.clear();
@@ -515,7 +606,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_yol
             }
         }
 
-        if (score >= impulse->object_detection_threshold && score <= 1.0f) {
+        if (score >= block_config->threshold && score <= 1.0f) {
             ei_impulse_result_bounding_box_t r;
             r.label = ei_classifier_inferencing_categories[label];
 
@@ -535,7 +626,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_yol
         }
     }
 
-    EI_IMPULSE_ERROR nms_res = ei_run_nms(&results);
+    EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &results, debug);
     if (nms_res != EI_IMPULSE_OK) {
         return nms_res;
     }
@@ -563,9 +654,12 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_yol
   * Fill the result structure from an unquantized output tensor
   * (we don't support quantized here a.t.m.)
   */
-__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(const ei_impulse_t *impulse, ei_impulse_result_t *result,
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(const ei_impulse_t *impulse,
+                                                                             const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                             ei_impulse_result_t *result,
                                                                              float *data,
-                                                                             size_t output_features_count) {
+                                                                             size_t output_features_count,
+                                                                             bool debug = false) {
 #ifdef EI_HAS_YOLOX
     static std::vector<ei_impulse_result_bounding_box_t> results;
     results.clear();
@@ -700,7 +794,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(con
         for (int col = 0; col < (int)scores.cols; col++) {
             float confidence = scores.buffer[(row * scores.cols) + col];
 
-            if (confidence >= impulse->object_detection_threshold && confidence <= 1.0f) {
+            if (confidence >= block_config->threshold && confidence <= 1.0f) {
                 ei_impulse_result_bounding_box_t r;
                 r.label = impulse->categories[col];
                 r.value = confidence;
@@ -737,7 +831,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(con
         }
     }
 
-    EI_IMPULSE_ERROR nms_res = ei_run_nms(&results);
+    EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &results, debug);
     if (nms_res != EI_IMPULSE_OK) {
         return nms_res;
     }
@@ -765,7 +859,9 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox(con
   * Fill the result structure from an unquantized output tensor
   * (we don't support quantized here a.t.m.)
   */
-__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox_detect(const ei_impulse_t *impulse, ei_impulse_result_t *result,
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox_detect(const ei_impulse_t *impulse,
+                                                                             const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                             ei_impulse_result_t *result,
                                                                              float *data,
                                                                              size_t output_features_count) {
 #ifdef EI_HAS_YOLOX
@@ -781,7 +877,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox_det
         float confidence = outputs.buffer[(row * outputs.cols) + 4];
         int class_idx = (int)outputs.buffer[(row * outputs.cols) + 5];
 
-        if (confidence >= impulse->object_detection_threshold && confidence <= 1.0f) {
+        if (confidence >= block_config->threshold && confidence <= 1.0f) {
             ei_impulse_result_bounding_box_t r;
             r.label = ei_classifier_inferencing_categories[class_idx];
             r.value = confidence;
@@ -833,7 +929,9 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolox_det
   * Fill the result structure from an unquantized output tensor
   * (we don't support quantized here a.t.m.)
   */
-__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov7(const ei_impulse_t *impulse, ei_impulse_result_t *result,
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov7(const ei_impulse_t *impulse,
+                                                                              const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                              ei_impulse_result_t *result,
                                                                               float *data,
                                                                               size_t output_features_count) {
 #ifdef EI_HAS_YOLOV7
@@ -854,7 +952,7 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov7(co
         uint32_t label = (uint32_t)data[base_ix + 5];
         float score = data[base_ix + 6];
 
-        if (score >= impulse->object_detection_threshold && score <= 1.0f) {
+        if (score >= block_config->threshold && score <= 1.0f) {
             ei_impulse_result_bounding_box_t r;
             r.label = ei_classifier_inferencing_categories[label];
 
@@ -886,4 +984,749 @@ __attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov7(co
 #endif // #ifdef EI_HAS_YOLOV7
 }
 
+#if (EI_HAS_TAO_DECODE_DETECTIONS == 1) || (EI_HAS_TAO_YOLO == 1)
+
+__attribute__((unused)) static void prepare_tao_results_common(const ei_impulse_t *impulse,
+                                                               ei_impulse_result_t *result,
+                                                               std::vector<ei_impulse_result_bounding_box_t> *results) {
+    #define EI_CLASSIFIER_OBJECT_DETECTION_KEEP_TOPK 200
+    // Finalizes `results` in place: pad to impulse->object_detection_count, sort by score (highest first), keep at most the top K, then publish pointer and count through `result`.
+    // if we didn't detect min required objects, fill the rest with fixed value
+    size_t added_boxes_count = results->size();
+    size_t object_detection_count = impulse->object_detection_count;
+    if (added_boxes_count < object_detection_count) {
+        results->resize(object_detection_count);
+        for (size_t ix = added_boxes_count; ix < object_detection_count; ix++) {
+            (*results)[ix].value = 0.0f;
+        }
+    }
+
+    // we sort in descending score order across all classes,
+    // since results for each class are pushed to the end.
+    std::sort(results->begin(), results->end(), [ ]( const ei_impulse_result_bounding_box_t& lhs, const ei_impulse_result_bounding_box_t& rhs )
+    {
+        return lhs.value > rhs.value;
+    });
+
+    // keep topK
+    if (results->size() > EI_CLASSIFIER_OBJECT_DETECTION_KEEP_TOPK) {
+        results->erase(results->begin() + EI_CLASSIFIER_OBJECT_DETECTION_KEEP_TOPK, results->end());
+    }
+    // `result` only borrows the vector's storage; the caller's vector must outlive any use of result->bounding_boxes
+    result->bounding_boxes = results->data();
+    result->bounding_boxes_count = results->size();
+}
+
+
+#endif
+
+#ifdef EI_HAS_TAO_DECODE_DETECTIONS
+/**
+ * Fill the result structure from an output tensor (TAO RetinaNet / TAO SSD last layers): per class, dequantize the scores, decode the anchor-encoded boxes, run NMS, then merge the survivors. Pass zero_point = 0 and scale = 1 for float data.
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_tao_decode_detections_common(const ei_impulse_t *impulse,
+                                                                                                ei_impulse_result_t *result,
+                                                                                                T *data,
+                                                                                                float zero_point,
+                                                                                                float scale,
+                                                                                                size_t output_features_count,
+                                                                                                float threshold,
+                                                                                                bool debug = false) {
+    // row layout: [slot 0 (never read here - presumably background), label_count class scores, 12 box/anchor/variance values]
+    size_t col_size = 12 + impulse->label_count + 1;
+    size_t row_count = output_features_count / col_size;
+
+    static std::vector<ei_impulse_result_bounding_box_t> results; // static: result->bounding_boxes points into this vector after return
+    static std::vector<ei_impulse_result_bounding_box_t> class_results;
+    results.clear();
+
+    for (size_t cls_idx = 1; cls_idx < (size_t)(impulse->label_count + 1); cls_idx++)  {
+
+        std::vector<float> boxes;
+        std::vector<float> scores;
+        std::vector<int> classes;
+        class_results.clear();
+
+        for (size_t ix = 0; ix < row_count; ix++) {
+            // dequantize this row's score for the current class
+            float score = (static_cast<float>(data[ix * col_size + cls_idx]) - zero_point) * scale;
+
+            if ((score < threshold) || (score > 1.0f)) {
+                continue;
+            }
+
+            // # 1. calculate boxes location
+            size_t base_ix = ix * col_size + col_size; // references the end of the row
+
+            float r_12 = (static_cast<float>(data[base_ix - 12]) - zero_point) * scale;
+            float r_11 = (static_cast<float>(data[base_ix - 11]) - zero_point) * scale;
+            float r_10 = (static_cast<float>(data[base_ix - 10]) - zero_point) * scale;
+            float r_9  = (static_cast<float>(data[base_ix -  9]) - zero_point) * scale;
+            float r_8  = (static_cast<float>(data[base_ix -  8]) - zero_point) * scale;
+            float r_7  = (static_cast<float>(data[base_ix -  7]) - zero_point) * scale;
+            float r_6  = (static_cast<float>(data[base_ix -  6]) - zero_point) * scale;
+            float r_5  = (static_cast<float>(data[base_ix -  5]) - zero_point) * scale;
+            float r_4  = (static_cast<float>(data[base_ix -  4]) - zero_point) * scale;
+            float r_3  = (static_cast<float>(data[base_ix -  3]) - zero_point) * scale;
+            float r_2  = (static_cast<float>(data[base_ix -  2]) - zero_point) * scale;
+            float r_1  = (static_cast<float>(data[base_ix -  1]) - zero_point) * scale;
+
+            // cx_pred = y_pred[..., -12]
+            // cy_pred = y_pred[..., -11]
+            // w_pred = y_pred[..., -10]
+            // h_pred = y_pred[..., -9]
+            float cx_pred = r_12;
+            float cy_pred = r_11;
+            float w_pred  = r_10;
+            float h_pred  = r_9;
+
+            // w_anchor = y_pred[..., -6] - y_pred[..., -8]
+            // h_anchor = y_pred[..., -5] - y_pred[..., -7]
+            float w_anchor = r_6 - r_8;
+            float h_anchor = r_5 - r_7;
+
+            // cx_anchor = tf.truediv(y_pred[..., -6] + y_pred[..., -8], 2.0)
+            // cy_anchor = tf.truediv(y_pred[..., -5] + y_pred[..., -7], 2.0)
+            float cx_anchor = (r_6 + r_8) / 2.0f;
+            float cy_anchor = (r_5 + r_7) / 2.0f;
+
+            // cx_variance = y_pred[..., -4]
+            // cy_variance = y_pred[..., -3]
+            float cx_variance = r_4;
+            float cy_variance = r_3;
+
+            // variance_w = y_pred[..., -2]
+            // variance_h = y_pred[..., -1]
+            float variance_w = r_2;
+            float variance_h = r_1;
+
+            // # Convert anchor box offsets to image offsets.
+            // cx = cx_pred * cx_variance * w_anchor + cx_anchor
+            // cy = cy_pred * cy_variance * h_anchor + cy_anchor
+            // w = tf.exp(w_pred * variance_w) * w_anchor
+            // h = tf.exp(h_pred * variance_h) * h_anchor
+            float cx = cx_pred * cx_variance * w_anchor + cx_anchor;
+            float cy = cy_pred * cy_variance * h_anchor + cy_anchor;
+            float w = exp(w_pred * variance_w) * w_anchor;
+            float h = exp(h_pred * variance_h) * h_anchor;
+
+            // # Convert 'centroids' to 'corners'.
+            float xmin = cx - (w / 2.0f);
+            float ymin = cy - (h / 2.0f);
+            float xmax = cx + (w / 2.0f);
+            float ymax = cy + (h / 2.0f);
+            // scale by the model input size (corner values are presumably relative 0..1 - TODO confirm)
+            xmin *= impulse->input_width;
+            ymin *= impulse->input_height;
+            xmax *= impulse->input_width;
+            ymax *= impulse->input_height;
+            // boxes are stored flat, four floats per detection: ymin, xmin, ymax, xmax
+            boxes.push_back(ymin);
+            boxes.push_back(xmin);
+            boxes.push_back(ymax);
+            boxes.push_back(xmax);
+            scores.push_back(score);
+            classes.push_back((int)(cls_idx-1)); // back to a 0-based class index
+        }
+        // NMS runs separately for each class on the candidates collected above
+        size_t nr_boxes = scores.size();
+        EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &class_results,
+                                              boxes.data(), scores.data(), classes.data(),
+                                              nr_boxes,
+                                              false /*clip_boxes*/,
+                                              debug);
+
+        if (nms_res != EI_IMPULSE_OK) {
+            return nms_res;
+        }
+        // append this class's survivors to the combined list
+        for (auto bb: class_results) {
+            results.push_back(bb);
+        }
+    }
+
+    prepare_tao_results_common(impulse, result, &results);
+
+    return EI_IMPULSE_OK;
+}
+#endif // #ifdef EI_HAS_TAO_DECODE_DETECTIONS
+
+/**
+ * Fill the result structure from a quantized output tensor, using block_config->threshold as the score cut-off. Returns EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE when EI_HAS_TAO_DECODE_DETECTIONS is not defined.
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_tao_decode_detections(const ei_impulse_t *impulse,
+                                                                                                   const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                                                   ei_impulse_result_t *result,
+                                                                                                   T *data,
+                                                                                                   float zero_point,
+                                                                                                   float scale,
+                                                                                                   size_t output_features_count,
+                                                                                                   bool debug = false) {
+#ifdef EI_HAS_TAO_DECODE_DETECTIONS
+    return fill_result_struct_tao_decode_detections_common(impulse, result, data, zero_point, scale, output_features_count, block_config->threshold, debug);
+#else
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE;
+#endif // #ifdef EI_HAS_TAO_DECODE_DETECTIONS
+}
+
+
+/**
+  * Fill the result structure from an unquantized output tensor (forwards with zero_point = 0 and scale = 1). Returns EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE when EI_HAS_TAO_DECODE_DETECTIONS is not defined.
+  */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_tao_decode_detections(const ei_impulse_t *impulse,
+                                                                                     const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                                     ei_impulse_result_t *result,
+                                                                                     float *data,
+                                                                                     size_t output_features_count,
+                                                                                     bool debug = false) {
+#ifdef EI_HAS_TAO_DECODE_DETECTIONS
+    return fill_result_struct_tao_decode_detections_common(impulse, result, data, 0.0f, 1.0f, output_features_count, block_config->threshold, debug);
+#else
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE;
+#endif // #ifdef EI_HAS_TAO_DECODE_DETECTIONS
+}
+
+#ifdef EI_HAS_TAO_YOLOV3
+/**
+ * Fill the result structure from an output tensor (TAO YOLOv3 last layer): per class, score each row, decode its box, run NMS, then merge the survivors. Pass zero_point = 0 and scale = 1 for float data.
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR  fill_result_struct_tao_yolov3_common(const ei_impulse_t *impulse,
+                                                                         ei_impulse_result_t *result,
+                                                                         T *data,
+                                                                         float zero_point,
+                                                                         float scale,
+                                                                         size_t output_features_count,
+                                                                         float threshold,
+                                                                         bool debug) {
+    // # x: 3-D tensor. Last dimension is
+    //          (cy, cx, ph, pw, step_y, step_x, pred_y, pred_x, pred_h, pred_w, object, cls...)
+    size_t col_size = 11 + impulse->label_count;
+    size_t row_count = output_features_count / col_size;
+
+    static std::vector<ei_impulse_result_bounding_box_t> results; // static: result->bounding_boxes points into this vector after return
+    static std::vector<ei_impulse_result_bounding_box_t> class_results;
+
+    results.clear();
+    for (size_t cls_idx = 0; cls_idx < (size_t)impulse->label_count; cls_idx++)  {
+
+        std::vector<float> boxes;
+        std::vector<float> scores;
+        std::vector<int> classes;
+        class_results.clear();
+
+        for (size_t ix = 0; ix < row_count; ix++) {
+            size_t data_ix = ix * col_size;
+            float r_0  = (static_cast<float>(data[data_ix +  0]) - zero_point) * scale;
+            float r_1  = (static_cast<float>(data[data_ix +  1]) - zero_point) * scale;
+            float r_2  = (static_cast<float>(data[data_ix +  2]) - zero_point) * scale;
+            float r_3  = (static_cast<float>(data[data_ix +  3]) - zero_point) * scale;
+            float r_4  = (static_cast<float>(data[data_ix +  4]) - zero_point) * scale;
+            float r_5  = (static_cast<float>(data[data_ix +  5]) - zero_point) * scale;
+            float r_6  = (static_cast<float>(data[data_ix +  6]) - zero_point) * scale;
+            float r_7  = (static_cast<float>(data[data_ix +  7]) - zero_point) * scale;
+            float r_8  = (static_cast<float>(data[data_ix +  8]) - zero_point) * scale;
+            float r_9  = (static_cast<float>(data[data_ix +  9]) - zero_point) * scale;
+            float r_10 = (static_cast<float>(data[data_ix + 10]) - zero_point) * scale;
+            // final score = sigmoid(class value) * sigmoid(objectness value, column 10)
+            float cls = (static_cast<float>(data[data_ix + 11 + cls_idx]) - zero_point) * scale;
+            float score = sigmoid(cls) * sigmoid(r_10);
+
+            if ((score < threshold) || (score > 1.0f)) {
+                continue;
+            }
+            // box center = (cy, cx) + sigmoid(pred) * step; box size = (ph, pw) * exp(pred)
+            float by = r_0 + sigmoid(r_6) * r_4;
+            float bx = r_1 + sigmoid(r_7) * r_5;
+            float bh = r_2 * exp(r_8);
+            float bw = r_3 * exp(r_9);
+            // center/size to corner coordinates
+            float ymin = by - 0.5 * bh;
+            float xmin = bx - 0.5 * bw;
+            float ymax = by + 0.5 * bh;
+            float xmax = bx + 0.5 * bw;
+
+            // from relative to absolute
+            ymin *= impulse->input_height;
+            xmin *= impulse->input_width;
+            ymax *= impulse->input_height;
+            xmax *= impulse->input_width;
+            // boxes are stored flat, four floats per detection: ymin, xmin, ymax, xmax
+            boxes.push_back(ymin);
+            boxes.push_back(xmin);
+            boxes.push_back(ymax);
+            boxes.push_back(xmax);
+            scores.push_back(score);
+            classes.push_back((int)cls_idx);
+        }
+        // NMS runs separately for each class on the candidates collected above
+        size_t nr_boxes = scores.size();
+        EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &class_results,
+                                              boxes.data(), scores.data(), classes.data(),
+                                              nr_boxes,
+                                              true /*clip_boxes*/,
+                                              debug);
+        if (nms_res != EI_IMPULSE_OK) {
+            return nms_res;
+        }
+        // append this class's survivors to the combined list
+        for (auto bb: class_results) {
+            results.push_back(bb);
+        }
+    }
+
+    prepare_tao_results_common(impulse, result, &results);
+    return EI_IMPULSE_OK;
+}
+#endif // #ifdef EI_HAS_TAO_YOLOV3
+
+#ifdef EI_HAS_TAO_YOLOV4
+/**
+ * Decode a TAO YOLOv4 output tensor into bounding boxes and hand them to prepare_tao_results_common.
+ * `data` holds rows of (11 + label_count) values, each dequantized as (value - zero_point) * scale.
+ * For every class, rows whose score (class * objectness, both through sigmoid) lies in [threshold, 1]
+ * are turned into pixel boxes and filtered by ei_run_nms; an NMS error is returned as is.
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_tao_yolov4_common(const ei_impulse_t *impulse,
+                                                                         ei_impulse_result_t *result,
+                                                                         T *data,
+                                                                         float zero_point,
+                                                                         float scale,
+                                                                         size_t output_features_count,
+                                                                         float threshold,
+                                                                         bool debug) {
+    // # x: 3-D tensor. Last dimension is
+    //          (cy, cx, ph, pw, step_y, step_x, pred_y, pred_x, pred_h, pred_w, object, cls...)
+    const size_t col_size = 11 + impulse->label_count;
+    const size_t row_count = output_features_count / col_size;
+    const size_t label_count = (size_t)impulse->label_count;
+    const float grid_scale_xy = 1.0f;
+
+    // function-local statics, so the collected boxes outlive this call
+    static std::vector<ei_impulse_result_bounding_box_t> results;
+    static std::vector<ei_impulse_result_bounding_box_t> class_results;
+    results.clear();
+
+    for (size_t cls_idx = 0; cls_idx < label_count; cls_idx++) {
+        // NMS inputs for this class only
+        std::vector<float> boxes;
+        std::vector<float> scores;
+        std::vector<int> classes;
+        class_results.clear();
+
+        for (size_t ix = 0; ix < row_count; ix++) {
+            const T *row = data + (ix * col_size);
+            // dequantize one column of the current row
+            auto dequant = [row, zero_point, scale](size_t col) -> float {
+                return (static_cast<float>(row[col]) - zero_point) * scale;
+            };
+
+            // score = sigmoid(class) * sigmoid(object); rows outside [threshold, 1] are skipped
+            const float cls = dequant(11 + cls_idx);
+            const float score = sigmoid(cls) * sigmoid(dequant(10));
+            if ((score < threshold) || (score > 1.0f)) {
+                continue;
+            }
+
+            // columns 0..5, named as in the layout comment above
+            const float cy = dequant(0);
+            const float cx = dequant(1);
+            const float ph = dequant(2);
+            const float pw = dequant(3);
+            const float step_y = dequant(4);
+            const float step_x = dequant(5);
+
+            // columns 6..9: offsets go through sigmoid, sizes through exp (input capped at 8)
+            const float pred_y = sigmoid(dequant(6)) * grid_scale_xy - (grid_scale_xy - 1.0f) / 2.0f;
+            const float pred_x = sigmoid(dequant(7)) * grid_scale_xy - (grid_scale_xy - 1.0f) / 2.0f;
+            const float pred_h = exp(std::min(dequant(8), 8.0f));
+            const float pred_w = exp(std::min(dequant(9), 8.0f));
+
+            // box centre and size
+            const float by = cy + pred_y * step_y;
+            const float bx = cx + pred_x * step_x;
+            const float bh = ph * pred_h;
+            const float bw = pw * pred_w;
+
+            // centre/size -> corners
+            float ymin = by - 0.5 * bh;
+            float xmin = bx - 0.5 * bw;
+            float ymax = by + 0.5 * bh;
+            float xmax = bx + 0.5 * bw;
+
+            // from relative to absolute
+            ymin *= impulse->input_height;
+            xmin *= impulse->input_width;
+            ymax *= impulse->input_height;
+            xmax *= impulse->input_width;
+
+            // four floats per box, in the order ymin, xmin, ymax, xmax
+            boxes.insert(boxes.end(), { ymin, xmin, ymax, xmax });
+            scores.push_back(score);
+            classes.push_back((int)cls_idx);
+        }
+
+        const size_t nr_boxes = scores.size();
+        const EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, &class_results,
+                                                    boxes.data(), scores.data(), classes.data(),
+                                                    nr_boxes,
+                                                    true /*clip_boxes*/,
+                                                    debug);
+        if (nms_res != EI_IMPULSE_OK) {
+            return nms_res;
+        }
+
+        // append what NMS kept for this class to the overall list
+        results.insert(results.end(), class_results.begin(), class_results.end());
+    }
+
+    prepare_tao_results_common(impulse, result, &results);
+    return EI_IMPULSE_OK;
+}
+#endif // #ifdef EI_HAS_TAO_YOLOV4
+
+/**
+  * TAO YOLOv3 post-processing for a float output tensor: forwards to the shared decoder with zero_point 0 and scale 1
+  */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_tao_yolov3(const ei_impulse_t *impulse,
+    const ei_learning_block_config_tflite_graph_t *block_config,
+    ei_impulse_result_t *result,
+    float *data,
+    size_t output_features_count,
+    bool debug = false) {
+#ifndef EI_HAS_TAO_YOLOV3
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; // the YOLOv3 decoder is compiled out
+#else
+    return fill_result_struct_tao_yolov3_common(impulse, result, data, 0.0f /*zero_point*/, 1.0f /*scale*/, output_features_count, block_config->threshold, debug);
+#endif // #ifndef EI_HAS_TAO_YOLOV3
+}
+
+/**
+ * TAO YOLOv3 post-processing for a quantized output tensor: forwards zero_point and scale to the shared decoder
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_tao_yolov3(const ei_impulse_t *impulse,
+    const ei_learning_block_config_tflite_graph_t *block_config,
+    ei_impulse_result_t *result,
+    T *data,
+    float zero_point,
+    float scale,
+    size_t output_features_count,
+    bool debug = false) {
+#ifndef EI_HAS_TAO_YOLOV3
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; // the YOLOv3 decoder is compiled out
+#else
+    return fill_result_struct_tao_yolov3_common(impulse, result, data, zero_point, scale, output_features_count, block_config->threshold, debug);
+#endif // #ifndef EI_HAS_TAO_YOLOV3
+}
+
+/**
+  * TAO YOLOv4 post-processing for a float output tensor: forwards to the shared decoder with zero_point 0 and scale 1
+  */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_tao_yolov4(const ei_impulse_t *impulse,
+    const ei_learning_block_config_tflite_graph_t *block_config,
+    ei_impulse_result_t *result,
+    float *data,
+    size_t output_features_count,
+    bool debug = false) {
+#ifndef EI_HAS_TAO_YOLOV4
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; // the YOLOv4 decoder is compiled out
+#else
+    return fill_result_struct_tao_yolov4_common(impulse, result, data, 0.0f /*zero_point*/, 1.0f /*scale*/, output_features_count, block_config->threshold, debug);
+#endif // #ifndef EI_HAS_TAO_YOLOV4
+}
+
+/**
+ * TAO YOLOv4 post-processing for a quantized output tensor: forwards zero_point and scale to the shared decoder
+*/
+template<typename T>
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_quantized_tao_yolov4(const ei_impulse_t *impulse,
+    const ei_learning_block_config_tflite_graph_t *block_config,
+    ei_impulse_result_t *result,
+    T *data,
+    float zero_point,
+    float scale,
+    size_t output_features_count,
+    bool debug = false) {
+#ifndef EI_HAS_TAO_YOLOV4
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE; // the YOLOv4 decoder is compiled out
+#else
+    return fill_result_struct_tao_yolov4_common(impulse, result, data, zero_point, scale, output_features_count, block_config->threshold, debug);
+#endif // #ifndef EI_HAS_TAO_YOLOV4
+}
+
+#ifdef EI_HAS_YOLOV2
+// based on akida_models-1.2.0/detection/processing.py
+// input is "2D" array with shape [grid_h * grid_w * nb_box, nb_classes]; it is normalized in place, row by row
+__attribute__((unused)) static void softmax(std::vector<float>& input, const size_t nb_classes)
+{
+    // nothing to normalize; also keeps us from dereferencing end() or stepping rows by zero
+    if (input.empty() || nb_classes == 0) {
+        return;
+    }
+
+    const float max = *std::max_element(input.begin(), input.end());
+    // np.min(x) is taken on the already shifted data, hence the "- max"
+    const float min = *std::min_element(input.begin(), input.end()) - max;
+    const float t = -100.0f;
+    const bool rescale = min < t;
+
+    // x = x - np.max(x); if np.min(x) < t: x = x / np.min(x) * t (applied to the WHOLE array); e_x = np.exp(x)
+    std::transform(input.begin(), input.end(), input.begin(),
+                   [max, min, t, rescale](float x) {
+                       x -= max;
+                       return std::exp(rescale ? (x / min * t) : x);
+                   });
+
+    // e_x / e_x.sum(axis, keepdims=True)
+    // calculated for each 'row', across nb_classes; an incomplete trailing row is left as e_x
+    for (size_t row = 0; row + nb_classes <= input.size(); row += nb_classes) {
+        float sum = 0.0f;
+        for (size_t ix = row; ix < row + nb_classes; ix++) {
+            sum += input[ix];
+        }
+        for (size_t ix = row; ix < row + nb_classes; ix++) {
+            input[ix] /= sum;
+        }
+    }
+}
+
+// One YOLOv2 box candidate: corner coordinates, a confidence and one score per class.
+class BoundingBox {
+public:
+    float x1, y1, x2, y2, confidence;
+    std::vector<float> classes;
+
+    BoundingBox(float x1, float y1, float x2, float y2, float confidence, const std::vector<float>& classes)
+        : x1(x1), y1(y1), x2(x2), y2(y2), confidence(confidence), classes(classes) {}
+
+    // Score of the box, i.e. its confidence
+    float get_score() const {
+        return confidence;
+    }
+
+    // Index of the largest entry in `classes` (0 when `classes` is empty)
+    int get_label() const {
+        return std::max_element(classes.begin(), classes.end()) - classes.begin();
+    }
+
+    // Length of the overlap of the intervals [x1, x2] and [x3, x4], 0 when they are disjoint
+    float _interval_overlap(float x1, float x2, float x3, float x4) const {
+        const float upper = std::min(x2, x4);
+
+        if (x3 < x1) {
+            // the second interval starts first; it misses the first one if it also ends before x1
+            return (x4 < x1) ? 0 : (upper - x1);
+        }
+
+        // the first interval starts first; it misses the second one if it ends before x3
+        return (x2 < x3) ? 0 : (upper - x3);
+    }
+
+    // Intersection over Union of this box and `other`
+    float iou(const BoundingBox& other) const {
+        const float overlap_w = _interval_overlap(x1, x2, other.x1, other.x2);
+        const float overlap_h = _interval_overlap(y1, y2, other.y1, other.y2);
+        const float intersect = overlap_w * overlap_h;
+
+        const float w1 = x2 - x1;
+        const float h1 = y2 - y1;
+        const float w2 = other.x2 - other.x1;
+        const float h2 = other.y2 - other.y1;
+
+        // union = both areas minus the part counted twice
+        const float un = w1 * h1 + w2 * h2 - intersect;
+        return intersect / un;
+    }
+};
+#endif // EI_HAS_YOLOV2
+/**
+  * Fill the result structure from an unquantized YOLOv2 output tensor (decode boxes, per-class NMS, threshold, sort)
+  */
+__attribute__((unused)) static EI_IMPULSE_ERROR fill_result_struct_f32_yolov2(const ei_impulse_t *impulse,
+                                                                              const ei_learning_block_config_tflite_graph_t *block_config,
+                                                                              ei_impulse_result_t *result,
+                                                                              float *data,
+                                                                              size_t output_features_count,
+                                                                              bool debug = false) {
+#ifdef EI_HAS_YOLOV2
+    static std::vector<ei_impulse_result_bounding_box_t> results;
+    results.clear();
+
+    // Example output shape: (7, 7, 5, 7)
+    // TODO: calculate grid_h, grid_w, nb_box from output_features_count or get as a param
+    // grid_h, grid_w, nb_box = output.shape[:3]
+    const size_t grid_h = 7;
+    const size_t grid_w = 7;
+    const size_t nb_box = 5;
+    const std::vector<std::pair<float, float>> anchors = {{0.56594, 1.05012}, {1.0897, 2.03908}, {2.37823, 3.00376}, {2.4593, 4.913}, {5.15981, 5.56699}};
+
+    const size_t nb_classes = impulse->label_count;
+    const float obj_threshold = 0.5;
+    const float nms_threshold = 0.5;
+    std::vector<float> output;
+    const int stride = 4 + 1 + nb_classes;
+
+    output.assign(data, data + output_features_count);
+
+    // boxes = []
+    std::vector<BoundingBox> boxes;
+
+    // equivalent to: classes_confidences = output[..., 5:]
+    // (walks by offset and only takes complete rows, so no iterator beyond output.end() is ever formed)
+    std::vector<float> classes_confidences;
+    const size_t dim = 5;
+    for(size_t off = dim; off + nb_classes <= output.size(); off += (dim + nb_classes)) {
+        classes_confidences.insert(classes_confidences.end(), output.begin() + off, output.begin() + off + nb_classes);
+    }
+    // calculate softmax for later use, we need to calculate it across the whole input data so operate on a sliced output
+    softmax(classes_confidences, nb_classes);
+
+    for (size_t row = 0; row < grid_h; ++row) {
+        for (size_t col = 0; col < grid_w; ++col) {
+            for (size_t b = 0; b < nb_box; ++b) {
+                size_t idx = row * grid_w * nb_box * stride + col * nb_box * stride + b * stride;
+                size_t classes_idx = row * grid_w * nb_box * nb_classes + col * nb_box * nb_classes + b * nb_classes;
+
+                // Apply sigmoid to the 4th element
+                // output[..., 4] = _sigmoid(output[..., 4])
+                float sigmoid_val = sigmoid(output[idx + 4]);
+                output[idx + 4] = sigmoid_val;
+
+                // classes = output[row, col, b, 5:]
+                std::vector<float> classes(classes_confidences.begin() + classes_idx, classes_confidences.begin() + classes_idx + nb_classes);
+
+                // output[..., 5:] = output[..., 4][..., np.newaxis] * _softmax(output[..., 5:])
+                // output[..., 5:] *= output[..., 5:] > obj_threshold
+                std::transform(classes.begin(), classes.end(), classes.begin(),
+                               [sigmoid_val, obj_threshold](float c) { c *= sigmoid_val; return c > obj_threshold ? c : 0.0f; });
+
+                // if np.sum(classes) > 0:
+                float sum = 0.0f;
+                for(auto it = classes.begin(); it != classes.end(); it++) {
+                    sum += *it;
+                }
+                if(sum > 0.0f) {
+                    // x, y, w, h = output[row, col, b, :4]
+                    float x = output[idx + 0];
+                    float y = output[idx + 1];
+                    float w = output[idx + 2];
+                    float h = output[idx + 3];
+
+                    // x = (col + _sigmoid(x)) / grid_w  # center position, unit: image width
+                    x = (col + sigmoid(x)) / grid_w;
+                    // y = (row + _sigmoid(y)) / grid_h  # center position, unit: image height
+                    y = (row + sigmoid(y)) / grid_h;
+                    // w = anchors[b][0] * np.exp(w) / grid_w  # unit: image width
+                    w = anchors[b].first * std::exp(w) / grid_w;
+                    // h = anchors[b][1] * np.exp(h) / grid_h  # unit: image height
+                    h = anchors[b].second * std::exp(h) / grid_h;
+
+                    // confidence = output[row, col, b, 4]
+                    float confidence = output[idx + 4];
+
+                    // x1 = max(x - w / 2, 0)
+                    float x1 = std::max(x - w / 2, 0.0f);
+                    // y1 = max(y - h / 2, 0)
+                    float y1 = std::max(y - h / 2, 0.0f);
+                    // x2 = min(x + w / 2, grid_w)
+                    float x2 = std::min(x + w / 2, static_cast<float>(grid_w));
+                    // y2 = min(y + h / 2, grid_h)
+                    float y2 = std::min(y + h / 2, static_cast<float>(grid_h));
+
+                    boxes.emplace_back(x1, y1, x2, y2, confidence, classes);
+                }
+            }
+        }
+    }
+
+    // Non-maximal suppression (on boxes)
+    for (size_t c = 0; c < nb_classes; ++c) {
+        std::vector<std::pair<float, int>> sorted_indices;
+        for (size_t i = 0; i < boxes.size(); ++i) {
+            sorted_indices.emplace_back(boxes[i].classes[c], i);
+        }
+
+        std::sort(sorted_indices.begin(), sorted_indices.end(),
+                  [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
+                      return a.first > b.first;
+                  });
+
+        for (size_t i = 0; i < sorted_indices.size(); ++i) {
+            int index_i = sorted_indices[i].second;
+            if (boxes[index_i].classes[c] == 0)
+                continue;
+
+            for (size_t j = i + 1; j < sorted_indices.size(); ++j) {
+                int index_j = sorted_indices[j].second;
+
+                if ((boxes[index_i].iou(boxes[index_j]) >= nms_threshold) &&
+                    (boxes[index_i].get_label() == (int)c) &&
+                    (boxes[index_j].get_label() == (int)c)) {
+                    boxes[index_j].confidence = 0;
+                }
+            }
+        }
+    }
+
+    // remove the boxes which are less likely than a obj_threshold
+    boxes.erase(std::remove_if(boxes.begin(), boxes.end(),
+                               [obj_threshold](const BoundingBox& box) {
+                                   return box.get_score() <= obj_threshold;
+                               }), boxes.end());
+
+    // sort boxes by box.get_score()
+    std::sort(boxes.begin(), boxes.end(),
+                [](const BoundingBox& a, const BoundingBox& b) {
+                return a.get_score() > b.get_score();
+                });
+
+    // convert relative coordinates to absolute coordinates
+    for(auto & box: boxes) {
+        ei_impulse_result_bounding_box_t res;
+        res.label = ei_classifier_inferencing_categories[box.get_label()];
+        res.x = ceil(box.x1 * impulse->input_width);
+        res.y = ceil(box.y1 * impulse->input_height);
+        res.width = ceil((box.x2 - box.x1) * impulse->input_width);
+        res.height = ceil((box.y2 - box.y1) * impulse->input_height);
+        res.value = box.get_score();
+        results.push_back(res);
+    }
+
+    // if we didn't detect min required objects, fill the rest with fixed value
+    size_t added_boxes_count = results.size();
+    size_t min_object_detection_count = impulse->object_detection_count;
+    if (added_boxes_count < min_object_detection_count) {
+        results.resize(min_object_detection_count);
+        for (size_t ix = added_boxes_count; ix < min_object_detection_count; ix++) {
+            results[ix].value = 0.0f;
+        }
+    }
+
+    result->bounding_boxes = results.data();
+    result->bounding_boxes_count = results.size();
+
+    return EI_IMPULSE_OK;
+#else
+    return EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE;
+#endif // #ifdef EI_HAS_YOLOV2
+}
+
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+bool find_mtx_by_idx(ei_feature_t* mtx, ei::matrix_t** matrix, uint32_t mtx_id, size_t mtx_size) {
+    // the first entry that has a matrix and whose blockId equals mtx_id (or is 0) wins
+    for (size_t slot = 0; slot != mtx_size; ++slot) {
+        const ei_feature_t &entry = mtx[slot];
+        const bool id_matches = (entry.blockId == mtx_id) || (entry.blockId == 0);
+        if ((entry.matrix != NULL) && id_matches) {
+            *matrix = entry.matrix;
+            return true;
+        }
+    }
+    return false; // nothing found, *matrix is left untouched
+}
+#endif
+
 #endif // _EI_CLASSIFIER_FILL_RESULT_STRUCT_H_
diff --git a/edge-impulse-sdk/classifier/ei_model_types.h b/edge-impulse-sdk/classifier/ei_model_types.h
index e278c06..4c36205 100644
--- a/edge-impulse-sdk/classifier/ei_model_types.h
+++ b/edge-impulse-sdk/classifier/ei_model_types.h
@@ -21,8 +21,9 @@
 #include <stdint.h>
 
 #include "edge-impulse-sdk/classifier/ei_classifier_types.h"
+#include "edge-impulse-sdk/dsp/ei_dsp_handle.h"
 #include "edge-impulse-sdk/dsp/numpy.hpp"
-#if EI_CLASSIFIER_USE_FULL_TFLITE
+#if EI_CLASSIFIER_USE_FULL_TFLITE || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA) || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_MEMRYX)
 #include "tensorflow-lite/tensorflow/lite/c/common.h"
 #else
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
@@ -61,13 +62,35 @@
 #define EI_CLASSIFIER_LAST_LAYER_YOLOX                 4
 #define EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI       5
 #define EI_CLASSIFIER_LAST_LAYER_YOLOV7                6
+#define EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET         7
+#define EI_CLASSIFIER_LAST_LAYER_TAO_SSD               8
+#define EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3            9
+#define EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4            10
+#define EI_CLASSIFIER_LAST_LAYER_YOLOV2                11
 
 #define EI_CLASSIFIER_IMAGE_SCALING_NONE          0
 #define EI_CLASSIFIER_IMAGE_SCALING_0_255         1
 #define EI_CLASSIFIER_IMAGE_SCALING_TORCH         2
+#define EI_CLASSIFIER_IMAGE_SCALING_MIN1_1        3
+#define EI_CLASSIFIER_IMAGE_SCALING_MIN128_127    4
+#define EI_CLASSIFIER_IMAGE_SCALING_BGR_SUBTRACT_IMAGENET_MEAN    5
+
+// maps back to ClassificationMode in keras-types.ts
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_CLASSIFICATION      1
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_REGRESSION          2
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_OBJECT_DETECTION    3
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_ANOMALY_GMM         4
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_VISUAL_ANOMALY      5
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_ANOMALY_KMEANS      6
+#define EI_CLASSIFIER_CLASSIFICATION_MODE_DSP                 7
 
 struct ei_impulse;
 
+typedef struct {
+    ei::matrix_t* matrix; // feature matrix of this entry; may be NULL, find_mtx_by_idx skips such entries
+    uint32_t blockId; // id the matrix is looked up by; find_mtx_by_idx treats 0 as matching any requested id
+} ei_feature_t;
+
 typedef struct {
     uint16_t implementation_version;
     bool is_configured;
@@ -77,12 +100,18 @@ typedef struct {
     uint32_t suppression_flags;
 } ei_model_performance_calibration_t;
 
+typedef int (*extract_fn_t)(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config, float frequency);
+
 typedef struct {
+    uint32_t blockId;
     size_t n_output_features;
-    int (*extract_fn)(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config, const float frequency);
+    extract_fn_t extract_fn;
     void *config;
     uint8_t *axes;
     size_t axes_size;
+    int version;  // future proof, can easily add to this struct now
+    DspHandle* (*factory)(void* config, float sampling_freq); // nullptr means no state
+    // v1 ends here
 } ei_model_dsp_t;
 
 typedef struct {
@@ -91,9 +120,14 @@ typedef struct {
 } ei_classifier_anom_cluster_t;
 
 typedef struct {
-    EI_IMPULSE_ERROR (*infer_fn)(const ei_impulse *impulse, ei::matrix_t *fmatrix, ei_impulse_result_t *result, void *config, bool debug);
+    uint32_t blockId;
+    bool keep_output;
+    EI_IMPULSE_ERROR (*infer_fn)(const ei_impulse *impulse, ei_feature_t *fmatrix, uint32_t learn_block_index, uint32_t* input_block_ids, uint32_t input_block_ids_size, ei_impulse_result_t *result, void *config, bool debug);
     void *config;
     int image_scaling;
+    const uint32_t* input_block_ids;
+    const uint32_t input_block_ids_size;
+    uint32_t output_features_count;
 } ei_learning_block_t;
 
 typedef struct {
@@ -126,6 +160,7 @@ typedef struct {
 
 typedef struct {
     uint16_t implementation_version;
+    uint8_t classification_mode;
     uint32_t block_id;
     /* object detection */
     bool object_detection;
@@ -133,12 +168,18 @@ typedef struct {
     uint8_t output_data_tensor;
     uint8_t output_labels_tensor;
     uint8_t output_score_tensor;
+    /* object detection and visual AD */
+    float threshold;
+    /* tflite graph params */
+    bool quantized;
+    bool compiled;
     /* tflite graph config pointer */
     void *graph_config;
 } ei_learning_block_config_tflite_graph_t;
 
 typedef struct {
     uint16_t implementation_version;
+    uint8_t classification_mode;
     const uint16_t *anom_axis;
     uint16_t anom_axes_size;
     const ei_classifier_anom_cluster_t *anom_clusters;
@@ -149,11 +190,19 @@ typedef struct {
 
 typedef struct {
     uint16_t implementation_version;
+    uint8_t classification_mode;
     const uint16_t *anom_axis;
     uint16_t anom_axes_size;
+    float anomaly_threshold;
+    bool visual;
     void* graph_config;
 } ei_learning_block_config_anomaly_gmm_t;
 
+typedef struct {
+    float confidence_threshold; // ei_run_nms passes this to NonMaxSuppression as score_threshold
+    float iou_threshold; // ei_run_nms passes this to NonMaxSuppression as iou_threshold
+} ei_object_detection_nms_config_t;
+
 typedef struct ei_impulse {
     /* project details */
     uint32_t project_id;
@@ -175,10 +224,7 @@ typedef struct ei_impulse {
     ei_model_dsp_t *dsp_blocks;
 
     /* object detection */
-    bool object_detection;
     uint16_t object_detection_count;
-    float object_detection_threshold;
-    int8_t object_detection_last_layer;
     uint32_t fomo_output_size;
     uint32_t tflite_output_features_count;
 
@@ -188,8 +234,6 @@ typedef struct ei_impulse {
 
     /* inference parameters */
     uint32_t inferencing_engine;
-    bool quantized;
-    bool compiled;
 
     /* sensors and on-device inference */
     uint32_t sensor;
@@ -198,12 +242,77 @@ typedef struct ei_impulse {
     uint32_t slices_per_model_window;
 
     /* output details */
-    bool has_anomaly;
+    uint16_t has_anomaly;
     uint16_t label_count;
     const ei_model_performance_calibration_t calibration;
     const char **categories;
+    ei_object_detection_nms_config_t object_detection_nms;
 } ei_impulse_t;
 
+// Runtime state for one impulse: a lazily filled table with one DspHandle pointer per DSP block.
+// The table is allocated with ei_malloc in the constructor and released with ei_free in the destructor.
+class ei_impulse_state_t {
+typedef DspHandle* _dsp_handle_ptr_t;
+public:
+    const ei_impulse_t *impulse; // keep a pointer to the impulse
+    _dsp_handle_ptr_t *dsp_handles; // one slot per DSP block; a slot is nullptr until its handle is created
+    bool is_temp_handle = false; // to know if we're using the old (stateless) API
+
+    // NOTE(review): copies would share (and free twice) dsp_handles; consider deleting the copy operations
+    ei_impulse_state_t(const ei_impulse_t *impulse)
+        : impulse(impulse)
+    {
+        const auto num_dsp_blocks = impulse->dsp_blocks_size;
+        dsp_handles = (_dsp_handle_ptr_t*)ei_malloc(sizeof(_dsp_handle_ptr_t)*num_dsp_blocks);
+        // a failed allocation leaves the table empty instead of writing through a null pointer
+        for(size_t ix = 0; dsp_handles != nullptr && ix < num_dsp_blocks; ix++) {
+            dsp_handles[ix] = nullptr;
+        }
+    }
+
+    // Handle of DSP block `ix`, created on first use. Returns nullptr when the table could not be
+    // allocated, `ix` is out of range, or the block has no factory (i.e. no state).
+    DspHandle* get_dsp_handle(size_t ix) {
+        if (dsp_handles == nullptr || ix >= impulse->dsp_blocks_size) {
+            return nullptr;
+        }
+        if (dsp_handles[ix] == nullptr && impulse->dsp_blocks[ix].factory != nullptr) {
+            dsp_handles[ix] = impulse->dsp_blocks[ix].factory(impulse->dsp_blocks[ix].config, impulse->frequency);
+        }
+        return dsp_handles[ix];
+    }
+
+    // Destroys every created handle; the table itself is kept so handles can be created again
+    void reset()
+    {
+        for (size_t ix = 0; dsp_handles != nullptr && ix < impulse->dsp_blocks_size; ix++) {
+            delete dsp_handles[ix]; // deleting nullptr is a no-op
+            dsp_handles[ix] = nullptr;
+        }
+    }
+
+    // objects of this class are allocated through ei_malloc / ei_free
+    void* operator new(size_t size) { return ei_malloc(size); }
+    void operator delete(void* ptr) { ei_free(ptr); }
+    void* operator new[](size_t size) { return ei_malloc(size); }
+    void operator delete[](void* ptr) { ei_free(ptr); }
+
+    // releases the handles first, then the table
+    ~ei_impulse_state_t()
+    {
+        reset();
+        ei_free(dsp_handles);
+    }
+};
+
+class ei_impulse_handle_t { // bundles an impulse pointer with the ei_impulse_state_t built from it
+public:
+    ei_impulse_handle_t(const ei_impulse_t *impulse)
+        : state(impulse), impulse(impulse) {}; // both members are initialized from the same pointer
+    ei_impulse_state_t state; // held by value, so it is destroyed together with the handle
+    const ei_impulse_t *impulse; // stored as given; this class never frees it
+};
+
 typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
diff --git a/edge-impulse-sdk/classifier/ei_nms.h b/edge-impulse-sdk/classifier/ei_nms.h
index 5f6a4aa..5bfcdf8 100644
--- a/edge-impulse-sdk/classifier/ei_nms.h
+++ b/edge-impulse-sdk/classifier/ei_nms.h
@@ -26,7 +26,7 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_types.h"
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 
-#if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX)
+#if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_SSD) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV2)
 
 // The code below comes from tensorflow/lite/kernels/internal/reference/non_max_suppression.h
 // Copyright 2019 The TensorFlow Authors.  All rights reserved.
@@ -190,7 +190,7 @@ static inline void NonMaxSuppression(const float* boxes, const int num_boxes,
         }
         ++*num_selected_indices;
       }
-      if (next_candidate.score > score_threshold) {
+      if ((soft_nms_sigma > 0.0) && (next_candidate.score > score_threshold)) {
         // Soft suppression might have occurred and current score is still
         // greater than score_threshold; add next_candidate back onto priority
         // queue.
@@ -203,45 +203,29 @@ static inline void NonMaxSuppression(const float* boxes, const int num_boxes,
 /**
  * Run non-max suppression over the results array (for bounding boxes)
  */
-EI_IMPULSE_ERROR ei_run_nms(std::vector<ei_impulse_result_bounding_box_t> *results) {
-
-    size_t bb_count = 0;
-    for (size_t ix = 0; ix < results->size(); ix++) {
-        auto bb = results->at(ix);
-        if (bb.value == 0) {
-            continue;
-        }
-        bb_count++;
+EI_IMPULSE_ERROR ei_run_nms(
+    const ei_impulse_t *impulse,
+    std::vector<ei_impulse_result_bounding_box_t> *results,
+    float *boxes,
+    float *scores,
+    int *classes,
+    size_t bb_count,
+    bool clip_boxes,
+    bool debug) {
+
+    if (bb_count < 1) {
+        return EI_IMPULSE_OK;
     }
 
-    float *boxes = (float*)malloc(4 * bb_count * sizeof(float));
-    float *scores = (float*)malloc(1 * bb_count * sizeof(float));
-    int *selected_indices = (int*)malloc(1 * bb_count * sizeof(int));
-    float *selected_scores = (float*)malloc(1 * bb_count * sizeof(float));
+    int *selected_indices = (int*)ei_malloc(1 * bb_count * sizeof(int));
+    float *selected_scores = (float*)ei_malloc(1 * bb_count * sizeof(float));
 
-    if (!scores || !boxes || !selected_indices || !selected_scores) {
-        free(boxes);
-        free(scores);
-        free(selected_indices);
-        free(selected_scores);
+    if (!scores || !boxes || !selected_indices || !selected_scores || !classes) {
+        ei_free(selected_indices);
+        ei_free(selected_scores);
         return EI_IMPULSE_OUT_OF_MEMORY;
     }
 
-    size_t box_ix = 0;
-    for (size_t ix = 0; ix < results->size(); ix++) {
-        auto bb = results->at(ix);
-        if (bb.value == 0) {
-            continue;
-        }
-        boxes[(box_ix * 4) + 0] = bb.y;
-        boxes[(box_ix * 4) + 1] = bb.x;
-        boxes[(box_ix * 4) + 2] = bb.y + bb.height;
-        boxes[(box_ix * 4) + 3] = bb.x + bb.width;
-        scores[box_ix] = bb.value;
-
-        box_ix++;
-    }
-
     //  boxes: box encodings in format [y1, x1, y2, x2], shape: [num_boxes, 4]
     //  num_boxes: number of candidates
     //  scores: scores for candidate boxes, in the same order. shape: [num_boxes]
@@ -257,8 +241,8 @@ EI_IMPULSE_ERROR ei_run_nms(std::vector<ei_impulse_result_bounding_box_t> *resul
         bb_count, // num_boxes
         (const float*)scores, // scores
         bb_count, // max_output_size
-        0.2f, // iou_threshold
-        0.0f, // score_threshold
+        impulse->object_detection_nms.iou_threshold, // iou_threshold
+        impulse->object_detection_nms.confidence_threshold, // score_threshold
         0.0f, // soft_nms_sigma
         selected_indices,
         selected_scores,
@@ -267,18 +251,34 @@ EI_IMPULSE_ERROR ei_run_nms(std::vector<ei_impulse_result_bounding_box_t> *resul
     std::vector<ei_impulse_result_bounding_box_t> new_results;
 
     for (size_t ix = 0; ix < (size_t)num_selected_indices; ix++) {
-        auto bb = results->at(selected_indices[ix]);
-
-        printf("Found bb with label %s\n", bb.label);
-
-        ei_impulse_result_bounding_box_t r;
-        r.label = bb.label;
-        r.x = bb.x;
-        r.y = bb.y;
-        r.width = bb.width;
-        r.height = bb.height;
-        r.value = selected_scores[ix];
-        new_results.push_back(r);
+
+        int out_ix = selected_indices[ix];
+        ei_impulse_result_bounding_box_t bb;
+        bb.label  = impulse->categories[classes[out_ix]];
+        bb.value  = selected_scores[ix];
+
+        float ymin = boxes[(out_ix * 4) + 0];
+        float xmin = boxes[(out_ix * 4) + 1];
+        float ymax = boxes[(out_ix * 4) + 2];
+        float xmax = boxes[(out_ix * 4) + 3];
+
+        if (clip_boxes) {
+            ymin = std::min(std::max(ymin, 0.0f), (float)impulse->input_height);
+            xmin = std::min(std::max(xmin, 0.0f), (float)impulse->input_width);
+            ymax = std::min(std::max(ymax, 0.0f), (float)impulse->input_height);
+            xmax = std::min(std::max(xmax, 0.0f), (float)impulse->input_width);
+        }
+
+        bb.y      = static_cast<uint32_t>(ymin);
+        bb.x      = static_cast<uint32_t>(xmin);
+        bb.height = static_cast<uint32_t>(ymax) - bb.y;
+        bb.width  = static_cast<uint32_t>(xmax) - bb.x;
+        new_results.push_back(bb);
+
+        if (debug) {
+          ei_printf("Found bb with label %s\n", bb.label);
+        }
+
     }
 
     results->clear();
@@ -287,14 +287,106 @@ EI_IMPULSE_ERROR ei_run_nms(std::vector<ei_impulse_result_bounding_box_t> *resul
         results->push_back(new_results[ix]);
     }
 
-    free(boxes);
-    free(scores);
-    free(selected_indices);
-    free(selected_scores);
+    ei_free(selected_indices);
+    ei_free(selected_scores);
 
     return EI_IMPULSE_OK;
+
+}
+
+/**
+ * Run non-max suppression over the results array (for bounding boxes).
+ * Copies every box with a non-zero score into flat boxes/scores/classes
+ * buffers, then forwards to the buffer-based ei_run_nms overload.
+ */
+EI_IMPULSE_ERROR ei_run_nms(
+    const ei_impulse_t *impulse,
+    std::vector<ei_impulse_result_bounding_box_t> *results,
+    bool clip_boxes,
+    bool debug) {
+    size_t bb_count = 0;
+    for (size_t ix = 0; ix < results->size(); ix++) {
+        const auto &bb = results->at(ix);
+        if (bb.value == 0) {
+            continue;
+        }
+        bb_count++;
+    }
+
+    if (bb_count < 1) {
+        return EI_IMPULSE_OK;
+    }
+
+    float *boxes = (float*)ei_malloc(4 * bb_count * sizeof(float));
+    float *scores = (float*)ei_malloc(1 * bb_count * sizeof(float));
+    int *classes = (int*) ei_malloc(bb_count * sizeof(int));
+
+    if (!scores || !boxes || !classes) {
+        ei_free(boxes);
+        ei_free(scores);
+        ei_free(classes);
+        return EI_IMPULSE_OUT_OF_MEMORY;
+    }
+
+    size_t box_ix = 0;
+    for (size_t ix = 0; ix < results->size(); ix++) {
+        const auto &bb = results->at(ix);
+        if (bb.value == 0) {
+            continue;
+        }
+        boxes[(box_ix * 4) + 0] = bb.y;
+        boxes[(box_ix * 4) + 1] = bb.x;
+        boxes[(box_ix * 4) + 2] = bb.y + bb.height;
+        boxes[(box_ix * 4) + 3] = bb.x + bb.width;
+        scores[box_ix] = bb.value;
+        classes[box_ix] = 0; // fallback so an unmatched label never leaves this slot uninitialized
+        for (size_t j = 0; j < impulse->label_count; j++) {
+          if (strcmp(impulse->categories[j], bb.label) == 0) {
+            classes[box_ix] = (int)j;
+            break;
+          }
+        }
+        box_ix++;
+    }
+
+    EI_IMPULSE_ERROR nms_res = ei_run_nms(impulse, results,
+                                          boxes, scores,
+                                          classes, bb_count,
+                                          clip_boxes,
+                                          debug);
+
+    ei_free(boxes);
+    ei_free(scores);
+    ei_free(classes);
+
+    return nms_res;
+}
+
+/**
+ * Run non-max suppression over the results array (for bounding boxes), with box clipping enabled
+ */
+EI_IMPULSE_ERROR ei_run_nms(
+    const ei_impulse_t *impulse,
+    std::vector<ei_impulse_result_bounding_box_t> *results,
+    bool debug = false) {
+  return ei_run_nms(impulse, results, true, debug); // third argument is clip_boxes
+}
+
+/**
+ * Run non-max suppression over the results array (for bounding boxes) without an explicit impulse
+ */
+EI_IMPULSE_ERROR ei_run_nms(std::vector<ei_impulse_result_bounding_box_t> *results, bool debug = false) {
+#if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1
+  auto& impulse = *ei_default_impulse.impulse; // take thresholds and labels from the default impulse
+#else
+  const ei_impulse_t impulse = { // NOTE(review): nested designators (.a.b = x) are not standard C++ - confirm target toolchains accept this
+    .object_detection_nms.confidence_threshold = 0.0f,
+    .object_detection_nms.iou_threshold = 0.2f
+  };
+#endif
+  return ei_run_nms(&impulse, results, debug);
 }
 
-#endif // #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX)
+#endif // #if (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOX) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_SSD) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3) || (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4)
 
 #endif // _EDGE_IMPULSE_NMS_H_
diff --git a/edge-impulse-sdk/classifier/ei_run_classifier.h b/edge-impulse-sdk/classifier/ei_run_classifier.h
index 7b2abed..6a5ff57 100644
--- a/edge-impulse-sdk/classifier/ei_run_classifier.h
+++ b/edge-impulse-sdk/classifier/ei_run_classifier.h
@@ -18,6 +18,7 @@
 #ifndef _EDGE_IMPULSE_RUN_CLASSIFIER_H_
 #define _EDGE_IMPULSE_RUN_CLASSIFIER_H_
 
+#include "ei_model_types.h"
 #include "model-parameters/model_metadata.h"
 
 #include "ei_run_dsp.h"
@@ -26,8 +27,10 @@
 #include "ei_performance_calibration.h"
 
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
+#include "edge-impulse-sdk/porting/ei_logging.h"
+#include <memory>
 
-#if EI_CLASSIFIER_HAS_ANOMALY == 1
+#if EI_CLASSIFIER_HAS_ANOMALY
 #include "inferencing_engines/anomaly.h"
 #endif
 
@@ -61,32 +64,23 @@
 #error "Unknown inferencing engine"
 #endif
 
+// This file has an implicit dependency on ei_run_dsp.h, so must come after that include!
 #include "model-parameters/model_variables.h"
 
-#if ECM3532
-void*   __dso_handle = (void*) &__dso_handle;
-#endif
-
-// EI_CLASSIFIER_CALIBRATION_ENABLED needs to be added to new
-// model metadata, since we are getting rid of macro for sensors
-// (multiple impulses means we can have multiple sensors)
-// for now we just enable it if EI_CLASSIFIER_SENSOR is present and
-// is microphone (performance calibration only works for mic).
-#if defined(EI_CLASSIFIER_SENSOR) && (EI_CLASSIFIER_SENSOR == EI_CLASSIFIER_SENSOR_MICROPHONE)
-#define EI_CLASSIFIER_CALIBRATION_ENABLED 1
-#else
-#define EI_CLASSIFIER_CALIBRATION_ENABLED 0
-#endif
-
 #ifdef __cplusplus
 namespace {
 #endif // __cplusplus
 
 /* Function prototypes ----------------------------------------------------- */
-extern "C" EI_IMPULSE_ERROR run_inference(const ei_impulse_t *impulse, ei::matrix_t *fmatrix, ei_impulse_result_t *result, bool debug);
+extern "C" EI_IMPULSE_ERROR run_inference(ei_impulse_handle_t *handle, ei_feature_t *fmatrix, ei_impulse_result_t *result, bool debug);
 extern "C" EI_IMPULSE_ERROR run_classifier_image_quantized(const ei_impulse_t *impulse, signal_t *signal, ei_impulse_result_t *result, bool debug);
 static EI_IMPULSE_ERROR can_run_classifier_image_quantized(const ei_impulse_t *impulse, ei_learning_block_t block_ptr);
 
+#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
+EI_IMPULSE_ERROR ei_scale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix);
+EI_IMPULSE_ERROR ei_unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix);
+#endif // EI_CLASSIFIER_LOAD_IMAGE_SCALING
+
 /* Private variables ------------------------------------------------------- */
 
 static uint64_t classifier_continuous_features_written = 0;
@@ -97,49 +91,70 @@ static RecognizeEvents *avg_scores = NULL;
 /* These functions (up to Public functions section) are not exposed to end-user,
 therefore changes are allowed. */
 
-#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
-static const float torch_mean[] = { 0.485, 0.456, 0.406 };
-static const float torch_std[] = { 0.229, 0.224, 0.225 };
 
-static EI_IMPULSE_ERROR scale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) {
-    if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
-        // @todo; could we write some faster vector math here?
-        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
-            fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] - torch_mean[0]) / torch_std[0];
-            fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] - torch_mean[1]) / torch_std[1];
-            fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] - torch_mean[2]) / torch_std[2];
-        }
-    }
-    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) {
-        int scale_res = numpy::scale(fmatrix, 255.0f);
-        if (scale_res != EIDSP_OK) {
-            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
-            return EI_IMPULSE_DSP_ERROR;
+/**
+ * @brief      Print the timing and prediction results of an inference run via ei_printf
+ *
+ * @param      result  The inference result to print
+ */
+__attribute__((unused)) void display_results(ei_impulse_result_t* result)
+{
+    // print the predictions
+    ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms.): \n",
+                result->timing.dsp, result->timing.classification, result->timing.anomaly);
+#if EI_CLASSIFIER_OBJECT_DETECTION == 1
+    ei_printf("#Object detection results:\r\n");
+    bool bb_found = result->bounding_boxes_count > 0 && result->bounding_boxes[0].value > 0; // never index an empty list
+    for (size_t ix = 0; ix < result->bounding_boxes_count; ix++) {
+        auto bb = result->bounding_boxes[ix];
+        if (bb.value == 0) {
+            continue;
         }
+        ei_printf("    %s (", bb.label);
+        ei_printf_float(bb.value);
+        ei_printf(") [ x: %u, y: %u, width: %u, height: %u ]\n", bb.x, bb.y, bb.width, bb.height);
     }
 
-    return EI_IMPULSE_OK;
-}
+    if (!bb_found) {
+        ei_printf("    No objects found\n");
+    }
 
-static EI_IMPULSE_ERROR unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) {
-    if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
-        // @todo; could we write some faster vector math here?
-        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
-            fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] * torch_std[0]) + torch_mean[0];
-            fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] * torch_std[1]) + torch_mean[1];
-            fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] * torch_std[2]) + torch_mean[2];
-        }
+#elif (EI_CLASSIFIER_LABEL_COUNT == 1) && (!EI_CLASSIFIER_HAS_ANOMALY)// regression
+    ei_printf("#Regression results:\r\n");
+    ei_printf("    %s: ", result->classification[0].label);
+    ei_printf_float(result->classification[0].value);
+    ei_printf("\n");
+
+#elif EI_CLASSIFIER_LABEL_COUNT > 1 // if there is only one label, this is an anomaly only
+    ei_printf("#Classification results:\r\n");
+    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
+        ei_printf("    %s: ", result->classification[ix].label);
+        ei_printf_float(result->classification[ix].value);
+        ei_printf("\n");
     }
-    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) {
-        int scale_res = numpy::scale(fmatrix, 1 / 255.0f);
-        if (scale_res != EIDSP_OK) {
-            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
-            return EI_IMPULSE_DSP_ERROR;
+#endif
+#if EI_CLASSIFIER_HAS_ANOMALY == 3 // visual AD
+    ei_printf("#Visual anomaly grid results:\r\n");
+    for (uint32_t i = 0; i < result->visual_ad_count; i++) {
+        ei_impulse_result_bounding_box_t bb = result->visual_ad_grid_cells[i];
+        if (bb.value == 0) {
+            continue;
         }
+        ei_printf("    %s (", bb.label);
+        ei_printf_float(bb.value);
+        ei_printf(") [ x: %u, y: %u, width: %u, height: %u ]\n", bb.x, bb.y, bb.width, bb.height);
     }
-    return EI_IMPULSE_OK;
-}
+    ei_printf("Visual anomaly values: Mean ");
+    ei_printf_float(result->visual_ad_result.mean_value);
+    ei_printf(" Max ");
+    ei_printf_float(result->visual_ad_result.max_value);
+    ei_printf("\r\n");
+#elif (EI_CLASSIFIER_HAS_ANOMALY > 0) // except for visual AD
+    ei_printf("Anomaly prediction: ");
+    ei_printf_float(result->anomaly);
+    ei_printf("\r\n");
 #endif
+}
 
 /**
  * @brief      Do inferencing over the processed feature matrix
@@ -152,29 +167,35 @@ static EI_IMPULSE_ERROR unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t
  * @return     The ei impulse error.
  */
 extern "C" EI_IMPULSE_ERROR run_inference(
-    const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_impulse_handle_t *handle,
+    ei_feature_t *fmatrix,
     ei_impulse_result_t *result,
     bool debug = false)
 {
+    auto& impulse = handle->impulse;
     for (size_t ix = 0; ix < impulse->learning_blocks_size; ix++) {
+
         ei_learning_block_t block = impulse->learning_blocks[ix];
 
 #if EI_CLASSIFIER_LOAD_IMAGE_SCALING
-        EI_IMPULSE_ERROR scale_res = scale_fmatrix(&block, fmatrix);
+        // we do not plan to have multiple dsp blocks with image
+        // so just apply scaling to the first one
+        EI_IMPULSE_ERROR scale_res = ei_scale_fmatrix(&block, fmatrix[0].matrix);
         if (scale_res != EI_IMPULSE_OK) {
             return scale_res;
         }
 #endif
 
-        EI_IMPULSE_ERROR res = block.infer_fn(impulse, fmatrix, result, block.config, debug);
+        result->copy_output = block.keep_output;
+
+        EI_IMPULSE_ERROR res = block.infer_fn(impulse, fmatrix, ix, (uint32_t*)block.input_block_ids, block.input_block_ids_size, result, block.config, debug);
         if (res != EI_IMPULSE_OK) {
             return res;
         }
 
 #if EI_CLASSIFIER_LOAD_IMAGE_SCALING
         // undo scaling
-        scale_res = unscale_fmatrix(&block, fmatrix);
+        scale_res = ei_unscale_fmatrix(&block, fmatrix[0].matrix);
         if (scale_res != EI_IMPULSE_OK) {
             return scale_res;
         }
@@ -194,83 +215,152 @@ extern "C" EI_IMPULSE_ERROR run_inference(
  * @param      impulse  struct with information about model and DSP
  * @param      signal   Sample data
  * @param      result   Output classifier results
+ * @param      handle   Impulse handle; must not be nullptr (a null handle returns EI_IMPULSE_INFERENCE_ERROR)
  * @param[in]  debug    Debug output enable
  *
  * @return     The ei impulse error.
  */
-extern "C" EI_IMPULSE_ERROR process_impulse(const ei_impulse_t *impulse,
+extern "C" EI_IMPULSE_ERROR process_impulse(ei_impulse_handle_t *handle,
                                             signal_t *signal,
                                             ei_impulse_result_t *result,
                                             bool debug = false)
 {
+    if(!handle) {
+        return EI_IMPULSE_INFERENCE_ERROR;
+    }
 
-#if (EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL)) || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI
+#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL)) || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI
     // Shortcut for quantized image models
-    ei_learning_block_t block = impulse->learning_blocks[0];
-    if (can_run_classifier_image_quantized(impulse, block) == EI_IMPULSE_OK) {
-        return run_classifier_image_quantized(impulse, signal, result, debug);
+    ei_learning_block_t block = handle->impulse->learning_blocks[0];
+    if (can_run_classifier_image_quantized(handle->impulse, block) == EI_IMPULSE_OK) {
+        return run_classifier_image_quantized(handle->impulse, signal, result, debug);
     }
 #endif
 
     memset(result, 0, sizeof(ei_impulse_result_t));
+    uint32_t block_num = handle->impulse->dsp_blocks_size + handle->impulse->learning_blocks_size; // one feature slot per DSP and learning block
+
+    // smart pointer to features array
+    std::unique_ptr<ei_feature_t[]> features_ptr(new ei_feature_t[block_num]);
+    ei_feature_t* features = features_ptr.get();
+    memset(features, 0, sizeof(ei_feature_t) * block_num);
 
-    ei::matrix_t features_matrix(1, impulse->nn_input_frame_size);
+    // have it outside of the loop to avoid going out of scope
+    std::unique_ptr<ei::matrix_t> *matrix_ptrs = new std::unique_ptr<ei::matrix_t>[block_num]; // raw array: must be delete[]'d on every exit path below
 
     uint64_t dsp_start_us = ei_read_timer_us();
 
     size_t out_features_index = 0;
 
-    for (size_t ix = 0; ix < impulse->dsp_blocks_size; ix++) {
-        ei_model_dsp_t block = impulse->dsp_blocks[ix];
+    for (size_t ix = 0; ix < handle->impulse->dsp_blocks_size; ix++) {
+        ei_model_dsp_t block = handle->impulse->dsp_blocks[ix];
+        matrix_ptrs[ix] = std::unique_ptr<ei::matrix_t>(new ei::matrix_t(1, block.n_output_features));
+        features[ix].matrix = matrix_ptrs[ix].get();
+        features[ix].blockId = block.blockId;
 
-        if (out_features_index + block.n_output_features > impulse->nn_input_frame_size) {
+        if (out_features_index + block.n_output_features > handle->impulse->nn_input_frame_size) {
             ei_printf("ERR: Would write outside feature buffer\n");
+            delete[] matrix_ptrs;
             return EI_IMPULSE_DSP_ERROR;
         }
 
-        ei::matrix_t fm(1, block.n_output_features, features_matrix.buffer + out_features_index);
-
 #if EIDSP_SIGNAL_C_FN_POINTER
-        if (block.axes_size != impulse->raw_samples_per_frame) {
+        if (block.axes_size != handle->impulse->raw_samples_per_frame) {
             ei_printf("ERR: EIDSP_SIGNAL_C_FN_POINTER can only be used when all axes are selected for DSP blocks\n");
+            delete[] matrix_ptrs;
             return EI_IMPULSE_DSP_ERROR;
         }
-        int ret = block.extract_fn(signal, &fm, block.config, impulse->frequency);
+        auto internal_signal = signal;
 #else
-        SignalWithAxes swa(signal, block.axes, block.axes_size, impulse);
-        int ret = block.extract_fn(swa.get_signal(), &fm, block.config, impulse->frequency);
+        SignalWithAxes swa(signal, block.axes, block.axes_size, handle->impulse);
+        auto internal_signal = swa.get_signal();
 #endif
 
+        int ret;
+        if (block.factory) { // ie, if we're using state
+            // Msg user
+            static bool has_printed = false;
+            if (!has_printed) {
+                EI_LOGI("Impulse maintains state. Call run_classifier_init() to reset state (e.g. if data stream is interrupted.)\n");
+                has_printed = true;
+            }
+
+            // getter has a lazy init, so we can just call it
+            auto dsp_handle = handle->state.get_dsp_handle(ix);
+            if (!dsp_handle) {
+                delete[] matrix_ptrs; // every other early exit frees this array too
+                return EI_IMPULSE_OUT_OF_MEMORY;
+            }
+            ret = dsp_handle->extract(internal_signal, features[ix].matrix, block.config, handle->impulse->frequency);
+        } else {
+            ret = block.extract_fn(internal_signal, features[ix].matrix, block.config, handle->impulse->frequency);
+        }
 
         if (ret != EIDSP_OK) {
             ei_printf("ERR: Failed to run DSP process (%d)\n", ret);
+            delete[] matrix_ptrs;
             return EI_IMPULSE_DSP_ERROR;
         }
 
         if (ei_run_impulse_check_canceled() == EI_IMPULSE_CANCELED) {
+            delete[] matrix_ptrs;
             return EI_IMPULSE_CANCELED;
         }
 
         out_features_index += block.n_output_features;
     }
 
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+    for (size_t ix = 0; ix < handle->impulse->learning_blocks_size; ix++) {
+        ei_learning_block_t block = handle->impulse->learning_blocks[ix];
+
+        if (block.keep_output) {
+            matrix_ptrs[handle->impulse->dsp_blocks_size + ix] = std::unique_ptr<ei::matrix_t>(new ei::matrix_t(1, block.output_features_count));
+            features[handle->impulse->dsp_blocks_size + ix].matrix = matrix_ptrs[handle->impulse->dsp_blocks_size + ix].get();
+            features[handle->impulse->dsp_blocks_size + ix].blockId = block.blockId;
+        }
+    }
+#endif // EI_CLASSIFIER_SINGLE_FEATURE_INPUT
+
     result->timing.dsp_us = ei_read_timer_us() - dsp_start_us;
     result->timing.dsp = (int)(result->timing.dsp_us / 1000);
 
     if (debug) {
         ei_printf("Features (%d ms.): ", result->timing.dsp);
-        for (size_t ix = 0; ix < features_matrix.cols; ix++) {
-            ei_printf_float(features_matrix.buffer[ix]);
-            ei_printf(" ");
+        for (size_t ix = 0; ix < block_num; ix++) {
+            if (features[ix].matrix == nullptr) {
+                continue;
+            }
+            for (size_t jx = 0; jx < features[ix].matrix->cols; jx++) {
+                ei_printf_float(features[ix].matrix->buffer[jx]);
+                ei_printf(" ");
+            }
+            ei_printf("\n");
         }
-        ei_printf("\n");
     }
 
     if (debug) {
         ei_printf("Running impulse...\n");
     }
 
-    return run_inference(impulse, &features_matrix, result, debug);
+    EI_IMPULSE_ERROR res = run_inference(handle, features, result, debug);
+    delete[] matrix_ptrs;
+    return res;
+}
 
+/**
+ * @brief      Resets the state held by an impulse handle
+ *
+ * @param      handle   Impulse handle whose state is reset
+ *
+ * @return     EI_IMPULSE_OUT_OF_MEMORY if handle is null, otherwise EI_IMPULSE_OK.
+ */
+extern "C" EI_IMPULSE_ERROR init_impulse(ei_impulse_handle_t *handle) {
+    if (!handle) {
+        return EI_IMPULSE_OUT_OF_MEMORY;
+    }
+    handle->state.reset();
+    return EI_IMPULSE_OK;
 }
 
 /**
@@ -283,13 +373,13 @@ extern "C" EI_IMPULSE_ERROR process_impulse(const ei_impulse_t *impulse,
  *
  * @return     The ei impulse error.
  */
-extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impulse,
+extern "C" EI_IMPULSE_ERROR process_impulse_continuous(ei_impulse_handle_t *handle,
                                             signal_t *signal,
                                             ei_impulse_result_t *result,
                                             bool debug,
                                             bool enable_maf)
 {
-
+    auto impulse = handle->impulse;
     static ei::matrix_t static_features_matrix(1, impulse->nn_input_frame_size);
     if (!static_features_matrix.buffer) {
         return EI_IMPULSE_ALLOC_FAILED;
@@ -302,9 +392,6 @@ extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impul
     uint64_t dsp_start_us = ei_read_timer_us();
 
     size_t out_features_index = 0;
-    bool is_mfcc = false;
-    bool is_mfe = false;
-    bool is_spectrogram = false;
 
     for (size_t ix = 0; ix < impulse->dsp_blocks_size; ix++) {
         ei_model_dsp_t block = impulse->dsp_blocks[ix];
@@ -322,15 +409,12 @@ extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impul
         /* Switch to the slice version of the mfcc feature extract function */
         if (block.extract_fn == extract_mfcc_features) {
             extract_fn_slice = &extract_mfcc_per_slice_features;
-            is_mfcc = true;
         }
         else if (block.extract_fn == extract_spectrogram_features) {
             extract_fn_slice = &extract_spectrogram_per_slice_features;
-            is_spectrogram = true;
         }
         else if (block.extract_fn == extract_mfe_features) {
             extract_fn_slice = &extract_mfe_per_slice_features;
-            is_mfe = true;
         }
         else {
             ei_printf("ERR: Unknown extract function, only MFCC, MFE and spectrogram supported\n");
@@ -378,22 +462,42 @@ extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impul
 
     if (classifier_continuous_features_written >= impulse->nn_input_frame_size) {
         dsp_start_us = ei_read_timer_us();
-        ei::matrix_t classify_matrix(1, impulse->nn_input_frame_size);
 
-        /* Create a copy of the matrix for normalization */
-        for (size_t m_ix = 0; m_ix < impulse->nn_input_frame_size; m_ix++) {
-            classify_matrix.buffer[m_ix] = static_features_matrix.buffer[m_ix];
-        }
+        uint32_t block_num = impulse->dsp_blocks_size + impulse->learning_blocks_size;
 
-        if (is_mfcc) {
-            calc_cepstral_mean_and_var_normalization_mfcc(&classify_matrix, impulse->dsp_blocks[0].config);
-        }
-        else if (is_spectrogram) {
-            calc_cepstral_mean_and_var_normalization_spectrogram(&classify_matrix, impulse->dsp_blocks[0].config);
-        }
-        else if (is_mfe) {
-            calc_cepstral_mean_and_var_normalization_mfe(&classify_matrix, impulse->dsp_blocks[0].config);
+        // smart pointer to features array
+        std::unique_ptr<ei_feature_t[]> features_ptr(new ei_feature_t[block_num]);
+        ei_feature_t* features = features_ptr.get();
+        memset(features, 0, sizeof(ei_feature_t) * block_num);
+
+        // have it outside of the loop to avoid going out of scope
+        std::unique_ptr<ei::matrix_t> *matrix_ptrs = new std::unique_ptr<ei::matrix_t>[block_num];
+
+        out_features_index = 0;
+        // iterate over every dsp block and run normalization
+        for (size_t ix = 0; ix < impulse->dsp_blocks_size; ix++) {
+            ei_model_dsp_t block = impulse->dsp_blocks[ix];
+            matrix_ptrs[ix] = std::unique_ptr<ei::matrix_t>(new ei::matrix_t(1, block.n_output_features));
+            features[ix].matrix = matrix_ptrs[ix].get();
+            features[ix].blockId = block.blockId;
+
+            /* Create a copy of the matrix for normalization */
+            for (size_t m_ix = 0; m_ix < block.n_output_features; m_ix++) {
+                features[ix].matrix->buffer[m_ix] = static_features_matrix.buffer[out_features_index + m_ix];
+            }
+
+            if (block.extract_fn == extract_mfcc_features) {
+                calc_cepstral_mean_and_var_normalization_mfcc(features[ix].matrix, block.config);
+            }
+            else if (block.extract_fn == extract_spectrogram_features) {
+                calc_cepstral_mean_and_var_normalization_spectrogram(features[ix].matrix, block.config);
+            }
+            else if (block.extract_fn == extract_mfe_features) {
+                calc_cepstral_mean_and_var_normalization_mfe(features[ix].matrix, block.config);
+            }
+            out_features_index += block.n_output_features;
         }
+
         result->timing.dsp_us += ei_read_timer_us() - dsp_start_us;
         result->timing.dsp = (int)(result->timing.dsp_us / 1000);
 
@@ -401,7 +505,7 @@ extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impul
             ei_printf("Running impulse...\n");
         }
 
-        ei_impulse_error = run_inference(impulse, &classify_matrix, result, debug);
+        ei_impulse_error = run_inference(handle, features, result, debug);
 
 #if EI_CLASSIFIER_CALIBRATION_ENABLED
         if (impulse->sensor == EI_CLASSIFIER_SENSOR_MICROPHONE) {
@@ -445,19 +549,16 @@ extern "C" EI_IMPULSE_ERROR process_impulse_continuous(const ei_impulse_t *impul
             }
         }
 #endif
+        delete[] matrix_ptrs;
     }
     else {
-        if (!impulse->object_detection) {
-            for (int i = 0; i < impulse->label_count; i++) {
-                // set label correctly in the result struct if we have no results (otherwise is nullptr)
-                result->classification[i].label = impulse->categories[(uint32_t)i];
-            }
+        for (int i = 0; i < impulse->label_count; i++) {
+            // set label correctly in the result struct if we have no results (otherwise is nullptr)
+            result->classification[i].label = impulse->categories[(uint32_t)i];
         }
     }
 
     return ei_impulse_error;
-
-
 }
 
 /**
@@ -473,7 +574,8 @@ __attribute__((unused)) static EI_IMPULSE_ERROR can_run_classifier_image_quantiz
         return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
     }
 
-    if (impulse->has_anomaly == 1){
+    // visual anomaly also needs to go through the normal path
+    if (impulse->has_anomaly){
         return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES;
     }
 
@@ -482,8 +584,9 @@ __attribute__((unused)) static EI_IMPULSE_ERROR can_run_classifier_image_quantiz
         return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES;
     }
 
-        // Check if we have a quantized NN Input layer (input is always quantized for DRP-AI)
-    if (impulse->quantized != 1) {
+    // Check if we have a quantized NN Input layer (input is always quantized for DRP-AI)
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)block_ptr.config;
+    if (block_config->quantized != 1) {
         return EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES;
     }
 
@@ -495,10 +598,10 @@ __attribute__((unused)) static EI_IMPULSE_ERROR can_run_classifier_image_quantiz
     return EI_IMPULSE_OK;
 }
 
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL)
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL)
 
 /**
- * Special function to run the classifier on images, only works on TFLite models (either interpreter, EON, tensaiflow, drpai or tidl)
+ * Special function to run the classifier on images, only works on TFLite models (either interpreter, EON, tensaiflow, drpai, tidl, memryx)
  * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized'
  * returns EI_IMPULSE_OK.
  */
@@ -513,43 +616,200 @@ extern "C" EI_IMPULSE_ERROR run_classifier_image_quantized(
     return run_nn_inference_image_quantized(impulse, signal, result, impulse->learning_blocks[0].config, debug);
 }
 
-#endif // #if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
+#endif // #if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1 && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TENSAIFLOW || EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
+
+#if EI_CLASSIFIER_LOAD_IMAGE_SCALING
+static const float torch_mean[] = { 0.485, 0.456, 0.406 };
+static const float torch_std[] = { 0.229, 0.224, 0.225 };
+// This is ordered BGR
+static const float tao_mean[] = { 103.939, 116.779, 123.68 };
+
+EI_IMPULSE_ERROR ei_scale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) { // rescales fmatrix in place according to block->image_scaling
+    if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
+        // Each triplet becomes (x - torch_mean[c]) / torch_std[c]; presumably interleaved 3-channel pixels (TODO confirm). @todo; could we write some faster vector math here?
+        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
+            fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] - torch_mean[0]) / torch_std[0];
+            fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] - torch_mean[1]) / torch_std[1];
+            fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] - torch_mean[2]) / torch_std[2];
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) { // scale by 255, no offset
+        int scale_res = numpy::scale(fmatrix, 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN128_127) { // scale by 255, then subtract 128
+        int scale_res = numpy::scale(fmatrix, 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+        scale_res = numpy::subtract(fmatrix, 128.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN1_1) { // scale by 2, then subtract 1
+        int scale_res = numpy::scale(fmatrix, 2.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+        scale_res = numpy::subtract(fmatrix, 1.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_BGR_SUBTRACT_IMAGENET_MEAN) {
+        int scale_res = numpy::scale(fmatrix, 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+        // Swap elements 0 and 2 of every triplet (RGB to BGR) and subtract the matching tao_mean entry (tao_mean is ordered BGR)
+        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
+            float r = fmatrix->buffer[ix + 0]; // keep the old first element before it is overwritten
+            fmatrix->buffer[ix + 0] = fmatrix->buffer[ix + 2] - tao_mean[0];
+            fmatrix->buffer[ix + 1] -= tao_mean[1];
+            fmatrix->buffer[ix + 2] = r - tao_mean[2];
+        }
+    }
+
+    return EI_IMPULSE_OK; // also reached, with the matrix untouched, when image_scaling matches no branch above
+}
+
+EI_IMPULSE_ERROR ei_unscale_fmatrix(ei_learning_block_t *block, ei::matrix_t *fmatrix) { // reverses, in place, the per-mode arithmetic of ei_scale_fmatrix
+    if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_TORCH) {
+        // Each triplet becomes x * torch_std[c] + torch_mean[c]. @todo; could we write some faster vector math here?
+        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
+            fmatrix->buffer[ix + 0] = (fmatrix->buffer[ix + 0] * torch_std[0]) + torch_mean[0];
+            fmatrix->buffer[ix + 1] = (fmatrix->buffer[ix + 1] * torch_std[1]) + torch_mean[1];
+            fmatrix->buffer[ix + 2] = (fmatrix->buffer[ix + 2] * torch_std[2]) + torch_mean[2];
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN128_127) { // add 128, then scale by 1/255
+        int scale_res = numpy::add(fmatrix, 128.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+        scale_res = numpy::scale(fmatrix, 1 / 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN1_1) { // add 1, then scale by 1/2
+        int scale_res = numpy::add(fmatrix, 1.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+        scale_res = numpy::scale(fmatrix, 1 / 2.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_0_255) { // scale by 1/255, no offset
+        int scale_res = numpy::scale(fmatrix, 1 / 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    else if (block->image_scaling == EI_CLASSIFIER_IMAGE_SCALING_BGR_SUBTRACT_IMAGENET_MEAN) {
+        // Swap elements 0 and 2 of every triplet (BGR to RGB) and add back the matching tao_mean entry, before the 1/255 scale below
+        for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix += 3) {
+            float b = fmatrix->buffer[ix + 0]; // keep the old first element before it is overwritten
+            fmatrix->buffer[ix + 0] = fmatrix->buffer[ix + 2] + tao_mean[2];
+            fmatrix->buffer[ix + 1] += tao_mean[1];
+            fmatrix->buffer[ix + 2] = b + tao_mean[0];
+        }
+        int scale_res = numpy::scale(fmatrix, 1 / 255.0f);
+        if (scale_res != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale matrix (%d)\n", scale_res);
+            return EI_IMPULSE_DSP_ERROR;
+        }
+    }
+    return EI_IMPULSE_OK; // also reached, with the matrix untouched, when image_scaling matches no branch above
+}
+#endif
 
 /* Public functions ------------------------------------------------------- */
 
-/* Thread carefully: public functions are not to be changed
-to preserve backwards compatibility. */
+/* Tread carefully: public functions are not to be changed
+to preserve backwards compatibility. Anything in this public section
+will be documented by Doxygen. */
 
 /**
- * @brief      Init static vars
+ * @defgroup ei_functions Functions
+ * 
+ * Public-facing functions for running inference using the Edge Impulse C++ library. 
+ * 
+ * **Source**: [classifier/ei_run_classifier.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/classifier/ei_run_classifier.h)
+ * 
+ * @addtogroup ei_functions
+ * @{
  */
-extern "C" void run_classifier_init()
+
+/**
+ * @brief Initialize static variables for running preprocessing and inference 
+ *  continuously.
+ * 
+ * Initializes and clears any internal static variables needed by `run_classifier_continuous()`.
+ * This includes the moving average filter (MAF). This function should be called prior to
+ * calling `run_classifier_continuous()`.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [nano_ble33_sense_microphone_continuous.ino](https://github.com/edgeimpulse/example-lacuna-ls200/blob/main/nano_ble33_sense_microphone_continous/nano_ble33_sense_microphone_continuous.ino)
+ */
+extern "C" void run_classifier_init(void) // variant without arguments: operates on the global ei_default_impulse
 {
 
     classifier_continuous_features_written = 0;
     ei_dsp_clear_continuous_audio_state();
+    init_impulse(&ei_default_impulse); // NOTE(review): presumably (re)initialises per-impulse state; init_impulse is defined outside this view
 
 #if EI_CLASSIFIER_CALIBRATION_ENABLED
 
-    const ei_impulse_t impulse = ei_default_impulse;
-    const ei_model_performance_calibration_t *calibration = &impulse.calibration;
+    const auto impulse = ei_default_impulse.impulse; // pointer to the impulse definition held by the default handle
+    const ei_model_performance_calibration_t *calibration = &impulse->calibration; // address of a member, so the NULL check below cannot fail
 
     if(calibration != NULL) {
         avg_scores = new RecognizeEvents(calibration,
-            impulse.label_count, impulse.slice_size, impulse.interval_ms);
+            impulse->label_count, impulse->slice_size, impulse->interval_ms); // same three fields as before, now read through the pointer
     }
 #endif
 }
 
 /**
- * @brief      Init static vars, for multi-model support
+ * @brief Initialize static variables for running preprocessing and inference 
+ *  continuously.
+ * 
+ * Initializes and clears any internal static variables needed by `run_classifier_continuous()`.
+ * This includes the moving average filter (MAF). This function should be called prior to
+ * calling `run_classifier_continuous()`.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [nano_ble33_sense_microphone_continuous.ino](https://github.com/edgeimpulse/example-lacuna-ls200/blob/main/nano_ble33_sense_microphone_continous/nano_ble33_sense_microphone_continuous.ino)
+ * 
+ * @param[in]   handle struct with information about model and DSP
  */
-__attribute__((unused)) void run_classifier_init(const ei_impulse_t *impulse)
+__attribute__((unused)) void run_classifier_init(ei_impulse_handle_t *handle)
 {
     classifier_continuous_features_written = 0;
     ei_dsp_clear_continuous_audio_state();
+    init_impulse(handle);
 
 #if EI_CLASSIFIER_CALIBRATION_ENABLED
+    auto impulse = handle->impulse;
     const ei_model_performance_calibration_t *calibration = &impulse->calibration;
 
     if(calibration != NULL) {
@@ -559,6 +819,17 @@ __attribute__((unused)) void run_classifier_init(const ei_impulse_t *impulse)
 #endif
 }
 
+/**
+ * @brief Deletes static variables when running preprocessing and inference continuously.
+ * 
+ * Deletes internal static variables used by `run_classifier_continuous()`, which
+ * includes the moving average filter (MAF). This function should be called when you
+ * are done running continuous classification.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [ei_run_audio_impulse.cpp](https://github.com/edgeimpulse/firmware-nordic-thingy53/blob/main/src/inference/ei_run_audio_impulse.cpp)
+ */
 extern "C" void run_classifier_deinit(void)
 {
     if((void *)avg_scores != NULL) {
@@ -567,14 +838,53 @@ extern "C" void run_classifier_deinit(void)
 }
 
 /**
- * @brief      Fill the complete matrix with sample slices. From there, run inference
- *             on the matrix.
+ * @brief Run preprocessing (DSP) on new slice of raw features. Add output features 
+ *  to rolling matrix and run inference on full sample.
  *
- * @param      signal  Sample data
- * @param      result  Classification output
- * @param[in]  debug   Debug output enable boot
+ * Accepts a new slice of features given by the callback defined in the `signal` parameter. 
+ * It performs preprocessing (DSP) on this new slice of features and appends the output to 
+ * a sliding window of pre-processed features (stored in a static features matrix). The matrix
+ * stores the new slice and as many old slices as necessary to make up one full sample for 
+ * performing inference.
+ * 
+ * `run_classifier_init()` must be called before making any calls to 
+ * `run_classifier_continuous()`.
+ * 
+ * For example, if you are doing keyword spotting on 1-second slices of audio and you want to
+ * perform inference 4 times per second (given by `EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW`), you
+ * would collect 0.25 seconds of audio and call run_classifier_continuous(). The function would
+ * compute the Mel-Frequency Cepstral Coefficients (MFCCs) for that 0.25 second slice of audio,
+ * drop the oldest 0.25 seconds' worth of MFCCs from its internal matrix, and append the newest
+ * slice of MFCCs. This process allows the library to keep track of the pre-processed features
+ * (e.g. MFCCs) in the window instead of the entire set of raw features (e.g. raw audio data),
+ * which can potentially save a lot of space in RAM. After updating the static matrix, 
+ * inference is performed using the whole matrix, which acts as a sliding window of 
+ * pre-processed features.
+ * 
+ * Additionally, a moving average filter (MAF) can be enabled for `run_classifier_continuous()`, 
+ * which averages (arithmetic mean) the last *n* inference results for each class. *n* is 
+ * `EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW / 2`. In our example above, if we enabled the MAF, the 
+ * values in `result` would contain predictions averaged from the previous 2 inferences.
+ * 
+ * To learn more about `run_classifier_continuous()`, see 
+ * [this guide](https://docs.edgeimpulse.com/docs/tutorials/advanced-inferencing/continuous-audio-sampling) 
+ * on continuous audio sampling. While the guide is written for audio signals, the concepts of continuous sampling and inference can be extrapolated to any time-series data.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [nano_ble33_sense_microphone_continuous.ino](https://github.com/edgeimpulse/example-lacuna-ls200/blob/main/nano_ble33_sense_microphone_continous/nano_ble33_sense_microphone_continuous.ino)
+ * 
+ * @param[in] signal  Pointer to a signal_t struct that contains the number of elements in the 
+ *  slice of raw features (e.g. `EI_CLASSIFIER_SLICE_SIZE`) and a pointer to a callback that reads 
+ *  in the slice of raw features.
+ * @param[out] result Pointer to an `ei_impulse_result_t` struct that contains the various output 
+ *  results from inference after `run_classifier_continuous()` returns.
+ * @param[in]  debug Print internal preprocessing and inference debugging information via 
+ *  `ei_printf()`.
+ * @param[in]  enable_maf Enable the moving average filter (MAF) for the classifier.
  *
- * @return     The ei impulse error.
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference 
+ *  completed successfully.
  */
 extern "C" EI_IMPULSE_ERROR run_classifier_continuous(
     signal_t *signal,
@@ -582,23 +892,62 @@ extern "C" EI_IMPULSE_ERROR run_classifier_continuous(
     bool debug = false,
     bool enable_maf = true)
 {
-    const ei_impulse_t impulse = ei_default_impulse;
+    auto& impulse = ei_default_impulse;
     return process_impulse_continuous(&impulse, signal, result, debug, enable_maf);
 }
 
 /**
- * @brief      Fill the complete matrix with sample slices. From there, run impulse
- *             on the matrix.
+ * @brief Run preprocessing (DSP) on new slice of raw features. Add output features 
+ *  to rolling matrix and run inference on full sample.
  *
- * @param      impulse struct with information about model and DSP
- * @param      signal  Sample data
- * @param      result  Classification output
- * @param[in]  debug   Debug output enable boot
+ * Accepts a new slice of features given by the callback defined in the `signal` parameter. 
+ * It performs preprocessing (DSP) on this new slice of features and appends the output to 
+ * a sliding window of pre-processed features (stored in a static features matrix). The matrix
+ * stores the new slice and as many old slices as necessary to make up one full sample for 
+ * performing inference.
+ * 
+ * `run_classifier_init()` must be called before making any calls to 
+ * `run_classifier_continuous()`.
+ * 
+ * For example, if you are doing keyword spotting on 1-second slices of audio and you want to
+ * perform inference 4 times per second (given by `EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW`), you
+ * would collect 0.25 seconds of audio and call run_classifier_continuous(). The function would
+ * compute the Mel-Frequency Cepstral Coefficients (MFCCs) for that 0.25 second slice of audio,
+ * drop the oldest 0.25 seconds' worth of MFCCs from its internal matrix, and append the newest
+ * slice of MFCCs. This process allows the library to keep track of the pre-processed features
+ * (e.g. MFCCs) in the window instead of the entire set of raw features (e.g. raw audio data),
+ * which can potentially save a lot of space in RAM. After updating the static matrix, 
+ * inference is performed using the whole matrix, which acts as a sliding window of 
+ * pre-processed features.
+ * 
+ * Additionally, a moving average filter (MAF) can be enabled for `run_classifier_continuous()`, 
+ * which averages (arithmetic mean) the last *n* inference results for each class. *n* is 
+ * `EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW / 2`. In our example above, if we enabled the MAF, the 
+ * values in `result` would contain predictions averaged from the previous 2 inferences.
+ * 
+ * To learn more about `run_classifier_continuous()`, see 
+ * [this guide](https://docs.edgeimpulse.com/docs/tutorials/advanced-inferencing/continuous-audio-sampling) 
+ * on continuous audio sampling. While the guide is written for audio signals, the concepts of continuous sampling and inference can be extrapolated to any time-series data.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [nano_ble33_sense_microphone_continuous.ino](https://github.com/edgeimpulse/example-lacuna-ls200/blob/main/nano_ble33_sense_microphone_continous/nano_ble33_sense_microphone_continuous.ino)
+ * 
+ * @param[in] impulse `ei_impulse_handle_t` struct with information about preprocessing and model.
+ * @param[in] signal  Pointer to a signal_t struct that contains the number of elements in the 
+ *  slice of raw features (e.g. `EI_CLASSIFIER_SLICE_SIZE`) and a pointer to a callback that reads 
+ *  in the slice of raw features.
+ * @param[out] result Pointer to an `ei_impulse_result_t` struct that contains the various output 
+ *  results from inference after `run_classifier_continuous()` returns.
+ * @param[in] debug Print internal preprocessing and inference debugging information via 
+ *  `ei_printf()`.
+ * @param[in] enable_maf Enable the moving average filter (MAF) for the classifier.
  *
- * @return     The ei impulse error.
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference 
+ *  completed successfully.
  */
 __attribute__((unused)) EI_IMPULSE_ERROR run_classifier_continuous(
-    const ei_impulse_t *impulse,
+    ei_impulse_handle_t *impulse,
     signal_t *signal,
     ei_impulse_result_t *result,
     bool debug = false,
@@ -608,31 +957,58 @@ __attribute__((unused)) EI_IMPULSE_ERROR run_classifier_continuous(
 }
 
 /**
- * Run the classifier over a raw features array
- * @param raw_features Raw features array
- * @param raw_features_size Size of the features array
- * @param result Object to store the results in
- * @param debug Whether to show debug messages (default: false)
+ * @brief Run the classifier over a raw features array.
+ * 
+ * 
+ * Overloaded function [run_classifier()](#run_classifier-1) that defaults to the single impulse.
+ * 
+ * **Blocking**: yes
+ * 
+ * @param[in] signal Pointer to a `signal_t` struct that contains the total length of the raw 
+ *  feature array, which must match EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE, and a pointer to a callback
+ *  that reads in the raw features.
+ * @param[out] result  Pointer to an ei_impulse_result_t struct that will contain the various output 
+ *  results from inference after `run_classifier()` returns.
+ * @param[in] debug Print internal preprocessing and inference debugging information via `ei_printf()`.
+ * 
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference
+ *  completed successfully.
  */
 extern "C" EI_IMPULSE_ERROR run_classifier(
     signal_t *signal,
     ei_impulse_result_t *result,
     bool debug = false)
 {
-    const ei_impulse_t impulse = ei_default_impulse;
-    return process_impulse(&impulse, signal, result, debug);
+    return process_impulse(&ei_default_impulse, signal, result, debug); // passes the global default handle by address instead of a local copy
 }
 
 /**
- * Run the impulse over a raw features array
- * @param impulse struct with information about model and DSP
- * @param raw_features Raw features array
- * @param raw_features_size Size of the features array
- * @param result Object to store the results in
- * @param debug Whether to show debug messages (default: false)
+ * @brief Run the classifier over a raw features array.
+ * 
+ * 
+ * Accepts a `signal_t` input struct pointing to a callback that reads in pages of raw features. 
+ * `run_classifier()` performs any necessary preprocessing on the raw features (e.g. DSP, cropping 
+ * of images, etc.) before performing inference. Results from inference are stored in an 
+ * `ei_impulse_result_t` struct.
+ * 
+ * **Blocking**: yes
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
+ * 
+ * @param[in] impulse Pointer to an `ei_impulse_handle_t` struct that contains the model and
+ *  preprocessing information.
+ * @param[in] signal Pointer to a `signal_t` struct that contains the total length of the raw 
+ *  feature array, which must match EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE, and a pointer to a callback
+ *  that reads in the raw features.
+ * @param[out] result  Pointer to an ei_impulse_result_t struct that will contain the various output 
+ *  results from inference after `run_classifier()` returns.
+ * @param[in] debug Print internal preprocessing and inference debugging information via `ei_printf()`.
+ * 
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference
+ *  completed successfully.
  */
 __attribute__((unused)) EI_IMPULSE_ERROR run_classifier(
-    const ei_impulse_t *impulse,
+    ei_impulse_handle_t *impulse,
     signal_t *signal,
     ei_impulse_result_t *result,
     bool debug = false)
@@ -640,6 +1016,8 @@ __attribute__((unused)) EI_IMPULSE_ERROR run_classifier(
     return process_impulse(impulse, signal, result, debug);
 }
 
+/** @} */ // end of ei_functions Doxygen group
+
 /* Deprecated functions ------------------------------------------------------- */
 
 /* These functions are being deprecated and possibly will be removed or moved in future.
@@ -648,14 +1026,22 @@ Do not use these - if possible, change your code to reflect the upcoming changes
 #if EIDSP_SIGNAL_C_FN_POINTER == 0
 
 /**
- * Run the impulse, if you provide an instance of sampler it will also persist the data for you
- * @param sampler Instance to an **initialized** sampler
- * @param result Object to store the results in
- * @param data_fn Function to retrieve data from sensors
- * @param debug Whether to log debug messages (default false)
+ * @brief Run the impulse, if you provide an instance of sampler it will also persist 
+ *  the data for you.
+ * 
+ * @deprecated This function is deprecated and will be removed in future versions. Use 
+ *  `run_classifier()` instead.
+ * 
+ * @param[in] sampler Instance to an **initialized** sampler
+ * @param[out] result Object to store the results in
+ * @param[in] data_fn Callback function to retrieve data from sensors
+ * @param[in] debug Whether to log debug messages (default false)
+ * 
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference
+ *  completed successfully.
  */
 __attribute__((unused)) EI_IMPULSE_ERROR run_impulse(
-#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1
+#if (defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1) || defined(__DOXYGEN__)
         EdgeSampler *sampler,
 #endif
         ei_impulse_result_t *result,
@@ -666,7 +1052,7 @@ __attribute__((unused)) EI_IMPULSE_ERROR run_impulse(
 #endif
         bool debug = false) {
 
-    const ei_impulse_t impulse = ei_default_impulse;
+    auto& impulse = *(ei_default_impulse.impulse);
 
     float *x = (float*)calloc(impulse.dsp_input_frame_size, sizeof(float));
     if (!x) {
@@ -713,12 +1099,19 @@ __attribute__((unused)) EI_IMPULSE_ERROR run_impulse(
     return r;
 }
 
-#if defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1
+#if (defined(EI_CLASSIFIER_HAS_SAMPLER) && EI_CLASSIFIER_HAS_SAMPLER == 1) || defined(__DOXYGEN__)
 /**
- * Run the impulse, does not persist data
- * @param result Object to store the results in
- * @param data_fn Function to retrieve data from sensors
- * @param debug Whether to log debug messages (default false)
+ * @brief Run the impulse, does not persist data.
+ * 
+ * @deprecated This function is deprecated and will be removed in future versions. Use 
+ *  `run_classifier()` instead.
+ * 
+ * @param[out] result Object to store the results in
+ * @param[in] data_fn Callback function to retrieve data from sensors
+ * @param[in] debug Whether to log debug messages (default false)
+ * 
+ * @return Error code as defined by `EI_IMPULSE_ERROR` enum. Will be `EI_IMPULSE_OK` if inference
+ *  completed successfully.
  */
 __attribute__((unused)) EI_IMPULSE_ERROR run_impulse(
         ei_impulse_result_t *result,
diff --git a/edge-impulse-sdk/classifier/ei_run_dsp.h b/edge-impulse-sdk/classifier/ei_run_dsp.h
index 4e2fb01..e46612c 100644
--- a/edge-impulse-sdk/classifier/ei_run_dsp.h
+++ b/edge-impulse-sdk/classifier/ei_run_dsp.h
@@ -22,8 +22,13 @@
 #include "edge-impulse-sdk/dsp/spectral/spectral.hpp"
 #include "edge-impulse-sdk/dsp/speechpy/speechpy.hpp"
 #include "edge-impulse-sdk/classifier/ei_signal_with_range.h"
+#include "edge-impulse-sdk/dsp/ei_flatten.h"
 #include "model-parameters/model_metadata.h"
 
+#if EI_CLASSIFIER_HR_ENABLED
+#include "edge-impulse-sdk/dsp/ei_hr.hpp"
+#endif
+
 #if defined(__cplusplus) && EI_C_LINKAGE == 1
 extern "C" {
     extern void ei_printf(const char *format, ...);
@@ -47,6 +52,23 @@ static float *ei_dsp_cont_current_frame = nullptr;
 static size_t ei_dsp_cont_current_frame_size = 0;
 static int ei_dsp_cont_current_frame_ix = 0;
 
+__attribute__((unused)) int extract_hr_features( // heart-rate DSP entry point; does real work only when EI_CLASSIFIER_HR_ENABLED is set
+    signal_t *signal,
+    matrix_t *output_matrix,
+    void *config_ptr,
+    const float frequency)
+{
+#if EI_CLASSIFIER_HR_ENABLED
+    auto handle = hr_class::create(config_ptr, frequency); // instance is released with delete below
+    auto ret = handle->extract(signal, output_matrix, config_ptr, frequency); // result code is passed back to the caller unchanged
+    delete handle;
+    return ret;
+#else
+    ei_printf("ERR: Please contact EI sales to enable heart rate processing in deployment\n"); // newline-terminated like the other ERR logs
+    return EIDSP_NOT_SUPPORTED; // heart-rate support is compiled out of this build
+#endif
+}
+
 __attribute__((unused)) int extract_spectral_analysis_features(
     signal_t *signal,
     matrix_t *output_matrix,
@@ -136,104 +158,10 @@ __attribute__((unused)) int extract_raw_features(signal_t *signal, matrix_t *out
 }
 
 __attribute__((unused)) int extract_flatten_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) {
-    ei_dsp_config_flatten_t config = *((ei_dsp_config_flatten_t*)config_ptr);
-
-    uint32_t expected_matrix_size = 0;
-    if (config.average) expected_matrix_size += config.axes;
-    if (config.minimum) expected_matrix_size += config.axes;
-    if (config.maximum) expected_matrix_size += config.axes;
-    if (config.rms) expected_matrix_size += config.axes;
-    if (config.stdev) expected_matrix_size += config.axes;
-    if (config.skewness) expected_matrix_size += config.axes;
-    if (config.kurtosis) expected_matrix_size += config.axes;
-
-    if (output_matrix->rows * output_matrix->cols != expected_matrix_size) {
-        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
-    }
-
-    int ret;
-
-    // input matrix from the raw signal
-    matrix_t input_matrix(signal->total_length / config.axes, config.axes);
-    if (!input_matrix.buffer) {
-        EIDSP_ERR(EIDSP_OUT_OF_MEM);
-    }
-    signal->get_data(0, signal->total_length, input_matrix.buffer);
-
-    // scale the signal
-    ret = numpy::scale(&input_matrix, config.scale_axes);
-    if (ret != EIDSP_OK) {
-        ei_printf("ERR: Failed to scale signal (%d)\n", ret);
-        EIDSP_ERR(ret);
-    }
-
-    // transpose the matrix so we have one row per axis (nifty!)
-    ret = numpy::transpose(&input_matrix);
-    if (ret != EIDSP_OK) {
-        ei_printf("ERR: Failed to transpose matrix (%d)\n", ret);
-        EIDSP_ERR(ret);
-    }
-
-    size_t out_matrix_ix = 0;
-
-    for (size_t row = 0; row < input_matrix.rows; row++) {
-        matrix_t row_matrix(1, input_matrix.cols, input_matrix.buffer + (row * input_matrix.cols));
-
-        if (config.average) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::mean(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.minimum) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::min(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.maximum) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::max(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.rms) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::rms(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.stdev) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::stdev(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.skewness) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::skew(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-
-        if (config.kurtosis) {
-            float fbuffer;
-            matrix_t out_matrix(1, 1, &fbuffer);
-            numpy::kurtosis(&row_matrix, &out_matrix);
-            output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
-        }
-    }
-
-    // flatten again
-    output_matrix->cols = output_matrix->rows * output_matrix->cols;
-    output_matrix->rows = 1;
-
-    return EIDSP_OK;
+    auto handle = flatten_class::create(config_ptr, frequency); // the inline implementation is replaced by a flatten_class instance (ei_flatten.h is newly included)
+    auto ret = handle->extract(signal, output_matrix, config_ptr, frequency); // result code is passed back to the caller unchanged
+    delete handle; // the instance lives only for this call
+    return ret;
 }
 
 static class speechpy::processing::preemphasis *preemphasis;
@@ -303,7 +231,7 @@ __attribute__((unused)) int extract_mfcc_features(signal_t *signal, matrix_t *ou
 }
 
 
-static int extract_mfcc_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfcc_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out, int implementation_version) {
+__attribute__((unused)) static int extract_mfcc_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfcc_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out, int implementation_version) {
     uint32_t frequency = (uint32_t)sampling_frequency;
 
     int x;
@@ -557,7 +485,7 @@ __attribute__((unused)) int extract_spectrogram_features(signal_t *signal, matri
     }
     else {
         // normalization
-        ret = speechpy::processing::spectrogram_normalization(output_matrix, config.noise_floor_db);
+        ret = speechpy::processing::spectrogram_normalization(output_matrix, config.noise_floor_db, config.implementation_version == 3);
         if (ret != EIDSP_OK) {
             ei_printf("ERR: normalization failed (%d)\n", ret);
             EIDSP_ERR(ret);
@@ -571,7 +499,7 @@ __attribute__((unused)) int extract_spectrogram_features(signal_t *signal, matri
 }
 
 
-static int extract_spectrogram_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_spectrogram_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) {
+__attribute__((unused)) static int extract_spectrogram_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_spectrogram_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) {
     uint32_t frequency = (uint32_t)sampling_frequency;
 
     int x;
@@ -794,6 +722,10 @@ __attribute__((unused)) int extract_mfe_features(signal_t *signal, matrix_t *out
         EIDSP_ERR(EIDSP_PARAMETER_INVALID);
     }
 
+    if ((config.implementation_version == 0) || (config.implementation_version > 4)) {
+        EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT);
+    }
+
     const uint32_t frequency = static_cast<uint32_t>(sampling_frequency);
 
     signal_t preemphasized_audio_signal;
@@ -878,7 +810,7 @@ __attribute__((unused)) int extract_mfe_features(signal_t *signal, matrix_t *out
     return EIDSP_OK;
 }
 
-static int extract_mfe_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfe_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) {
+__attribute__((unused)) static int extract_mfe_run_slice(signal_t *signal, matrix_t *output_matrix, ei_dsp_config_mfe_t *config, const float sampling_frequency, matrix_size_t *matrix_size_out) {
     uint32_t frequency = (uint32_t)sampling_frequency;
 
     int x;
@@ -947,6 +879,10 @@ __attribute__((unused)) int extract_mfe_per_slice_features(signal_t *signal, mat
         EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
     }
 
+    if ((config.implementation_version == 0) || (config.implementation_version > 4)) {
+        EIDSP_ERR(EIDSP_BLOCK_VERSION_INCORRECT);
+    }
+
     if (signal->total_length == 0) {
         EIDSP_ERR(EIDSP_PARAMETER_INVALID);
     }
@@ -1206,7 +1142,7 @@ __attribute__((unused)) int extract_image_features(signal_t *signal, matrix_t *o
     return EIDSP_OK;
 }
 
-#if (EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
+#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
 
 __attribute__((unused)) int extract_drpai_features_quantized(signal_t *signal, matrix_u8_t *output_matrix, void *config_ptr, const float frequency) {
     ei_dsp_config_image_t config = *((ei_dsp_config_image_t*)config_ptr);
@@ -1258,9 +1194,9 @@ __attribute__((unused)) int extract_drpai_features_quantized(signal_t *signal, m
     return EIDSP_OK;
 }
 
-#endif //(EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
+#endif //(EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)
 
-#if (EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI)
+#if (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI)
 
 __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, matrix_i8_t *output_matrix, void *config_ptr, float scale, float zero_point, const float frequency,
                                                              int image_scaling) {
@@ -1332,6 +1268,11 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
                         g = (g - torch_mean[1]) / torch_std[1];
                         b = (b - torch_mean[2]) / torch_std[2];
                     }
+                    else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN128_127) {
+                        r -= 128.0f;
+                        g -= 128.0f;
+                        b -= 128.0f;
+                    }
 
                     output_matrix->buffer[output_ix++] = static_cast<int8_t>(round(r / scale) + zero_point);
                     output_matrix->buffer[output_ix++] = static_cast<int8_t>(round(g / scale) + zero_point);
@@ -1374,6 +1315,11 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
                         g = (g - torch_mean[1]) / torch_std[1];
                         b = (b - torch_mean[2]) / torch_std[2];
                     }
+                    else if (image_scaling == EI_CLASSIFIER_IMAGE_SCALING_MIN128_127) {
+                        r -= 128.0f;
+                        g -= 128.0f;
+                        b -= 128.0f;
+                    }
 
                     // ITU-R 601-2 luma transform
                     // see: https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.convert
@@ -1388,7 +1334,7 @@ __attribute__((unused)) int extract_image_features_quantized(signal_t *signal, m
     }
     return EIDSP_OK;
 }
-#endif // (EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI)
+#endif // (EI_CLASSIFIER_QUANTIZATION_ENABLED == 1) && (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_DRPAI)
 
 /**
  * Clear all state regarding continuous audio. Invoke this function after continuous audio loop ends.
@@ -1496,7 +1442,7 @@ __attribute__((unused)) void calc_cepstral_mean_and_var_normalization_spectrogra
     }
     else {
         // normalization
-        int ret = speechpy::processing::spectrogram_normalization(matrix, config->noise_floor_db);
+        int ret = speechpy::processing::spectrogram_normalization(matrix, config->noise_floor_db, config->implementation_version == 3);
         if (ret != EIDSP_OK) {
             ei_printf("ERR: normalization failed (%d)\n", ret);
             return;
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/akida.h b/edge-impulse-sdk/classifier/inferencing_engines/akida.h
index 8be1376..08f4fc8 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/akida.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/akida.h
@@ -45,9 +45,22 @@
 #endif
 
 #include "model-parameters/model_metadata.h"
+#include <thread>
+#include "tensorflow-lite/tensorflow/lite/c/common.h"
+#include "tensorflow-lite/tensorflow/lite/interpreter.h"
+#include "tensorflow-lite/tensorflow/lite/kernels/register.h"
+#include "tensorflow-lite/tensorflow/lite/model.h"
+#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.h"
+#include "edge-impulse-sdk/classifier/ei_model_types.h"
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow-lite/tensorflow/lite/kernels/internal/reference/softmax.h"
+#undef EI_CLASSIFIER_INFERENCING_ENGINE
+#define EI_CLASSIFIER_INFERENCING_ENGINE EI_CLASSIFIER_TFLITE_FULL
+#include "tflite_helper.h"
+#undef EI_CLASSIFIER_INFERENCING_ENGINE
+#define EI_CLASSIFIER_INFERENCING_ENGINE EI_CLASSIFIER_AKIDA
 #include <vector>
 #include <fstream>
 #include <sstream>
@@ -75,6 +88,12 @@ static tflite::SoftmaxParams dummy_params;
 static int model_input_bits = 0;
 static float scale;
 static int down_scale;
+typedef struct {
+    std::unique_ptr<tflite::FlatBufferModel> model; // flatbuffer model the interpreter is built from (see get_interpreter)
+    std::unique_ptr<tflite::Interpreter> interpreter; // interpreter constructed from `model`
+} ei_tflite_state_t;
+
+std::map<uint32_t, ei_tflite_state_t*> ei_tflite_instances; // interpreter states, keyed by block_config->block_id (filled by get_interpreter)
 
 bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
 {
@@ -106,6 +125,11 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
         return false;
     }
 
+    if(debug) {
+        std::string ver = akida.attr("__version__").cast<std::string>();
+        ei_printf("DEBUG: Akida version: %s\n", ver.c_str());
+    }
+
     py::object Model = akida.attr("Model");
 
     // deploy akida model file into temporary file
@@ -124,6 +148,7 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
     }
     catch (py::error_already_set &e) {
         ei_printf("ERR: Can't load model file from %s\n", model_file_path);
+        ei_printf("ERR: %s\n", e.what());
         return false;
     }
 
@@ -188,6 +213,7 @@ bool init_akida(const uint8_t *model_arr, size_t model_arr_size, bool debug)
     }
     catch (py::error_already_set &e) {
         ei_printf("ERR: Can't load the ML model onto the AKD1000 SoC\n");
+        ei_printf("ERR: %s\n", e.what());
         return false;
     }
 #elif (defined(EI_CLASSIFIER_USE_AKIDA_SOFTWARE) && (EI_CLASSIFIER_USE_AKIDA_SOFTWARE == 1))
@@ -237,13 +263,17 @@ void debug_print(const std::vector<T> vec, const int val_per_row = 3)
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
-    bool debug = false)
+    bool debug)
 {
     ei_learning_block_config_tflite_graph_t *block_config = ((ei_learning_block_config_tflite_graph_t*)config_ptr);
     ei_config_tflite_graph_t *graph_config = ((ei_config_tflite_graph_t*)block_config->graph_config);
+
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
     // init Python embedded interpreter (should be called once!)
@@ -273,12 +303,27 @@ EI_IMPULSE_ERROR run_nn_inference(
      */
     auto r = input_data.mutable_unchecked<4>();
     float temp;
-    for (py::ssize_t x = 0; x < r.shape(1); x++) {
-        for (py::ssize_t y = 0; y < r.shape(2); y++) {
-            for(py::ssize_t z = 0; z < r.shape(3); z++) {
-                temp = (fmatrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z] * scale);
-                temp = std::max(0.0f, std::min(temp, 255.0f));
-                r(0, x, y, z) = (uint8_t)(temp / down_scale);
+
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    for (size_t i = 0; i < input_block_ids_size; i++) {
+        uint16_t cur_mtx = input_block_ids[i];
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+        ei::matrix_t* matrix = NULL;
+
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+#else
+        ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+        for (py::ssize_t x = 0; x < r.shape(1); x++) {
+            for (py::ssize_t y = 0; y < r.shape(2); y++) {
+                for(py::ssize_t z = 0; z < r.shape(3); z++) {
+                    temp = (matrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z] * scale);
+                    temp = std::max(0.0f, std::min(temp, 255.0f));
+                    r(0, x, y, z) = (uint8_t)(temp / down_scale);
+                }
             }
         }
     }
@@ -308,7 +353,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     std::vector<float> potentials_v;// = potentials.cast<std::vector<float>>();
 
     // TODO: output conversion depending on output shape?
-    if (impulse->object_detection == false) {
+    if (block_config->object_detection == false) {
         potentials_v = potentials.squeeze().cast<std::vector<float>>();
     }
     else {
@@ -323,8 +368,10 @@ EI_IMPULSE_ERROR run_nn_inference(
         }
     }
 
-    // apply softmax, becuase Akida is not supporting this operation
-    tflite::reference_ops::Softmax(dummy_params, softmax_shape, potentials_v.data(), softmax_shape, potentials_v.data());
+    if(block_config->object_detection_last_layer != EI_CLASSIFIER_LAST_LAYER_YOLOV2) {
+        // apply softmax, because Akida does not support this operation
+        tflite::reference_ops::Softmax(dummy_params, softmax_shape, potentials_v.data(), softmax_shape, potentials_v.data());
+    }
 
     if(debug == true) {
         ei_printf("After softmax:\n");
@@ -351,30 +398,40 @@ EI_IMPULSE_ERROR run_nn_inference(
     engine_info << "Power consumption: " << std::fixed << std::setprecision(2) << active_power << " mW\n";
     engine_info << "Inferences per second: " << (1000000 / result->timing.classification_us);
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
                 fill_res = fill_result_struct_f32_fomo(
                     impulse,
+                    block_config,
                     result,
                     potentials_v.data(),
                     impulse->fomo_output_size,
                     impulse->fomo_output_size);
                 break;
             }
+            case EI_CLASSIFIER_LAST_LAYER_YOLOV2: {
+                fill_res = fill_result_struct_f32_yolov2(
+                    impulse,
+                    block_config,
+                    result,
+                    potentials_v.data(),
+                    impulse->tflite_output_features_count);
+                break;
+            }
             case EI_CLASSIFIER_LAST_LAYER_SSD: {
                 ei_printf("ERR: MobileNet SSD models are not implemented for Akida (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5: {
                 ei_printf("ERR: YOLO v5 models are not implemented for Akida (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
@@ -386,6 +443,136 @@ EI_IMPULSE_ERROR run_nn_inference(
     return fill_res;
 }
 
+/**
+ * Get the TFLite interpreter for a learn block, building and caching it on first use.
+ * Built instances are kept in ei_tflite_instances, keyed by block_config->block_id.
+ * @param block_config Block config; its graph_config is read as an ei_config_tflite_graph_t
+ * @param interpreter  Out: receives the cached interpreter (only written on success)
+ * @return EI_IMPULSE_OK on success, EI_IMPULSE_TFLITE_ERROR if the interpreter cannot be set up
+ */
+static EI_IMPULSE_ERROR get_interpreter(ei_learning_block_config_tflite_graph_t *block_config, tflite::Interpreter **interpreter) {
+    auto cached = ei_tflite_instances.find(block_config->block_id);
+    if (cached == ei_tflite_instances.end()) {
+        ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config;
+        // held by a unique_ptr until it is stored in the map, so the early returns below do not leak it
+        std::unique_ptr<ei_tflite_state_t> new_state(new ei_tflite_state_t());
+        new_state->model = tflite::FlatBufferModel::BuildFromBuffer((const char*)graph_config->model, graph_config->model_size);
+        if (!new_state->model) {
+            ei_printf("Failed to build TFLite model from buffer\n");
+            return EI_IMPULSE_TFLITE_ERROR;
+        }
+
+        tflite::ops::builtin::BuiltinOpResolver resolver;
+#if EI_CLASSIFIER_HAS_TREE_ENSEMBLE_CLASSIFIER
+        resolver.AddCustom("TreeEnsembleClassifier", tflite::ops::custom::Register_TREE_ENSEMBLE_CLASSIFIER());
+#endif
+        tflite::InterpreterBuilder builder(*new_state->model, resolver);
+        builder(&new_state->interpreter);
+        if (!new_state->interpreter) {
+            ei_printf("Failed to construct interpreter\n");
+            return EI_IMPULSE_TFLITE_ERROR;
+        }
+
+        if (new_state->interpreter->AllocateTensors() != kTfLiteOk) {
+            ei_printf("AllocateTensors failed\n");
+            return EI_IMPULSE_TFLITE_ERROR;
+        }
+
+        int hw_thread_count = (int)std::thread::hardware_concurrency();
+        hw_thread_count -= 1; // leave one thread free for the other application
+        if (hw_thread_count < 1) {
+            hw_thread_count = 1;
+        }
+
+        if (new_state->interpreter->SetNumThreads(hw_thread_count) != kTfLiteOk) {
+            ei_printf("SetNumThreads failed\n");
+            return EI_IMPULSE_TFLITE_ERROR;
+        }
+
+        cached = ei_tflite_instances.insert(std::make_pair(block_config->block_id, new_state.release())).first;
+    }
+
+    *interpreter = cached->second->interpreter.get();
+    return EI_IMPULSE_OK;
+}
+
+
+/**
+ * Runs the TFLite model of a DSP block: fills input tensor 0 from `signal`, invokes the
+ * interpreter and copies output tensor 0 into `output_matrix`.
+ * @param block_config Block config used to look up (or create) the interpreter via get_interpreter()
+ * @return EI_IMPULSE_OK on success, otherwise the error of the first step that failed
+ */
+extern "C" EI_IMPULSE_ERROR run_nn_inference_from_dsp(
+    ei_learning_block_config_tflite_graph_t *block_config,
+    signal_t *signal,
+    matrix_t *output_matrix)
+{
+    tflite::Interpreter *tfl;
+    const auto lookup_res = get_interpreter(block_config, &tfl);
+    if (lookup_res != EI_IMPULSE_OK) {
+        return lookup_res;
+    }
+
+    TfLiteTensor *in_tensor = tfl->input_tensor(0);
+    TfLiteTensor *out_tensor = tfl->output_tensor(0);
+
+    if (in_tensor == nullptr) {
+        return EI_IMPULSE_INPUT_TENSOR_WAS_NULL;
+    }
+    if (out_tensor == nullptr) {
+        return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL;
+    }
+
+    const auto fill_res = fill_input_tensor_from_signal(signal, in_tensor);
+    if (fill_res != EI_IMPULSE_OK) {
+        return fill_res;
+    }
+
+    const TfLiteStatus invoke_status = tfl->Invoke();
+    if (invoke_status != kTfLiteOk) {
+        ei_printf("ERR: interpreter->Invoke() failed with %d\n", invoke_status);
+        return EI_IMPULSE_TFLITE_ERROR;
+    }
+
+    // on Linux we're not worried about free'ing (for now), so the result of the copy is the final status
+    return fill_output_matrix_from_tensor(out_tensor, output_matrix);
+}
+
+/**
+ * DSP entry point that runs a TFLite model as the feature extractor.
+ * Builds temporary graph / learn-block configs from the ei_dsp_config_tflite_t behind `config_ptr`; `frequency` is not used.
+ * @return EIDSP_OK on success, otherwise the non-zero code returned by run_nn_inference_from_dsp()
+ */
+__attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) {
+    ei_dsp_config_tflite_t *tflite_dsp_cfg = (ei_dsp_config_tflite_t*)config_ptr;
+
+    ei_config_tflite_graph_t graph_cfg = {
+        .implementation_version = 1,
+        .model = tflite_dsp_cfg->model,
+        .model_size = tflite_dsp_cfg->model_size,
+        .arena_size = tflite_dsp_cfg->arena_size
+    };
+
+    ei_learning_block_config_tflite_graph_t learn_cfg = {
+        .implementation_version = 1,
+        .classification_mode = EI_CLASSIFIER_CLASSIFICATION_MODE_DSP,
+        .block_id = tflite_dsp_cfg->block_id,
+        .object_detection = false,
+        .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
+        .output_data_tensor = 0,
+        .output_labels_tensor = 255,
+        .output_score_tensor = 255,
+        .threshold = 0,
+        .quantized = 0,
+        .compiled = 1,
+        .graph_config = &graph_cfg
+    };
+
+    auto inference_res = run_nn_inference_from_dsp(&learn_cfg, signal, output_matrix);
+    return (inference_res != 0) ? static_cast<int>(inference_res) : EIDSP_OK;
+}
+
 #endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_AKIDA
 
 #endif /* EI_CLASSIFIER_INFERENCING_ENGINE_AKIDA_H */
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h b/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h
index 848e3f0..5a800eb 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/anomaly.h
@@ -18,7 +18,7 @@
 #ifndef _EDGE_IMPULSE_INFERENCING_ANOMALY_H_
 #define _EDGE_IMPULSE_INFERENCING_ANOMALY_H_
 
-#if (EI_CLASSIFIER_HAS_ANOMALY == 1)
+#if (EI_CLASSIFIER_HAS_ANOMALY)
 
 #include <cmath>
 #include <stdlib.h>
@@ -30,6 +30,7 @@
 #include "edge-impulse-sdk/classifier/ei_aligned_malloc.h"
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 #include "edge-impulse-sdk/classifier/inferencing_engines/engines.h"
+#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
 
 #ifdef __cplusplus
 namespace {
@@ -87,9 +88,75 @@ float get_min_distance_to_cluster(float *input, size_t input_size, const ei_clas
 }
 #endif // __cplusplus
 
+
+/**
+ * Extracts the input values from the feature matrix based on the anomaly axes.
+ * @param fmatrix Feature matrix
+ * @param input_block_ids Array of block IDs to extract from the feature matrix
+ * @param input_block_ids_size Size of input_block_ids array
+ * @param anom_axes_size Number of entries in anom_axis (and number of values written to input)
+ * @param anom_axis Indices into the combined feature matrix; assumed to be in ascending order (TODO confirm)
+ * @param input Array to store the extracted input values
+ * @return EI_IMPULSE_OK if successful, otherwise an error code
+ */
+EI_IMPULSE_ERROR extract_anomaly_input_values(
+    ei_feature_t *fmatrix,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
+    uint32_t anom_axes_size,
+    const uint16_t *anom_axis,
+    float *input)
+{
+    if (input_block_ids_size == 1) {
+        for (size_t ix = 0; ix < anom_axes_size; ix++) {
+            input[ix] = fmatrix[0].matrix->buffer[anom_axis[ix]];
+        }
+        return EI_IMPULSE_OK;
+    }
+
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+    ei::matrix_t* matrix = NULL;
+#endif
+    uint32_t buf_offset = 0; // start of the current matrix within the combined feature matrix
+    uint32_t input_pos = 0;  // current index of input feature
+
+    for (size_t i = 0; i < input_block_ids_size; i++) {
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+        size_t cur_mtx = input_block_ids[i];
+        // NOTE(review): the last argument looks like a matrix count elsewhere; anom_axes_size here should be confirmed
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, anom_axes_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+#else
+        ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+        const uint32_t mtx_len = matrix->rows * matrix->cols;
+        for (size_t ix = 0; ix < anom_axes_size; ix++) {
+            const uint32_t global_buf_pos = anom_axis[input_pos];
+            // strict bound: an index equal to buf_offset + mtx_len is the first element of the
+            // next matrix, reading it from this one would be one past the end of its buffer
+            if (global_buf_pos >= buf_offset + mtx_len) {
+                break;
+            }
+            input[input_pos] = matrix->buffer[global_buf_pos - buf_offset];
+            input_pos++;
+            if (input_pos >= anom_axes_size) {
+                return EI_IMPULSE_OK;
+            }
+        }
+        buf_offset += mtx_len;
+    }
+    return EI_IMPULSE_OK;
+}
+
+
 EI_IMPULSE_ERROR run_kmeans_anomaly(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -104,9 +171,8 @@ EI_IMPULSE_ERROR run_kmeans_anomaly(
         return EI_IMPULSE_OUT_OF_MEMORY;
     }
 
-    for (size_t ix = 0; ix < block_config->anom_axes_size; ix++) {
-        input[ix] = fmatrix->buffer[block_config->anom_axis[ix]];
-    }
+    extract_anomaly_input_values(fmatrix, input_block_ids, input_block_ids_size, block_config->anom_axes_size, block_config->anom_axis, input);
+
     standard_scaler(input, block_config->anom_scale, block_config->anom_mean, block_config->anom_axes_size);
     float anomaly = get_min_distance_to_cluster(
         input, block_config->anom_axes_size, block_config->anom_clusters, block_config->anom_cluster_count);
@@ -120,9 +186,7 @@ EI_IMPULSE_ERROR run_kmeans_anomaly(
     }
 
     result->timing.anomaly = anomaly_end_ms - anomaly_start_ms;
-
     result->anomaly = anomaly;
-
     ei_free(input);
 
     return EI_IMPULSE_OK;
@@ -131,7 +195,10 @@ EI_IMPULSE_ERROR run_kmeans_anomaly(
 #if (EI_CLASSIFIER_INFERENCING_ENGINE != EI_CLASSIFIER_NONE)
 EI_IMPULSE_ERROR run_gmm_anomaly(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -139,29 +206,51 @@ EI_IMPULSE_ERROR run_gmm_anomaly(
     ei_learning_block_config_anomaly_gmm_t *block_config = (ei_learning_block_config_anomaly_gmm_t*)config_ptr;
 
     ei_learning_block_config_tflite_graph_t ei_learning_block_config_gmm = {
-    .implementation_version = 1,
-    .block_id = 0,
-    .object_detection = 0,
-    .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
-    .output_data_tensor = 0,
-    .output_labels_tensor = 0,
-    .output_score_tensor = 0,
-    .graph_config = block_config->graph_config
+        .implementation_version = 1,
+        .classification_mode = block_config->classification_mode,
+        .block_id = 0,
+        .object_detection = 0,
+        .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
+        .output_data_tensor = 0,
+        .output_labels_tensor = 0,
+        .output_score_tensor = 0,
+        .threshold = block_config->anomaly_threshold,
+        .quantized = 0,
+        .compiled = 0,
+        .graph_config = block_config->graph_config
     };
 
-    ei_impulse_result_t anomaly_result = {0};
+    ei_impulse_result_t anomaly_result = { 0 };
+
+    std::unique_ptr<ei_feature_t[]> input_ptr(new ei_feature_t[1]);
+    ei_feature_t* input = input_ptr.get();
+
     memset(&anomaly_result, 0, sizeof(ei_impulse_result_t));
 
-    ei::matrix_t features_matrix(1, block_config->anom_axes_size);
+    std::unique_ptr<ei::matrix_t> matrix_ptr(new ei::matrix_t(1, block_config->anom_axes_size));
+
+    if (block_config->classification_mode == EI_CLASSIFIER_CLASSIFICATION_MODE_VISUAL_ANOMALY) {
+        // [JJ] Here we assume that the feature extractor block is always directly before the GMM block
+        // if that changes (which I assume it will at some point, e.g. if we have a shared backbone)
+        // this will break. Would it be better if `run_nn_inference` would get pointers to the input/output
+        // matrices instead?
+        input[0].matrix = fmatrix[impulse->dsp_blocks_size + (learn_block_index - 1)].matrix;
+        input[0].blockId = fmatrix[impulse->dsp_blocks_size + (learn_block_index - 1)].blockId;
 
-    for (size_t ix = 0; ix < block_config->anom_axes_size; ix++) {
-        features_matrix.buffer[ix] = fmatrix->buffer[block_config->anom_axis[ix]];
+        input_block_ids_size = 1;
     }
+    else {
+        input[0].matrix = matrix_ptr.get();
+        input[0].blockId = 0;
 
-    EI_IMPULSE_ERROR res = run_nn_inference(impulse, &features_matrix, &anomaly_result, (void*)&ei_learning_block_config_gmm, debug);
+        extract_anomaly_input_values(fmatrix, input_block_ids, input_block_ids_size, block_config->anom_axes_size, block_config->anom_axis, input[0].matrix->buffer);
+        input_block_ids_size = 1;
+    }
+
+    EI_IMPULSE_ERROR res = run_nn_inference(impulse, input, learn_block_index, input_block_ids, input_block_ids_size, &anomaly_result, (void*)&ei_learning_block_config_gmm, debug);
     if (res != EI_IMPULSE_OK) {
-            return res;
-        }
+        return res;
+    }
 
     if (debug) {
         ei_printf("Anomaly score (time: %d ms.): ", anomaly_result.timing.classification);
@@ -171,7 +260,17 @@ EI_IMPULSE_ERROR run_gmm_anomaly(
 
     result->timing.anomaly = anomaly_result.timing.classification;
 
-    result->anomaly = anomaly_result.classification[0].value;
+    if (block_config->classification_mode == EI_CLASSIFIER_CLASSIFICATION_MODE_VISUAL_ANOMALY) {
+#if EI_CLASSIFIER_HAS_VISUAL_ANOMALY
+        result->visual_ad_grid_cells = anomaly_result.visual_ad_grid_cells;
+        result->visual_ad_count = anomaly_result.visual_ad_count;
+        result->visual_ad_result.mean_value = anomaly_result.visual_ad_result.mean_value;
+        result->visual_ad_result.max_value = anomaly_result.visual_ad_result.max_value;
+#endif // EI_CLASSIFIER_HAS_VISUAL_ANOMALY
+    }
+    else {
+        result->anomaly = anomaly_result.classification[0].value;
+    }
 
     return EI_IMPULSE_OK;
 }
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/drpai.h b/edge-impulse-sdk/classifier/inferencing_engines/drpai.h
index 0eec2e4..6ecea7d 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/drpai.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/drpai.h
@@ -39,26 +39,27 @@
 #include <unistd.h>
 #include <vector>
 
+#include <model-parameters/model_metadata.h>
+
 #if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI))
+// For a YOLOV5_V5_DRPAI model we ran the unsupported layers with TF
 #include <thread>
-#include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/interpreter.h"
-#include "tensorflow/lite/kernels/register.h"
-#include "tensorflow/lite/model.h"
-#include "tensorflow/lite/optional_debug_tools.h"
+#include "tensorflow-lite/tensorflow/lite/c/common.h"
+#include "tensorflow-lite/tensorflow/lite/interpreter.h"
+#include "tensorflow-lite/tensorflow/lite/kernels/register.h"
+#include "tensorflow-lite/tensorflow/lite/model.h"
+#include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h"
 #endif
+#include "edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.h"
+#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
+#include "edge-impulse-sdk/classifier/ei_model_types.h"
+#include "edge-impulse-sdk/classifier/ei_run_dsp.h"
+#include "edge-impulse-sdk/porting/ei_logging.h"
 
 #include <linux/drpai.h>
-
 #include <tflite-model/drpai_model.h>
 
-#include "edge-impulse-sdk/classifier/ei_run_dsp.h"
-#include "edge-impulse-sdk/porting/ei_classifier_porting.h"
-#include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
-#include "edge-impulse-sdk/classifier/ei_model_types.h"
-#include "edge-impulse-sdk/porting/ei_logging.h"
 
-#include <model-parameters/model_metadata.h>
 
 /*****************************************
  * Macro
@@ -454,10 +455,12 @@ EI_IMPULSE_ERROR drpai_close(uint32_t input_frame_size) {
 #if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI))
 EI_IMPULSE_ERROR drpai_run_yolov5_postprocessing(
     const ei_impulse_t *impulse,
+    ei_learning_block_config_tflite_graph_t *block_config,
     signal_t *signal,
     ei_impulse_result_t *result,
     bool debug = false)
 {
+
     static std::unique_ptr<tflite::FlatBufferModel> model = nullptr;
     static std::unique_ptr<tflite::Interpreter> interpreter = nullptr;
 
@@ -563,7 +566,7 @@ EI_IMPULSE_ERROR drpai_run_yolov5_postprocessing(
     // }
     // printf("\n");
 
-    return fill_result_struct_f32_yolov5(impulse, result, 5, out_data, out_size);
+    return fill_result_struct_f32_yolov5(impulse, block_config, result, 5, out_data, out_size);
 }
 #endif
 
@@ -578,10 +581,13 @@ EI_IMPULSE_ERROR drpai_run_yolov5_postprocessing(
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
-    bool debug = false)
+    bool debug)
 {
     // dummy, not used for DRPAI
 }
@@ -598,6 +604,8 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
     void *config_ptr,
     bool debug = false)
 {
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
+
     // this needs to be changed for multi-model, multi-impulse
     static bool first_run = true;
     uint64_t ctx_start_us;
@@ -674,8 +682,8 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
                 if (debug) {
                     ei_printf("DEBUG: raw drpai output");
@@ -689,6 +697,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
                 fill_res = fill_result_struct_f32_fomo(
                     impulse,
+                    block_config,
                     result,
                     drpai_output_buf,
                     impulse->fomo_output_size,
@@ -697,14 +706,15 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
             }
             case EI_CLASSIFIER_LAST_LAYER_SSD: {
                 ei_printf("ERR: MobileNet SSD models are not implemented for DRP-AI (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOv5 does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
+                }
+                else {
                   if (debug) {
                       ei_printf("DEBUG: raw drpai output");
                       ei_printf("\n[");
@@ -716,19 +726,17 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
                       }
                       ei_printf("]\n");
                   }
+                }
 
 #if ((EI_CLASSIFIER_OBJECT_DETECTION == 1) && (EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI))
                   // do post processing
-                  fill_res = drpai_run_yolov5_postprocessing(impulse, signal, result, debug);
+                  fill_res = drpai_run_yolov5_postprocessing(impulse, block_config, signal, result, debug);
 #endif
-
-                #endif
-
                 break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/engines.h b/edge-impulse-sdk/classifier/inferencing_engines/engines.h
index 75ee5c0..5fa4bd1 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/engines.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/engines.h
@@ -22,21 +22,30 @@
 
 EI_IMPULSE_ERROR run_kmeans_anomaly(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug);
 
 EI_IMPULSE_ERROR run_gmm_anomaly(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug);
 
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug);
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/memryx.h b/edge-impulse-sdk/classifier/inferencing_engines/memryx.h
index 249df8d..5ce2516 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/memryx.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/memryx.h
@@ -47,10 +47,10 @@
 #if EI_CLASSIFIER_HAS_MODEL_VARIABLES == 1
 #include "model-parameters/model_variables.h"
 #endif
-/* TODO: Not sure we need it #include <memryx_model/memryx_model.h> */
+
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow-lite/tensorflow/lite/kernels/internal/reference/softmax.h"
 #include <vector>
 #include <fstream>
 #include <sstream>
@@ -64,6 +64,9 @@
 #else
 #include "memx/memx.h"
 #endif
+/* The headers below let us bundle the DFP model with the EIM in a single binary */
+#include "memryx-model/memryx-model.h"
+#include "utils/model_header_utils.h"
 
 /* Result delivered by memryx simulator contains 3 fields, indexes for print */
 #define MX_SIM_RES_OUTPUTS 0
@@ -106,11 +109,16 @@ static tflite::SoftmaxParams dummy_params;
 
 static bool verbose_debug = 0;
 
-bool init_memryx(bool debug)
+bool init_memryx(bool debug, const ei_impulse_t *impulse)
 {
-    constexpr char model_file_path[] = "memryx_trained.dfp";
+    /* Unpack DFP model to file system */
+    std::string project_file_path = "/tmp/" + std::string(impulse->project_name) + "-" + std::to_string(impulse->project_id) + "-" + std::to_string(impulse->deploy_version);
+    create_project_if_not_exists(project_file_path, model_h_files, model_h_files_len);
+
+    std::string proj_model_path = project_file_path + "/memryx_trained.dfp";
+    const char * model_file_path = proj_model_path.c_str();
 #if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1))
-#warning "Trying to use hardware"
+#warning "Building EIM for use with MemryX Hardware"
     memx_status status = MEMX_STATUS_OK;
     // 1. Bind MPU device group 0 as MX3:Cascade to model 0.
     status = memx_open(model_id, group_id, MEMX_DEVICE_CASCADE);
@@ -184,20 +192,26 @@ bool init_memryx(bool debug)
 #if (defined(EI_CLASSIFIER_USE_MEMRYX_HARDWARE) && (EI_CLASSIFIER_USE_MEMRYX_HARDWARE == 1))
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
 {
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
+
     memx_status status = MEMX_STATUS_OK;
     int32_t ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format;
     int32_t ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format;
+    int32_t z;
     uint64_t ctx_start_us = 0;
     uint64_t ctx_end_us = 0;
 
     // check if we've initialized the interpreter and device?
     if (memryx_initialized == false) {
-        if(init_memryx(debug) == false) {
+        if(init_memryx(debug, impulse) == false) {
             return EI_IMPULSE_MEMRYX_ERROR;
         }
         memryx_initialized = true;
@@ -205,13 +219,13 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     /* 4. get input shape - Not needed during runtime, available only for debugging */
     if(verbose_debug) {
-        status = memx_get_ifmap_size(model_id, flow_id, &ifmap_height, &ifmap_width, &ifmap_channel_number, &ifmap_format);
+        status = memx_get_ifmap_size(model_id, flow_id, &ifmap_height, &ifmap_width, &z, &ifmap_channel_number, &ifmap_format);
         ei_printf("status = %d, ifmap shape = (%d, %d, %d), format = %d\n",
                    status, ifmap_height, ifmap_width, ifmap_channel_number, ifmap_format);
     }
 
     // 5. get output shape
-    status = memx_get_ofmap_size(model_id, flow_id, &ofmap_height, &ofmap_width, &ofmap_channel_number, &ofmap_format);
+    status = memx_get_ofmap_size(model_id, flow_id, &ofmap_height, &ofmap_width, &z, &ofmap_channel_number, &ofmap_format);
     if(debug) {
         ei_printf("status = %d, ofmap shape = (%d, %d, %d), format = %d\n",
                   status, ofmap_height, ofmap_width, ofmap_channel_number, ofmap_format);
@@ -222,11 +236,36 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     // 6. Prepare input and output buffers
     float* ofmap = new float [ofmap_width * ofmap_height * ofmap_channel_number];
-    float* ifmap = (float*)fmatrix->buffer;
+
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    ei::matrix_t* matrix = NULL;
+
+    ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size);
+    uint32_t buf_pos = 0;
+
+    for (size_t i = 0; i < input_block_ids_size; i++) {
+        size_t cur_mtx = input_block_ids[i];
+
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+
+        for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) {
+            combined_matrix.buffer[buf_pos++] = matrix->buffer[ix];
+        }
+    }
+    matrix = &combined_matrix;
+#else
+    ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+
+    float* ifmap = (float*)matrix->buffer;
 
     if(verbose_debug) {
         for(int fidx = 0; fidx < (ofmap_width*ofmap_height); fidx++) {
-            ei_printf("%f\t", fmatrix->buffer[fidx]);
+            ei_printf("%f\t", matrix->buffer[fidx]);
             if(!(fidx % ofmap_width)) ei_printf("\n");
         }
     }
@@ -257,34 +296,36 @@ EI_IMPULSE_ERROR run_nn_inference(
     }
 
     // init softmax shape
-    std::vector<size_t> output_shape = {12,12,2};
+    std::vector<size_t> output_shape = {static_cast<size_t>(ofmap_height),static_cast<size_t>(ofmap_width),
+                                        static_cast<size_t>(ofmap_channel_number)};
     softmax_shape.BuildFrom(output_shape);
     // dumy beta parameter for softmax purposes
     dummy_params.beta = 1;
 
-    // apply softmax, becuase Akida is not supporting this operation
+    // apply softmax, because MX3 does not support this operation
     tflite::reference_ops::Softmax(dummy_params, softmax_shape, ofmap, softmax_shape, ofmap);
 
     // handle inference outputs
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
                 ei_printf("FOMO executed on Memryx\n");
                 fill_result_struct_f32_fomo(
                     impulse,
+                    block_config,
                     result,
                     ofmap,
-                    impulse->input_width / 8,
-                    impulse->input_height / 8);
+                    impulse->fomo_output_size,
+                    impulse->fomo_output_size);
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_SSD: {
-                ei_printf("Mobilenet SSD executed on Memryx\n");
+                ei_printf("Mobilenet SSD is not implemented for Edge Impulse MemryX engine, please contact Edge Impulse Support\n");
                 break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
@@ -297,26 +338,31 @@ EI_IMPULSE_ERROR run_nn_inference(
     // Device is closed only at EIM exit, therefore we do not use memx_close()
     return EI_IMPULSE_OK;
 }
+
 #elif (defined(EI_CLASSIFIER_USE_MEMRYX_SOFTWARE) && (EI_CLASSIFIER_USE_MEMRYX_SOFTWARE == 1))
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids, uint32_t input_block_ids_size, // names/arity used by the body and by the engines.h declaration
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
 {
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
+
     // init Python embedded interpreter (should be called once!)
     static py::scoped_interpreter guard{};
 
     // check if we've initialized the interpreter and device?
     if (memryx_initialized == false) {
-        if(init_memryx(debug) == false) {
+        if(init_memryx(debug, impulse) == false) {
             return EI_IMPULSE_MEMRYX_ERROR;
         }
         memryx_initialized = true;
     }
 
-    std::vector<size_t> input_shape = {1,EI_CLASSIFIER_INPUT_WIDTH,EI_CLASSIFIER_INPUT_HEIGHT,3};
+    std::vector<size_t> input_shape = {1, impulse->input_width, impulse->input_height, 3};
     py::array_t<float> input_data(input_shape); // = zeroes(input_shape, 0);
 
     printf("impulse->w=%d h=%d\n", impulse->input_width, impulse->input_height);
@@ -328,10 +374,24 @@ EI_IMPULSE_ERROR run_nn_inference(
      * For Audio shape is (width, height, 1) - spectrogram
      */
     auto r = input_data.mutable_unchecked<4>();
-    for (py::ssize_t x = 0; x < r.shape(1); x++) {
-        for (py::ssize_t y = 0; y < r.shape(2); y++) {
-            for(py::ssize_t z = 0; z < r.shape(3); z++) {
-                r(0, x, y, z) = (float)(fmatrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z]);
+
+    for (size_t i = 0; i < input_block_ids_size; i++) { // fill the input tensor from each listed input block (a later block overwrites an earlier one)
+        size_t cur_mtx = input_block_ids[i]; // size_t so the %zu specifier below is valid
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+        ei::matrix_t* matrix = NULL;
+        size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size; // search bound, computed as in the hardware path
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+#else
+        ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+        for (py::ssize_t x = 0; x < r.shape(1); x++) {
+            for (py::ssize_t y = 0; y < r.shape(2); y++) {
+                for(py::ssize_t z = 0; z < r.shape(3); z++) {
+                    r(0, x, y, z) = (float)(matrix->buffer[x * r.shape(2) * r.shape(3) + y * r.shape(3) + z]); // read the selected matrix; fmatrix is an ei_feature_t pointer and has no .buffer
+                }
             }
         }
     }
@@ -358,7 +418,7 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     potentials = outputs.squeeze().cast<py::array_t<float>>();
 
-    if (impulse->object_detection == false) {
+    if (block_config->object_detection == false) {
         potentials_v = outputs.squeeze().cast<std::vector<float>>();
     }
     else {
@@ -377,16 +437,17 @@ EI_IMPULSE_ERROR run_nn_inference(
         ei_printf("Memryx raw output:\n%s\n", ret_str.c_str());
     }
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
                 ei_printf("FOMO executed on Memryx\n");
                 fill_result_struct_f32_fomo(
                     impulse,
+                    block_config,
                     result,
                     potentials_v.data(),
-                    impulse->input_width / 8,
-                    impulse->input_height / 8);
+                    impulse->fomo_output_size,
+                    impulse->fomo_output_size);
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_SSD: {
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h b/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h
index 0a422dd..acc3e12 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/onnx_tidl.h
@@ -347,6 +347,7 @@ static EI_IMPULSE_ERROR inference_onnx_setup(
  * @return  EI_IMPULSE_OK if successful
  */
 static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse,
+    void *config_ptr,
     uint64_t ctx_start_us,
     std::vector<Ort::Value>* input_tensors,
     std::vector<Ort::Value>* output_tensors,
@@ -356,6 +357,8 @@ static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse,
     ei_impulse_result_t *result,
     bool debug) {
 
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
+
     session->Run(*run_options, *binding);
 
     uint64_t ctx_end_us = ei_read_timer_us();
@@ -381,14 +384,14 @@ static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse,
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
     // NOTE: for now only yolox object detection supported
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_YOLOX: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOX does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
-
+                }
+                else {
                     if (debug) {
                         ei_printf("YOLOX OUTPUT (%d ms.): ", result->timing.classification);
                         for (size_t ix = 0; ix < output_tensor_features_count; ix++) {
@@ -399,21 +402,22 @@ static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse,
                     }
                     fill_res = fill_result_struct_f32_yolox_detect(
                         impulse,
+                        block_config,
                         result,
                         (float*)out_data,
                         output_tensor_features_count);
-                #endif
+                }
                 break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 break;
             }
         }
     }
     else {
-#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 
     switch (output_tensor_type) {
         case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8: {
@@ -474,7 +478,10 @@ static EI_IMPULSE_ERROR inference_onnx_run(const ei_impulse_t *impulse,
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *afmatrix,
+    ei_feature_t *afmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -519,12 +526,14 @@ EI_IMPULSE_ERROR run_nn_inference(
     size_t height = impulse->input_height;
     size_t width = impulse->input_width;
 
+    ei::matrix_t* matrix = afmatrix[0].matrix;
+
     int dest_ix = 0;
     for (size_t c=0; c < channels; c++) {
         for (size_t h=0; h < height; h++) {
             for (size_t w=0; w < width; w++) {
                 uint32_t src_ix = channels * width * h + w*channels + c;
-                fmatrix.buffer[dest_ix++] = afmatrix->buffer[src_ix];
+                fmatrix.buffer[dest_ix++] = matrix->buffer[src_ix];
             }
         }
     }
@@ -544,6 +553,7 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     ctx_start_us = ei_read_timer_us();
     EI_IMPULSE_ERROR run_res = inference_onnx_run(impulse,
+        config_ptr,
         ctx_start_us,
         &input_tensors,
         &output_tensors,
@@ -559,7 +569,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     return EI_IMPULSE_OK;
 }
 
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 /**
  * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow)
  * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized'
@@ -673,6 +683,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     ctx_start_us = ei_read_timer_us();
     EI_IMPULSE_ERROR run_res = inference_onnx_run(impulse,
+        config_ptr,
         ctx_start_us,
         &input_tensors,
         &output_tensors,
@@ -687,7 +698,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     return EI_IMPULSE_OK;
 }
-#endif // EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 
 #endif // #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_ONNX_TIDL) && (EI_CLASSIFIER_COMPILED != 1)
 #endif // _EI_CLASSIFIER_INFERENCING_ENGINE_ONNX_TIDL_H_
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h b/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h
index 83ad719..dd6caf6 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tensaiflow.h
@@ -70,7 +70,10 @@ extern "C" void post_process(const void *impulse_arg, int8_t *out_buf_0, int8_t
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -78,7 +81,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
     ei_config_tensaiflow_graph_t *graph_config = (ei_config_tensaiflow_graph_t*)block_config->graph_config;
 
-    if (impulse->object_detection) {
+    if (block_config->object_detection) {
         ei_printf("ERR: Object detection models are not supported with TensaiFlow\n");
         return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
     }
@@ -135,8 +138,15 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
     processed_features = (int8_t *) features_matrix.buffer;
 
     // run DSP process and quantize automatically
-    int ret = extract_image_features_quantized(signal, &features_matrix, impulse->dsp_blocks[0].config, graph_config->input_scale, graph_config->input_zeropoint,
-        impulse->frequency, impulse->learning_blocks[0].image_scaling);
+    int ret = extract_image_features_quantized(
+        signal,
+        &features_matrix,
+        impulse->dsp_blocks[0].config,
+        graph_config->input_scale,
+        graph_config->input_zeropoint,
+        impulse->frequency,
+        impulse->learning_blocks[0].image_scaling);
+
     if (ret != EIDSP_OK) {
         ei_printf("ERR: Failed to run DSP process (%d)\n", ret);
         return EI_IMPULSE_DSP_ERROR;
@@ -168,41 +178,53 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
-                    fill_res = fill_result_struct_i8_fomo(impulse, result, infer_result,
-                        graph_config->output_zeropoint, graph_config->output_scale,
-                        impulse->fomo_output_size, impulse->fomo_output_size);
-                #else
+                if (block_config->quantized == 1) {
+                    fill_res = fill_result_struct_i8_fomo(
+                        impulse,
+                        block_config,
+                        result,
+                        infer_result,
+                        graph_config->output_zeropoint,
+                        graph_config->output_scale,
+                        impulse->fomo_output_size,
+                        impulse->fomo_output_size);
+                }
+                else {
                     ei_printf("ERR: TensaiFlow does not support float32 inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #endif
+                }
             break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
     }
     else {
-        #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
-            fill_res = fill_result_struct_i8(impulse, result, infer_result,
-                graph_config->output_zeropoint, graph_config->output_scale, debug);
-        #else
+        if (block_config->quantized == 1) {
+            fill_res = fill_result_struct_i8(
+                impulse,
+                result,
+                infer_result,
+                graph_config->output_zeropoint,
+                graph_config->output_scale,
+                debug);
+        }
+        else {
             ei_printf("ERR: TensaiFlow does not support float32 inference\n");
             return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-        #endif
+        }
     }
 
     if (fill_res != EI_IMPULSE_OK) {
         return fill_res;
     }
 
-
     result->timing.classification_us = ei_read_timer_us() - ctx_start_us;
     result->timing.classification = (int)(result->timing.classification_us / 1000);
     return EI_IMPULSE_OK;
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h b/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h
index 0575ad1..d9fbb29 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tensorrt.h
@@ -25,9 +25,20 @@
 #include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
 
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <string>
+#include <filesystem>
 #include <stdlib.h>
 #include "tflite/linux-jetson-nano/libeitrt.h"
 
+#if __APPLE__
+#include <mach-o/dyld.h>
+#else
+#include <linux/limits.h>
+#endif
+
 EiTrt *ei_trt_handle = NULL;
 
 inline bool file_exists(char *model_file_name)
@@ -52,7 +63,10 @@ inline bool file_exists(char *model_file_name)
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -60,62 +74,103 @@ EI_IMPULSE_ERROR run_nn_inference(
     ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
     ei_config_tflite_graph_t *graph_config = (ei_config_tflite_graph_t*)block_config->graph_config;
 
-    #if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+    #if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
     #error "TensorRT requires an unquantized network"
     #endif
 
-    static char model_file_name[128];
-    snprintf(
-        model_file_name,
-        128,
-        "/tmp/%s-%d-%d.engine",
-        impulse->project_name,
-        impulse->project_id,
-        impulse->deploy_version);
+    static char current_exe_path[PATH_MAX] = { 0 };
+
+#if __APPLE__
+    uint32_t len = PATH_MAX;
+    if (_NSGetExecutablePath(current_exe_path, &len) != 0) {
+        current_exe_path[0] = '\0'; // buffer too small
+    }
+    else {
+        // resolve symlinks, ., .. if possible
+        char *canonical_path = realpath(current_exe_path, NULL);
+        if (canonical_path != NULL)
+        {
+            strncpy(current_exe_path, canonical_path, len);
+            free(canonical_path);
+        }
+    }
+#else
+    int readlink_res = readlink("/proc/self/exe", current_exe_path, PATH_MAX - 1); // readlink() does not NUL-terminate; leave the last zero-initialised byte untouched
+    if (readlink_res < 0) {
+        printf("readlink_res = %d\n", readlink_res);
+        current_exe_path[0] = '\0'; // failed to find location
+    }
+#endif
+
+    static char model_file_name[PATH_MAX];
+
+    if (strlen(current_exe_path) == 0) {
+        // could not determine current exe path, use /tmp for the engine file
+        snprintf(
+            model_file_name,
+            PATH_MAX,
+            "/tmp/ei-%d-%d.engine",
+            impulse->project_id,
+            impulse->deploy_version);
+    }
+    else {
+        std::filesystem::path p(current_exe_path);
+        snprintf(
+            model_file_name,
+            PATH_MAX,
+            "%s/%s-project%d-v%d.engine",
+            p.parent_path().c_str(),
+            p.stem().c_str(),
+            impulse->project_id,
+            impulse->deploy_version);
+    }
+
+    static bool first_run = true;
 
-    static bool first_run = !file_exists(model_file_name);
     if (first_run) {
-        ei_printf("INFO: Model file '%s' does not exist, creating now. \n", model_file_name);
 
-        FILE *file = fopen(model_file_name, "w");
-        if (!file) {
-            ei_printf("ERR: TensorRT init failed to open '%s'\n", model_file_name);
-            return EI_IMPULSE_TENSORRT_INIT_FAILED;
-        }
+        bool fexists = file_exists(model_file_name);
+        if (!fexists) {
+            ei_printf("INFO: Model file '%s' does not exist, creating...\n", model_file_name);
 
-        if (fwrite(graph_config->model, graph_config->model_size, 1, file) != 1) {
-            ei_printf("ERR: TensorRT init fwrite failed.\n");
-            return EI_IMPULSE_TENSORRT_INIT_FAILED;
-        }
+            FILE *file = fopen(model_file_name, "w");
+            if (!file) {
+                ei_printf("ERR: TensorRT init failed to open '%s'\n", model_file_name);
+                return EI_IMPULSE_TENSORRT_INIT_FAILED;
+            }
+
+            if (fwrite(graph_config->model, graph_config->model_size, 1, file) != 1) {
+                ei_printf("ERR: TensorRT init fwrite failed.\n");
+                return EI_IMPULSE_TENSORRT_INIT_FAILED;
+            }
 
-        if (fclose(file) != 0) {
-            ei_printf("ERR: TensorRT init fclose failed.\n");
-            return EI_IMPULSE_TENSORRT_INIT_FAILED;
+            if (fclose(file) != 0) {
+                ei_printf("ERR: TensorRT init fclose failed.\n");
+                return EI_IMPULSE_TENSORRT_INIT_FAILED;
+            }
         }
+
+        first_run = false;
     }
 
     uint32_t out_data_size = 0;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
-            case EI_CLASSIFIER_LAST_LAYER_FOMO: {
-                out_data_size = impulse->tflite_output_features_count;
-                break;
-            }
-            case EI_CLASSIFIER_LAST_LAYER_SSD: {
-                ei_printf("ERR: SSD models are not supported using TensorRT \n");
-                return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                break;
-            }
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
+            case EI_CLASSIFIER_LAST_LAYER_TAO_SSD:
+            case EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET:
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3:
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4:
+            case EI_CLASSIFIER_LAST_LAYER_FOMO:
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5:
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
-                ei_printf("ERR: YOLOv5 models are not supported using TensorRT \n");
-                return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
+                out_data_size = impulse->tflite_output_features_count;
+                break;
             }
             default: {
                 ei_printf(
                     "ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
@@ -134,9 +189,33 @@ EI_IMPULSE_ERROR run_nn_inference(
         ei_trt_handle = libeitrt::create_EiTrt(model_file_name, debug);
     }
 
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    ei::matrix_t* matrix = NULL;
+
+    ei::matrix_t combined_matrix(1, impulse->nn_input_frame_size);
+    uint32_t buf_pos = 0;
+
+    for (size_t i = 0; i < input_block_ids_size; i++) {
+        size_t cur_mtx = input_block_ids[i];
+
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+
+        for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) {
+            combined_matrix.buffer[buf_pos++] = matrix->buffer[ix];
+        }
+    }
+    matrix = &combined_matrix;
+#else
+    ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+
     uint64_t ctx_start_us = ei_read_timer_us();
 
-    libeitrt::infer(ei_trt_handle, fmatrix->buffer, out_data, out_data_size);
+    libeitrt::infer(ei_trt_handle, matrix->buffer, out_data, out_data_size);
 
     uint64_t ctx_end_us = ei_read_timer_us();
 
@@ -145,33 +224,67 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
-        case EI_CLASSIFIER_LAST_LAYER_FOMO: {
-            fill_res = fill_result_struct_f32_fomo(
-                impulse,
-                result,
-                out_data,
-                impulse->fomo_output_size,
-                impulse->fomo_output_size);
-            break;
-        }
-        case EI_CLASSIFIER_LAST_LAYER_SSD: {
-            ei_printf("ERR: SSD models are not supported using TensorRT \n");
-            return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-            break;
-        }
-        case EI_CLASSIFIER_LAST_LAYER_YOLOV5:
-        case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
-            ei_printf("ERR: YOLOv5 models are not supported using TensorRT \n");
-            return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-        }
-        default: {
-            ei_printf(
-                "ERR: Unsupported object detection last layer (%d)\n",
-                impulse->object_detection_last_layer);
-            return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-        }
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
+            case EI_CLASSIFIER_LAST_LAYER_FOMO: {
+                fill_res = fill_result_struct_f32_fomo(
+                    impulse,
+                    block_config,
+                    result,
+                    out_data,
+                    impulse->fomo_output_size,
+                    impulse->fomo_output_size);
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_YOLOV5:
+            case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
+                int version = block_config->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ?
+                    5 : 6;
+                fill_res = fill_result_struct_f32_yolov5(
+                    impulse,
+                    block_config,
+                    result,
+                    version,
+                    out_data,
+                    impulse->tflite_output_features_count);
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_TAO_SSD:
+            case EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET: {
+                fill_res = fill_result_struct_f32_tao_decode_detections(
+                    impulse,
+                    block_config,
+                    result,
+                    out_data,
+                    impulse->tflite_output_features_count,
+                    debug);
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3:
+                fill_res = fill_result_struct_f32_tao_yolov3(
+                    impulse,
+                    block_config,
+                    result,
+                    out_data,
+                    impulse->tflite_output_features_count,
+                    debug);
+                break;
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4: {
+                fill_res = fill_result_struct_f32_tao_yolov4(
+                    impulse,
+                    block_config,
+                    result,
+                    out_data,
+                    impulse->tflite_output_features_count,
+                    debug);
+                break;
+            }
+            default: {
+                ei_printf(
+                    "ERR: Unsupported object detection last layer (%d)\n",
+                    block_config->object_detection_last_layer);
+                return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
+            }
         }
     }
     else {
@@ -188,7 +301,7 @@ EI_IMPULSE_ERROR run_nn_inference(
 }
 
 /**
- * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow)
+ * Special function to run the classifier on images for quantized models
  * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized'
  * returns EI_IMPULSE_OK.
  */
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h b/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h
index f32e86d..c1053e0 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tflite_eon.h
@@ -53,7 +53,7 @@ static EI_IMPULSE_ERROR inference_tflite_setup(
 
     TfLiteStatus init_status = graph_config->model_init(ei_aligned_calloc);
     if (init_status != kTfLiteOk) {
-        ei_printf("Failed to allocate TFLite arena (error code %d)\n", init_status);
+        ei_printf("Failed to initialize the model (error code %d)\n", init_status);
         return EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED;
     }
 
@@ -96,7 +96,7 @@ static EI_IMPULSE_ERROR inference_tflite_setup(
  */
 static EI_IMPULSE_ERROR inference_tflite_run(
     const ei_impulse_t *impulse,
-    ei_config_tflite_eon_graph_t *config,
+    ei_learning_block_config_tflite_graph_t *block_config,
     uint64_t ctx_start_us,
     TfLiteTensor* output,
     TfLiteTensor* labels_tensor,
@@ -105,7 +105,9 @@ static EI_IMPULSE_ERROR inference_tflite_run(
     ei_impulse_result_t *result,
     bool debug) {
 
-    if(config->model_invoke() != kTfLiteOk) {
+    ei_config_tflite_eon_graph_t *graph_config = (ei_config_tflite_eon_graph_t*)block_config->graph_config;
+
+    if (graph_config->model_invoke() != kTfLiteOk) {
         return EI_IMPULSE_TFLITE_ERROR;
     }
 
@@ -120,9 +122,7 @@ static EI_IMPULSE_ERROR inference_tflite_run(
     }
 
     EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite(
-        impulse, output, labels_tensor, scores_tensor, result, debug);
-
-    config->model_reset(ei_aligned_free);
+        impulse, block_config, output, labels_tensor, scores_tensor, result, debug);
 
     if (fill_res != EI_IMPULSE_OK) {
         return fill_res;
@@ -203,7 +203,10 @@ EI_IMPULSE_ERROR run_nn_inference_from_dsp(
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -234,20 +237,30 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     uint8_t* tensor_arena = static_cast<uint8_t*>(p_tensor_arena.get());
 
-    auto input_res = fill_input_tensor_from_matrix(fmatrix, &input);
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    auto input_res = fill_input_tensor_from_matrix(fmatrix, &input, input_block_ids, input_block_ids_size, mtx_size);
     if (input_res != EI_IMPULSE_OK) {
         return input_res;
     }
 
     EI_IMPULSE_ERROR run_res = inference_tflite_run(
         impulse,
-        graph_config,
+        block_config,
         ctx_start_us,
         &output,
         &output_labels,
         &output_scores,
         tensor_arena, result, debug);
 
+    if (result->copy_output) {
+        auto output_res = fill_output_matrix_from_tensor(&output, fmatrix[impulse->dsp_blocks_size + learn_block_index].matrix);
+        if (output_res != EI_IMPULSE_OK) {
+            return output_res;
+        }
+    }
+
+    graph_config->model_reset(ei_aligned_free);
+
     result->timing.classification_us = ei_read_timer_us() - ctx_start_us;
 
     if (run_res != EI_IMPULSE_OK) {
@@ -257,7 +270,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     return EI_IMPULSE_OK;
 }
 
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 /**
  * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow)
  * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized'
@@ -333,7 +346,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     EI_IMPULSE_ERROR run_res = inference_tflite_run(
         impulse,
-        graph_config,
+        block_config,
         ctx_start_us,
         &output,
         &output_labels,
@@ -341,6 +354,9 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
         static_cast<uint8_t*>(p_tensor_arena.get()),
         result,
         debug);
+
+    graph_config->model_reset(ei_aligned_free);
+
     if (run_res != EI_IMPULSE_OK) {
         return run_res;
     }
@@ -349,7 +365,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     return EI_IMPULSE_OK;
 }
-#endif // EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 
 __attribute__((unused)) int extract_tflite_eon_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) {
     ei_dsp_config_tflite_eon_t *dsp_config = (ei_dsp_config_tflite_eon_t*)config_ptr;
@@ -365,12 +381,16 @@ __attribute__((unused)) int extract_tflite_eon_features(signal_t *signal, matrix
 
     ei_learning_block_config_tflite_graph_t ei_learning_block_config = {
         .implementation_version = 1,
+        .classification_mode = EI_CLASSIFIER_CLASSIFICATION_MODE_DSP,
         .block_id = dsp_config->block_id,
         .object_detection = false,
         .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
         .output_data_tensor = 0,
         .output_labels_tensor = 255,
         .output_score_tensor = 255,
+        .threshold = 0,
+        .quantized = 0,
+        .compiled = 1,
         .graph_config = &ei_config_tflite_graph_0
     };
 
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h b/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h
index cfd4437..e1f0d42 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tflite_full.h
@@ -21,6 +21,7 @@
 #if (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL)
 
 #include "model-parameters/model_metadata.h"
+#include "tflite-model/trained_model_ops_define.h"
 
 #include <thread>
 #include "tensorflow-lite/tensorflow/lite/c/common.h"
@@ -28,7 +29,7 @@
 #include "tensorflow-lite/tensorflow/lite/kernels/register.h"
 #include "tensorflow-lite/tensorflow/lite/model.h"
 #include "tensorflow-lite/tensorflow/lite/optional_debug_tools.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.h"
 #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
 #include "edge-impulse-sdk/classifier/ei_model_types.h"
 #include "edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h"
@@ -137,7 +138,10 @@ extern "C" EI_IMPULSE_ERROR run_nn_inference_from_dsp(
 
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -160,7 +164,8 @@ EI_IMPULSE_ERROR run_nn_inference(
         return EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL;
     }
 
-    auto input_res = fill_input_tensor_from_matrix(fmatrix, input);
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    auto input_res = fill_input_tensor_from_matrix(fmatrix, input, input_block_ids, input_block_ids_size, mtx_size);
     if (input_res != EI_IMPULSE_OK) {
         return input_res;
     }
@@ -178,6 +183,13 @@ EI_IMPULSE_ERROR run_nn_inference(
     result->timing.classification_us = ctx_end_us - ctx_start_us;
     result->timing.classification = (int)(result->timing.classification_us / 1000);
 
+    if (result->copy_output) {
+        auto output_res = fill_output_matrix_from_tensor(output, fmatrix[impulse->dsp_blocks_size + learn_block_index].matrix);
+        if (output_res != EI_IMPULSE_OK) {
+            return output_res;
+        }
+    }
+
     if (debug) {
         ei_printf("Predictions (time: %d ms.):\n", result->timing.classification);
     }
@@ -186,7 +198,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     TfLiteTensor *labels_tensor = interpreter->output_tensor(block_config->output_labels_tensor);
 
     EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite(
-        impulse, output, labels_tensor, scores_tensor, result, debug);
+        impulse, block_config, output, labels_tensor, scores_tensor, result, debug);
 
     if (fill_res != EI_IMPULSE_OK) {
         return fill_res;
@@ -210,12 +222,16 @@ __attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *
 
     ei_learning_block_config_tflite_graph_t ei_learning_block_config = {
         .implementation_version = 1,
+        .classification_mode = EI_CLASSIFIER_CLASSIFICATION_MODE_DSP,
         .block_id = dsp_config->block_id,
         .object_detection = false,
         .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
         .output_data_tensor = 0,
         .output_labels_tensor = 255,
         .output_score_tensor = 255,
+        .threshold = 0,
+        .quantized = 0,
+        .compiled = 0,
         .graph_config = &ei_config_tflite_graph_0
     };
 
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h b/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h
index 9e34742..c567805 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tflite_helper.h
@@ -33,63 +33,117 @@
 #if EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE
 #include <cmath>
 #include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 #endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE
 
+/**
+ * Fill the input tensor from the feature matrices listed in input_block_ids,
+ * writing them back to back in the given order. Values are copied as-is for
+ * float32 tensors and quantized with the tensor's scale / zero point for
+ * int8 / uint8 tensors.
+ *
+ * The element count is validated against the tensor size before every write,
+ * so a mismatch returns an error rather than writing past the tensor buffer.
+ *
+ * @param fmatrix              Feature matrices to read from
+ * @param input                Tensor to fill (float32, int8 or uint8)
+ * @param input_block_ids      IDs of the matrices to copy into the tensor
+ * @param input_block_ids_size Number of entries in input_block_ids
+ * @param mtx_size             Forwarded to find_mtx_by_idx; callers pass
+ *                             dsp_blocks_size + learning_blocks_size
+ * @return EI_IMPULSE_OK on success, EI_IMPULSE_INVALID_SIZE when a matrix is
+ *         not found or the sizes do not match, EI_IMPULSE_INPUT_TENSOR_WAS_NULL
+ *         when the tensor type is not supported
+ */
 EI_IMPULSE_ERROR fill_input_tensor_from_matrix(
-    matrix_t *fmatrix,
-    TfLiteTensor *input
+    ei_feature_t *fmatrix,
+    TfLiteTensor *input,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
+    size_t mtx_size
 ) {
-    const size_t matrix_els = fmatrix->rows * fmatrix->cols;
-
-    switch (input->type) {
-        case kTfLiteFloat32: {
-            if (input->bytes / 4 != matrix_els) {
-                ei_printf("ERR: input tensor has size %d, but input matrix has has size %d\n",
-                    (int)input->bytes / 4, (int)matrix_els);
-                return EI_IMPULSE_INVALID_SIZE;
-            }
-
-            for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix++) {
-                input->data.f[ix] = fmatrix->buffer[ix];
-            }
-            break;
-        }
-        case kTfLiteInt8: {
-            if (input->bytes != matrix_els) {
-                ei_printf("ERR: input tensor has size %d, but input matrix has has size %d\n",
-                    (int)input->bytes, (int)matrix_els);
-                return EI_IMPULSE_INVALID_SIZE;
-            }
-
-            for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix++) {
-                float val = (float)fmatrix->buffer[ix];
-                input->data.int8[ix] = static_cast<int8_t>(
-                    pre_cast_quantize(val, input->params.scale, input->params.zero_point, true));
-            }
-            break;
-        }
-        case kTfLiteUInt8: {
-            if (input->bytes != matrix_els) {
-                ei_printf("ERR: input tensor has size %d, but input matrix has has size %d\n",
-                    (int)input->bytes, (int)matrix_els);
-                return EI_IMPULSE_INVALID_SIZE;
-            }
-
-            for (size_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix++) {
-                float val = (float)fmatrix->buffer[ix];
-                input->data.uint8[ix] = static_cast<uint8_t>(
-                    pre_cast_quantize(val, input->params.scale, input->params.zero_point, false));            }
-            break;
-        }
-        default: {
-            ei_printf("ERR: Cannot handle input type (%d)\n", input->type);
-            return EI_IMPULSE_INPUT_TENSOR_WAS_NULL;
-        }
+    // Element capacity of the tensor, derived from its element type. An
+    // unsupported type is rejected here, before anything is written.
+    size_t input_els = 0;
+    switch (input->type) {
+        case kTfLiteFloat32: {
+            input_els = input->bytes / sizeof(float);
+            break;
+        }
+        case kTfLiteInt8:
+        case kTfLiteUInt8: {
+            input_els = input->bytes;
+            break;
+        }
+        default: {
+            ei_printf("ERR: Cannot handle input type (%d)\n", input->type);
+            return EI_IMPULSE_INPUT_TENSOR_WAS_NULL;
+        }
+    }
+
+    size_t input_idx = 0;
+
+    for (size_t i = 0; i < input_block_ids_size; i++) {
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+        size_t cur_mtx = input_block_ids[i];
+        ei::matrix_t* matrix = NULL;
+
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %zu\n", cur_mtx);
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+#else
+        ei::matrix_t* matrix = fmatrix[0].matrix;
+#endif
+
+        const size_t block_els = matrix->rows * matrix->cols;
+
+        // Check the remaining capacity before writing, so that a size mismatch
+        // is reported instead of overrunning the tensor buffer.
+        if (block_els > input_els - input_idx) {
+            ei_printf("ERR: input tensor has size %d, but input matrix has size %d or more\n",
+                (int)input_els, (int)(input_idx + block_els));
+            return EI_IMPULSE_INVALID_SIZE;
+        }
+
+        switch (input->type) {
+            case kTfLiteFloat32: {
+                for (size_t ix = 0; ix < block_els; ix++) {
+                    input->data.f[input_idx++] = matrix->buffer[ix];
+                }
+                break;
+            }
+            case kTfLiteInt8: {
+                for (size_t ix = 0; ix < block_els; ix++) {
+                    float val = (float)matrix->buffer[ix];
+                    input->data.int8[input_idx++] = static_cast<int8_t>(
+                        pre_cast_quantize(val, input->params.scale, input->params.zero_point, true));
+                }
+                break;
+            }
+            case kTfLiteUInt8: {
+                for (size_t ix = 0; ix < block_els; ix++) {
+                    float val = (float)matrix->buffer[ix];
+                    input->data.uint8[input_idx++] = static_cast<uint8_t>(
+                        pre_cast_quantize(val, input->params.scale, input->params.zero_point, false));
+                }
+                break;
+            }
+            default: {
+                // Not reachable: the tensor type was validated above.
+                break;
+            }
+        }
+    }
+
+    // The matrices must fill the tensor exactly; reject a shortfall as well.
+    if (input_idx != input_els) {
+        ei_printf("ERR: input tensor has size %d, but input matrix has size %d\n",
+            (int)input_els, (int)input_idx);
+        return EI_IMPULSE_INVALID_SIZE;
     }
 
     return EI_IMPULSE_OK;
 }
 
@@ -270,26 +277,38 @@ EI_IMPULSE_ERROR fill_output_matrix_from_tensor(
 
 EI_IMPULSE_ERROR fill_result_struct_from_output_tensor_tflite(
     const ei_impulse_t *impulse,
+    ei_learning_block_config_tflite_graph_t *block_config,
     TfLiteTensor* output,
     TfLiteTensor* labels_tensor,
     TfLiteTensor* scores_tensor,
     ei_impulse_result_t *result,
     bool debug
 ) {
-
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->classification_mode == EI_CLASSIFIER_CLASSIFICATION_MODE_OBJECT_DETECTION) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
                 bool int8_output = output->type == TfLiteType::kTfLiteInt8;
                 if (int8_output) {
-                    fill_res = fill_result_struct_i8_fomo(impulse, result, output->data.int8, output->params.zero_point, output->params.scale,
-                        impulse->fomo_output_size, impulse->fomo_output_size);
+                    fill_res = fill_result_struct_i8_fomo(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.int8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->fomo_output_size,
+                        impulse->fomo_output_size);
                 }
                 else {
-                    fill_res = fill_result_struct_f32_fomo(impulse, result, output->data.f,
-                        impulse->fomo_output_size, impulse->fomo_output_size);
+                    fill_res = fill_result_struct_f32_fomo(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.f,
+                        impulse->fomo_output_size,
+                        impulse->fomo_output_size);
                 }
                 break;
             }
@@ -302,7 +321,14 @@ EI_IMPULSE_ERROR fill_result_struct_from_output_tensor_tflite(
                     return EI_IMPULSE_LABEL_TENSOR_WAS_NULL;
                 }
                 if (output->type == kTfLiteFloat32) {
-                    fill_res = fill_result_struct_f32_object_detection(impulse, result, output->data.f, scores_tensor->data.f, labels_tensor->data.f, debug);
+                    fill_res = fill_result_struct_f32_object_detection(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.f,
+                        scores_tensor->data.f,
+                        labels_tensor->data.f,
+                        debug);
                 }
                 else {
                     ei_printf("ERR: MobileNet SSD does not support quantized inference\n");
@@ -318,36 +344,42 @@ EI_IMPULSE_ERROR fill_result_struct_from_output_tensor_tflite(
 #endif // EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE_FULL
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5:
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
-                int version = impulse->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ?
+                int version = block_config->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ?
                     5 : 6;
 
                 if (output->type == kTfLiteInt8) {
                     fill_res = fill_result_struct_quantized_yolov5(
                         impulse,
+                        block_config,
                         result,
                         version,
                         output->data.int8,
                         output->params.zero_point,
                         output->params.scale,
-                        impulse->tflite_output_features_count);
+                        impulse->tflite_output_features_count,
+                        debug);
                 }
                 else if (output->type == kTfLiteUInt8) {
                     fill_res = fill_result_struct_quantized_yolov5(
                         impulse,
+                        block_config,
                         result,
                         version,
                         output->data.uint8,
                         output->params.zero_point,
                         output->params.scale,
-                        impulse->tflite_output_features_count);
+                        impulse->tflite_output_features_count,
+                        debug);
                 }
                 else if (output->type == kTfLiteFloat32) {
                     fill_res = fill_result_struct_f32_yolov5(
                         impulse,
+                        block_config,
                         result,
                         version,
                         output->data.f,
-                        impulse->tflite_output_features_count);
+                        impulse->tflite_output_features_count,
+                        debug);
                 }
                 else {
                     ei_printf("ERR: Invalid output type (%d) for YOLOv5 last layer\n", output->type);
@@ -356,49 +388,182 @@ EI_IMPULSE_ERROR fill_result_struct_from_output_tensor_tflite(
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOX: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOX does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
+                }
+                else {
                     fill_res = fill_result_struct_f32_yolox(
                         impulse,
+                        block_config,
                         result,
                         output->data.f,
-                        impulse->tflite_output_features_count);
-                #endif
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOV7: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOV7 does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
+                }
+                else {
                     size_t output_feature_count = 1;
                     for (int ix = 0; ix < output->dims->size; ix++) {
                         output_feature_count *= output->dims->data[ix];
                     }
                     fill_res = fill_result_struct_f32_yolov7(
                         impulse,
+                        block_config,
                         result,
                         output->data.f,
                         output_feature_count);
-                #endif
+                }
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_TAO_SSD:
+            case EI_CLASSIFIER_LAST_LAYER_TAO_RETINANET: {
+
+                if (output->type == kTfLiteInt8) {
+                    fill_res = fill_result_struct_quantized_tao_decode_detections(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.int8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteUInt8) {
+                    fill_res = fill_result_struct_quantized_tao_decode_detections(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.uint8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteFloat32) {
+                    fill_res = fill_result_struct_f32_tao_decode_detections(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.f,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else {
+                    ei_printf("ERR: Invalid output type (%d) for TAO last layer\n", output->type);
+                    return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
+                }
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV3: {
+
+                if (output->type == kTfLiteInt8) {
+                    fill_res = fill_result_struct_quantized_tao_yolov3(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.int8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteUInt8) {
+                    fill_res = fill_result_struct_quantized_tao_yolov3(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.uint8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteFloat32) {
+                    fill_res = fill_result_struct_f32_tao_yolov3(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.f,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else {
+                    ei_printf("ERR: Invalid output type (%d) for TAO YOLOv3 layer\n", output->type);
+                    return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
+                }
+                break;
+            }
+            case EI_CLASSIFIER_LAST_LAYER_TAO_YOLOV4: {
+
+                if (output->type == kTfLiteInt8) {
+                    fill_res = fill_result_struct_quantized_tao_yolov4(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.int8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteUInt8) {
+                    fill_res = fill_result_struct_quantized_tao_yolov4(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.uint8,
+                        output->params.zero_point,
+                        output->params.scale,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else if (output->type == kTfLiteFloat32) {
+                    fill_res = fill_result_struct_f32_tao_yolov4(
+                        impulse,
+                        block_config,
+                        result,
+                        output->data.f,
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
+                else {
+                    ei_printf("ERR: Invalid output type (%d) for TAO YOLOv4 layer\n", output->type);
+                    return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
+                }
                 break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
             }
         }
     }
-    else {
-        bool int8_output = output->type == TfLiteType::kTfLiteInt8;
-        if (int8_output) {
-            fill_res = fill_result_struct_i8(impulse, result, output->data.int8, output->params.zero_point, output->params.scale, debug);
+    else if (block_config->classification_mode == EI_CLASSIFIER_CLASSIFICATION_MODE_VISUAL_ANOMALY)
+    {
+        if (!result->copy_output) {
+            fill_res = fill_result_visual_ad_struct_f32(impulse, result, output->data.f, block_config->threshold, debug);
         }
-        else {
-            fill_res = fill_result_struct_f32(impulse, result, output->data.f, debug);
+    }
+    // if we copy the output, we don't need to process it as classification
+    else
+    {
+        if (!result->copy_output) {
+            bool int8_output = output->type == TfLiteType::kTfLiteInt8;
+            if (int8_output) {
+                fill_res = fill_result_struct_i8(impulse, result, output->data.int8, output->params.zero_point, output->params.scale, debug);
+            }
+            else {
+                fill_res = fill_result_struct_f32(impulse, result, output->data.f, debug);
+            }
         }
     }
 
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h b/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h
index 637a989..fd3cb26 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tflite_micro.h
@@ -24,9 +24,9 @@
 
 #include <cmath>
 #include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
 #include "edge-impulse-sdk/classifier/ei_aligned_malloc.h"
 #include "edge-impulse-sdk/classifier/ei_fill_result_struct.h"
 #include "edge-impulse-sdk/classifier/ei_model_types.h"
@@ -36,9 +36,6 @@
 #include "tflite-model/tflite-resolver.h"
 #endif // EI_CLASSIFIER_HAS_TFLITE_OPS_RESOLVER
 
-static tflite::MicroErrorReporter micro_error_reporter;
-static tflite::ErrorReporter* error_reporter = &micro_error_reporter;
-
 #ifdef EI_CLASSIFIER_ALLOCATION_STATIC
 #if defined __GNUC__
 #define ALIGN(X) __attribute__((aligned(X)))
@@ -108,7 +105,7 @@ static EI_IMPULSE_ERROR inference_tflite_setup(
         // copying or parsing, it's a very lightweight operation.
         model = tflite::GetModel(graph_config->model);
         if (model->version() != TFLITE_SCHEMA_VERSION) {
-            error_reporter->Report(
+            ei_printf(
                 "Model provided is schema version %d not equal "
                 "to supported version %d.",
                 model->version(), TFLITE_SCHEMA_VERSION);
@@ -120,19 +117,19 @@ static EI_IMPULSE_ERROR inference_tflite_setup(
 #ifdef EI_TFLITE_RESOLVER
     EI_TFLITE_RESOLVER
 #else
-    tflite::AllOpsResolver resolver;
+    static tflite::AllOpsResolver resolver; // needs static to match the life of the interpreter
 #endif
 
     // Build an interpreter to run the model with.
     tflite::MicroInterpreter *interpreter = new tflite::MicroInterpreter(
-        model, resolver, tensor_arena, graph_config->arena_size, error_reporter);
+        model, resolver, tensor_arena, graph_config->arena_size);
 
     *micro_interpreter = interpreter;
 
     // Allocate memory from the tensor_arena for the model's tensors.
-    TfLiteStatus allocate_status = interpreter->AllocateTensors();
+    TfLiteStatus allocate_status = interpreter->AllocateTensors(true);
     if (allocate_status != kTfLiteOk) {
-        error_reporter->Report("AllocateTensors() failed");
+        ei_printf("AllocateTensors() failed");
         return EI_IMPULSE_TFLITE_ERROR;
     }
 
@@ -166,7 +163,7 @@ static EI_IMPULSE_ERROR inference_tflite_setup(
  */
 static EI_IMPULSE_ERROR inference_tflite_run(
     const ei_impulse_t *impulse,
-    ei_learning_block_config_tflite_graph_t *config,
+    ei_learning_block_config_tflite_graph_t *block_config,
     uint64_t ctx_start_us,
     TfLiteTensor* output,
     TfLiteTensor* labels_tensor,
@@ -176,11 +173,12 @@ static EI_IMPULSE_ERROR inference_tflite_run(
     ei_impulse_result_t *result,
     bool debug) {
 
+
     // Run inference, and report any error
     TfLiteStatus invoke_status = interpreter->Invoke();
     if (invoke_status != kTfLiteOk) {
         delete interpreter;
-        error_reporter->Report("Invoke failed (%d)\n", invoke_status);
+        ei_printf("Invoke failed (%d)\n", invoke_status);
         return EI_IMPULSE_TFLITE_ERROR;
     }
 
@@ -195,7 +193,7 @@ static EI_IMPULSE_ERROR inference_tflite_run(
     }
 
     EI_IMPULSE_ERROR fill_res = fill_result_struct_from_output_tensor_tflite(
-        impulse, output, labels_tensor, scores_tensor, result, debug);
+        impulse, block_config, output, labels_tensor, scores_tensor, result, debug);
 
     delete interpreter;
 
@@ -253,7 +251,7 @@ EI_IMPULSE_ERROR run_nn_inference_from_dsp(
     // Run inference, and report any error
     TfLiteStatus invoke_status = interpreter->Invoke();
     if (invoke_status != kTfLiteOk) {
-        error_reporter->Report("Invoke failed (%d)\n", invoke_status);
+        ei_printf("Invoke failed (%d)\n", invoke_status);
         return EI_IMPULSE_TFLITE_ERROR;
     }
 
@@ -278,7 +276,10 @@ EI_IMPULSE_ERROR run_nn_inference_from_dsp(
  */
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
     bool debug = false)
@@ -308,7 +309,8 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     uint8_t* tensor_arena = static_cast<uint8_t*>(p_tensor_arena.get());
 
-    auto input_res = fill_input_tensor_from_matrix(fmatrix, input);
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    auto input_res = fill_input_tensor_from_matrix(fmatrix, input, input_block_ids, input_block_ids_size, mtx_size);
     if (input_res != EI_IMPULSE_OK) {
         return input_res;
     }
@@ -322,6 +324,13 @@ EI_IMPULSE_ERROR run_nn_inference(
         output_scores,
         interpreter, tensor_arena, result, debug);
 
+    if (result->copy_output) {
+        auto output_res = fill_output_matrix_from_tensor(output, fmatrix[impulse->dsp_blocks_size + learn_block_index].matrix);
+        if (output_res != EI_IMPULSE_OK) {
+            return output_res;
+        }
+    }
+
     result->timing.classification_us = ei_read_timer_us() - ctx_start_us;
 
     if (run_res != EI_IMPULSE_OK) {
@@ -331,7 +340,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     return EI_IMPULSE_OK;
 }
 
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 /**
  * Special function to run the classifier on images, only works on TFLite models (either interpreter or EON or for tensaiflow)
  * that allocates a lot less memory by quantizing in place. This only works if 'can_run_classifier_image_quantized'
@@ -422,7 +431,7 @@ EI_IMPULSE_ERROR run_nn_inference_image_quantized(
 
     return EI_IMPULSE_OK;
 }
-#endif // EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#endif // EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
 
 __attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *output_matrix, void *config_ptr, const float frequency) {
     ei_dsp_config_tflite_t *dsp_config = (ei_dsp_config_tflite_t*)config_ptr;
@@ -436,12 +445,16 @@ __attribute__((unused)) int extract_tflite_features(signal_t *signal, matrix_t *
 
     ei_learning_block_config_tflite_graph_t ei_learning_block_config = {
         .implementation_version = 1,
+        .classification_mode = EI_CLASSIFIER_CLASSIFICATION_MODE_DSP,
         .block_id = dsp_config->block_id,
         .object_detection = false,
         .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
         .output_data_tensor = 0,
         .output_labels_tensor = 255,
         .output_score_tensor = 255,
+        .threshold = 0,
+        .quantized = 0,
+        .compiled = 0,
         .graph_config = &ei_config_tflite_graph_0
     };
 
diff --git a/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h b/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h
index e82656b..5d78201 100644
--- a/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h
+++ b/edge-impulse-sdk/classifier/inferencing_engines/tflite_tidl.h
@@ -52,12 +52,15 @@ void *out_ptrs[16] = {NULL};
 
 EI_IMPULSE_ERROR run_nn_inference(
     const ei_impulse_t *impulse,
-    ei::matrix_t *fmatrix,
+    ei_feature_t *fmatrix,
+    uint32_t learn_block_index,
+    uint32_t* input_block_ids,
+    uint32_t input_block_ids_size,
     ei_impulse_result_t *result,
     void *config_ptr,
-    bool debug = false)
+    bool debug)
 {
-    ei_learning_block_config_tflite_graph_t *config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
+    ei_learning_block_config_tflite_graph_t *block_config = (ei_learning_block_config_tflite_graph_t*)config_ptr;
 
     static std::unique_ptr<tflite::FlatBufferModel> model = nullptr;
     static std::unique_ptr<tflite::Interpreter> interpreter = nullptr;
@@ -144,7 +147,7 @@ EI_IMPULSE_ERROR run_nn_inference(
     }
 
     // Obtain pointers to the model's input and output tensors.
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
     int8_t* input = interpreter->typed_input_tensor<int8_t>(0);
 #else
     float* input = interpreter->typed_input_tensor<float>(0);
@@ -154,22 +157,38 @@ EI_IMPULSE_ERROR run_nn_inference(
         return EI_IMPULSE_INPUT_TENSOR_WAS_NULL;
     }
 
-    for (uint32_t ix = 0; ix < fmatrix->rows * fmatrix->cols; ix++) {
-    if (impulse->object_detection) {
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
-        float pixel = (float)fmatrix->buffer[ix];
-        input[ix] = static_cast<uint8_t>((pixel / input->tflite_input_scale) + input->tflite_input_zeropoint);
+    size_t mtx_size = impulse->dsp_blocks_size + impulse->learning_blocks_size;
+    size_t input_offset = 0; // next write position in the input tensor, carried across all feature blocks
+    for (size_t i = 0; i < input_block_ids_size; i++) { // copy every requested feature matrix into the input, back to back
+#if EI_CLASSIFIER_SINGLE_FEATURE_INPUT == 0
+        uint16_t cur_mtx = input_block_ids[i];
+        ei::matrix_t* matrix = NULL;
+
+        if (!find_mtx_by_idx(fmatrix, &matrix, cur_mtx, mtx_size)) {
+            ei_printf("ERR: Cannot find matrix with id %u\n", (unsigned int)cur_mtx); // %u + cast: cur_mtx is a uint16_t, not a size_t
+            return EI_IMPULSE_INVALID_SIZE;
+        }
 #else
-        input[ix] = fmatrix->buffer[ix];
+        ei::matrix_t* matrix = fmatrix[0].matrix;
 #endif
-    }
-    else {
-#if EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED == 1
-        input[ix] = static_cast<int8_t>(round(fmatrix->buffer[ix] / input->tflite_input_scale) + input->tflite_input_zeropoint);
+
+        for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++, input_offset++) { // input_offset == ix for the first block
+            if (block_config->object_detection) {
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
+                float pixel = (float)matrix->buffer[ix];
+                input[input_offset] = static_cast<uint8_t>((pixel / input->tflite_input_scale) + input->tflite_input_zeropoint);
 #else
-        input[ix] = fmatrix->buffer[ix];
+                input[input_offset] = matrix->buffer[ix];
 #endif
-    }
+            }
+            else {
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
+                input[input_offset] = static_cast<int8_t>(round(matrix->buffer[ix] / input->tflite_input_scale) + input->tflite_input_zeropoint);
+#else
+                input[input_offset] = matrix->buffer[ix];
+#endif
+            }
+        }
     }
 
     uint64_t ctx_start_us = ei_read_timer_us();
@@ -181,10 +200,10 @@ EI_IMPULSE_ERROR run_nn_inference(
     result->timing.classification_us = ctx_end_us - ctx_start_us;
     result->timing.classification = (int)(result->timing.classification_us / 1000);
 
-#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
-    int8_t* out_data = interpreter->typed_output_tensor<int8_t>(config->output_data_tensor);
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
+    int8_t* out_data = interpreter->typed_output_tensor<int8_t>(block_config->output_data_tensor);
 #else
-    float* out_data = interpreter->typed_output_tensor<float>(config->output_data_tensor);
+    float* out_data = interpreter->typed_output_tensor<float>(block_config->output_data_tensor);
 #endif
 
     if (debug) {
@@ -213,7 +232,6 @@ EI_IMPULSE_ERROR run_nn_inference(
                 }
                 ei_printf(")\n");
             }
-
         }
     }
 
@@ -227,70 +245,100 @@ EI_IMPULSE_ERROR run_nn_inference(
 
     EI_IMPULSE_ERROR fill_res = EI_IMPULSE_OK;
 
-    if (impulse->object_detection) {
-        switch (impulse->object_detection_last_layer) {
+    if (block_config->object_detection) {
+        switch (block_config->object_detection_last_layer) {
             case EI_CLASSIFIER_LAST_LAYER_FOMO: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
-                    fill_res = fill_result_struct_i8_fomo(impulse, result, out_data, out_data->tflite_output_zeropoint, out_data->tflite_output_scale,
-                        impulse->fomo_output_size, impulse->fomo_output_size);
-                #else
-                    fill_res = fill_result_struct_f32_fomo(impulse, result, out_data,
-                        impulse->fomo_output_size, impulse->fomo_output_size);
-                #endif
+                if (block_config->quantized == 1) {
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
+                    fill_res = fill_result_struct_i8_fomo(
+                        impulse,
+                        block_config,
+                        result,
+                        out_data,
+                        out_data->tflite_output_zeropoint,
+                        out_data->tflite_output_scale,
+                        impulse->fomo_output_size,
+                        impulse->fomo_output_size);
+#endif
+                }
+                else {
+                    fill_res = fill_result_struct_f32_fomo(
+                        impulse,
+                        block_config,
+                        result,
+                        out_data,
+                        impulse->fomo_output_size,
+                        impulse->fomo_output_size);
+                }
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_SSD: {
-                float *scores_tensor = interpreter->typed_output_tensor<float>(config->output_score_tensor);
-                float *label_tensor = interpreter->typed_output_tensor<float>(config->output_labels_tensor);
+                float *scores_tensor = interpreter->typed_output_tensor<float>(block_config->output_score_tensor);
+                float *label_tensor = interpreter->typed_output_tensor<float>(block_config->output_labels_tensor);
                 if (!scores_tensor) {
                     return EI_IMPULSE_SCORE_TENSOR_WAS_NULL;
                 }
                 if (!label_tensor) {
                     return EI_IMPULSE_LABEL_TENSOR_WAS_NULL;
                 }
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: MobileNet SSD does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
-                    fill_res = fill_result_struct_f32_object_detection(impulse, result, out_data, scores_tensor, label_tensor, debug);
-                #endif
+                }
+                else {
+                    fill_res = fill_result_struct_f32_object_detection(
+                        impulse,
+                        block_config,
+                        result,
+                        out_data,
+                        scores_tensor,
+                        label_tensor,
+                        debug);
+                }
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5:
             case EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOv5 does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
-                    int version = impulse->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ?
+                }
+                else {
+                    int version = block_config->object_detection_last_layer == EI_CLASSIFIER_LAST_LAYER_YOLOV5_V5_DRPAI ?
                         5 : 6;
                     fill_res = fill_result_struct_f32_yolov5(
                         impulse,
+                        block_config,
                         result,
                         version,
                         out_data,
-                        impulse->tflite_output_features_count);
-                #endif
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOX: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOX does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
+                }
+                else {
                     fill_res = fill_result_struct_f32_yolox(
                         impulse,
+                        block_config,
                         result,
                         out_data,
-                        impulse->tflite_output_features_count);
-                #endif
+                        impulse->tflite_output_features_count,
+                        debug);
+                }
                 break;
             }
             case EI_CLASSIFIER_LAST_LAYER_YOLOV7: {
-                #if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+                if (block_config->quantized == 1) {
                     ei_printf("ERR: YOLOV7 does not support quantized inference\n");
                     return EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE;
-                #else
+                }
+                else {
                     TfLiteTensor *output = interpreter->output_tensor(0);
                     size_t output_feature_count = 1;
                     for (int ix = 0; ix < output->dims->size; ix++) {
@@ -298,21 +346,22 @@ EI_IMPULSE_ERROR run_nn_inference(
                     }
                     fill_res = fill_result_struct_f32_yolov7(
                         impulse,
+                        block_config,
                         result,
                         output->data.f,
                         output_feature_count);
-                #endif
+                }
                 break;
             }
             default: {
                 ei_printf("ERR: Unsupported object detection last layer (%d)\n",
-                    impulse->object_detection_last_layer);
+                    block_config->object_detection_last_layer);
                 break;
             }
         }
     }
     else {
-#if EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED == 1
+#if EI_CLASSIFIER_QUANTIZATION_ENABLED == 1
         fill_res = fill_result_struct_i8(impulse, result, out_data, out_data->tflite_output_zeropoint, out_data->tflite_output_scale, debug);
 #else
         fill_res = fill_result_struct_f32(impulse, result, out_data, debug);
diff --git a/edge-impulse-sdk/create-arduino-library.sh b/edge-impulse-sdk/create-arduino-library.sh
deleted file mode 100644
index 407b28c..0000000
--- a/edge-impulse-sdk/create-arduino-library.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-# Run this script to convert the edge-impulse-sdk folder into a library that can be consumed by the Arduino IDE
-# it renames files (e.g. *.cpp to *.c), removes features (uTensor), and updates include paths
-
-# exit when any command fails
-set -e
-
-cleanup() {
-    echo ""
-    echo "Terminated by user"
-    exit 1
-}
-trap cleanup INT TERM
-
-SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-
-if [[ "$OSTYPE" == "darwin"* ]]; then
-    SEDCMD="sed -i '' -e"
-    ECHOCMD="echo"
-    LC_CTYPE=C
-    LANG=C
-else
-    SEDCMD="sed -i -e"
-    ECHOCMD="echo -e"
-fi
-
-rm -rf $SCRIPTPATH/tensorflow/lite/micro/mbed/
-rm -rf $SCRIPTPATH/porting/mbed/
-rm -rf $SCRIPTPATH/porting/mingw32/
-rm -rf $SCRIPTPATH/porting/posix/
-rm -rf $SCRIPTPATH/porting/silabs/
-rm -rf $SCRIPTPATH/porting/stm32-cubeai/
-rm -rf $SCRIPTPATH/porting/zephyr/
-rm -rf $SCRIPTPATH/porting/sony/
-rm -rf $SCRIPTPATH/porting/ti/
-rm -rf $SCRIPTPATH/porting/lib/
-rm -rf $SCRIPTPATH/porting/raspberry/
-rm -rf $SCRIPTPATH/porting/himax/
-rm -rf $SCRIPTPATH/porting/synaptics/
-rm -rf $SCRIPTPATH/porting/brickml/
-rm -rf $SCRIPTPATH/porting/renesas-ra6m5/
-rm -rf $SCRIPTPATH/classifier/ei_run_classifier_c*
-rm -rf $SCRIPTPATH/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S
-rm -rf $SCRIPTPATH/third_party/arc_mli_package/
-
-# rename all .cc files to .cpp, and do an inplace change of the headers
-find . -name '*.cc' -exec sh -c 'mv "$0" "${0%.cc}.cpp"' {} \;
-
-# make sure that abs is undefined on arduino
-find $SCRIPTPATH/ -name 'compatibility.h' -exec bash -c "$SEDCMD 's/#include <cstdint>/#include <cstdint>\\
-#include \"edge-impulse-sdk\/tensorflow\/lite\/portable_type_to_tflitetype.h\"/' {}" {} \;
-find $SCRIPTPATH/ -name 'micro_utils.h' -exec bash -c "$SEDCMD 's/#include <cstdint>/#include <cstdint>\\
-#include \"edge-impulse-sdk\/tensorflow\/lite\/portable_type_to_tflitetype.h\"/' {}" {} \;
-
-# remove all the -e files
-find $SCRIPTPATH/ -name "*-e" -exec rm -f {} \;
diff --git a/edge-impulse-sdk/dsp/ei_alloc.h b/edge-impulse-sdk/dsp/ei_alloc.h
index 3ce7148..6690570 100644
--- a/edge-impulse-sdk/dsp/ei_alloc.h
+++ b/edge-impulse-sdk/dsp/ei_alloc.h
@@ -44,7 +44,7 @@ struct EiAlloc
         auto bytes = n * sizeof(T);
         auto ptr = ei_dsp_malloc(bytes);
 #if EIDSP_TRACK_ALLOCATIONS
-        allocs[ptr] = bytes;
+        get_allocs()[ptr] = bytes;
 #endif
         return (T *)ptr;
     }
@@ -52,9 +52,9 @@ struct EiAlloc
     void deallocate(T *p, size_t n) noexcept
     {
 #if EIDSP_TRACK_ALLOCATIONS
-        auto size_p = allocs.find(p);
+        auto size_p = get_allocs().find(p);
         ei_dsp_free(p,size_p->second);
-        allocs.erase(size_p);
+        get_allocs().erase(size_p);
 #else
         ei_dsp_free(p,0);
 #endif
@@ -62,7 +62,11 @@ struct EiAlloc
 #if EIDSP_TRACK_ALLOCATIONS
     private:
     // [address] -> size requested
-    std::map<void*,size_t> allocs;
+    typedef std::map<void*,size_t> map_t;
+    static map_t& get_allocs() { // accessor for the address -> size table used by allocate() and deallocate()
+        static map_t allocs; // function-local static: a single table instead of one map per allocator object
+        return allocs;
+    }
 #endif
 };
 
diff --git a/edge-impulse-sdk/dsp/ei_dsp_handle.h b/edge-impulse-sdk/dsp/ei_dsp_handle.h
new file mode 100644
index 0000000..462117a
--- /dev/null
+++ b/edge-impulse-sdk/dsp/ei_dsp_handle.h
@@ -0,0 +1,58 @@
+/* Edge Impulse inferencing library
+ * Copyright (c) 2023 EdgeImpulse Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __EI_DSP_HANDLE__H__
+#define __EI_DSP_HANDLE__H__
+
+#include "edge-impulse-sdk/dsp/config.hpp"
+#include "edge-impulse-sdk/dsp/numpy_types.h"
+
+class DspHandle { // Abstract base class for DSP blocks: subclasses implement print() and extract()
+public:
+    /**
+     * @brief Override and call ei_printf to print debug information, especially the current state
+     *
+     * @return int Status code; its meaning is not defined here (presumably 0 on success, as for extract() - TODO confirm)
+     */
+    virtual int print() = 0;
+
+    /**
+     * @brief Override and convert raw data into processed features. Any state should live inside your custom class.
+     * Provide a constructor to initialize your state.
+     *
+     * @param signal Callback object to get raw data from
+     * @param output_matrix Output matrix to write features to
+     * @param config Configuration object, generated by Studio based on your DSP block parameters
+     * @param frequency Sampling frequency, as set in your project
+     * @return int 0 on success, anything else for failure
+     */
+    virtual int extract(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config, const float frequency) = 0;
+
+    // Declared virtual so that subclasses can override it
+    /**
+     * @brief If you call new or ei_malloc anywhere in your class, you must override this function and free those objects
+     * there (delete / ei_free as appropriate).
+     */
+    virtual ~DspHandle() {};
+};
+
+#endif  //!__EI_DSP_HANDLE__H__
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/ei_flatten.h b/edge-impulse-sdk/dsp/ei_flatten.h
new file mode 100644
index 0000000..d7586b2
--- /dev/null
+++ b/edge-impulse-sdk/dsp/ei_flatten.h
@@ -0,0 +1,198 @@
+/* Edge Impulse inferencing library
+ * Copyright (c) 2023 EdgeImpulse Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __EI_FLATTEN__H__
+#define __EI_FLATTEN__H__
+
+#include "edge-impulse-sdk/dsp/ei_vector.h"
+#include "edge-impulse-sdk/dsp/returntypes.hpp"
+#include "edge-impulse-sdk/dsp/ei_dsp_handle.h"
+#include "model-parameters/model_metadata.h"
+#include "edge-impulse-sdk/dsp/numpy.hpp"
+#include "edge-impulse-sdk/dsp/config.hpp"
+
+class flatten_class : public DspHandle {
+public:
+    int print() override { // debug dump: prints every mean currently stored for each axis, returns EIDSP_OK
+        ei_printf("means: ");
+        for (int axis = 0; (size_t)axis < this->means.size(); axis++) {
+            ei_printf("axis: %i\n", axis);
+            for (size_t i = 0; i < this->means[axis].size(); i++) { // bound by this axis's own history, not by the axis count
+                ei_printf("%f ", this->means[axis][i]);
+            }
+        }
+        ei_printf("\n");
+        return ei::EIDSP_OK;
+    }
+
+    int extract(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config_ptr, const float frequency) override {
+        using namespace ei;
+        // Computes the enabled per-axis statistics of the signal and writes them to output_matrix; `frequency` is not used here.
+        ei_dsp_config_flatten_t config = *((ei_dsp_config_flatten_t*)config_ptr);
+        // Each enabled statistic contributes one value per axis to the expected output size.
+        uint32_t expected_matrix_size = 0;
+        if (config.average) expected_matrix_size += config.axes;
+        if (config.minimum) expected_matrix_size += config.axes;
+        if (config.maximum) expected_matrix_size += config.axes;
+        if (config.rms) expected_matrix_size += config.axes;
+        if (config.stdev) expected_matrix_size += config.axes;
+        if (config.skewness) expected_matrix_size += config.axes;
+        if (config.kurtosis) expected_matrix_size += config.axes;
+        if (config.moving_avg_num_windows) expected_matrix_size += config.axes;
+        // NOTE(review): EIDSP_ERR is defined elsewhere; presumably it returns the given error code from extract() - confirm.
+        if (output_matrix->rows * output_matrix->cols != expected_matrix_size) {
+            EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
+        }
+
+        int ret;
+
+        // input matrix from the raw signal: total_length / axes rows, one column per axis
+        matrix_t input_matrix(signal->total_length / config.axes, config.axes);
+        if (!input_matrix.buffer) {
+            EIDSP_ERR(EIDSP_OUT_OF_MEM);
+        }
+        signal->get_data(0, signal->total_length, input_matrix.buffer);
+
+        // scale the signal by config.scale_axes
+        ret = numpy::scale(&input_matrix, config.scale_axes);
+        if (ret != EIDSP_OK) {
+            ei_printf("ERR: Failed to scale signal (%d)\n", ret);
+            EIDSP_ERR(ret);
+        }
+
+        // transpose the matrix so we have one row per axis
+        numpy::transpose_in_place(&input_matrix);
+
+        size_t out_matrix_ix = 0; // next free slot in output_matrix->buffer
+        // Per axis, values are emitted in this order: average, minimum, maximum, rms, stdev, skewness, kurtosis, moving average
+        for (size_t row = 0; row < input_matrix.rows; row++) {
+            matrix_t row_matrix(1, input_matrix.cols, input_matrix.buffer + (row * input_matrix.cols));
+
+            float mean; // shared with the moving average below; only assigned when average or moving_avg_num_windows is set
+
+            if (config.average || config.moving_avg_num_windows) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::mean(&row_matrix, &out_matrix);
+                mean = out_matrix.buffer[0];
+                if (config.average) {
+                    output_matrix->buffer[out_matrix_ix++] = mean;
+                }
+            }
+
+            if (config.minimum) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::min(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.maximum) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::max(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.rms) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::rms(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.stdev) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::stdev(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.skewness) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::skew(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.kurtosis) {
+                float fbuffer;
+                matrix_t out_matrix(1, 1, &fbuffer);
+                numpy::kurtosis(&row_matrix, &out_matrix);
+                output_matrix->buffer[out_matrix_ix++] = out_matrix.buffer[0];
+            }
+
+            if (config.moving_avg_num_windows) {
+                push_mean(row, mean); // record this window's mean in the history kept for this axis
+                output_matrix->buffer[out_matrix_ix++] = numpy::mean(means[row].data(), means[row].size()); // mean of the stored means
+            }
+        }
+
+        // flatten again: expose the output as a single row
+        output_matrix->cols = output_matrix->rows * output_matrix->cols;
+        output_matrix->rows = 1;
+
+        return EIDSP_OK;
+    }
+
+    static DspHandle* create(void* config, float _sampling_frequency);
+
+    void* operator new(size_t size) noexcept {
+        // Allocate through ei_malloc. noexcept: if ei_malloc returns null, `new` yields nullptr instead of constructing on it
+        return ei_malloc(size);
+    }
+
+    void operator delete(void* ptr) {
+        // Matching release through ei_free for objects obtained from operator new above
+        ei_free(ptr);
+    }
+
+private:
+    ei_vector<ei_vector<float>> means;
+    ei_vector<size_t> head_indexes;
+    size_t moving_avg_num_windows;
+
+    flatten_class(int moving_avg_num_windows, int axes_count) // one history vector and one head index (0) per axis
+        : means(axes_count), head_indexes(axes_count, 0), moving_avg_num_windows(moving_avg_num_windows) {
+    }
+
+    void push_mean(int axis, float mean) { // store `mean` in the ring buffer kept for `axis`
+        auto& history = means[axis];
+        auto& cursor = head_indexes[axis];
+        if (cursor < history.size()) {
+            history[cursor] = mean; // slot already exists: overwrite it
+        } else {
+            history.push_back(mean); // buffer still filling up: grow by one
+        }
+        // Wrap around with a compare, which is a lot cheaper than mod (%)
+        if (++cursor >= moving_avg_num_windows) {
+            cursor = 0;
+        }
+    }
+};
+
+inline DspHandle* flatten_class::create(void* config_in, float _sampling_frequency) { // inline: defined in a header, so it may be seen by several translation units
+    auto config = reinterpret_cast<ei_dsp_config_flatten_t*>(config_in); // config_in must point at an ei_dsp_config_flatten_t; _sampling_frequency is unused
+    return new flatten_class(config->moving_avg_num_windows, config->axes); // heap-allocated; presumably released by the caller via delete - confirm
+}
+
+#endif  //!__EI_FLATTEN__H__
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/ei_hr.hpp b/edge-impulse-sdk/dsp/ei_hr.hpp
new file mode 100644
index 0000000..6570f9b
--- /dev/null
+++ b/edge-impulse-sdk/dsp/ei_hr.hpp
@@ -0,0 +1,96 @@
+/* Edge Impulse inferencing library
+ * Copyright (c) 2022 EdgeImpulse Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef HR_PPG_HPP
+#define HR_PPG_HPP
+
+#include "edge-impulse-sdk/dsp/numpy.hpp"
+#include "edge-impulse-sdk/dsp/ei_dsp_handle.h"
+#include "edge-impulse-enterprise/hr/hr_ppg.hpp"
+
+class hr_class : public DspHandle { // DSP handle that feeds fixed-size sample increments to an ei::hr_ppg instance
+public:
+    int print() override {
+        ei_printf("Last HR: %f\n", ppg._res.hr);
+        return ei::EIDSP_OK;
+    }
+
+    int extract(ei::signal_t *signal, ei::matrix_t *output_matrix, void *config_ptr, const float frequency) override {
+        using namespace ei;
+
+        // Don't need just yet
+        // ei_dsp_config_hr_t config = *((ei_dsp_config_hr_t*)config_ptr);
+        // (config_ptr and frequency are consequently not read in this function;
+        // the ppg member received its frequency in the constructor)
+        // TODO fix for axes / accel
+        size_t samples_per_inc = ppg.win_inc_samples;
+        // TODO go in a loop for the full window size, once I can actually test this vs studio
+        if(signal->total_length != samples_per_inc) {
+            return EIDSP_BUFFER_SIZE_MISMATCH;
+        }
+
+        // TODO ask for smaller increments and bp them into place
+        // Pull exactly one increment out of the signal into a scratch matrix
+        matrix_t temp(ppg.axes, samples_per_inc);
+        signal->get_data(0, samples_per_inc, temp.buffer);
+        // NOTE(review): temp.buffer and the get_data() result are used unchecked;
+        // confirm whether an allocation or read failure needs handling here
+        output_matrix->buffer[0] = ppg.stream(&temp);
+
+        output_matrix->rows = 1;
+        output_matrix->cols = 1;
+        return EIDSP_OK;
+    }
+
+    // TODO: actually read in config: axes too! (all ppg arguments except frequency are hard-coded below)
+    hr_class(float frequency) : ppg(frequency, 1, 8*50, 2*50, true) {
+    }
+
+    // Boilerplate below here
+    static DspHandle* create(void* config, float frequency);
+
+    void* operator new(size_t size) {
+        // Class-specific allocation: object storage is requested from ei_malloc
+        return ei_malloc(size);
+    }
+
+    void operator delete(void* ptr) {
+        // Counterpart of operator new above: storage is handed back to ei_free
+        ei_free(ptr);
+    }
+    // end boilerplate
+private:
+    ei::hr_ppg ppg;
+};
+
+DspHandle* hr_class::create(void* config_in, float frequency) { // NOLINT def in header is OK at EI
+    // Don't need just yet
+    // auto config = reinterpret_cast<ei_dsp_config_hr_t*>(config_in);
+    // TODO: actually read in config (config_in is currently ignored; only frequency is forwarded)
+    return new hr_class(frequency);
+}
+
+/*
+NOTE, contact EI sales for license and source to use EI heart rate and heart rate variance functions in deployment
+*/
+
+#endif
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/image/processing.cpp b/edge-impulse-sdk/dsp/image/processing.cpp
index 76aeac0..5ff30c6 100644
--- a/edge-impulse-sdk/dsp/image/processing.cpp
+++ b/edge-impulse-sdk/dsp/image/processing.cpp
@@ -33,10 +33,10 @@ enum YUV_OPTIONS
 
 /**
  * @brief Convert YUV to RGB
- * 
+ *
  * @param rgb_out Output buffer (can be the same as yuv_in if big enough)
  * @param yuv_in Input buffer
- * @param in_size_B Size of input image in B 
+ * @param in_size_B Size of input image in B
  * @param opts Note, only BIG_ENDIAN_ORDER supported presently
  */
 int yuv422_to_rgb888(
@@ -97,11 +97,11 @@ int yuv422_to_rgb888(
 /**
  * @brief Crops an image. Can be in-place. 4B alignment for best performance
  * (Alignment is tested, will fall back to B by B movement)
- * 
+ *
  * @param srcWidth X dimension in pixels
  * @param srcHeight Y dimension in pixels
- * @param srcImage Input buffer 
- * @param startX X coord of first pixel to keep 
+ * @param srcImage Input buffer
+ * @param startX X coord of first pixel to keep
  * @param startY Y coord of the first pixel to keep
  * @param dstWidth Desired X dimension in pixels (should be smaller than srcWidth)
  * @param dstHeight Desired Y dimension in pixels (should be smaller than srcHeight)
@@ -229,7 +229,7 @@ int crop_image_rgb888_packed(
  * Can be used to resize the image smaller or larger
  * If resizing much smaller than 1/3 size, then a more rubust algorithm should average all of the pixels
  * This algorithm uses bilinear interpolation - averages a 2x2 region to generate each new pixel
- * 
+ *
  * @param srcWidth Input image width in pixels
  * @param srcHeight Input image height in pixels
  * @param srcImage Input buffer
@@ -273,7 +273,7 @@ int resize_image(
     //dstWidth still needed as is
     //dstHeight shouldn't be scaled
 
-    const uint8_t *s; 
+    const uint8_t *s;
     uint8_t *d;
 
     for (y = 0; y < dstHeight; y++) {
@@ -319,7 +319,7 @@ int resize_image(
  * @brief Calculate new dims that match the aspect ratio of destination
  * This prevents a squashed look
  * The smallest axis is held constant
- * 
+ *
  * @param srcWidth Input width in pixels
  * @param srcHeight Input height in pixels
  * @param dstWidth Ultimate width in pixels
@@ -368,11 +368,42 @@ int crop_and_interpolate_rgb888(
         dstImage,
         cropWidth,
         cropHeight);
-    
+
     if( res != EIDSP_OK) { return res; }
     // Finally, interpolate down to desired dimensions, in place
     return resize_image(dstImage, cropWidth, cropHeight, dstImage, dstWidth, dstHeight, 3);
 }
 
+int crop_and_interpolate_image(
+    const uint8_t *srcImage,
+    int srcWidth,
+    int srcHeight,
+    uint8_t *dstImage,
+    int dstWidth,
+    int dstHeight,
+    int pixel_size_B)
+{
+    // Crop dimensions that keep the destination's aspect ratio
+    int cropWidth, cropHeight;
+    calculate_crop_dims(srcWidth, srcHeight, dstWidth, dstHeight, cropWidth, cropHeight);
+
+    // Horizontal quantities are scaled by pixel_size_B before they reach cropImage; the crop is centered.
+    // NOTE(review): the trailing 8 is presumably bits per unit, i.e. widths are in bytes - confirm against cropImage
+    const int srcRowUnits = srcWidth * pixel_size_B;
+    const int cropRowUnits = cropWidth * pixel_size_B;
+    const int startXUnits = ((srcWidth - cropWidth) / 2) * pixel_size_B;
+    const int startY = (srcHeight - cropHeight) / 2;
+
+    const int res = cropImage(
+        srcImage, srcRowUnits, srcHeight,
+        startXUnits, startY,
+        dstImage, cropRowUnits, cropHeight,
+        8);
+    if (res != EIDSP_OK) { return res; }
+
+    // Finally, interpolate to the requested dimensions, in place
+    return resize_image(dstImage, cropWidth, cropHeight, dstImage, dstWidth, dstHeight, pixel_size_B);
+}
+
 }}} //namespaces
 #endif //!__EI_IMAGE_PROCESSING__H__
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/image/processing.hpp b/edge-impulse-sdk/dsp/image/processing.hpp
index b3b5651..de8a3be 100644
--- a/edge-impulse-sdk/dsp/image/processing.hpp
+++ b/edge-impulse-sdk/dsp/image/processing.hpp
@@ -32,10 +32,10 @@ enum YUV_OPTIONS
 
 /**
  * @brief Convert YUV to RGB
- * 
+ *
  * @param rgb_out Output buffer (can be the same as yuv_in if big enough)
  * @param yuv_in Input buffer
- * @param in_size_B Size of input image in B 
+ * @param in_size_B Size of input image in B
  * @param opts Note, only BIG_ENDIAN_ORDER supported presently
  */
 int yuv422_to_rgb888(
@@ -47,11 +47,11 @@ int yuv422_to_rgb888(
 /**
  * @brief Crops an image. Can be in-place. 4B alignment for best performance
  * (Alignment is tested, will fall back to B by B movement)
- * 
+ *
  * @param srcWidth X dimension in pixels
  * @param srcHeight Y dimension in pixels
- * @param srcImage Input buffer 
- * @param startX X coord of first pixel to keep 
+ * @param srcImage Input buffer
+ * @param startX X coord of first pixel to keep
  * @param startY Y coord of the first pixel to keep
  * @param dstWidth Desired X dimension in pixels (should be smaller than srcWidth)
  * @param dstHeight Desired Y dimension in pixels (should be smaller than srcHeight)
@@ -99,7 +99,7 @@ constexpr int MONO_B_SIZE = 1;
  * Can be used to resize the image smaller or larger
  * If resizing much smaller than 1/3 size, then a more rubust algorithm should average all of the pixels
  * This algorithm uses bilinear interpolation - averages a 2x2 region to generate each new pixel
- * 
+ *
  * @param srcWidth Input image width in pixels
  * @param srcHeight Input image height in pixels
  * @param srcImage Input buffer
@@ -121,7 +121,7 @@ void resize_image(
  * @brief Calculate new dims that match the aspect ratio of destination
  * This prevents a squashed look
  * The smallest axis is held constant
- * 
+ *
  * @param srcWidth Input width in pixels
  * @param srcHeight Input height in pixels
  * @param dstWidth Ultimate width in pixels
@@ -140,15 +140,15 @@ void calculate_crop_dims(
 /**
  * @brief Crops, then interpolates to a desired new image size
  * Can be done in place (set srcImage == dstImage)
- * 
+ *
  * @param srcImage Input image buffer
  * @param srcWidth Input width in pixels
  * @param srcHeight Input height in pixels
- * @param dstImage Output image buffer, can be same as input buffer 
+ * @param dstImage Output image buffer, can be same as input buffer
  * @param dstWidth Desired new width in pixels
  * @param dstHeight Desired new height in pixels
  */
-void crop_and_interpolate_rgb888(
+int crop_and_interpolate_rgb888(
     const uint8_t *srcImage,
     int srcWidth,
     int srcHeight,
@@ -156,5 +156,28 @@ void crop_and_interpolate_rgb888(
     int dstWidth,
     int dstHeight);
 
+/**
+ * @brief Crops, then interpolates to a desired new image size
+ * Can be done in place (set srcImage == dstImage)
+ * A more generic version of the previously used
+ * crop_and_interpolate_rgb888
+ *
+ * @param srcImage Input image buffer
+ * @param srcWidth Input width in pixels
+ * @param srcHeight Input height in pixels
+ * @param dstImage Output image buffer, can be same as input buffer
+ * @param dstWidth Desired new width in pixels
+ * @param dstHeight Desired new height in pixels
+ * @param pixel_size_B Size of pixels in Bytes.  3 for RGB, 1 for mono
+ */
+int crop_and_interpolate_image(
+    const uint8_t *srcImage,
+    int srcWidth,
+    int srcHeight,
+    uint8_t *dstImage,
+    int dstWidth,
+    int dstHeight,
+    int pixel_size_B);
+
 }}} //namespaces
 #endif //!__EI_IMAGE_PROCESSING__H__
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h b/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h
index 7bf5762..754896a 100755
--- a/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h
+++ b/edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h
@@ -10,6 +10,8 @@
    defines kiss_fft_scalar as either short or a float type
    and defines
    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#pragma once
+
 #include "kiss_fft.h"
 #include <limits.h>
 
diff --git a/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp b/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp
index 33b1ae6..9393357 100755
--- a/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp
+++ b/edge-impulse-sdk/dsp/kissfft/kiss_fft.cpp
@@ -7,7 +7,7 @@
  */
 
 
-#include "_kiss_fft_guts.h"
+#include "edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
  */
diff --git a/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp b/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp
index 0d0dcf6..b448730 100644
--- a/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp
+++ b/edge-impulse-sdk/dsp/kissfft/kiss_fftr.cpp
@@ -6,8 +6,8 @@
  *  See COPYING file for more information.
  */
 
-#include "kiss_fftr.h"
-#include "_kiss_fft_guts.h"
+#include "edge-impulse-sdk/dsp/kissfft/kiss_fftr.h"
+#include "edge-impulse-sdk/dsp/kissfft/_kiss_fft_guts.h"
 
 struct kiss_fftr_state{
     kiss_fft_cfg substate;
diff --git a/edge-impulse-sdk/dsp/numpy.hpp b/edge-impulse-sdk/dsp/numpy.hpp
index c2caab2..1c92fe5 100644
--- a/edge-impulse-sdk/dsp/numpy.hpp
+++ b/edge-impulse-sdk/dsp/numpy.hpp
@@ -60,6 +60,9 @@
 
 namespace ei {
 
+using fvec = ei_vector<float>;
+using ivec = ei_vector<int>;
+
 // clang-format off
 // lookup table for quantized values between 0.0f and 1.0f
 static constexpr float quantized_values_one_zero[] = { (0.0f / 1.0f), (1.0f / 100.0f), (2.0f / 100.0f), (3.0f / 100.0f), (4.0f / 100.0f), (1.0f / 22.0f), (1.0f / 21.0f), (1.0f / 20.0f), (1.0f / 19.0f), (1.0f / 18.0f), (1.0f / 17.0f), (6.0f / 100.0f), (1.0f / 16.0f), (1.0f / 15.0f), (7.0f / 100.0f), (1.0f / 14.0f), (1.0f / 13.0f), (8.0f / 100.0f), (1.0f / 12.0f), (9.0f / 100.0f), (1.0f / 11.0f), (2.0f / 21.0f), (1.0f / 10.0f), (2.0f / 19.0f), (11.0f / 100.0f), (1.0f / 9.0f), (2.0f / 17.0f), (12.0f / 100.0f), (1.0f / 8.0f), (13.0f / 100.0f), (2.0f / 15.0f), (3.0f / 22.0f), (14.0f / 100.0f), (1.0f / 7.0f), (3.0f / 20.0f), (2.0f / 13.0f), (3.0f / 19.0f), (16.0f / 100.0f), (1.0f / 6.0f), (17.0f / 100.0f), (3.0f / 17.0f), (18.0f / 100.0f), (2.0f / 11.0f), (3.0f / 16.0f), (19.0f / 100.0f), (4.0f / 21.0f), (1.0f / 5.0f), (21.0f / 100.0f), (4.0f / 19.0f), (3.0f / 14.0f), (22.0f / 100.0f), (2.0f / 9.0f), (5.0f / 22.0f), (23.0f / 100.0f), (3.0f / 13.0f), (4.0f / 17.0f), (5.0f / 21.0f), (24.0f / 100.0f), (1.0f / 4.0f), (26.0f / 100.0f), (5.0f / 19.0f), (4.0f / 15.0f), (27.0f / 100.0f), (3.0f / 11.0f), (5.0f / 18.0f), (28.0f / 100.0f), (2.0f / 7.0f), (29.0f / 100.0f), (5.0f / 17.0f), (3.0f / 10.0f), (4.0f / 13.0f), (31.0f / 100.0f), (5.0f / 16.0f), (6.0f / 19.0f), (7.0f / 22.0f), (32.0f / 100.0f), (33.0f / 100.0f), (1.0f / 3.0f), (34.0f / 100.0f), (7.0f / 20.0f), (6.0f / 17.0f), (5.0f / 14.0f), (36.0f / 100.0f), (4.0f / 11.0f), (7.0f / 19.0f), (37.0f / 100.0f), (3.0f / 8.0f), (38.0f / 100.0f), (8.0f / 21.0f), (5.0f / 13.0f), (7.0f / 18.0f), (39.0f / 100.0f), (2.0f / 5.0f), (9.0f / 22.0f), (41.0f / 100.0f), (7.0f / 17.0f), (5.0f / 12.0f), (42.0f / 100.0f), (8.0f / 19.0f), (3.0f / 7.0f), (43.0f / 100.0f), (7.0f / 16.0f), (44.0f / 100.0f), (4.0f / 9.0f), (9.0f / 20.0f), (5.0f / 11.0f), (46.0f / 100.0f), (6.0f / 13.0f), (7.0f / 15.0f), (47.0f / 100.0f), (8.0f / 17.0f), (9.0f / 19.0f), (10.0f / 21.0f), (48.0f / 100.0f), (49.0f / 100.0f), (1.0f / 2.0f), (51.0f / 100.0f), (52.0f / 
100.0f), (11.0f / 21.0f), (10.0f / 19.0f), (9.0f / 17.0f), (53.0f / 100.0f), (8.0f / 15.0f), (7.0f / 13.0f), (54.0f / 100.0f), (6.0f / 11.0f), (11.0f / 20.0f), (5.0f / 9.0f), (56.0f / 100.0f), (9.0f / 16.0f), (57.0f / 100.0f), (4.0f / 7.0f), (11.0f / 19.0f), (58.0f / 100.0f), (7.0f / 12.0f), (10.0f / 17.0f), (59.0f / 100.0f), (13.0f / 22.0f), (3.0f / 5.0f), (61.0f / 100.0f), (11.0f / 18.0f), (8.0f / 13.0f), (13.0f / 21.0f), (62.0f / 100.0f), (5.0f / 8.0f), (63.0f / 100.0f), (12.0f / 19.0f), (7.0f / 11.0f), (64.0f / 100.0f), (9.0f / 14.0f), (11.0f / 17.0f), (13.0f / 20.0f), (66.0f / 100.0f), (2.0f / 3.0f), (67.0f / 100.0f), (68.0f / 100.0f), (15.0f / 22.0f), (13.0f / 19.0f), (11.0f / 16.0f), (69.0f / 100.0f), (9.0f / 13.0f), (7.0f / 10.0f), (12.0f / 17.0f), (71.0f / 100.0f), (5.0f / 7.0f), (72.0f / 100.0f), (13.0f / 18.0f), (8.0f / 11.0f), (73.0f / 100.0f), (11.0f / 15.0f), (14.0f / 19.0f), (74.0f / 100.0f), (3.0f / 4.0f), (76.0f / 100.0f), (16.0f / 21.0f), (13.0f / 17.0f), (10.0f / 13.0f), (77.0f / 100.0f), (17.0f / 22.0f), (7.0f / 9.0f), (78.0f / 100.0f), (11.0f / 14.0f), (15.0f / 19.0f), (79.0f / 100.0f), (4.0f / 5.0f), (17.0f / 21.0f), (81.0f / 100.0f), (13.0f / 16.0f), (9.0f / 11.0f), (82.0f / 100.0f), (14.0f / 17.0f), (83.0f / 100.0f), (5.0f / 6.0f), (84.0f / 100.0f), (16.0f / 19.0f), (11.0f / 13.0f), (17.0f / 20.0f), (6.0f / 7.0f), (86.0f / 100.0f), (19.0f / 22.0f), (13.0f / 15.0f), (87.0f / 100.0f), (7.0f / 8.0f), (88.0f / 100.0f), (15.0f / 17.0f), (8.0f / 9.0f), (89.0f / 100.0f), (17.0f / 19.0f), (9.0f / 10.0f), (19.0f / 21.0f), (10.0f / 11.0f), (91.0f / 100.0f), (11.0f / 12.0f), (92.0f / 100.0f), (12.0f / 13.0f), (13.0f / 14.0f), (93.0f / 100.0f), (14.0f / 15.0f), (15.0f / 16.0f), (94.0f / 100.0f), (16.0f / 17.0f), (17.0f / 18.0f), (18.0f / 19.0f), (19.0f / 20.0f), (20.0f / 21.0f), (21.0f / 22.0f), (96.0f / 100.0f), (97.0f / 100.0f), (98.0f / 100.0f), (99.0f / 100.0f), (1.0f / 1.0f) ,
@@ -270,7 +273,7 @@ class numpy {
      * @param out_matrix Pointer to out matrix (MxK)
      * @returns EIDSP_OK if OK
      */
-    static inline int dot_by_row(int i, float *row, uint32_t matrix1_cols, matrix_t *matrix2, matrix_t *out_matrix) {
+    static  int dot_by_row(int i, float *row, uint32_t matrix1_cols, matrix_t *matrix2, matrix_t *out_matrix) {
         if (matrix1_cols != matrix2->rows) {
             EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
         }
@@ -310,7 +313,7 @@ class numpy {
      * @param out_matrix Pointer to out matrix (MxK)
      * @returns EIDSP_OK if OK
      */
-    static inline int dot_by_row(int i, float *row, size_t matrix1_cols,
+    static  int dot_by_row(int i, float *row, size_t matrix1_cols,
         quantized_matrix_t *matrix2, matrix_t *out_matrix)
     {
         if (matrix1_cols != matrix2->rows) {
@@ -332,37 +335,40 @@ class numpy {
     }
 
     static void transpose_in_place(matrix_t *matrix) {
-        size_t size = matrix->cols * matrix->rows - 1;
-        float temp; // temp for swap
-        size_t next; // next item to swap
-        size_t cycleBegin; // index of start of cycle
-        size_t i; // location in matrix
-        size_t all_done_mark = 1;
-        ei_vector<bool> done(size+1,false);
-
-        i = 1; // Note that matrix[0] and last element of matrix won't move
-        while (1)
-        {
-            cycleBegin = i;
-            temp = matrix->buffer[i];
-            do
+        // Don't bother if either dim is one, just need to swap the dimension sizes
+        if( matrix->rows != 1 && matrix->cols != 1) {
+            size_t size = matrix->cols * matrix->rows - 1;
+            float temp; // temp for swap
+            size_t next; // next item to swap
+            size_t cycleBegin; // index of start of cycle
+            size_t i; // location in matrix
+            size_t all_done_mark = 1;
+            ei_vector<bool> done(size+1,false);
+
+            i = 1; // Note that matrix[0] and last element of matrix won't move
+            while (1)
             {
-                size_t col = i % matrix->cols;
-                size_t row = i / matrix->cols;
-                // swap row and col to make new idx, b/c we want to know where in the transposed matrix
-                next = col*matrix->rows + row;
-                float temp2 = matrix->buffer[next];
-                matrix->buffer[next] = temp;
-                temp = temp2;
-                done[next] = true;
-                i = next;
-            }
-            while (i != cycleBegin);
+                cycleBegin = i;
+                temp = matrix->buffer[i];
+                do
+                {
+                    size_t col = i % matrix->cols;
+                    size_t row = i / matrix->cols;
+                    // swap row and col to make new idx, b/c we want to know where in the transposed matrix
+                    next = col*matrix->rows + row;
+                    float temp2 = matrix->buffer[next];
+                    matrix->buffer[next] = temp;
+                    temp = temp2;
+                    done[next] = true;
+                    i = next;
+                }
+                while (i != cycleBegin);
 
-            // start next cycle by find next not done
-            for (i = all_done_mark; done[i]; i++) {
-                all_done_mark++; // move the high water mark so we don't look again
-                if(i>=size) { goto LOOP_END; }
+                // start next cycle by find next not done
+                for (i = all_done_mark; done[i]; i++) {
+                    all_done_mark++; // move the high water mark so we don't look again
+                    if(i>=size) { goto LOOP_END; }
+                }
             }
         }
         LOOP_END:
@@ -1436,94 +1442,30 @@ class numpy {
     }
 
     /**
-     * Convert an int32_t buffer into a float buffer, maps to -1..1
-     * @param input
-     * @param output
-     * @param length
-     * @returns 0 if OK
-     */
-    static int int32_to_float(const EIDSP_i32 *input, float *output, size_t length) {
-#if EIDSP_USE_CMSIS_DSP
-        arm_q31_to_float((q31_t *)input, output, length);
-#else
-        for (size_t ix = 0; ix < length; ix++) {
-            output[ix] = (float)(input[ix]) / 2147483648.f;
-        }
-#endif
-        return EIDSP_OK;
-    }
-
-    /**
-     * Convert an float buffer into a fixedpoint 32 bit buffer, input values are
-     * limited between -1 and 1
-     * @param input
-     * @param output
-     * @param length
-     * @returns 0 if OK
-     */
-    static int float_to_int32(const float *input, EIDSP_i32 *output, size_t length) {
-#if EIDSP_USE_CMSIS_DSP
-        arm_float_to_q31((float *)input, (q31_t *)output, length);
-#else
-        for (size_t ix = 0; ix < length; ix++) {
-            output[ix] = (EIDSP_i32)saturate((int64_t)(input[ix] * 2147483648.f), 32);
-        }
-#endif
-        return EIDSP_OK;
-    }
-
-    /**
-     * Convert an int16_t buffer into a float buffer, maps to -1..1
+     * Convert an int16_t buffer into a float buffer
      * @param input
      * @param output
      * @param length
      * @returns 0 if OK
      */
     static int int16_to_float(const EIDSP_i16 *input, float *output, size_t length) {
-#if EIDSP_USE_CMSIS_DSP
-        arm_q15_to_float((q15_t *)input, output, length);
-#else
         for (size_t ix = 0; ix < length; ix++) {
-            output[ix] = (float)(input[ix]) / 32768.f;
+            output[ix] = static_cast<float>((input[ix]));
         }
-#endif
         return EIDSP_OK;
     }
 
     /**
-     * Convert an float buffer into a fixedpoint 16 bit buffer, input values are
-     * limited between -1 and 1
-     * @param input
-     * @param output
-     * @param length
-     * @returns 0 if OK
-     */
-    static int float_to_int16(const float *input, EIDSP_i16 *output, size_t length) {
-#if EIDSP_USE_CMSIS_DSP
-        arm_float_to_q15((float *)input, output, length);
-#else
-        for (size_t ix = 0; ix < length; ix++) {
-            output[ix] = (EIDSP_i16)saturate((int32_t)(input[ix] * 32768.f), 16);
-        }
-#endif
-        return EIDSP_OK;
-    }
-
-    /**
-     * Convert an int8_t buffer into a float buffer, maps to -1..1
+     * Convert an int8_t buffer into a float buffer
      * @param input
      * @param output
      * @param length
      * @returns 0 if OK
      */
     static int int8_to_float(const EIDSP_i8 *input, float *output, size_t length) {
-#if EIDSP_USE_CMSIS_DSP
-        arm_q7_to_float((q7_t *)input, output, length);
-#else
         for (size_t ix = 0; ix < length; ix++) {
-            output[ix] = (float)(input[ix]) / 128;
+            output[ix] = static_cast<float>((input[ix]));
         }
-#endif
         return EIDSP_OK;
     }
 
@@ -2082,89 +2024,6 @@ class numpy {
       return count;
     }
 
-    static void sqrt_q15(int16_t in, int16_t *pOut)
-    {
-        int32_t bits_val1;
-        int16_t number, temp1, var1, signBits1, half;
-        float temp_float1;
-        union {
-            int32_t fracval;
-            float floatval;
-        } tempconv;
-
-        number = in;
-
-        /* If the input is a positive number then compute the signBits. */
-        if (number > 0) {
-            signBits1 = count_leading_zeros(number) - 17;
-
-            /* Shift by the number of signBits1 */
-            if ((signBits1 % 2) == 0) {
-                number = number << signBits1;
-            } else {
-                number = number << (signBits1 - 1);
-            }
-
-            /* Calculate half value of the number */
-            half = number >> 1;
-            /* Store the number for later use */
-            temp1 = number;
-
-            /* Convert to float */
-            temp_float1 = number * 3.051757812500000e-005f;
-            /* Store as integer */
-            tempconv.floatval = temp_float1;
-            bits_val1 = tempconv.fracval;
-            /* Subtract the shifted value from the magic number to give intial guess */
-            bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
-            /* Store as float */
-            tempconv.fracval = bits_val1;
-            temp_float1 = tempconv.floatval;
-            /* Convert to integer format */
-            var1 = (int32_t)(temp_float1 * 16384);
-
-            /* 1st iteration */
-            var1 =
-                ((int16_t)(
-                    (int32_t)var1 *
-                        (0x3000 -
-                         ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
-                    15))
-                << 2;
-            /* 2nd iteration */
-            var1 =
-                ((int16_t)(
-                    (int32_t)var1 *
-                        (0x3000 -
-                         ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
-                    15))
-                << 2;
-            /* 3rd iteration */
-            var1 =
-                ((int16_t)(
-                    (int32_t)var1 *
-                        (0x3000 -
-                         ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
-                    15))
-                << 2;
-
-            /* Multiply the inverse square root with the original value */
-            var1 = ((int16_t)(((int32_t)temp1 * var1) >> 15)) << 1;
-
-            /* Shift the output down accordingly */
-            if ((signBits1 % 2) == 0) {
-                var1 = var1 >> (signBits1 / 2);
-            } else {
-                var1 = var1 >> ((signBits1 - 1) / 2);
-            }
-            *pOut = var1;
-        }
-        /* If the number is a negative number then store zero as its square root value */
-        else {
-            *pOut = 0;
-        }
-    }
-
 #if EIDSP_USE_CMSIS_DSP
     /**
      * Initialize a CMSIS-DSP fast rfft structure
@@ -2174,7 +2033,7 @@ class numpy {
     static int cmsis_rfft_init_f32(arm_rfft_fast_instance_f32 *rfft_instance, const size_t n_fft)
     {
 // ARM cores (ex M55) with Helium extensions (MVEF) need special treatment (Issue 2843)
-#if EI_CLASSIFIER_HAS_FFT_INFO == 1 && !defined(ARM_MATH_MVEF)
+#if EI_CLASSIFIER_HAS_FFT_INFO == 1 && !defined(ARM_MATH_MVEF) && !defined(EI_CLASSIFIER_LOAD_ALL_FFTS)
         arm_status status;
         switch (n_fft) {
 #if EI_CLASSIFIER_LOAD_FFT_32 == 1
@@ -2443,8 +2302,349 @@ class numpy {
     {
         zero_handling(input->buffer, input->rows * input->cols);
     }
+
+    /**
+     * Flush near-zero float values to exactly zero, in place.
+     * @param input Array to clean up
+     * @param input_size Size of array
+     * @param epsilon Values whose magnitude is strictly below this become 0.0f
+     * @returns void
+     */
+    static void underflow_handling(float* input, size_t input_size, float epsilon = 1e-07f)
+    {
+        for (float* it = input; it != input + input_size; ++it) {
+            if (fabs(*it) < epsilon) {
+                *it = 0.0f;
+            }
+        }
+    }
+
+    __attribute__((unused)) static void scale(fvec& v, float scale) { // in place: every element is multiplied by scale
+        for (size_t k = 0, len = v.size(); k < len; ++k) {
+            v[k] *= scale;
+        }
+    }
+
+    __attribute__((unused)) static void sub(fvec& v, float b) { // in place: b is subtracted from every element
+        for (size_t k = 0, len = v.size(); k < len; ++k) {
+            v[k] -= b;
+        }
+    }
+
+    __attribute__((unused)) static void mul(float* y, const float* x, float* b, size_t n) { // element-wise: y[k] = x[k] * b[k]
+        for (size_t k = 0; k != n; ++k) {
+            y[k] = x[k] * b[k];
+        }
+    }
+
+    __attribute__((unused)) static fvec diff(const float* v, size_t n) { // first differences: d[i] = v[i + 1] - v[i]
+        fvec d(n > 0 ? n - 1 : 0); // guard: with n == 0 the unsigned n - 1 would wrap to SIZE_MAX
+        for (size_t i = 0; i < d.size(); i++) {
+            d[i] = v[i + 1] - v[i];
+        }
+        return d;
+    }
+
+    __attribute__((unused)) static float sum(const float* v, size_t n) { // left-to-right float accumulation of n values
+        float total = 0;
+        for (const float* p = v; p != v + n; ++p) {
+            total += *p;
+        }
+        return total;
+    }
+
+    static float mean(const fvec& v) { // arithmetic mean; divides by v.size() without an empty check
+        float total = 0;
+        for (size_t k = 0; k < v.size(); ++k) {
+            total += v[k];
+        }
+        total /= v.size();
+        return total;
+    }
+
+    static float mean(const float* v, size_t n) { // arithmetic mean of the n floats at v; n is not checked for 0
+        float total = 0;
+        for (const float* p = v; p != v + n; ++p) {
+            total += *p;
+        }
+        total /= n;
+        return total;
+    }
+
+    static float median(const float* v, size_t n) { // median taken from a sorted copy; v itself is left untouched
+        if (n == 0) { return NAN; } // guard: the even branch would otherwise read vc[n / 2 - 1] with n == 0
+        fvec vc(n);
+        std::copy(v, v + n, vc.begin());
+        std::sort(vc.begin(), vc.end());
+        const size_t mid = n / 2;
+        // even count: average the two central elements; odd count: take the central one
+        return (n % 2 == 0) ? (vc[mid - 1] + vc[mid]) / 2 : vc[mid];
+    }
+
+    __attribute__((unused)) static float median(const fvec& v) { // convenience overload for a whole vector
+        return median(v.data(), v.size());
+    }
+
+    static float stddev(const float* v, size_t n, float m /* mean */, int ddof = 0) {
+        float sq_sum = 0; // running sum of squared deviations from the supplied mean m
+        for (const float* p = v; p != v + n; ++p) {
+            sq_sum += (*p - m) * (*p - m);
+        }
+        sq_sum /= n - ddof; // ddof is subtracted from the divisor; n - ddof == 0 is not guarded
+        return sqrt(sq_sum);
+    }
+
+    __attribute__((unused)) static float stddev(const float* v, size_t n) { // mean computed here, ddof = 0
+        return stddev(v, n, mean(v, n), 0);
+    }
+
+    __attribute__((unused)) static float stddev(const float* v, size_t n, int ddof) { // mean computed here, caller picks ddof
+        return stddev(v, n, mean(v, n), ddof);
+    }
+
+    __attribute__((unused)) static float stddev(const fvec& v, int ddof = 0) { // whole-vector convenience overload
+        return stddev(v.data(), v.size(), mean(v), ddof);
+    }
+
+    static float rms(const float* v, size_t n) { // root mean square: sqrt(sum(v[i]^2) / n)
+        float sq_sum = 0;
+        for (const float* p = v; p != v + n; ++p) {
+            sq_sum += (*p) * (*p);
+        }
+        sq_sum /= n;
+        return sqrt(sq_sum);
+    }
+
+    __attribute__((unused)) static float rms(const fvec& v) { // whole-vector convenience overload
+        return rms(v.data(), v.size());
+    }
+
+    template <typename T>
+    static float max(const ei_vector<T>& v) { // largest element, returned as float whatever T is
+        return *std::max_element(v.begin(), v.end()); // the result is dereferenced directly, so v must not be empty
+    }
+
+    __attribute__((unused)) static float max(const float* v, size_t n) { // largest of the n floats at v; n must be > 0
+        return *std::max_element(v, v + n);
+    }
+
+    template <typename T>
+    static float min(const ei_vector<T>& v) { // smallest element, returned as float whatever T is
+        return *std::min_element(v.begin(), v.end()); // the result is dereferenced directly, so v must not be empty
+    }
+
+    __attribute__((unused)) static float min(const float* v, size_t n) { // smallest of the n floats at v; n must be > 0
+        return *std::min_element(v, v + n);
+    }
+
+    __attribute__((unused)) static int argmax(const fvec& v, int start, int end) { // searches the half-open range v[start, end)
+        return std::max_element(v.begin() + start, v.begin() + end) - v.begin(); // index is measured from v.begin(), not from start
+    }
+
+    __attribute__((unused)) static fvec divide(float num, const float* den, size_t n) { // result[k] = num / den[k]
+        fvec quotients(n);
+        for (size_t k = 0; k != n; ++k) {
+            quotients[k] = num / den[k];
+        }
+        return quotients;
+    }
+
+    __attribute__((unused)) static ivec histogram(const float* x, size_t n, int a, int b, int inc) {
+        // For inc > 0, bin k counts values in [a + k*inc, a + (k+1)*inc); inc == 0 or a negative bin count gives an empty result
+        const int num_bins = (inc != 0) ? std::max(0, (b - a) / inc) : 0;
+        ivec bins(num_bins, 0);
+        for (size_t i = 0; i < n; i++) {
+            // floor instead of an int cast: truncation toward zero counted values just outside the range in bin 0
+            const float bin = ::floor((x[i] - a) / inc);
+            if (bin >= 0 && bin < num_bins) { bins[static_cast<size_t>(bin)]++; }
+        }
+        return bins;
+    }
+
+    __attribute__((unused)) static fvec cumsum(const float* v, size_t n) { // running sum: c[i] = v[0] + ... + v[i]
+        fvec c(n);
+        // the first element is seeded inside the loop so that n == 0 never touches c[0] or v[0]
+        for (size_t i = 0; i < n; i++) {
+            c[i] = (i == 0) ? v[i] : c[i - 1] + v[i];
+        }
+        return c;
+    }
+
+    __attribute__((unused)) static fvec arange(float start, float end, float step) { // evenly spaced values: v[i] = start + i * step
+        assert(start < end); // preconditions are only enforced in builds where assert is active
+        assert(step > 0);
+        fvec v(::round((end - start) / step)); // NOTE(review): length is rounded to nearest, whereas numpy.arange documents ceil - confirm this is intended
+        for (size_t i = 0; i < v.size(); i++) {
+            v[i] = start + i * step;
+        }
+        return v;
+    }
+
+    // In-place element-wise sum v[i] += b[i] for every index of v; b is read at each of those indices.
+    __attribute__((unused)) static void add(fvec& v, fvec& b) {
+        size_t k = 0;
+        for (auto& dst : v) { dst += b[k++]; }
+    }
+
+    __attribute__((unused)) static float trapz(const fvec& x, const fvec& y, size_t lo, size_t hi) { // trapezoid sum over segments lo .. hi - 1; reads indices lo .. hi
+        float total = 0;
+        for (size_t left = lo, right = lo + 1; left < hi; left++, right++) {
+            total += (x[right] - x[left]) * (y[right] + y[left]) / 2;
+        }
+        return total;
+    }
+
+    // For each fraction q[i] (expected in [0, 1] - TODO confirm with callers), picks element floor(q[i] * count) of the sorted slice v[start, end).
+    __attribute__((unused)) static fvec quantile(const fvec& v, size_t start, size_t end, const fvec& q) {
+        end = std::min(end, v.size());
+        fvec vc(v.begin() + std::min(start, end), v.begin() + end); // clamp start: an inverted range would underflow the size
+        std::sort(vc.begin(), vc.end());
+        fvec res(q.size());
+        for (size_t i = 0; i < q.size() && !vc.empty(); i++) { // an empty slice has nothing to index
+            res[i] = vc[std::min((size_t)(q[i] * vc.size()), vc.size() - 1)]; // q[i] == 1 would index one past the end
+        }
+        return res;
+    }
+
+    // For each fraction q[i] (expected in [0, 1] - TODO confirm with callers), picks element floor(q[i] * n) of a sorted copy of v[0..n).
+    __attribute__((unused)) static fvec quantile(const float* v, size_t n, const fvec& q) {
+        fvec vc(v, v + n);
+        std::sort(vc.begin(), vc.end());
+        fvec res(q.size());
+        for (size_t i = 0; i < q.size() && !vc.empty(); i++) { // n == 0 leaves nothing to index
+            res[i] = vc[std::min((size_t)(q[i] * vc.size()), vc.size() - 1)]; // q[i] == 1 would index one past the end
+        }
+        return res;
+    }
+
+    // Dot product of x and y over their first n elements, accumulated in index order.
+    static float dot(const float* x, const float* y, size_t n) {
+        float acc = 0;
+        size_t k = 0;
+        while (k < n) { acc += x[k] * y[k]; k++; }
+        return acc;
+    }
+
+
+    // dot(x, y) / sqrt(dot(x, x) * dot(y, y)); the cross term reads y over x.size() elements.
+    __attribute__((unused)) static float cosine_similarity(const fvec& x, const fvec& y) {
+        const float cross = dot(x.data(), y.data(), x.size());
+        const float norm_sq_x = dot(x.data(), x.data(), x.size());
+        const float norm_sq_y = dot(y.data(), y.data(), y.size());
+        return cross / sqrt(norm_sq_x * norm_sq_y);
+    }
+
+    // Replaces every element of v with its natural logarithm, in place.
+    __attribute__((unused)) static void ln(fvec& v) {
+        const size_t count = v.size();
+        for (size_t k = 0; k < count; k++) { v[k] = log(v[k]); }
+    }
+
+    // Smallest power of two that is >= x (1 for x == 0 or 1); does not terminate if x exceeds the largest power of two in size_t.
+    static size_t next_power_of_2(size_t x) {
+        size_t pow2 = 1;
+        for (; pow2 < x; pow2 <<= 1) {
+        }
+        return pow2;
+    }
+
+    // Removes the least-squares linear trend, then the remaining mean, from data[0..n) in place.
+    static void detrend(float* data, size_t n) {
+        // Calculate the mean of the data points
+        float mean = 0.0;
+        for (size_t i = 0; i < n; i++) {
+            mean += data[i];
+        }
+        mean /= n;
+
+        // Calculate the slope of the best-fit line; sample i sits at x = i + 1
+        float x_mean = (n + 1) / 2.0;
+        float numerator = 0.0;
+        float denominator = 0.0;
+        for (size_t i = 0; i < n; i++) {
+            numerator += (i + 1 - x_mean) * (data[i] - mean);
+            denominator += (i + 1 - x_mean) * (i + 1 - x_mean);
+        }
+        float slope = (denominator != 0.0f) ? numerator / denominator : 0.0f; // n == 1: zero slope instead of 0 / 0 = NaN
+
+        // Subtract the best-fit line from the data points to get the detrended data
+        for (size_t i = 0; i < n; i++) {
+            data[i] = data[i] - (slope * (i + 1));
+        }
+
+        // Calculate the mean of the detrended data
+        float detrended_mean = 0.0;
+        for (size_t i = 0; i < n; i++) {
+            detrended_mean += data[i];
+        }
+        detrended_mean /= n;
+
+        // Subtract the mean of the detrended data from each element
+        for (size_t i = 0; i < n; i++) {
+            data[i] -= detrended_mean;
+        }
+    }
+
+    static fvec detrend(const fvec& data) { // non-mutating overload: returns a detrended copy
+        auto ret = data; // copy, so the caller's vector is left unchanged
+        detrend(ret.data(), ret.size()); // detrend the copy in place via the (pointer, length) overload
+        return ret;
+    }
+
 };
 
+// Owns a heap-allocated ei_matrix and exposes row-pointer indexing plus fill helpers.
+struct fmat {
+    ei_matrix* mat = nullptr;
+    fmat(size_t rows, size_t cols) {
+        mat = new ei_matrix(rows, cols);
+        assert(mat);
+    }
+
+    // The destructor deletes `mat`, so a copied fmat would delete it twice: forbid copies, allow moves.
+    fmat(const fmat&) = delete;
+    fmat& operator=(const fmat&) = delete;
+    fmat(fmat&& other) noexcept : mat(other.mat) { other.mat = nullptr; }
+
+    ~fmat() { delete mat; }
+
+    void resize(size_t rows, size_t cols) { // replaces the matrix with a newly allocated rows x cols one
+        delete mat;
+        mat = nullptr; // never leave a dangling pointer for the destructor if the allocation throws
+        mat = new ei_matrix(rows, cols);
+    }
+
+    float* operator[](size_t i) { // pointer to row i, or nullptr if there is no matrix or i is out of range
+        if (mat == nullptr || i >= mat->rows) {
+            return nullptr;
+        }
+        return mat->get_row_ptr(i);
+    }
+
+    void fill(float x) { // sets every element to x
+        if (mat == nullptr) { return; }
+        for (size_t i = 0; i < mat->rows; i++) {
+            for (size_t j = 0; j < mat->cols; j++) {
+                (*this)[i][j] = x;
+            }
+        }
+    }
+
+    void fill_col(size_t col, float x) { // sets column `col` to x; an out-of-range column is ignored
+        if (mat == nullptr || col >= mat->cols) { return; }
+        for (size_t i = 0; i < mat->rows; i++) {
+            (*this)[i][col] = x;
+        }
+    }
+
+    void fill_row(size_t row, float x) { // sets row `row` to x; an out-of-range row is ignored
+        float* dst = (*this)[row]; // nullptr when there is no matrix or `row` is out of range
+        if (dst == nullptr) { return; }
+        for (size_t i = 0; i < mat->cols; i++) {
+            dst[i] = x;
+        }
+    }
+};
 } // namespace ei
 
 #endif // _EIDSP_NUMPY_H_
diff --git a/edge-impulse-sdk/dsp/numpy_types.h b/edge-impulse-sdk/dsp/numpy_types.h
index 5304541..98b9c78 100644
--- a/edge-impulse-sdk/dsp/numpy_types.h
+++ b/edge-impulse-sdk/dsp/numpy_types.h
@@ -24,13 +24,13 @@
 #include <stddef.h>
 #ifdef __cplusplus
 #include <functional>
+#include "edge-impulse-sdk/dsp/ei_vector.h"
 #ifdef __MBED__
 #include "mbed.h"
 #endif // __MBED__
 #endif // __cplusplus
 #include "config.hpp"
-
-#include "../porting/ei_classifier_porting.h"
+#include "edge-impulse-sdk/dsp/returntypes.h"
 
 #if EIDSP_TRACK_ALLOCATIONS
 #include "memory.hpp"
@@ -142,6 +142,8 @@ typedef struct ei_matrix {
         return buffer + row * cols;
     }
 
+    ei_matrix(ei_vector<float> &in) : ei_matrix(1, in.size(), in.data()) {
+    }
 #endif // #ifdef __cplusplus
 } matrix_t;
 
@@ -555,16 +557,47 @@ typedef enum {
 } DCT_NORMALIZATION_MODE;
 
 /**
- * Sensor signal structure
+ * @addtogroup ei_structs
+ * @{
+ */
+
+/**
+ * @brief Holds the callback pointer for retrieving raw data and the length 
+ *  of data to be retrieved.
+ * 
+ *  Holds the callback function, `get_data(size_t offset, size_t length, float 
+ *  *out_ptr)`. This callback should be implemented by the user and fills the memory
+ *  location given by `*out_ptr` with raw features. Features must be flattened to a
+ *  1-dimensional vector, as described in 
+ *  [this guide](https://docs.edgeimpulse.com/docs/deploy-your-model-as-a-c-library#signal-structure).
+ * 
+ *  `get_data()` may be called multiple times during preprocessing or inference (e.g.
+ *  during execution of 
+ *  [run_classifier()](https://docs.edgeimpulse.com/reference/run_classifier) or
+ *  [run_classifier_continuous()](https://docs.edgeimpulse.com/reference/run_classifier_continuous)). 
+ *  The `offset` argument will update to point to new data, and `length` data must 
+ *  be copied into the location specified by `out_ptr`. This scheme allows raw features
+ *  to be stored in RAM or flash memory and paged in as necessary.
+ * 
+ *  Note that `get_data()` (even after multiple calls during a single execution of
+ *  `run_classifier()` or `run_classifier_continuous()`) will never request more than a
+ *  total number of features as given by `total_length`.
+ * 
+ * **Source**: [dsp/numpy_types.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/dsp/numpy_types.h)
+ * 
+ * **Example**: [standalone inferencing main.cpp](https://github.com/edgeimpulse/example-standalone-inferencing/blob/master/source/main.cpp)
  */
 typedef struct ei_signal_t {
     /**
-     * A function to retrieve part of the sensor signal
-     * No bytes will be requested outside of the `total_length`.
-     * @param offset The offset in the signal
-     * @param length The total length of the signal
-     * @param out_ptr An out buffer to set the signal data
-     */
+     * Callback function to be implemented by the user. Parameters are given as 
+     * `get_data(size_t offset, size_t length, float *out_ptr)` and should return an 
+     * int (e.g. `EIDSP_OK` if copying completed successfully). No bytes will be
+     * requested outside of the `total_length`.
+     * Callback parameters:  
+     * `offset`: The offset in the signal  
+     * `length`: The number of samples to write into `out_ptr`  
+     * `out_ptr`: An out buffer to set the signal data  
+    */
 #if EIDSP_SIGNAL_C_FN_POINTER == 1
     int (*get_data)(size_t, size_t, float *);
 #else
@@ -575,9 +608,16 @@ typedef struct ei_signal_t {
 #endif // __MBED__
 #endif // EIDSP_SIGNAL_C_FN_POINTER == 1
 
+    /**
+     * Total number of samples the user will provide (via `get_data()`). This value should match either the
+     *  total number of raw features required for a full window (i.e. the window size in Studio, but in samples) or,
+     *  when using `run_classifier_continuous()`, the number of samples in a single slice.
+    */
     size_t total_length;
 } signal_t;
 
+/** @} */
+
 #ifdef __cplusplus
 } // namespace ei {
 #endif // __cplusplus
diff --git a/edge-impulse-sdk/dsp/returntypes.h b/edge-impulse-sdk/dsp/returntypes.h
new file mode 100644
index 0000000..a7e7191
--- /dev/null
+++ b/edge-impulse-sdk/dsp/returntypes.h
@@ -0,0 +1,48 @@
+#ifndef _EIDSP_RETURN_TYPES_H_
+#define _EIDSP_RETURN_TYPES_H_
+
+#include <stdint.h>
+
+/**
+ * @defgroup ei_returntypes Return codes
+ * 
+ * Return codes for Edge Impulse functions.
+ * 
+ * **Source**: [dsp/returntypes.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/dsp/returntypes.h)
+ * 
+ * @addtogroup ei_returntypes
+ * @{
+ */
+
+// outside of namespace for backwards compat
+typedef enum {
+    EI_IMPULSE_OK = 0, /**< Success */
+    EI_IMPULSE_ERROR_SHAPES_DONT_MATCH = -1, /**< The shape of data does not match the shape of input layer. */
+    EI_IMPULSE_CANCELED = -2, /**< Impulse execution is cancelled by user. */
+    EI_IMPULSE_TFLITE_ERROR = -3, /**< Error in TensorFlow Lite inference engine */
+    EI_IMPULSE_DSP_ERROR = -5, /**< Error in processing portion of impulse */
+    EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED = -6, /**< Failed to allocate memory in TensorFlow Lite arena, often caused by a lack of available heap memory. */
+    EI_IMPULSE_CUBEAI_ERROR = -7, /**< Error in CubeAI inference engine (STM32) */
+    EI_IMPULSE_ALLOC_FAILED = -8, /**< Memory allocation failed. Could be caused by a fragmented heap. Try to increase heap size. */
+    EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES = -9, /**< This function is only supported for impulses with an image input. */
+    EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE = -10, /**< The chosen inference engine (e.g. in Studio) is incapable of running this impulse. */
+    EI_IMPULSE_OUT_OF_MEMORY = -11, /**< Out of memory. Could be caused by a fragmented heap. Try to increase heap size. */
+    EI_IMPULSE_INPUT_TENSOR_WAS_NULL = -13, /**< Input tensor was null */
+    EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL = -14, /**< Output tensor was null */
+    EI_IMPULSE_SCORE_TENSOR_WAS_NULL = -15, /**< Score tensor is null (for SSD Object Detection models). */
+    EI_IMPULSE_LABEL_TENSOR_WAS_NULL = -16, /**< Label tensor is null (for SSD Object Detection models). */
+    EI_IMPULSE_TENSORRT_INIT_FAILED = -17, /**< TensorRT (NVIDIA) initialization failed. */
+    EI_IMPULSE_DRPAI_INIT_FAILED = -18, /**< DRP-AI (Renesas) initialization failed. */
+    EI_IMPULSE_DRPAI_RUNTIME_FAILED = -19, /**< DRP-AI (Renesas) runtime failed. */
+    EI_IMPULSE_DEPRECATED_MODEL = -20, /**< The model is deprecated and cannot be used. You should re-export the impulse from Studio. */
+    EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE = -21, /**< The last layer is not available in the model. */
+    EI_IMPULSE_INFERENCE_ERROR = -22, /**< Error during inference. */
+    EI_IMPULSE_AKIDA_ERROR = -23, /**< Error in Akida inference engine (BrainChip) */
+    EI_IMPULSE_INVALID_SIZE = -24, /**< The shape of data does not match the shape of input layer. */
+    EI_IMPULSE_ONNX_ERROR = -25, /**< Error in ONNX inference engine */
+    EI_IMPULSE_MEMRYX_ERROR = -26, /**< Error in Memryx inference engine */
+} EI_IMPULSE_ERROR;
+
+#endif // _EIDSP_RETURN_TYPES_H_
+
+/** @} */
\ No newline at end of file
diff --git a/edge-impulse-sdk/dsp/returntypes.hpp b/edge-impulse-sdk/dsp/returntypes.hpp
index 01cdbf6..f8eba0a 100644
--- a/edge-impulse-sdk/dsp/returntypes.hpp
+++ b/edge-impulse-sdk/dsp/returntypes.hpp
@@ -15,10 +15,11 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifndef _EIDSP_RETURN_TYPES_H_
-#define _EIDSP_RETURN_TYPES_H_
+#ifndef _EIDSP_RETURN_TYPES_HPP_
+#define _EIDSP_RETURN_TYPES_HPP_
 
 #include <stdint.h>
+#include "returntypes.h"
 
 namespace ei {
 
diff --git a/edge-impulse-sdk/dsp/spectral/feature.hpp b/edge-impulse-sdk/dsp/spectral/feature.hpp
index 553ba2b..aada87e 100644
--- a/edge-impulse-sdk/dsp/spectral/feature.hpp
+++ b/edge-impulse-sdk/dsp/spectral/feature.hpp
@@ -304,6 +304,9 @@ class feature {
     {
         // we want to find n such that fcutoff < sample_f / fft * n ( or > for high pass )
         // also, + - half bin width (sample_f/(fft*2)) for high / low pass
+        if (filter_cutoff > sampling_freq / 2) {
+            filter_cutoff = sampling_freq / 2;
+        }
         float bin = filter_cutoff * fft_length / sampling_freq;
         if (is_high_pass) {
             *start_bin = static_cast<size_t>(bin - 0.5) + 1; // add one b/c we want to always round up
@@ -575,9 +578,11 @@ class feature {
     static int extract_spectral_analysis_features_v4(
         matrix_t *input_matrix,
         matrix_t *output_matrix,
-        ei_dsp_config_spectral_analysis_t *config,
+        ei_dsp_config_spectral_analysis_t *config_p,
         const float sampling_freq)
     {
+        auto config_copy = *config_p;
+        auto config = &config_copy;
         if (strcmp(config->analysis_type, "Wavelet") == 0) {
             return wavelet::extract_wavelet_features(input_matrix, output_matrix, config, sampling_freq);
         }
@@ -610,6 +615,29 @@ class feature {
 
             float new_sampling_freq = sampling_freq / config->input_decimation_ratio;
 
+            // filter here, before decimating, instead of inside extract_spec_features
+            if (strcmp(config->filter_type, "low") == 0) {
+                if( config->filter_order ) {
+                    EI_TRY(spectral::processing::butterworth_lowpass_filter(
+                        input_matrix,
+                        new_sampling_freq,
+                        config->filter_cutoff,
+                        config->filter_order));
+                }
+            }
+            else if (strcmp(config->filter_type, "high") == 0) {
+                if( config->filter_order ) {
+                    EI_TRY(spectral::processing::butterworth_highpass_filter(
+                        input_matrix,
+                        new_sampling_freq,
+                        config->filter_cutoff,
+                        config->filter_order));
+                }
+            }
+            
+            // set the filter order to 0, so that we won't double filter
+            config->filter_order = 0;
+
             // do this before extract_spec_features because extract_spec_features modifies the matrix
             constexpr size_t decimation = 10;
             const size_t decimated_size =
@@ -626,6 +654,7 @@ class feature {
                 false);
 
             if (n_features > 0 && config->extra_low_freq) {
+                // disable filtering post decimation
                 matrix_t lf_features(1, output_matrix->rows * output_matrix->cols - n_features,
                     output_matrix->buffer + n_features);
 
diff --git a/edge-impulse-sdk/dsp/spectral/signal.hpp b/edge-impulse-sdk/dsp/spectral/signal.hpp
index d7137f8..37fb0e9 100644
--- a/edge-impulse-sdk/dsp/spectral/signal.hpp
+++ b/edge-impulse-sdk/dsp/spectral/signal.hpp
@@ -24,6 +24,7 @@
 
 #include "edge-impulse-sdk/dsp/ei_vector.h"
 #include <assert.h>
+#include <string.h>
 
 namespace ei {
 
@@ -85,53 +86,52 @@ class signal {
 
     struct sosfilt {
         const float *coeff; // 6 * num_sections coefficients
-        float *zi; // 2 * num_sections initial conditions
+        float* zi = nullptr; // not assigned by the constructor or update(); null rather than indeterminate. State lives in zi_vec.
+        fvec zi_vec; // 2 * num_sections initial conditions
         size_t num_sections;
 
         sosfilt(const float *coeff_, const float *zi_, size_t num_sections_)
-            : coeff(coeff_)
-            , num_sections(num_sections_)
+            : coeff(coeff_),
+              zi_vec(zi_, zi_ + (num_sections_ * 2)),
+              num_sections(num_sections_)
         {
-            zi = (float *)ei_calloc(2 * num_sections, sizeof(float));
-            memcpy(zi, zi_, 2 * num_sections * sizeof(float));
         }
 
-        ~sosfilt()
+        void update(const float *coeff_, const float *zi_)
         {
-            ei_free(zi);
+            coeff = coeff_;
+            zi_vec.assign(zi_, zi_ + (num_sections * 2));
         }
 
         /**
          * @brief IIR filters in second-order sections.
          * This is the counterpart of scipy.signal.sosfilt .
          * @param input Input signal
-         * @param output Output signal
-         * @param sos Second-order section coefficients
-         * @param zi Initial conditions
+         * @param output Output signal. Can be the same as input for in place
+         * @param size Number of samples to filter; input and output must each hold at least this many
          */
-        void run(const float *x, const size_t x_size, fvec &y)
+        void run(const float *input, const size_t size, float* output)
         {
-            assert(y.size() >= x_size);
             assert(num_sections > 0);
 
-            iir2(x, y.data(), x_size, coeff, coeff + 3, zi);
+            iir2(input, output, size, coeff, coeff + 3, zi_vec.data());
 
             for (size_t sect = 1; sect < num_sections; sect++) {
                 iir2(
-                    y.data(),
-                    y.data(),
-                    y.size(),
+                    output,
+                    output,
+                    size,
                     coeff + sect * 6,
                     coeff + sect * 6 + 3,
-                    zi + sect * 2);
+                    zi_vec.data() + sect * 2);
             }
         }
 
         void init(float x0)
         {
             for (size_t sect = 0; sect < num_sections; sect++) {
-                zi[sect * 2] *= x0;
-                zi[sect * 2 + 1] *= x0;
+                zi_vec.data()[sect * 2] *= x0;
+                zi_vec.data()[sect * 2 + 1] *= x0;
             }
         }
     };
@@ -155,7 +155,7 @@ class signal {
         sos.init(input[0]);
 
         fvec filtered(input_size);
-        sos.run(input, input_size, filtered);
+        sos.run(input, input_size, filtered.data());
 
         size_t expected_size = get_decimated_size(input_size, factor);
         assert(output_size >= expected_size);
@@ -222,7 +222,6 @@ class signal {
         }
     }
 
-    // https://www.tutorialspoint.com/cplusplus-program-to-find-gcd
     static int gcd(int a, int b)
     {
         if (b == 0)
@@ -237,12 +236,13 @@ class signal {
      * @param y Output signal
      * @param h FIR coefficients
      */
-    static void upfirdn(const fvec &x, fvec &y, int up, int down, const fvec &h)
+    static void upfirdn(const float * x, size_t x_size, fvec &y, int up, int down, const fvec &h)
     {
         assert(up > 0);
         assert(down > 0);
         assert(h.size() > 0);
 
+#if 0 // bug in optimized version
         const int N = (h.size() - 1) / 2;
 
         for (size_t n = 0; n < y.size(); n++) {
@@ -255,16 +255,48 @@ class signal {
             }
             y[n] = acc;
         }
+#else
+        int nx = x_size;
+        int nh = h.size();
+
+        // Upsample the input signal by inserting zeros
+        fvec r(up * nx);
+        for (int i = 0; i < nx; i++)
+        {
+            r[i * up] = x[i];
+        }
+
+        // Filter the upsampled signal: full linear convolution of r with h (length nh + up * nx - 1)
+        fvec z(nh + up * nx - 1);
+        for (int i = 0; i < nh + up * nx - 1; i++) // include the tail: the downsampling below reads past up * nx
+        {
+            for (int j = 0; j < nh; j++)
+            {
+                if (i - j >= 0 && i - j < up * nx)
+                {
+                    z[i] += r[i - j] * h[j];
+                }
+            }
+        }
+
+        // Downsample the filtered signal by skipping samples, starting (nh - 1) / 2 samples in
+        int skip = (nh - 1) / 2;
+        for (size_t i = 0; i < y.size(); i++)
+        {
+            y[i] = z[i * down + skip];
+        }
+#endif
+
     }
 
     /**
      * @brief Resample using a polyphase FIR.
      * This is the counterpart of scipy.signal.resample_poly.
      * @param input Input signal
-     * @param output Output signal
+     * @param output Output signal, will be moved from an internal vector sized correctly.
      * @param window FIR coefficients. e.g. signal.firwin(2 * half_len + 1, f_c, window=('kaiser', 5.0))
      */
-    static void resample_poly(const fvec &input, fvec &output, int up, int down, const fvec &window)
+    static void resample_poly(const float* input, size_t input_size, fvec &output, int up, int down, const fvec &window)
     {
         assert(up > 0);
         assert(down > 0);
@@ -275,20 +307,19 @@ class signal {
         down /= gcd_up_down;
 
         if (up == 1 && down == 1) {
-            output = input;
+            // output = std::move(fvec(input, input + input_size));
+            output = fvec(input, input + input_size);
             return;
         }
 
-        int n_out = (input.size() * up);
+        int n_out = (input_size * up);
         n_out = n_out / down + (n_out % down == 0 ? 0 : 1);
 
         fvec h = window;
         scale(h, float(up));
 
-        fvec y(n_out);
-        upfirdn(input, y, up, down, h);
-
-        output = y;
+        output.resize(n_out);
+        upfirdn(input, input_size, output, up, down, h);
     }
 
     static void calc_decimation_ratios(
diff --git a/edge-impulse-sdk/dsp/spectral/wavelet.hpp b/edge-impulse-sdk/dsp/spectral/wavelet.hpp
index 98f98a4..ba19b29 100644
--- a/edge-impulse-sdk/dsp/spectral/wavelet.hpp
+++ b/edge-impulse-sdk/dsp/spectral/wavelet.hpp
@@ -222,6 +222,9 @@ class wavelet {
             a[i] = dot(xx + 2 * i, h, nh);
             d[i] = dot(xx + 2 * i, g, nh);
         }
+
+        numpy::underflow_handling(d.data(), d.size());
+        numpy::underflow_handling(a.data(), a.size());
     }
 
     static void extract_features(fvec& y, fvec &features)
diff --git a/edge-impulse-sdk/dsp/speechpy/feature.hpp b/edge-impulse-sdk/dsp/speechpy/feature.hpp
index 845e48d..89765b2 100644
--- a/edge-impulse-sdk/dsp/speechpy/feature.hpp
+++ b/edge-impulse-sdk/dsp/speechpy/feature.hpp
@@ -609,8 +609,8 @@ class feature {
                 EIDSP_ERR(ret);
             }
 
-            // normalize data (only when version is above 3)
-            if (version >= 3) {
+            // normalize data (only when version is 3)
+            if (version == 3) {
                 // it might be that everything is already normalized here...
                 bool all_between_min_1_and_1 = true;
                 for (size_t ix = 0; ix < signal_frame.rows * signal_frame.cols; ix++) {
diff --git a/edge-impulse-sdk/dsp/speechpy/processing.hpp b/edge-impulse-sdk/dsp/speechpy/processing.hpp
index d967af2..5b34b1b 100644
--- a/edge-impulse-sdk/dsp/speechpy/processing.hpp
+++ b/edge-impulse-sdk/dsp/speechpy/processing.hpp
@@ -84,9 +84,6 @@ namespace processing {
                 EIDSP_ERR(ret);
             }
 
-            // it might be that everything is already normalized here...
-            bool all_between_min_1_and_1 = true;
-
             // now we have the signal and we can preemphasize
             for (size_t ix = 0; ix < length; ix++) {
                 float now = out_buffer[ix];
@@ -100,12 +97,6 @@ namespace processing {
                     out_buffer[ix] = now - (_cof * _prev_buffer[0]);
                 }
 
-                if (_rescale && all_between_min_1_and_1) {
-                    if (out_buffer[ix] < -1.0f || out_buffer[ix] > 1.0f) {
-                        all_between_min_1_and_1 = false;
-                    }
-                }
-
                 // roll through and overwrite last element
                 if (_shift != 1) {
                     numpy::roll(_prev_buffer, _shift, -1);
@@ -116,7 +107,7 @@ namespace processing {
             _next_offset_should_be += length;
 
             // rescale from [-1 .. 1] ?
-            if (_rescale && !all_between_min_1_and_1) {
+            if (_rescale) {
                 matrix_t scale_matrix(length, 1, out_buffer);
                 ret = numpy::scale(&scale_matrix, 1.0f / 32768.0f);
                 if (ret != 0) {
@@ -212,7 +203,7 @@ namespace processing {
      * @param frame_stride (float): The stride between frames.
      * @returns Number of frames required, or a negative number if an error occured
      */
-    static int calculate_signal_used(
+    __attribute__((unused)) static int calculate_signal_used(
         size_t signal_size,
         uint32_t sampling_frequency,
         float frame_length,
@@ -524,7 +515,7 @@ namespace processing {
      * then add a hard filter
      * @param features_matrix input feature matrix, will be modified in place
      */
-    static int spectrogram_normalization(matrix_t *features_matrix, int noise_floor_db) {
+    static int spectrogram_normalization(matrix_t *features_matrix, int noise_floor_db, bool clip_at_one) {
         const float noise = static_cast<float>(noise_floor_db * -1);
         const float noise_scale = 1.0f / (static_cast<float>(noise_floor_db * -1) + 12.0f);
 
@@ -539,7 +530,7 @@ namespace processing {
             f *= noise_scale;
             // clip again
             if (f < 0.0f) f = 0.0f;
-            else if (f > 1.0f) f = 1.0f;
+            else if (f > 1.0f && clip_at_one) f = 1.0f;
             features_matrix->buffer[ix] = f;
         }
 
diff --git a/edge-impulse-sdk/porting/ei_classifier_porting.h b/edge-impulse-sdk/porting/ei_classifier_porting.h
index 1a057f3..c0558e8 100644
--- a/edge-impulse-sdk/porting/ei_classifier_porting.h
+++ b/edge-impulse-sdk/porting/ei_classifier_porting.h
@@ -20,106 +20,243 @@
 
 #include <stdint.h>
 #include <stdlib.h>
-#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
+#include "edge-impulse-sdk/dsp/returntypes.h"
 
 #if defined(__cplusplus) && EI_C_LINKAGE == 1
 extern "C" {
 #endif // defined(__cplusplus)
 
-typedef enum {
-    EI_IMPULSE_OK = 0,
-    EI_IMPULSE_ERROR_SHAPES_DONT_MATCH = -1,
-    EI_IMPULSE_CANCELED = -2,
-    EI_IMPULSE_TFLITE_ERROR = -3,
-    EI_IMPULSE_DSP_ERROR = -5,
-    EI_IMPULSE_TFLITE_ARENA_ALLOC_FAILED = -6,
-    EI_IMPULSE_CUBEAI_ERROR = -7,
-    EI_IMPULSE_ALLOC_FAILED = -8,
-    EI_IMPULSE_ONLY_SUPPORTED_FOR_IMAGES = -9,
-    EI_IMPULSE_UNSUPPORTED_INFERENCING_ENGINE = -10,
-    EI_IMPULSE_OUT_OF_MEMORY = -11,
-    EI_IMPULSE_INPUT_TENSOR_WAS_NULL = -13,
-    EI_IMPULSE_OUTPUT_TENSOR_WAS_NULL = -14,
-    EI_IMPULSE_SCORE_TENSOR_WAS_NULL = -15,
-    EI_IMPULSE_LABEL_TENSOR_WAS_NULL = -16,
-    EI_IMPULSE_TENSORRT_INIT_FAILED = -17,
-    EI_IMPULSE_DRPAI_INIT_FAILED = -18,
-    EI_IMPULSE_DRPAI_RUNTIME_FAILED = -19,
-    EI_IMPULSE_DEPRECATED_MODEL = -20,
-    EI_IMPULSE_LAST_LAYER_NOT_AVAILABLE = -21,
-    EI_IMPULSE_INFERENCE_ERROR = -22,
-    EI_IMPULSE_AKIDA_ERROR = -23,
-    EI_IMPULSE_INVALID_SIZE = -24,
-    EI_IMPULSE_ONNX_ERROR = -25,
-    EI_IMPULSE_MEMRYX_ERROR = -26,
-} EI_IMPULSE_ERROR;
+/* Private functions ------------------------------------------------------- */
+
+EI_IMPULSE_ERROR ei_run_impulse_check_canceled();
+void ei_serial_set_baudrate(int baudrate);
+
+/* Public functions -------------------------------------------------------- */
 
 /**
- * Cancelable sleep, can be triggered with signal from other thread
+ * @defgroup ei_user_functions User-defined functions
+ * 
+ * These functions are required to be implemented by the user for the target platform.
+ * See [this porting guide](https://docs.edgeimpulse.com/docs/edge-ai-hardware/porting-guide) for more information. They are declared internally in the Edge Impulse
+ * C++ SDK library, and they must be defined by the user.
+ * 
+ * **Source**: [porting/ei_classifier_porting.h](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/ei_classifier_porting.h)
+ * 
+ * **Examples**:
+ * The following examples demonstrate possible implementations of these functions for
+ * various platforms. Note the `__attribute__((weak))` in most of the definitions, which
+ * means that a user could override the implementation elsewhere in the program:
+ * * [Arduino classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/arduino/ei_classifier_porting.cpp)
+ * * [mbed classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/mbed/ei_classifier_porting.cpp)
+ * * [POSIX classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/posix/ei_classifier_porting.cpp)
+ * * [Silicon Labs classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/silabs/ei_classifier_porting.cpp)
+ * * [STM32 classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/stm32-cubeai/ei_classifier_porting.cpp)
+ * * [TI classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/ti/debug_log.cpp)
+ * * [Zephyr classifier porting](https://github.com/edgeimpulse/inferencing-sdk-cpp/blob/master/porting/zephyr/ei_classifier_porting.cpp)
+ * 
+ * @addtogroup ei_user_functions
+ * @{
  */
-EI_IMPULSE_ERROR ei_sleep(int32_t time_ms);
 
 /**
- * Check if the sampler thread was canceled, use this in conjunction with
- * the same signaling mechanism as ei_sleep
+ * Cancelable sleep, can be triggered with signal from other thread
  */
-EI_IMPULSE_ERROR ei_run_impulse_check_canceled();
+/**
+ * @brief Cancellable sleep, can be triggered with signal from other thread
+ * 
+ * Allow the processor or thread to sleep or block for the given time.
+ * 
+ * @param[in] time_ms Time in milliseconds to sleep
+ * 
+ * @return `EI_IMPULSE_OK` if successful, error code otherwise
+ */
+EI_IMPULSE_ERROR ei_sleep(int32_t time_ms);
 
 /**
  * Read the millisecond timer
  */
+/**
+ * @brief Read the millisecond timer
+ * 
+ * This function should return the number of milliseconds that have passed since the 
+ * start of the program. If you do not need to determine the run times for DSP and 
+ * inference blocks, you can simply return 0 from this function. Your impulse will still
+ * work correctly without timing information.
+ * 
+ * @return The number of milliseconds that have passed since the start of the program
+ */
 uint64_t ei_read_timer_ms();
 
 /**
- * Read the microsecond timer
+ * @brief Read the microsecond timer
+ * 
+ * This function should return the number of microseconds that have passed since the 
+ * start of the program. If you do not need to determine the run times for DSP and 
+ * inference blocks, you can simply return 0 from this function. Your impulse will still
+ * work correctly without timing information.
+ * 
+ * @return The number of microseconds that have passed since the start of the program
  */
 uint64_t ei_read_timer_us();
 
 /**
- * Set Serial baudrate
+ * @brief Send a single character to the serial port
+ *
+ * @param[in]  c The character to send
  */
-void ei_serial_set_baudrate(int baudrate);
+void ei_putchar(char c);
 
 /**
- * @brief      Connect to putchar of target
- *
- * @param[in]  c The chararater
+ * @brief Read a single character from the serial port
+ * 
+ * @return The character read from the serial port
  */
-void ei_putchar(char c);
+char ei_getchar(void);
 
 /**
- * Print wrapper around printf()
- * This is used internally to print debug information.
+ * @brief Print wrapper around printf()
+ * 
+ * `ei_printf()` is declared internally to the Edge Impulse SDK library so that debugging
+ * information (e.g. during inference) can be printed out. However, the function must be
+ * defined by the user, as printing methods can change depending on the platform and use
+ * case. For example, you may want to print debugging information to stdout in Linux or 
+ * over a UART serial port on a microcontroller.
+ * 
+ * @param[in] format Pointer to a character array or string that should be printed
+ * @param[in] ... Other optional arguments may be passed as necessary (e.g. handle to a 
+ *  UART object). Note that any calls to `ei_printf()` from within the 
+ *  *edge-impulse-sdk* library do not pass anything other than the `format` argument.
  */
 __attribute__ ((format (printf, 1, 2)))
 void ei_printf(const char *format, ...);
 
 /**
- * Override this function if your target cannot properly print floating points
- * If not overriden, this will be sent through `ei_printf()`.
+ * @brief Used to print floating point numbers
+ * 
+ * Some platforms cannot handle directly printing floating point numbers (e.g. to a 
+ * console or over a serial port). If your platform cannot directly print floats, 
+ * provide an implementation of this function to print them as needed (for example,
+ * construct a string containing scientific notation with integers and call 
+ * `ei_printf()`).
+ * 
+ * If your platform can print floating point values, the easiest implementation of this
+ * function is as follows:
+ * 
+ * ```
+ * __attribute__((weak)) void ei_printf_float(float f) {
+ *     printf("%f", f);
+ * }
+ * ```
+ * 
+ * @param[in] f The floating point number to print
  */
 void ei_printf_float(float f);
 
 /**
- * Wrapper around malloc
+ * @brief Wrapper around malloc
+ * 
+ * This function should allocate `size` bytes and return a pointer to the allocated 
+ * memory. In bare-metal implementations, it can simply be a wrapper for `malloc()`. For
+ * example:
+ * 
+ * ```
+ * __attribute__((weak)) void *ei_malloc(size_t size) {
+ *     return malloc(size);
+ * }
+ * ```
+ * 
+ * If you intend to run your impulse in a multi-threaded environment, you will need to
+ * ensure that your implementation of `ei_malloc()` is thread-safe. For example, if you
+ * are using FreeRTOS, here is one possible implementation:
+ * 
+ * ```
+ * __attribute__((weak)) void *ei_malloc(size_t size) {
+ *     return pvPortMalloc(size);
+ * }
+ * ```
+ * 
+ * @param[in] size The number of bytes to allocate
  */
 void *ei_malloc(size_t size);
 
 /**
- * Wrapper around calloc
+ * @brief Wrapper around calloc
+ * 
+ * This function should allocate `nitems * size` bytes and initialize all bytes in this
+ * allocated memory to 0. It should return a pointer to the allocated memory. In 
+ * bare-metal implementations, it can simply be a wrapper for `calloc()`. For example:
+ * 
+ * ```
+ * __attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
+ *     return calloc(nitems, size);
+ * }
+ * ```
+ * 
+ * If you intend to run your impulse in a multi-threaded environment, you will need to
+ * ensure that your implementation of `ei_calloc()` is thread-safe. For example, if you
+ * are using FreeRTOS, here is one possible implementation:
+ * 
+ * ```
+ * __attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
+ *     void *ptr = NULL;
+ *     if (size > 0) {
+ *         ptr = pvPortMalloc(nitems * size);
+ *         if(ptr)
+ *            memset(ptr, 0, (nitems * size));
+ *     }
+ *     return ptr;
+ * }
+ * ```
+ * 
+ * @param[in] nitems Number of blocks to allocate and clear
+ * @param[in] size Size (in bytes) of each block
  */
 void *ei_calloc(size_t nitems, size_t size);
 
 /**
- * Wrapper around free
+ * @brief Wrapper around free
+ * 
+ * This function should free the memory space pointed to by `ptr`. If `ptr` is `NULL`,
+ * no operation should be performed. In bare-metal implementations, it can simply be a
+ * wrapper for `free()`. For example:
+ * 
+ * ```
+ * __attribute__((weak)) void ei_free(void *ptr) {
+ *     free(ptr);
+ * }
+ * ```
+ * 
+ * If you intend to run your impulse in a multi-threaded environment, you will need to
+ * ensure that your implementation of `ei_free()` is thread-safe. For example, if you 
+ * are using FreeRTOS, here is one possible implementation:
+ * 
+ * ```
+ * __attribute__((weak)) void ei_free(void *ptr) {
+ *     pvPortFree(ptr);
+ * }
+ * ```
+ * 
+ * @param[in] ptr Pointer to the memory to free
  */
 void ei_free(void *ptr);
 
+/** @} */
+
 #if defined(__cplusplus) && EI_C_LINKAGE == 1
 }
 #endif // defined(__cplusplus) && EI_C_LINKAGE == 1
 
 // Load porting layer depending on target
+
+// First check if any of the general frameworks or operating systems are supported/enabled
+#ifndef EI_PORTING_ZEPHYR
+#if defined(__ZEPHYR__)
+#define EI_PORTING_ZEPHYR      1
+#else
+#define EI_PORTING_ZEPHYR      0
+#endif
+#endif
+
 #ifndef EI_PORTING_ARDUINO
 #ifdef ARDUINO
 #define EI_PORTING_ARDUINO      1
@@ -128,30 +265,29 @@ void ei_free(void *ptr);
 #endif
 #endif
 
-#ifndef EI_PORTING_ECM3532
-#ifdef ECM3532
-#define EI_PORTING_ECM3532      1
+#ifndef EI_PORTING_MBED
+#ifdef __MBED__
+#define EI_PORTING_MBED      1
 #else
-#define EI_PORTING_ECM3532      0
+#define EI_PORTING_MBED      0
 #endif
 #endif
 
+// Then check for target-specific build systems
+
 #ifndef EI_PORTING_ESPRESSIF
-#if defined(CONFIG_IDF_TARGET_ESP32) && EI_PORTING_ARDUINO == 0
+#if ((defined(CONFIG_IDF_TARGET_ESP32) || defined(CONFIG_IDF_TARGET_ESP32S3)) && EI_PORTING_ZEPHYR == 0)
+#include "esp_idf_version.h"
+#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 0, 0)
+#define portTICK_RATE_MS portTICK_PERIOD_MS
+#endif
 #define EI_PORTING_ESPRESSIF      1
+#define EI_PORTING_ARDUINO        0
 #else
 #define EI_PORTING_ESPRESSIF     0
 #endif
 #endif
 
-#ifndef EI_PORTING_MBED
-#ifdef __MBED__
-#define EI_PORTING_MBED      1
-#else
-#define EI_PORTING_MBED      0
-#endif
-#endif
-
 #ifndef EI_PORTING_POSIX
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
 #define EI_PORTING_POSIX      1
@@ -176,13 +312,6 @@ void ei_free(void *ptr);
 #endif
 #endif
 
-#ifndef EI_PORTING_ZEPHYR
-#if defined(__ZEPHYR__)
-#define EI_PORTING_ZEPHYR      1
-#else
-#define EI_PORTING_ZEPHYR      0
-#endif
-#endif
 
 #ifndef EI_PORTING_STM32_CUBEAI
 #if defined(USE_HAL_DRIVER) && !defined(__MBED__) && EI_PORTING_ZEPHYR == 0
@@ -209,4 +338,23 @@ void ei_free(void *ptr);
 #endif
 // End load porting layer depending on target
 
+// Additional configuration for specific architecture
+#if defined(__CORTEX_M)
+
+#if (__CORTEX_M == 55U)
+#define EI_MAX_OVERFLOW_BUFFER_COUNT	15
+#endif
+
+#if (__CORTEX_M == 85U)
+#define EI_MAX_OVERFLOW_BUFFER_COUNT	50
+#endif
+
+#endif
+
+#if defined(CONFIG_IDF_TARGET_ESP32S3)
+#define EI_MAX_OVERFLOW_BUFFER_COUNT	30
+#endif
+
+// End additional configuration
+
 #endif // _EI_CLASSIFIER_PORTING_H_
diff --git a/edge-impulse-sdk/porting/ei_logging.h b/edge-impulse-sdk/porting/ei_logging.h
index d15832e..b69604b 100644
--- a/edge-impulse-sdk/porting/ei_logging.h
+++ b/edge-impulse-sdk/porting/ei_logging.h
@@ -39,14 +39,10 @@
 #define EI_LOGD(format, ...) (void)0
 
 #ifndef EI_LOG_LEVEL
-    #define EI_LOG_LEVEL EI_LOG_LEVEL_NONE
+    #define EI_LOG_LEVEL EI_LOG_LEVEL_INFO
 #endif
 
-#if defined(__cplusplus) && EI_C_LINKAGE == 1
-extern "C"
-#endif // defined(__cplusplus) && EI_C_LINKAGE == 1
-
-const char *debug_msgs[] =
+__attribute__((unused)) static const char *debug_msgs[] =
 {
     "NONE", // this one will never show
     "ERR",
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt b/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt
index ba45866..736eaf9 100644
--- a/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/CMakeLists.txt
@@ -1,4 +1,4 @@
-idf_build_get_property(idf_target IDF_TARGET)
+cmake_minimum_required(VERSION 3.5)
 
 set(c_srcs
     "src/activation_functions/esp_nn_relu_ansi.c"
@@ -27,6 +27,7 @@ if(CONFIG_IDF_TARGET_ESP32S3)
         "src/convolution/esp_nn_conv_s16_mult8_esp32s3.S"
         "src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S"
         "src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S"
+        "src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S"
         "src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S"
         "src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S"
         "src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S"
@@ -46,5 +47,5 @@ idf_component_register(SRCS "${c_srcs}"
 if(CONFIG_IDF_TARGET_ESP32S3)
     target_compile_options(${COMPONENT_LIB} PRIVATE -mlongcalls -fno-unroll-loops -O2 -Wno-unused-function)
 else()
-    target_compile_options(${COMPONENT_LIB} PRIVATE -Wno-unused-function)
-endif()
\ No newline at end of file
+    target_compile_options(${COMPONENT_LIB} PRIVATE  -O2 -Wno-unused-function)
+endif()
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md b/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md
new file mode 100644
index 0000000..b541db7
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/CONTRIBUTING.md
@@ -0,0 +1,38 @@
+# Contributing
+
+Contributions to the ESP-NN project in the form of pull requests, bug reports, and feature requests are welcome!
+
+This document covers various topics related to contributions to the ESP-NN project. Please read it if you plan to submit a PR!
+
+## CLA
+
+We require accepting the contributor's license agreement for all pull requests. When opening a pull request the first time you will be prompted to sign the CLA by the [CLA Assistant](https://cla-assistant.io/) service.
+
+## Large-scale Changes
+
+If you'd like to propose a change to the existing APIs or a large-scale refactoring of the implementation, we recommend opening an issue first to discuss this.
+
+## Updating the Benchmarks Table
+
+The benchmarks table in [README.md](README.md) contains benchmarks for ESP32-S3. The benchmarks are collected by running the app in the [test_app](test_app/) directory. Please update this table if you have changed the implementations of some of the functions or added new ones.
+
+## Releasing a new version
+
+Maintainers should follow the steps below to release a new version of the ESP-NN component. Assuming the new version is `vX.Y.Z`:
+
+1. Ensure you are on the latest `master` branch:
+   ```bash
+   git checkout master
+   git pull --ff-only origin master
+   ```
+1. Create the new tag:
+   ```bash
+   git tag -s -a -m "vX.Y.Z" vX.Y.Z
+   ```
+1. Push the tag and the branch to the internal repository:
+   ```bash
+   git push origin vX.Y.Z
+   ```
+1. CI will automatically push the tag to GitHub and will upload the new version to the IDF Component Registry.
+1. Go to https://github.com/espressif/esp-nn/releases and create a release from the tag vX.Y.Z.
+1. Write the release notes and publish the release.
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml b/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml
new file mode 100644
index 0000000..b90ac5e
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/idf_component.yml
@@ -0,0 +1,11 @@
+description: Optimized NN (Neural Network) functions for Espressif chips
+url: https://github.com/espressif/esp-nn
+repository: https://github.com/espressif/esp-nn.git
+issues: https://github.com/espressif/esp-nn/issues
+dependencies:
+  idf:
+    version: ">=4.2"
+files:
+  exclude:
+    - test_app
+    - tests
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S
new file mode 100644
index 0000000..b020920
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/activation_functions/esp_nn_relu_s8_esp32s3.S
@@ -0,0 +1,118 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+    .text
+    .align  4
+    .literal_position
+
+# in place relu6 function. a2: data, a3: size
+    # Program Unit: esp_nn_relu6_s8_esp32s3
+    .type   esp_nn_relu6_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_relu6_s8_esp32s3
+
+esp_nn_relu6_s8_esp32s3:
+    entry   a1,48                       #
+    mov.n   a9,a2                       # [0], data
+    mov.n   a7,a3                       # [1], size
+
+ // process multiple of 16
+    movi.n      a4,6                    # [4]
+    s8i         a4,a1,0                     # [5]  six
+    addi        a10,a3,-7                   # [2]
+    ee.vldbc.8  q1,a1               # [6]  id:72 six+0x0
+    blti        a3,16,.Lt_0_5634            # [7]
+
+    srai        a8,a3,4                     # [0]
+    ee.zero.q   q2                      # [1]
+    loopgtz     a8,.LBB37_esp_nn_relu6_s8_esp32s3   # [3]
+
+    ee.vld.128.ip   q0,a2,0             # [0*II+0]  id:73
+    ee.vmax.s8      q0,q0,q2            # [0*II+2]
+    ee.vmin.s8      q0,q0,q1            # [0*II+3]
+    ee.vst.128.ip   q0,a2,16            # [0*II+4]  id:74
+.LBB37_esp_nn_relu6_s8_esp32s3: # 0x34
+
+    slli    a8,a8,4                     # [0]
+
+ // remaining multiple of 8 data
+    bge     a8,a10,.Lt_0_3586           # [1]
+
+.Lt_0_3842: # 0x3a
+    sub     a6,a7,a8                    # [0]
+    srai    a6,a6,3                     # [1]
+    loopgtz a6,.LBB52_esp_nn_relu6_s8_esp32s3   # [2]
+
+    ee.vld.l.64.ip  q0,a2,0         # [0*II+0]  id:75
+    ee.vmax.s8      q0,q0,q2            # [0*II+2]
+    ee.vmin.s8      q0,q0,q1            # [0*II+3]
+    ee.vst.l.64.ip  q0,a2,8         # [0*II+4]  id:76
+
+.LBB52_esp_nn_relu6_s8_esp32s3: # 0x4f
+    addx8   a8,a6,a8                    # [0]
+
+.Lt_0_3586: # 0x52
+ // process leftover
+    bge     a8,a7,.Lt_0_6402            # [0]
+
+.Lt_0_4866: # 0x55
+    movi.n  a5,0                    # [0]
+    sub     a3,a7,a8                    # [1]
+    add.n   a2,a8,a9                    # [2]
+    l8ui    a6,a2,0                     # [3]  id:78
+    addi.n  a3,a3,-1                # [4]
+    sext    a6,a6,7
+    max     a6,a5,a6                    # [6]
+    min     a6,a4,a6                    # [7]
+    s8i     a6,a2,0                     # [8]  id:79
+
+    loopgtz a3,.LBB67_esp_nn_relu6_s8_esp32s3   # [9]
+
+    l8ui    a3,a2,1                     # [0*II+0]  id:78
+    addi.n  a2,a2,1                 # [1*II+1]
+    sext    a3,a3,7
+    max     a3,a5,a3                    # [0*II+3]
+    min     a3,a4,a3                    # [0*II+4]
+    s8i     a3,a2,0                     # [0*II+5]  id:79
+.LBB67_esp_nn_relu6_s8_esp32s3: # 0x81
+
+.Lt_0_6402: # 0x83
+    retw.n                          # [0]
+
+.Lt_0_5634: # 0x85
+    blti    a10,1,.Lt_0_5890            # [0]
+
+    movi.n  a8,0                    # [0]
+    ee.zero.q   q2                      # [1]
+    j   .Lt_0_3842                      # [2]
+
+.Lt_0_5890: # 0x90
+    beqz.n  a3,.Lt_0_6402           # [0]
+
+    movi.n  a8,0                    # [0]
+    j   .Lt_0_4866                      # [1]
+
+    .size   esp_nn_relu6_s8_esp32s3, . - esp_nn_relu6_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S
new file mode 100644
index 0000000..492254c
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_add_s8_esp32s3.S
@@ -0,0 +1,638 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2023 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .align  4
+    .literal_position
+    .literal    .nudge_val, 1073741824
+
+    # Program Unit: esp_nn_add_elementwise_s8_esp32s3
+    .type   esp_nn_add_elementwise_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_add_elementwise_s8_esp32s3
+
+esp_nn_add_elementwise_s8_esp32s3:  # 0x4
+    # temp_neg_out_shift = 0
+    # temp_neg_input2_shift = 4
+    # temp_neg_input1_shift = 8
+    # gra_spill_temp_2 = 12
+    # gra_spill_temp_3 = 16
+    # gra_spill_temp_4 = 20
+    # gra_spill_temp_5 = 24
+    # gra_spill_temp_6 = 28
+    # gra_spill_temp_7 = 32
+    # gra_spill_temp_8 = 36
+    # gra_spill_temp_9 = 40
+    # gra_spill_temp_10 = 44
+    # gra_spill_temp_11 = 48
+    # gra_spill_temp_12 = 52
+    # gra_spill_temp_13 = 56
+
+ // a2 : *input1_data
+ // a3 : *input2_data
+ // a4 : input1_offset
+ // a5 : input2_offset
+ // a6 : input1_mult
+ // a7 : input2_mult
+ // On stack:
+ // 80: input1_shift
+ // 84: input2_shift
+ // 88: left_shift
+ // 92: *output
+ // 96: out_offset
+ // 100: out_mult, loaded in `a8`
+ // 104: out_shift
+ // 108: activation_min
+ // 112: activation_max
+ // 116: size
+
+    entry       a1,80                      #
+    s32i.n      a4,a1,48                    # [10]  gra_spill_temp_11, input1_offset
+    s32i.n      a5,a1,52                    # [0]  gra_spill_temp_12, input2_offset
+    s32i.n      a2,a1,32                 # [5]  gra_spill_temp_7, input1_data
+    s32i.n      a3,a1,12                    # [3]  gra_spill_temp_2, input2_data
+
+    l32i        a12,a1,116                  # [11]  id:720 size+0x0
+    mov.n       a14,a2                      # [6]
+    mov.n       a10,a3                      # [8]
+    blti        a12,1,.exit           # [1] // exit
+
+    l32i        a3,a1,80                   # [0]  id:721 input1_shift+0x0
+    l32i        a13,a1,84                  # [1]  id:722 input2_shift+0x0
+    l32i        a2,a1,104                   # [8]  id:723 out_shift+0x0
+    l32i        a8,a1,100                   # [1]  out_mult
+
+    neg         a3,a3                       # [12]
+    neg         a13,a13                     # [7]
+    neg         a2,a2                       # [11]
+
+    s32i.n      a3,a1,8                    # [12]  temp_neg_input1_shift, -input1_shift
+    s32i.n      a13,a1,4                   # [7]  temp_neg_input2_shift, -input2_shift
+    s32i.n      a2,a1,0                    # [16]  temp_neg_out_shift, -out_shift
+
+    movi.n      a5,1
+    addi        a9,a3,-1
+    ssl         a9
+    sll         a15,a5
+    s32i.n      a15,a1,16               # gra_spill_temp_3, 1 << (exponent - 1) for input1
+
+    addi        a9,a13,-1
+    ssl         a9
+    sll         a15,a5
+    s32i.n      a15,a1,20               # gra_spill_temp_4, 1 << (exponent - 1) for input2
+
+    addi        a9,a2,-1
+    ssl         a9
+    sll         a15,a5
+    s32i.n      a15,a1,24               # gra_spill_temp_5, 1 << (exponent - 1) for out
+
+    movi.n      a2,0
+    blti        a12,12,.process_leftover          # [23]
+
+    // skip to leftover routine if inputs are unaligned
+    or          a9,a14,a10
+    extui       a9,a9,0,4
+    bnez        a9,.process_leftover
+
+    l32i        a9,a1,92                   # [17]  id:1279 output+0x0
+
+    l32i        a13,a1,116                  # [20]
+    srai        a13,a13,3                   # [21]
+    s32i.n      a13,a1,56                   # [22]  gra_spill_temp_13
+
+    movi.n      a13,8
+    s32i.n      a13,a1,28               # gra_spill_temp_6, mult_of8 counter
+
+    ee.zero.q       q6                      # [8]
+
+.vector_loop: // process 8 values in one go
+    l32i            a15,a1,88                  # [6]  left_shift
+    ee.vld.l.64.ip  q0,a14,8        # [9]  id:729
+    s32i.n          a9,a1,44                    # [10]  gra_spill_temp_10, out_ptr
+    s32i.n          a14,a1,40                   # [20]  gra_spill_temp_9
+    wsr.sar         a15                     # [21] load left shift
+
+    addi.n          a15,a1,48                   # [14]
+    ee.vldbc.16     q7,a15              # [21]  id:1277 input1_offset
+    ee.vcmp.lt.s8   q5,q0,q6            # [29]
+    ee.vzip.8       q0,q5                   # [31], 20 bits
+    ee.vadds.s16    q0,q0,q7            # [34], add offset
+    ee.vcmp.lt.s16  q2,q0,q6        # [36]
+    ee.vzip.16      q0,q2               # [39], 32 bits
+    ee.vsl.32       q0,q0                   # [41] left_shift
+    ee.vsl.32       q2,q2                   # [42] left_shift
+
+    l32r            a9,.nudge_val              # [15], nudge
+
+// mulhi32 for q0
+    ee.movi.32.a    q0,a3,2             # [44]
+    ee.movi.32.a    q0,a4,3             # [45]
+    ee.movi.32.a    q0,a14,1            # [46]
+    ee.movi.32.a    q0,a5,0             # [62]
+
+    mulsh           a13,a6,a3                   # [51]
+    mull            a3,a6,a3                    # [53]
+
+    mulsh           a12,a6,a4                   # [50]
+    mull            a4,a6,a4                    # [55]
+
+    mulsh           a15,a6,a14                  # [48]
+    mull            a14,a6,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q0,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q0,a12,3            # [62]
+
+    mulsh           a13,a6,a5                   # [51]
+    mull            a5,a6,a5                    # [53]
+    ee.movi.32.q    q0,a15,1            # [62]
+
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q0,a13,0            # [62]
+
+
+// mulhi32 for q2
+    ee.movi.32.a    q2,a3,2             # [44]
+    ee.movi.32.a    q2,a4,3             # [45]
+    ee.movi.32.a    q2,a14,1            # [46]
+    ee.movi.32.a    q2,a5,0             # [62]
+
+    mulsh           a13,a6,a3                   # [51]
+    mull            a3,a6,a3                    # [53]
+
+    mulsh           a12,a6,a4                   # [50]
+    mull            a4,a6,a4                    # [55]
+
+    mulsh           a15,a6,a14                  # [48]
+    mull            a14,a6,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q2,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q2,a12,3            # [62]
+
+    mulsh           a13,a6,a5                   # [51]
+    mull            a5,a6,a5                    # [53]
+    ee.movi.32.q    q2,a15,1            # [62]
+
+    l32i            a3,a1,8                    # [12]  temp_neg_input1_shift, -input1_shift
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q2,a13,0            # [62]
+
+
+    blti            a3,1, .skip_div_by2_in0
+
+    addi.n          a13,a1,16
+    ee.vcmp.lt.s32  q1,q0,q6
+    ee.vcmp.lt.s32  q3,q2,q6
+    ee.vldbc.32     q5,a13      // 1 << (exponent - 1)
+    wsr.sar         a3          // load right_shift
+    ee.vadds.s32    q0,q0,q1    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q2,q2,q3    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q0,q0,q5
+    ee.vadds.s32    q2,q2,q5
+    ee.vsr.32       q0,q0
+    ee.vsr.32       q2,q2
+
+.skip_div_by2_in0:
+
+
+    ee.vld.l.64.ip  q1,a10,8        # [11]  id:1290
+    addi.n          a15,a1,52                   # [12]
+    ee.vldbc.16     q7,a15              # [19]  id:1278 input2_offset
+    l32i            a15,a1,88                  # [6]  left_shift
+    s32i            a10,a1,36                   # [14]  gra_spill_temp_8
+    ee.vcmp.lt.s8   q3,q1,q6            # [271]
+    wsr.sar         a15                     # [21], load shift for left shift
+    ee.vzip.8       q1,q3                   # [274], 20 bits
+    ee.vadds.s16    q1,q1,q7            # [281]
+    ee.vcmp.lt.s16  q3,q1,q6        # [282]
+    ee.vzip.16      q1,q3               # [283], 32 bits
+    ee.vsl.32       q1,q1                   # [284]
+    ee.vsl.32       q3,q3                   # [285]
+
+
+// mulhi32 for q1
+    ee.movi.32.a    q1,a3,2             # [44]
+    ee.movi.32.a    q1,a4,3             # [45]
+    ee.movi.32.a    q1,a14,1            # [46]
+    ee.movi.32.a    q1,a5,0             # [62]
+
+    mulsh           a13,a7,a3                   # [51]
+    mull            a3,a7,a3                    # [53]
+
+    mulsh           a12,a7,a4                   # [50]
+    mull            a4,a7,a4                    # [55]
+
+    mulsh           a15,a7,a14                  # [48]
+    mull            a14,a7,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q1,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q1,a12,3            # [62]
+
+    mulsh           a13,a7,a5                   # [51]
+    mull            a5,a7,a5                    # [53]
+    ee.movi.32.q    q1,a15,1            # [62]
+
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q1,a13,0            # [62]
+
+
+// mulhi32 for q3
+    ee.movi.32.a    q3,a3,2             # [44]
+    ee.movi.32.a    q3,a4,3             # [45]
+    ee.movi.32.a    q3,a14,1            # [46]
+    ee.movi.32.a    q3,a5,0             # [62]
+
+    mulsh           a13,a7,a3                   # [51]
+    mull            a3,a7,a3                    # [53]
+
+    mulsh           a12,a7,a4                   # [50]
+    mull            a4,a7,a4                    # [55]
+
+    mulsh           a15,a7,a14                  # [48]
+    mull            a14,a7,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q3,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q3,a12,3            # [62]
+
+    mulsh           a13,a7,a5                   # [51]
+    mull            a5,a7,a5                    # [53]
+    ee.movi.32.q    q3,a15,1            # [62]
+    l32i            a14,a1,4                   # [7]  temp_neg_input2_shift, -input2_shift
+
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q3,a13,0            # [62]
+
+    // multiplication results: q0-q2 & q1-q3
+
+
+    blti            a14,1, .skip_div_by2_in1
+
+    addi.n          a5,a1,20
+    ee.vcmp.lt.s32  q4,q1,q6
+    ee.vcmp.lt.s32  q5,q3,q6
+    ee.vldbc.32     q7,a5       // 1 << (exponent - 1)
+    wsr.sar         a14         // load right_shift
+    ee.vadds.s32    q4,q4,q7    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q5,q5,q7    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q1,q1,q4
+    ee.vadds.s32    q3,q3,q5
+    ee.vsr.32       q1,q1
+    ee.vsr.32       q3,q3
+
+.skip_div_by2_in1:
+
+    ee.vadds.s32        q0,q0,q1
+    ee.vadds.s32        q1,q2,q3
+
+// mulhi32 for q0
+    ee.movi.32.a    q0,a3,2             # [44]
+    ee.movi.32.a    q0,a4,3             # [45]
+    ee.movi.32.a    q0,a14,1            # [46]
+    ee.movi.32.a    q0,a5,0             # [62]
+
+    mulsh           a13,a8,a3                   # [51]
+    mull            a3,a8,a3                    # [53]
+
+    mulsh           a12,a8,a4                   # [50]
+    mull            a4,a8,a4                    # [55]
+
+    mulsh           a15,a8,a14                  # [48]
+    mull            a14,a8,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q0,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q0,a12,3            # [62]
+
+    mulsh           a13,a8,a5                   # [51]
+    mull            a5,a8,a5                    # [53]
+    ee.movi.32.q    q0,a15,1            # [62]
+
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q0,a13,0            # [62]
+
+
+// mulhi32 for q1
+    ee.movi.32.a    q1,a3,2             # [44]
+    ee.movi.32.a    q1,a4,3             # [45]
+    ee.movi.32.a    q1,a14,1            # [46]
+    ee.movi.32.a    q1,a5,0             # [62]
+
+    mulsh           a13,a8,a3                   # [51]
+    mull            a3,a8,a3                    # [53]
+
+    mulsh           a12,a8,a4                   # [50]
+    mull            a4,a8,a4                    # [55]
+
+    mulsh           a15,a8,a14                  # [48]
+    mull            a14,a8,a14                  # [49]
+
+    ssai            31                          # [47]
+
+    add             a3,a3,a9
+    saltu           a2,a3,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a3
+
+    add             a4,a4,a9
+    saltu           a2,a4,a9
+    add.n           a12,a12,a2
+    src             a12,a12,a4
+    ee.movi.32.q    q1,a13,2            # [62]
+
+    add             a14,a14,a9
+    saltu           a2,a14,a9
+    add.n           a15,a15,a2
+    src             a15,a15,a14
+    ee.movi.32.q    q1,a12,3            # [62]
+
+    mulsh           a13,a8,a5                   # [51]
+    mull            a5,a8,a5                    # [53]
+    ee.movi.32.q    q1,a15,1            # [62]
+    l32i            a14,a1,0                   # [738]  temp_neg_out_shift, -out_shift
+
+    add             a5,a5,a9
+    saltu           a2,a5,a9
+    add.n           a13,a13,a2
+    src             a13,a13,a5
+    ee.movi.32.q    q1,a13,0            # [62]
+
+
+    //q0-q1 has output
+
+    blti            a14,1,.skip_div_by2_out
+    addi.n          a5,a1,24
+    ee.vcmp.lt.s32  q2,q0,q6
+    ee.vcmp.lt.s32  q3,q1,q6
+    ee.vldbc.32     q5,a5       // 1 << (exponent - 1)
+    wsr.sar         a14         // load right shift
+    ee.vadds.s32    q0,q0,q2    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q1,q1,q3    // subtract 1 `if (val < 0)`
+    ee.vadds.s32    q0,q0,q5
+    ee.vadds.s32    q1,q1,q5
+    ee.vsr.32       q0,q0
+    ee.vsr.32       q1,q1
+
+.skip_div_by2_out:
+
+// add offset and apply activation
+    addi            a15,a1,96
+    ee.vldbc.32     q3,a15              # [809]  id:802 out_offset
+    ee.vadds.s32    q0,q0,q3            # [811]
+    ee.vadds.s32    q1,q1,q3            # [812]
+    addi            a13,a1,108
+    addi            a14,a1,112
+    ee.vldbc.32     q3,a14              # [813]  id:803 activation_max
+    ee.vmin.s32     q0,q0,q3            # [815]
+    ee.vmin.s32     q1,q1,q3            # [816]
+    ee.vldbc.32     q3,a13              # [817]  id:804 activation_min
+    l32i            a13,a1,4                   # [818]  temp_neg_input2_shift
+    ee.vmax.s32     q1,q1,q3            # [819]
+    ee.vmax.s32     q0,q0,q3            # [820]
+
+//pack the data and store
+    l32i.n          a9,a1,44                    # [784]  gra_spill_temp_10
+    ee.vunzip.16    q0,q1               # [821]
+    ee.vunzip.8     q0,q1               # [822]
+    l32i.n          a13,a1,28           # gra_spill_temp_6, multiple of 12 index
+    ee.vst.l.64.ip  q0,a9,8             # [823]  id:805
+    l32i            a15,a1,116                  # [1], size
+    l32i.n          a14,a1,40                   # [20]  gra_spill_temp_9
+    l32i.n          a10,a1,36                   # [14]  gra_spill_temp_8
+    addi            a13,a13,8
+    s32i.n          a13,a1,28           # gra_spill_temp_6
+    bge             a15,a13,.vector_loop
+
+    l32i.n  a2,a1,56                # [0]  gra_spill_temp_13
+
+// check for leftover
+    l32i    a10,a1,116                  # [1]
+    slli    a2,a2,3                     # [2]
+    bge     a2,a10,.exit          # [3] // done, exit
+
+.process_leftover:
+    l32i.n  a3,a1,48                    # [1]  gra_spill_temp_11
+    l32i.n  a12,a1,52                   # [2]  gra_spill_temp_12
+
+    l32i.n  a10,a1,12                   # [3]  gra_spill_temp_2
+    l32i.n  a14,a1,32                # [8]  gra_spill_temp_7
+    add.n   a10,a2,a10                  # [5]
+    add.n   a14,a2,a14                  # [6]
+    l8ui    a14,a14,0                   # [7]  id:809, input1
+    l8ui    a10,a10,0                   # [12]  id:1370, input2
+
+    sext    a14,a14,7                   # [9]
+    sext    a10,a10,7                   # [10]
+    add.n   a10,a10,a12                 # [11] // add offset2
+    add.n   a14,a14,a3                  # [16] // add offset1
+    l32i    a12,a1,88                  # [13]  left_shift
+
+    // sat_round_doubling_high_mul step for input1 and input2
+    ssl     a12                         # [15]
+    sll     a10,a10                     # [20]
+    sll     a14,a14                     # [17]
+
+    l32r            a12,.nudge_val             # [0], nudge
+
+    // a13,a3 are free, a12: nudge, a6:mult1
+    mulsh           a13,a14,a6
+    mull            a9,a14,a6
+    ssai            31
+
+    add             a9,a9,a12
+    saltu           a3,a9,a12
+    add.n           a13,a13,a3
+    src             a14,a13,a9 //result in a14
+
+    mulsh           a13,a10,a7
+    mull            a9,a10,a7
+    ssai            31
+
+    add             a9,a9,a12
+    saltu           a3,a9,a12
+    add.n           a13,a13,a3
+    src             a10,a13,a9 //result in a10
+
+// divide_by_power_of2_step for input1 (a14), input2 (a10)
+// free registers: a13, a12, a9, a3
+
+    l32i.n          a12,a1,8   // -input1_shift
+    l32i.n          a13,a1,4   // -input2_shift
+
+    blti            a12,1,.skip_div_by2_in0_remain
+    l32i.n          a3,a1,16    // 1 << (exponent - 1)
+    extui           a9,a14,31,1
+    ssr             a12         // load right_shift
+    sub             a3,a3,a9    // 1 << (exponent - 1) - (val < 0)
+    add             a14,a14,a3
+    sra             a14,a14
+.skip_div_by2_in0_remain:
+
+    blti            a13,1,.skip_div_by2_in1_remain
+    l32i.n          a3,a1,20    // 1 << (exponent - 1)
+    extui           a9,a10,31,1
+    ssr             a13         // load right_shift
+    sub             a3,a3,a9    // 1 << (exponent - 1) - (val < 0)
+    add             a10,a10,a3
+    sra             a10,a10
+.skip_div_by2_in1_remain:
+
+// process output
+    l32r            a12,.nudge_val             # [0], nudge
+    l32i            a13,a1,0                   // -out_shift
+    add.n           a10,a10,a14                 # [45]
+
+// multiply and pick high32
+    mulsh           a3,a10,a8
+    mull            a10,a10,a8
+    ssai            31                          # [0]
+    add             a10,a10,a12
+    saltu           a9,a10,a12
+    add             a12,a3,a9
+    src             a12,a12,a10
+
+// div by power of 2 for output
+
+    l32i            a9,a1,96                   # [31]  out_offset
+    blti            a13,1,.skip_div_by2_out_remain
+    l32i.n          a3,a1,24    // 1 << (exponent - 1)
+    extui           a14,a12,31,1
+    ssr             a13         // load right_shift
+    sub             a3,a3,a14   // 1 << (exponent - 1) - (val < 0)
+    add             a12,a12,a3
+    sra             a12,a12
+.skip_div_by2_out_remain:
+
+// add offset
+    add.n   a9,a9,a12                   # [33]
+
+// apply activation
+    l32i    a13,a1,112                  # [34]  activation_max
+    l32i    a12,a1,108                  # [35]  activation_min
+    min     a13,a13,a9                      # [36]
+    l32i    a9,a1,92                   # [37]  output
+    max     a13,a13,a12                     # [38]
+    add.n   a9,a2,a9                    # [39]
+    s8i     a13,a9,0                    # [40]  id:1371
+    l32i    a12,a1,116
+    addi.n  a2,a2,1                 # [41]
+    blt     a2,a12,.process_leftover
+
+.exit:
+    retw.n                          # [0]
+
+    .size   esp_nn_add_elementwise_s8_esp32s3, . - esp_nn_add_elementwise_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S
new file mode 100644
index 0000000..ca28573
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/basic_math/esp_nn_mul_s8_esp32s3.S
@@ -0,0 +1,323 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2023 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .align  4
+    .literal_position
+    .literal    .LC0_26_123, 1073741824 // `1 << 30`
+
+    # Program Unit: esp_nn_mul_elementwise_s8_esp32s3
+    .type   esp_nn_mul_elementwise_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_mul_elementwise_s8_esp32s3
+
+esp_nn_mul_elementwise_s8_esp32s3:  # 0x4
+    // Element-wise quantised int8 multiply. Per element, as implemented below:
+    //   prod = (input1[i] + input1_offset) * (input2[i] + input2_offset)
+    //   res  = ((prod << left_shift) * out_mult + (1 << 30)) >> 31
+    //   res  = (res + (1 << (right_shift - 1)) - (res < 0)) >> right_shift, if right_shift > 0
+    //   out[i] = max(min(res + out_offset, activation_max), activation_min)
+    // with left_shift = max(out_shift, 0) and right_shift = left_shift - out_shift.
+    //
+    // Stack frame (byte offsets from a1 after `entry a1,120`):
+    //    0: to_add = 1 << (right_shift - 1)     4: input2 base     8: output base
+    //   12: vector-loop element count          16: input1 base    20: right_shift
+    //   24: input1_offset   28: input2_offset  36: output write pointer (spill)
+    //   40: out_offset      44/48: input1_offset/input2_offset (broadcast source)
+    //   64: 16-byte spill of the broadcast input1_offset vector
+    // Long-lived registers: a4 = nudge, a11 = index, a13 = out_mult, a14 = left_shift
+
+ // registers:
+ // a2: const int8_t *input1_data
+ // a3: const int8_t *input2_data
+ // a4: const int32_t input1_offset
+ // a5: const int32_t input2_offset
+ // a6: int8_t *output
+ // a7: const int32_t out_offset
+
+ // on stack:
+ // 120: const int32_t out_mult
+ // 124: const int32_t out_shift
+ // 128: const int32_t activation_min
+ // 132: const int32_t activation_max
+ // 136: const int32_t size
+
+    entry   a1,120                      #
+    s32i.n  a4,a1,24                # [0]  gra_spill_temp_5, input1_offset
+    s32i.n  a5,a1,28                # [1]  gra_spill_temp_12, input2_offset
+
+    s32i.n  a3,a1,4                     # [5]  gra_spill_temp_0, input2
+    mov.n   a10,a3                      # [6]
+    l32i    a3,a1,136                   # [18]  id:361 size+0x0
+    mov.n   a9,a6                       # [2] // out_addr
+    blti    a3,1,.exit            # [0] // exit
+
+    s32i.n  a2,a1,16                    # [9]  gra_spill_temp_3, input1
+    s32i    a7,a1,40                    # [4]  id:358 out_offset+0x0
+    movi.n  a11,0                       # [3]
+    mov.n   a12,a2                      # [10]
+    s32i    a4,a1,44                # [13]  id:356 input1_offset+0x0
+    s32i    a5,a1,48                # [14]  id:357 input2_offset+0x0
+    movi.n  a2,1                    # [15]
+
+    l32i    a15,a1,124                  # [3]  id:362 out_shift+0x0
+    l32i    a13,a1,120                  # [4]  id:363 out_mult+0x0
+    s32i.n  a6,a1,8                 # [1]  gra_spill_temp_1, out_addr
+    max     a14,a15,a11                 # [11] left_shift
+    sub     a4,a14,a15              # right_shift
+    s32i.n  a4,a1,20                # [9]  gra_spill_temp_4
+    l32r    a4,.LC0_26_123          // nudge `1 << 30`: must be live before either branch to .process_leftover
+    blti    a3,8,.process_leftover             # [20]
+
+    // skip to leftover routine if inputs are unaligned (low 4 address bits set)
+    or          a6,a12,a10
+    extui       a6,a6,0,4
+    bnez        a6,.process_leftover
+
+    // `size >= 8` and aligned inputs, s3 optimisation path...
+    ee.zero.q   q1                      # [0]
+    addi    a4,a1,44                # [7] // a4 used as a scratch pointer here
+    addi    a8,a1,48                    # [8]
+    ee.vldbc.16 q0,a4               # [17]  id:359 input1_offset
+    ee.vldbc.16 q7,a8               # [16]  id:360 input2_offset
+    l32r    a4,.LC0_26_123              # [12] // restore nudge `1 << 30` into a4
+    movi    a8, 8
+    st.qr   q0,a1,64                    # [19]  gra_spill_temp_13
+    s32i.n  a8,a1,12                # [6]  gra_spill_temp_2
+
+.Lt_0_7682: # 0x60
+    s32i            a9,a1,36                    # [1]  gra_spill_temp_8, out_addr
+    ld.qr           q4,a1,64                    # [2]  gra_spill_temp_13, input1_offset
+    ee.vld.l.64.ip  q2,a12,8        # [4]  id:367, input1_ptr
+    movi.n          a7,16                   # [3]
+    ee.vld.h.64.ip  q2,a10,8        # [5]  id:368, input2_ptr
+    wsr.sar         a7                      # [6]
+    ee.vcmp.lt.s8   q5,q2,q1            # [7]
+    ee.vzip.8       q2,q5               # [8]
+    ee.vadds.s16    q5,q5,q7            # [9] input2_offset
+    ee.vadds.s16    q4,q2,q4            # [10] input1_offset
+    ee.vmul.s16     q3,q4,q5            # [11]
+    wsr.sar         a11                         # [12]
+    ee.vmul.s16     q2,q4,q5            # [13]
+
+    wsr.sar         a14                     # [14] left_shift
+    ee.vzip.16      q2,q3               # [15]
+    ee.vsl.32       q6,q2                   # [16] left_shift
+    ssai            31                          # [17]
+
+    ee.movi.32.a    q6,a3,2             # [18]
+    ee.movi.32.a    q6,a8,3             # [26]
+
+    mulsh           a6,a13,a3                   # [19]
+    mull            a3,a13,a3                   # [20]
+    mulsh           a7,a13,a8                   # [27]
+    add.n           a3,a4,a3                    # [22]
+    saltu           a2,a3,a4                    # [23]
+    add.n           a2,a2,a6                    # [24]
+    src             a2,a2,a3                    # [25]
+
+    mull            a6,a13,a8                   # [28]
+    add.n           a6,a4,a6                    # [30]
+    saltu           a9,a6,a4                    # [31]
+    add.n           a9,a9,a7                    # [32]
+    src             a9,a9,a6                    # [33]
+    ee.movi.32.q    q2,a2,2             # [53]
+    ee.movi.32.q    q2,a9,3             # [54]
+
+    ee.movi.32.a    q6,a6,1             # [34]
+    mulsh           a7,a13,a6                   # [35]
+    mull            a6,a13,a6                   # [36]
+    add.n           a6,a4,a6                    # [38]
+    saltu           a3,a6,a4                    # [39]
+    add.n           a3,a3,a7                    # [16]
+    src             a3,a3,a6                    # [41]
+    ee.movi.32.a    q6,a2,0             # [42]
+    mulsh           a8,a13,a2                   # [43]
+    mull            a7,a13,a2                   # [4]
+    add.n           a7,a4,a7                    # [46]
+    saltu           a6,a7,a4                    # [47]
+    add.n           a6,a6,a8                    # [24]
+    src             a6,a6,a7                    # [49]
+    ee.movi.32.q    q2,a3,1             # [28]
+    ee.movi.32.q    q2,a6,0             # [50]
+
+    wsr.sar         a14                     # [10]
+    ee.vsl.32       q4,q3                   # [11]
+    ee.movi.32.a    q4,a2,2             # [13]
+    mulsh           a3,a13,a2                   # [14]
+    mull            a2,a13,a2                   # [15]
+    ssai            31                          # [12]
+    add.n           a2,a4,a2                    # [17]
+    saltu           a5,a2,a4                # [18]
+    add.n           a5,a5,a3                # [19]
+    src             a5,a5,a2                    # [20]
+    ee.movi.32.a    q4,a3,3             # [21]
+    mulsh           a6,a13,a3                   # [22]
+    mull            a3,a13,a3                   # [23]
+    add.n           a3,a4,a3                    # [25]
+    saltu           a8,a3,a4                    # [26]
+    add.n           a8,a8,a6                    # [27]
+    src             a8,a8,a3                    # [28]
+    ee.movi.32.q    q0,a5,2             # [24]
+    ee.movi.32.q    q0,a8,3             # [51]
+
+    ee.movi.32.a    q4,a7,1             # [29]
+    mulsh           a6,a13,a7                   # [30]
+    mull            a3,a13,a7                   # [31]
+    add.n           a3,a4,a3                    # [33]
+    saltu           a2,a3,a4                    # [34]
+    add.n           a2,a2,a6                    # [35]
+    src             a2,a2,a3                    # [36]
+    ee.movi.32.a    q4,a6,0             # [37]
+    mulsh           a7,a13,a6                   # [38]
+    mull            a6,a13,a6                   # [39]
+    add.n           a6,a4,a6                    # [41]
+    saltu           a3,a6,a4                    # [42]
+    add.n           a3,a3,a7                    # [43]
+    src             a3,a3,a6                    # [4]
+    ee.movi.32.q    q0,a2,1             # [47]
+    ee.movi.32.q    q0,a3,0             # [46]
+
+    l32i.n          a5,a1,20                # [0]  gra_spill_temp_4, right_shift
+    movi.n          a7,1                    # [51]
+
+    blti            a5,1,.skip_div_by_pow_of_2
+// divide by power of 2
+    ee.vcmp.lt.s32  q5,q2,q1        # [56]
+    ee.vcmp.lt.s32  q6,q0,q1        # [28]
+
+    addi.n          a8,a5,-1                # [1]
+    ssl             a8                          # [2]
+    sll             a7,a7                       # [3]
+    s32i.n          a7,a1,0                 # [4]  to_add
+    ee.vldbc.32     q4,a1               # [5]  id:376 to_add
+
+    wsr.sar         a5                      # [6]
+    ee.vadds.s32    q5,q4,q5            # [7]
+    ee.vadds.s32    q5,q2,q5            # [8]
+    ee.vsr.32       q2,q5                   # [9]
+
+    wsr.sar         a5                      # [5]
+    ee.vadds.s32    q5,q4,q6            # [9]
+    ee.vadds.s32    q5,q0,q5            # [11]
+    ee.vsr.32       q0,q5                   # [12]
+.skip_div_by_pow_of_2:
+
+// add offset, apply activation
+    addi            a8,a1,132                   # [54]
+    ee.vldbc.32     q4,a8               # [55]  id:385 activation_max
+    addi            a5,a1,40                    # [8]
+    ee.vldbc.32     q6,a5               # [10]  id:384 out_offset
+    addi            a7,a1,128                   # [4]
+    ee.vadds.s32    q0,q0,q6            # [13] // add out_offset
+    ee.vadds.s32    q2,q2,q6            # [14] // add out_offset
+    ee.vldbc.32     q6,a7               # [16]  id:386 activation_min
+    ee.vmin.s32     q0,q0,q4            # [17]
+    ee.vmin.s32     q2,q2,q4            # [15]
+    ee.vmax.s32     q0,q0,q6            # [18]
+    ee.vmax.s32     q2,q2,q6            # [19]
+
+// pack and store
+    ee.vunzip.16    q2,q0               # [20]
+    ee.vunzip.8     q2,q0               # [21]
+    l32i.n          a7,a1,12 // count
+    l32i            a9,a1,36                    # [55]  gra_spill_temp_8
+    l32i.n          a3,a1,136               # [1] , size
+    ee.vst.l.64.ip  q2,a9,8         # [22]  id:387
+    addi            a7,a7,8
+    s32i.n          a7,a1,12 // increment count
+    bge             a3,a7,.Lt_0_7682
+
+    addi            a11,a7,-8           // a11 = number of elements already processed
+    bge             a11,a3,.exit  # [3] // exit
+
+.process_leftover:
+    sub     a8,a3,a11                   # [1] // remaining element count
+    loopgtz a8,.LBB33_esp_nn_mul_elementwise_s8_esp32s3     # [9]
+
+    ssl     a14                         # [0] left_shift
+    l32i.n  a8,a1,24                # [1]  gra_spill_temp_5, input1_offset
+    l32i.n  a10,a1,4                # [2]  gra_spill_temp_0, input2
+    l32i.n  a12,a1,16               # [3]  gra_spill_temp_3, input1
+    add.n   a10,a11,a10                 # [4], input2
+    add.n   a12,a11,a12                 # [5], input1
+    l8ui    a12,a12,0                   # [6]  id:390
+    l8ui    a10,a10,0                   # [7]  id:391
+    sext    a12,a12,7                   # [8]
+    add.n   a12,a12,a8                  # [9]
+    l32i.n  a8,a1,28                # [10]  gra_spill_temp_12, input2_offset
+    sext    a10,a10,7                   # [11]
+    add.n   a10,a10,a8                  # [12]
+    mull    a10,a12,a10                 # [13] // multiplication result
+
+// multiply by quantised mult
+    l32i.n  a9,a1,20                # [0]  gra_spill_temp_4, load right_shift
+
+    sll     a10,a10                     # [15] // left shift
+
+    mulsh   a3,a10,a13                  # [1]
+    mull    a8,a10,a13                  # [6]
+    ssai    31                          # [0]
+    add.n   a6,a8,a4                    # [8] // low word + nudge (a4 = 1 << 30)
+    saltu   a8,a6,a8                    # [9] // carry out of the low word
+    add.n   a8,a8,a3                    # [10]
+    src     a3,a8,a6                    # [19] // result
+
+    blti    a9, 1, .skip_div_by_pow_of_2_remains
+// divide by power of 2
+    // calculate to_add = `1 << (exponent - 1)`
+    addi    a6,a9,-1
+    ssl     a6                          # [23]
+    movi    a7,1
+    sll     a7,a7                       // to_add
+
+    extui   a8,a3,31,1                  # [24], sign (1 when val < 0)
+    sub     a3,a3,a8            // subtract 1 `if (val < 0)`, same as the vector path
+    add     a3,a3,a7            // add to_add
+
+    ssr     a9                          # [20] load right_shift
+    sra     a3,a3               // right shift
+.skip_div_by_pow_of_2_remains:
+
+    l32i.n  a6,a1,40                    # [32], out_offset
+    l32i.n  a8,a1,132                   # [35], act_max
+    l32i.n  a7,a1,128                   # [36], act_min
+
+// add offset and apply activation
+    add.n   a3,a3,a6                    # [34], offset added
+    min     a8,a8,a3                    # [37]
+    l32i.n  a3,a1,8                 # [38]  gra_spill_temp_1, load base out_addr
+    max     a8,a8,a7                    # [39]
+
+// store
+    add.n   a3,a11,a3                   # [16], add index from `a11`
+    s8i     a8,a3,0                     # [41]  id:392 // store
+    addi.n  a11,a11,1               # [42]  // inc index
+
+.LBB33_esp_nn_mul_elementwise_s8_esp32s3:   # 0x2ed
+.exit:
+    retw.n                          # [0]
+
+    .size   esp_nn_mul_elementwise_s8_esp32s3, . - esp_nn_mul_elementwise_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain: no instructions are emitted, only Thumb/Cortex-M0 directives and a .text section
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h
index 0a74eca..1158e9b 100644
--- a/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/common_functions.h
@@ -180,7 +180,7 @@ static void esp_nn_aligned_s8_pad_with_value(const int8_t *src, int8_t *dst,
 {
     /* memset with pad_val */
     memset(dst, pad_val, ((input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht)) * channels);
-    dst += (pad_wd + input_wd + pad_wd) * channels;
+    dst += (pad_wd + input_wd + pad_wd) * pad_ht * channels;
 
     for (int i = 0; i < input_ht; i++) {
         dst += pad_wd * channels;
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S
new file mode 100644
index 0000000..68d1086
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_common_functions_esp32s3.S
@@ -0,0 +1,266 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+	.text
+
+	# Program Unit: esp_nn_aligned_s8_to_s16_with_offset_esp32s3
+	.type	esp_nn_aligned_s8_to_s16_with_offset_esp32s3, @function
+	.align	 4
+	.global esp_nn_aligned_s8_to_s16_with_offset_esp32s3
+
+esp_nn_aligned_s8_to_s16_with_offset_esp32s3:	# 0x30d  a2=src, a3=dst, a4=size, a5=offset: dst[i] = src[i] + offset (signed 8-bit loads, 16-bit stores)
+
+	entry	a1,48                   	#
+	mov.n	a10,a2                  	# // src
+	mov.n	a9,a3                   	# // dst
+	mov.n	a8,a4                   	# // size
+	s32i.n	a5,a1,12               	# [3] // spill offset to the stack frame
+	addi.n	a2,a1,12               	# [4] // a2 = address of the spilled offset, used by the broadcast load
+
+	blti	a4,32,.Lt_2_6402         	# [5] if (size < 32) goto unopt
+
+	addi.n	a6,a8,-1               	# [0] // a6 = size - 1, bound for the scalar pair loop
+	ee.zero.q	q5                  	# [1] // q5 = 0, compared against to build sign masks
+	ee.vldbc.16	q4,a2             	# [2]  id:136 offset broadcast into the 16-bit lanes of q4
+	mov.n	a3,a10                  	# [3] // a3 = running src pointer
+	mov.n	a2,a9                   	# [4] // a2 = running dst pointer
+	ee.vld.128.ip	q0,a3,16        	# [5]  id:137
+	ee.vld.128.ip	q1,a3,16        	# [6]  id:138
+	ee.vcmp.lt.s8	q2,q0,q5        	# [7] // q2 = mask of negative bytes in q0
+	ee.vzip.8	q0,q2               	# [8] // interleave data bytes with sign bytes: widen s8 to s16
+	ee.vadds.s16	q0,q0,q4         	# [9] // add offset to the low half
+	ee.vadds.s16.st.incp	q0,a2,q0,q2,q4 	# [10]  id:139
+	blti	a4,64,.Lt_2_7170         	# [11] // size < 64: skip the vector loop
+
+	addi	a5,a4,-32                	# [0]
+	srai	a5,a5,5                  	# [1] // a5 = (size - 32) / 32 loop iterations (a5 no longer holds offset)
+	slli	a4,a5,5                  	# [2] // a4 = elements consumed by the loop
+	loopgtz	a5,.LBB37_esp_nn_aligned_s8_to_s16_with_offset_esp32s3 	# [3]
+
+	ee.vst.128.ip	q0,a2,16        	# [0*II+0]  id:140
+	ee.vcmp.lt.s8	q0,q1,q5        	# [0*II+1]
+	ee.vzip.8	q1,q0               	# [0*II+2]
+	ee.vadds.s16.ld.incp	q2,a3,q3,q1,q4 	# [0*II+3]  id:141
+	ee.vadds.s16.st.incp	q3,a2,q0,q0,q4 	# [0*II+4]  id:142
+	ee.vcmp.lt.s8	q3,q2,q5        	# [0*II+5]
+	ee.vst.128.ip	q0,a2,16        	# [0*II+6]  id:143
+	ee.vzip.8	q2,q3               	# [0*II+7]
+	ee.vadds.s16.ld.incp	q1,a3,q0,q2,q4 	# [0*II+8]  id:144
+	ee.vadds.s16.st.incp	q0,a2,q0,q3,q4 	# [0*II+9]  id:145
+
+.LBB37_esp_nn_aligned_s8_to_s16_with_offset_esp32s3:	# 0x36d
+	addi	a4,a4,32                 	# [0] // a4 = i, count of elements covered once the epilogue below has run
+
+.Lt_2_3842:	# 0x370
+	ee.vst.128.ip	q0,a2,16        	# [0]  id:146
+	ee.vcmp.lt.s8	q2,q1,q5        	# [1]
+	ee.vzip.8	q1,q2               	# [2]
+	ee.vadds.s16	q2,q2,q4         	# [3]
+	ee.vadds.s16	q3,q1,q4         	# [4]
+	ee.vst.128.ip	q3,a2,16        	# [5]  id:147
+	ee.vst.128.ip	q2,a2,16        	# [6]  id:148
+	bge	a4,a6,.Lt_2_4866          	# [7] // i >= size - 1: no full pair left
+
+	l32i.n	a5,a1,12               	# [0]  id:135 offset+0x0 (reload, a5 was reused as loop count)
+
+.Lt_2_5122:	# 0x38a
+	mov.n	a11,a4                  	# [0] // a11 = i at loop entry
+	add.n	a2,a4,a10               	# [1] // a2 = src + i
+ # 576          dst[i + 0] = src[i + 0] + offset;
+	l8ui	a7,a2,0                  	# [2]  id:149
+	addx2	a6,a4,a9                	# [3] // a6 = dst + 2 * i
+	sext	a7,a7,7                  	# [4]
+	add.n	a7,a7,a5                	# [5]
+	s16i	a7,a6,0                  	# [6]  id:150
+ # 577          dst[i + 1] = src[i + 1] + offset;
+	l8ui	a3,a2,1                  	# [7]  id:151
+	sub	a7,a8,a4                  	# [8]
+	addi.n	a2,a2,2                	# [9]
+	srai	a7,a7,1                  	# [10] // a7 = (size - i) / 2 pairs in total
+	sext	a3,a3,7                  	# [11]
+	add.n	a3,a3,a5                	# [12]
+	s16i	a3,a6,2                  	# [13]  id:152
+	addi.n	a3,a7,-1               	# [14] // first pair is handled above, loop over the rest
+	loopgtz	a3,.LBB52_esp_nn_aligned_s8_to_s16_with_offset_esp32s3 	# [15]
+
+	l8ui	a3,a2,0                  	# [0*II+0]  id:149
+	addi.n	a6,a6,4                	# [1*II+1]
+	sext	a3,a3,7                  	# [0*II+2]
+	add.n	a3,a3,a5                	# [0*II+3]
+	s16i	a3,a6,0                  	# [0*II+4]  id:150
+	l8ui	a3,a2,1                  	# [0*II+5]  id:151
+	addi.n	a2,a2,2                	# [0*II+6]
+	sext	a3,a3,7                  	# [0*II+7]
+	add.n	a3,a3,a5                	# [0*II+8]
+	s16i	a3,a6,2                  	# [0*II+9]  id:152
+
+.LBB52_esp_nn_aligned_s8_to_s16_with_offset_esp32s3:	# 0x3ce
+	addx2	a4,a7,a11               	# [0] // i = entry i + 2 * pairs
+
+.Lt_2_4866:	# 0x3d1
+	bge	a4,a8,.Lt_2_7682          	# [0] // i >= size: done
+
+ # 580          dst[i] = src[i] + offset;
+	addx2	a11,a4,a9               	# [0]
+	add.n	a8,a4,a10               	# [1]
+	l8ui	a8,a8,0                  	# [2]  id:153
+	l32i.n	a12,a1,12              	# [3]  id:135 offset+0x0
+	sext	a8,a8,7                  	# [4]
+	add.n	a8,a8,a12               	# [5]
+	s16i	a8,a11,0                 	# [6]  id:154
+	retw.n                        	# [7]
+
+.Lt_2_6402:	# 0x3e8
+	blti	a4,2,.Lt_2_6658          	# [0] // size < 2: at most one element
+
+	movi.n	a4,0                   	# [0] // i = 0, a5 still holds offset on this path
+	j	.Lt_2_5122                  	# [1]
+
+.Lt_2_7682:	# 0x3f0
+	retw.n                        	# [0]
+
+.Lt_2_6658:	# 0x3f2
+	blti	a4,1,.Lt_2_7682          	# [0] // size < 1: nothing to do
+
+	l8ui	a11,a10,0                	# [0]  id:153
+	sext	a11,a11,7                	# [2]
+	add.n	a11,a11,a5              	# [3]
+	s16i	a11,a3,0                 	# [4]  id:154 (a3 is still the incoming dst here)
+	retw.n                        	# [5]
+
+.Lt_2_7170:	# 0x402
+	movi.n	a4,32                  	# [0] // i = 32 once the shared epilogue has stored both preloaded vectors
+	j	.Lt_2_3842                  	# [1]
+
+	.size	esp_nn_aligned_s8_to_s16_with_offset_esp32s3, . - esp_nn_aligned_s8_to_s16_with_offset_esp32s3
+
+
+	.literal_position
+
+	# Program Unit: esp_nn_s8_to_s16_esp32s3
+	.type	esp_nn_s8_to_s16_esp32s3, @function
+	.align	 4
+	.global esp_nn_s8_to_s16_esp32s3
+
+esp_nn_s8_to_s16_esp32s3:	# 0x40b  a2=src, a3=dst, a4=size: dst[i] = src[i] (signed 8-bit loads, 16-bit stores)
+	entry	a1,32                   	#
+	mov.n	a9,a2 // src
+	mov.n	a8,a3 // dst
+	mov.n	a7,a4 // size
+    blti	a4,1,.Lt_3_4866  // size < 1: nothing to convert
+	blti	a4,16,.Lt_3_4610 // if (size < 16) jump to unopt path
+
+ // load align_len to sar_byte
+	extui	a2,a2,0,4               	# [0] // low 4 bits of src = byte misalignment
+	wur.sar_byte	a2               	# [1]
+	mov.n	a2,a9                   	# [2] // a2 = running src pointer
+
+ // preload
+	ee.vld.128.ip	q0,a2,16
+	ee.vld.128.ip	q1,a2,16
+    ee.zero.q	    q4 // q4 = 0, compared against to build sign masks
+ # 672
+ # 673      for (i = 16; i < size - 15; i += 16) {
+	blti	a4,32,.Lt_3_5378         	# [5] // size < 32: skip the vector loop
+	addi	a6,a4,-16                	# [1]
+	srai	a6,a6,4                  	# [2] // a6 = (size - 16) / 16 loop iterations
+	slli	a4,a6,4                  	# [3] // a4 = elements consumed by the loop
+	loopgtz	a6,.LBB35_esp_nn_s8_to_s16_esp32s3 	# [4]
+
+	ee.src.q.qup	q2,q0,q1         	# [0*II+0] // realign 16 bytes using sar_byte
+	ee.vcmp.lt.s8	q3,q2,q4        	# [0*II+1] // sign
+	ee.vld.128.ip	q1,a2,16        	# [0*II+2] // for next iteration
+	ee.vzip.8	q2,q3               	# [0*II+3] // interleave data bytes with sign bytes: widen s8 to s16
+	ee.vst.128.ip	q2,a3,16        	# [0*II+4]  id:93
+	ee.vst.128.ip	q3,a3,16        	# [0*II+5]  id:94
+
+.LBB35_esp_nn_s8_to_s16_esp32s3:	# 0x449
+	addi	a4,a4,16                 	# [0] // a4 = i, count of elements covered once the block below has run
+
+.Lt_3_2050:	# 0x44c
+	ee.src.q.qup	q5,q0,q1         	# [0]
+	ee.vcmp.lt.s8	q3,q5,q4        	# [1]
+	ee.vzip.8	q5,q3               	# [2]
+	ee.vst.128.ip	q5,a3,16        	# [3]  id:96
+	ee.vst.128.ip	q3,a3,16        	# [4]  id:97
+ # 687
+ # 688  skip_to_remains_s8_to_s16:
+ # 689      for (; i < size; i += 2) {
+	bge	a4,a7,.Lt_3_4866          	# [5] // i >= size: done
+
+.Lt_3_3330:	# 0x45e
+	mov.n	a11,a4                  	# [0] // a11 = i at loop entry
+	add.n	a2,a4,a9                	# [1] // a2 = src + i
+ # 690          dst[i + 0] = src[i + 0];
+	l8ui	a10,a2,0                 	# [2]  id:98
+	addx2	a5,a4,a8                	# [3] // a5 = dst + 2 * i
+	sext	a10,a10,7                	# [4]
+	s16i	a10,a5,0                 	# [5]  id:99
+ # 691          dst[i + 1] = src[i + 1];
+	l8ui	a3,a2,1                  	# [6]  id:100
+	sub	a10,a7,a4                 	# [7]
+	addi.n	a2,a2,2                	# [8]
+	addi.n	a10,a10,1              	# [9] // round the pair count up; NOTE(review): with an odd remainder the last pair touches index size, confirm buffers tolerate it
+	srai	a10,a10,1                	# [10] // a10 = (size - i + 1) / 2 pairs in total
+	sext	a3,a3,7                  	# [11]
+	s16i	a3,a5,2                  	# [12]  id:101
+	addi.n	a3,a10,-1              	# [13] // first pair is handled above, loop over the rest
+	loopgtz	a3,.LBB50_esp_nn_s8_to_s16_esp32s3 	# [14]
+
+	l8ui	a3,a2,0                  	# [0*II+0]  id:98
+	addi.n	a5,a5,4                	# [1*II+1]
+	sext	a3,a3,7                  	# [0*II+2]
+	s16i	a3,a5,0                  	# [0*II+3]  id:99
+	l8ui	a3,a2,1                  	# [0*II+4]  id:100
+	addi.n	a2,a2,2                	# [0*II+5]
+	sext	a3,a3,7                  	# [0*II+6]
+	s16i	a3,a5,2                  	# [0*II+7]  id:101
+
+.LBB50_esp_nn_s8_to_s16_esp32s3:	# 0x49c
+	addx2	a4,a10,a11              	# [0] // i = entry i + 2 * pairs
+ # 692      }
+ # 693      if(i < size) {
+	bge	a4,a7,.Lt_3_4866          	# [1] // NOTE(review): looks always taken because the pair count is rounded up
+
+ # 694          dst[i] = src[i];
+	add.n	a11,a4,a9               	# [0]
+	l8ui	a11,a11,0                	# [1]  id:102
+	addx2	a12,a4,a8               	# [2]
+	sext	a11,a11,7                	# [3]
+	s16i	a11,a12,0                	# [4]  id:103
+	retw.n                        	# [5]
+
+.Lt_3_4610:	# 0x4b2
+	movi.n	a4,0                   	# [0] // i = 0: scalar path handles everything
+	j	.Lt_3_3330                  	# [1]
+
+.Lt_3_4866:	# 0x4ba
+	retw.n                        	# [0]
+
+.Lt_3_5378:	# 0x4bc
+	movi.n	a4,16                  	# [1] // i = 16 once the single vector block has been stored
+	j	.Lt_3_2050                  	# [2]
+
+	.size	esp_nn_s8_to_s16_esp32s3, . - esp_nn_s8_to_s16_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S
new file mode 100644
index 0000000..08ff1b8
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_esp32s3.S
@@ -0,0 +1,127 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// the macro `use_nudge` enables adding rounding factor similar to tflite implementation
+// this barely changes any accuracy
+// keep this disabled for better performance
+
+#ifndef SKIP_NUDGE
+    # set SKIP_NUDGE flag for ~20% faster (but not bit-exact) quantisation
+    .set use_nudge, 1
+#endif
+
+    .text
+    .literal_position
+    .literal    .nudge_val, 1073741824          # 1 << 30
+
+    .type   esp_nn_multiply_by_quantized_mult_asm_esp32s3, @function
+    .align   4
+    .global esp_nn_multiply_by_quantized_mult_asm_esp32s3
+
+esp_nn_multiply_by_quantized_mult_asm_esp32s3:  # 0x4  a2=mult, a3=shift, q0 = four 32-bit values, requantized in place
+    # to_add = 4
+
+    entry       a1,32
+    wsr.sar     a3                      # SAR = shift
+    ee.zero.q   q2                      # q2 = 0, compared against later for the negative-lane mask
+
+    bltz        a3,     .skip_left_shift    # negative shift: no left shift
+    ee.vsl.32   q0,q0                   # [13] q0 lanes shifted left by SAR
+.skip_left_shift:
+
+    ssai    31                      # [15] SAR = 31 for the src extractions below
+
+# move data to general purpose registers
+    ee.movi.32.a    q0,a12,0            # [17]
+    ee.movi.32.a    q0,a13,1            # [16]
+    ee.movi.32.a    q0,a14,2            # [18]
+    ee.movi.32.a    q0,a15,3            # [19]
+
+.ifdef use_nudge
+    l32r            a6,.nudge_val       # a6 = 1 << 30
+.endif
+
+# perform 64 bit mult
+    mulsh   a4,a2,a12                   # [22] high word, lane 0
+    mulsh   a11,a2,a13                  # [23] high word, lane 1
+    mulsh   a10,a2,a14                  # [21] high word, lane 2
+    mulsh   a8,a2,a15                   # [20] high word, lane 3
+    mull    a12,a2,a12                  # [24] low word, lane 0
+    mull    a13,a2,a13                  # [25] low word, lane 1
+    mull    a14,a2,a14                  # [26] low word, lane 2
+    mull    a15,a2,a15                  # [27] low word, lane 3
+
+# add nudge_val and discard low31
+
+.ifdef use_nudge
+    add.n           a14,a6,a14                  # [41] low2 += nudge
+    saltu           a2,a14,a6                   # [44] carry out of the low word (a2 no longer holds mult)
+    add.n           a10,a10,a2                  # [45] high2 += carry
+
+    add.n           a13,a6,a13                  # [47] low1 += nudge
+    saltu           a9,a13,a6                   # [50] carry
+    add.n           a11,a11,a9                  # [51] high1 += carry
+.endif
+
+    src             a10,a10,a14                     # [88] (high2:low2) >> 31
+    src             a11,a11,a13                 # [78] (high1:low1) >> 31
+    ee.movi.32.q    q0,a10,2
+    ee.movi.32.q    q0,a11,1
+
+.ifdef use_nudge
+    add.n           a15,a6,a15                  # [36] low3 += nudge
+    saltu           a2,a15,a6                   # [39] carry
+    add.n           a8,a8,a2                    # [40] high3 += carry
+
+    add.n           a12,a6,a12                  # [54] low0 += nudge
+    saltu           a10,a12,a6                  # [57] carry
+    add.n           a4,a4,a10                   # [58] high0 += carry
+.endif
+
+    src             a8,a8,a15                  # [95] (high3:low3) >> 31
+    src             a4,a4,a12                  # [69] # discard lower 31 bits
+    ee.movi.32.q    q0,a8,3
+    ee.movi.32.q    q0,a4,0
+
+    bgez    a3, .skip_div_by_power_of_2     # non-negative shift: no right shift needed
+
+    neg     a5,a3                       # [0]  right_shift/exponent = -shift
+    ee.vcmp.lt.s32  q2,q0,q2        # [97] q2 = mask of negative lanes
+    addi.n          a7,a5,-1                # [0]  exponent - 1
+    ssl             a7                      # [1]
+    movi.n          a6,1                    # [92]
+    sll             a6,a6                   # [2]  a6 = 1 << (exponent - 1)
+    s32i.n          a6,a1,4                 # [3]  to_add
+    addi.n          a4,a1,4                 # [94]  to_add_addr
+    ee.vldbc.32     q1,a4           # [4]  id:148 to_add
+    wsr.sar         a5                      # SAR = exponent
+    ee.vadds.s32    q1,q1,q2                # to_add plus the negative-lane mask
+    ee.vadds.s32    q0,q0,q1                # add the rounding term
+    ee.vsr.32       q0,q0                   # q0 lanes shifted right by SAR
+
+.skip_div_by_power_of_2:
+    retw.n                          # [9]
+
+    .size   esp_nn_multiply_by_quantized_mult_asm_esp32s3, . - esp_nn_multiply_by_quantized_mult_asm_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S
new file mode 100644
index 0000000..ed83816
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/common/esp_nn_multiply_by_quantized_mult_ver1_esp32s3.S
@@ -0,0 +1,163 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// quantisation version where we deal with different shifts and mults.
+
+    .set use_nudge, 1
+
+    .text
+    .literal_position
+    .literal    .LC3_19_48, 1073741824
+
+    # Program Unit: esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+    .type   esp_nn_multiply_by_quantized_mult_ver1_esp32s3, @function
+    .align   4
+    .global esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+esp_nn_multiply_by_quantized_mult_ver1_esp32s3:    # 0x1ee  a2 = ptr to 4 mults, a3 = ptr to 4 shifts, q0 = four 32-bit values, requantized in place
+    entry       a1,32                       #
+    ee.zero.q   q3                      # [0] q3 = 0, compared against later for the negative-lane mask
+    l32i.n      a8,a3,0                 # [5]  id:200 // shift0
+    l32i.n      a7,a3,4                 # [2]  id:201 // shift1
+    l32i.n      a12,a2,0                # [3]  id:204 // mult0
+    l32i.n      a15,a2,4                # [1]  id:205 // mult1
+    movi.n      a10,0                   # [7]
+
+    max             a6,a10,a8                   # [1] // left_shift0
+    max             a5,a10,a7                   # [7] // left_shift1
+    sub             a8,a6,a8                    # [2] // right_shift0
+    sub             a7,a5,a7                    # [8] // right_shift1
+
+    ee.movi.32.a    q0,a9,0             # [4]
+    ee.movi.32.a    q0,a11,1            # [11]
+    ssl             a6                          # [3]
+    sll             a9,a9                       # [4] // value0 << left_shift0
+    mulsh           a4,a12,a9                   # [6] // high0
+    mull            a12,a12,a9                  # [9] // low0
+    ssl             a5                          # [10]
+    sll             a11,a11                         # [12] // value1 << left_shift1
+    mulsh           a14,a15,a11                 # [14] // high1
+    mull            a15,a15,a11                 # [16] // low1
+    l32r            a13,.LC3_19_48              # [23] // a13 = nudge, 1073741824 = 1 << 30
+
+    ee.movi.32.q    q0,a9,0             # [5]
+    ee.movi.32.q    q0,a11,1            # [15]
+
+
+    l32i.n          a6,a3,8                 # [6]  id:202 // shift2
+    l32i.n          a9,a2,8                 # [19]  id:206 // mult2
+    max             a5,a10,a6                   # [0] // left_shift2
+    sub             a6,a5,a6                    # [24] // right_shift2
+
+
+    ee.movi.32.a    q0,a11,2            # [17]
+    ssl             a5                          # [13]
+    sll             a11,a11                     # [18] // value2 << left_shift2
+    ee.movi.32.q    q0,a11,2            # [20]
+    mulsh           a5,a9,a11                  # [21] // high2
+    mull            a9,a9,a11                   # [22] // low2
+    mov             a11, a5                     // keep high2 in a11, a5 is reused below
+
+// add nudge to result0 & result1
+    add.n           a12,a13,a12                 # [25] // low0 += nudge
+    saltu           a5,a12,a13                  # [26] // carry0
+    add.n           a15,a13,a15                 # [27] // low1 += nudge
+    add.n           a5,a5,a4                    # [28] // high0 + carry0
+    saltu           a4,a15,a13                  # [29] // carry1
+    add.n           a4,a4,a14                   # [30] // high1 + carry1
+
+
+    l32i.n          a14,a3,12               # [31]  id:203 // shift3
+    add.n           a9,a13,a9                   # [32] // add nudge low2
+    max             a10,a10,a14                 # [33]  // left_shift3
+    sub             a14,a10,a14                 # [34]  // right_shift3
+    ssl             a10                         # [35]
+    ee.movi.32.a    q0,a10,3            # [36]
+    sll             a10,a10                     # [37] // value3 << left_shift3
+
+// select high32 from result0 and result1
+    ssai            31                          # [39]
+    src             a5,a5,a12                   # [40] // (high0:low0) >> 31
+    src             a4,a4,a15                   # [41] // (high1:low1) >> 31
+    movi.n          a12,1                   # [42]
+    ee.movi.32.q    q0,a5,0             # [43]
+    saltu           a15,a9,a13                  # [44] // carry2
+    add.n           a15,a15,a11                 # [45] // high2 + carry2
+    ee.movi.32.q    q0,a4,1             # [46]
+    l32i.n          a11,a2,12               # [47]  id:207 // mult3
+    src             a15,a15,a9                  # [48] // (high2:low2) >> 31
+    ee.movi.32.q    q0,a15,2            # [49]
+    mull            a9,a11,a10                  # [50] // low3
+    mulsh           a11,a11,a10                 # [51] // high3
+    add.n           a9,a13,a9                   # [52] // low3 += nudge
+    saltu           a13,a9,a13                  # [53] // carry3 (a13 no longer holds nudge)
+    add.n           a13,a13,a11                 # [54] // high3 + carry3
+    src             a13,a13,a9                  # [55] // (high3:low3) >> 31
+    ee.movi.32.q    q0,a13,3            # [57]
+
+// divide_by_power_of2_step
+    ssl             a8                          # [56]
+    sll             a9,a12                      # [58] // 1 << right_shift0
+    ssl             a7                          # [59]
+    addi.n          a9,a9,-1                # [60] // mask0 = (1 << right_shift0) - 1
+    ee.movi.32.q    q2,a9,0             # [61]
+    sll             a11,a12                     # [62]
+    addi.n          a11,a11,-1              # [63] // mask1
+    ssl             a6                          # [64]
+    sll             a10,a12                     # [65]
+    ee.movi.32.q    q2,a11,1            # [66]
+    ssl             a14                         # [67]
+    addi.n          a10,a10,-1              # [68] // mask2
+    ee.movi.32.q    q2,a10,2            # [69]
+    sll             a9,a12                      # [70]
+    addi.n          a9,a9,-1                # [71] // mask3
+    ee.movi.32.q    q2,a9,3             # [74]
+    ee.andq         q1,q0,q2                # [75] // q1 = remainder bits of each lane
+
+    ssr             a8                          # [72]
+    sra             a5,a5                       # [73] // result0 >> right_shift0 (arithmetic)
+    ssr             a7                          # [76]
+    sra             a4,a4                       # [78] // result1 >> right_shift1
+    ssr             a6                          # [79]
+    sra             a15,a15                     # [81] // result2 >> right_shift2
+    ssr             a14                         # [82]
+    sra             a13,a13                     # [84] // result3 >> right_shift3
+    wsr.sar         a12                     # [85] // SAR = 1 for the mask shift below
+
+    ee.movi.32.q    q7,a5,0             # [77]
+    ee.movi.32.q    q7,a4,1             # [80]
+    ee.movi.32.q    q7,a15,2            # [83]
+    ee.movi.32.q    q7,a13,3            # [86]
+
+    ee.vcmp.lt.s32  q3,q7,q3        # [87] // q3 = mask of negative shifted results
+    ee.vsr.32       q2,q2                   # [88] // threshold = mask >> 1
+    ee.vsubs.s32    q2,q2,q3            # [89] // threshold minus the negative-lane mask
+    ee.vcmp.gt.s32  q1,q1,q2        # [90] // q1 = mask of lanes where remainder > threshold
+    ee.vsubs.s32    q0,q7,q1            # [91] // q0 = shifted result minus that mask (rounding step)
+
+// return
+    retw.n                          # [92]
+
+    .size   esp_nn_multiply_by_quantized_mult_ver1_esp32s3, . - esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c
index 52e20fa..1ddf4ba 100644
--- a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_esp32s3.c
@@ -1,18 +1,52 @@
 #include "edge-impulse-sdk/classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN
-// Copyright 2020-2021 Espressif Systems (Shanghai) PTE LTD
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+/*
+ * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Optimization strategies used:
+ * Below optimizations are capable of any size of input/filter:
+ *
+ * 1. For filter wdxht = 1x1 (Refer esp_nn_conv_s8_mult8_1x1_esp32s3 function)
+ *      - For this specific version, the strategy we employ:
+ *          > This particular filter has only the channel
+ *              dimension and we have `out_ch` number of such filters.
+ *          > We take 8 input lines at a time and transpose those.
+ *          > Keep loading and multiplying filter values one by one,
+ *              to produce 8 outputs in parallel
+ *
+ * 2. General version: (Refer esp_nn_conv_s8_filter_aligned_input_padded_esp32s3)
+ *      - For all other cases:
+ *          > Consider `filter_wd * in_ch` as a single row. These many values can
+ *              be continuously loaded from inputs as well.
+ *          > multiply accumulate into a single filter output.
+ *          > To speed things up further, we pre-calculate
+ *              (filter * in_offset + bias term) earlier and add it at the end of filter
+ *
+ *      About ((filter * in_offset + bias term)) accumulate term:
+ *          > The conv operation before requantization is as follows:
+ *              for i in filter_size:
+ *                  conv_out += (input + input_offset) * filter;
+ *               conv_out += bias
+ *
+ *          > where input_offset is constant term hence, we can see that
+ *              this term can be precalculated as:
+ *                  for i in filter_size:
+ *                      acc_term += input_offset * filter[i];
+ *                  acc_term += bias
+ *              OR
+ *                   for i in filter_size:
+ *                      acc_term += filter[i]; // accumulate filter values
+ *                  acc_term = acc_term * input_offset + bias
+ *
+ *
+ * In both the above versions we align the filter if needed, pad the input with
+ *       -input_offset if needed and extend the channels to make those multiple
+ *       of 8/16 as per function needs
+ */
 
 #include <stdio.h>
 #include <edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn_defs.h>
@@ -21,324 +55,47 @@
 
 static int16_t *scratch_buffer = NULL;
 
-extern void esp_nn_conv_s8_mult8_1x1_esp32s3(const int8_t *input_data,
-                                             const uint16_t input_wd,
-                                             const uint16_t input_ht,
-                                             const uint16_t in_channels,
-                                             const int32_t input_offset,
-                                             const int8_t *filter_aligned,
-                                             const int32_t *bias,
-                                             int8_t *out_data,
-                                             const uint16_t out_wd,
-                                             const uint16_t out_ht,
-                                             const uint16_t out_channels,
-                                             const int32_t out_offset,
-                                             const int32_t *out_shift,
-                                             const int32_t *out_mult,
-                                             const int32_t activation_min,
-                                             const int32_t activation_max,
-                                             void *buffer /* scratch buffer */);
-
-extern void esp_nn_conv_s16_mult4_1x1_esp32s3(const int16_t *input_data,
-                                              const uint16_t input_wd,
-                                              const uint16_t input_ht,
-                                              const uint16_t in_channels,
-                                              const int16_t *filter_data,
-                                              const int32_t *bias,
-                                              int8_t *out_data,
-                                              const uint16_t out_wd,
-                                              const uint16_t out_ht,
-                                              const uint16_t out_channels,
-                                              const int32_t out_offset,
-                                              const int32_t *out_shift,
-                                              const int32_t *out_mult,
-                                              const int32_t activation_min,
-                                              const int32_t activation_max,
-                                              void *buffer /* scratch buffer */);
-
-extern void esp_nn_conv_s16_mult8_esp32s3(const int16_t *input_data,
-                                          const uint16_t input_wd,
-                                          const uint16_t input_ht,
-                                          const uint16_t in_channels,
-                                          const uint16_t pad_wd,
-                                          const uint16_t pad_ht,
-                                          const uint16_t stride_wd,
-                                          const uint16_t stride_ht,
-                                          const int16_t *filter_data,
-                                          const uint16_t filter_wd,
-                                          const uint16_t filter_ht,
-                                          const int32_t *bias,
-                                          int8_t *out_data,
-                                          const uint16_t out_wd,
-                                          const uint16_t out_ht,
-                                          const uint16_t out_channels,
-                                          const int32_t out_offset,
-                                          const int32_t *out_shift,
-                                          const int32_t *out_mult,
-                                          const int32_t activation_min,
-                                          const int32_t activation_max);
-
-extern void esp_nn_aligned_s8_to_s16_with_offset_esp32s3(const int8_t *src, int16_t *dst,
-                                                         const int size, const int32_t offset);
-
-extern void esp_nn_s8_to_s16_esp32s3(const int8_t *src, int16_t *dst, const int size);
-
-static void esp_nn_conv_s8_unrolled(const data_dims_t *input_dims,
-                                    const int8_t *input_data,
-                                    const data_dims_t *filter_dims,
-                                    const int8_t *filter_data,
-                                    const int32_t *bias,
-                                    const data_dims_t *output_dims,
-                                    int8_t *out_data,
-                                    const conv_params_t *conv_params,
-                                    const quant_data_t *quant_data)
-{
-    const uint16_t input_wd = input_dims->width;
-    const uint16_t input_ht = input_dims->height;
-    const uint16_t in_ch = input_dims->channels;
-    const int32_t input_offset = conv_params->in_offset;
-    const int32_t out_offset = conv_params->out_offset;
-    const uint16_t pad_wd = conv_params->padding.width;
-    const uint16_t pad_ht = conv_params->padding.height;
-    const uint16_t stride_wd = conv_params->stride.width;
-    const uint16_t stride_ht = conv_params->stride.height;
-    const uint16_t filter_wd = filter_dims->width;
-    const uint16_t filter_ht = filter_dims->height;
-    const uint16_t out_wd = output_dims->width;
-    const uint16_t out_ht = output_dims->height;
-    const uint16_t out_ch = output_dims->channels;
-    const int32_t *out_shift = quant_data->shift;
-    const int32_t *out_mult = quant_data->mult;
-    const int32_t activation_min = conv_params->activation.min;
-    const int32_t activation_max = conv_params->activation.max;
-
-    int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
-
-    for (out_y = 0; out_y < out_ht; out_y++) {
-        for (out_x = 0; out_x < out_wd; out_x++) {
-            for (out_ch_idx = 0; out_ch_idx < out_ch; out_ch_idx++) {
-                int32_t conv_out = 0;
-
-                const int32_t base_y = stride_ht * out_y - pad_ht;
-                const int32_t base_x = stride_wd * out_x - pad_wd;
-
-                const int32_t filter_y_start = max(0, -base_y);
-                const int32_t filter_x_start = max(0, -base_x);
-
-                const int32_t filter_y_end = min(filter_ht, input_ht - base_y);
-                const int32_t filter_x_end = min(filter_wd, input_wd - base_x);
-
-                for (filter_y_idx = filter_y_start; filter_y_idx < filter_y_end; filter_y_idx++) {
-                    for (filter_x_idx = filter_x_start; filter_x_idx < filter_x_end; filter_x_idx++) {
-                        const int32_t in_row = base_y + filter_y_idx;
-                        const int32_t in_col = base_x + filter_x_idx;
-                        int32_t input_base_offset = (in_row * input_wd + in_col) * in_ch;
-                        int32_t filter_base_offset = out_ch_idx * in_ch * filter_ht * filter_wd +
-                                                       (filter_y_idx * filter_wd + filter_x_idx) * in_ch;
-                        for (in_ch_idx = 0; in_ch_idx < in_ch; in_ch_idx++) {
-                            conv_out +=
-                                (input_data[input_base_offset + in_ch_idx] + input_offset) *
-                                filter_data[filter_base_offset + in_ch_idx];
-                        }
-                    }
-                }
-                if (bias) {
-                    conv_out += bias[out_ch_idx];
-                }
-                conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
-                conv_out += out_offset;
-                conv_out = max(conv_out, activation_min);
-                conv_out = min(conv_out, activation_max);
-                *out_data++ = (int8_t) conv_out;
-            }
-        }
-    }
-}
-
-static void esp_nn_conv_s8_pad_valid(const int8_t *input_data,
-                                     const uint16_t input_wd,
-                                     const uint16_t input_ht,
-                                     const uint16_t in_channels,
-                                     const int32_t input_offset,
-                                     const uint16_t stride_wd,
-                                     const uint16_t stride_ht,
-                                     const int8_t *filter_data,
-                                     const uint16_t filter_wd,
-                                     const uint16_t filter_ht,
-                                     const int32_t *bias,
-                                     int8_t *out_data,
-                                     const uint16_t out_wd,
-                                     const uint16_t out_ht,
-                                     const uint16_t out_channels,
-                                     const int32_t out_offset,
-                                     const int32_t *out_shift,
-                                     const int32_t *out_mult,
-                                     const int32_t activation_min,
-                                     const int32_t activation_max)
-{
-    int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
-
-    for (out_y = 0; out_y < out_ht; out_y++) {
-        for (out_x = 0; out_x < out_wd; out_x++) {
-            for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
-                int32_t conv_out = 0;
-
-                const int32_t base_y = stride_ht * out_y;
-                const int32_t base_x = stride_wd * out_x;
-
-                for (filter_y_idx = 0; filter_y_idx < filter_ht; filter_y_idx++) {
-                    for (filter_x_idx = 0; filter_x_idx < filter_wd; filter_x_idx++) {
-                        const int32_t in_row = base_y + filter_y_idx;
-                        const int32_t in_col = base_x + filter_x_idx;
-                        int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels;
-                        int32_t filter_base_offset = out_ch_idx * in_channels * filter_ht * filter_wd +
-                                                       (filter_y_idx * filter_wd + filter_x_idx) * in_channels;
-                        const int8_t *input_data_ptr = input_data + input_base_offset;
-                        const int8_t *filter_data_ptr = filter_data + filter_base_offset;
-                        for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) {
-                            conv_out += (*input_data_ptr++ + input_offset) * *filter_data_ptr++;
-                        }
-                    }
-                }
-                if (bias) {
-                    conv_out += bias[out_ch_idx];
-                }
-                conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
-                conv_out += out_offset;
-                conv_out = max(conv_out, activation_min);
-                conv_out = min(conv_out, activation_max);
-                *out_data++ = (int8_t) conv_out;
-            }
-        }
-    }
-}
-
-static void esp_nn_conv_s8_pad_valid_3x3(const int8_t *input_data,
-                                         const uint16_t input_wd,
-                                         const uint16_t input_ht,
-                                         const uint16_t in_channels,
-                                         const int32_t input_offset,
-                                         const uint16_t stride_wd,
-                                         const uint16_t stride_ht,
-                                         const int8_t *filter_data,
-                                         const int32_t *bias,
-                                         int8_t *out_data,
-                                         const uint16_t out_wd,
-                                         const uint16_t out_ht,
-                                         const uint16_t out_channels,
-                                         const int32_t out_offset,
-                                         const int32_t *out_shift,
-                                         const int32_t *out_mult,
-                                         const int32_t activation_min,
-                                         const int32_t activation_max)
-{
-    int32_t out_ch_idx, out_y, out_x, in_ch_idx, filter_y_idx, filter_x_idx;
-
-    for (out_y = 0; out_y < out_ht; out_y++) {
-        for (out_x = 0; out_x < out_wd; out_x++) {
-            const int32_t base_y = stride_ht * out_y;
-            const int32_t base_x = stride_wd * out_x;
-            for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
-                int32_t conv_out = 0;
-                for (filter_y_idx = 0; filter_y_idx < 3; filter_y_idx++) {
-                    for (filter_x_idx = 0; filter_x_idx < 3; filter_x_idx++) {
-                        const int32_t in_row = base_y + filter_y_idx;
-                        const int32_t in_col = base_x + filter_x_idx;
-                        int32_t input_base_offset = (in_row * input_wd + in_col) * in_channels;
-                        int32_t filter_base_offset = out_ch_idx * in_channels * 3 * 3 +
-                                                       (filter_y_idx * 3 + filter_x_idx) * in_channels;
-                        const int8_t *input_data_ptr = input_data + input_base_offset;
-                        const int8_t *filter_data_ptr = filter_data + filter_base_offset;
-                        for (in_ch_idx = 0; in_ch_idx < in_channels; in_ch_idx++) {
-                            conv_out += (*input_data_ptr++ + input_offset) * *filter_data_ptr++;
-                        }
-                    }
-                }
-                if (bias) {
-                    conv_out += bias[out_ch_idx];
-                }
-                conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
-                conv_out += out_offset;
-                conv_out = max(conv_out, activation_min);
-                conv_out = min(conv_out, activation_max);
-                *out_data++ = (int8_t) conv_out;
-            }
-        }
-    }
-}
-
-static void esp_nn_conv_s8_pad_valid_ch3_3x3(const int8_t *input_data,
-                                             const uint16_t input_wd,
-                                             const uint16_t input_ht,
-                                             const int32_t input_offset,
-                                             const uint16_t stride_wd,
-                                             const uint16_t stride_ht,
-                                             const int8_t *filter_data,
-                                             const int32_t *bias,
-                                             int8_t *out_data,
-                                             const uint16_t out_wd,
-                                             const uint16_t out_ht,
-                                             const uint16_t out_channels,
-                                             const int32_t out_offset,
-                                             const int32_t *out_shift,
-                                             const int32_t *out_mult,
-                                             const int32_t activation_min,
-                                             const int32_t activation_max)
-{
-    int32_t out_ch_idx, out_y, out_x, filter_y_idx;
-
-    /* use scratch_buffer to pre-compute offset factor */
-    int16_t *filter_sum = (int16_t *) scratch_buffer;
-    const int8_t *filter_ptr = filter_data;
-    for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
-        int16_t sum_val = 0;
-        for (int i = 0; i < 9; i++) {
-            sum_val += *filter_ptr++;
-            sum_val += *filter_ptr++;
-            sum_val += *filter_ptr++;
-        }
-        *filter_sum++ = sum_val;
-    }
-
-    for (out_y = 0; out_y < out_ht; out_y++) {
-        for (out_x = 0; out_x < out_wd; out_x++) {
-            const int8_t *filter_data_ptr = filter_data;
-            const int32_t base_y = stride_ht * out_y;
-            const int32_t base_x = stride_wd * out_x;
-            const int8_t *input_base_ptr = input_data + (base_y * input_wd + base_x) * 3;
-            int16_t *filter_sum = (int16_t *) scratch_buffer;
-            for (out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
-                int32_t conv_out = 0;
-
-                for (filter_y_idx = 0; filter_y_idx < 3; filter_y_idx++) {
-                    const int8_t *input_data_ptr = input_base_ptr + (filter_y_idx * input_wd) * 3;
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                    conv_out += (*input_data_ptr++) * (*filter_data_ptr++);
-                }
-
-                conv_out += *filter_sum++ * input_offset;
-
-                if (bias) {
-                    conv_out += bias[out_ch_idx];
-                }
-                conv_out = esp_nn_multiply_by_quantized_mult_fast(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
-                conv_out += out_offset;
-                conv_out = max(conv_out, activation_min);
-                conv_out = min(conv_out, activation_max);
-                *out_data++ = (int8_t) conv_out;
-            }
-        }
-    }
-}
+extern void esp_nn_conv_s8_mult8_1x1_esp32s3( /* kernel for the 1x1-filter case (strategy 1 in the header comment); only declared here */
+                const int8_t *input_data, /* caller below passes a channel-padded copy when channels % 8 != 0 */
+                const uint16_t input_wd,
+                const uint16_t input_ht,
+                const uint16_t in_channels, /* caller below always passes a multiple of 8 */
+                const int32_t input_offset,
+                const int8_t *filter_aligned, /* caller passes filter_data itself only when channels % 8 == 0 and its address is 8-byte aligned; otherwise a copy placed in the scratch buffer */
+                const int32_t *bias,
+                int8_t *out_data,
+                const uint16_t out_wd,
+                const uint16_t out_ht,
+                const uint16_t out_channels,
+                const int32_t out_offset,
+                const int32_t *out_shift,
+                const int32_t *out_mult,
+                const int32_t activation_min,
+                const int32_t activation_max,
+                void *buffer /* scratch buffer; caller passes the space that follows any filter/input copies */);
+
+extern void esp_nn_conv_s8_filter_aligned_input_padded_esp32s3( /* general-case kernel (strategy 2 in the header comment); only declared here */
+                const int8_t *input_data, /* caller below pre-pads the input with -input_offset when pad_wd or pad_ht is non-zero */
+                const uint16_t input_wd, /* caller passes input_wd + 2 * pad_wd when it padded the input */
+                const uint16_t input_ht, /* caller passes input_ht + 2 * pad_ht when it padded the input */
+                const uint16_t in_channels,
+                const int32_t input_offset,
+                const uint16_t stride_wd,
+                const uint16_t stride_ht,
+                const int8_t *filter_data, /* caller zero-pads each `filter_wd * channels` row to a multiple of 16 bytes, or copies an unaligned filter into scratch */
+                const uint16_t filter_wd,
+                const uint16_t filter_ht,
+                const int32_t *bias,
+                int8_t *out_data,
+                const uint16_t out_wd,
+                const uint16_t out_ht,
+                const uint16_t out_channels,
+                const int32_t out_offset,
+                const int32_t *out_shift,
+                const int32_t *out_mult,
+                const int32_t activation_min,
+                const int32_t activation_max,
+                void *scratch_buffer); /* NOTE(review): presumably holds the precomputed per-channel accumulate terms (the size function reserves out_ch * 4 bytes) - confirm */
 
 int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
                                          const data_dims_t *filter_dims,
@@ -356,19 +113,37 @@ int esp_nn_get_conv_scratch_size_esp32s3(const data_dims_t *input_dims,
     const uint16_t stride_wd = conv_params->stride.width;
     const uint16_t stride_ht = conv_params->stride.height;
 
-    int filter_size = filter_wd * filter_ht * in_ch * out_ch;
-    int input_size = input_wd * input_ht * in_ch;
+    int new_channels = (in_ch + 7) & ~7;
+
+    int input_scratch = input_wd * input_ht * in_ch;
+    int filter_scratch = filter_wd * filter_ht * in_ch * out_ch;
 
-    int transpose_buf_size = 2 * (8 * in_ch); /* to store intermediate data */
-    if (input_wd * input_ht < 8) {
-        transpose_buf_size = 0; // not using this for leftover
-    }
     int align_buf_size = 32; /* extra buffer for alignment */
-    if (in_ch % 8 == 0 && filter_wd == 1 && filter_ht == 1 &&
-            pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
-        return filter_size + transpose_buf_size + align_buf_size;
+    if ((filter_wd == 1 && filter_ht == 1 && pad_wd == 0 && pad_ht == 0) &&
+            (stride_wd == 1 && stride_ht == 1)) {
+        int transpose_buf_size = 2 * (8 * new_channels); /* to store intermediate data */
+        if (input_wd * input_ht < 8) {
+            transpose_buf_size = 0; // not using this for leftover
+        }
+        if (in_ch % 8) {
+            input_scratch = input_wd * input_ht * new_channels;
+        } else {
+            input_scratch = 0;
+        }
+        filter_scratch = new_channels * out_ch;
+        return input_scratch + filter_scratch + transpose_buf_size + align_buf_size;
+    } else {
+        new_channels = (in_ch + 15) & ~15;
+        if (pad_wd == 0 && pad_ht == 0) {
+            input_scratch = 0;
+        } else {
+            input_scratch = (input_wd + 2 * pad_wd) * (input_ht + 2 * pad_ht) * in_ch;
+        }
+        filter_scratch = filter_wd * filter_ht * new_channels * out_ch;
+        int offset_acc_scratch = out_ch * 4;
+        return input_scratch + filter_scratch + align_buf_size + offset_acc_scratch;
     }
-    return 2 * (filter_size + input_size) +  transpose_buf_size + align_buf_size;
+    return align_buf_size;
 }
 
 void esp_nn_set_conv_scratch_buf_esp32s3(void *buf)
@@ -386,6 +161,10 @@ void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
                             const conv_params_t *conv_params,
                             const quant_data_t *quant_data)
 {
+    if (scratch_buffer == NULL) {
+        printf("esp_nn_conv error! scratch_buffer not set!\n");
+        return;
+    }
     const uint16_t input_wd = input_dims->width;
     const uint16_t input_ht = input_dims->height;
     const uint16_t channels = input_dims->channels;
@@ -406,61 +185,88 @@ void esp_nn_conv_s8_esp32s3(const data_dims_t *input_dims,
     const int32_t activation_max = conv_params->activation.max;
 
     int filter_size = filter_wd * filter_ht * channels * out_channels;
-    int input_size = input_wd * input_ht * channels;
-    int align_len = 16 - (filter_size & 15);
-    int16_t *filter_data16 = scratch_buffer;
-    int16_t *input_data16 = scratch_buffer + filter_size + align_len;
 
-    if (scratch_buffer == NULL) {
-        printf("esp_nn_conv error! scratch_buffer not set!\n");
-        return;
-    }
+    if (filter_wd == 1 && filter_ht == 1 && pad_wd == 0 && pad_ht == 0 &&
+            stride_wd == 1 && stride_ht == 1) {
 
-    if (channels % 8 == 0 && filter_wd == 1 && filter_ht == 1 &&
-            pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
+        int8_t *input_aligned = (int8_t *) input;
+        int8_t *scratch_buf = (int8_t *) scratch_buffer;
         int8_t *filter_aligned = (int8_t *) scratch_buffer;
-        int scratch_offset = (int) (filter_aligned + filter_size);
-        void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15));
-        memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address
-        esp_nn_conv_s8_mult8_1x1_esp32s3(
-            input, input_wd, input_ht, channels, input_offset, filter_aligned,
-            bias, out_data, out_wd, out_ht, out_channels, out_offset,
-            out_shift, out_mult, activation_min, activation_max, scratch_buf);
-    } else if (channels % 4 == 0 && filter_wd == 1 && filter_ht == 1 &&
-            (input_wd * input_ht) % 4 == 0 && /* TODO: remove this check */
-            pad_wd == 0 && pad_ht == 0 && stride_wd == 1 && stride_ht == 1) {
-        int scratch_offset = (int) (input_data16 + input_size);
-        void *scratch_buf = (void *) (scratch_offset + 16 - (scratch_offset & 15));
-        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
-        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset);
-        esp_nn_conv_s16_mult4_1x1_esp32s3(
-            input_data16, input_wd, input_ht, channels, filter_data16,
-            bias, out_data, out_wd, out_ht, out_channels, out_offset,
-            out_shift, out_mult, activation_min, activation_max, scratch_buf);
-    } else if (channels % 8 == 0) {
-        esp_nn_s8_to_s16_esp32s3(filter_data, filter_data16, filter_size);
-        esp_nn_aligned_s8_to_s16_with_offset_esp32s3(input, input_data16, input_size, input_offset);
-        esp_nn_conv_s16_mult8_esp32s3(
-            input_data16, input_wd, input_ht, channels, pad_wd, pad_ht,
-            stride_wd, stride_ht, filter_data16, filter_wd, filter_ht, bias,
-            out_data, out_wd, out_ht, out_channels, out_offset, out_shift,
-            out_mult, activation_min, activation_max);
-    } else if (pad_wd == 0 && pad_ht == 0) {
-        if (filter_wd == 3 && filter_ht == 3 && channels == 3) {
-            esp_nn_conv_s8_pad_valid_ch3_3x3(input, input_wd, input_ht, input_offset,
-                                             stride_wd, stride_ht, filter_data, bias,
-                                             out_data, out_wd, out_ht, out_channels, out_offset,
-                                             out_shift, out_mult, activation_min, activation_max);
+        int new_channels = channels;
+        if (channels % 8 == 0) {
+            if ((int) filter_data & 7) { // if the filter_data is not aligned to 8 bytes
+                int scratch_offset = (int) (filter_aligned + filter_size);
+                scratch_buf = (int8_t *) (scratch_offset + 16 - (scratch_offset & 15));
+                memcpy(filter_aligned, filter_data, filter_size); // copy to aligned address
+            } else {
+                filter_aligned = (int8_t *) filter_data;
+            }
         } else {
-            esp_nn_conv_s8_pad_valid(input, input_wd, input_ht, channels, input_offset,
-                                     stride_wd, stride_ht, filter_data, filter_wd, filter_ht, bias,
-                                     out_data, out_wd, out_ht, out_channels, out_offset, out_shift,
-                                     out_mult, activation_min, activation_max);
+            // pad extra channel to make it multiple of 8. Both input and filter
+            new_channels = (channels + 7) & ~7;
+            for (int out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
+                memcpy(filter_aligned, filter_data, channels);
+                memset(filter_aligned + channels, 0, new_channels - channels);
+                filter_aligned += new_channels;
+                filter_data += channels;
+            }
+            filter_aligned = (int8_t *) scratch_buffer;
+            int filter_data_size = new_channels * out_channels;
+            input_aligned = filter_aligned + filter_data_size;
+            for (int input_idx = 0; input_idx < input_ht * input_wd; input_idx++) {
+                memcpy(input_aligned, input, channels);
+                memset(input_aligned + channels, 0, new_channels - channels);
+                input_aligned += new_channels;
+                input += channels;
+            }
+            input_aligned = filter_aligned + filter_data_size;
+            scratch_buf = input_aligned +  input_ht * input_wd * new_channels;
         }
+        esp_nn_conv_s8_mult8_1x1_esp32s3(
+            input_aligned, input_wd, input_ht, new_channels, input_offset,
+            filter_aligned, bias, out_data, out_wd, out_ht, out_channels, out_offset,
+            out_shift, out_mult, activation_min, activation_max, scratch_buf);
     } else {
-        /* Basic unrolled version */
-        esp_nn_conv_s8_unrolled(input_dims, input, filter_dims, filter_data,
-                                bias, output_dims, out_data, conv_params, quant_data);
+        // align the `filter width * channels` to 16 bytes. Do zero padding for the same
+        int32_t filter_row_size = filter_wd * channels;
+        int32_t filter_alignment_padding = 16 - (filter_row_size & 15);
+        int8_t *filter_data_aligned = (int8_t *) filter_data;
+        int8_t *input_padded = (int8_t *) input;
+        int8_t *scratch_data = (int8_t *) scratch_buffer;
+        int new_input_wd = input_wd, new_input_ht = input_ht;
+        if (filter_alignment_padding != 16) {
+            // pad filter_data
+            int32_t new_row_size = filter_wd * channels + filter_alignment_padding;
+            filter_data_aligned = scratch_data;
+            int8_t *row_ptr = filter_data_aligned;
+            for (int32_t ch_idx = 0; ch_idx < out_channels; ch_idx++) {
+                for (int32_t row_idx = 0; row_idx < filter_ht; row_idx++) {
+                    memcpy(row_ptr, filter_data, filter_row_size);
+                    memset(row_ptr + filter_row_size, 0, new_row_size - filter_row_size);
+                    filter_data += filter_row_size;
+                    row_ptr += new_row_size;
+                }
+            }
+            scratch_data += new_row_size * filter_ht * out_channels;
+            filter_row_size = new_row_size;
+        } else if ( (int) filter_data & 15) {
+            filter_data_aligned = scratch_data;
+            memcpy(filter_data_aligned, filter_data, filter_size);
+            scratch_data += filter_size;
+        }
+        if (pad_wd != 0 || pad_ht != 0) { // need padding
+            input_padded = (int8_t *) scratch_data;
+            esp_nn_aligned_s8_pad_with_value(input, input_padded, input_wd, input_ht, channels,
+                                            -input_offset, pad_wd, pad_ht);
+            new_input_wd = input_wd + 2 * pad_wd;
+            new_input_ht = input_ht + 2 * pad_ht;
+            scratch_data += new_input_wd * new_input_ht * channels;
+        }
+        esp_nn_conv_s8_filter_aligned_input_padded_esp32s3(
+            input_padded, new_input_wd, new_input_ht, channels, input_offset,
+            stride_wd, stride_ht, filter_data_aligned, filter_wd, filter_ht,
+            bias, out_data, out_wd, out_ht, out_channels, out_offset,
+            out_shift, out_mult, activation_min, activation_max, scratch_data);
     }
 }
 
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S
new file mode 100644
index 0000000..50c00cc
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult4_1x1_esp32s3.S
@@ -0,0 +1,358 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+    .literal    .nudge_val, 1073741824
+
+    # Program Unit: esp_nn_conv_s16_mult4_1x1_esp32s3
+    .type   esp_nn_conv_s16_mult4_1x1_esp32s3, @function
+    .align   4
+    .global esp_nn_conv_s16_mult4_1x1_esp32s3
+esp_nn_conv_s16_mult4_1x1_esp32s3:  # 0xa62
+    # scratch_buf = 0            : dump area for qacc and for the final q1, read back bytewise
+    # to_add = 32                : rounding term that is broadcast before the right shift
+    # gra_spill_temp_139 = 36    : in_channels * 2
+    # gra_spill_temp_140 = 40    : input_data (a2)
+    # gra_spill_temp_141 = 44    : out_channels * 2
+    # gra_spill_temp_155 = 48    : out_mult + 4 * out_channels (channel loop end pointer)
+    # gra_spill_temp_156 = 52    : in_channels - 3
+    # gra_spill_temp_144 = 56    : in_channels * 4
+    # gra_spill_temp_145 = 60    : input_wd * input_ht - 3
+    # gra_spill_temp_146 = 64    : in_channels * 8
+    # gra_spill_temp_147 = 68    : filter_data (a6)
+    # gra_spill_temp_148 = 72    : out_channels * 3
+    # gra_spill_temp_149 = 76    : running sum of in_channels * 4 (updated per pass, not read elsewhere here)
+    # gra_spill_temp_150 = 80    : pixel counter, advanced by 4 per outer pass
+    # gra_spill_temp_151 = 84    : byte offset of the current pixel group in input_data
+    # gra_spill_temp_152 = 88    : in_channels >> 2
+    # gra_spill_temp_153 = 92    : constant 0
+    # lgra_spill_temp_165 = 96   : saved a2 (filter pointer)
+    # lgra_spill_temp_166 = 100  : saved a9 (out_shift pointer)
+    # lgra_spill_temp_167 = 104  : saved a11 (out_mult pointer)
+    # lgra_spill_temp_168 = 108  : saved a13 (output pointer)
+    # gra_spill_temp_158 = 112   : scratch buffer pointer (stack argument 196)
+    # gra_spill_temp_159 = 116   : bias (a7)
+    # gra_spill_temp_160 = 120   : running bias pointer
+ // For each group of 4 pixels: multiply-accumulate input by filter over in_channels, optionally add bias,
+ // requantize with out_mult / out_shift, add out_offset, clamp to the activation range, store 1 byte per channel.
+ // registers:
+ // a2: int16_t *input_data
+ // a3: uint16_t input_wd
+ // a4: uint16_t input_ht
+ // a5: uint16_t in_channels
+ // a6: int16_t *filter_data
+ // a7: int32_t *bias
+ // (a3 * a4 is used as the pixel count; the out_wd / out_ht stack slots are never loaded in this function)
+ // on stack (offsets from a1 after the entry instruction below):
+ // 160: int8_t *out_data
+ // 164: uint16_t out_wd
+ // 168: uint16_t out_ht
+ // 172: uint16_t out_channels
+ // 176: int32_t out_offset
+ // 180: int32_t *out_shift
+ // 184: int32_t *out_mult
+ // 188: int32_t activation_min
+ // 192: int32_t activation_max
+ // 196: *buffer /* scratch buffer */
+ // Pixels are handled in groups of 4; a remainder of (input_wd * input_ht) % 4 pixels is not processed here.
+ // Channels are consumed 4 at a time (in_channels >> 2 passes); presumably callers ensure in_channels % 4 == 0 - TODO confirm.
+    entry   a1,160                      #
+    s32i.n  a2,a1,40                # [0]  gra_spill_temp_140, input_data
+    s32i    a6,a1,68                    # [1]  gra_spill_temp_147, filter_data
+    s32i    a7,a1,116                   # [2]  gra_spill_temp_159, bias
+
+    mul16u  a3,a3,a4                # [3]  a3 = input_wd * input_ht (pixel count)
+    addi    a10,a1,112                  # [4]  a10 is overwritten below before it is read
+    addmi   a11,a1,176                  # [5]  a11 = address of out_offset
+    addmi   a8,a1,176                   # [6]
+    addmi   a9,a1,176                   # [7]
+    addi.n  a9,a9,12                # [8]  a9 = a1 + 188, address of activation_min
+    addi    a8,a8,16                    # [9]  a8 = a1 + 192, address of activation_max
+    ee.vldbc.32 q5,a11              # [10]  id:188 out_offset, kept in q5 for the whole function
+    ee.vldbc.32 q7,a8               # [12]  id:270 activation_max, kept in q7
+    ee.vldbc.32 q6,a9               # [13]  id:269 activation_min, kept in q6
+    blti    a3,4,.Lt_3_6402             # [14]  fewer than 4 pixels: nothing to do, return
+
+.LBB3_esp_nn_conv_s16_mult4_1x1_esp32s3:    # 0xa90
+    l32i    a13,a1,160                  # [0]  id:280 out_data+0x0
+    srai    a8,a5,2                     # [1]  in_channels >> 2
+    addi    a10,a3,-3                   # [2]  pixel count - 3 (outer loop bound)
+    addi    a9,a5,-3                    # [3]  in_channels - 3
+    movi.n  a12,0                   # [4]
+    slli    a11,a5,2                    # [5]  in_channels * 4
+    slli    a15,a5,1                    # [6]  in_channels * 2
+    l16ui   a14,a1,172                  # [7]  id:271 out_channels+0x0
+    s32i.n  a15,a1,36               # [9]  gra_spill_temp_139
+    s32i.n  a11,a1,56               # [10]  gra_spill_temp_144
+    s32i    a12,a1,84                   # [11]  gra_spill_temp_151, input byte offset starts at 0
+    s32i    a9,a1,52                   # [12]  gra_spill_temp_156
+    s32i.n  a10,a1,60               # [13]  gra_spill_temp_145
+    s32i    a8,a1,88                    # [14]  gra_spill_temp_152
+    movi.n  a10,0                   # [15]
+    l32i    a8,a1,196                   # [16]  id:281 buffer+0x0
+    slli    a11,a11,1                   # [19]  in_channels * 8
+    l32i    a15,a1,184                  # [20]  id:192 out_mult+0x0
+    s32i    a11,a1,64                   # [22]  gra_spill_temp_146
+    s32i    a8,a1,112                   # [25]  gra_spill_temp_158
+    s32i    a10,a1,92                   # [26]  gra_spill_temp_153, constant 0
+    movi.n  a8,0                    # [27]
+    s32i    a10,a1,80                   # [31]  gra_spill_temp_150, pixel counter starts at 0
+    s32i    a8,a1,76                    # [32]  gra_spill_temp_149
+    slli    a8,a14,1                    # [34]  out_channels * 2
+    addx2   a9,a14,a14                  # [35]  out_channels * 3
+    s32i    a9,a1,72                    # [36]  gra_spill_temp_148
+    s32i.n  a8,a1,44                # [37]  gra_spill_temp_141
+    addx4   a14,a14,a15                 # [38]  out_mult + 4 * out_channels
+    s32i    a14,a1,48                  # [39]  gra_spill_temp_155
+    j   .Lt_3_6914                      # [40]
+
+.Lt_3_8194: # 0xb00  advance to the next group of 4 pixels
+#<loop> Part of loop body line 305, head labeled .Lt_3_6914
+    l32i.n  a12,a1,60               # [0]  gra_spill_temp_145
+    l32i.n  a9,a1,56                # [1]  gra_spill_temp_144
+    l32i    a8,a1,76                    # [2]  gra_spill_temp_149
+    l32i    a15,a1,64                   # [3]  gra_spill_temp_146
+    l32i    a11,a1,72                   # [4]  gra_spill_temp_148
+    l32i    a14,a1,84                   # [5]  gra_spill_temp_151
+    add.n   a13,a13,a11                 # [6]  output pointer += 3 * out_channels (channel loop already added out_channels)
+    l32i    a11,a1,80                   # [7]  gra_spill_temp_150
+    add.n   a14,a14,a15                 # [8]  input byte offset += in_channels * 8
+    add.n   a8,a8,a9                    # [9]
+    s32i    a8,a1,76                    # [10]  gra_spill_temp_149
+    s32i    a14,a1,84                   # [11]  gra_spill_temp_151
+    addi.n  a11,a11,4               # [12]  pixel counter += 4
+    s32i    a11,a1,80                   # [13]  gra_spill_temp_150
+    bge     a11,a12,.Lt_3_6402          # [14]  counter >= pixel count - 3: done
+
+.Lt_3_6914: # 0xb27  outer loop head, one pass per group of 4 pixels
+    l32i    a12,a1,52                  # [0]  gra_spill_temp_156
+    l32i    a4,a1,112                   # [1]  gra_spill_temp_158, write pointer into the scratch buffer
+    blti    a12,1,.Lt_3_7170            # [2]  in_channels - 3 < 1: skip the transpose
+
+.LBB6_esp_nn_conv_s16_mult4_1x1_esp32s3:    # 0xb30
+    l32i    a3,a1,88                    # [0]  gra_spill_temp_152
+    l32i.n  a5,a1,40                # [1]  gra_spill_temp_140
+    l32i    a2,a1,84                    # [3]  gra_spill_temp_151
+    add.n   a2,a2,a5                    # [7]  a2 = input_data + byte offset of this pixel group
+    l32i.n  a5,a1,36                # [9]  gra_spill_temp_139, stride between loads (in_channels * 2)
+
+    // load and transpose 4 lines of input 4xchannels,
+    loopgtz a3,.transpose_loop_end      # in_channels >> 2 iterations
+    mov.n   a3,a2                       # [0*II+0]
+    ee.vld.l.64.xp  q0,a3,a5        # [0*II+2]  id:282
+    ee.vld.l.64.xp  q1,a3,a5        # [0*II+3]  id:283
+    ee.vld.l.64.xp  q2,a3,a5        # [0*II+4]  id:284
+    ee.vld.l.64.xp  q3,a3,a5        # [0*II+5]  id:285
+    ee.vzip.16      q0,q1               # [0*II+6]
+    ee.vzip.16      q2,q3               # [0*II+7]
+    ee.vzip.32      q0,q2               # [0*II+8]
+    ee.vst.128.ip   q0,a4,16            # [0*II+9]  id:286
+    ee.vst.128.ip   q2,a4,16            # [0*II+10]  id:287
+    addi.n  a2,a2,8                 # [0*II+1]  next 8 bytes of each of the 4 source lines
+.transpose_loop_end:
+
+.Lt_3_7170: # 0xb7c  set up the loop over output channels
+    l32i    a2,a1,68                    # [0]  gra_spill_temp_147, a2 = filter_data
+    l32i    a9,a1,116                   # [1]  gra_spill_temp_159
+    l16ui   a8,a1,172                   # [2]  out_channels
+    s32i    a9,a1,120                   # [3]  gra_spill_temp_160, running bias pointer restarts at bias
+    beqz.n  a8,.Lt_3_8194           # [4]  no output channels: go to the next pixel group
+
+    l32i    a9,a1,180                # [0]  out_shift
+    l32i    a11,a1,184               # [1]  out_mult
+    l32i    a15,a1,72                   # [2]  gra_spill_temp_148
+    l32i.n  a14,a1,44               # [3]  gra_spill_temp_141
+    add.n   a15,a15,a13                 # [4]  a15 = output pointer + 3 * out_channels
+    add.n   a14,a14,a13                 # [5]  a14 = output pointer + 2 * out_channels
+    j   .Lt_3_8706                      # [6]
+
+.Lt_3_10754:    # 0xb9a  reached when the out_shift value is >= 1
+
+    movi.n  a3,0                    # [0]  right shift amount = 0
+
+.Lt_3_10498:    # 0xb9c
+ // on entry: q1 = 4 accumulators, a4 = current out_mult value, a12 = current out_shift value, a3 = right shift
+// esp_nn_multiply_by_quantized_mult_esp32s3
+    ee.zero.q   q0                      # [0]
+    l32i        a5,a1,92                    # [1]  gra_spill_temp_153, a5 = 0
+    s32i        a2,a1,96                   # [2]  lgra_spill_temp_165
+    s32i        a11,a1,104                  # [3]  lgra_spill_temp_167
+    s32i        a13,a1,108                  # [4]  lgra_spill_temp_168
+    s32i        a9,a1,100                   # [5]  lgra_spill_temp_166
+
+    movi.n          a13,0                   # [6]
+    max             a12,a12,a13                 # [7]  left shift = max(out_shift value, 0)
+    wsr.sar         a12                     # [8]
+    ee.vsl.32       q1,q1                   # [9]  shift the 4 accumulators left by SAR
+    ssai            31                          # [10]  SAR = 31 for the src instructions below
+    ee.movi.32.a    q1,a7,0             # [11]  a7 = lane 0
+    ee.movi.32.a    q1,a8,1             # [12]  a8 = lane 1
+    ee.movi.32.a    q1,a6,3             # [13]  a6 = lane 3
+    ee.movi.32.a    q1,a9,2             # [14]  a9 = lane 2
+    mulsh           a12,a4,a9                   # [15]  high word of lane 2 * out_mult value
+    mulsh           a11,a4,a6                   # [16]  high word of lane 3 * out_mult value
+    mulsh           a2,a4,a8                    # [17]  high word of lane 1 * out_mult value
+    mulsh           a13,a7,a4                   # [18]  high word of lane 0 * out_mult value
+    mull            a8,a4,a8                    # [19]  low word, lane 1
+    mull            a7,a7,a4                    # [20]  low word, lane 0
+    mull            a6,a4,a6                    # [24]  low word, lane 3
+
+    add.n           a11,a5,a11                  # [21]  a5 is 0 here, high words are unchanged
+    add.n           a12,a5,a12                  # [22]
+    add.n           a2,a5,a2                    # [23]
+    add.n           a5,a5,a13                   # [25]  a5 = high word, lane 0
+
+    l32r            a13,.nudge_val              # a13 = 1073741824 (1 << 30)
+    mull            a9,a4,a9                    # [27]  low word, lane 2
+
+    add.n           a6,a13,a6                   # [28]  low words + nudge
+    add.n           a9,a13,a9                   # [29]
+    add.n           a10,a13,a7                   # [30]
+    add.n           a8,a13,a8                   # [32]
+
+    saltu           a7,a10,a13                   # [33]  carry out of the low-word add, lane 0
+    add.n           a7,a7,a5                    # [34]  high word + carry, lane 0
+    saltu           a5,a8,a13                   # [35]  carry, lane 1
+    add.n           a5,a5,a2                    # [36]
+    src             a5,a5,a8                    # [37]  lane 1: 64-bit (high:low) shifted right by 31
+    saltu           a2,a9,a13                   # [38]  carry, lane 2
+    add.n           a2,a2,a12                   # [40]
+    saltu           a13,a6,a13                  # [41]  carry, lane 3
+    addi.n          a12,a3,-1               # [42]  right shift - 1; NOTE(review): this is -1 when a3 == 0 - confirm to_add is still valid
+    src             a2,a2,a9                    # [43]  lane 2
+    ee.movi.32.q    q3,a5,1             # [51]
+    ee.movi.32.q    q3,a2,2             # [54]
+
+    add.n           a13,a13,a11                 # [44]
+    addi            a9,a1,32                    # [45]  to_add
+    movi.n          a11,1                   # [46]
+    src             a7,a7,a10                    # [47]  lane 0
+    src             a13,a13,a6                  # [48]  lane 3
+    ee.movi.32.q    q3,a7,0             # [50]
+    ee.movi.32.q    q3,a13,3            # [57]  q3 now holds the 4 multiplied values
+
+    addi            a8,a1,112                   # [49]
+
+    l32i            a7,a1,48                   # [52]  gra_spill_temp_155
+    l16ui           a5,a1,172                   # [53]  out_channels
+    ssl             a12                         # [55]
+    sll             a11,a11                     # [56]  a11 = 1 shifted left by (right shift - 1)
+    wsr.sar         a3                      # [58]  SAR = right shift, used by ee.vsr.32 below
+    ee.vcmp.lt.s32  q0,q3,q0        # [59]  q0 was zeroed above: per-lane compare q3 < 0
+    l32i            a13,a1,108                  # [60]  lgra_spill_temp_168
+    s32i.n          a11,a1,32               # [61]  to_add
+    ee.vldbc.32     q1,a9               # [62]  id:317 to_add
+    add.n           a5,a5,a13                   # [63]  a5 = output pointer + out_channels
+    l32i            a9,a1,100                   # [64]  lgra_spill_temp_166
+    ee.vadds.s32    q1,q1,q0            # [65]  to_add plus the compare result
+    addi.n          a9,a9,4                 # [66]  next out_shift entry
+    ee.vadds.s32    q1,q3,q1            # [67]
+    ee.vsr.32       q1,q1                   # [69]  shift right by SAR
+
+# add offset, apply activation and store
+    ee.vadds.s32    q1,q1,q5            # [70]  q5 = out_offset
+    ee.vmin.s32     q1,q1,q7            # [72]  q7 = activation_max
+    ee.vmax.s32     q1,q1,q6            # [73]  q6 = activation_min
+    ee.vst.128.ip   q1,a1,0             # [74]  id:320
+    l8ui        a6,a1,0                     # [75]  scratch_buf
+    s8i         a6,a13,0                    # [76]  low byte of lane 0 to the output pointer
+    addi.n      a13,a13,1               # [77]
+    l8ui        a2,a1,4                     # [78]  scratch_buf+4
+    s8i         a2,a5,0                     # [79]  low byte of lane 1 to output pointer + out_channels
+    l8ui        a12,a1,8                    # [80]  scratch_buf+8
+    l32i        a2,a1,96                   # [81]  lgra_spill_temp_165
+    s8i         a12,a14,0                   # [82]  low byte of lane 2
+    addi.n      a14,a14,1               # [83]
+    l8ui        a11,a1,12                   # [84]  scratch_buf+12
+    s8i         a11,a15,0                   # [85]  low byte of lane 3
+    l32i        a11,a1,104                  # [86]  lgra_spill_temp_167
+    addi.n      a15,a15,1               # [87]
+    addi.n      a11,a11,4               # [88]  next out_mult entry
+    sub         a7,a11,a7                   # [89]
+    beqz        a7,.Lt_3_8194               # [90]  out_mult pointer reached its end: all channels done
+
+.Lt_3_8706: # 0xc97  per output channel: multiply-accumulate for the 4 pixels
+    ee.zero.qacc                    # [0]
+    l32i    a8,a1,52                   # [1]  gra_spill_temp_156
+    l32i    a3,a1,112                   # [2]  gra_spill_temp_158, read pointer into the scratch buffer
+    blti    a8,1,.Lt_3_8962             # [3]  in_channels - 3 < 1: skip the accumulate loop
+
+    l32i    a4,a1,88                    # [0]  gra_spill_temp_152
+    loopgtz a4,.LBB53_esp_nn_conv_s16_mult4_1x1_esp32s3     # [2]  in_channels >> 2 iterations
+
+    ee.vld.l.64.ip          q0,a2,8         # [0*II+0]  id:289  8 bytes from the filter pointer
+    ee.vld.l.64.ip          q1,a3,8         # [0*II+1]  id:290  8 bytes from the scratch buffer
+    ee.vld.l.64.ip          q2,a3,8         # [0*II+2]  id:291
+    ee.vsmulas.s16.qacc     q1,q0,0     # [0*II+3]
+    ee.vld.l.64.ip          q3,a3,8         # [0*II+4]  id:292
+    ee.vsmulas.s16.qacc     q2,q0,1     # [0*II+5]
+    ee.vld.l.64.ip          q4,a3,8         # [0*II+6]  id:293
+    ee.vsmulas.s16.qacc     q3,q0,2     # [0*II+7]
+    ee.vsmulas.s16.qacc     q4,q0,3     # [0*II+8]
+
+.LBB53_esp_nn_conv_s16_mult4_1x1_esp32s3:   # 0xcc4
+
+.Lt_3_8962: # 0xcc4
+
+// extract data: dump qacc to scratch_buf, then gather 2 bytes from each of offsets 0, 5, 10, 15 into bytes 0..7
+    mov     a10,a1
+    ee.st.qacc_l.l.128.ip   a10,16      # [0]  id:298
+    ee.st.qacc_l.h.32.ip    a10,-16     # [1]  id:299  a10 is back at a1 afterwards
+    l8ui    a12,a1,16                   # [2]  scratch_buf+16
+    l8ui    a8,a1,6                     # [3]  scratch_buf+6
+    s8i     a8,a1,3                     # [4]  scratch_buf+3
+    s8i     a12,a1,7                    # [5]  scratch_buf+7
+    l8ui    a8,a1,15                    # [6]  scratch_buf+15
+    l8ui    a12,a1,5                    # [7]  scratch_buf+5
+    s8i     a12,a1,2                    # [8]  scratch_buf+2
+    s8i     a8,a1,6                     # [9]  scratch_buf+6
+    l16ui   a12,a1,10                   # [10]  scratch_buf+10
+    movi.n  a8,16                   # [11]
+    ee.srcmb.s16.qacc   q2,a8,0         # [12]  presumably the accumulators shifted right by 16 - TODO confirm
+    s16i                a12,a1,4                    # [13]  scratch_buf+4
+    ee.vld.l.64.ip      q1,a10,0        # [14]  id:309  reload the 8 gathered bytes
+    l32i                a12,a1,116                  # [15]  gra_spill_temp_159, bias
+    ee.vzip.16          q1,q2               # [16]  interleave the 16-bit halves from q1 and q2
+
+    beqz.n  a12,.Lt_3_9986          # [17] // skip bias
+ // add bias:
+    l32i            a8,a1,120                   # [0]  gra_spill_temp_160
+    ee.vldbc.32.ip  q0,a8,4         # [2]  id:311  one bias value for all lanes, pointer += 4
+    s32i            a8,a1,120                   # [3]  gra_spill_temp_160
+    ee.vadds.s32    q1,q1,q0            # [4]
+.Lt_3_9986: # 0xd04
+
+    l32i.n  a12,a9,0                # [0]  id:313  a12 = current out_shift value
+    l32i.n  a4,a11,0                # [1]  id:312  a4 = current out_mult value
+    bgei    a12,1,.Lt_3_10754           # [2]  shift >= 1: no right shift needed
+
+    neg     a3,a12                      # [0]  right shift amount = -out_shift value
+    j       .Lt_3_10498                     # [1]
+
+.Lt_3_6402: # 0xd11  common exit
+    retw.n                          # [0]
+
+    .size   esp_nn_conv_s16_mult4_1x1_esp32s3, . - esp_nn_conv_s16_mult4_1x1_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S
new file mode 100644
index 0000000..4c49f80
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s16_mult8_esp32s3.S
@@ -0,0 +1,489 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+    .literal    .LC10_28_153, -2147483648
+    .literal    .LC11_28_154, -1073741823
+    .literal    .LC12_28_155, 2147483647
+    .literal    .LC13_28_156, 1073741824
+
+    # Program Unit: esp_nn_conv_s16_mult8_esp32s3
+    .type   esp_nn_conv_s16_mult8_esp32s3, @function
+    .align   4
+    .global esp_nn_conv_s16_mult8_esp32s3
+esp_nn_conv_s16_mult8_esp32s3:  # 0x6e2
+    # qacc_scratch = 0
+    # gra_spill_temp_96 = 48
+    # gra_spill_temp_97 = 52
+    # gra_spill_temp_98 = 56
+    # gra_spill_temp_99 = 60
+    # gra_spill_temp_100 = 64
+    # gra_spill_temp_101 = 68
+    # gra_spill_temp_102 = 72
+    # gra_spill_temp_103 = 76
+    # gra_spill_temp_104 = 80
+    # gra_spill_temp_105 = 84
+    # gra_spill_temp_106 = 88
+    # gra_spill_temp_107 = 92
+    # gra_spill_temp_108 = 96
+    # gra_spill_temp_109 = 100
+    # gra_spill_temp_110 = 104
+    # gra_spill_temp_111 = 108
+    # gra_spill_temp_112 = 112
+    # gra_spill_temp_113 = 116
+    # gra_spill_temp_114 = 120
+    # gra_spill_temp_115 = 124
+    # gra_spill_temp_116 = 128
+    # gra_spill_temp_117 = 132
+    # gra_spill_temp_118 = 136
+    # gra_spill_temp_119 = 140
+    # gra_spill_temp_120 = 144
+    # gra_spill_temp_121 = 148
+    # gra_spill_temp_122 = 152
+    # gra_spill_temp_123 = 156
+    # gra_spill_temp_124 = 160
+    # gra_spill_temp_125 = 164
+    # gra_spill_temp_126 = 168
+    # gra_spill_temp_127 = 172
+    # gra_spill_temp_128 = 176
+    # gra_spill_temp_129 = 180
+    # gra_spill_temp_130 = 184
+    # gra_spill_temp_131 = 188
+    # gra_spill_temp_132 = 192
+    # gra_spill_temp_133 = 196
+    # gra_spill_temp_134 = 200
+    # gra_spill_temp_135 = 204
+    # gra_spill_temp_136 = 208
+    # gra_spill_temp_137 = 212
+
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t in_channels
+ // a6: const uint16_t pad_wd
+ // a7: const uint16_t pad_ht
+
+ // on stack:
+ // const uint16_t stride_wd
+ // const uint16_t stride_ht
+ // const int16_t *filter_data
+ // const uint16_t filter_wd
+ // const uint16_t filter_ht
+ // const int32_t *bias
+ // int8_t *out_data
+ // const uint16_t out_wd
+ // const uint16_t out_ht
+ // const uint16_t out_channels
+ // const int32_t out_offset
+ // const int32_t *out_shift
+ // const int32_t *out_mult
+ // const int32_t activation_min
+ // const int32_t activation_max
+
+
+    entry   a1,256                      #
+    s32i    a2,a1,176                   # [0]  gra_spill_temp_128
+    s32i    a3,a1,192                   # [1]  gra_spill_temp_132
+    s32i.n  a6,a1,60                # [2]  gra_spill_temp_99
+    l16ui   a8,a1,288                   # [3]  id:282 out_ht+0x0
+    s32i    a8,a1,68                    # [4]  gra_spill_temp_101
+    beqz.n  a8,.Lt_2_11778          # [5]
+
+    s32i    a7,a1,76                    # [0]  gra_spill_temp_103
+    s32i    a1,a1,156                   # [1]  gra_spill_temp_123
+    l16ui   a8,a1,272                   # [2]  id:285 filter_ht+0x0
+    neg     a11,a7                      # [3]
+    movi.n  a12,0                   # [4]
+    neg     a14,a6                      # [5]
+    l16ui   a15,a1,268                  # [6]  id:286 filter_wd+0x0
+    l16ui   a9,a1,292                   # [7]  id:283 out_channels+0x0
+    l32i    a10,a1,304                  # [8]  id:284 out_mult+0x0
+    s32i    a10,a1,88                   # [9]  gra_spill_temp_106
+    s32i    a9,a1,96                    # [10]  gra_spill_temp_108
+    s32i    a15,a1,196                  # [11]  gra_spill_temp_133
+    s32i.n  a14,a1,48               # [12]  gra_spill_temp_96
+    s32i    a12,a1,72                   # [13]  gra_spill_temp_102
+    s32i    a11,a1,80                   # [14]  gra_spill_temp_104
+    s32i.n  a8,a1,52                # [15]  gra_spill_temp_97
+    sub     a13,a3,a14                  # [16]
+    mul16u  a8,a5,a8                # [17]
+    s32i.n  a13,a1,56               # [18]  gra_spill_temp_98
+    sub     a11,a4,a11                  # [19]
+    l32i    a12,a1,276                  # [20]  id:292 bias+0x0
+    s32i    a12,a1,152                  # [21]  gra_spill_temp_122
+    s32i    a11,a1,84                   # [22]  gra_spill_temp_105
+    l32i    a14,a1,308                  # [23]  id:290 activation_min+0x0
+    l32i    a13,a1,312                  # [24]  id:291 activation_max+0x0
+    s32i    a13,a1,144                  # [25]  gra_spill_temp_120
+    mull    a15,a15,a8                  # [26]
+    addx4   a9,a9,a10                   # [27]
+    s32i    a14,a1,140                  # [28]  gra_spill_temp_119
+    l32i    a11,a1,300                  # [29]  id:293 out_shift+0x0
+    s32i    a11,a1,92                   # [30]  gra_spill_temp_107
+    slli    a14,a5,1                    # [31]
+    s32i    a9,a1,124                   # [32]  gra_spill_temp_115
+    s32i    a15,a1,128                  # [33]  gra_spill_temp_116
+    l32i    a8,a1,280                   # [34]  id:288 out_data+0x0
+    movi.n  a10,0                   # [35]
+    s32i    a10,a1,160                  # [36]  gra_spill_temp_124
+    s32i    a8,a1,132                   # [37]  gra_spill_temp_117
+    l32i    a15,a1,296                  # [38]  id:289 out_offset+0x0
+    l32i    a9,a1,264                   # [39]  id:287 filter_data+0x0
+    s32i    a9,a1,180                   # [40]  gra_spill_temp_129
+    s32i    a15,a1,136                  # [41]  gra_spill_temp_118
+    l16ui   a8,a1,284                   # [42]  id:296 out_wd+0x0
+    l16ui   a10,a1,256                  # [43]  id:294 stride_wd+0x0
+    s32i    a10,a1,100                  # [44]  gra_spill_temp_109
+    s32i    a8,a1,104                   # [45]  gra_spill_temp_110
+    addi.n  a15,a5,-1               # [46]
+    l16ui   a9,a1,260                   # [47]  id:295 stride_ht+0x0
+    s32i    a9,a1,64                    # [48]  gra_spill_temp_100
+    srai    a15,a15,3                   # [49]
+    j   .Lt_2_12290                     # [50]
+
+.Lt_2_12546:    # 0x788
+    l32i    a8,a1,68                    # [0]  gra_spill_temp_101
+    l32i    a12,a1,80                   # [1]  gra_spill_temp_104
+    l32i    a11,a1,84                   # [2]  gra_spill_temp_105
+    l32i    a10,a1,64                   # [3]  gra_spill_temp_100
+    l32i    a13,a1,72                   # [4]  gra_spill_temp_102
+    l32i    a9,a1,76                    # [5]  gra_spill_temp_103
+    addi.n  a13,a13,1               # [6]
+    s32i    a13,a1,72                   # [7]  gra_spill_temp_102
+    sub     a9,a9,a10                   # [8]
+    sub     a11,a11,a10                 # [9]
+    add.n   a12,a12,a10                 # [10]
+    s32i    a12,a1,80                   # [11]  gra_spill_temp_104
+    s32i    a11,a1,84                   # [12]  gra_spill_temp_105
+    s32i    a9,a1,76                    # [13]  gra_spill_temp_103
+    sub     a13,a13,a8                  # [14]
+    beqz    a13,.Lt_2_11778             # [15]
+
+.Lt_2_12290:    # 0x7b6 // width loop
+    l32i    a13,a1,104                  # [0]  gra_spill_temp_110
+    beqz.n  a13,.Lt_2_12546         # [2]
+
+    l32i    a8,a1,192                   # [0]  gra_spill_temp_132
+    l32i    a9,a1,80                    # [1]  gra_spill_temp_104
+    movi.n  a11,0                   # [2]
+    l32i    a10,a1,76                   # [3]  gra_spill_temp_103
+    l32i.n  a12,a1,60               # [4]  gra_spill_temp_99
+    l32i.n  a13,a1,56               # [5]  gra_spill_temp_98
+    s32i    a13,a1,116                  # [6]  gra_spill_temp_113
+    s32i    a12,a1,112                  # [7]  gra_spill_temp_112
+    max     a10,a10,a11                 # [8]
+    s32i    a10,a1,148                  # [9]  gra_spill_temp_121
+    add.n   a9,a9,a10                   # [10]
+    l32i.n  a11,a1,48               # [11]  gra_spill_temp_96
+    s32i    a11,a1,184                  # [12]  gra_spill_temp_130
+    mull    a8,a8,a9                    # [13]
+    l32i    a10,a1,84                   # [14]  gra_spill_temp_105
+    s32i    a8,a1,120                   # [15]  gra_spill_temp_114
+    l32i.n  a9,a1,52                # [16]  gra_spill_temp_97
+    movi.n  a8,0                    # [17]
+    s32i    a8,a1,108                   # [18]  gra_spill_temp_111
+    min     a9,a9,a10                   # [19]
+    s32i    a9,a1,204                   # [20]  gra_spill_temp_135
+    j   .Lt_2_13058                     # [21]
+
+.Lt_2_13314:    # 0x7f6
+#<loop> Part of loop body line 186, head labeled .Lt_2_13058
+    l32i    a13,a1,104                  # [0]  gra_spill_temp_110
+    l32i    a11,a1,112                  # [1]  gra_spill_temp_112
+    l32i    a10,a1,184                  # [2]  gra_spill_temp_130
+    l32i    a9,a1,100                   # [3]  gra_spill_temp_109
+    l32i    a12,a1,108                  # [4]  gra_spill_temp_111
+    l32i    a8,a1,116                   # [5]  gra_spill_temp_113
+    addi.n  a12,a12,1               # [6]
+    s32i    a12,a1,108                  # [7]  gra_spill_temp_111
+    sub     a8,a8,a9                    # [8]
+    add.n   a10,a10,a9                  # [9]
+    sub     a11,a11,a9                  # [10]
+    s32i    a11,a1,112                  # [11]  gra_spill_temp_112
+    s32i    a10,a1,184                  # [12]  gra_spill_temp_130
+    s32i    a8,a1,116                   # [13]  gra_spill_temp_113
+    beq     a12,a13,.Lt_2_12546         # [14]
+
+.Lt_2_13058:    # 0x821 // channel loop
+    l32i    a12,a1,96                   # [0]  gra_spill_temp_108
+    beqz.n  a12,.Lt_2_13314         # [2]
+
+    movi.n  a11,0                   # [0]
+    l32i    a10,a1,112                  # [1]  gra_spill_temp_112
+    l32i    a13,a1,92                   # [2]  gra_spill_temp_107
+    l32i    a8,a1,152                   # [3]  gra_spill_temp_122
+    movi.n  a9,0                    # [4]
+    l32i    a12,a1,88                   # [5]  gra_spill_temp_106
+    s32i    a12,a1,168                  # [6]  gra_spill_temp_126
+    s32i    a9,a1,188                   # [7]  gra_spill_temp_131
+    s32i    a8,a1,164                   # [8]  gra_spill_temp_125
+    s32i    a13,a1,172                  # [9]  gra_spill_temp_127
+    l32i    a8,a1,116                   # [10]  gra_spill_temp_113
+    l32i    a13,a1,196                  # [11]  gra_spill_temp_133
+    max     a10,a10,a11                 # [12]
+    s32i    a10,a1,208                  # [13]  gra_spill_temp_136
+    min     a13,a13,a8                  # [14]
+    s32i    a13,a1,200                  # [15]  gra_spill_temp_134
+    j   .Lt_2_13826                     # [16]
+
+.Lt_2_14082:    # 0x857
+
+// extract data
+    l32i    a4,a1,156                   # [0]  gra_spill_temp_123
+    ee.st.qacc_l.l.128.ip   a4,16       # [2]  id:303
+    ee.st.qacc_l.h.32.ip    a4,0        # [3]  id:304
+    l8ui    a9,a1,15                    # [4]  qacc_scratch+15
+    l16ui   a8,a1,10                    # [5]  qacc_scratch+10
+    l8ui    a12,a1,16                   # [6]  qacc_scratch+16
+    l8ui    a11,a1,6                    # [7]  qacc_scratch+6
+    l8ui    a10,a1,5                    # [8]  qacc_scratch+5
+    s8i     a10,a1,2                    # [9]  qacc_scratch+2
+    s8i     a11,a1,3                    # [10]  qacc_scratch+3
+    s8i     a12,a1,7                    # [11]  qacc_scratch+7
+    s16i    a8,a1,4                     # [12]  qacc_scratch+4
+    s8i     a9,a1,6                     # [13]  qacc_scratch+6
+
+    ee.st.qacc_h.l.128.ip   a4,16       # [14]  id:314
+    ee.st.qacc_h.h.32.ip    a4,-32      # [15]  id:315
+    l8ui    a13,a1,32                   # [16]  qacc_scratch+32
+    l8ui    a9,a1,21                    # [17]  qacc_scratch+21
+    l8ui    a12,a1,31                   # [18]  qacc_scratch+31
+    l16ui   a11,a1,26                   # [19]  qacc_scratch+26
+    l8ui    a10,a1,22                   # [20]  qacc_scratch+22
+    l16ui   a8,a1,16                    # [21]  qacc_scratch+16
+    s16i    a8,a1,8                     # [22]  qacc_scratch+8
+    s8i     a10,a1,11                   # [23]  qacc_scratch+11
+    s16i    a11,a1,12                   # [24]  qacc_scratch+12
+    s8i     a12,a1,14                   # [25]  qacc_scratch+14
+    s8i     a9,a1,10                    # [26]  qacc_scratch+10
+    s8i     a13,a1,15                   # [27]  qacc_scratch+15
+
+    l32i    a9,a1,152                   # [28]  gra_spill_temp_122, bias
+    movi.n  a13,16                  # [29]
+    ee.srcmb.s16.qacc   q1,a13,0        # [30]
+    ee.vld.128.ip   q0,a4,0             # [31]  id:327
+    s32i            a4,a1,156                   # [32]  gra_spill_temp_123
+    ee.vzip.16      q0,q1               # [33]
+    ee.vadds.s32    q0,q0,q1            # [34]
+    ee.movi.32.a    q0,a12,3            # [35]
+    ee.movi.32.a    q0,a11,2            # [36]
+    ee.movi.32.a    q0,a10,0            # [37]
+    add.n           a11,a11,a12                 # [38]
+    ee.movi.32.a    q0,a12,1            # [39]
+    add.n           a10,a10,a12                 # [40]
+    add.n           a10,a10,a11                 # [41]
+
+    beqz.n  a9,.Lt_2_17154          # [42] // skip bias
+
+    l32i    a13,a1,164                  # [0]  gra_spill_temp_125
+    l32i.n  a13,a13,0               # [2]  id:329
+    add.n   a10,a10,a13                 # [4]
+.Lt_2_17154:    # 0x8d7
+
+ # 259                  conv_out = esp_nn_multiply_by_quantized_mult(conv_out, out_mult[out_ch_idx], out_shift[out_ch_idx]);
+    l32i    a11,a1,172                  # [0]  gra_spill_temp_127
+    l32i    a4,a1,168                   # [1]  gra_spill_temp_126
+    l32i.n  a11,a11,0               # [2]  id:331
+    l32i.n  a4,a4,0                 # [3]  id:330
+
+    blti    a11,1,.LBB26_esp_nn_conv_s16_mult8_esp32s3  # [4]
+    movi.n  a13,0                   # [0]
+    j       .Lt_2_17666                     # [1]
+.LBB26_esp_nn_conv_s16_mult8_esp32s3:   # 0xa4e
+    neg     a13,a11                     # [0]
+.Lt_2_17666:    # 0x8e6
+
+    movi.n  a12,0                   # [0]
+    max     a12,a11,a12                 # [1]
+    movi.n  a11,0                   # [2]
+    ssl     a12                         # [3]
+    sll     a10,a10                     # [4]
+    bne     a10,a4,.Lt_2_20994          # [5]
+
+    l32r    a9,.LC10_28_153             # [0]
+    movi.n  a8,1                    # [1]
+    sub     a9,a10,a9                   # [2]
+    moveqz  a11,a8,a9               # [3]
+
+.Lt_2_20994:    # 0x901
+    extui   a8,a4,31,1                  # [0]
+    extui   a12,a10,31,1                # [1]
+    xor     a12,a12,a8                  # [2]
+    extui   a12,a12,0,8                 # [3]
+
+    beqz.n  a12,.Lt_2_18434         # [4]
+    movi.n  a12,-1                  # [0]
+    l32r    a9,.LC11_28_154             # [1]
+    j       .Lt_2_18178                     # [2]
+
+.Lt_2_18434:    # 0xa54
+    movi.n  a12,0                   # [0]
+    l32r    a9,.LC13_28_156             # [1]
+.Lt_2_18178:    # 0x914
+
+    ssai    31                          # [0]
+    l32r    a8,.LC12_28_155             # [1]
+    mulsh   a6,a4,a10                   # [2]
+    mull    a4,a4,a10                   # [3]
+    add.n   a6,a6,a12                   # [4]
+    add.n   a7,a4,a9                    # [5]
+    saltu   a4,a7,a4                    # [6]
+    add.n   a4,a4,a6                    # [7]
+    srai    a6,a4,31                    # [8]
+    and     a6,a6,a8                    # [9]
+    add.n   a7,a6,a7                    # [10]
+    srai    a3,a6,31                    # [11]
+    add.n   a3,a3,a4                    # [12]
+    saltu   a6,a7,a6                    # [13]
+    add.n   a6,a6,a3                    # [14]
+    src     a6,a6,a7                    # [15]
+    extui   a3,a11,0,8                  # [16]
+    movi.n  a7,1                    # [17]
+    ssr     a13                         # [18]
+    movnez  a6,a8,a3                # [19]
+    sra     a8,a6                       # [20]
+
+    addi.n  a3,a8,1                 # [21]
+    ssl     a13                         # [22]
+    sll     a7,a7                       # [23]
+    extui   a4,a8,31,1                  # [24]
+    addi.n  a7,a7,-1                # [25]
+    and     a6,a6,a7                    # [26]
+    srai    a7,a7,1                     # [27]
+    add.n   a4,a4,a7                    # [28]
+    l32i    a7,a1,164                   # [29]  gra_spill_temp_125
+    salt    a4,a4,a6                    # [30]
+    movnez  a8,a3,a4                # [31]
+    l32i    a6,a1,172                   # [32]  gra_spill_temp_127
+    l32i    a4,a1,132                   # [33]  gra_spill_temp_117
+    l32i    a3,a1,160                   # [34]  gra_spill_temp_124
+    addi.n  a7,a7,4                 # [35]
+    s32i    a7,a1,164                   # [36]  gra_spill_temp_125
+    addi.n  a6,a6,4                 # [37]
+    s32i    a6,a1,172                   # [38]  gra_spill_temp_127
+    l32i    a7,a1,136                   # [39]  gra_spill_temp_118
+    l32i    a6,a1,140                   # [40]  gra_spill_temp_119
+    add.n   a4,a3,a4                    # [41]
+    add.n   a7,a7,a8                    # [42]
+    addi.n  a3,a3,1                 # [43]
+    l32i    a8,a1,128                   # [44]  gra_spill_temp_116
+    max     a6,a6,a7                    # [45]
+    s32i    a3,a1,160                   # [46]  gra_spill_temp_124
+    l32i    a7,a1,188                   # [47]  gra_spill_temp_131
+    l32i    a3,a1,144                   # [48]  gra_spill_temp_120
+    add.n   a7,a7,a8                    # [49]
+    min     a3,a3,a6                    # [50]
+    s8i     a3,a4,0                     # [51]  id:332
+    s32i    a7,a1,188                   # [52]  gra_spill_temp_131
+    l32i    a4,a1,168                   # [53]  gra_spill_temp_126
+    l32i    a6,a1,124                   # [54]  gra_spill_temp_115
+    addi.n  a4,a4,4                 # [55]
+    s32i    a4,a1,168                   # [56]  gra_spill_temp_126
+    sub     a4,a4,a6                    # [57]
+    beqz    a4,.Lt_2_13314              # [58]
+
+.Lt_2_13826:    # 0x9b4
+    ee.zero.qacc                    # [0]
+    l32i    a9,a1,204                   # [1]  gra_spill_temp_135
+    l32i    a8,a1,148                   # [2]  gra_spill_temp_121
+    s32i    a8,a1,212                   # [3]  gra_spill_temp_137
+    bge     a8,a9,.Lt_2_14082           # [4]
+
+.LBB12_esp_nn_conv_s16_mult8_esp32s3:   # 0x9c3
+#<loop> Part of loop body line 187, head labeled .Lt_2_13826
+    l32i    a8,a1,196                   # [0]  gra_spill_temp_133
+    l32i    a7,a1,212                   # [1]  gra_spill_temp_137
+    l32i    a13,a1,200                  # [2]  gra_spill_temp_134
+    mull    a7,a7,a8                    # [3]
+    l32i    a6,a1,120                   # [4]  gra_spill_temp_114
+    add.n   a13,a7,a13                  # [5]
+    j   .Lt_2_14594                     # [6]
+
+.Lt_2_14850:    # 0x9d7
+#<loop> Part of loop body line 201, head labeled .Lt_2_14594
+    l32i    a9,a1,204                   # [0]  gra_spill_temp_135
+    l32i    a10,a1,212                  # [1]  gra_spill_temp_137
+    l32i    a12,a1,192                  # [2]  gra_spill_temp_132
+    l32i    a11,a1,196                  # [3]  gra_spill_temp_133
+    add.n   a6,a6,a12                   # [4]
+    add.n   a7,a7,a11                   # [5]
+    add.n   a13,a13,a11                 # [6]
+    addi.n  a10,a10,1               # [7]
+    s32i    a10,a1,212                  # [8]  gra_spill_temp_137
+    sub     a9,a9,a10                   # [9]
+    beqz    a9,.Lt_2_14082              # [10]
+
+.Lt_2_14594:    # 0x9f4
+    l32i    a9,a1,200                   # [0]  gra_spill_temp_134
+    l32i    a8,a1,208                   # [1]  gra_spill_temp_136
+    bge     a8,a9,.Lt_2_14850           # [3]
+
+    l32i    a11,a1,176                  # [0]  gra_spill_temp_128
+    l32i    a10,a1,184                  # [1]  gra_spill_temp_130
+    add.n   a12,a7,a8                   # [2]
+    add.n   a10,a10,a8                  # [3]
+    add.n   a10,a6,a10                  # [4]
+    mull    a10,a5,a10                  # [5]
+    mull    a8,a12,a5                   # [6]
+    addx2   a10,a10,a11                 # [7]
+    l32i    a11,a1,188                  # [8]  gra_spill_temp_131
+    add.n   a11,a11,a8                  # [10]
+    l32i    a8,a1,180                   # [11]  gra_spill_temp_129
+    mov.n   a2,a10                      # [12]
+    addx2   a11,a11,a8                  # [13]
+    movi.n  a8,8                    # [14]
+    mov.n   a3,a11                      # [15]
+    j   .Lt_2_15362                     # [16]
+
+.LBB18_esp_nn_conv_s16_mult8_esp32s3:   # 0xa26
+    loopgtz a15,.LBB54_esp_nn_conv_s16_mult8_esp32s3    # [0]
+
+    ee.vmulas.s16.qacc.ld.ip    q0,a2,16,q0,q1  # [0*II+0]  id:300
+    ee.vld.128.ip   q1,a3,16            # [0*II+1]  id:301
+.LBB54_esp_nn_conv_s16_mult8_esp32s3:   # 0xa30
+
+.Lt_2_15618:    # 0xa30
+    ee.vmulas.s16.qacc  q0,q1       # [0]
+    movi.n  a8,8                    # [1]
+    add.n   a10,a10,a14                 # [2]
+    add.n   a11,a11,a14                 # [3]
+    mov.n   a3,a11                      # [4]
+    mov.n   a2,a10                      # [5]
+    beq     a12,a13,.Lt_2_14850         # [6]
+
+.Lt_2_15362:    # 0xa40
+    ee.vld.128.ip   q1,a3,16            # [0]  id:299
+    ee.vld.128.ip   q0,a2,16            # [1]  id:298
+    addi.n  a12,a12,1               # [2]
+    bltu    a8,a5,.LBB18_esp_nn_conv_s16_mult8_esp32s3  # [3]
+
+    j   .Lt_2_15618                     # [0]
+
+.Lt_2_11778:    # 0xa5c
+    retw.n                          # [0]
+
+    .size   esp_nn_conv_s16_mult8_esp32s3, . - esp_nn_conv_s16_mult8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S
new file mode 100644
index 0000000..5545b27
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_filter_aligned_input_padded_esp32s3.S
@@ -0,0 +1,271 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+//
+// SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+
+//
+// Constraints used by this function are:
+//     1. pad_wd and pad_ht is 0. For versions needing padding we do this
+//        explicitly
+//     2. All the filter rows are aligned to 16 bytes boundary. To make sure
+//        this is indeed the case, for filter rows (filter_wd * channels) not
+//        multiple of 16, we add zeros to pad it up to the next 16-byte boundary.
+//
+//     The optimized kernel assumes this and skips filter row with following
+//     size: ((filter_wd * input_ch) + 15) & ~15.
+
+	.text
+
+.literal_position
+	.literal .LC1, 1073741824
+
+    # Program Unit: esp_nn_conv_s8_filter_aligned_input_padded_esp32s3
+	.type	esp_nn_conv_s8_filter_aligned_input_padded_esp32s3, @function
+	.align	4
+	.global	esp_nn_conv_s8_filter_aligned_input_padded_esp32s3
+ // registers:
+ // a2: const int8_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t in_ch
+ // a6: const uint16_t input_offset
+ // a7: const uint16_t stride_wd
+
+ // on stack:
+ // const uint16_t stride_ht	: 80
+ // const int8_t *filter_data	: 84
+ // const uint16_t filter_wd	: 88
+ // const uint16_t filter_ht	: 92
+ // const int32_t *bias			: 96
+ // int8_t *out_data			: 100
+ // const uint16_t out_wd		: 104
+ // const uint16_t out_ht		: 108
+ // const uint16_t out_channels	: 112
+ // const int32_t out_offset	: 116
+ // const int32_t *out_shift	: 120
+ // const int32_t *out_mult		: 124
+ // const int32_t activation_min: 128
+ // const int32_t activation_max: 132
+ // void *scratch_buffer: 136
+
+esp_nn_conv_s8_filter_aligned_input_padded_esp32s3:	// s8 convolution without padding handling; filter rows are stepped at a 16-byte aligned size
+	entry	sp, 80			// windowed-call prologue, reserves an 80-byte frame
+	s32i.n  a2, sp, 40  	# in_row_ptr = input_data (start of the current input row)
+	mov		a11, a6			# a11 = input_offset, used by the pre-accumulation pass below
+	l16ui	a2, sp, 88  	# filter_wd
+	l32i	a8, sp, 100		# out_data
+	l16ui	a6, sp, 80		# stride_ht
+	mov.n	a15, a5			// a15 = in_ch
+
+	mull	a4, a2, a15		# filter_row_sz = filter_wd * in_ch
+	s32i.n	a8, sp, 24		# out_data_ptr
+	movi.n	a9, 0
+	s32i.n	a9, sp, 36      # out_y = 0
+	// a12 = filter_row_sz rounded up to a multiple of 16 (the stride between filter rows)
+	addi.n	a4, a4, 15		# to round the size up
+	srli	a2, a4, 4		# (filter_row_sz + 15) >> 4
+	slli	a12, a2, 4		# ((filter_row_sz + 15) >> 4) << 4
+
+	mull	a4, a6, a3		# stride_ht * input_wd
+	mull	a5, a3, a15		# input_wd * in_ch
+	l32i.n	a10, sp, 112     # out_ch
+
+	mull 	a9, a7, a15		# stride_wd * in_ch
+	mull 	a4, a4, a15		# (stride_ht * input_wd) * in_ch
+
+	slli	a3, a10, 2		# out_ch * 4
+	// spill the loop-invariant strides and limits to the frame
+	s32i.n	a3, sp, 32		# out_ch * 4
+	s32i.n	a5, sp, 12		# input_wd * in_ch
+	s32i.n	a9, sp, 52		# stride_wd * in_ch
+	s32i	a4, sp, 56		# (stride_ht * input_wd) * in_ch
+
+	l32i.n	a3, sp, 92   	# filter_ht
+	l32i	a13, sp, 136	# scratch_buf
+	l32i	a5, sp, 84		# filter_data
+	mull    a4, a12, a3		# aligned row size * filter_ht = padded filter bytes per output channel
+	srai	a4, a4, 1		// halved because the loop below consumes two filter bytes per iteration
+	addx4	a10, a10, a13   # scratch_buf + 4 * out_ch (end of the per-channel table)
+	l32i	a3, sp, 96		// bias pointer, tested against 0 before each use
+	// accumulate filter values per channel into scratch buffer
+.L_acc_out_channel_loop:
+	movi.n	a9, 0	// acc
+	loop	a4, .L_acc_filter_size_loop	// NOTE(review): plain loop has no zero-count guard here (the later vector loop is guarded by beqz) - confirm a4 is never 0
+	l8ui	a14, a5, 0		// two consecutive filter bytes
+	l8ui	a7, a5, 1
+	addi.n	a5, a5, 2
+	sext	a14, a14, 7		// sign-extend both from 8 bits
+	sext	a7, a7, 7
+	add		a9, a9, a14
+	add		a9, a9, a7
+	.L_acc_filter_size_loop:
+
+	// multiply by offset, add bias and store the acc value per channel
+	mull 	a9, a9, a11		// input_offset * sum of this channel filter values
+	beqz.n 	a3, .L_skip_bias
+	l32i	a8, a3, 0
+	addi	a3, a3, 4	// this will remain 0 if bias not present
+	add 	a9, a9, a8
+.L_skip_bias:
+	s32i	a9, a13, 0		// scratch_buf[ch] = input_offset * filter_sum (+ bias)
+	addi.n 	a13, a13, 4
+	blt    	a13, a10, .L_acc_out_channel_loop	// until the table end computed above is reached
+
+	movi.n	a4, 0			# constant 0, read later by the max that derives left_shift
+
+.L_height_loop:
+	l32i.n	a8, sp, 40  	# in_row_ptr
+	movi.n	a9, 0
+	l32i.n	a10, sp, 104	# out_wd; NOTE(review): a10 is overwritten in .L_out_ch_loop before it is read
+	s32i.n	a8, sp, 28  	# input_ptr = in_row_ptr
+	s32i.n	a9, sp, 44      # out_x = 0
+
+.L_width_loop:
+	movi.n	a9, 0
+	l32i	a5, sp, 84		# filter_data (rewound for every output pixel)
+	s32i.n	a9, sp, 20		// sp+20 = byte offset of the current out channel into out_shift / out_mult
+	l32i	a3, sp, 136		# scratch_buf (per-channel offset/bias table built above)
+
+.L_out_ch_loop:
+	movi.n	a6, 0			// a6 = accumulator for this output value
+	l32i.n	a9, sp, 28  	# input_ptr
+	mov.n	a10, a6			// a10 = filter_y_idx = 0
+
+.L_filter_ht_loop:
+	add.n	a8, a5, a12		// a8 = end of this filter row (a5 + aligned row size)
+	mov.n	a13, a9			// a13 = read pointer into the current input row
+	// prime the pipeline: clear ACCX, fetch two input vectors and the first filter vector
+	ee.zero.accx
+	ee.ld.128.usar.ip 	q0, a13, 16	// NOTE(review): usar variant presumably latches the pointer misalignment for the ee.src.q ops - confirm in the PIE reference
+	ee.vld.128.ip 		q4, a13, 16
+	ee.vld.128.ip 		q1, a5, 16
+
+	sub             a15, a8, a5         // row_len - 16
+	extui           a14, a15, 4, 1      // if multiple of 16 and not 32
+	srai            a15, a15, 5         // multiples of 32
+	ee.src.q.qup 	q2, q0, q4	// presumably q2 = byte-aligned input vector assembled from q0 and q4 - TODO confirm
+	beqz	a15, .L_vector_32_loop_end	// skip the loop when there is no full 32-byte chunk
+
+	loop	a15, .L_vector_32_loop_end
+	// each iteration issues two s8 multiply-accumulates, each fused with the next filter load
+	ee.vld.128.ip 					q4, a13, 16
+	ee.vmulas.s8.accx.ld.ip.qup 	q3, a5, 16, q2, q1, q0, q4
+	ee.vld.128.ip 					q2, a13, 16
+	ee.vmulas.s8.accx.ld.ip.qup 	q1, a5, 16, q0, q3, q4, q2
+	ee.orq 							q0, q2, q2
+	ee.orq 							q2, q4, q4
+
+.L_vector_32_loop_end:
+	beqz	a14, .L_vector_loop_end	// a14 set means one more 16-byte chunk remains
+	ee.vmulas.s8.accx.ld.ip 		q4, a13, 16, q2, q1
+	ee.src.q.ld.ip					q1, a5, 16, q0, q4
+	ee.orq 							q2, q0, q0
+
+.L_vector_loop_end:
+	ee.vmulas.s8.accx 	q2, q1	// final multiply-accumulate for the vectors still in flight
+	addi	a13, a13, -16	// since we incremented by 16 too much
+	movi 	a15, 0		// shift amount 0 for the ACCX read below
+	ee.srs.accx  	a14, a15, 0	// a14 = value read out of ACCX for this filter row
+
+	mov.n	a5, a8			// filter pointer = start of the next filter row
+	add.n 			a6, a6, a14	// fold the row result into the accumulator
+.L7:
+	l32i.n	a8, sp, 12		# input_wd * in_ch
+	l32i.n	a2, sp, 92   	# filter_ht
+	addi.n	a10, a10, 1		# filter_y_idx
+	add.n	a9, a9, a8		// advance to the next input row
+	blt		a10, a2, .L_filter_ht_loop
+.L9:
+	l32i    a7, a3, 0		# load input_offset acc
+	addi    a3, a3, 4		# increment offset acc ptr
+	l32i.n	a8, sp, 20		// byte offset of the current out channel
+	add.n	a6, a6, a7		# add input_offset accumulation
+
+.L_multiply_by_quant_mult:
+	l32i	a10, sp, 120		// out_shift
+	l32i	a9, sp, 124		// out_mult
+	add.n	a2, a10, a8
+	l32i.n	a2, a2, 0		// a2 = shift for this channel
+	add.n	a7, a9, a8
+	l32i.n	a7, a7, 0		// a7 = multiplier for this channel
+	max		a8, a2, a4		// left_shift = max(shift, 0); a4 holds 0
+	ssl		a8
+	sll		a6, a6			// acc <<= left_shift
+	mull	a9, a6, a7		// low 32 bits of acc * mult
+	l32r	a10, .LC1		// rounding nudge, 1073741824 = 1 << 30
+	sub		a2, a8, a2		// right_shift = left_shift - shift
+	add.n	a8, a9, a10		// nudged low word
+	mulsh	a6, a6, a7		// high 32 bits of the signed product
+	movi.n	a7, 1			// carry = 1 if the nudge add wrapped ...
+	bltu	a8, a9, .L13
+	movi.n	a7, 0			// ... otherwise carry = 0
+
+.L13:
+	add.n	a6, a7, a6		// high word + carry
+	slli	a6, a6, 1
+	extui	a8, a8, 31, 1		// top bit of the nudged low word
+	or		a6, a6, a8		// a6 = bits 62..31 of (product + nudge)
+	beqz.n	a2, .L_skip_div_by_pow_of_2	// right_shift == 0, nothing to divide
+	addi.n	a7, a2, -1
+	movi.n	a9, 1
+	extui	a8, a6, 31, 1		// 1 when the value is negative
+	ssl		a7
+	sll		a7, a9			// 1 << (right_shift - 1)
+	sub		a7, a7, a8		// rounding term, one less for negative values
+	add.n	a6, a7, a6
+	ssr		a2
+	sra		a6, a6			// arithmetic shift right by right_shift
+.L_skip_div_by_pow_of_2:
+	l32i	a10, sp, 116		// out_offset
+	l32i	a8, sp, 128		// activation_min
+	add.n	a2, a10, a6		// result + out_offset
+	l32i	a9, sp, 132		// activation_max
+	l32i.n	a10, sp, 24		# out_data_ptr
+	max		a2, a2, a8		// clamp to [activation_min, activation_max]
+	min		a2, a2, a9
+	s8i		a2, a10, 0		// store one output byte
+	l32i.n	a2, sp, 20
+	addi.n	a10, a10, 1
+	addi.n	a2, a2, 4		// next channel byte offset
+	l32i.n	a6, sp, 32		// out_ch * 4
+	s32i.n	a2, sp, 20
+	s32i.n	a10, sp, 24		# out_data_ptr
+	bne		a6, a2, .L_out_ch_loop	// repeat until the offset reaches out_ch * 4
+
+.L4:
+	l32i.n	a5, sp, 44      # out_x
+	l32i.n	a6, sp, 28  	# input_ptr (was stored by height loop)
+	l32i.n	a8, sp, 52		# stride_wd * in_ch
+	addi.n	a5, a5, 1
+	add.n	a6, a6, a8		# input_ptr + stride_wd * in_ch
+	l32i.n	a9, sp, 104 	# out_wd
+	s32i.n	a5, sp, 44      # out_x
+	s32i.n	a6, sp, 28  	# input_ptr
+	bne		a9, a5, .L_width_loop
+	// row finished: advance out_y and move in_row_ptr down by stride_ht input rows
+	l32i.n	a10, sp, 36     # out_y
+	l32i.n	a2, sp, 40  	# in_row_ptr
+	l32i	a5, sp, 56		# (stride_ht * input_wd) * in_ch
+	l32i.n	a6, sp, 108		# out_ht
+	addi.n	a10, a10, 1
+	add.n	a2, a2, a5		# in_row_ptr
+	s32i.n	a10, sp, 36     # out_y
+	s32i.n	a2, sp, 40  	# in_row_ptr
+	blt		a10, a6, .L_height_loop
+	// end outer (height) loop
+	retw.n
+
+	.size	esp_nn_conv_s8_filter_aligned_input_padded_esp32s3, .-esp_nn_conv_s8_filter_aligned_input_padded_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S
new file mode 100644
index 0000000..111fd08
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_conv_s8_mult8_1x1_esp32s3.S
@@ -0,0 +1,497 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+    .literal  .nudge_val, 1073741824
+
+    # Program Unit: esp_nn_conv_s8_mult8_1x1_esp32s3
+    .type   esp_nn_conv_s8_mult8_1x1_esp32s3, @function
+    .align   4
+    .global esp_nn_conv_s8_mult8_1x1_esp32s3
+
+esp_nn_conv_s8_mult8_1x1_esp32s3:  # 0xdbc
+    # scratch_buf = 0   // to store qacc regs need 36 bytes
+    # gra_spill_temp_164 = 36, channel itr, (in_channels - 1) >> 3
+    # gra_spill_temp_165 = 40, i_out
+    # gra_spill_temp_166 = 44, in_channels
+    # gra_spill_temp_167 = 48, in_channels/8 - 1
+    # gra_spill_temp_168 = 52, in_channels-7
+    # gra_spill_temp_169 = 56, input
+    # gra_spill_temp_170 = 60, filter_data
+    # gra_spill_temp_171 = 64, input_offset
+    # gra_spill_temp_172 = 68, input_ptr
+    # gra_spill_temp_173 = 72, bias
+    # gra_spill_temp_174 = 76, in_channels*8
+    # gra_spill_temp_175 = 80, size-7
+    # gra_spill_temp_176 = 84, size
+
+ // registers:
+ // a2: int8_t *input_data
+ // a3: uint16_t input_wd
+ // a4: uint16_t input_ht
+ // a5: uint16_t in_channels
+ // a6: int32_t input_offset
+ // a7: int16_t *filter_data
+
+ // on stack:
+ // int32_t *bias           // 160
+ // int8_t *out_data        // 164
+ // uint16_t out_wd         // 168
+ // uint16_t out_ht         // 172
+ // uint16_t out_channels   // 176
+ // int32_t out_offset      // 180
+ // int32_t *out_shift      // 184
+ // int32_t *out_mult       // 188
+ // int32_t activation_min  // 192
+ // int32_t activation_max  // 196
+ // void *buffer // tmp buf // 200
+
+    entry   a1,160                      #
+    s32i    a5,a1,44                    # [0]  gra_spill_temp_166, in_channels
+    s32i    a6,a1,64                    # [2]  id:619 input_offset+0x0
+    s32i    a7,a1,60                    # [1]  gra_spill_temp_170, filter_data
+    mul16u  a8,a3,a4                    # [3]  size = input_wd * input_ht;
+    s32i    a2,a1,56                    # [0]  gra_spill_temp_169, input
+    l32i    a4,a1,164                   # [1]  id:624 out_data+0x0
+    mov.n   a3,a1                       # [52]  scratch_buf
+
+    s32i    a8,a1,84                    # [4]  gra_spill_temp_176, size
+    blti    a8,8,.prepare_leftover      # [5] // process remaining lines one by one
+    addi    a9,a8,-7                    # [32]
+    s32i    a9,a1,80                    # [33]  gra_spill_temp_175, size-7
+
+    s32i    a2,a1,68                    # [2]  gra_spill_temp_172 , input_ptr
+    srai    a15,a5,3                    # [7] `in_ch/8` loop_cnt
+    movi.n  a11,0                       # [10]
+    s32i    a11,a1,40                   # [11]  gra_spill_temp_165
+    addi    a15,a15,-1                  # [17]  `in_ch/8` loop_cnt - 1
+    s32i    a15,a1,48                   # [18]  gra_spill_temp_167
+    slli    a9,a5,3                     # [19]  in_channels*8
+    s32i    a9,a1,76                    # [20]  gra_spill_temp_174
+    addi    a15,a5,-7                   # [31]
+    s32i    a15,a1,52                   # [34]  gra_spill_temp_168
+
+.outer_loop: // for (; i_out < size - 7; i_out += 8) {
+
+    l32i    a10,a1,200                  # [1]  gra_spill_temp_165, buffer
+    l32i.n  a11,a1,44                   # [1]  gra_spill_temp_166, input_channels
+    l32i.n  a8,a1,68                    # [2]  gra_spill_temp_172, input_ptr
+    srai    a9,a11,3                    # [7] `in_ch/8` loop_cnt for transpose loop
+
+    ee.zero.q   q7                      # [0]
+    addi        a12,a1,64               # [6]
+    ee.vldbc.16 q5,a12                  # [0*II+16]  id:638 input_offset
+
+    // load and transpose 8 lines of input 8xchannels,
+    // add input offset and store 16 bit data to tmp buffer
+    loopgtz a9,.transpose_loop_end  # [10]
+    mov.n                   a9,a8
+    ee.vld.l.64.xp          q0,a9,a11
+    ee.vld.l.64.xp          q1,a9,a11
+    ee.vld.h.64.xp          q0,a9,a11
+    ee.vld.h.64.xp          q1,a9,a11
+    ee.vld.l.64.xp          q2,a9,a11
+    ee.vzip.8               q0,q1
+    ee.vld.l.64.xp          q3,a9,a11
+    ee.vld.h.64.xp          q2,a9,a11
+    ee.vld.h.64.ip          q3,a9,0
+    ee.vzip.16              q0,q1
+    ee.vzip.8               q2,q3
+    ee.vzip.16              q2,q3
+    ee.vzip.32              q0,q2
+    ee.vcmp.lt.s8           q4,q2,q7
+    ee.vzip.8               q2,q4
+    ee.vcmp.lt.s8           q6,q0,q7
+    ee.vzip.8               q0,q6
+    ee.vadds.s16            q0,q0,q5
+    ee.vadds.s16.st.incp    q0,a10,q6,q6,q5
+    ee.vadds.s16.st.incp    q6,a10,q2,q2,q5
+    ee.vadds.s16.st.incp    q2,a10,q4,q4,q5
+    ee.vst.128.ip           q4,a10,16
+    ee.vzip.32              q1,q3
+    ee.vcmp.lt.s8           q4,q3,q7
+    ee.vzip.8               q3,q4
+    ee.vcmp.lt.s8           q6,q1,q7
+    ee.vzip.8               q1,q6
+    ee.vadds.s16            q1,q1,q5
+    ee.vadds.s16.st.incp    q1,a10,q6,q6,q5
+    ee.vadds.s16.st.incp    q6,a10,q3,q3,q5
+    ee.vadds.s16.st.incp    q3,a10,q4,q4,q5
+    ee.vst.128.ip           q4,a10,16
+    addi.n                  a8,a8,8
+.transpose_loop_end:    # 0xeeb
+
+ # 468          uint32_t bias_ptr = (uint32_t) bias;
+ # 469          uint32_t filter_ptr = (uint32_t) (filter_data);
+ # 470          const int32_t *out_mult_ptr = out_mult;
+ # 471          const int32_t *out_shift_ptr = out_shift;
+    l32i    a6,a1,184                   # [0]  out_shift
+    l32i    a2,a1,188                   # [1]  out_mult
+    l32i    a5,a1,60                    # [2]  gra_spill_temp_170, filter
+    l32i    a9,a1,160                   # [3]  gra_spill_temp_170, bias
+ # 472          for (int32_t out_ch_idx = 0; out_ch_idx < out_channels; out_ch_idx++) {
+    l16ui   a8,a1,176                   # [5]  id:620 out_channels+0x0
+    s32i    a9,a1,72                    # [5]  gra_spill_temp_173
+    blti    a8,1,.outer_ch_loop_end
+
+    movi.n  a7,0
+
+.out_ch_loop:   # 0xf3e
+    l32i    a8,a1,200                   # [4]  gra_spill_temp_165, buffer_ptr
+    ee.zero.qacc                        # [3]
+    ee.zero.q                       q5  #
+    l32i    a10,a1,52                   # [1]  gra_spill_temp_168, in_channels-7
+    l32i    a9,a1,48                    # [1]  gra_spill_temp_167, in_channels/8 - 1
+    ee.vld.l.64.ip                  q7,a5,8     # load filter 8 values
+    ee.vld.128.ip                   q0,a8,16
+    ee.vld.128.ip                   q1,a8,16
+    ee.vcmp.lt.s8                   q6,q7,q5
+    ee.vzip.8                       q7,q6
+
+    ee.vsmulas.s16.qacc.ld.incp     q2,a8,q0,q7,0
+    ee.vsmulas.s16.qacc.ld.incp     q3,a8,q1,q7,1
+    ee.vsmulas.s16.qacc.ld.incp     q0,a8,q2,q7,2
+    ee.vsmulas.s16.qacc.ld.incp     q1,a8,q3,q7,3
+    ee.vsmulas.s16.qacc.ld.incp     q2,a8,q0,q7,4
+    ee.vsmulas.s16.qacc.ld.incp     q3,a8,q1,q7,5
+    blti    a10,8,.inner_loop_end           # [16]
+
+    loopgtz a9,.inner_loop_end  # [3]
+
+    ee.vsmulas.s16.qacc.ld.incp q0,a8,q2,q7,6   # [0*II+0]  id:657
+    ee.vsmulas.s16.qacc.ld.incp q1,a8,q3,q7,7   # [0*II+1]  id:658
+    ee.vld.l.64.ip              q7,a5,8        # [0*II+2]  id:659, filter
+    ee.vcmp.lt.s8               q6,q7,q5
+    ee.vzip.8                   q7,q6
+    ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,0   # [0*II+4]  id:660
+    ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,1   # [0*II+5]  id:661
+    ee.vsmulas.s16.qacc.ld.incp q0,a8,q2,q7,2   # [0*II+6]  id:662
+    ee.vsmulas.s16.qacc.ld.incp q1,a8,q3,q7,3   # [0*II+7]  id:663
+    ee.vsmulas.s16.qacc.ld.incp q2,a8,q0,q7,4   # [0*II+8]  id:664
+    ee.vsmulas.s16.qacc.ld.incp q3,a8,q1,q7,5   # [0*II+9]  id:665
+.inner_loop_end:    # 0xfaf
+
+    ee.vsmulas.s16.qacc q2,q7,6     # [2]
+    ee.vsmulas.s16.qacc q3,q7,7     # [3]
+
+ # store qacc registers and re-arrange data for low 16 bits
+
+    ee.st.qacc_l.l.128.ip   a3,16       # [5]  id:668
+    ee.st.qacc_l.h.32.ip    a3,-16        # [6]  id:669
+    l32i.n     a10, a1, 0
+    l32i.n     a11, a1, 5
+    l32i.n     a12, a1, 10
+    l32i.n     a13, a1, 15
+    ee.movi.32.q    q0, a10, 0
+    ee.movi.32.q    q0, a11, 1
+    ee.movi.32.q    q0, a12, 2
+    ee.movi.32.q    q0, a13, 3
+
+    ee.st.qacc_h.l.128.ip   a3,16       # [5]  id:668
+    ee.st.qacc_h.h.32.ip    a3,-16        # [6]  id:669
+    l32i.n     a10, a1, 0
+    l32i.n     a11, a1, 5
+    l32i.n     a12, a1, 10
+    l32i.n     a13, a1, 15
+    ee.movi.32.q    q4, a10, 0
+    ee.movi.32.q    q4, a11, 1
+    ee.movi.32.q    q4, a12, 2
+    ee.movi.32.q    q4, a13, 3
+
+    l32i                a9,a1,160       # [17]  gra_spill_temp_170, bias
+    l32i                a10,a1,72       # [0]  gra_spill_temp_173, bias_ptr
+
+ # add bias
+    beqz.n          a9,.no_bias
+    ee.vldbc.32.ip  q6,a10,4
+    s32i            a10,a1,72           # [3]  gra_spill_temp_173, bias_ptr
+    ee.vadds.s32    q0,q0,q6            # [4]
+    ee.vadds.s32    q4,q4,q6            # [5]
+.no_bias:   # 0x102e
+
+    l32i.n  a11,a6,0                    # [1]  id:696
+    l32i.n  a10,a2,0                    # [3]  id:695
+    .global esp_nn_multiply_by_quantized_mult_asm_esp32s3
+    call8   esp_nn_multiply_by_quantized_mult_asm_esp32s3   # [4]  esp_nn_multiply_by_quantized_mult_asm_esp32s3
+
+    l32i.n  a10,a2,0                    # [0]  id:697, mult
+    l32i.n  a11,a6,0                    # [2]  id:698, shift
+    mv.qr   q5,q0
+    mv.qr   q0,q4
+    call8   esp_nn_multiply_by_quantized_mult_asm_esp32s3   # [5]  esp_nn_multiply_by_quantized_mult_asm_esp32s3
+
+    addi.n  a6,a6,4                     # out_shift_ptr++
+    addi.n  a2,a2,4                     # out_mult_ptr++
+    addi    a9,a1,180                   # [7]
+    addi    a10,a1,192                  # [5]
+    addi    a8,a1,196                   # [6]
+
+# load broadcast, activation and out_offset
+    ee.vldbc.32     q4,a9               # [14]  id:699 out_offset
+    ee.vldbc.32     q2,a10              # [11]  id:700 activation_min
+    ee.vldbc.32     q3,a8               # [12]  id:701 activation_max
+
+# add offset
+    ee.vadds.s32    q1,q0,q4            # [17]
+    ee.vadds.s32    q0,q5,q4            # [22]
+
+ # activation
+    ee.vmin.s32     q1,q1,q3            # [19]
+    ee.vmax.s32     q1,q1,q2            # [21]
+    ee.vmin.s32     q0,q0,q3            # [23]
+    ee.vmax.s32     q0,q0,q2            # [24]
+
+    l16ui           a9,a1,176           # [33]  out_channels
+
+# unzip and store
+    ee.vunzip.16    q0,q1               # [25]
+    ee.vst.128.ip   q0,a3,0             # [26]  id:702, scratch_buf
+
+ # a4 = out_data, out_channels = a1+176
+
+    l8ui    a14,a1,0                    # [27]
+    l8ui    a11,a1,2                    # [30]  scratch_buf+2
+    add     a10,a4,a9
+    s8i     a14,a4,0                    # [28], out_data
+    s8i     a11,a10,0                   # [31], out_data + out_channels
+
+    l8ui    a14,a1,4                    # [32]  scratch_buf+4
+    l8ui    a11,a1,6                    # [37]  scratch_buf+6
+    add     a12,a10,a9
+    add     a10,a12,a9
+    s8i     a14,a12,0                   # [28]
+    s8i     a11,a10,0                   # [31]
+
+    l8ui    a14,a1,8                    # [41]  scratch_buf+8
+    l8ui    a11,a1,10                   # [47]  scratch_buf+10
+    add     a12,a10,a9
+    add     a10,a12,a9
+    s8i     a14,a12,0                   # [28]
+    s8i     a11,a10,0                   # [31]
+
+    l8ui    a14,a1,12                   # [51]  scratch_buf+12
+    l8ui    a11,a1,14                   # [55]  scratch_buf+14
+    add     a12,a10,a9
+    add     a10,a12,a9
+    s8i     a14,a12,0                   # [28]
+    s8i     a11,a10,0                   # [31]
+
+    addi.n  a4,a4,1                     # [29] out_data++;
+    addi.n  a7,a7,1
+    bne     a7,a9,.out_ch_loop
+
+.outer_ch_loop_end:
+
+    subx8   a11,a9,a9                   # (7 * out_channels);
+    l32i    a10,a1,76                   # [1]  gra_spill_temp_174, in_channels * 8
+    l32i    a15,a1,40                   # [4]  gra_spill_temp_165
+    l32i    a9,a1,68                    # [2]  gra_spill_temp_172
+    l32i    a8,a1,80                    # [0]  gra_spill_temp_175, size-7
+    add.n   a4,a4,a11                   # [5] out_data += (7 * out_channels);
+    addi.n  a15,a15,8
+    s32i    a15,a1,40                   # [7]  gra_spill_temp_165
+    add.n   a9,a9,a10                   # [8]
+    s32i    a9,a1,68                    # [9]  gra_spill_temp_172
+    blt     a15,a8,.outer_loop          # [10]
+
+ # check if leftover
+    l32i    a15,a1,40
+    l32i    a13,a1,84                   # [1]  gra_spill_temp_176, size
+    l32i    a8,a1,44                    # [0]  gra_spill_temp_166, in_channels
+    bge     a15, a13, .return_function  # no leftover
+
+// This block below processes one input channel line at a time.
+.process_leftover:
+    l32i    a15,a1,40                   # [1]  gra_spill_temp_165, i_out
+    l32i    a14,a1,56                   # [2]  gra_spill_temp_169, input
+    mull    a15,a15,a8                  # [3] in_channels * i_out
+    addi.n  a8,a8,-1                    # [4] in_channels - 1
+    add.n   a14,a14,a15                 # [5] input_ptr = in_channels * i_out + input
+    srai    a8,a8,3                     # [6] iterations, (in_channels - 1) >> 3
+    s32i    a8,a1,36                    # [7]  gra_spill_temp_164, iterations
+    s32i    a14,a1,68                   # [8]  gra_spill_temp_172, in_channels * i_out + input
+    addi            a12,a1,64
+    ee.vldbc.16     q4,a12              # [8]  id:716 input_offset
+
+.leftover_outer_loop:
+
+    l32i    a15,a1,184                  # [0]  out_shift
+    l32i    a2,a1,188                   # [1]  out_mult
+    l32i    a8,a1,60                    # [3]  gra_spill_temp_170, filter_data
+    l32i    a5,a1,160                   # [0]  gra_spill_temp_170, bias
+    movi.n  a11,0                       # [2]
+
+.leftover_out_ch_loop:
+
+    ee.zero.qacc                            # [0]
+    ee.zero.q       q3                      # [1]
+    l32i.n          a9,a1,68                # [4]  gra_spill_temp_172, input_ptr
+    l32i            a10,a1,36               # [1]  gra_spill_temp_164, iterations, (in_channels - 1) >> 3
+    ee.vld.l.64.ip          q0,a9,8         # [7]  id:717, input
+    ee.vld.l.64.ip          q1,a8,8         # [7]  filter
+    ee.vcmp.lt.s8           q6,q0,q3
+    ee.vcmp.lt.s8           q7,q1,q3
+    ee.vzip.8               q0,q6
+    ee.vzip.8               q1,q7
+    ee.vadds.s16            q0,q0,q4  # [11]  id:718, add offset
+
+    loopgtz a10,.leftover_inner_loop_end        # [3]
+
+    ee.vmulas.s16.qacc          q0,q1  # mula(q0,q1)
+    ee.vld.l.64.ip              q0,a9,8         # load 8 input values
+    ee.vld.l.64.ip              q1,a8,8         # [7]  load filter
+    ee.vcmp.lt.s8               q2,q0,q3        # sign
+    ee.vcmp.lt.s8               q7,q1,q3
+    ee.vzip.8                   q0,q2           # 16 bit input
+    ee.vzip.8                   q1,q7           # 16 bit filter
+    ee.vadds.s16                q0,q0,q4        # add offset
+.leftover_inner_loop_end:   # 0x1262
+
+# re-arrange data from qacc in 32 bit q registers
+    ee.vmulas.s16.qacc      q0,q1       # [3]
+    ee.st.qacc_l.l.128.ip   a3,16       # [5]  id:722
+    ee.st.qacc_l.h.32.ip    a3,0        # [6]  id:723
+    l8ui    a10,a1,5                    # [11]  scratch_buf+5
+    l8ui    a12,a1,6                    # [10]  scratch_buf+6
+    l16ui   a14,a1,10                   # [8]  scratch_buf+10
+    l8ui    a9,a1,15                    # [7]  scratch_buf+15
+    l8ui    a13,a1,16                   # [9]  scratch_buf+16
+    s8i     a10,a1,2                    # [12]  scratch_buf+2
+    s8i     a12,a1,3                    # [13]  scratch_buf+3
+    s16i    a14,a1,4                    # [15]  scratch_buf+4
+    s8i     a9,a1,6                     # [16]  scratch_buf+6
+    s8i     a13,a1,7                    # [14]  scratch_buf+7
+
+    ee.st.qacc_h.l.128.ip   a3,16       # [17]  id:724
+    ee.st.qacc_h.h.32.ip    a3,-32      # [18]  id:725
+    l16ui   a13,a1,16                   # [30]  scratch_buf+16
+    l8ui    a14,a1,21                   # [23]  scratch_buf+21
+    l8ui    a9,a1,22                    # [22]  scratch_buf+22
+    l16ui   a10,a1,26                   # [21]  scratch_buf+26
+    s16i    a13,a1,8                    # [31]  scratch_buf+8
+    l8ui    a12,a1,31                   # [20]  scratch_buf+31
+    l8ui    a13,a1,32                   # [19]  scratch_buf+32
+    s8i     a14,a1,10                   # [24]  scratch_buf+10
+    s8i     a9,a1,11                    # [25]  scratch_buf+11
+    s16i    a10,a1,12                   # [26]  scratch_buf+12
+    s8i     a12,a1,14                   # [27]  scratch_buf+14
+    s8i     a13,a1,15                   # [28]  scratch_buf+15
+    movi.n  a12,16
+
+# get data now
+    ee.vld.128.ip       q0,a3,0
+    ee.srcmb.s16.qacc   q1,a12,0
+    ee.vzip.16          q0,q1
+
+    ee.vadds.s32    q0,q0,q1
+    ee.movi.32.a    q0,a10,3
+    ee.movi.32.a    q0,a9,2
+    ee.movi.32.a    q0,a14,0
+    add             a9,a9,a10
+    ee.movi.32.a    q0,a10,1
+    add             a14,a14,a10
+    add             a14,a14,a9
+
+# a14 contains conv_out
+    l32i    a9,a1,160                   # [43]  gra_spill_temp_170, bias ptr
+    l32i.n  a6,a15,0                    # [44]  id:730, shift
+    beqz.n  a9,.leftover_multiply_by_quant_mult             # [45]
+
+# load and add bias
+    l32i.n  a9,a5,0
+    add.n   a14,a14,a9
+
+.leftover_multiply_by_quant_mult:   # 0x12e7
+    l32i.n  a9,a2,0                 # [0]  id:729, mult
+    movi.n  a10,0                   # [1]
+    max     a10,a6,a10              # [2]  left_shift
+    ssl     a10                     # [3]
+    sll     a14,a14                 # [4] (value << left_shift)
+
+    sub     a7,a10,a6               # right_shift
+
+    l32r    a13,.nudge_val
+    mulsh   a12,a9,a14
+    mull    a14,a9,a14
+    ssai    31
+
+    addi.n  a2,a2,4                 # [0] mult
+    addi.n  a15,a15,4               # [1] shift
+    addi.n  a5,a5,4                 # [2] bias
+    addi.n  a11,a11,1               # [3]
+
+    add     a13,a14,a13             # low part
+    saltu   a14,a13,a14
+    add     a9,a12,a14              # high part
+    src     a12,a9,a13
+
+    blti    a7,1,.leftover_skip_div_by2
+
+    addi.n  a14,a7,-1
+    ssl     a14
+    movi.n  a10,1
+    sll     a10,a10                     # 1 << (exponent - 1)
+    extui   a14,a12,31,1
+    ssr     a7
+    sub     a10,a10,a14                 # 1 << (exponent - 1) - (val < 0)
+    add     a12,a12,a10                 # val += to_add
+    sra     a12,a12
+
+.leftover_skip_div_by2:
+    l32i    a10,a1,180                  # [26]  id:733 out_offset+0x0
+    l32i    a9,a1,192                   # [29]  id:732 activation_min+0x0
+    l16ui   a13,a1,176                  # [5]  id:620 out_channels+0x0
+    l32i    a14,a1,196                  # [31]  id:731 activation_max+0x0
+
+// add offset, apply activation and store
+    add.n   a10,a10,a12
+    max     a9,a9,a10
+    min     a14,a14,a9
+    s8i     a14,a4,0
+    addi.n  a4,a4,1
+
+    bne     a11,a13,.leftover_out_ch_loop
+
+    l32i    a15,a1,44                   # [0]  gra_spill_temp_166, in_channels
+    l32i    a14,a1,68                   # [1]  gra_spill_temp_172, input_ptr
+    l32i    a13,a1,40                   # [2]  gra_spill_temp_165, i_out
+    l32i    a12,a1,84                   # [3]  gra_spill_temp_176, size
+    addi.n  a13,a13,1                   # [4]
+    s32i    a13,a1,40                   # [5]  gra_spill_temp_165, i_out
+    add     a14,a14,a15                 # [7]  input_ptr += in_channels
+    s32i    a14,a1,68                   # [8]  gra_spill_temp_172, input_ptr
+    blt     a13,a12,.leftover_outer_loop
+
+.return_function:
+    retw.n              # [9]
+
+.prepare_leftover:
+    l32i    a8,a1,44                    # [0]  gra_spill_temp_166, in_channels
+    movi.n  a15,0
+    s32i    a15,a1,40                   # [7]  gra_spill_temp_165, i_out
+    j   .process_leftover
+
+    .size   esp_nn_conv_s8_mult8_1x1_esp32s3, . - esp_nn_conv_s8_mult8_1x1_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S
new file mode 100644
index 0000000..2042573
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3.S
@@ -0,0 +1,403 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3
+
+esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3:    # 0x776
+    # qacc_scratch = 0            : a1+0, bytes 0..35 receive the raw QACC dump
+    # gra_spill_temp_35 = 48      : -pad_wd
+    # gra_spill_temp_36 = 52      : input_wd + pad_wd
+    # gra_spill_temp_37 = 56      : pad_wd
+    # gra_spill_temp_38 = 60      : stride_ht
+    # gra_spill_temp_39 = 64      : out_ht
+    # gra_spill_temp_40 = 68      : out_y, output row counter
+    # gra_spill_temp_41 = 72      : pad_ht - out_y * stride_ht
+    # gra_spill_temp_42 = 76      : input_ht + pad_ht - out_y * stride_ht
+    # gra_spill_temp_43 = 80      : stride_wd
+    # gra_spill_temp_44 = 84      : out_shift
+    # gra_spill_temp_45 = 88      : out_mult
+    # gra_spill_temp_46 = 92      : out_wd
+    # gra_spill_temp_47 = 96      : out_x, output column counter
+    # gra_spill_temp_48 = 100     : pad_wd - out_x * stride_wd
+    # gra_spill_temp_49 = 104     : input_data
+    # gra_spill_temp_50 = 108     : filter_data
+    # gra_spill_temp_51 = 112     : input_wd
+    # gra_spill_temp_52 = 116     : channels
+    # gra_spill_temp_53 = 120     : number of filter rows to visit (temp_57 - temp_61)
+    # gra_spill_temp_54 = 124     : base_y = out_y * stride_ht - pad_ht
+    # gra_spill_temp_55 = 128     : base_x = out_x * stride_wd - pad_wd
+    # gra_spill_temp_56 = 132     : low 4 bits of the bias pointer
+    # gra_spill_temp_57 = 136     : end filter row = min(temp_42, 3)
+    # gra_spill_temp_58 = 140     : bias
+    # gra_spill_temp_59 = 144     : channels - 7
+    # gra_spill_temp_60 = 148     : running bias pointer
+    # gra_spill_temp_61 = 152     : first filter row = max(temp_41, 0)
+    # gra_spill_temp_62 = 156     : running out_data pointer
+    # gra_spill_temp_63 = 160     : address of qacc_scratch
+    # gra_spill_temp_64 = 164     : ch_idx, channel index stepped by 8
+    # gra_spill_temp_65 = 168     : running out_mult pointer
+    # gra_spill_temp_66 = 176     : 16-byte spill of q1 around the first call8
+    # gra_spill_temp_67 = 192     : out_offset broadcast (16 bytes)
+    # gra_spill_temp_68 = 208     : activation_min broadcast (16 bytes)
+    # gra_spill_temp_69 = 224     : activation_max broadcast (16 bytes)
+    # gra_spill_temp_70 = 240     : 16-byte spill of the first requantized q0
+ // Depthwise 3x3 convolution over s16 input/filter, 8 channels per pass: taps are accumulated in QACC, requantized via esp_nn_multiply_by_quantized_mult_ver1_esp32s3, offset, clamped and stored as 8 output bytes.
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t channels
+ // a6: const uint16_t pad_wd
+ // a7: const uint16_t pad_ht
+
+ // on stack (offsets are relative to a1 after the entry below)
+ // const uint16_t stride_wd        : a1+288
+ // const uint16_t stride_ht        : a1+292
+ // const int16_t *filter_data      : a1+296
+ // const int32_t *bias             : a1+300
+ // int8_t *out_data                : a1+304
+ // const uint16_t out_wd           : a1+308
+ // const uint16_t out_ht           : a1+312
+ // const int32_t out_offset        : a1+316
+ // const int32_t *out_shift        : a1+320
+ // const int32_t *out_mult         : a1+324
+ // const int32_t activation_min    : a1+328
+ // const int32_t activation_max    : a1+332
+
+    entry   a1,288                      # 288-byte frame; stack arguments start at a1+288
+    s32i    a2,a1,104                   # [0]  gra_spill_temp_49, input_data
+    s32i    a3,a1,112                   # [1]  gra_spill_temp_51, input_wd
+    s32i    a5,a1,116                   # [2]  gra_spill_temp_52, channels
+    s32i.n  a6,a1,56                # [3]  gra_spill_temp_37, pad_wd
+    addi    a14,a1,112                  # [4]  base address for the st.qr spills below
+    addmi   a11,a1,256                  # [5]  a11 = a1 + 256
+    addmi   a13,a1,256                  # [6]  a13 = a1 + 256
+    addmi   a15,a1,256                  # [7]  a15 = a1 + 256
+    l32i    a9,a1,304                   # [8]  id:251 out_data+0x0
+    l16ui   a8,a1,312                   # [9]  id:252 out_ht+0x0
+    s32i    a8,a1,64                    # [10]  gra_spill_temp_39, out_ht
+    s32i    a9,a1,156                   # [11]  gra_spill_temp_62, out_data
+    addi    a15,a15,60                  # [12]  a15 = a1 + 316, address of out_offset
+    addi    a13,a13,72                  # [13]  a13 = a1 + 328, address of activation_min
+    addi    a11,a11,76                  # [14]  a11 = a1 + 332, address of activation_max
+    ee.vldbc.32 q0,a11              # [15]  id:250 activation_max
+    ee.vldbc.32 q1,a13              # [16]  id:249 activation_min
+    ee.vldbc.32 q2,a15              # [17]  id:248 out_offset
+    st.qr   q2,a14,80                   # [18]  gra_spill_temp_67-112
+    st.qr   q1,a14,96                   # [19]  gra_spill_temp_68-112
+    st.qr   q0,a14,112                  # [20]  gra_spill_temp_69-112
+    beqz.n  a8,.Lt_5_7426           # [21]  return immediately when out_ht == 0
+
+.LBB3_esp_nn_depthwise_conv_s16_mult1_3x3:  # 0x7b9
+    s32i    a1,a1,160                   # [0]  gra_spill_temp_63, address of qacc_scratch (a1+0)
+    s32i    a7,a1,72                    # [1]  gra_spill_temp_41, pad_ht
+    mul16u  a6,a3,a5                # [2]  input_wd * channels
+    l32i    a14,a1,296                  # [3]  id:254 filter_data+0x0
+    l32i    a15,a1,300                  # [4]  id:253 bias+0x0
+    l16ui   a9,a1,308                   # [5]  id:259 out_wd+0x0
+    l16ui   a13,a1,288                  # [6]  id:255 stride_wd+0x0
+    neg     a8,a7                       # [7]  -pad_ht
+    l16ui   a10,a1,292                  # [8]  id:258 stride_ht+0x0
+    l32i    a11,a1,324                  # [9]  id:257 out_mult+0x0
+    l32i    a12,a1,320                  # [10]  id:256 out_shift+0x0
+    s32i    a12,a1,84                   # [11]  gra_spill_temp_44, out_shift
+    s32i    a11,a1,88                   # [12]  gra_spill_temp_45, out_mult
+    s32i.n  a10,a1,60               # [13]  gra_spill_temp_38, stride_ht
+    s32i    a8,a1,124                   # [14]  gra_spill_temp_54, -pad_ht
+    s32i    a13,a1,80                   # [15]  gra_spill_temp_43, stride_wd
+    s32i    a9,a1,92                    # [16]  gra_spill_temp_46, out_wd
+    s32i    a15,a1,140                  # [17]  gra_spill_temp_58, bias
+    s32i    a14,a1,108                  # [18]  gra_spill_temp_50, filter_data
+    slli    a6,a6,1                     # [19]  a6 = 2 * input_wd * channels, added to step one input row
+    movi.n  a14,16                  # [20]  a14 = 16, operand of ee.srcmb.s16.qacc below
+    extui   a15,a15,0,4                 # [21]  low 4 bits of the bias pointer
+    addi    a9,a5,-7                    # [22]  channels - 7
+    movi.n  a13,0                   # [23]
+    sub     a8,a4,a8                    # [24]  input_ht + pad_ht
+    addx2   a7,a5,a5                    # [25]  3 * channels
+    slli    a7,a7,1                     # [26]  a7 = 6 * channels, added to step one filter row
+    slli    a4,a5,1                     # [27]  a4 = 2 * channels, post-increment of the tap loads
+    s32i    a13,a1,68                   # [28]  gra_spill_temp_40, out_y = 0
+    s32i    a9,a1,144                   # [29]  gra_spill_temp_59, channels - 7
+    s32i    a15,a1,132                  # [30]  gra_spill_temp_56, bias & 0xf
+    l32i.n  a9,a1,56                # [31]  gra_spill_temp_37, pad_wd
+    s32i    a8,a1,76                    # [32]  gra_spill_temp_42, input_ht + pad_ht
+    neg     a9,a9                       # [33]  -pad_wd
+    s32i.n  a9,a1,48                # [34]  gra_spill_temp_35, -pad_wd
+    sub     a8,a3,a9                    # [35]  input_wd + pad_wd
+    s32i.n  a8,a1,52                # [36]  gra_spill_temp_36, input_wd + pad_wd
+
+.Lt_5_7938: # 0x822  head of the output-row loop (exits when out_y == out_ht)
+    l32i    a10,a1,92                   # [0]  gra_spill_temp_46, out_wd
+    beqz.n  a10,.Lt_5_8194          # [2]  skip the column loop when out_wd == 0
+
+.LBB6_esp_nn_depthwise_conv_s16_mult1_3x3:  # 0x827
+    l32i.n  a5,a1,52                # [0]  gra_spill_temp_36, a5 = input_wd + pad_wd
+    l32i    a11,a1,76                   # [1]  gra_spill_temp_42
+    movi.n  a13,0                   # [2]
+    l32i    a12,a1,72                   # [3]  gra_spill_temp_41
+    movi.n  a15,0                   # [4]
+    l32i.n  a8,a1,48                # [5]  gra_spill_temp_35, -pad_wd
+    l32i.n  a9,a1,56                # [6]  gra_spill_temp_37, pad_wd
+    s32i    a9,a1,100                   # [7]  gra_spill_temp_48, restart at pad_wd
+    s32i    a8,a1,128                   # [8]  gra_spill_temp_55, restart at -pad_wd
+    s32i    a15,a1,96                   # [9]  gra_spill_temp_47, out_x = 0
+    max     a12,a12,a13                 # [10]  first filter row = max(temp_41, 0)
+    s32i    a12,a1,152                  # [11]  gra_spill_temp_61
+    movi.n  a13,3                   # [12]
+    min     a11,a11,a13                 # [13]  end filter row = min(temp_42, 3)
+    s32i    a11,a1,136                  # [14]  gra_spill_temp_57
+    sub     a11,a11,a12                 # [15]  number of filter rows to visit
+    s32i    a11,a1,120                  # [16]  gra_spill_temp_53
+
+.Lt_5_8706: # 0x854  head of the output-column loop (exits when out_x == out_wd)
+    l32i    a2,a1,84                    # [0]  gra_spill_temp_44, a2 = out_shift pointer
+    l32i    a10,a1,144                  # [1]  gra_spill_temp_59, channels - 7
+    l32i    a11,a1,140                  # [2]  gra_spill_temp_58, bias
+    l32i    a12,a1,88                   # [3]  gra_spill_temp_45, out_mult
+    s32i    a12,a1,168                  # [4]  gra_spill_temp_65, rewind the out_mult pointer
+    s32i    a11,a1,148                  # [5]  gra_spill_temp_60, rewind the bias pointer
+    blti    a10,1,.Lt_5_8962            # [6]  no channel pass when channels - 7 < 1
+
+    movi.n  a8,0                    # [0]
+    movi.n  a13,0                   # [1]
+    l32i    a3,a1,100                   # [2]  gra_spill_temp_48
+    s32i    a13,a1,164                  # [3]  gra_spill_temp_64, ch_idx = 0
+    max     a3,a3,a8                    # [4]  a3 = filter_x_start = max(temp_48, 0)
+
+.Lt_5_9474: # 0x876  head of the channel loop (ch_idx advances by 8)
+    l32i    a10,a1,136                  # [0]  gra_spill_temp_57, end filter row
+    l32i    a9,a1,152                   # [1]  gra_spill_temp_61, first filter row
+    ee.zero.qacc                    # [2]  clear the accumulators
+    bge     a9,a10,.Lt_5_9730           # [3]  skip the MAC loop when first row >= end row
+
+.LBB12_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x882
+    l32i    a12,a1,128                  # [0]  gra_spill_temp_55, base_x
+    l32i    a15,a1,112                  # [1]  gra_spill_temp_51, input_wd
+    l32i    a10,a1,116                  # [2]  gra_spill_temp_52, channels
+    l32i    a13,a1,124                  # [3]  gra_spill_temp_54, base_y
+    mull    a11,a9,a10                  # [4]  first_row * channels
+    add.n   a13,a13,a9                  # [5]  base_y + first_row
+    mull    a13,a13,a15                 # [6]  (base_y + first_row) * input_wd
+    addx2   a11,a11,a11                 # [7]  3 * first_row * channels
+    l32i    a9,a1,164                   # [8]  gra_spill_temp_64, ch_idx
+    add.n   a12,a12,a13                 # [9]  base_x + (base_y + first_row) * input_wd
+    mull    a10,a10,a12                 # [10]  ... * channels
+    add.n   a11,a9,a11                  # [11]  filter element index = ch_idx + 3 * first_row * channels
+    l32i    a12,a1,108                  # [12]  gra_spill_temp_50, filter_data
+    add.n   a9,a9,a10                   # [13]  input element index = ch_idx + pixel * channels
+    l32i    a10,a1,104                  # [14]  gra_spill_temp_49, input_data
+    addx2   a11,a11,a12                 # [15]  a11 = filter_data + 2 * filter element index
+    l32i    a12,a1,120                  # [16]  gra_spill_temp_53, number of filter rows
+    addx2   a9,a9,a10                   # [17]  a9 = input_data + 2 * input element index
+    loopgtz a12,.LBB32_esp_nn_depthwise_conv_s16_mult1_3x3  # [18]  hardware loop over the filter rows
+
+    mov.n   a13,a9                      # [0]  a13 = start of this input row
+    mov.n   a12,a11                     # [1]  a12 = start of this filter row
+    mov.n   a9,a11                      # [2]  a9 = filter read pointer
+    mov.n   a11,a13                     # [3]  a11 = input read pointer
+
+    beqz.n  a3,.Lt_5_10498          # [4] if (filter_x_start); NOTE(review): any nonzero value skips exactly one tap - presumably callers keep pad_wd <= 1, confirm
+
+    add.n   a11,a4,a13                  # [0]  start one pixel (a4 bytes) into the input row
+    add.n   a9,a4,a12                   # [1]  start one tap (a4 bytes) into the filter row
+.Lt_5_10498:    # 0x8c5
+
+    ee.vld.128.xp   q0,a11,a4           # [0]  id:261
+    ee.vld.128.xp   q1,a9,a4            # [1]  id:262
+
+    bnez.n  a3,.Lt_5_11010          # [2] if (filter_x_start): skip the extra MAC/load pair below
+
+    ee.vmulas.s16.qacc  q0,q1       # [0]
+    ee.vld.128.xp   q0,a11,a4           # [1]  id:264
+    ee.vld.128.xp   q1,a9,a4            # [2]  id:265
+.Lt_5_11010:    # 0x8d6
+
+    ee.vmulas.s16.qacc  q0,q1       # [0]
+    ee.vld.128.xp   q0,a11,a4           # [1]  id:267
+    ee.vld.128.xp   q1,a9,a4            # [2]  id:268
+    add.n   a9,a6,a13                   # [3]  a9 = next input row (row start + a6)
+
+    blti    a5,3,.Lt_5_11522            # [4] if (filter_x_end): skip the last MAC when a5 (input_wd + pad_wd - out_x * stride_wd) < 3
+    ee.vmulas.s16.qacc  q0,q1       # [0]
+.Lt_5_11522:    # 0x8e7
+
+    add.n   a11,a7,a12                  # [0]  a11 = next filter row (row start + a7)
+
+.LBB32_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x8eb
+
+.Lt_5_9730: # 0x8eb
+ // extract data: dump QACC into qacc_scratch, then pack the first 2 bytes of every 5-byte group into scratch[0..15]
+    l32i    a9,a1,160                   # [0]  gra_spill_temp_63, a9 = address of qacc_scratch
+    ee.st.qacc_l.l.128.ip   a9,16       # [2]  id:270
+    ee.st.qacc_l.h.32.ip    a9,0        # [3]  id:271
+    l8ui    a11,a1,15                   # [4]  qacc_scratch+15
+    l16ui   a10,a1,10                   # [5]  qacc_scratch+10
+    l8ui    a15,a1,16                   # [6]  qacc_scratch+16
+    l8ui    a13,a1,6                    # [7]  qacc_scratch+6
+    l8ui    a12,a1,5                    # [8]  qacc_scratch+5
+    s8i     a12,a1,2                    # [9]  qacc_scratch+2
+    s8i     a13,a1,3                    # [10]  qacc_scratch+3
+    s8i     a15,a1,7                    # [11]  qacc_scratch+7
+    s16i    a10,a1,4                    # [12]  qacc_scratch+4
+    s8i     a11,a1,6                    # [13]  qacc_scratch+6
+
+    ee.st.qacc_h.l.128.ip   a9,16       # [14]  id:281
+    ee.st.qacc_h.h.32.ip    a9,-32      # [15]  id:282, a9 is back at qacc_scratch+0
+    ee.srcmb.s16.qacc   q1,a14,0        # [16]  a14 = 16; presumably q1 = bits 16..31 of each accumulator - confirm against the ESP32-S3 TRM
+    l8ui    a15,a1,31                   # [17]  qacc_scratch+31
+    l8ui    a8,a1,32                    # [18]  qacc_scratch+32
+    l16ui   a13,a1,26                   # [19]  qacc_scratch+26
+    l8ui    a12,a1,22                   # [20]  qacc_scratch+22
+    l8ui    a11,a1,21                   # [21]  qacc_scratch+21
+    l16ui   a10,a1,16                   # [22]  qacc_scratch+16
+    s16i    a10,a1,8                    # [23]  qacc_scratch+8
+    s8i     a11,a1,10                   # [24]  qacc_scratch+10
+    s8i     a12,a1,11                   # [25]  qacc_scratch+11
+    s16i    a13,a1,12                   # [26]  qacc_scratch+12
+    s8i     a8,a1,15                    # [27]  qacc_scratch+15
+    s8i     a15,a1,14                   # [28]  qacc_scratch+14
+
+
+    l32i    a8,a1,140                   # [29]  gra_spill_temp_58 , bias
+    ee.vld.128.ip   q0,a9,0             # [30]  id:294, q0 = packed qacc_scratch[0..15]
+    s32i    a9,a1,160                   # [31]  gra_spill_temp_63
+    ee.vzip.16  q0,q1               # [32]  interleave 16-bit lanes of q0 and q1; presumably rebuilds eight 32-bit sums - confirm
+    beqz.n  a8,.Lt_5_12290          # [33] // skip bias
+
+    addi    a8,a1,112                   # [0]
+    l32i    a10,a1,132                  # [1]  gra_spill_temp_56, bias & 0xf
+    l32i    a9,a1,148                   # [2]  gra_spill_temp_60, running bias pointer
+    wur.sar_byte    a10                 # [3]  byte shift consumed by ee.src.q.qup below
+    ee.vld.128.ip   q4,a9,16            # [4]  id:297
+    ee.vld.128.ip   q7,a9,16            # [5]  id:298
+    ee.vld.128.ip   q5,a9,0             # [6]  id:299
+    s32i    a9,a1,148                   # [7]  gra_spill_temp_60, pointer advanced by 32 bytes
+    ee.src.q.qup    q6,q4,q7            # [8]  presumably realigns an unaligned bias vector - confirm
+    ee.vadds.s32    q0,q0,q6            # [9]  q0 += bias vector
+    ee.src.q.qup    q3,q4,q5            # [10]
+    ee.vadds.s32    q1,q1,q3            # [11]  q1 += bias vector
+    st.qr   q1,a8,64                    # [12]  gra_spill_temp_66-112
+
+.Lt_5_12290:    # 0x974
+    addi    a11,a1,112                  # [0]
+
+ # 287                  q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+    l32i    a10,a1,168                  # [1]  gra_spill_temp_65, outgoing a10 = out_mult pointer
+    st.qr   q1,a11,64                   # [2]  gra_spill_temp_66-112, keep q1 for the second call
+    mov.n   a11,a2                      # [3]  outgoing a11 = out_shift pointer
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [4]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+ # 288                  out_mult_ptr += 4;
+ # 289                  out_shift_ptr += 4;
+ # 290
+ # 291                  q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr);
+    l32i    a10,a1,168                  # [0]  gra_spill_temp_65
+    addmi   a12,a1,256                  # [1]
+    addi    a11,a1,112                  # [2]
+    st.qr   q0,a12,-16                  # [3]  gra_spill_temp_70-256, save the first result
+    ld.qr   q0,a11,64                   # [4]  gra_spill_temp_66-112, reload the spilled q1 into q0
+    addi    a10,a10,16                  # [5]  out_mult pointer + 16 bytes
+    addi    a11,a2,16                   # [6]  out_shift pointer + 16 bytes
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [7]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+.LBB25_esp_nn_depthwise_conv_s16_mult1_3x3: # 0x99a
+#<loop> Part of loop body line 216, head labeled .Lt_5_9474
+    movi.n  a14,16                  # [0]  restore a14 = 16 after the calls
+ # 292                  out_mult_ptr += 4;
+ # 293                  out_shift_ptr += 4;
+    addi    a2,a2,32                    # [1]  out_shift pointer += 32 bytes
+    l32i    a15,a1,144                  # [2]  gra_spill_temp_59, channels - 7
+    l32i    a9,a1,156                   # [3]  gra_spill_temp_62, out_data pointer
+    l32i    a8,a1,168                   # [4]  gra_spill_temp_65
+    addmi   a12,a1,256                  # [5]
+    addi    a13,a1,112                  # [6]
+    ld.qr   q3,a13,112                  # [7]  gra_spill_temp_69-112, activation_max
+    ld.qr   q1,a13,80                   # [8]  gra_spill_temp_67-112, out_offset
+    ld.qr   q2,a12,-16                  # [9]  gra_spill_temp_70-256, first requantized result
+    addi    a8,a8,32                    # [10]  out_mult pointer += 32 bytes
+    s32i    a8,a1,168                   # [11]  gra_spill_temp_65
+    ee.vadds.s32    q2,q2,q1            # [12]  first result + out_offset
+    ee.vadds.s32    q1,q0,q1            # [13]  second result + out_offset
+    ee.vmin.s32 q0,q2,q3            # [14]  clamp to activation_max
+    ee.vmin.s32 q1,q1,q3            # [15]  clamp to activation_max
+    ld.qr   	q2,a13,96                   # [16]  gra_spill_temp_68-112, activation_min
+    l32i    	a13,a1,164                  # [17]  gra_spill_temp_64, ch_idx
+    ee.vmax.s32 q1,q1,q2            # [18]  clamp to activation_min
+    ee.vmax.s32 q0,q0,q2            # [19]  clamp to activation_min
+    addi.n  	a13,a13,8               # [20]  ch_idx += 8
+    s32i    	a13,a1,164                  # [21]  gra_spill_temp_64
+    ee.vunzip.16    q0,q1               # [22]  presumably narrows the 32-bit lanes to 16 bits - confirm
+    ee.vunzip.8 	q0,q1               # [23]  presumably narrows the 16-bit lanes to 8 bits - confirm
+    ee.vst.l.64.ip  q0,a9,8         # [24]  id:302, store 8 output bytes and advance out_data by 8
+    s32i    	a9,a1,156                   # [25]  gra_spill_temp_62
+    blt     	a13,a15,.Lt_5_9474          # [26]  loop while ch_idx < channels - 7
+
+.Lt_5_8962: # 0x9e9
+#<loop> Part of loop body line 203, head labeled .Lt_5_8706
+    l32i    a8,a1,92                    # [0]  gra_spill_temp_46, out_wd
+    l32i    a11,a1,100                  # [1]  gra_spill_temp_48
+    l32i    a10,a1,128                  # [2]  gra_spill_temp_55, base_x
+    l32i    a9,a1,80                    # [3]  gra_spill_temp_43, stride_wd
+    l32i    a15,a1,96                   # [4]  gra_spill_temp_47, out_x
+    sub     a5,a5,a9                    # [5]  a5 -= stride_wd
+    addi.n  a15,a15,1               # [6]  out_x += 1
+    s32i    a15,a1,96                   # [7]  gra_spill_temp_47
+    add.n   a10,a10,a9                  # [8]  base_x += stride_wd
+    sub     a11,a11,a9                  # [9]  temp_48 -= stride_wd
+    s32i    a11,a1,100                  # [10]  gra_spill_temp_48
+    s32i    a10,a1,128                  # [11]  gra_spill_temp_55
+    sub     a15,a15,a8                  # [12]  out_x - out_wd
+    bnez    a15,.Lt_5_8706              # [13]  next column until out_x == out_wd
+
+.Lt_5_8194: # 0xa11
+#<loop> Part of loop body line 201, head labeled .Lt_5_7938
+    l32i    a13,a1,64                   # [0]  gra_spill_temp_39, out_ht
+    l32i    a10,a1,72                   # [1]  gra_spill_temp_41
+    l32i    a9,a1,124                   # [2]  gra_spill_temp_54, base_y
+    l32i.n  a8,a1,60                # [3]  gra_spill_temp_38, stride_ht
+    l32i    a12,a1,68                   # [4]  gra_spill_temp_40, out_y
+    l32i    a15,a1,76                   # [5]  gra_spill_temp_42
+    addi.n  a12,a12,1               # [6]  out_y += 1
+    s32i    a12,a1,68                   # [7]  gra_spill_temp_40
+    sub     a15,a15,a8                  # [8]  temp_42 -= stride_ht
+    add.n   a9,a9,a8                    # [9]  base_y += stride_ht
+    sub     a10,a10,a8                  # [10]  temp_41 -= stride_ht
+    s32i    a10,a1,72                   # [11]  gra_spill_temp_41
+    s32i    a9,a1,124                   # [12]  gra_spill_temp_54
+    s32i    a15,a1,76                   # [13]  gra_spill_temp_42
+    sub     a12,a12,a13                 # [14]  out_y - out_ht
+    bnez    a12,.Lt_5_7938              # [15]  next row until out_y == out_ht
+
+.Lt_5_7426: # 0xa3e
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3, . - esp_nn_depthwise_conv_s16_mult1_3x3_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S
new file mode 100644
index 0000000..06f9307
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3.S
@@ -0,0 +1,367 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3
+
+esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3: # 0xa42
+    # qacc_scratch = 0
+    # gra_spill_temp_71 = 48
+    # gra_spill_temp_72 = 52
+    # gra_spill_temp_73 = 56
+    # gra_spill_temp_74 = 60
+    # gra_spill_temp_75 = 64
+    # gra_spill_temp_76 = 68
+    # gra_spill_temp_77 = 72
+    # gra_spill_temp_78 = 76
+    # gra_spill_temp_79 = 80
+    # gra_spill_temp_80 = 84
+    # gra_spill_temp_81 = 88
+    # gra_spill_temp_82 = 92
+    # gra_spill_temp_83 = 96
+    # gra_spill_temp_84 = 100
+    # gra_spill_temp_85 = 104
+    # gra_spill_temp_86 = 108
+    # gra_spill_temp_87 = 112
+    # gra_spill_temp_88 = 116
+    # gra_spill_temp_89 = 120
+    # gra_spill_temp_90 = 124
+    # gra_spill_temp_91 = 128
+    # gra_spill_temp_92 = 132
+    # gra_spill_temp_93 = 136
+    # gra_spill_temp_94 = 140
+    # gra_spill_temp_95 = 144
+    # gra_spill_temp_96 = 160
+    # gra_spill_temp_97 = 176
+    # gra_spill_temp_98 = 192
+    # gra_spill_temp_99 = 208
+    # gra_spill_temp_100 = 224
+    # gra_spill_temp_101 = 240
+    # gra_spill_temp_102 = 244
+    # gra_spill_temp_103 = 248
+
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t channels
+ // a6: const uint16_t stride_wd
+ // a7: const uint16_t stride_ht
+
+ // on stack:
+ // const int16_t *filter_data
+ // const int32_t *bias
+ // int8_t *out_data
+ // const uint16_t out_wd
+ // const uint16_t out_ht
+ // const int32_t out_offset
+ // const int32_t *out_shift
+ // const int32_t *out_mult
+ // const int32_t activation_min
+ // const int32_t activation_max
+
+    entry   a1,288                      #
+    s32i    a2,a1,120                   # [0]  gra_spill_temp_89
+    s32i.n  a3,a1,48                # [1]  gra_spill_temp_71
+    s32i    a5,a1,76                    # [2]  gra_spill_temp_78
+    s32i    a6,a1,84                    # [3]  gra_spill_temp_80
+    s32i.n  a7,a1,60                # [4]  gra_spill_temp_74
+    l32i    a12,a1,296                  # [5]  id:241 out_data+0x0
+    addi    a14,a1,112                  # [6]
+    addmi   a10,a1,256                  # [7]
+    addmi   a13,a1,256                  # [8]
+    addmi   a15,a1,256                  # [9]
+
+ // height loop
+    l16ui   a8,a1,304                   # [10]  id:242 out_ht+0x0
+    s32i.n  a8,a1,56                # [11]  gra_spill_temp_73
+    addi    a15,a15,52                  # [12]
+    addi    a13,a13,64                  # [13]
+    addi    a10,a10,68                  # [14]
+    ee.vldbc.32 q0,a10              # [15]  id:240 activation_max
+    ee.vldbc.32 q1,a13              # [16]  id:239 activation_min
+    ee.vldbc.32 q2,a15              # [17]  id:238 out_offset
+    st.qr   q2,a14,64                   # [18]  gra_spill_temp_97-112
+    st.qr   q1,a14,80                   # [19]  gra_spill_temp_98-112
+    st.qr   q0,a14,96                   # [20]  gra_spill_temp_99-112
+    beqz.n  a8,.Lt_6_6914           # [21]
+
+.LBB3_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad:   # 0xa83
+    s32i    a1,a1,144                   # [0]  gra_spill_temp_95
+    mul16u  a7,a3,a5                # [1]
+    s32i    a4,a1,72                    # [2]  gra_spill_temp_77
+    addi    a9,a5,-7                    # [3]
+    l16ui   a11,a1,300                  # [4]  id:247 out_wd+0x0
+    l32i    a10,a1,292                  # [5]  id:243 bias+0x0
+    l32i    a15,a1,288                  # [6]  id:244 filter_data+0x0
+    l32i    a13,a1,316                  # [7]  id:246 out_mult+0x0
+    l32i    a14,a1,312                  # [8]  id:245 out_shift+0x0
+    s32i    a14,a1,88                   # [9]  gra_spill_temp_81
+    s32i    a13,a1,92                   # [10]  gra_spill_temp_82
+    s32i    a15,a1,124                  # [11]  gra_spill_temp_90
+    s32i    a10,a1,116                  # [12]  gra_spill_temp_88
+    s32i    a11,a1,96                   # [13]  gra_spill_temp_83
+    s32i    a9,a1,136                   # [14]  gra_spill_temp_93
+    addx2   a4,a5,a5                    # [15]
+    slli    a4,a4,1                     # [16]
+    slli    a7,a7,1                     # [17]
+    l32i.n  a9,a1,60                # [18]  gra_spill_temp_74
+    movi.n  a11,0                   # [19]
+    extui   a10,a10,0,4                 # [20]
+    movi.n  a15,0                   # [21]
+    slli    a5,a5,1                     # [22]
+    s32i    a15,a1,68                   # [23]  gra_spill_temp_76
+    s32i    a10,a1,112                  # [24]  gra_spill_temp_87
+    s32i    a11,a1,64                   # [25]  gra_spill_temp_75
+    mul16u  a8,a3,a9                # [26]
+    movi.n  a11,0                   # [27]
+    s32i    a11,a1,80                   # [28]  gra_spill_temp_79
+    s32i.n  a8,a1,52                # [29]  gra_spill_temp_72
+
+.Lt_6_7426: # 0xad8 // width_loop
+    l32i    a8,a1,96                    # [0]  gra_spill_temp_83
+    beqz.n  a8,.Lt_6_7682           # [2]
+
+    movi.n  a11,3                   # [0]
+    l32i    a10,a1,72                   # [1]  gra_spill_temp_77
+    movi.n  a9,0                    # [2]
+    movi.n  a13,0                   # [3]
+    l32i.n  a14,a1,48               # [4]  gra_spill_temp_71
+    s32i    a14,a1,108                  # [5]  gra_spill_temp_86
+    s32i    a13,a1,104                  # [6]  gra_spill_temp_85
+    s32i    a9,a1,100                   # [7]  gra_spill_temp_84
+    min a10,a10,a11                 # [8]
+    s32i    a10,a1,128                  # [9]  gra_spill_temp_91
+
+.Lt_6_8194: # 0xaf7
+    l32i    a2,a1,88                    # [0]  gra_spill_temp_81
+    l32i    a6,a1,92                    # [1]  gra_spill_temp_82
+    l32i    a8,a1,116                   # [2]  gra_spill_temp_88
+
+// channel loop
+    l32i    a15,a1,136                  # [3]  gra_spill_temp_93
+    s32i    a8,a1,140                   # [4]  gra_spill_temp_94
+    blti    a15,1,.Lt_6_8450            # [5]
+
+    movi.n  a11,0                   # [0]
+    movi.n  a10,0                   # [1]
+    l32i    a9,a1,76                    # [2]  gra_spill_temp_78
+    l32i    a14,a1,80                   # [3]  gra_spill_temp_79
+    movi.n  a8,3                    # [4]
+    l32i    a3,a1,108                   # [5]  gra_spill_temp_86
+    l32i    a13,a1,104                  # [6]  gra_spill_temp_85
+    min a3,a3,a8                    # [7]
+    add.n   a13,a13,a14                 # [8]
+    mull    a9,a9,a13                   # [9]
+    s32i    a9,a1,132                   # [10]  gra_spill_temp_92
+
+.Lt_6_8962: # 0xb26
+    ee.zero.qacc                    # [0]
+    l32i    a9,a1,132                   # [1]  gra_spill_temp_92
+    l32i    a13,a1,120                  # [2]  gra_spill_temp_89
+    add.n   a9,a9,a10                   # [3]
+    addx2   a9,a9,a13                   # [4]
+    l32i    a13,a1,124                  # [5]  gra_spill_temp_90
+    l32i    a14,a1,128                  # [6]  gra_spill_temp_91
+    add.n   a13,a11,a13                 # [7]
+    loopgtz a14,.LBB30_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad   # [8]
+
+.Lt_6_9730: # 0xb3f
+#<loop> Loop body line 360, nesting depth: 4, estimated iterations: 100
+    mov.n   a14,a13                     # [0]
+    mov.n   a15,a9                      # [1]
+    ee.vld.128.xp   q0,a15,a5           # [2]  id:249
+    ee.vld.128.xp   q1,a14,a5           # [3]  id:250
+    add.n   a9,a9,a7                    # [4]
+    beqi    a3,2,.LBB15_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad  # [5]
+
+.Lt_6_9986: # 0xb4e
+    beqi    a3,3,.LBB17_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad  # [0]
+
+.Lt_6_10498:    # 0xb51
+    add.n   a13,a13,a4                  # [0]
+    ee.vmulas.s16.qacc  q0,q1       # [1]
+
+.LBB30_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad:  # 0xb58
+
+ // extract data
+    l32i    a15,a1,144                  # [0]  gra_spill_temp_95
+    ee.st.qacc_l.l.128.ip   a15,16      # [2]  id:258
+    ee.st.qacc_l.h.32.ip    a15,0       # [3]  id:259
+    l8ui    a14,a1,15                   # [4]  qacc_scratch+15
+    l8ui    a13,a1,16                   # [5]  qacc_scratch+16
+    l8ui    a8,a1,5                     # [6]  qacc_scratch+5
+    l8ui    a9,a1,6                     # [7]  qacc_scratch+6
+    s8i     a9,a1,3                     # [8]  qacc_scratch+3
+    s8i     a8,a1,2                     # [9]  qacc_scratch+2
+    s8i     a13,a1,7                    # [10]  qacc_scratch+7
+    s8i     a14,a1,6                    # [11]  qacc_scratch+6
+    l16ui   a13,a1,10                   # [12]  qacc_scratch+10
+    s16i    a13,a1,4                    # [13]  qacc_scratch+4
+    ee.st.qacc_h.l.128.ip   a15,16      # [14]  id:269
+    ee.st.qacc_h.h.32.ip    a15,-32     # [15]  id:270
+    l8ui    a9,a1,32                    # [16]  qacc_scratch+32
+    l8ui    a13,a1,22                   # [17]  qacc_scratch+22
+    l8ui    a8,a1,31                    # [18]  qacc_scratch+31
+    l16ui   a14,a1,26                   # [19]  qacc_scratch+26
+    s16i    a14,a1,12                   # [20]  qacc_scratch+12
+    s8i     a8,a1,14                    # [21]  qacc_scratch+14
+    s8i     a13,a1,11                   # [22]  qacc_scratch+11
+    s8i     a9,a1,15                    # [23]  qacc_scratch+15
+
+    l32i    a13,a1,116                  # [24]  gra_spill_temp_88
+    l8ui    a9,a1,21                    # [25]  qacc_scratch+21
+    l16ui   a8,a1,16                    # [26]  qacc_scratch+16
+    movi.n  a14,16                  # [27]
+    ee.srcmb.s16.qacc   q1,a14,0        # [28]
+    s16i    a8,a1,8                     # [29]  qacc_scratch+8
+    s8i     a9,a1,10                    # [30]  qacc_scratch+10
+    ee.vld.128.ip   q0,a15,0            # [31]  id:282
+    s32i    a15,a1,144                  # [32]  gra_spill_temp_95
+    ee.vzip.16  q0,q1               # [33]
+
+    bnez.n  a13,.LBB20_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad   # [34]
+
+    s32i    a12,a1,240                  # [0]  gra_spill_temp_101
+    s32i    a11,a1,244                  # [1]  gra_spill_temp_102
+    s32i    a10,a1,248                  # [2]  gra_spill_temp_103
+    addi    a14,a1,112                  # [3]
+    st.qr   q1,a14,48                   # [4]  gra_spill_temp_96-112
+    j   .Lt_6_11266                     # [5]
+
+.LBB15_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad:  # 0xbce
+#<loop> Part of loop body line 360, head labeled .Lt_6_9730
+    ee.vmulas.s16.qacc.ld.xp    q0,a15,a5,q0,q1     # [0]  id:251
+    ee.vld.128.xp   q1,a14,a5           # [1]  id:252
+    bnei    a3,3,.Lt_6_10498            # [2]
+
+.LBB17_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad:  # 0xbd8
+    ee.vmulas.s16.qacc.ld.xp    q3,a15,a5,q0,q1     # [0]  id:253
+    ee.vld.128.xp   q4,a14,a5           # [1]  id:254
+    ee.vld.128.xp   q1,a14,a5           # [2]  id:256
+    ee.vmulas.s16.qacc.ld.xp    q0,a15,a5,q3,q4     # [3]  id:255
+    j   .Lt_6_10498                     # [4]
+
+.LBB20_esp_nn_depthwise_conv_s16_mult1_3x3_no_pad:  # 0xbe9
+#<loop> Part of loop body line 358, head labeled .Lt_6_8962
+    s32i    a12,a1,240                  # [0]  gra_spill_temp_101
+    s32i    a11,a1,244                  # [1]  gra_spill_temp_102
+    s32i    a10,a1,248                  # [2]  gra_spill_temp_103
+    addi    a15,a1,112                  # [3]
+    l32i    a9,a1,112                   # [4]  gra_spill_temp_87
+    l32i    a8,a1,140                   # [5]  gra_spill_temp_94
+    wur.sar_byte    a9                  # [6]
+    ee.vld.128.ip   q6,a8,16            # [7]  id:285
+    ee.vld.128.ip   q3,a8,16            # [8]  id:286
+    ee.vld.128.ip   q7,a8,0             # [9]  id:287
+    s32i    a8,a1,140                   # [10]  gra_spill_temp_94
+    ee.src.q.qup    q2,q6,q3            # [11]
+    ee.vadds.s32    q0,q0,q2            # [12]
+    ee.src.q.qup    q5,q6,q7            # [13]
+    ee.vadds.s32    q1,q1,q5            # [14]
+    st.qr           q1,a15,48                   # [15]  gra_spill_temp_96-112
+
+.Lt_6_11266:    # 0xc19
+ # 423                  q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+    mov.n   a10,a6                      # [0]
+    mov.n   a11,a2                      # [1]
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [2]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+    addi    a11,a1,112                  # [0]
+    addi    a10,a6,16                   # [1]
+    st.qr   q0,a11,112                  # [2]  gra_spill_temp_100-112
+    ld.qr   q0,a11,48                   # [3]  gra_spill_temp_96-112
+    addi    a11,a2,16                   # [4]
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [5]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+    addi    a6,a6,32                    # [0]
+    addi    a2,a2,32                    # [1]
+
+    l32i    a13,a1,136                  # [2]  gra_spill_temp_93
+    l32i    a12,a1,240                  # [3]  gra_spill_temp_101
+    l32i    a10,a1,248                  # [4]  gra_spill_temp_103
+    l32i    a11,a1,244                  # [5]  gra_spill_temp_102
+    addi    a9,a1,112                   # [6]
+    ld.qr   q6,a9,80                    # [7]  gra_spill_temp_98-112
+    ld.qr   q7,a9,96                    # [8]  gra_spill_temp_99-112
+    ld.qr   q5,a9,64                    # [9]  gra_spill_temp_97-112
+    ld.qr   q4,a9,112                   # [10]  gra_spill_temp_100-112
+    addi    a11,a11,16                  # [11]
+    addi.n  a10,a10,8               # [12]
+    ee.vadds.s32    q4,q4,q5            # [13]
+    ee.vadds.s32    q5,q0,q5            # [14]
+    ee.vmin.s32     q4,q4,q7            # [15]
+    ee.vmax.s32     q4,q4,q6            # [16]
+    ee.vmin.s32     q5,q5,q7            # [17]
+    ee.vmax.s32     q5,q5,q6            # [18]
+    ee.vunzip.16    q4,q5               # [19]
+    ee.vunzip.8     q4,q5               # [20]
+    ee.vst.l.64.ip  q4,a12,8        # [21]  id:290
+    blt         a10,a13,.Lt_6_8962          # [22]
+
+.Lt_6_8450: # 0xc76
+#<loop> Part of loop body line 348, head labeled .Lt_6_8194
+    l32i    a11,a1,96                   # [0]  gra_spill_temp_83
+    l32i    a15,a1,104                  # [1]  gra_spill_temp_85
+    l32i    a14,a1,84                   # [2]  gra_spill_temp_80
+    l32i    a10,a1,100                  # [3]  gra_spill_temp_84
+    l32i    a13,a1,108                  # [4]  gra_spill_temp_86
+    addi.n  a10,a10,1               # [5]
+    s32i    a10,a1,100                  # [6]  gra_spill_temp_84
+    sub     a13,a13,a14                 # [7]
+    add.n   a15,a15,a14                 # [8]
+    s32i    a15,a1,104                  # [9]  gra_spill_temp_85
+    s32i    a13,a1,108                  # [10]  gra_spill_temp_86
+    sub     a10,a10,a11                 # [11]
+    bnez    a10,.Lt_6_8194              # [12]
+
+.Lt_6_7682: # 0xc9b
+    l32i.n  a9,a1,56                # [0]  gra_spill_temp_73
+    l32i    a15,a1,64                   # [1]  gra_spill_temp_75
+    l32i.n  a14,a1,52               # [2]  gra_spill_temp_72
+    l32i    a13,a1,80                   # [3]  gra_spill_temp_79
+    l32i.n  a11,a1,60               # [4]  gra_spill_temp_74
+    l32i    a8,a1,68                    # [5]  gra_spill_temp_76
+    l32i    a10,a1,72                   # [6]  gra_spill_temp_77
+    addi.n  a8,a8,1                 # [7]
+    s32i    a8,a1,68                    # [8]  gra_spill_temp_76
+    sub     a10,a10,a11                 # [9]
+    add.n   a13,a13,a14                 # [10]
+    add.n   a15,a15,a11                 # [11]
+    s32i    a15,a1,64                   # [12]  gra_spill_temp_75
+    s32i    a13,a1,80                   # [13]  gra_spill_temp_79
+    s32i    a10,a1,72                   # [14]  gra_spill_temp_77
+    sub     a8,a8,a9                    # [15]
+    bnez    a8,.Lt_6_7426               # [16]
+
+.Lt_6_6914: # 0xcc8
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3, . - esp_nn_depthwise_conv_s16_mult1_3x3_no_pad_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S
new file mode 100644
index 0000000..8568df5
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult1_esp32s3.S
@@ -0,0 +1,345 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult1_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult1_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult1_esp32s3
+
+esp_nn_depthwise_conv_s16_mult1_esp32s3:    # 0x4c8 // depthwise convolution over s16 data; each channel-loop pass handles 8 channels and stores 8 output bytes
+    # scratch_buf = 0 // the values listed here are byte offsets from a1 of the stack locals used below
+    # gra_spill_temp_2 = 48 // initial base_x (-pad_wd, sign-extended from bit 15)
+    # gra_spill_temp_22 = 52 // input_data pointer
+    # gra_spill_temp_4 = 56 // input_ht
+    # gra_spill_temp_23 = 60 // channels
+    # gra_spill_temp_24 = 64 // channels * 2 (post-increment used by the inner-loop loads)
+    # gra_spill_temp_7 = 68 // out_y counter
+    # gra_spill_temp_26 = 72 // inner-loop trip count (filter_x_end - filter_x_start)
+    # gra_spill_temp_27 = 76 // written and advanced by filter_wd per filter row; not read for addressing in this routine
+    # gra_spill_temp_28 = 80 // current filter_y
+    # gra_spill_temp_29 = 84 // base_x for the current out_x
+    # gra_spill_temp_12 = 88 // out_x counter
+    # gra_spill_temp_13 = 92 // base_y for the current out_y
+    # gra_spill_temp_14 = 96 // filter_y_start = max(-base_y, 0)
+    # gra_spill_temp_15 = 100 // channels - 7 (channel loop bound)
+    # gra_spill_temp_21 = 104 // running out_shift pointer
+    # gra_spill_temp_17 = 108 // running bias pointer
+    # gra_spill_temp_18 = 112 // running output pointer
+    # gra_spill_temp_20 = 116 // running out_mult pointer
+    # gra_spill_temp_30 = 0 // q-register spill slot; shares offset 0 with scratch_buf
+    # gra_spill_temp_34 = 16 // q-register spill slot; overlaps the upper part of scratch_buf
+
+ // in registers (incoming arguments):
+ // a2: *input_data
+ // a3: input_wd
+ // a4: input_ht
+ // a5: channels
+ // a6: pad_wd
+ // a7: pad_ht
+
+ // on stack (read at a1+160 and up once the 160-byte frame is set up):
+ // stride_wd        (a1+160)
+ // stride_ht        (a1+164)
+ // *filter_data     (a1+168)
+ // filter_wd        (a1+172)
+ // filter_ht        (a1+176)
+ // *bias            (a1+180)
+ // *out_data        (a1+184)
+ // out_wd           (a1+188)
+ // out_ht           (a1+192)
+ // out_offset       (a1+196)
+ // *out_shift       (a1+200)
+ // *out_mult        (a1+204)
+ // activation_min   (a1+208, offset inferred from argument order)
+ // activation_max   (a1+212)
+
+    entry   a1,160                      # // allocate the 160-byte frame
+    l32i    a9,a1,184                   # [7]  id:237 out_data+0x0
+    l16ui   a8,a1,192                   # [8]  id:238 out_ht+0x0 // only the low 16 bits of out_ht are tested here
+    s32i    a2,a1,52                    # [0]  gra_spill_temp_22 // spill input_data
+    s32i.n  a4,a1,56                # [1]  gra_spill_temp_4 // spill input_ht
+    s32i    a5,a1,60                    # [2]  gra_spill_temp_23 // spill channels
+    s32i    a9,a1,112                   # [10]  gra_spill_temp_18 // running output pointer starts at out_data
+    beqz.n  a8,.Lt_4_7170           # [20] // nothing to do when out_ht is zero
+
+.LBB3_esp_nn_depthwise_conv_s16_mult1:  # 0x508 // one-time setup before the height loop
+    l16ui   a4,a1,172                   # [0]  id:240 filter_wd+0x0 // a4 = filter_wd (low 16 bits) for the rest of the routine
+    neg     a13,a7                      # [2] // -pad_ht
+    neg     a12,a6                      # [3] // -pad_wd
+    sext    a12,a12,15                  # [16] // sign-extend from bit 15
+    sext    a13,a13,15                  # [17] // sign-extend from bit 15
+    s32i    a13,a1,92                   # [18]  gra_spill_temp_13 // base_y starts at -pad_ht
+    s32i.n  a12,a1,48               # [19]  gra_spill_temp_2 // base_x restarts from -pad_wd on every row
+    movi.n  a8,0                    # [20]
+    slli    a9,a5,1                     # [21] // channels * 2
+    addi    a10,a5,-7                   # [22] // channels - 7
+    s32i    a10,a1,100                  # [23]  gra_spill_temp_15
+    s32i    a9,a1,64                    # [24]  gra_spill_temp_24
+    s32i    a8,a1,68                    # [25]  gra_spill_temp_7 // out_y = 0
+    j   .Lt_4_7682                      # [30]
+
+.Lt_4_7938: # 0x561 // end of one output row: advance out_y and base_y
+    l32i    a15,a1,192                  # [0]  out_ht
+    l32i.n  a9,a1,164                   # [1]  stride_ht
+    l32i    a14,a1,68                   # [2]  gra_spill_temp_7
+    l32i    a8,a1,92                    # [3]  gra_spill_temp_13
+    addi.n  a14,a14,1               # [4] // out_y++
+    s32i    a14,a1,68                   # [5]  gra_spill_temp_7
+    add.n   a9,a8,a9                    # [6] // base_y + stride_ht
+    sub     a14,a14,a15                 # [7] // out_y - out_ht
+    sext    a8,a9,15                    # [8] // keep base_y sign-extended from bit 15
+    s32i    a8,a1,92                    # [9]  gra_spill_temp_13
+    beqz    a14,.Lt_4_7170              # [10] // return once out_y == out_ht
+
+.Lt_4_7682: # 0x57f // height loop body
+#<loop> Loop body line 59, nesting depth: 1, estimated iterations: 100
+ #  60          const int16_t base_y = (out_y * stride_ht) - pad_ht;
+ #  61          for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
+    l32i    a10,a1,188                  # [0]  out_width
+    beqz.n  a10,.Lt_4_7938          # [2] // out_wd == 0: skip straight to the next row
+
+.LBB6_esp_nn_depthwise_conv_s16_mult1:  # 0x584 // per-row setup: clamp the vertical filter range to the input
+#<loop> Part of loop body line 59, head labeled .Lt_4_7682
+    movi.n  a14,0                   # [0]
+    l32i.n  a7,a1,176                   # [1]  filter_ht
+    l32i    a13,a1,92                   # [2]  gra_spill_temp_13
+    l32i.n  a8,a1,56                # [3]  gra_spill_temp_4
+    movi.n  a11,0                   # [4]
+    l32i.n  a12,a1,48               # [5]  gra_spill_temp_2
+    s32i    a12,a1,84                   # [6]  gra_spill_temp_29 // base_x = -pad_wd
+    s32i    a11,a1,88                   # [7]  gra_spill_temp_12 // out_x = 0
+    sub     a8,a8,a13                   # [8] // input_ht - base_y
+    min     a7,a7,a8                    # [9] // a7 = filter_y_end = min(filter_ht, input_ht - base_y)
+    neg     a13,a13                     # [10]
+    max     a13,a13,a14                 # [11] // filter_y_start = max(-base_y, 0)
+    s32i    a13,a1,96                   # [12]  gra_spill_temp_14
+    j   .Lt_4_8450                      # [13]
+
+.Lt_4_8706: # 0x5a9 // end of one output pixel: advance out_x and base_x
+#<loop> Part of loop body line 61, head labeled .Lt_4_8450
+    l32i    a10,a1,188                  # [0]  out_width
+    l32i    a12,a1,160                  # [1]  stride_wd
+    l32i    a9,a1,88                    # [2]  gra_spill_temp_12
+    l32i    a11,a1,84                   # [3]  gra_spill_temp_29
+    addi.n  a9,a9,1                 # [4] // out_x++
+    s32i    a9,a1,88                    # [5]  gra_spill_temp_12
+    add.n   a12,a11,a12                 # [6] // base_x + stride_wd
+    sext    a11,a12,15                  # [7] // keep base_x sign-extended from bit 15
+    s32i    a11,a1,84                   # [8]  gra_spill_temp_29
+    beq     a9,a10,.Lt_4_7938           # [9] // row finished when out_x == out_wd
+
+.Lt_4_8450: # 0x5c5 // width loop body: reset the per-pixel bias / out_mult / out_shift pointers
+#<loop> Loop body line 61, nesting depth: 2, estimated iterations: 100
+ #  69              uint32_t bias_ptr = (uint32_t) bias;
+ #  70              const int32_t *out_mult_ptr = out_mult;
+ #  71              const int32_t *out_shift_ptr = out_shift;
+ #  72
+ #  73              for (int ch_idx = 0; ch_idx < channels - 7; ch_idx += 8) {//channel_loop
+    l32i    a13,a1,100                  # [0]  gra_spill_temp_15
+    l32i    a14,a1,180                  # [1]  bias
+    l32i    a15,a1,204                  # [2]  out_mult
+    l32i    a8,a1,200                   # [3]  out_shift
+    s32i    a8,a1,104                   # [4]  gra_spill_temp_21
+    s32i    a15,a1,116                  # [5]  gra_spill_temp_20
+    s32i    a14,a1,108                  # [6]  gra_spill_temp_17
+    blti    a13,1,.Lt_4_8706            # [7] // channels - 7 < 1: no full group of 8 channels, nothing is computed for this pixel
+
+.LBB9_esp_nn_depthwise_conv_s16_mult1:  # 0x5dd // per-pixel setup: clamp the horizontal filter range to the input
+#<loop> Part of loop body line 61, head labeled .Lt_4_8450
+    movi.n  a2,0                    # [0] // ch_idx = 0 (a2 is reused; input_data was spilled to a1+52)
+    l32i    a5,a1,84                    # [1]  gra_spill_temp_29
+    movi.n  a8,0                    # [2]
+    neg     a6,a5                       # [3]
+    max     a6,a6,a8                    # [4] // a6 = filter_x_start = max(-base_x, 0)
+    sub     a5,a3,a5                    # [5] // input_wd - base_x
+    min     a5,a4,a5                    # [6] // a5 = filter_x_end = min(filter_wd, input_wd - base_x)
+    sub     a9,a5,a6                    # [7]
+    s32i    a9,a1,72                    # [8]  gra_spill_temp_26 // trip count for the innermost loop
+    j   .Lt_4_9218                      # [9]
+
+.Lt_4_9474: # 0x5f9 // accumulation finished for this group of 8 channels
+
+// extract data: dump QACC to scratch_buf and repack it (assumes eight accumulator lanes spaced 5 bytes apart - TODO confirm against the PIE reference)
+    mov     a11,a1
+    ee.st.qacc_l.l.128.ip   a11,16      # [2]  id:252 // scratch_buf[0..15]
+    ee.st.qacc_l.h.32.ip    a11,0       # [3]  id:253 // scratch_buf[16..19]
+    l8ui    a12,a1,15                   # [4]  scratch_buf+15
+    l16ui   a10,a1,10                   # [5]  scratch_buf+10
+    l8ui    a13,a1,5                    # [6]  scratch_buf+5
+    l8ui    a14,a1,6                    # [7]  scratch_buf+6
+    l8ui    a15,a1,16                   # [8]  scratch_buf+16 // read before the QACC_H store below overwrites offset 16
+    s8i     a13,a1,2                    # [11]  scratch_buf+2 // bytes 5,6 -> 2,3
+    s8i     a14,a1,3                    # [10]  scratch_buf+3
+    s8i     a15,a1,7                    # [9]  scratch_buf+7 // bytes 15,16 -> 6,7
+    s16i    a10,a1,4                    # [12]  scratch_buf+4 // bytes 10,11 -> 4,5
+    s8i     a12,a1,6                    # [13]  scratch_buf+6
+
+    movi.n  a10,16                  # [14]
+    ee.st.qacc_h.l.128.ip   a11,16      # [15]  id:263 // scratch_buf[16..31]
+    ee.st.qacc_h.h.32.ip    a11,-32     # [16]  id:264 // scratch_buf[32..35]; a11 is back at scratch_buf+0 afterwards
+    ee.srcmb.s16.qacc       q1,a10,0        # [17] // presumably extracts the accumulators shifted right by 16 into q1 - TODO confirm
+    l8ui    a8,a1,31                    # [18]  scratch_buf+31
+    l8ui    a9,a1,32                    # [19]  scratch_buf+32
+    l16ui   a12,a1,16                   # [20]  scratch_buf+16
+    l8ui    a13,a1,21                   # [21]  scratch_buf+21
+    l8ui    a14,a1,22                   # [22]  scratch_buf+22
+    l16ui   a15,a1,26                   # [23]  scratch_buf+26
+    s8i     a13,a1,10                   # [26]  scratch_buf+10 // bytes 21,22 -> 10,11
+    s8i     a14,a1,11                   # [25]  scratch_buf+11
+    s16i    a15,a1,12                   # [24]  scratch_buf+12 // bytes 26,27 -> 12,13
+    s16i    a12,a1,8                    # [27]  scratch_buf+8 // bytes 16,17 -> 8,9
+    s8i     a9,a1,15                    # [28]  scratch_buf+15 // bytes 31,32 -> 14,15
+    s8i     a8,a1,14                    # [29]  scratch_buf+14
+
+    l32i            a9,a1,180                   # [30]  bias
+    ee.vld.128.ip   q0,a11,0            # [31]  id:164 // q0 = the 16 repacked bytes at scratch_buf+0
+    ee.vzip.16      q0,q1               # [33] // presumably interleaves low/high halves so q0,q1 hold 2 x 4 32-bit sums - TODO confirm
+    beqz.n          a9,.Lt_4_11522          # [34] // skip bias
+
+// add bias: 32 bytes are consumed from the running bias pointer per group of 8 channels
+    l32i    a9,a1,108                   # [0]  gra_spill_temp_17
+    addi    a8,a1,112                   # [1] // NOTE(review): a8 looks unused here; it is reloaded from a1+112 before any read
+    extui   a10,a9,0,4                  # [2] // low 4 bits of the bias pointer
+    wur.sar_byte    a10                 # [3] // byte shift amount used by ee.src.q below
+    ee.vld.128.ip   q4,a9,16            # [4]  id:279
+    ee.vld.128.ip   q7,a9,16            # [5]  id:168
+    ee.vld.128.ip   q5,a9,0             # [6]  id:281
+    s32i    a9,a1,108                   # [7]  gra_spill_temp_17 // bias pointer advanced by 32 bytes
+    ee.src.q    q4,q4,q7            # [8] // presumably realigns an unaligned bias vector using SAR_BYTE - TODO confirm
+    ee.src.q    q7,q7,q5            # [10]
+    ee.vadds.s32    q0,q0,q4            # [9]
+    ee.vadds.s32    q1,q1,q7            # [11]
+    st.qr   q1,a1,0                 # [12]  gra_spill_temp_30-112 // same store is repeated after the label below, so both paths spill q1
+
+.Lt_4_11522:    # 0x684
+
+// apply quantisation: esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+
+    l32i    a10,a1,116                  # [1]  gra_spill_temp_20 // out_mult_ptr
+    l32i    a11,a1,104                  # [3]  gra_spill_temp_21 // out_shift_ptr
+    st.qr   q1,a1,0                 # [2]  gra_spill_temp_30-112 // keep the second half of the sums across the call
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [4]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3 // first half, passed and returned in q0
+
+    l32i    a10,a1,116                  # [2]  gra_spill_temp_20
+    l32i    a11,a1,104                  # [0]  gra_spill_temp_21
+    st.qr   q0,a1,16                # [3]  gra_spill_temp_34-112 // save the first requantised half
+    ld.qr   q0,a1,0                 # [4]  gra_spill_temp_30-112 // second half becomes the helper input
+    addi    a10,a10,16                  # [5] // out_mult_ptr += 4
+    addi    a11,a11,16                  # [6] // out_shift_ptr += 4
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [7]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3 // second half
+
+// add offset, apply activation and store
+    l32i    a13,a1,100                  # [0]  gra_spill_temp_15
+    addi.n  a2,a2,8                 # [1] // ch_idx += 8
+    l32i    a8,a1,112                   # [2]  gra_spill_temp_18 // running output pointer
+    l32i    a15,a1,116                  # [3]  gra_spill_temp_20
+    l32i    a14,a1,104                  # [4]  gra_spill_temp_21
+
+    addi        a12,a1,212
+    ee.vldbc.32 q3,a12              # [14]  id:236 activation_max
+    addi        a12,a1,196
+    ee.vldbc.32 q1,a12              # [16]  id:234 out_offset
+    addi    a12,a1,208 // address of the argument between out_mult and activation_max, i.e. activation_min
+
+    ld.qr   q2,a1,16                # [8]  gra_spill_temp_34-112 // first requantised half
+
+    addi    a14,a14,32                  # [9] // out_shift pointer advances 32 bytes per group
+    addi    a15,a15,32                  # [10] // out_mult pointer advances 32 bytes per group
+    s32i    a15,a1,116                  # [11]  gra_spill_temp_20
+    ee.vadds.s32    q2,q2,q1            # [12] // first half + out_offset
+    s32i            a14,a1,104                  # [13]  gra_spill_temp_21
+    ee.vadds.s32    q1,q0,q1            # [14] // second half + out_offset
+    ee.vmin.s32     q0,q2,q3            # [15] // upper clamp with activation_max
+    ee.vldbc.32     q2,a12              # [16]  id:234 out_offset // loads from a1+208 (activation_min); the out_offset tag in the generated annotation is stale
+    ee.vmin.s32     q1,q1,q3            # [17]
+    ee.vmax.s32     q1,q1,q2            # [18] // lower clamp
+    ee.vmax.s32     q0,q0,q2            # [19]
+    ee.vunzip.16    q0,q1               # [20] // presumably narrows the 32-bit lanes to 16 bits - TODO confirm
+    ee.vunzip.8     q0,q1               # [21] // presumably narrows the 16-bit lanes to 8 bits - TODO confirm
+    ee.vst.l.64.ip  q0,a8,8         # [22]  id:172 // store 8 output bytes, output pointer += 8
+    s32i    a8,a1,112                   # [23]  gra_spill_temp_18
+    bge     a2,a13,.Lt_4_8706           # [24] // leave once ch_idx >= channels - 7; trailing channels (channels % 8) are not processed here
+
+.Lt_4_9218: # 0x6f5 // start of one group of 8 channels: clear the accumulators
+    ee.zero.qacc                    # [0]
+    l32i    a13,a1,96                   # [1]  gra_spill_temp_14
+    s32i    a13,a1,80                   # [2]  gra_spill_temp_28 // filter_y = filter_y_start
+    bge     a13,a7,.Lt_4_9474           # [3] // empty vertical range: go straight to extraction with zero sums
+
+.LBB12_esp_nn_depthwise_conv_s16_mult1: # 0x701 // channel_loop
+    mull    a15,a13,a4                  # [0] // a15 = filter_y * filter_wd
+    l32i    a14,a1,92                   # [1]  gra_spill_temp_13
+    add.n   a8,a15,a5                   # [2]
+    add.n   a14,a14,a13                 # [3] // base_y + filter_y
+    mull    a14,a3,a14                  # [4] // a14 = input_wd * (base_y + filter_y)
+    s32i    a8,a1,76                    # [5]  gra_spill_temp_27
+    bge     a6,a5,.Lt_4_10242           # [6] // empty horizontal range: skip the MAC loop for this row
+
+.LBB15_esp_nn_depthwise_conv_s16_mult1: # 0x714 // compute the input/filter addresses for one filter row, then run the MAC loop
+    l32i    a12,a1,64                   # [0]  gra_spill_temp_24 // channels * 2: step between horizontally adjacent taps
+    l32i    a9,a1,168                   # [1]  filter_data
+    l32i    a10,a1,60                   # [2]  gra_spill_temp_23
+    l32i    a11,a1,84                   # [3]  gra_spill_temp_29
+    add.n   a8,a15,a6                   # [4] // filter_y * filter_wd + filter_x_start
+    add.n   a11,a11,a6                  # [5] // base_x + filter_x_start
+    mull    a8,a8,a10                   # [6] // ... * channels
+    add.n   a11,a14,a11                 # [7] // input pixel index
+    mull    a10,a10,a11                 # [8] // ... * channels
+    add.n   a8,a2,a8                    # [9] // + ch_idx
+    l32i    a11,a1,52                   # [10]  gra_spill_temp_22
+    addx2   a8,a8,a9                    # [11] // a8 = filter_data + 2 * filter element index
+    add.n   a10,a2,a10                  # [12] // + ch_idx
+    l32i    a9,a1,72                    # [13]  gra_spill_temp_26
+    addx2   a10,a10,a11                 # [14] // a10 = input_data + 2 * input element index
+    loopgtz a9,.LBB41_esp_nn_depthwise_conv_s16_mult1   # [15] // hardware loop over the clamped filter columns
+// innermost loop: one 128-bit input load, one 128-bit filter load and one multiply-accumulate per filter column
+    ee.vld.128.xp   q0,a10,a12          # [0*II+3]  id:249
+    ee.vld.128.xp   q1,a8,a12           # [0*II+4]  id:250
+    ee.vmulas.s16.qacc  q0,q1       # [0*II+6]
+.LBB41_esp_nn_depthwise_conv_s16_mult1: # 0x750
+
+.Lt_4_10242:    # 0x750 // advance to the next filter row
+    add.n   a14,a14,a3                  # [0] // next input row: += input_wd
+    add.n   a15,a15,a4                  # [1] // next filter row: += filter_wd
+    l32i    a9,a1,80                    # [2]  gra_spill_temp_28
+    l32i    a10,a1,76                   # [3]  gra_spill_temp_27
+    addi.n  a9,a9,1                 # [4] // filter_y++
+    add.n   a10,a10,a4                  # [5]
+    s32i    a10,a1,76                   # [6]  gra_spill_temp_27
+    s32i    a9,a1,80                    # [7]  gra_spill_temp_28
+    sub     a9,a7,a9                    # [8]
+    beqz    a9,.Lt_4_9474               # [9] // filter_y == filter_y_end: all rows accumulated
+
+    blt a6,a5,.LBB15_esp_nn_depthwise_conv_s16_mult1    # [0] // non-empty horizontal range: process this row
+
+    j   .Lt_4_10242                     # [0] // empty horizontal range: just keep stepping the row counters
+
+.Lt_4_7170: # 0x770 // common exit
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult1_esp32s3, . - esp_nn_depthwise_conv_s16_mult1_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S
new file mode 100644
index 0000000..792d137
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult4_esp32s3.S
@@ -0,0 +1,416 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult4_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult4_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult4_esp32s3
+
+esp_nn_depthwise_conv_s16_mult4_esp32s3:    # 0x17c8
+    # qacc_scratch = 0              scratch at a1+0; receives the QACC dump and is repacked in place
+    # gra_spill_temp_220 = 32       -pad_wd, sign-extended from bit 15 (base_x for out_x = 0)
+    # gra_spill_temp_221 = 36       filter_ht
+    # gra_spill_temp_222 = 40       input_ht
+    # gra_spill_temp_223 = 44       stride_ht
+    # gra_spill_temp_224 = 48       out_ht
+    # gra_spill_temp_225 = 52       out_y loop counter
+    # gra_spill_temp_226 = 56       stride_wd
+    # gra_spill_temp_227 = 60       out_shift
+    # gra_spill_temp_228 = 64       out_mult
+    # gra_spill_temp_229 = 68       out_wd
+    # gra_spill_temp_230 = 72       out_x loop counter
+    # gra_spill_temp_231 = 76       ch_mult
+    # gra_spill_temp_232 = 80       ch_mult * 4
+    # gra_spill_temp_233 = 84       ch_idx * ch_mult (grows by ch_mult per channel)
+    # gra_spill_temp_234 = 88       ch_idx * ch_mult * 4 (byte offset added to bias)
+    # gra_spill_temp_235 = 92       base_y
+    # gra_spill_temp_236 = 96       out_data
+    # gra_spill_temp_237 = 100      max(-base_y, 0), first filter row that is used
+    # gra_spill_temp_238 = 104      bias
+    # gra_spill_temp_239 = 108      ch_mult - 3 (bound of the ch_mult loop)
+    # gra_spill_temp_240 = 112      pointer to qacc_scratch
+    # gra_spill_temp_241 = 116      running byte offset into out_data
+    # gra_spill_temp_242 = 120      ch_mult_idx
+    # gra_spill_temp_243 = 124      current bias pointer
+    # gra_spill_temp_244 = 128      out_mult_ptr
+    # gra_spill_temp_245 = 132      out_shift_ptr
+    # gra_spill_temp_246 = 136      input_data
+    # gra_spill_temp_247 = 140      channels * ch_mult
+    # gra_spill_temp_248 = 144      channels * ch_mult * 2
+    # gra_spill_temp_249 = 148      channels * 2
+    # gra_spill_temp_250 = 152      filter_data
+    # gra_spill_temp_251 = 156      trip count of the innermost loop
+    # gra_spill_temp_252 = 160      base_x
+    # gra_spill_temp_253 = 164      channels
+    # gra_spill_temp_254 = 168      running value that grows by filter_wd per filter row
+    # gra_spill_temp_255 = 172      current filter row index
+    # gra_spill_temp_256 = 176      ch_idx
+    # gra_spill_temp_257 = 192      q-register spill: out_offset
+    # gra_spill_temp_258 = 208      q-register spill: activation_max
+    # gra_spill_temp_259 = 224      q-register spill: activation_min
+    # gra_spill_temp_260 = 240      q-register spill shared by q4, q5 and q1
+ // Depthwise convolution on 16-bit input and filter data that produces four output bytes (four ch_mult slots) per inner pass.
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t channels
+ // a6: const uint16_t pad_wd
+ // a7: const uint16_t pad_ht
+
+ // on stack:
+ // const uint16_t stride_wd
+ // const uint16_t stride_ht
+ // const uint16_t ch_mult
+ // const int16_t *filter_data
+ // const uint16_t filter_wd
+ // const uint16_t filter_ht
+ // const int32_t *bias
+ // int8_t *out_data
+ // const uint16_t out_wd
+ // const uint16_t out_ht
+ // const int32_t out_offset
+ // const int32_t *out_shift
+ // const int32_t *out_mult
+ // const int32_t activation_min
+ // const int32_t activation_max
+ // The stack arguments are read at a1+288 (stride_wd) up to a1+344 (activation_max), 4 bytes apart, in the order listed above.
+ // The ch_mult loop runs while ch_mult_idx < ch_mult - 3 in steps of 4, so a ch_mult remainder below 4 is not processed here.
+    entry   a1,288                      # allocate a 288-byte frame
+    s32i    a2,a1,136                   # [0]  gra_spill_temp_246 <- input_data
+    s32i.n  a4,a1,40                # [1]  gra_spill_temp_222 <- input_ht
+    s32i    a5,a1,164                   # [2]  gra_spill_temp_253 <- channels
+    addi    a12,a1,112                  # [3]  a12 = a1+112, base for the q-register spills below
+    addmi   a10,a1,256                  # [4]  a10 = a1+256
+    addmi   a11,a1,256                  # [5]  a11 = a1+256
+    addmi   a13,a1,256                  # [6]  a13 = a1+256
+    l16ui   a8,a1,324                   # [7]  id:216 out_ht+0x0
+    s32i.n  a8,a1,48                # [8]  gra_spill_temp_224 <- out_ht
+    addi    a13,a13,72                  # [9]  a13 = a1+328, address of out_offset
+    addi    a11,a11,88                  # [10]  a11 = a1+344, address of activation_max
+    addi    a10,a10,84                  # [11]  a10 = a1+340, address of activation_min
+    ee.vldbc.32 q0,a10              # [12]  id:215 activation_min
+    ee.vldbc.32 q1,a11              # [13]  id:214 activation_max
+    ee.vldbc.32 q2,a13              # [14]  id:213 out_offset
+    st.qr       q2,a12,80                   # [15]  gra_spill_temp_257-112
+    st.qr       q1,a12,96                   # [16]  gra_spill_temp_258-112
+    st.qr       q0,a12,112                  # [17]  gra_spill_temp_259-112
+    beqz.n  a8,.Lt_10_8450          # [18]  return immediately when out_ht is 0
+ // One-time setup: load the remaining arguments and derived constants into spill slots.
+    s32i    a1,a1,112                   # [0]  gra_spill_temp_240 <- a1, the address of qacc_scratch
+    neg     a15,a6                      # [1]  a15 = -pad_wd
+    neg     a4,a7                       # [2]  a4 = -pad_ht
+    addmi   a8,a1,256                   # [3]  a8 = a1+256
+    movi.n  a9,0                    # [4]
+    movi.n  a11,0                   # [5]
+    slli    a14,a5,1                    # [6]  a14 = channels * 2
+    l16ui   a13,a1,296                  # [7]  id:217 ch_mult+0x0
+    l16ui   a10,a1,308                  # [8]  id:227 filter_ht+0x0
+    s32i.n  a10,a1,36               # [9]  gra_spill_temp_221
+    s32i    a13,a1,76                   # [10]  gra_spill_temp_231
+    s32i    a14,a1,148                  # [11]  gra_spill_temp_249
+    s32i.n  a11,a1,52               # [12]  gra_spill_temp_225 <- 0 (out_y)
+    s32i    a9,a1,116                   # [13]  gra_spill_temp_241 <- 0 (output byte offset)
+    st.qr   q4,a8,-16                   # [14]  gra_spill_temp_260-256  NOTE(review): q4 is not written earlier in this function
+    sext    a4,a4,15                    # [15]  sign-extend -pad_ht from bit 15
+    sext    a15,a15,15                  # [16]  sign-extend -pad_wd from bit 15
+    s32i.n  a15,a1,32               # [17]  gra_spill_temp_220
+    mul16u  a12,a5,a13              # [18]  a12 = channels * ch_mult
+    s32i    a4,a1,92                    # [19]  gra_spill_temp_235 <- -pad_ht (base_y for out_y = 0)
+    l16ui   a8,a1,320                   # [20]  id:229 out_wd+0x0
+    l16ui   a9,a1,292                   # [21]  id:228 stride_ht+0x0
+    l32i    a11,a1,336                  # [22]  id:226 out_mult+0x0
+    s32i    a11,a1,64                   # [23]  gra_spill_temp_228
+    s32i.n  a9,a1,44                # [24]  gra_spill_temp_223
+    s32i    a8,a1,68                    # [25]  gra_spill_temp_229
+    l32i    a4,a1,300                   # [26]  id:218 filter_data+0x0
+    s32i    a12,a1,140                  # [27]  gra_spill_temp_247
+    l32i    a15,a1,316                  # [28]  id:219 out_data+0x0
+    s32i    a15,a1,96                   # [29]  gra_spill_temp_236
+    slli    a12,a12,1                   # [30]  a12 = channels * ch_mult * 2
+    s32i    a4,a1,152                   # [31]  gra_spill_temp_250
+    addi    a14,a13,-3                  # [32]  a14 = ch_mult - 3
+    l16ui   a4,a1,304                   # [33]  id:223 filter_wd+0x0  (a4 keeps filter_wd for the rest of the function)
+    s32i    a14,a1,108                  # [34]  gra_spill_temp_239
+    s32i    a12,a1,144                  # [35]  gra_spill_temp_248
+    slli    a13,a13,2                   # [36]  a13 = ch_mult * 4
+    s32i    a13,a1,80                   # [37]  gra_spill_temp_232
+    l32i    a12,a1,332                  # [38]  id:225 out_shift+0x0
+    l32i    a14,a1,312                  # [39]  id:222 bias+0x0
+    s32i    a14,a1,104                  # [40]  gra_spill_temp_238
+    s32i.n  a12,a1,60               # [41]  gra_spill_temp_227
+    l16ui   a13,a1,288                  # [42]  id:224 stride_wd+0x0
+    s32i.n  a13,a1,56               # [43]  gra_spill_temp_226
+    j   .Lt_10_8962                     # [44]  enter the height loop
+ // Height-loop latch: advance out_y and base_y, return when out_y reaches out_ht.
+.Lt_10_9218:    # 0x1880
+    l32i.n  a9,a1,48                # [0]  gra_spill_temp_224
+    l32i.n  a11,a1,44               # [1]  gra_spill_temp_223
+    l32i.n  a8,a1,52                # [2]  gra_spill_temp_225
+    l32i    a10,a1,92                   # [3]  gra_spill_temp_235
+    addi.n  a8,a8,1                 # [4]  out_y + 1
+    s32i.n  a8,a1,52                # [5]  gra_spill_temp_225
+    add.n   a11,a10,a11                 # [6]  base_y + stride_ht
+    sub     a8,a8,a9                    # [7]  out_y - out_ht
+    sext    a10,a11,15                  # [8]  keep base_y sign-extended from bit 15
+    s32i    a10,a1,92                   # [9]  gra_spill_temp_235
+    beqz    a8,.Lt_10_8450              # [10]  all output rows done
+
+.Lt_10_8962:    # 0x189b
+#<loop> Loop body line 1223, nesting depth: 1, estimated iterations: 100
+ #1224          const int16_t base_y = (out_y * stride_ht) - pad_ht;
+ #1225          for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
+    l32i    a12,a1,68                   # [0]  gra_spill_temp_229
+    beqz.n  a12,.Lt_10_9218         # [2]  out_wd is 0: nothing to do in this row
+ // Per-row setup: clamp the filter rows to the part that overlaps the input and reset out_x and base_x.
+.LBB6_esp_nn_depthwise_conv_s16_mult4:  # 0x18a0
+    l32i.n  a7,a1,36                # [0]  gra_spill_temp_221
+    movi.n  a11,0                   # [1]
+    l32i.n  a8,a1,40                # [2]  gra_spill_temp_222
+    l32i    a9,a1,92                    # [3]  gra_spill_temp_235
+    movi.n  a13,0                   # [4]
+    l32i.n  a14,a1,32               # [5]  gra_spill_temp_220
+    s32i    a14,a1,160                  # [6]  gra_spill_temp_252 <- -pad_wd (base_x for out_x = 0)
+    s32i    a13,a1,72                   # [7]  gra_spill_temp_230 <- 0 (out_x)
+    neg     a10,a9                      # [8]  a10 = -base_y
+    sub     a8,a8,a9                    # [9]  a8 = input_ht - base_y
+    max     a10,a10,a11                 # [10]  a10 = max(-base_y, 0)
+    s32i    a10,a1,100                  # [11]  gra_spill_temp_237
+    min     a7,a7,a8                    # [12]  a7 = min(filter_ht, input_ht - base_y), exclusive end filter row
+    j   .Lt_10_9730                     # [13]
+ // Width-loop latch: advance out_x and base_x, go to the height-loop latch when out_x reaches out_wd.
+.Lt_10_9986:    # 0x18c5
+    l32i    a13,a1,68                   # [0]  gra_spill_temp_229
+    l32i.n  a15,a1,56               # [1]  gra_spill_temp_226
+    l32i    a12,a1,72                   # [2]  gra_spill_temp_230
+    l32i    a14,a1,160                  # [3]  gra_spill_temp_252
+    addi.n  a12,a12,1               # [4]  out_x + 1
+    s32i    a12,a1,72                   # [5]  gra_spill_temp_230
+    add.n   a15,a14,a15                 # [6]  base_x + stride_wd
+    sext    a14,a15,15                  # [7]  keep base_x sign-extended from bit 15
+    s32i    a14,a1,160                  # [8]  gra_spill_temp_252
+    beq a12,a13,.Lt_10_9218         # [9]  row finished
+ // Width-loop body: restart out_mult_ptr and out_shift_ptr for this output pixel.
+.Lt_10_9730:    # 0x18e0
+    l32i    a8,a1,164                   # [0]  gra_spill_temp_253
+    l32i    a9,a1,64                    # [1]  gra_spill_temp_228
+    l32i.n  a10,a1,60               # [2]  gra_spill_temp_227
+    s32i    a10,a1,132                  # [3]  gra_spill_temp_245 <- out_shift
+    s32i    a9,a1,128                   # [4]  gra_spill_temp_244 <- out_mult
+    beqz.n  a8,.Lt_10_9986          # [5]  channels is 0: nothing to do for this pixel
+ // Per-pixel setup: clamp the filter columns to the input and reset the per-channel counters.
+    movi.n  a8,0                    # [0]
+    l32i    a5,a1,160                   # [1]  gra_spill_temp_252
+    movi.n  a12,0                   # [2]
+    movi.n  a13,0                   # [3]
+    movi.n  a14,0                   # [4]
+    s32i    a14,a1,84                   # [5]  gra_spill_temp_233 <- 0
+    s32i    a13,a1,88                   # [6]  gra_spill_temp_234 <- 0
+    s32i    a12,a1,176                  # [7]  gra_spill_temp_256 <- 0 (ch_idx)
+    neg     a6,a5                       # [8]  a6 = -base_x
+    max     a6,a6,a8                    # [9]  a6 = max(-base_x, 0), first filter column used
+    sub     a5,a3,a5                    # [10]  a5 = input_wd - base_x
+    min     a5,a4,a5                    # [11]  a5 = min(filter_wd, input_wd - base_x), exclusive end filter column
+    sub     a11,a5,a6                   # [12]  number of filter columns used
+    s32i    a11,a1,156                  # [13]  gra_spill_temp_251
+    j   .Lt_10_10498                    # [14]
+ // Channel-loop latch: advance ch_idx and the per-channel offsets, leave when ch_idx reaches channels.
+.Lt_10_10754:   # 0x1919
+    l32i    a10,a1,164                  # [0]  gra_spill_temp_253
+    l32i    a14,a1,76                   # [1]  gra_spill_temp_231
+    l32i    a13,a1,84                   # [2]  gra_spill_temp_233
+    l32i    a12,a1,80                   # [3]  gra_spill_temp_232
+    l32i    a9,a1,176                   # [4]  gra_spill_temp_256
+    l32i    a11,a1,88                   # [5]  gra_spill_temp_234
+    addi.n  a9,a9,1                 # [6]  ch_idx + 1
+    s32i    a9,a1,176                   # [7]  gra_spill_temp_256
+    add.n   a11,a11,a12                 # [8]  bias byte offset + ch_mult * 4
+    add.n   a13,a13,a14                 # [9]  channel offset + ch_mult
+    s32i    a13,a1,84                   # [10]  gra_spill_temp_233
+    s32i    a11,a1,88                   # [11]  gra_spill_temp_234
+    beq     a9,a10,.Lt_10_9986          # [12]  all channels done for this pixel
+
+.Lt_10_10498:   # 0x193d
+    l32i    a15,a1,108                  # [0]  gra_spill_temp_239
+    blti    a15,1,.Lt_10_10754          # [2]  ch_mult - 3 < 1: no full group of four, skip this channel
+ // ch_mult-loop setup: a2 is the filter channel offset, slot 243 the bias pointer for this channel.
+    l32i    a2,a1,84                    # [0]  gra_spill_temp_233
+    l32i    a10,a1,104                  # [1]  gra_spill_temp_238
+    l32i    a9,a1,88                    # [2]  gra_spill_temp_234
+    movi.n  a8,0                    # [3]
+    s32i    a8,a1,120                   # [4]  gra_spill_temp_242 <- 0 (ch_mult_idx)
+    add.n   a9,a9,a10                   # [5]  bias + byte offset of this channel
+    s32i    a9,a1,124                   # [6]  gra_spill_temp_243
+    j   .Lt_10_11266                    # [7]
+ // Accumulation done: dump QACC to qacc_scratch and repack it; presumably this yields four 32-bit sums in q0 (confirm against the ISA manual).
+.Lt_10_11522:   # 0x1959
+    addmi   a12,a1,256                  # [0]
+    l32i    a14,a1,112                  # [1]  gra_spill_temp_240
+    movi.n  a13,16                  # [2]
+    ee.st.qacc_l.l.128.ip   a14,16      # [3]  id:234  store to qacc_scratch, then a14 += 16
+    ee.st.qacc_l.h.32.ip    a14,-16     # [4]  id:235  store to qacc_scratch+16, then a14 -= 16
+    ee.srcmb.s16.qacc   q5,a13,0        # [5]  presumably q5 = accumulators shifted right by a13 (16) as 16-bit lanes
+    l16ui   a15,a1,10                   # [6]  qacc_scratch+10
+    l8ui    a8,a1,15                    # [7]  qacc_scratch+15
+    l8ui    a9,a1,5                     # [8]  qacc_scratch+5
+    l8ui    a11,a1,16                   # [9]  qacc_scratch+16
+    l8ui    a10,a1,6                    # [10]  qacc_scratch+6
+    s8i     a10,a1,3                    # [11]  qacc_scratch+3  <- old byte 6
+    s8i     a11,a1,7                    # [12]  qacc_scratch+7  <- old byte 16
+    s8i     a9,a1,2                     # [13]  qacc_scratch+2  <- old byte 5
+
+    l32i    a11,a1,104                  # [14]  gra_spill_temp_238
+    s8i     a8,a1,6                     # [15]  qacc_scratch+6  <- old byte 15
+    s16i    a15,a1,4                    # [16]  qacc_scratch+4  <- old halfword at 10
+    ee.vld.l.64.ip  q0,a14,0        # [17]  id:245  q0 low 64 bits = repacked qacc_scratch bytes 0..7
+    s32i    a14,a1,112                  # [18]  gra_spill_temp_240
+    ee.vzip.16  q0,q5               # [19]  interleave 16-bit lanes of q0 and q5
+    st.qr   q5,a12,-16                  # [20]  gra_spill_temp_260-256
+
+    beqz.n  a11,.Lt_10_13570        # [21] // skip_bias
+
+ // add bias
+    l32i    a13,a1,124                  # [0]  gra_spill_temp_243
+    extui   a12,a13,0,4                 # [2]  a12 = low 4 bits of the bias pointer
+    ee.vld.128.ip   q7,a13,16           # [3]  id:248
+    ee.vld.128.ip   q1,a13,0            # [4]  id:249
+    wur.sar_byte    a12                 # [5]  SAR_BYTE = byte misalignment of the bias pointer
+    ee.src.q.qup    q6,q7,q1            # [6]  presumably q6 = 16 bias bytes taken from q7:q1 at the SAR_BYTE offset
+    ee.vadds.s32    q0,q0,q6            # [7]  q0 += bias values
+
+.Lt_10_13570:   # 0x19ae
+ #1287                      q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+    l32i    a10,a1,128                  # [0]  gra_spill_temp_244
+    l32i    a11,a1,132                  # [1]  gra_spill_temp_245
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [2]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+ // After the call: add out_offset, clamp to activation_max and activation_min, store four output bytes, advance all pointers.
+    addi.n  a2,a2,4                 # [0]  filter channel offset + 4
+    l32i    a13,a1,96                   # [1]  gra_spill_temp_236
+    l32i    a11,a1,128                  # [2]  gra_spill_temp_244
+    l32i    a10,a1,132                  # [3]  gra_spill_temp_245
+    addi    a8,a1,112                   # [4]
+    ld.qr   q1,a8,96                    # [5]  gra_spill_temp_258-112
+    ld.qr   q2,a8,80                    # [6]  gra_spill_temp_257-112
+    addi    a10,a10,16                  # [7]  out_shift_ptr + 16 bytes
+    addi    a11,a11,16                  # [8]  out_mult_ptr + 16 bytes
+    s32i    a11,a1,128                  # [9]  gra_spill_temp_244
+    ee.vadds.s32    q0,q0,q2            # [10]  q0 += out_offset
+    s32i    a10,a1,132                  # [11]  gra_spill_temp_245
+    ee.vmin.s32 	q0,q0,q1            # [12]  q0 = min(q0, activation_max)
+    ld.qr   		q1,a8,112                   # [13]  gra_spill_temp_259-112
+    l32i    		a8,a1,116                   # [14]  gra_spill_temp_241
+    ee.vmax.s32 	q0,q0,q1            # [15]  q0 = max(q0, activation_min)
+    ee.movi.32.a    q0,a14,2            # [16]  a14 = 32-bit lane 2 of q0
+    ee.movi.32.a    q0,a15,1            # [17]  a15 = 32-bit lane 1 of q0
+    ee.movi.32.a    q0,a9,0             # [18]  a9 = 32-bit lane 0 of q0
+    add.n   		a13,a8,a13                  # [19]  a13 = out_data + running byte offset
+    ee.movi.32.a    q0,a12,3            # [20]  a12 = 32-bit lane 3 of q0
+    addi.n  a8,a8,4                 # [21]  running byte offset + 4
+    s8i 	a12,a13,3                   # [22]  id:254
+    s32i    a8,a1,116                   # [23]  gra_spill_temp_241
+    s8i 	a9,a13,0                    # [24]  id:251
+    s8i 	a15,a13,1                   # [25]  id:252
+    s8i 	a14,a13,2                   # [26]  id:253
+    l32i    a15,a1,108                  # [27]  gra_spill_temp_239
+    l32i    a14,a1,120                  # [28]  gra_spill_temp_242
+    l32i    a9,a1,124                   # [29]  gra_spill_temp_243
+    addi.n  a14,a14,4               # [30]  ch_mult_idx + 4
+    addi    a9,a9,16                    # [31]  bias pointer + 16 bytes
+    s32i    a9,a1,124                   # [32]  gra_spill_temp_243
+    s32i    a14,a1,120                  # [33]  gra_spill_temp_242
+    bge a14,a15,.Lt_10_10754        # [34]  ch_mult_idx >= ch_mult - 3: next channel
+ // ch_mult-loop head: clear the accumulator, then walk the filter rows that overlap the input.
+.Lt_10_11266:   # 0x1a1c
+#<loop> Loop body line 1230, nesting depth: 4, estimated iterations: 100
+    ee.zero.qacc                    # [0]
+    l32i    a9,a1,100                   # [1]  gra_spill_temp_237
+    s32i    a9,a1,172                   # [2]  gra_spill_temp_255 <- first filter row
+    bge     a9,a7,.Lt_10_11522          # [3]  no overlapping filter row: go straight to the output stage
+
+    mull    a15,a9,a4                   # [0]  a15 = filter row * filter_wd
+    l32i    a14,a1,92                   # [1]  gra_spill_temp_235
+    add.n   a11,a15,a5                  # [2]
+    add.n   a14,a14,a9                  # [3]  a14 = base_y + filter row
+    mull    a14,a3,a14                  # [4]  a14 = input_wd * (base_y + filter row)
+    s32i    a11,a1,168                  # [5]  gra_spill_temp_254
+    bge     a6,a5,.Lt_10_12290          # [6]  no overlapping filter column: skip the column loop
+ // Filter-row body: compute the filter pointer (a8) and input pointer (a10), then run the column loop.
+.LBB18_esp_nn_depthwise_conv_s16_mult4: # 0x1a3b
+    l32i    a10,a1,176                  # [0]  gra_spill_temp_256
+    l32i    a11,a1,164                  # [1]  gra_spill_temp_253
+    l32i    a12,a1,160                  # [2]  gra_spill_temp_252
+    add.n   a9,a15,a6                   # [3]  a9 = filter row * filter_wd + first filter column
+    l32i    a8,a1,140                   # [4]  gra_spill_temp_247
+    addmi   a13,a1,256                  # [5]
+    ld.qr   q1,a13,-16                  # [6]  gra_spill_temp_260-256
+    mull    a8,a8,a9                    # [7]  a8 = channels * ch_mult * a9
+    add.n   a12,a12,a6                  # [8]  a12 = base_x + first filter column
+    l32i    a9,a1,152                   # [9]  gra_spill_temp_250
+    add.n   a12,a14,a12                 # [10]  a12 = input element index without the channel factor
+    mull    a11,a11,a12                 # [11]  a11 = channels * a12
+    add.n   a8,a2,a8                    # [12]  add the filter channel offset held in a2
+    l32i    a12,a1,148                  # [13]  gra_spill_temp_249
+    addx2   a8,a8,a9                    # [14]  a8 = filter_data + 2 * a8
+    add.n   a10,a10,a11                 # [15]  a10 = ch_idx + a11
+    l32i    a11,a1,136                  # [16]  gra_spill_temp_246
+    l32i    a9,a1,156                   # [17]  gra_spill_temp_251
+    addx2   a10,a10,a11                 # [18]  a10 = input_data + 2 * a10
+    l32i    a11,a1,144                  # [19]  gra_spill_temp_248
+    loopgtz a9,.LBB45_esp_nn_depthwise_conv_s16_mult4   # [20]  hardware loop over the used filter columns
+ // innermost loop: one 16-bit input sample against 64 bits of filter data per iteration
+    mov.n   a9,a8                       # [0*II+0]  a9 = current filter pointer
+    ee.vldbc.16 q0,a10              # [0*II+1]  id:232  q0 = 16-bit input value at a10
+    add.n   a10,a10,a12                 # [0*II+2]  input pointer += channels * 2
+    ee.vld.l.64.ip  q1,a9,0         # [0*II+3]  id:231  q1 low 64 bits = filter values at a9
+    add.n   a8,a8,a11                   # [0*II+4]  filter pointer += channels * ch_mult * 2
+    ee.vmulas.s16.qacc  q0,q1       # [0*II+5]  multiply-accumulate into QACC
+.LBB45_esp_nn_depthwise_conv_s16_mult4: # 0x1a84
+
+    addmi   a10,a1,256                  # [0]
+    st.qr   q1,a10,-16                  # [1]  gra_spill_temp_260-256
+ // Filter-row latch: step to the next filter row and the next input row.
+.Lt_10_12290:   # 0x1a8a
+    add.n   a14,a14,a3                  # [0]  input row offset += input_wd
+    add.n   a15,a15,a4                  # [1]  filter row offset += filter_wd
+    l32i    a11,a1,172                  # [2]  gra_spill_temp_255
+    l32i    a12,a1,168                  # [3]  gra_spill_temp_254
+    addi.n  a11,a11,1               # [4]  filter row + 1
+    add.n   a12,a12,a4                  # [5]
+    s32i    a12,a1,168                  # [6]  gra_spill_temp_254
+    s32i    a11,a1,172                  # [7]  gra_spill_temp_255
+    sub     a11,a7,a11                  # [8]  end filter row - current filter row
+    beqz    a11,.Lt_10_11522            # [9]  all filter rows done: go to the output stage
+
+    blt     a6,a5,.LBB18_esp_nn_depthwise_conv_s16_mult4    # [0]  columns overlap: process the next row
+
+    j   .Lt_10_12290                    # [0]  no overlapping column: only count the rows down
+
+.Lt_10_8450:    # 0x1aaa
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult4_esp32s3, . - esp_nn_depthwise_conv_s16_mult4_esp32s3
+
+#elif defined(WIO_TERMINAL)
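+// dummy code, added for old ARM toolchain: with WIO_TERMINAL defined this file contributes only the assembler directives below and no instructions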
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S
new file mode 100644
index 0000000..b894713
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3.S
@@ -0,0 +1,458 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3
+
+esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3:    # 0x11b3
+    # qacc_scratch = 0
+    # gra_spill_temp_142 = 48
+    # gra_spill_temp_143 = 52
+    # gra_spill_temp_144 = 56
+    # gra_spill_temp_145 = 60
+    # gra_spill_temp_146 = 64
+    # gra_spill_temp_147 = 68
+    # gra_spill_temp_148 = 72
+    # gra_spill_temp_149 = 76
+    # gra_spill_temp_150 = 80
+    # gra_spill_temp_151 = 84
+    # gra_spill_temp_152 = 88
+    # gra_spill_temp_153 = 92
+    # gra_spill_temp_154 = 96
+    # gra_spill_temp_155 = 100
+    # gra_spill_temp_156 = 104
+    # gra_spill_temp_157 = 108
+    # gra_spill_temp_158 = 112
+    # gra_spill_temp_159 = 116
+    # gra_spill_temp_160 = 120
+    # gra_spill_temp_161 = 124
+    # gra_spill_temp_162 = 128
+    # gra_spill_temp_163 = 132
+    # gra_spill_temp_164 = 136
+    # gra_spill_temp_165 = 140
+    # gra_spill_temp_166 = 144
+    # gra_spill_temp_167 = 148
+    # gra_spill_temp_168 = 152
+    # gra_spill_temp_169 = 156
+    # gra_spill_temp_170 = 160
+    # gra_spill_temp_171 = 164
+    # gra_spill_temp_172 = 168
+    # gra_spill_temp_173 = 172
+    # gra_spill_temp_174 = 176
+    # gra_spill_temp_175 = 180
+    # gra_spill_temp_176 = 184
+    # gra_spill_temp_177 = 188
+    # gra_spill_temp_178 = 192
+    # gra_spill_temp_179 = 208
+    # gra_spill_temp_180 = 224
+    # gra_spill_temp_181 = 240
+    # gra_spill_temp_182 = 256
+
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t channels
+ // a6: const uint16_t pad_wd
+ // a7: const uint16_t pad_ht
+
+ // const uint16_t stride_wd
+ // const uint16_t stride_ht
+ // const uint16_t ch_mult
+ // const int16_t *filter_data
+ // const int32_t *bias
+ // int8_t *out_data
+ // const uint16_t out_wd
+ // const uint16_t out_ht
+ // const int32_t out_offset
+ // const int32_t *out_shift
+ // const int32_t *out_mult
+ // const int32_t activation_min
+ // const int32_t activation_max
+
+    entry   a1,304                      #
+    s32i    a2,a1,116                   # [0]  gra_spill_temp_159
+    s32i    a3,a1,120                   # [1]  gra_spill_temp_160
+    s32i    a5,a1,144                   # [2]  gra_spill_temp_166
+    s32i.n  a6,a1,60                # [3]  gra_spill_temp_145
+
+    addmi   a9,a1,256                   # [4]
+    addi    a12,a1,112                  # [5]
+    addmi   a10,a1,256                  # [6]
+    addmi   a11,a1,256                  # [7]
+    addmi   a13,a1,256                  # [8]
+
+ // height loop
+    l16ui   a8,a1,332                   # [9]  id:261 out_ht+0x0
+    l32i    a14,a1,324                  # [10]  id:257 out_data+0x0
+    s32i    a14,a1,176                  # [11]  gra_spill_temp_174
+    s32i    a8,a1,68                    # [12]  gra_spill_temp_147
+    addi    a13,a13,80                  # [13]
+    addi    a11,a11,96                  # [14]
+    addi    a10,a10,92                  # [15]
+    ee.vldbc.32 q0,a10              # [16]  id:260 activation_min
+    ee.vldbc.32 q1,a11              # [17]  id:259 activation_max
+    ee.vldbc.32 q2,a13              # [18]  id:258 out_offset
+    st.qr   	q2,a12,96                   # [19]  gra_spill_temp_179-112
+    st.qr   	q1,a12,112                  # [20]  gra_spill_temp_180-112
+    st.qr   	q0,a9,-16                   # [21]  gra_spill_temp_181-256
+    beqz.n  a8,.Lt_8_8194           # [22]
+
+.LBB3_esp_nn_depthwise_conv_s16_mult8_3x3:  # 0x11f9
+    s32i    a1,a1,180                   # [0]  gra_spill_temp_175
+    mul16u  a6,a3,a5                # [1]
+    s32i    a7,a1,76                    # [2]  gra_spill_temp_149
+    l32i    a9,a1,316                   # [3]  id:264 filter_data+0x0
+    l32i    a15,a1,320                  # [4]  id:262 bias+0x0
+    l16ui   a10,a1,312                  # [5]  id:263 ch_mult+0x0
+    slli    a11,a5,1                    # [6]
+    l16ui   a12,a1,308                  # [7]  id:268 stride_ht+0x0
+    l32i    a13,a1,344                  # [8]  id:267 out_mult+0x0
+    l32i    a14,a1,340                  # [9]  id:266 out_shift+0x0
+    s32i    a14,a1,88                   # [10]  gra_spill_temp_152
+    s32i    a13,a1,92                   # [11]  gra_spill_temp_153
+    s32i    a12,a1,64                   # [12]  gra_spill_temp_146
+    s32i    a11,a1,124                  # [13]  gra_spill_temp_161
+    s32i    a10,a1,108                  # [14]  gra_spill_temp_157
+    s32i    a15,a1,160                  # [15]  gra_spill_temp_170
+    s32i    a9,a1,128                   # [16]  gra_spill_temp_162
+    neg     a7,a7                       # [17]
+    slli    a6,a6,1                     # [18]
+    s32i    a7,a1,136                   # [19]  gra_spill_temp_164
+    movi.n  a9,0                    # [20]
+    extui   a15,a15,0,4                 # [21]
+    s32i    a15,a1,152                  # [22]  gra_spill_temp_168
+    s32i    a9,a1,72                    # [23]  gra_spill_temp_148
+    sub     a7,a4,a7                    # [24]
+    l32i.n  a9,a1,60                # [25]  gra_spill_temp_145
+    s32i    a7,a1,80                    # [26]  gra_spill_temp_150
+    l16ui   a4,a1,328                   # [27]  id:269 out_wd+0x0
+    s32i    a4,a1,96                    # [28]  gra_spill_temp_154
+    l16ui   a7,a1,304                   # [29]  id:265 stride_wd+0x0
+    s32i    a7,a1,84                    # [30]  gra_spill_temp_151
+    mul16u  a4,a5,a10               # [31]
+    neg     a9,a9                       # [32]
+    s32i.n  a9,a1,52                # [33]  gra_spill_temp_143
+    sub     a8,a3,a9                    # [34]
+    addi    a10,a10,-7                  # [35]
+    s32i    a10,a1,164                  # [36]  gra_spill_temp_171
+    s32i.n  a8,a1,56                # [37]  gra_spill_temp_144
+    addx2   a7,a4,a4                    # [38]
+    slli    a7,a7,1                     # [39]
+    j       .Lt_8_8706                      # [40]
+
+.Lt_8_8962: # 0x1270
+#<loop> Part of loop body line 933, head labeled .Lt_8_8706
+    l32i    a10,a1,68                   # [0]  gra_spill_temp_147
+    l32i    a14,a1,76                   # [1]  gra_spill_temp_149
+    l32i    a13,a1,136                  # [2]  gra_spill_temp_164
+    l32i    a12,a1,64                   # [3]  gra_spill_temp_146
+    l32i    a9,a1,72                    # [4]  gra_spill_temp_148
+    l32i    a11,a1,80                   # [5]  gra_spill_temp_150
+    addi.n  a9,a9,1                 # [6]
+    s32i    a9,a1,72                    # [7]  gra_spill_temp_148
+    sub     a11,a11,a12                 # [8]
+    add.n   a13,a13,a12                 # [9]
+    sub     a14,a14,a12                 # [10]
+    s32i    a14,a1,76                   # [11]  gra_spill_temp_149
+    s32i    a13,a1,136                  # [12]  gra_spill_temp_164
+    s32i    a11,a1,80                   # [13]  gra_spill_temp_150
+    sub     a9,a9,a10                   # [14]
+    beqz    a9,.Lt_8_8194               # [15]
+
+.Lt_8_8706: # 0x129e
+#<loop> Loop body line 933, nesting depth: 1, estimated iterations: 100
+ # 934          const int32_t base_y = (out_y * stride_ht) - pad_ht;
+ # 935          for (int out_x = 0; out_x < out_wd; out_x++) { //width_loop
+    l32i    a15,a1,96                   # [0]  gra_spill_temp_154
+    beqz.n  a15,.Lt_8_8962          # [2]
+
+.LBB6_esp_nn_depthwise_conv_s16_mult8_3x3:  # 0x12a3
+#<loop> Part of loop body line 933, head labeled .Lt_8_8706
+    l32i.n  a3,a1,56                # [0]  gra_spill_temp_144
+    l32i    a8,a1,80                    # [1]  gra_spill_temp_150
+    movi.n  a10,0                   # [2]
+    l32i    a9,a1,76                    # [3]  gra_spill_temp_149
+    movi.n  a11,0                   # [4]
+    l32i.n  a12,a1,52               # [5]  gra_spill_temp_143
+    l32i.n  a13,a1,60               # [6]  gra_spill_temp_145
+    s32i    a13,a1,104                  # [7]  gra_spill_temp_156
+    s32i    a12,a1,140                  # [8]  gra_spill_temp_165
+    s32i    a11,a1,100                  # [9]  gra_spill_temp_155
+    max     a9,a9,a10                   # [10]
+    movi.n  a10,3                   # [11]
+    s32i    a9,a1,172                   # [12]  gra_spill_temp_173
+    min     a8,a8,a10                   # [13]
+    s32i    a8,a1,156                   # [14]  gra_spill_temp_169
+    sub     a8,a8,a9                    # [15]
+    s32i    a8,a1,132                   # [16]  gra_spill_temp_163
+    j       .Lt_8_9474                      # [17]
+
+.Lt_8_9730: # 0x12d3
+#<loop> Part of loop body line 935, head labeled .Lt_8_9474
+    l32i    a15,a1,96                   # [0]  gra_spill_temp_154
+    l32i    a10,a1,104                  # [1]  gra_spill_temp_156
+    l32i    a9,a1,140                   # [2]  gra_spill_temp_165
+    l32i    a8,a1,84                    # [3]  gra_spill_temp_151
+    l32i    a14,a1,100                  # [4]  gra_spill_temp_155
+    sub     a3,a3,a8                    # [5]
+    addi.n  a14,a14,1               # [6]
+    s32i    a14,a1,100                  # [7]  gra_spill_temp_155
+    add.n   a9,a9,a8                    # [8]
+    sub     a10,a10,a8                  # [9]
+    s32i    a10,a1,104                  # [10]  gra_spill_temp_156
+    s32i    a9,a1,140                   # [11]  gra_spill_temp_165
+    beq     a14,a15,.Lt_8_8962          # [12]
+
+.Lt_8_9474: # 0x12f8
+ # 936              const int32_t base_x = (out_x * stride_wd) - pad_wd;
+ # 937              const int32_t *out_mult_ptr = out_mult;
+ # 938              const int32_t *out_shift_ptr = out_shift;
+    l32i    a2,a1,88                    # [0]  gra_spill_temp_152
+    l32i    a10,a1,92                   # [1]  gra_spill_temp_153
+ # 939              uint32_t bias_ptr = (uint32_t) (bias);
+    l32i    a12,a1,160                  # [2]  gra_spill_temp_170
+ # 940
+ # 941              for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop
+    l32i    a11,a1,144                  # [3]  gra_spill_temp_166
+    s32i    a12,a1,168                  # [4]  gra_spill_temp_172
+    beqz.n  a11,.Lt_8_9730          # [5]
+
+.LBB9_esp_nn_depthwise_conv_s16_mult8_3x3:  # 0x1309
+#<loop> Part of loop body line 935, head labeled .Lt_8_9474
+    movi.n  a8,0                    # [0]
+    l32i    a5,a1,104                   # [1]  gra_spill_temp_156
+    movi.n  a13,0                   # [2]
+    movi.n  a9,0                    # [3]
+    s32i    a9,a1,112                   # [4]  gra_spill_temp_158
+    s32i    a13,a1,148                  # [5]  gra_spill_temp_167
+    max     a5,a5,a8                    # [6]
+    j       .Lt_8_10242                     # [7]
+
+.Lt_8_10498:    # 0x131e
+#<loop> Part of loop body line 941, head labeled .Lt_8_10242
+    l32i    a12,a1,144                  # [0]  gra_spill_temp_166
+    l32i    a14,a1,108                  # [1]  gra_spill_temp_157
+    l32i    a11,a1,148                  # [2]  gra_spill_temp_167
+    l32i    a13,a1,112                  # [3]  gra_spill_temp_158
+    addi.n  a11,a11,1               # [4]
+    s32i    a11,a1,148                  # [5]  gra_spill_temp_167
+    add.n   a13,a13,a14                 # [6]
+    s32i    a13,a1,112                  # [7]  gra_spill_temp_158
+    beq     a11,a12,.Lt_8_9730          # [8]
+
+.Lt_8_10242:    # 0x1337
+ # 942                  for (int ch_mult_idx = 0; ch_mult_idx < ch_mult - 7; ch_mult_idx += 8) {
+    l32i    a15,a1,164                  # [0]  gra_spill_temp_171
+    blti    a15,1,.Lt_8_10498           # [2]
+
+    movi.n  a8,0                    # [0]
+    l32i    a9,a1,112                   # [1]  gra_spill_temp_158
+    s32i    a9,a1,188                   # [2]  gra_spill_temp_177
+    s32i    a8,a1,184                   # [3]  gra_spill_temp_176
+    j   .Lt_8_11010                     # [4]
+
+.LBB23_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x134b
+    s32i.n  a10,a1,48               # [0]  gra_spill_temp_142
+    addi    a11,a1,112                  # [1]
+    l32i    a13,a1,152                  # [2]  gra_spill_temp_168
+    l32i    a12,a1,168                  # [3]  gra_spill_temp_172
+    wur.sar_byte    a13                 # [4]
+    ee.vld.128.ip   q4,a12,16           # [5]  id:307
+    ee.vld.128.ip   q7,a12,16           # [6]  id:308
+    ee.vld.128.ip   q5,a12,0            # [7]  id:309
+    s32i    a12,a1,168                  # [8]  gra_spill_temp_172
+    ee.src.q.qup    q6,q4,q7            # [9]
+    ee.vadds.s32    q0,q0,q6            # [10]
+    ee.src.q.qup    q3,q4,q5            # [11]
+    ee.vadds.s32    q1,q1,q3            # [12]
+    st.qr   q1,a11,80                   # [13]  gra_spill_temp_178-112
+
+.Lt_8_13314:    # 0x1374
+ #1025  q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+    l32i.n  a10,a1,48               # [0]  gra_spill_temp_142
+    mov.n   a11,a2                      # [1]
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+ #1026                      out_mult_ptr += 4;
+ #1027                      out_shift_ptr += 4;
+ #1028
+ #1029   q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr);
+    l32i.n  a10,a1,48               # [0]  gra_spill_temp_142
+    addmi   a12,a1,256                  # [1]
+    addi    a11,a1,112                  # [2]
+    st.qr   q0,a12,0                    # [3]  gra_spill_temp_182-256
+    ld.qr   q0,a11,80                   # [4]  gra_spill_temp_178-112
+    addi    a10,a10,16                  # [5]
+    addi    a11,a2,16                   # [6]
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+#<loop> Part of loop body line 942, head labeled .Lt_8_11010
+ #1030                      out_mult_ptr += 4;
+ #1031                      out_shift_ptr += 4;
+    addi    a2,a2,32                    # [0]
+    l32i    a14,a1,164                  # [1]  gra_spill_temp_171
+
+    l32i    a8,a1,176                   # [2]  gra_spill_temp_174
+    l32i    a15,a1,188                  # [3]  gra_spill_temp_177
+    l32i    a13,a1,184                  # [4]  gra_spill_temp_176
+    l32i.n  a10,a1,48               # [5]  gra_spill_temp_142
+    addmi   a11,a1,256                  # [6]
+    addi    a12,a1,112                  # [7]
+    ld.qr   q3,a12,112                  # [8]  gra_spill_temp_180-112
+    ld.qr   q1,a12,96                   # [9]  gra_spill_temp_179-112
+    ld.qr   q2,a11,0                    # [10]  gra_spill_temp_182-256
+    addi    a10,a10,32                  # [11]
+    addi.n  a13,a13,8               # [12]
+    addi.n  a15,a15,8               # [13]
+    s32i    a15,a1,188                  # [14]  gra_spill_temp_177
+    ee.vadds.s32    q2,q2,q1            # [15]
+    s32i    a13,a1,184                  # [16]  gra_spill_temp_176
+    ee.vadds.s32    q1,q0,q1            # [17]
+    ee.vmin.s32     q0,q2,q3            # [18]
+    ld.qr           q2,a11,-16                  # [19]  gra_spill_temp_181-256
+    ee.vmin.s32     q1,q1,q3            # [20]
+    ee.vmax.s32     q1,q1,q2            # [21]
+    ee.vmax.s32     q0,q0,q2            # [22]
+    ee.vunzip.16    q0,q1               # [23]
+    ee.vunzip.8     q0,q1               # [24]
+    ee.vst.l.64.ip  q0,a8,8         # [25]  id:312
+    s32i    a8,a1,176                   # [26]  gra_spill_temp_174
+    bge     a13,a14,.Lt_8_10498         # [27]
+
+.Lt_8_11010:    # 0x13e3
+#<loop> Loop body line 942, nesting depth: 4, estimated iterations: 100
+    l32i    a14,a1,156                  # [0]  gra_spill_temp_169
+    l32i    a13,a1,172                  # [1]  gra_spill_temp_173
+    ee.zero.qacc                    # [2]
+    bge     a13,a14,.Lt_8_11266         # [3]
+
+.LBB15_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x13ef
+#<loop> Part of loop body line 942, head labeled .Lt_8_11010
+    l32i    a12,a1,124                  # [0]  gra_spill_temp_161
+    l32i    a8,a1,140                   # [1]  gra_spill_temp_165
+    l32i    a11,a1,120                  # [2]  gra_spill_temp_160
+    l32i    a14,a1,188                  # [3]  gra_spill_temp_177
+    l32i    a9,a1,136                   # [4]  gra_spill_temp_164
+    mull    a15,a4,a13                  # [5]
+    add.n   a9,a9,a13                   # [6]
+    addx2   a15,a15,a15                 # [7]
+    l32i    a13,a1,148                  # [8]  gra_spill_temp_167
+    add.n   a14,a14,a15                 # [9]
+    mull    a9,a9,a11                   # [10]
+    l32i    a15,a1,144                  # [11]  gra_spill_temp_166
+    add.n   a8,a8,a9                    # [12]
+    mull    a15,a15,a8                  # [13]
+    l32i    a8,a1,128                   # [14]  gra_spill_temp_162
+    add.n   a13,a13,a15                 # [15]
+    l32i    a15,a1,116                  # [16]  gra_spill_temp_159
+    addx2   a14,a14,a8                  # [17]
+    addx2   a13,a13,a15                 # [18]
+    add.n   a11,a12,a13                 # [19]
+    l32i    a15,a1,132                  # [20]  gra_spill_temp_163
+    add.n   a12,a12,a11                 # [21]
+    loopgtz a15,.LBB34_esp_nn_depthwise_conv_s16_mult8_3x3  # [22]
+
+.Lt_8_11778:    # 0x142e
+    mov.n   a15,a14                     # [0]
+    mov.n   a9,a14                      # [1]
+    bnez.n  a5,.Lt_8_12034          # [2]
+
+    ee.vldbc.16     q3,a13              # [0]  id:271
+    mov.n           a9,a14                      # [1]
+    ee.vld.128.ip       q4,a9,0             # [2]  id:272
+    ee.vmulas.s16.qacc  q3,q4       # [4]
+
+.Lt_8_12034:    # 0x143f
+    ee.vldbc.16     q5,a11              # [0]  id:274
+    addx2           a9,a4,a9                    # [1]
+    ee.vld.128.ip   q6,a9,0             # [2]  id:275
+    add.n           a13,a13,a6                  # [3]
+    ee.vmulas.s16.qacc  q5,q6       # [4]
+    blti    a3,3,.Lt_8_12546            # [5]
+
+    ee.vldbc.16     q7,a12              # [0]  id:277
+    addx2           a14,a4,a9                   # [1]
+    ee.vld.128.ip   q0,a14,0            # [2]  id:278
+    ee.vmulas.s16.qacc  q7,q0       # [4]
+
+.Lt_8_12546:    # 0x145c
+#<loop> Part of loop body line 953, head labeled .Lt_8_11778
+    add.n   a11,a11,a6                  # [0]
+    add.n   a12,a12,a6                  # [1]
+    add.n   a14,a7,a15                  # [2]
+
+.LBB34_esp_nn_depthwise_conv_s16_mult8_3x3: # 0x1464
+.Lt_8_11266:    # 0x1464
+
+    l32i    a8,a1,180                   # [0]  gra_spill_temp_175
+    ee.st.qacc_l.l.128.ip   a8,16       # [2]  id:280
+    ee.st.qacc_l.h.32.ip    a8,0        # [3]  id:281
+    l16ui   a9,a1,10                    # [4]  qacc_scratch+10
+    l8ui    a11,a1,15                   # [5]  qacc_scratch+15
+    l8ui    a12,a1,5                    # [6]  qacc_scratch+5
+    l8ui    a13,a1,6                    # [7]  qacc_scratch+6
+    l8ui    a14,a1,16                   # [8]  qacc_scratch+16
+    s8i     a14,a1,7                    # [9]  qacc_scratch+7
+    s8i     a13,a1,3                    # [10]  qacc_scratch+3
+    s8i     a12,a1,2                    # [11]  qacc_scratch+2
+    s8i     a11,a1,6                    # [12]  qacc_scratch+6
+    s16i    a9,a1,4                     # [13]  qacc_scratch+4
+    ee.st.qacc_h.l.128.ip   a8,16       # [14]  id:291
+    ee.st.qacc_h.h.32.ip    a8,-32      # [15]  id:292
+    l16ui   a9,a1,16                    # [16]  qacc_scratch+16
+    l8ui    a15,a1,32                   # [17]  qacc_scratch+32
+    l8ui    a12,a1,22                   # [18]  qacc_scratch+22
+    l8ui    a11,a1,21                   # [19]  qacc_scratch+21
+    l8ui    a14,a1,31                   # [20]  qacc_scratch+31
+    l16ui   a13,a1,26                   # [21]  qacc_scratch+26
+    s16i    a13,a1,12                   # [22]  qacc_scratch+12
+    s8i 	a14,a1,14                   # [23]  qacc_scratch+14
+    s8i 	a11,a1,10                   # [24]  qacc_scratch+10
+    s8i 	a12,a1,11                   # [25]  qacc_scratch+11
+    s8i 	a15,a1,15                   # [26]  qacc_scratch+15
+    s16i    a9,a1,8                     # [27]  qacc_scratch+8
+    l32i    a15,a1,160                  # [28]  gra_spill_temp_170
+    movi.n  a9,16                   # [29]
+    ee.srcmb.s16.qacc   q1,a9,0         # [30]
+    ee.vld.128.ip   q0,a8,0             # [31]  id:304
+    s32i    a8,a1,180                   # [32]  gra_spill_temp_175
+    ee.vzip.16  q0,q1               # [33]
+    bnez.n  a15,.LBB23_esp_nn_depthwise_conv_s16_mult8_3x3  # [34]
+
+    s32i.n  a10,a1,48               # [0]  gra_spill_temp_142
+    addi    a15,a1,112                  # [1]
+    st.qr   q1,a15,80                   # [2]  gra_spill_temp_178-112
+    j   .Lt_8_13314                     # [3]
+
+.Lt_8_8194: # 0x14d3
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3, . - esp_nn_depthwise_conv_s16_mult8_3x3_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S
new file mode 100644
index 0000000..4f9143b
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s16_mult8_esp32s3.S
@@ -0,0 +1,432 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+    # Program Unit: esp_nn_depthwise_conv_s16_mult8_esp32s3
+    .type   esp_nn_depthwise_conv_s16_mult8_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s16_mult8_esp32s3
+
+esp_nn_depthwise_conv_s16_mult8_esp32s3:    # 0x14d7
+    # qacc_scratch = 0 // bytes a1+0..a1+35: QACC dump area, repacked in place before being loaded into q0
+    # gra_spill_temp_183 = 48 // -pad_wd sign-extended from bit 15 (initial base_x of every row)
+    # gra_spill_temp_184 = 52 // filter_ht
+    # gra_spill_temp_185 = 56 // input_ht
+    # gra_spill_temp_186 = 60 // stride_ht
+    # gra_spill_temp_187 = 64 // out_ht
+    # gra_spill_temp_188 = 68 // out_y loop counter
+    # gra_spill_temp_189 = 72 // stride_wd
+    # gra_spill_temp_190 = 76 // out_shift
+    # gra_spill_temp_191 = 80 // out_mult
+    # gra_spill_temp_192 = 84 // out_wd
+    # gra_spill_temp_193 = 88 // out_x loop counter
+    # gra_spill_temp_194 = 92 // ch_mult
+    # gra_spill_temp_195 = 96 // running ch_idx * ch_mult
+    # gra_spill_temp_196 = 100 // base_y: starts at -pad_ht, += stride_ht per out_y
+    # gra_spill_temp_197 = 104 // bias & 0xF (written to SAR_BYTE before the bias loads)
+    # gra_spill_temp_198 = 108 // filter_y_start = max(-base_y, 0)
+    # gra_spill_temp_199 = 112 // bias
+    # gra_spill_temp_200 = 116 // ch_mult - 7
+    # gra_spill_temp_201 = 120 // bias_ptr (reset to bias for every out_x)
+    # gra_spill_temp_202 = 124 // out_data write pointer
+    # gra_spill_temp_203 = 128 // pointer used for the QACC stores, initialised to a1 (qacc_scratch)
+    # gra_spill_temp_204 = 132 // ch_mult_idx
+    # gra_spill_temp_205 = 136 // out_mult_ptr
+    # gra_spill_temp_206 = 140 // out_shift_ptr
+    # gra_spill_temp_207 = 144 // input_data
+    # gra_spill_temp_208 = 148 // channels * ch_mult
+    # gra_spill_temp_209 = 152 // 2 * channels * ch_mult
+    # gra_spill_temp_210 = 156 // 2 * channels
+    # gra_spill_temp_211 = 160 // filter_data
+    # gra_spill_temp_212 = 164 // filter_x_end - filter_x_start
+    # gra_spill_temp_213 = 168 // base_x
+    # gra_spill_temp_214 = 172 // channels
+    # gra_spill_temp_215 = 176 // filter_y * filter_wd + filter_x_end; only updated, never consumed in this function
+    # gra_spill_temp_216 = 180 // filter_y loop index
+    # gra_spill_temp_217 = 184 // ch_idx
+    # gra_spill_temp_218 = 192 // 16-byte spill: result of the first requantize call
+    # gra_spill_temp_219 = 208 // 16-byte spill: q1 (second group of accumulators) across the calls
+    // Summary: per (out_y, out_x, ch_idx) and per group of 8 ch_mult outputs: accumulate input*filter in QACC, add bias, requantize, add out_offset, clamp, store 8 bytes to out_data.
+ // registers:
+ // a2: const int16_t *input_data
+ // a3: const uint16_t input_wd
+ // a4: const uint16_t input_ht
+ // a5: const uint16_t channels
+ // a6: const uint16_t pad_wd
+ // a7: const uint16_t pad_ht
+ // after the prologue: a3 = input_wd (kept), a4 = filter_wd, a2/a5/a6/a7 are reused as loop state
+ // on stack:
+ // const uint16_t stride_wd
+ // const uint16_t stride_ht
+ // const uint16_t ch_mult
+ // const int16_t *filter_data
+ // const uint16_t filter_wd
+ // const uint16_t filter_ht
+ // const int32_t *bias
+ // int8_t *out_data
+ // const uint16_t out_wd
+ // const uint16_t out_ht
+ // const int32_t out_offset
+ // const int32_t *out_shift
+ // const int32_t *out_mult
+ // const int32_t activation_min
+ // const int32_t activation_max
+ // the stack arguments are read at a1+256 (stride_wd) .. a1+312 (activation_max), 4 bytes apart, after the entry below
+    entry   a1,256                      #
+    s32i    a2,a1,144                   # [0]  gra_spill_temp_207 -- save input_data
+    s32i.n  a4,a1,56                # [1]  gra_spill_temp_185 -- save input_ht
+    s32i    a5,a1,172                   # [2]  gra_spill_temp_214 -- save channels
+    l32i    a9,a1,284                   # [3]  id:241 out_data+0x0
+
+    l16ui   a8,a1,292                   # [4]  id:242 out_ht+0x0
+    s32i    a8,a1,64                    # [5]  gra_spill_temp_187
+    s32i    a9,a1,124                   # [6]  gra_spill_temp_202 -- out_data write pointer
+    beqz.n  a8,.Lt_9_8450           # [7] -- out_ht == 0: return immediately
+    // one-time setup: spill the stack arguments and derived constants into the frame slots listed above
+    s32i    a1,a1,128                   # [0]  gra_spill_temp_203 -- slot = address of qacc_scratch (a1+0)
+    neg     a13,a7                      # [1] -- a13 = -pad_ht
+    movi.n  a4,0                    # [2] -- initial out_y counter
+    neg     a12,a6                      # [3] -- a12 = -pad_wd
+    l32i    a9,a1,280                   # [4]  id:243 bias+0x0
+    slli    a11,a5,1                    # [5] -- a11 = 2 * channels
+    l16ui   a10,a1,264                  # [6]  id:244 ch_mult+0x0
+    l32i    a14,a1,268                  # [7]  id:245 filter_data+0x0
+    s32i    a14,a1,160                  # [8]  gra_spill_temp_211
+    s32i    a10,a1,92                   # [9]  gra_spill_temp_194
+    s32i    a11,a1,156                  # [10]  gra_spill_temp_210
+    s32i    a9,a1,112                   # [11]  gra_spill_temp_199
+    sext    a12,a12,15                  # [12] -- sign-extend -pad_wd from bit 15
+    s32i    a4,a1,68                    # [13]  gra_spill_temp_188 -- out_y = 0
+    sext    a13,a13,15                  # [14] -- sign-extend -pad_ht from bit 15
+    l16ui   a4,a1,272                   # [15]  id:246 filter_wd+0x0 -- a4 = filter_wd from here on
+    s32i    a13,a1,100                  # [16]  gra_spill_temp_196 -- base_y = -pad_ht
+    s32i.n  a12,a1,48               # [17]  gra_spill_temp_183
+    mul16u  a8,a5,a10               # [18] -- a8 = channels * ch_mult
+    extui   a9,a9,0,4                   # [19] -- a9 = bias & 0xF
+    l32i    a11,a1,304                  # [20]  id:249 out_mult+0x0
+    s32i    a11,a1,80                   # [21]  gra_spill_temp_191
+    s32i    a9,a1,104                   # [22]  gra_spill_temp_197
+    s32i    a8,a1,148                   # [23]  gra_spill_temp_208
+    addi    a10,a10,-7                  # [24] -- a10 = ch_mult - 7
+    l32i    a12,a1,300                  # [25]  id:248 out_shift+0x0
+    l16ui   a13,a1,256                  # [26]  id:247 stride_wd+0x0
+    s32i    a13,a1,72                   # [27]  gra_spill_temp_189
+    s32i    a12,a1,76                   # [28]  gra_spill_temp_190
+    s32i    a10,a1,116                  # [29]  gra_spill_temp_200
+    slli    a8,a8,1                     # [30] -- a8 = 2 * channels * ch_mult
+    l16ui   a9,a1,260                   # [31]  id:251 stride_ht+0x0
+    s32i.n  a9,a1,60                # [32]  gra_spill_temp_186
+    s32i    a8,a1,152                   # [33]  gra_spill_temp_209
+    l16ui   a10,a1,276                  # [34]  id:250 filter_ht+0x0
+    s32i.n  a10,a1,52               # [35]  gra_spill_temp_184
+    l16ui   a8,a1,288                   # [36]  id:252 out_wd+0x0
+    s32i    a8,a1,84                    # [37]  gra_spill_temp_192
+    j       .Lt_9_8962                      # [38] -- enter the out_y loop
+    // out_y loop increment: out_y++, base_y = sext16(base_y + stride_ht); return when out_y == out_ht
+.Lt_9_9218: # 0x1561
+#<loop> Part of loop body line 1083, head labeled .Lt_9_8962
+    l32i    a15,a1,64                   # [0]  gra_spill_temp_187
+    l32i.n  a9,a1,60                # [1]  gra_spill_temp_186
+    l32i    a14,a1,68                   # [2]  gra_spill_temp_188
+    l32i    a8,a1,100                   # [3]  gra_spill_temp_196
+    addi.n  a14,a14,1               # [4] -- out_y++
+    s32i    a14,a1,68                   # [5]  gra_spill_temp_188
+    add.n   a9,a8,a9                    # [6] -- base_y + stride_ht
+    sub     a14,a14,a15                 # [7] -- out_y - out_ht
+    sext    a8,a9,15                    # [8] -- keep base_y as a sign-extended 16-bit value
+    s32i    a8,a1,100                   # [9]  gra_spill_temp_196
+    beqz    a14,.Lt_9_8450              # [10] -- all rows done
+    // out_y loop body: skip the row when out_wd == 0, otherwise clip the filter_y range against the input height
+.Lt_9_8962: # 0x157f
+    l32i    a10,a1,84                   # [0]  gra_spill_temp_192
+    beqz.n  a10,.Lt_9_9218          # [2] -- out_wd == 0
+
+    l32i.n  a7,a1,52                # [0]  gra_spill_temp_184 -- a7 = filter_ht
+    movi.n  a11,0                   # [1]
+    l32i.n  a8,a1,56                # [2]  gra_spill_temp_185 -- a8 = input_ht
+    l32i    a9,a1,100                   # [3]  gra_spill_temp_196 -- a9 = base_y
+    l32i.n  a12,a1,48               # [4]  gra_spill_temp_183
+    s32i    a12,a1,168                  # [5]  gra_spill_temp_213 -- base_x = -pad_wd at the start of the row
+    neg     a10,a9                      # [6] -- a10 = -base_y
+    sub     a8,a8,a9                    # [7] -- a8 = input_ht - base_y
+    max     a10,a10,a11                 # [8] -- filter_y_start = max(-base_y, 0)
+    s32i    a10,a1,108                  # [9]  gra_spill_temp_198
+    min     a7,a7,a8                    # [10] -- a7 = filter_y_end = min(filter_ht, input_ht - base_y)
+    movi.n  a11,0                   # [11]
+    s32i    a11,a1,88                   # [12]  gra_spill_temp_193 -- out_x = 0
+    j       .Lt_9_9730                      # [13]
+    // out_x loop increment: out_x++, base_x = sext16(base_x + stride_wd); row done when out_x == out_wd
+.Lt_9_9986: # 0x15a9
+#<loop> Part of loop body line 1085, head labeled .Lt_9_9730
+    l32i    a13,a1,84                   # [0]  gra_spill_temp_192
+    l32i    a15,a1,72                   # [1]  gra_spill_temp_189
+    l32i    a12,a1,88                   # [2]  gra_spill_temp_193
+    l32i    a14,a1,168                  # [3]  gra_spill_temp_213
+    addi.n  a12,a12,1               # [4] -- out_x++
+    s32i    a12,a1,88                   # [5]  gra_spill_temp_193
+    add.n   a15,a14,a15                 # [6] -- base_x + stride_wd
+    sext    a14,a15,15                  # [7] -- keep base_x as a sign-extended 16-bit value
+    s32i    a14,a1,168                  # [8]  gra_spill_temp_213
+    beq     a12,a13,.Lt_9_9218          # [9] -- out_x == out_wd
+
+.Lt_9_9730: # 0x15c5
+#<loop> Loop body line 1085, nesting depth: 2, estimated iterations: 100
+ #1086              const int16_t base_x = (out_x * stride_wd) - pad_wd;
+ #1087              const int32_t *out_mult_ptr = out_mult;
+ #1088              const int32_t *out_shift_ptr = out_shift;
+ #1089              uint32_t bias_ptr = (uint32_t) (bias);
+ #1090              for (int ch_idx = 0; ch_idx < channels; ch_idx++) {//channel_loop
+    l32i    a8,a1,172                   # [0]  gra_spill_temp_214
+    l32i    a9,a1,80                    # [1]  gra_spill_temp_191
+    l32i    a10,a1,76                   # [2]  gra_spill_temp_190
+    l32i    a11,a1,112                  # [3]  gra_spill_temp_199
+    s32i    a11,a1,120                  # [4]  gra_spill_temp_201 -- bias_ptr = bias
+    s32i    a10,a1,140                  # [5]  gra_spill_temp_206 -- out_shift_ptr = out_shift
+    s32i    a9,a1,136                   # [6]  gra_spill_temp_205 -- out_mult_ptr = out_mult
+    beqz.n  a8,.Lt_9_9986           # [7] -- channels == 0: next out_x
+    // per-pixel setup: clip the filter_x range against the input width
+.LBB9_esp_nn_depthwise_conv_s16_mult8:  # 0x15dc
+#<loop> Part of loop body line 1085, head labeled .Lt_9_9730
+    movi.n  a8,0                    # [0]
+    l32i    a5,a1,168                   # [1]  gra_spill_temp_213 -- a5 = base_x
+    movi.n  a13,0                   # [2]
+    movi.n  a14,0                   # [3]
+    s32i    a14,a1,96                   # [4]  gra_spill_temp_195 -- ch_idx * ch_mult = 0
+    s32i    a13,a1,184                  # [5]  gra_spill_temp_217 -- ch_idx = 0
+    neg     a6,a5                       # [6] -- a6 = -base_x
+    max     a6,a6,a8                    # [7] -- a6 = filter_x_start = max(-base_x, 0)
+    sub     a5,a3,a5                    # [8] -- a5 = input_wd - base_x
+    min     a5,a4,a5                    # [9] -- a5 = filter_x_end = min(filter_wd, input_wd - base_x)
+    sub     a12,a5,a6                   # [10] -- a12 = filter_x_end - filter_x_start
+    s32i    a12,a1,164                  # [11]  gra_spill_temp_212
+    j       .Lt_9_10498                     # [12]
+    // channel loop increment: ch_idx++, (ch_idx * ch_mult) += ch_mult; pixel done when ch_idx == channels
+.Lt_9_10754:    # 0x1600
+#<loop> Part of loop body line 1090, head labeled .Lt_9_10498
+    l32i    a10,a1,172                  # [0]  gra_spill_temp_214
+    l32i    a12,a1,92                   # [1]  gra_spill_temp_194
+    l32i    a9,a1,184                   # [2]  gra_spill_temp_217
+    l32i    a11,a1,96                   # [3]  gra_spill_temp_195
+    addi.n  a9,a9,1                 # [4] -- ch_idx++
+    s32i    a9,a1,184                   # [5]  gra_spill_temp_217
+    add.n   a11,a11,a12                 # [6] -- += ch_mult
+    s32i    a11,a1,96                   # [7]  gra_spill_temp_195
+    beq     a9,a10,.Lt_9_9986           # [8] -- ch_idx == channels: next out_x
+
+.Lt_9_10498:    # 0x1619
+#<loop> Loop body line 1090, nesting depth: 3, estimated iterations: 100
+ #1091                  for (int ch_mult_idx = 0; ch_mult_idx < ch_mult - 7; ch_mult_idx += 8) {
+    l32i    a13,a1,116                  # [0]  gra_spill_temp_200
+    blti    a13,1,.Lt_9_10754           # [2] -- ch_mult - 7 < 1: no full group of 8, next channel
+
+.LBB12_esp_nn_depthwise_conv_s16_mult8: # 0x161f
+#<loop> Part of loop body line 1090, head labeled .Lt_9_10498
+    l32i    a2,a1,96                    # [0]  gra_spill_temp_195 -- a2 = ch_idx * ch_mult (advanced by 8 per group below)
+    movi.n  a14,0                   # [1]
+    s32i    a14,a1,132                  # [2]  gra_spill_temp_204 -- ch_mult_idx = 0
+    j       .Lt_9_11266                     # [3]
+    // accumulation finished: dump QACC through the scratch pointer and repack the bytes for q0
+.Lt_9_11522:    # 0x162a
+    l32i    a9,a1,128                   # [0]  gra_spill_temp_203 -- a9 = QACC store pointer
+    ee.st.qacc_l.l.128.ip   a9,16       # [2]  id:257
+    ee.st.qacc_l.h.32.ip    a9,0        # [3]  id:258
+    l8ui    a10,a1,15                   # [4]  qacc_scratch+15
+    l16ui   a8,a1,10                    # [5]  qacc_scratch+10
+    l8ui    a13,a1,16                   # [6]  qacc_scratch+16
+    l8ui    a12,a1,6                    # [7]  qacc_scratch+6
+    l8ui    a11,a1,5                    # [8]  qacc_scratch+5
+    s8i     a11,a1,2                    # [9]  qacc_scratch+2
+    s8i     a12,a1,3                    # [10]  qacc_scratch+3
+    s8i     a13,a1,7                    # [11]  qacc_scratch+7
+    s16i    a8,a1,4                     # [12]  qacc_scratch+4
+    s8i     a10,a1,6                    # [13]  qacc_scratch+6
+    // NOTE(review): the moves above gather scratch bytes {0,1,5,6,10,11,15,16} into scratch+0..7, which looks like the low 16 bits of four 5-byte QACC lanes -- confirm against the ESP32-S3 TRM
+    movi.n  a8,16                   # [14] -- shift amount operand for ee.srcmb below
+    ee.st.qacc_h.l.128.ip   a9,16       # [15]  id:268
+    ee.st.qacc_h.h.32.ip    a9,-32      # [16]  id:269
+    ee.srcmb.s16.qacc   q1,a8,0         # [17] -- NOTE(review): presumably q1 = QACC lanes shifted right by 16 bits -- confirm
+    l16ui   a13,a1,26                   # [18]  qacc_scratch+26 -- this group gathers scratch bytes {16,17,21,22,26,27,31,32} into scratch+8..15
+    l8ui    a15,a1,32                   # [19]  qacc_scratch+32
+    l8ui    a12,a1,22                   # [20]  qacc_scratch+22
+    l8ui    a11,a1,21                   # [21]  qacc_scratch+21
+    l16ui   a10,a1,16                   # [22]  qacc_scratch+16
+    l8ui    a14,a1,31                   # [23]  qacc_scratch+31
+    s8i     a14,a1,14                   # [24]  qacc_scratch+14
+    s16i    a10,a1,8                    # [25]  qacc_scratch+8
+    s8i     a11,a1,10                   # [26]  qacc_scratch+10
+    s8i     a12,a1,11                   # [27]  qacc_scratch+11
+    s8i     a15,a1,15                   # [28]  qacc_scratch+15
+    s16i    a13,a1,12                   # [29]  qacc_scratch+12
+ #1138                      EE_VZIP_16(q0, q1); /* 4x32 */
+ #1139
+ #1140                      if (bias) {
+    l32i            a15,a1,112                  # [30]  gra_spill_temp_199 -- a15 = bias, tested for NULL below
+    ee.vld.128.ip   q0,a9,0             # [31]  id:281 -- q0 = 16 bytes at a9 (expected to be back at qacc_scratch+0 after the +16/+16/-32 updates)
+    s32i            a9,a1,128                   # [32]  gra_spill_temp_203
+    ee.vzip.16      q0,q1               # [33]
+    beqz.n          a15,.Lt_9_13570         # [34] -- bias == NULL: skip the bias add
+    // bias add. NOTE(review): bias may be unaligned; SAR_BYTE = bias & 0xF and ee.src.q.qup appear to realign the three vector loads -- confirm
+.LBB23_esp_nn_depthwise_conv_s16_mult8: # 0x168e
+#<loop> Part of loop body line 1091, head labeled .Lt_9_11266
+    addi            a14,a1,112                  # [0]
+    l32i            a8,a1,104                   # [1]  gra_spill_temp_197 -- a8 = bias & 0xF
+    l32i            a15,a1,120                  # [2]  gra_spill_temp_201 -- a15 = bias_ptr
+    wur.sar_byte    a8                  # [3]
+    ee.vld.128.ip   q3,a15,16           # [4]  id:284
+    ee.vld.128.ip   q6,a15,16           # [5]  id:285
+    ee.vld.128.ip   q4,a15,0            # [6]  id:286
+    s32i            a15,a1,120                  # [7]  gra_spill_temp_201 -- bias_ptr advanced by 32 bytes (8 x int32)
+    ee.src.q.qup    q5,q3,q6            # [8]
+    ee.vadds.s32    q0,q0,q5            # [9] -- q0 += q5 (bias for the first four lanes)
+    ee.src.q.qup    q2,q3,q4            # [10]
+    ee.vadds.s32    q1,q1,q2            # [11] -- q1 += q2 (bias for the next four lanes)
+    st.qr           q1,a14,96                   # [12]  gra_spill_temp_219-112 -- NOTE(review): same slot is stored again at .Lt_9_13570, so this store looks redundant
+    // requantize: the helper is called twice; q1 is spilled to gra_spill_temp_219 and reloaded into q0 for the second call
+.Lt_9_13570:    # 0x16b5
+ #1158  q0 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q0, out_mult_ptr, out_shift_ptr);
+    l32i    a10,a1,136                  # [0]  gra_spill_temp_205 -- first call8 argument: out_mult_ptr
+    l32i    a11,a1,140                  # [1]  gra_spill_temp_206 -- second call8 argument: out_shift_ptr
+    addi    a9,a1,112                   # [2]
+    st.qr   q1,a9,96                    # [3]  gra_spill_temp_219-112 -- spill q1 across the call
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+ #1159                      out_mult_ptr += 4;
+ #1160                      out_shift_ptr += 4;
+ #1161
+ #1162  q1 = esp_nn_multiply_by_quantized_mult_ver1_esp32s3(q1, out_mult_ptr, out_shift_ptr);
+    l32i    a11,a1,140                  # [0]  gra_spill_temp_206
+    addi    a12,a1,112                  # [1]
+    l32i    a10,a1,136                  # [2]  gra_spill_temp_205
+    st.qr   q0,a12,80                   # [3]  gra_spill_temp_218-112 -- save the first result
+    ld.qr   q0,a12,96                   # [4]  gra_spill_temp_219-112 -- q0 = spilled q1
+    addi    a10,a10,16                  # [5] -- out_mult_ptr + 16 bytes (4 x int32)
+    addi    a11,a11,16                  # [6] -- out_shift_ptr + 16 bytes (4 x int32)
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+    // post-process: add out_offset, clamp to [activation_min, activation_max], narrow and store 8 bytes to out_data
+    addi.n  a2,a2,8                 # [0] -- output channel offset += 8 for the next group
+    l32i    a14,a1,116                  # [1]  gra_spill_temp_200
+    l32i    a15,a1,124                  # [2]  gra_spill_temp_202
+    l32i    a13,a1,132                  # [3]  gra_spill_temp_204
+    l32i    a10,a1,140                  # [4]  gra_spill_temp_206
+    l32i    a11,a1,136                  # [5]  gra_spill_temp_205
+    addmi   a9,a1,256                   # [6]
+    addi    a8,a1,112                   # [7]
+    ld.qr   q7,a8,80                    # [8]  gra_spill_temp_218-112 -- q7 = first requantized result
+    addi    a9,a9,56                    # [9] -- a9 = a1+312 = address of activation_max
+    ee.vldbc.32 q2,a9               # [10]  id:290 activation_max
+    addi    a11,a11,32                  # [11] -- out_mult_ptr += 32 bytes (8 x int32)
+    addi    a10,a10,32                  # [12] -- out_shift_ptr += 32 bytes (8 x int32)
+    addi.n  a13,a13,8               # [13] -- ch_mult_idx += 8
+    s32i    a13,a1,132                  # [14]  gra_spill_temp_204
+    s32i    a10,a1,140                  # [15]  gra_spill_temp_206
+    s32i    a11,a1,136                  # [16]  gra_spill_temp_205
+    addmi   a10,a1,256                  # [17]
+    addmi   a11,a1,256                  # [18]
+    addi    a11,a11,52                  # [19] -- a11 = a1+308 = address of activation_min
+    addi    a10,a10,40                  # [20] -- a10 = a1+296 = address of out_offset
+    ee.vldbc.32     q3,a10              # [21]  id:289 out_offset
+    ee.vldbc.32     q1,a11              # [22]  id:291 activation_min
+    ee.vadds.s32    q0,q0,q3            # [23] -- second result += out_offset
+    ee.vadds.s32    q7,q7,q3            # [24] -- first result += out_offset
+    ee.vmin.s32     q7,q7,q2            # [25] -- clamp to activation_max
+    ee.vmin.s32     q0,q0,q2            # [26]
+    ee.vmax.s32     q0,q0,q1            # [27] -- clamp to activation_min
+    ee.vmax.s32     q7,q7,q1            # [28]
+    ee.vunzip.16    q7,q0               # [29]
+    ee.vunzip.8     q7,q0               # [30]
+    ee.vst.l.64.ip  q7,a15,8        # [31]  id:292 -- store 64 bits at the out_data pointer, pointer += 8
+    s32i            a15,a1,124                  # [32]  gra_spill_temp_202
+    bge             a13,a14,.Lt_9_10754         # [33] -- ch_mult_idx >= ch_mult - 7: next channel
+    // ch_mult group loop head: clear QACC, then walk filter rows filter_y_start .. filter_y_end - 1
+.Lt_9_11266:    # 0x1740
+
+    ee.zero.qacc                    # [0]
+    l32i    a12,a1,108                  # [1]  gra_spill_temp_198
+    s32i    a12,a1,180                  # [2]  gra_spill_temp_216 -- filter_y = filter_y_start
+    bge a12,a7,.Lt_9_11522          # [3] -- empty filter_y range: skip accumulation
+
+    mull    a15,a12,a4                  # [0] -- a15 = filter_y * filter_wd
+    l32i    a14,a1,100                  # [1]  gra_spill_temp_196 -- a14 = base_y
+    add.n   a8,a15,a5                   # [2] -- a8 = filter_y * filter_wd + filter_x_end
+    add.n   a14,a14,a12                 # [3] -- a14 = base_y + filter_y
+    mull    a14,a3,a14                  # [4] -- a14 = input_wd * (base_y + filter_y)
+    s32i    a8,a1,176                   # [5]  gra_spill_temp_215
+    bge     a6,a5,.Lt_9_12290           # [6] -- empty filter_x range: nothing to accumulate in this row
+    // row setup: a8 = filter byte pointer, a10 = input byte pointer; the hardware loop then does one multiply-accumulate per filter_x
+.LBB18_esp_nn_depthwise_conv_s16_mult8: # 0x175f
+#<loop> Part of loop body line 1091, head labeled .Lt_9_11266
+    l32i    a10,a1,184                  # [0]  gra_spill_temp_217 -- a10 = ch_idx
+    l32i    a11,a1,172                  # [1]  gra_spill_temp_214 -- a11 = channels
+    l32i    a12,a1,168                  # [2]  gra_spill_temp_213 -- a12 = base_x
+    l32i    a8,a1,148                   # [3]  gra_spill_temp_208 -- a8 = channels * ch_mult
+    add.n   a9,a15,a6                   # [4] -- filter_y * filter_wd + filter_x_start
+    mull    a8,a8,a9                    # [5] -- scaled by channels * ch_mult
+    add.n   a12,a12,a6                  # [6] -- base_x + filter_x_start
+    l32i    a9,a1,160                   # [7]  gra_spill_temp_211 -- a9 = filter_data
+    add.n   a12,a14,a12                 # [8] -- input pixel index: row * input_wd + x
+    mull    a11,a11,a12                 # [9] -- scaled by channels
+    add.n   a8,a2,a8                    # [10] -- plus output channel offset (a2)
+    l32i    a12,a1,156                  # [11]  gra_spill_temp_210 -- a12 = 2 * channels (input byte step per filter_x)
+    addx2   a8,a8,a9                    # [12] -- a8 = filter_data + 2 * filter element index
+    add.n   a10,a10,a11                 # [13] -- plus ch_idx
+    l32i    a11,a1,144                  # [14]  gra_spill_temp_207 -- a11 = input_data
+    l32i    a9,a1,164                   # [15]  gra_spill_temp_212 -- loop count = filter_x_end - filter_x_start
+    addx2   a10,a10,a11                 # [16] -- a10 = input_data + 2 * input element index
+    l32i    a11,a1,152                  # [17]  gra_spill_temp_209 -- a11 = 2 * channels * ch_mult (filter byte step per filter_x)
+    loopgtz a9,.LBB45_esp_nn_depthwise_conv_s16_mult8   # [18]
+
+    mov.n           a9,a8                       # [0*II+0]
+    ee.vldbc.16     q0,a10              # [0*II+1]  id:255 -- NOTE(review): presumably broadcasts one 16-bit input value to all lanes -- confirm
+    ee.vld.128.ip   q1,a9,0             # [0*II+2]  id:254 -- 128 bits of filter data from a8
+    add.n           a10,a10,a12                 # [0*II+3] -- next input x
+    add.n           a8,a8,a11                   # [0*II+4] -- next filter x
+    ee.vmulas.s16.qacc  q0,q1       # [0*II+5] -- accumulate q0 * q1 into QACC (per the mnemonic)
+
+.LBB45_esp_nn_depthwise_conv_s16_mult8: # 0x17a2
+
+.Lt_9_12290:    # 0x17a2
+    // next filter row: input row base += input_wd, filter row base += filter_wd, filter_y++
+    add.n   a14,a14,a3                  # [0]
+    add.n   a15,a15,a4                  # [1]
+    l32i    a10,a1,180                  # [2]  gra_spill_temp_216
+    l32i    a11,a1,176                  # [3]  gra_spill_temp_215
+    addi.n  a10,a10,1               # [4] -- filter_y++
+    add.n   a11,a11,a4                  # [5]
+    s32i    a11,a1,176                  # [6]  gra_spill_temp_215
+    s32i    a10,a1,180                  # [7]  gra_spill_temp_216
+    sub     a10,a7,a10                  # [8] -- filter_y_end - filter_y
+    beqz    a10,.Lt_9_11522             # [9] -- all filter rows done: go repack QACC
+
+.Lt_9_12034:    # 0x17bc
+    blt     a6,a5,.LBB18_esp_nn_depthwise_conv_s16_mult8    # [0] -- non-empty filter_x range: accumulate the next row
+
+    j       .Lt_9_12290                     # [0] -- empty filter_x range: only advance filter_y
+
+.Lt_9_8450: # 0x17c2
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s16_mult8_esp32s3, . - esp_nn_depthwise_conv_s16_mult8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S
new file mode 100644
index 0000000..c9240d4
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/convolution/esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3.S
@@ -0,0 +1,512 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .literal_position
+
+// processes multiple of 16 channels
+// already padded version. no additional padding needed
+// simply keep sliding filter window by stride_size
+
+    # Program Unit: esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3 -- s8 depthwise conv, 3x3 filter, ch_mult 1, pre-padded input, 16 channels per pass
+    .type   esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3, @function
+    .align   4
+    .global esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3
+
+esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3:  # 0xccc
+    # qacc_scratch = 0
+    # gra_spill_temp_103 = 40 // stride_wd*channels
+    # gra_spill_temp_104 = 44 // bias_align
+    # gra_spill_temp_107 = 48 // input_offset
+    # gra_spill_temp_105 = 52 // out_mult_ptr
+    # gra_spill_temp_106 = 56 // out_shift_ptr
+    # gra_spill_temp_108 = 60 // ch_idx
+    # gra_spill_temp_109 = 64 // out_ch
+    # gra_spill_temp_110 = 68 // bias_ptr
+    # gra_spill_temp_111 = 72 // 2 * (input_wd * channels)
+    # gra_spill_temp_112 = 76 // input_data
+    # gra_spill_temp_118 = 96 // out_y counter of the height loop
+    # gra_spill_temp_119 = 100 // start value of the width loop index for the current row
+    # gra_spill_temp_120 = 104 // byte offset of the current input row block
+    # gra_spill_temp_121 = 108 // filter pointer for the current 16-channel group
+    # gra_spill_temp_113 = 112 // input_wd * channels
+    # gra_spill_temp_114 = 116 // input_wd
+    # gra_spill_temp_130 = 120 // end value of the width loop index
+
+    # gra_spill_temp_141 = 0 // third requantized vector, kept across the last call8
+    # gra_spill_temp_120 = 16 // q1 accumulator spill (name is reused, see offset 104 above)
+    # gra_spill_temp_137 = 80 // q3 accumulator spill
+
+// offset+bias factor
+    # gra_spill_temp_134 = 128 //256-128
+    # gra_spill_temp_135 = 144 //256-112
+    # gra_spill_temp_133 = 160 //256-96
+    # gra_spill_temp_132 = 176 //256-80
+
+
+ // registers:
+ // a2: input_data
+ // a3: input_wd
+ // a4: input_ht
+ // a5: channels
+ // a6: input_offset
+ // a7: stride_wd
+
+ // on stack:
+
+ // 320: stride_ht
+ // 324: filter_data
+ // 328: *bias
+ // 332: *out_data
+ // 336: out_wd
+ // 340: out_ht
+ // 344: out_offset
+ // 348: *out_shift
+ // 352: *out_mult
+ // 356: activation_min
+ // 360: activation_max
+
+    entry   a1,320                      #
+    mul16u  a7,a7,a5
+    s32i    a3,a1,116                   # [0]  gra_spill_temp_114, input_wd
+    s32i    a6,a1,48                    # [1]  gra_spill_temp_107, input_offset
+    s32i    a7,a1,40                    # gra_spill_temp_103, stride_wd*channels
+
+    addi    a8,a5,-15                   # [2]
+    s32i    a2,a1,76                    # [3]  gra_spill_temp_112, input_data
+    l32i    a9,a1,328                   # [4]  id:664 bias+0x0
+    mov.n   a2,a5                       # [5]
+    s32i    a8,a1,64                    # [7]  gra_spill_temp_109
+    s32i    a9,a1,68                    # [8]  gra_spill_temp_110, bias_ptr
+    blti    a8,1,.Lt_7_4610             # [9]  return early when channels < 16
+
+    l32i    a12,a1,348                  # [4]  id:666 out_shift+0x0
+    mul16u  a15,a3,a5               # [1]
+    movi.n  a9,0                    # [13]
+    s32i    a12,a1,56                   # [9]  gra_spill_temp_106 // out_shift_ptr
+    s32i    a9,a1,60                    # [14]  gra_spill_temp_108, ch_idx
+    s32i    a15,a1,112                  # [12]  gra_spill_temp_113, input_wd*channels
+    l32i    a9,a1,352                   # [24]  id:665 out_mult+0x0
+    slli    a15,a15,1                   # [15]
+    s32i    a15,a1,72                   # [23]  gra_spill_temp_111, 2 * (input_wd * channels)
+    s32i    a9,a1,52                    # [25]  gra_spill_temp_105, out_mult_ptr
+
+// outer most out_ch loop
+.Lt_7_5122: # 0xd57
+    l32i            a13,a1,324                  # [1]  filter_data
+    l32i            a6,a1,60                    # [2]  gra_spill_temp_108, ch_idx
+    l32i            a9,a1,48                    # [0]  gra_spill_temp_107, input_offset
+    ee.zero.q       q2                      # [3]
+    add.n           a13,a6,a13                  # [4]
+    s32i            a13,a1,108                  # [5]  gra_spill_temp_121
+
+// multiply accumulate filter points
+    ee.vld.128.xp   q1,a13,a2           # [6]  id:673
+    ee.vld.128.xp   q3,a13,a2           # [7]  id:674
+    ee.vcmp.lt.s8   q0,q1,q2            # [8]
+    ee.vcmp.lt.s8   q4,q3,q2            # [9]
+    ee.vzip.8       q1,q0                   # [10]
+    ee.vzip.8       q3,q4                   # [11]
+    ee.vadds.s16    q0,q0,q4            # [12]
+    ee.vld.128.xp   q4,a13,a2           # [13]  id:675
+    ee.vadds.s16    q1,q1,q3            # [14]
+    ee.vcmp.lt.s8   q3,q4,q2            # [15]
+    ee.vzip.8       q4,q3                   # [16]
+    ee.vadds.s16    q1,q1,q4            # [17]
+    ee.vld.128.xp   q4,a13,a2           # [18]  id:676
+    ee.vadds.s16    q0,q0,q3            # [19]
+    ee.vcmp.lt.s8   q3,q4,q2            # [20]
+    ee.vzip.8       q4,q3                   # [21]
+    ee.vadds.s16    q0,q0,q3            # [22]
+    ee.vld.128.xp   q3,a13,a2           # [23]  id:677
+    ee.vadds.s16    q1,q1,q4            # [24]
+    ee.vcmp.lt.s8   q4,q3,q2            # [25]
+    ee.vzip.8       q3,q4                   # [26]
+    ee.vadds.s16    q1,q1,q3            # [27]
+    ee.vld.128.xp   q3,a13,a2           # [28]  id:678
+    ee.vadds.s16    q0,q0,q4            # [29]
+    ee.vcmp.lt.s8   q4,q3,q2            # [30]
+    ee.vzip.8       q3,q4                   # [31]
+    ee.vadds.s16    q0,q0,q4            # [32]
+    ee.vld.128.xp   q4,a13,a2           # [33]  id:679
+    ee.vadds.s16    q1,q1,q3            # [34]
+    ee.vcmp.lt.s8   q3,q4,q2            # [35]
+    ee.vzip.8       q4,q3                   # [36]
+    ee.vadds.s16    q1,q1,q4            # [37]
+    ee.vld.128.xp   q4,a13,a2           # [38]  id:680
+    ee.vadds.s16    q0,q0,q3            # [39]
+    ee.vcmp.lt.s8   q3,q4,q2            # [40]
+    ee.vzip.8       q4,q3                   # [41]
+    ee.vadds.s16    q0,q0,q3            # [42]
+    ee.vld.128.xp   q3,a13,a2           # [44]  id:681
+    ee.vadds.s16    q1,q1,q4            # [43]
+    ee.vcmp.lt.s8   q2,q3,q2            # [47]
+    ee.vzip.8       q3,q2                   # [48]
+    ee.vadds.s16    q0,q0,q2            # [49]
+    ee.vadds.s16    q1,q1,q3            # [50]
+
+    ee.movi.32.a    q1,a15,1            # [51]
+    ee.movi.32.a    q1,a8,3             # [52]
+    ee.movi.32.a    q0,a10,3            # [54]
+    ee.movi.32.a    q0,a13,1            # [55]
+    srai            a11,a10,16                  # [56]
+    srai            a12,a8,16                   # [57]
+    mull            a12,a9,a12                  # [58]
+    mull            a11,a9,a11                  # [59]
+    sext            a8,a8,15                    # [328]
+    sext            a10,a10,15                  # [61]
+    srai            a14,a13,16                  # [62]
+    mull            a14,a9,a14                  # [63]
+    mull            a10,a9,a10                  # [64]
+    mull            a8,a9,a8                    # [65]
+    sext            a13,a13,15                  # [66]
+    mull            a13,a9,a13                  # [67]
+    ee.movi.32.q    q3,a11,3            # [68]
+    ee.movi.32.q    q4,a12,3            # [69]
+    ee.movi.32.q    q4,a8,2             # [70]
+    ee.movi.32.q    q3,a10,2            # [71]
+    ee.movi.32.a    q1,a11,2            # [72]
+    srai            a12,a11,16                  # [74]
+    srai            a8,a15,16                   # [75]
+    mull            a8,a9,a8                    # [76]
+    mull            a12,a9,a12                  # [77]
+    sext            a15,a15,15                  # [78]
+    sext            a11,a11,15                  # [79]
+    mull            a11,a9,a11                  # [80]
+    mull            a15,a9,a15                  # [81]
+    ee.movi.32.q    q4,a12,1            # [82]
+    ee.movi.32.q    q1,a8,3             # [83]
+    ee.movi.32.q    q1,a15,2            # [84]
+    ee.movi.32.q    q4,a11,0            # [85]
+    ee.movi.32.a    q0,a15,2            # [86]
+    ee.movi.32.q    q0,a14,3            # [88]
+    ee.movi.32.q    q0,a13,2            # [91]
+    srai            a8,a15,16                   # [89]
+    mull            a8,a9,a8                    # [90]
+    sext            a15,a15,15                  # [92]
+    mull            a15,a9,a15                  # [93]
+ # 526  MUL_IN_OFFSET_EXPAND(q_sum2, 0, q_sum2, 0);
+    ee.movi.32.a    q0,a11,0            # [94]
+    srai            a13,a11,16                  # [95]
+    ee.movi.32.q    q3,a8,1             # [96]
+    ee.movi.32.q    q3,a15,0            # [100]
+    sext            a11,a11,15                  # [97]
+    mull            a13,a9,a13                  # [98]
+    l32i            a8,a1,332                   # [99]
+    ee.movi.32.a    q1,a10,0            # [103]
+    ee.movi.32.q    q0,a13,1            # [100]
+    srai            a12,a10,16                  # [105]
+    sext            a10,a10,15                  # [106]
+    mull            a12,a9,a12                  # [107]
+    mull            a10,a9,a10                  # [108]
+    mull            a9,a9,a11                   # [109]
+    ee.movi.32.q    q1,a12,1            # [110]
+    ee.movi.32.q    q1,a10,0            # [111]
+
+    l32i            a11,a1,328      // load bias
+    add.n           a6,a6,a8                    # [102]
+    ee.movi.32.q    q0,a9,0             # [113]
+    beqz.n          a11,.Lt_7_5378          # [114]  bias == NULL, skip the bias add
+
+// add bias
+    l32i            a8,a1,68                    # [0]  gra_spill_temp_110, bias_ptr
+    extui           a11,a11,0,4                 # [2] // bias_align
+    wur.sar_byte    a11                 # [4]
+    ee.vld.128.ip   q5,a8,16            # [5]  id:683
+    ee.vld.128.ip   q6,a8,16            # [6]  id:684
+    ee.vld.128.ip   q7,a8,16            # [7]  id:685
+    ee.src.q.ld.ip  q2,a8,16,q5,q6              # [9]
+    ee.vadds.s32    q1,q1,q5            # [12]
+    ee.src.q.ld.ip  q5,a8,0,q6,q7               # [13]
+    s32i            a8,a1,68                    # [11]  gra_spill_temp_110, bias_ptr
+    ee.vadds.s32    q4,q4,q6            # [18]
+    ee.src.q        q7,q7,q2                # [9]
+    ee.src.q        q2,q2,q5                # [13]
+    ee.vadds.s32    q0,q0,q7            # [12]
+    ee.vadds.s32    q3,q3,q2            # [12]
+.Lt_7_5378: # 0xeef
+    addmi           a10,a1,256                  # base for the st.qr below, computed here so the bias == NULL path sets a10 too
+
+// store offset+bias factor (q1,q4,q0,q3)
+    st.qr           q4,a10,-112                  # [17]  gra_spill_temp_135-256
+    st.qr           q3,a10,-128                  # [21]  gra_spill_temp_134-256
+    st.qr           q1,a10,-96                  # [7]  gra_spill_temp_133-256
+    st.qr           q0,a10,-80                  # [8]  gra_spill_temp_132-256
+
+// prepare height loop
+    movi.n  a15,0                   # [1]
+    movi.n  a8,0                    # [2]
+    movi.n  a9,0                    # [3]
+    s32i    a9,a1,100                   # [4]  gra_spill_temp_119
+    s32i    a8,a1,104                   # [5]  gra_spill_temp_120
+    s32i    a15,a1,96                  # [6]  gra_spill_temp_118
+
+// height loop
+.Lt_7_6402: # 0xf0c
+    l32i    a4,a1,104                   # [2]  gra_spill_temp_120 // out_y * (input_wd * stride_ht) * channels)
+    l32i    a8,a1,100                   # [3]  gra_spill_temp_119 // initialised to 0 before height loop
+    l32i    a5,a1,76                    # [1]  gra_spill_temp_112, input_data
+    l32i    a3,a1,60                    # [0]  gra_spill_temp_108, ch_idx
+    l32i    a7,a1,112                   # [1]  gra_spill_temp_113, input_wd*channels
+    l32i    a10,a1,336                  # [0]  out_wd
+    add.n   a4,a4,a5                    # [4] // input_data + (out_y * stride_ht) * input_wd * channels
+    mov.n   a5,a8                       # [5] // index
+    add.n   a3,a3,a4                    # [6] // input_row0
+    l32i    a4,a1,72                    # [9]  gra_spill_temp_111, 2 * (input_wd * channels)
+    add.n   a7,a7,a3                    # [7] // input_row1 = (input_wd * channels)
+    add.n   a8,a8,a10                   # [8]
+    s32i    a8,a1,120                   # [10]  gra_spill_temp_130
+    add.n   a4,a4,a3                    # [11] // input_row2
+
+// width loop
+.Lt_7_7170: # 0xf32
+    l32i                    a9,a1,108                   # [3]  gra_spill_temp_121, filter_ptr
+    ee.zero.qacc                    # [2]
+    mov.n                   a12,a3                      # [4]
+    mov.n                   a11,a7                      # [1]
+    mov.n                   a10,a4                      # [0]
+    ee.vld.128.xp           q0,a12,a2           # [5]  id:693
+    ee.vld.128.xp           q6,a12,a2           # [6]  id:695
+    ee.vld.128.xp           q1,a9,a2            # [7]  id:694
+    ee.vld.128.xp           q7,a9,a2            # [8]  id:696
+    ee.vld.128.xp           q5,a9,a2            # [9]  id:698
+    ee.vld.128.xp           q3,a9,a2            # [10]  id:700
+    ee.vmulas.s8.qacc.ld.xp q4,a12,a2,q0,q1     # [11]  id:697
+    ee.vmulas.s8.qacc.ld.xp q2,a11,a2,q6,q7     # [13]  id:699
+    ee.vld.128.xp           q1,a9,a2            # [14]  id:702
+    ee.vmulas.s8.qacc.ld.xp q0,a11,a2,q4,q5     # [15]  id:701
+    ee.vmulas.s8.qacc.ld.xp q6,a11,a2,q2,q3     # [16]  id:703
+    ee.vld.128.xp           q7,a9,a2            # [17]  id:704
+    ee.vld.128.xp           q3,a9,a2            # [18]  id:706
+    ee.vmulas.s8.qacc.ld.xp q0,a10,a2,q0,q1     # [19]  id:705
+    ee.vmulas.s8.qacc.ld.xp q1,a10,a2,q6,q7     # [20]  id:707
+    ee.vmulas.s8.qacc.ld.xp q4,a10,a2,q0,q3     # [21]  id:709
+    ee.vld.128.xp           q6,a9,a2            # [22]  id:708
+    ee.vld.128.xp           q5,a9,a2            # [23]  id:710
+    ee.vmulas.s8.qacc       q1,q6           # [24]
+    ee.vmulas.s8.qacc       q4,q5           # [25]
+
+ // extract data
+    mov     a12,a1      //// scratch
+    ee.st.qacc_l.l.128.ip   a12,16      # [27]  id:713
+    ee.st.qacc_l.h.32.ip    a12,-16     # [28]  id:714
+
+    l32i.n  a9,a1,8                 # [29]  qacc_scratch+8
+    l32i.n  a11,a1,4                # [30]  qacc_scratch+4
+    l32i.n  a15,a1,0                # [31]  qacc_scratch
+    slli    a14,a11,24                  # [32]
+    sext    a8,a15,19                   # [33]
+    slli    a10,a9,16                   # [34]
+    slli    a13,a11,4                   # [35]
+    extui   a9,a9,16,16                 # [36]
+    srai    a13,a13,12                  # [37]
+    extui   a15,a15,20,12               # [39]
+    srai    a14,a14,12                  # [40]
+    srai    a10,a10,12                  # [41]
+    extui   a11,a11,28,4                # [42]
+    or      a10,a10,a11                 # [43]
+    or      a14,a14,a15                 # [44]
+
+// insert to q0
+    ee.movi.32.q    q0,a8,0             # [38]
+    ee.movi.32.q    q0,a14,1            # [45]
+    ee.movi.32.q    q0,a13,2            # [48]
+    ee.movi.32.q    q0,a10,3            # [49]
+
+    l32i.n  a11,a1,16               # [46]  qacc_scratch+16
+    l32i.n  a14,a1,12               # [47]  qacc_scratch+12
+    slli    a13,a11,20                  # [50]
+
+    ee.st.qacc_h.l.128.ip   a12,16      # [51]  id:720
+    ee.st.qacc_h.h.32.ip    a12,-16     # [55]  id:721
+    srai    a11,a11,12                  # [52]
+    srai    a13,a13,12                  # [53]
+    slli    a8,a14,28                   # [54]
+    slli    a15,a14,8                   # [56]
+    srai    a15,a15,12                  # [57]
+    srai    a8,a8,12                    # [59]
+
+    l32i.n          a12,a1,8                # [328]  qacc_scratch+8
+    or              a8,a8,a9                    # [61]
+    extui           a14,a14,24,8                # [62]
+    l32i.n          a9,a1,0                 # [63]  qacc_scratch
+    or              a13,a13,a14                 # [64]
+//insert to q3
+    ee.movi.32.q    q3,a8,0             # [65]
+    ee.movi.32.q    q3,a15,1            # [67]
+    ee.movi.32.q    q3,a13,2            # [69]
+    ee.movi.32.q    q3,a11,3            # [70]
+
+    l32i.n          a14,a1,4                # [66]  qacc_scratch+4
+    sext            a10,a9,19                   # [68]
+    extui           a9,a9,20,12                 # [72]
+    slli            a13,a12,16                  # [73]
+    slli            a8,a14,24                   # [74]
+    extui           a12,a12,16,16               # [75]
+    srai            a13,a13,12                  # [76]
+    srai            a8,a8,12                    # [77]
+    slli            a15,a14,4                   # [78]
+    srai            a15,a15,12                  # [79]
+    or              a8,a8,a9                    # [80]
+    extui           a14,a14,28,4                # [81]
+    l32i.n          a9,a1,12                # [82]  qacc_scratch+12
+    or              a13,a13,a14                 # [83]
+// insert to q1
+    ee.movi.32.q    q1,a10,0            # [71]
+    ee.movi.32.q    q1,a8,1             # [84]
+    ee.movi.32.q    q1,a15,2            # [85]
+    ee.movi.32.q    q1,a13,3            # [88]
+
+// load in_offset+bias factor
+    addmi           a14,a1,256                  # [86]
+    ld.qr           q7,a14,-128                  # [87]  gra_spill_temp_134-256
+    ld.qr           q4,a14,-112                  # [89]  gra_spill_temp_135-256
+    l32i.n          a15,a1,16               # [90]  qacc_scratch+16
+    ld.qr           q2,a14,-96                  # [91]  gra_spill_temp_133-256
+    slli            a11,a9,28                   # [92]
+    slli            a10,a9,8                    # [93]
+    srai            a10,a10,12                  # [94]
+    srai            a11,a11,12                  # [95]
+    extui           a9,a9,24,8                  # [96]
+    or              a11,a11,a12                 # [97]
+    ee.vadds.s32    q0,q0,q2            # [98]
+    slli            a8,a15,20                   # [99]
+    ee.vadds.s32    q3,q3,q4            # [100]
+    st.qr           q3,a1,80                # [101]  gra_spill_temp_137 (a1-relative, offset 80)
+    srai            a15,a15,12                  # [102]
+    ld.qr           q2,a14,-80                  # [103]  gra_spill_temp_132-256
+    srai            a8,a8,12                    # [105]
+    or              a8,a8,a9                    # [108]
+
+// insert to q6
+    ee.movi.32.q    q6,a11,0            # [100]
+    ee.movi.32.q    q6,a10,1            # [107]
+    ee.movi.32.q    q6,a8,2             # [112]
+    ee.movi.32.q    q6,a15,3            # [113]
+
+    ee.vadds.s32    q1,q1,q2            # [110]
+    ee.vadds.s32    q6,q6,q7            # [114]
+    st.qr           q1,a1,16                   # [111]  gra_spill_temp_120
+    s32i.n          a7,a1,32                # [0] // tmp
+    s32i.n          a6,a1,36                # [106] // tmp
+    l32i            a7,a1,52                # [109]  gra_spill_temp_105, out_mult_ptr
+    l32i            a6,a1,56                # [106]  gra_spill_temp_106, out_shift_ptr
+    addi.n          a10,a7,0                # out_mult_ptr + 0 passed in a10
+    addi.n          a11,a6,0                # out_shift_ptr + 0 passed in a11
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [116]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+    mv.qr       q5,q0
+    ld.qr       q0,a1,80                # [4]  gra_spill_temp_137 (a1-relative, offset 80)
+    addi.n      a10,a7,16
+    addi.n      a11,a6,16
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [5]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+    mv.qr       q4,q0
+    ld.qr       q0,a1,16                   # [5]  gra_spill_temp_120
+    addi.n      a10,a7,32
+    addi.n      a11,a6,32
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [6]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+    st.qr       q0,a1,0                 # [3]  gra_spill_temp_141
+    mv.qr       q0,q6
+    addi.n      a10,a7,48
+    addi.n      a11,a6,48
+    call8   esp_nn_multiply_by_quantized_mult_ver1_esp32s3     # [6]  esp_nn_multiply_by_quantized_mult_ver1_esp32s3
+
+
+    l32i.n  a6,a1,36                # [106]  // tmp
+    l32i.n  a7,a1,32                # [0]  // tmp
+    l32i    a15,a1,40                   # gra_spill_temp_103, stride_wd * channels
+    l32i    a11,a1,120                  # [3]  gra_spill_temp_130
+
+    add.n   a3,a3,a15                   # [0]
+    add.n   a4,a4,a15                   # [1]
+    add.n   a7,a7,a15                   # [2]
+    addi.n  a5,a5,1                 # [4]
+
+ // add offset, apply activation and store
+    addmi   a13,a1,256                  # [8]
+    ld.qr   q3,a1,0                 # [10]  gra_spill_temp_141
+    mv.qr   q2,q5
+    addi    a8,a13,88                   # [14]
+    addi    a9,a13,100                  # [15]
+    addi    a15,a13,104                 # [13]
+    ee.vldbc.32     q6,a9               # [17]  id:723 activation_min
+    ee.vldbc.32     q1,a8               # [18]  id:722 out_offset
+    ee.vldbc.32     q7,a15              # [19]  id:724 activation_max
+    ee.vadds.s32    q4,q4,q1            # [20]
+    ee.vadds.s32    q2,q2,q1            # [21]
+    ee.vadds.s32    q5,q0,q1            # [22]
+    ee.vadds.s32    q3,q3,q1            # [23]
+    ee.vmin.s32     q3,q3,q7            # [24]
+    ee.vmin.s32     q5,q5,q7            # [25]
+    ee.vmin.s32     q2,q2,q7            # [26]
+    ee.vmin.s32     q4,q4,q7            # [27]
+    ee.vmax.s32     q4,q4,q6            # [28]
+    ee.vmax.s32     q2,q2,q6            # [29]
+    ee.vmax.s32     q5,q5,q6            # [30]
+    ee.vmax.s32     q3,q3,q6            # [31]
+    ee.vunzip.16    q3,q5               # [32]
+    ee.vunzip.16    q2,q4               # [33]
+    ee.vunzip.8     q2,q3               # [34]
+    ee.vst.128.xp   q2,a6,a2            # [35]  id:725
+    bne             a5,a11,.Lt_7_7170               # [36]
+
+.Lt_7_6658: # 0x112f
+#<loop> Part of loop body line 548, head labeled .Lt_7_6402
+    l32i    a15,a1,112                  # [3]  gra_spill_temp_113, input_wd*channels
+    l32i    a10,a1,320                  # stride_ht (stack argument at offset 320)
+    l32i    a13,a1,340                  # [0]  // out_ht
+    l32i    a9,a1,116                   # [1]  gra_spill_temp_114, input_wd
+    l32i    a12,a1,96                  # [4]  gra_spill_temp_118
+    mull    a15,a10,a15                 # // (input_wd * stride_ht) * channels
+    l32i    a14,a1,104                  # [5]  gra_spill_temp_120
+    l32i    a8,a1,100                   # [2]  gra_spill_temp_119
+
+    addi.n  a12,a12,1               # [6]
+    s32i    a12,a1,96                  # [7]  gra_spill_temp_118
+    add.n   a14,a14,a15                 # [8]
+    add.n   a8,a8,a9                    # [9]
+    s32i    a8,a1,100                   # [10]  gra_spill_temp_119
+    s32i    a14,a1,104                  # [11]  gra_spill_temp_120, advanced by (input_wd * stride_ht) * channels
+    bne     a12,a13,.Lt_7_6402              # [13] // iterate over height loop
+
+#<loop> Part of loop body line 348, head labeled .Lt_7_5122
+    l32i    a11,a1,56                   # [6]  gra_spill_temp_106 // out_shift_ptr
+    l32i    a15,a1,52                   # [2]  gra_spill_temp_105, out_mult_ptr
+    l32i    a10,a1,60                   # [24]  gra_spill_temp_108, ch_idx
+    addi    a11,a11,64                  # [8]
+    addi    a15,a15,64                  # [13]
+    s32i    a11,a1,56                   # [23]  gra_spill_temp_106
+    s32i    a15,a1,52                   # [18]  gra_spill_temp_105, out_mult_ptr
+    l32i    a11,a1,64                   # [25]  gra_spill_temp_109
+    addi    a10,a10,16                  # [26]
+    s32i    a10,a1,60                   # [27]  gra_spill_temp_108, ch_idx
+    blt     a10,a11,.Lt_7_5122          # [28] // iterate over outer most out_ch loop
+
+.Lt_7_4610: # 0x11ad
+    retw.n                          # [0]
+
+    .size   esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3, . - esp_nn_depthwise_conv_s8_mult1_3x3_padded_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S
new file mode 100644
index 0000000..9c1a835
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/fully_connected/esp_nn_fully_connected_s8_esp32s3.S
@@ -0,0 +1,220 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+//
+// SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+    .text
+    .align  4
+    .literal_position
+    .literal    .LC3_26_101, 1073741824 // nudge (1 << 30)
+
+    # Program Unit: esp_nn_fully_connected_s8_esp32s3 -- s8 fully connected layer, stores one int8 result per output channel
+    .type   esp_nn_fully_connected_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_fully_connected_s8_esp32s3
+
+// a2: input_data
+// a3: input_offset
+// a4: row_len
+// a5: filter_data
+// a6: filter_offset
+// a7: bias
+// on stack: out_data (read at a1+112 after entry)
+// on stack: out_channels (read at a1+116)
+// on stack: out_offset (read at a1+120)
+// on stack: out_shift (read at a1+124)
+// on stack: out_mult (read at a1+128)
+// on stack: activation_min (read at a1+132)
+// on stack: activation_max (read at a1+136)
+
+esp_nn_fully_connected_s8_esp32s3:  # 0x4
+    # qacc_scratch = 0
+    // 40, filter_offset
+    // 44, input_offset
+    # gra_spill_temp_7 = 48 // bias
+    # gra_spill_temp_0 = 52 // left_shift = max(out_shift, 0)
+    # gra_spill_temp_1 = 56 // right_shift = left_shift - out_shift
+    # gra_spill_temp_2 = 60 // filter_data
+    # gra_spill_temp_3 = 64 // row_len >> 3, trip count of the vector loop
+    # gra_spill_temp_4 = 68 // (row_len >> 3) << 3, elements covered by the vector loop
+    # gra_spill_temp_5 = 72 // out_data
+    # gra_spill_temp_6 = 76 // row_len - 7
+
+    entry   a1,112                      #
+    s32i.n  a5,a1,60                # [0]  gra_spill_temp_2, filter_data
+    s32i    a7,a1,48                    # [1]  gra_spill_temp_7, bias
+    s32i    a6,a1,40                    # [2]  id:252 filter_offset+0x0
+    s32i    a3,a1,44                    # [3]  id:251 input_offset+0x0
+    mov.n   a13,a2                      # [5]  a13 = input_data for the rest of the function
+    mov.n   a12,a4                      # [6]  a12 = row_len for the rest of the function
+
+ // out_channel loop
+    l16ui       a2,a1,116                   # [7]  id:255 out_channels+0x0 (16-bit load)
+    addi        a4,a1,40                # [8]
+    addi        a8,a1,44                # [9]
+    ee.vldbc.16 q5,a8               # [10]  id:253 input_offset
+    ee.vldbc.16 q6,a4               # [12]  id:254 filter_offset
+    beqz.n      a2,.Lt_0_7938           # [13]  return when out_channels == 0
+
+    ee.zero.q   q7                      # [0]
+    srai        a11,a12,3                   # [2]
+    l32i        a10,a1,128                  # [5]  id:257 out_mult+0x0 -- NOTE(review): dead load, a10 is reloaded with out_shift below before any use
+    l32i        a8,a1,112                   # [6]  id:259 out_data+0x0
+    addi        a9,a12,-7                   # [7]
+    s32i        a9,a1,76                    # [8]  gra_spill_temp_6
+    s32i        a8,a1,72                    # [9]  gra_spill_temp_5
+    s32i        a11,a1,64                   # [14]  gra_spill_temp_3
+    slli        a11,a11,3                   # [16]
+    s32i        a11,a1,68                   # [18]  gra_spill_temp_4
+    l32i        a10,a1,124                  # [25]  id:256 out_shift+0x0
+    movi.n      a15,0                   # [17]  a15 = output channel counter
+    mov.n       a14,a7                      # [15]  a14 = running bias pointer
+    max         a11,a10,a15                 # [29]
+    s32i        a11,a1,52                   # [30]  gra_spill_temp_0 // left_shift
+    sub         a10,a11,a10                 #  // right_shift
+    s32i.n      a10,a1,56                   # [28]  gra_spill_temp_1 // right_shift
+    mov.n       a11,a5                      # [31]  a11 = filter row pointer, advanced by row_len per channel
+    movi.n      a10,0                   # [32]  a10 = filter row offset, advanced by row_len per channel
+    mov.n       a2,a11                      # [33]
+
+.Lt_0_8450: # 0x12b
+
+    l32i            a9,a1,76                    # [2]  gra_spill_temp_6
+    extui           a5,a11,0,3                  # [34]  low 3 bits of the filter row address
+    ee.zero.accx
+    slli            a5,a5,1                     # [3]
+    bgei            a9,0,.LBB6_esp_nn_fully_connected_s8_esp32s3            # [9]
+
+    mov.n           a5,a10                      # [6]  row_len < 7 path: scalar loop starts at the row offset
+    movi.n  a2,0                    # [0]
+    j       .Lt_0_8706                      # [1]
+
+.LBB6_esp_nn_fully_connected_s8_esp32s3:    # 0x147
+    wur.sar_byte    a5                  # [5]
+    ee.vld.l.64.ip  q4,a2,8         # [4]  id:267
+    l32i            a4,a1,64                    # [0]  gra_spill_temp_3
+    mov.n           a3,a13                      # [1]
+    addx8           a5,a4,a10                   # [2]  a5 = row offset + 8 * vector trip count
+    ee.vcmp.lt.s8   q2,q4,q7            # [7]
+    ee.vzip.8       q4,q2                   # [8]
+    loopgtz a4,.LBB45_esp_nn_fully_connected_s8_esp32s3     # [3]
+
+    ee.vld.l.64.ip      q0,a2,8         # [0*II+0]  id:268
+    ee.vld.l.64.ip      q1,a3,8         # [0*II+1]  id:270
+    ee.vcmp.lt.s8       q2,q0,q7            # [0*II+2]
+    ee.vcmp.lt.s8       q3,q1,q7            # [0*II+3]
+    ee.vzip.8           q0,q2                   # [0*II+4]
+    ee.vzip.8           q1,q3                   # [0*II+5]
+    ee.vadds.s16        q1,q1,q5            # [0*II+6]
+    ee.src.q.qup        q2,q4,q0            # [0*II+7]
+    ee.vadds.s16        q2,q2,q6            # [0*II+8]
+    ee.vmulas.s16.accx  q1,q2       # [0*II+9]
+
+.LBB45_esp_nn_fully_connected_s8_esp32s3:   # 0x170
+    l32i    a2,a1,68                    # [0]  gra_spill_temp_4
+
+.Lt_0_8706: # 0x173
+	movi a9, 0
+	ee.srs.accx  a6, a9, 0
+
+    bge             a2,a12,.Lt_0_9730           # [38]  skip the scalar loop when no elements remain
+
+// prepare remaining loop
+    l32i    a8,a1,44                    # [0]  id:251 input_offset+0x0
+    l32i    a7,a1,40                    # [1]  id:252 filter_offset+0x0
+    sub     a3,a12,a2                   # [2]  remaining element count
+    l32i.n  a4,a1,60                # [3]  gra_spill_temp_2
+    add.n   a2,a2,a13                   # [4]
+    add.n   a4,a4,a5                    # [5]
+    loopgtz a3,.LBB60_esp_nn_fully_connected_s8_esp32s3     # [6]
+
+// remaining c loop
+    l8ui    a3,a2,0                     # [0*II+0]  id:299
+    l8ui    a5,a4,0                     # [0*II+1]  id:300
+    sext    a3,a3,7                     # [0*II+2]
+    sext    a5,a5,7                     # [0*II+3]
+    add.n   a5,a5,a7                    # [0*II+5]
+    add.n   a3,a3,a8                    # [0*II+6]
+    mull    a3,a3,a5                    # [0*II+7]
+    addi.n  a2,a2,1                 # [0*II+8]
+    addi.n  a4,a4,1                 # [0*II+4]
+    add.n   a6,a6,a3                    # [0*II+9]
+
+.LBB60_esp_nn_fully_connected_s8_esp32s3:   # 0x20f
+
+// add bias
+.Lt_0_9730: # 0x20f
+    l32i    a8,a1,48                    # [0]  gra_spill_temp_7, bias
+    beqz.n  a8,.Lt_0_10754          # [2], skip_bias
+
+    l32i.n  a9,a14,0                # [0]  id:301
+    add.n   a6,a6,a9                    # [2]
+
+// apply quantization
+.Lt_0_10754:    # 0x218
+    l32i    a2,a1,52                    # [1]  gra_spill_temp_0 // left_shift
+    l32i    a5,a1,56                    # [2]  gra_spill_temp_1 // right_shift
+    ssl     a2                          # [3]
+    sll     a6,a6                       # [5] // x * (1 << left_shift)
+
+    l32r    a3,.LC3_26_101              # [0]
+
+    add.n   a10,a10,a12                 # [0]  advance the filter row offset by row_len
+    addi.n  a14,a14,4               # [1]  advance the bias pointer by one int32
+
+    l32i    a4,a1,128                   # [2]  gra_spill_temp_10 //out_mult
+    add.n   a11,a11,a12                 # [6]  advance the filter row pointer by row_len
+
+// multiply add nudge and pick high32
+    ssai    31
+    mulsh   a7,a4,a6                    # [4]
+    mull    a4,a4,a6                    # [5]
+
+    mov.n   a2,a11                      # [27]
+    add     a4,a4,a3
+    saltu   a8,a4,a3                    # carry out of the low word add
+    add.n   a7,a7,a8
+    src     a3,a7,a4
+
+// divide_by_power_of2_step
+    blti    a5,1,.skip_divide_by2
+    movi.n  a8,1                    # [28]
+    addi    a4,a5,-1
+    ssl     a4          // load left_shift
+    sll     a8,a8       // to_add factor ( 1 << (exponent - 1))
+    extui   a6,a3,31,1                  # [33]
+    sub     a8,a8,a6        // modified to_add factor ( 1 << (exponent - 1) - (val < 0))
+    add     a3,a3,a8    // val + to_add
+    ssr     a5                          # [29] //load right_shift
+    sra     a3,a3                       # [31]
+.skip_divide_by2:
+
+    l32i    a8,a1,120                   # [41]  out_offset
+    l32i    a7,a1,132                   # [44] // activation_min
+    l32i    a4,a1,136                   # [45] // activation_max
+
+    add.n   a8,a8,a3                    # [46] // add out_offset
+    l32i    a6,a1,72                    # [47]  gra_spill_temp_5
+    l32i.n  a3,a1,116                   # [48]  out_channels (32-bit load, entry used a 16-bit load) NOTE(review): confirm the upper half of this slot is zero
+    max     a7,a7,a8                    # [49]
+    add.n   a6,a15,a6                   # [50]
+    min     a4,a4,a7                    # [51]
+    addi.n  a15,a15,1               # [52]
+    s8i     a4,a6,0                     # [53]  id:302
+    bne     a3,a15,.Lt_0_8450               # [55]
+
+.Lt_0_7938: # 0x25c
+    retw.n                          # [0]
+
+    .size   esp_nn_fully_connected_s8_esp32s3, . - esp_nn_fully_connected_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S
new file mode 100644
index 0000000..9e76a1e
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_avg_pool_s8_esp32s3.S
@@ -0,0 +1,686 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .align  4
+    .literal_position
+
+    # Program Unit: esp_nn_avg_pool_s8_esp32s3
+    .type   esp_nn_avg_pool_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_avg_pool_s8_esp32s3
+
+// The number of channels must be a multiple of 4; for any other channel count
+// the function returns without writing anything to output.
+
+// a2: input
+// a3: input_wd
+// a4: input_ht
+// a5: output
+// a6: output_wd
+// a7: output_ht
+// on stack: stride_wd
+// on stack: stride_ht
+// on stack: filter_wd
+// on stack: filter_ht
+// on stack: pad_wd
+// on stack: pad_ht
+// on stack: activation_min
+// on stack: activation_max
+// on stack: channels
+
+// Average pooling for signed 8-bit data (ESP32-S3 vector extension).
+// After `entry` the register arguments are a2 = input, a3 = input_wd,
+// a4 = input_ht, a5 = output, a6 = output_wd, a7 = output_ht. The remaining
+// arguments are read from the stack above the 240-byte frame: stride_wd at
+// a1+240, stride_ht 244, filter_wd 248, filter_ht 252, pad_wd 256,
+// pad_ht 260, activation_min 264, activation_max 268, channels 272.
+// The prologue saves the arguments that are needed later, broadcasts the
+// activation limits into q7/q6, zeroes q4 and dispatches on the channel
+// count: a multiple of 8 goes to the 8-channel path, a multiple of 4 to the
+// 4-channel path, anything else returns immediately.
+esp_nn_avg_pool_s8_esp32s3: # 0x4
+    // Frame offsets of the locals. The gra_spill_temp_* names in the trailing
+    // comments of the instructions do not always match this table; the
+    // numeric offset in the instruction is authoritative.
+    # activation_min = 0
+    # activation_max = 4
+    # gra_spill_temp_0 = 8
+    # gra_spill_temp_1 = 12
+    # gra_spill_temp_2 = 16
+    # gra_spill_temp_3 = 20
+    # gra_spill_temp_4 = 24
+    # gra_spill_temp_5 = 28
+    # gra_spill_temp_6 = 32
+    # gra_spill_temp_7 = 36
+    # gra_spill_temp_8 = 40
+    # gra_spill_temp_9 = 44
+    # gra_spill_temp_10 = 48
+    # gra_spill_temp_11 = 52
+    # gra_spill_temp_12 = 56
+    # gra_spill_temp_13 = 60
+    # gra_spill_temp_14 = 64
+    # gra_spill_temp_15 = 68
+    # gra_spill_temp_16 = 72
+    # gra_spill_temp_17 = 76
+    # gra_spill_temp_18 = 80
+    # gra_spill_temp_19 = 84
+    # gra_spill_temp_20 = 88
+    # gra_spill_temp_21 = 92
+    # gra_spill_temp_22 = 96
+    # gra_spill_temp_23 = 100
+    # gra_spill_temp_24 = 104
+    # gra_spill_temp_25 = 108
+    # gra_spill_temp_26 = 112
+    # gra_spill_temp_27 = 116
+    # gra_spill_temp_28 = 120
+    # gra_spill_temp_29 = 124
+    # gra_spill_temp_30 = 128
+    # gra_spill_temp_31 = 132
+    # gra_spill_temp_32 = 136
+    # gra_spill_temp_33 = 140
+    # gra_spill_temp_34 = 144
+    # gra_spill_temp_35 = 148
+    # gra_spill_temp_36 = 152
+    # gra_spill_temp_37 = 156
+    # gra_spill_temp_38 = 160
+    # gra_spill_temp_39 = 164
+    # gra_spill_temp_40 = 168
+    # gra_spill_temp_41 = 172
+    # gra_spill_temp_43 = 180
+
+    entry   a1,240                      #
+    mov.n   a11,a3                      # [0]
+    mov.n   a12,a2                      # [1]
+    s32i    a5,a1,136                   # [4]  gra_spill_temp_32 (offset 136): output
+    s32i    a6,a1,128                   # [3]  gra_spill_temp_30 (offset 128): output_wd
+
+    // a5 is reused for the channel count from here on.
+    l16ui   a5,a1,272                   # [5]  id:663 channels+0x0
+    s32i    a7,a1,72                    # [6]  gra_spill_temp_16
+
+    // Copy the activation limits into the frame so that they can be
+    // broadcast-loaded into q7 (min) and q6 (max); q4 is the all-zero vector.
+    l32i        a9,a1,264                   # [1]  id:664 activation_min+0x0
+    l32i        a10,a1,268                  # [2]  id:666 activation_max+0x0
+    s32i.n      a9,a1,0                 # [4]  activation_min
+    s32i.n      a10,a1,4                # [3]  activation_max
+    addi.n      a8,a1,4                 # [0]  activation_max
+    ee.vldbc.32 q7,a1               # [5]  id:668 activation_min
+    ee.vldbc.32 q6,a8               # [6]  id:669 activation_max
+    ee.zero.q   q4                      # [0]
+
+    extui   a10,a5,0,3                  # [7]
+    beqz.n  a10,.LBB3_esp_nn_avg_pool_s8_esp32s3    # [8], if (channels % 8 == 0)
+
+    extui   a13,a5,0,2                  # [0]
+    beqz.n  a13,.LBB52_esp_nn_avg_pool_s8_esp32s3   # [1], if (channels % 4 == 0)
+
+// exit
+// Shared return point: reached by fall-through when channels is not a
+// multiple of 4, and by both row loops when they finish.
+.Lt_0_44546:    # 0x1e9
+    retw.n                          # [0]
+
+// 8-channel path setup (channels % 8 == 0): initialise the per-row running
+// values in the frame, then enter the output-row loop at .Lt_0_28162.
+//   a1+56  = -pad_ht              (input row of the window origin)
+//   a1+40  = input_ht + pad_ht    (input_ht minus the window origin row)
+//   a1+44  = 0                    (output offset of the current row)
+//   a1+20  = 0                    (output row counter)
+//   a1+16  = output row pitch in bytes
+//   a1+104 = -pad_wd, a1+12 = input_wd + pad_wd (per-row start values)
+.LBB3_esp_nn_avg_pool_s8_esp32s3:   # 0x1eb // if (channels % 8 == 0)
+
+    l16ui   a7,a1,256                   # [1]  id:671 pad_wd+0x0
+    l16ui   a10,a1,260                  # [5]  id:670 pad_ht+0x0
+    // The column loop writes output_wd pixels of `channels` bytes per row, so
+    // the row pitch is output_wd * channels. output_wd was saved at a1+128 by
+    // the prologue; a1+72 holds output_ht, which is only the loop bound and
+    // would give a wrong pitch for non-square outputs.
+    l32i    a15,a1,128                  # [12]  output_wd
+    movi.n  a14,0                   # [13]
+    movi.n  a8,0                    # [14]
+    neg     a10,a10                     # [15]
+    s32i    a10,a1,56                   # [16]  gra_spill_temp_12
+    s32i    a8,a1,44                    # [17]  gra_spill_temp_9
+    s32i.n  a14,a1,20               # [18]  gra_spill_temp_3
+    sub     a9,a4,a10                   # [19]
+    s32i    a9,a1,40                    # [20]  gra_spill_temp_8
+    mul16u  a15,a15,a5              # [21]  row pitch = output_wd * channels
+    neg     a13,a7                      # [22]
+    s32i    a13,a1,104                  # [23]  gra_spill_temp_24
+    s32i.n  a15,a1,16               # [24]  gra_spill_temp_2
+    sub     a13,a3,a13                  # [25]
+    s32i.n  a13,a1,12               # [26]  gra_spill_temp_1
+    j       .Lt_0_28162                     # [27]
+
+// End of one output row (8-channel path). Steps the per-row running values by
+// stride_ht, adds the row pitch (a1+16) to the output row offset (a1+44),
+// increments the row counter (a1+20) and returns through .Lt_0_44546 once the
+// counter equals the value saved at a1+72; otherwise falls through into the
+// next row.
+// NOTE(review): the pad_ht argument slot (a1+260) is used as a running value
+// and is accessed as a full 32-bit word here, while the setup code reads it
+// with l16ui - presumably the upper half of the slot is zero; confirm.
+.Lt_0_28418:    # 0x24e
+#<loop> Part of loop body line 44, head labeled .Lt_0_28162
+    l32i    a15,a1,260                  # [0]  pad_ht
+    l32i    a14,a1,56                   # [1]  gra_spill_temp_12
+    l32i.n  a9,a1,16                # [2]  gra_spill_temp_2
+    l32i    a13,a1,244                  # [3]  stride_ht
+    l32i    a10,a1,40                   # [4]  gra_spill_temp_8
+    l32i    a8,a1,44                    # [5]  gra_spill_temp_9
+    sub     a10,a10,a13                 # [6]
+    add.n   a8,a8,a9                    # [7]
+    add.n   a14,a14,a13                 # [8]
+    sub     a15,a15,a13                 # [9]
+    s32i    a15,a1,260                  # [10]  pad_ht
+    s32i    a14,a1,56                   # [11]  gra_spill_temp_12
+    s32i    a8,a1,44                    # [12]  gra_spill_temp_9
+    s32i    a10,a1,40                   # [13]  gra_spill_temp_8
+    l32i.n  a8,a1,20                # [14]  gra_spill_temp_3
+    l32i    a9,a1,72                    # [15]  gra_spill_temp_16
+    addi.n  a8,a8,1                 # [16]
+    s32i.n  a8,a1,20                # [17]  gra_spill_temp_3
+    beq a8,a9,.Lt_0_44546           # [18]
+
+// Head of the output-row loop (8-channel path). If the value saved at a1+128
+// (output_wd) is zero the row is skipped. Otherwise the per-column running
+// values are reset for the new row:
+//   a1+112 = pad_wd, a1+116 = -pad_wd, a1+120 = input_wd + pad_wd,
+//   a1+48  = 0, a1+92 = 0 (column counter),
+//   a1+124 = output offset of the row (copied from a1+44),
+//   a1+160 = max(running pad_ht, 0)          (first filter row to visit)
+//   a6     = min(filter_ht, value at a1+40)  (filter row bound)
+.Lt_0_28162:    # 0x281
+    l32i    a10,a1,128                  # [0]  gra_spill_temp_32
+    beqz.n  a10,.Lt_0_28418         # [2]
+
+.LBB7_esp_nn_avg_pool_s8_esp32s3:   # 0x286
+#<loop> Part of loop body line 44, head labeled .Lt_0_28162
+    s32i    a7,a1,112                   # [0]  gra_spill_temp_26
+    movi.n  a10,0                   # [1]
+    l32i    a9,a1,260                   # [2]  pad_ht
+    l32i.n  a6,a1,12                # [3]  gra_spill_temp_1
+    l32i    a8,a1,44                    # [4]  gra_spill_temp_9
+    movi.n  a13,0                   # [5]
+    l32i    a15,a1,104                  # [6]  gra_spill_temp_24
+    s32i    a15,a1,116                  # [7]  gra_spill_temp_27
+    s32i    a13,a1,48                   # [8]  gra_spill_temp_10
+    s32i    a8,a1,124                   # [9]  gra_spill_temp_29
+    s32i    a6,a1,120                   # [10]  gra_spill_temp_28
+    l32i    a8,a1,40                    # [11]  gra_spill_temp_8
+    l32i    a6,a1,252                   # [12]  filter_ht
+    movi.n  a13,0                   # [13]
+    max     a9,a9,a10                   # [14]
+    s32i    a9,a1,160                   # [15]  gra_spill_temp_38
+    s32i    a13,a1,92                   # [16]  gra_spill_temp_21
+    min     a6,a6,a8                    # [17]
+    bnez.n  a5,.LBB10_esp_nn_avg_pool_s8_esp32s3    # [18]
+
+// End of one output pixel (8-channel path). Advances the output offset
+// (a1+124) by `channels`, steps the per-column running values by stride_wd
+// (a1+116 and a1+48 go up, a1+112 and a1+120 go down), increments the column
+// counter (a1+92) and leaves the row via .Lt_0_28418 when the counter equals
+// the value at a1+128. A zero channel count (a5) skips the pixel body.
+.Lt_0_29186:    # 0x2ba
+    l32i    a8,a1,116                   # [0]  gra_spill_temp_27
+    l32i    a15,a1,120                  # [1]  gra_spill_temp_28
+    l32i    a9,a1,48                    # [2]  gra_spill_temp_10
+    l32i    a14,a1,240                  # [3]  stride_wd
+    l32i    a10,a1,124                  # [4]  gra_spill_temp_29
+    l32i    a13,a1,112                  # [5]  gra_spill_temp_26
+    add.n   a10,a10,a5                  # [6]
+    s32i    a10,a1,124                  # [7]  gra_spill_temp_29
+    sub     a13,a13,a14                 # [8]
+    add.n   a9,a9,a14                   # [9]
+    sub     a15,a15,a14                 # [10]
+    add.n   a8,a8,a14                   # [11]
+    s32i    a8,a1,116                   # [12]  gra_spill_temp_27
+    s32i    a15,a1,120                  # [13]  gra_spill_temp_28
+    s32i    a9,a1,48                    # [14]  gra_spill_temp_10
+    s32i    a13,a1,112                  # [15]  gra_spill_temp_26
+    l32i    a9,a1,92                    # [16]  gra_spill_temp_21
+    l32i    a10,a1,128                  # [17]  gra_spill_temp_32
+    addi.n  a9,a9,1                 # [18]
+    s32i    a9,a1,92                    # [19]  gra_spill_temp_21
+    beq     a9,a10,.Lt_0_28418          # [20]
+
+.Lt_0_28930:    # 0x2f5
+#<loop> Part of loop body line 46, head labeled .Lt_0_29186
+    beqz.n  a5,.Lt_0_29186          # [0]
+
+// Per-pixel setup and accumulation (8-channel path).
+// .LBB10 prepares one output pixel:
+//   a1+24  = 0                          (channel offset inside the pixel)
+//   a1+148 = output + pixel offset      (write pointer)
+//   a1+180 = write pointer + channels   (end pointer for the channel loop)
+//   a1+76  = min(filter_wd, value at a1+120)   (filter column bound)
+//   a1+152 = max(value at a1+112, 0)           (first filter column)
+//   a1+36  = value at a1+116 + first filter column (first input column)
+//   a1+52  = column bound - first column       (taps per filter row)
+// .LBB13 .. .Lt_0_30722 then walk the filter rows for one group of 8
+// channels: each row accumulates its taps into q3 (lanes 0-3) and q2
+// (lanes 4-7) as 32-bit sums and adds the number of taps to a15, which the
+// averaging code uses as the divisor.
+// The taps-per-row value used to be spilled to a1+280. That address lies
+// beyond the 240-byte frame and beyond the last incoming stack argument
+// (channels at a1+272), i.e. in the frame of the caller. It now lives at
+// a1+52, which is not used anywhere else on the 8-channel path.
+.LBB10_esp_nn_avg_pool_s8_esp32s3:  # 0x2f7
+#<loop> Part of loop body line 44, head labeled .Lt_0_28162
+    l32i    a14,a1,120                  # [0]  gra_spill_temp_28
+    l32i    a13,a1,248                  # [1]  filter_wd
+    l32i    a9,a1,136                   # [2]  gra_spill_temp_30
+    l32i    a8,a1,124                   # [3]  gra_spill_temp_29
+    movi.n  a15,0                   # [4]
+    s32i    a15,a1,24                   # [5]  gra_spill_temp_60
+    add.n   a10,a8,a5                   # [6]
+    movi.n  a15,0                   # [7]
+    add.n   a8,a8,a9                    # [8]
+    min     a13,a13,a14                 # [9]
+    add.n   a10,a9,a10                  # [10]
+    s32i    a10,a1,180                  # [11]  gra_spill_temp_43
+    s32i    a13,a1,76                   # [12]  gra_spill_temp_17
+    l32i    a14,a1,112                  # [13]  gra_spill_temp_26
+    s32i    a8,a1,148                   # [14]  gra_spill_temp_45
+    max     a14,a14,a15                 # [15]
+    l32i    a15,a1,116                  # [16]  gra_spill_temp_27
+    s32i    a14,a1,152                  # [17]  gra_spill_temp_63
+    add.n   a8,a15,a14                  # [18]
+    s32i    a8,a1,36                    # [19]  gra_spill_temp_7
+    add.n   a15,a15,a13                 # [20]
+    s32i    a15,a1,204                  # [21]  gra_spill_temp_39
+    sub     a13,a13,a14                 # [22]
+    s32i    a13,a1,52                   # [23]  gra_spill_temp_11: taps per row
+    j   .Lt_0_29698                     # [24]
+
+.LBB13_esp_nn_avg_pool_s8_esp32s3:  # 0x33b
+#<loop> Part of loop body line 16, head labeled .Lt_0_29698
+    // a10 = (window origin row + first filter row) * input_wd; a15 restarts
+    // as the tap counter for this channel group.
+    l32i    a10,a1,56                   # [0]  gra_spill_temp_12
+    l32i    a14,a1,204                  # [1]  gra_spill_temp_39
+    add.n   a10,a10,a15                 # [2]
+    mull    a10,a11,a10                 # [3]
+    movi.n  a15,0                   # [4]
+    add.n   a14,a10,a14                 # [5]
+
+.Lt_0_30466:    # 0x34a
+#<loop> Loop body line 61, nesting depth: 4, estimated iterations: 252
+    // Skip the row when the first filter column is not below the column bound.
+    l32i    a9,a1,76                    # [0]  gra_spill_temp_17
+    l32i    a8,a1,152                   # [1]  gra_spill_temp_63
+    add.n   a14,a14,a11                 # [2]
+    bge     a8,a9,.Lt_0_30722           # [3]
+
+.LBB16_esp_nn_avg_pool_s8_esp32s3:  # 0x355
+#<loop> Part of loop body line 61, head labeled .Lt_0_30466
+    // a2 = input + (row * input_wd + first column) * channels + channel offset
+    l32i    a3,a1,36                    # [0]  gra_spill_temp_7
+    l32i    a2,a1,24                    # [1]  gra_spill_temp_4
+    add.n   a3,a3,a10                   # [2]
+    mull    a3,a3,a5                    # [3]
+    movi.n  a8,0                    # [4]
+    add.n   a2,a2,a3                    # [5]
+    l32i    a3,a1,52                    # [6]  gra_spill_temp_11: taps per row
+    add.n   a2,a12,a2                   # [7]
+    loopgtz a3,.LBB140_esp_nn_avg_pool_s8_esp32s3   # [8]
+
+    // Load 8 bytes, step to the next input pixel (a5 = channels), widen
+    // s8 -> s16 -> s32 by zipping with the sign masks produced by the compares
+    // against zero (q4), then accumulate.
+    ee.vld.l.64.xp  q0,a2,a5            # [0*II+1]  id:677
+    ee.vcmp.lt.s8   q1,q0,q4            # [0*II+3]
+    ee.vzip.8       q0,q1                   # [0*II+4]
+    ee.vcmp.lt.s16  q1,q0,q4        # [0*II+5]
+    ee.vzip.16      q0,q1               # [0*II+6]
+    ee.vadds.s32    q2,q2,q1            # [0*II+7]
+    ee.vadds.s32    q3,q3,q0            # [0*II+8]
+
+
+.LBB140_esp_nn_avg_pool_s8_esp32s3: # 0x385
+#<loop> Part of loop body line 61, head labeled .Lt_0_30466
+    // a15 += min(filter_wd, input_wd - (x - pad_wd)) - max(pad_wd - x, 0),
+    // with x the value at a1+48 and a8 still 0 from .LBB16.
+    l32i    a2,a1,48                    # [0]  gra_spill_temp_10
+    sub     a9,a7,a2                    # [2]
+    sub     a2,a2,a7                    # [3]
+    max     a9,a9,a8                    # [4]
+    l32i    a8,a1,248                   # [5]  filter_wd
+    sub     a2,a11,a2                   # [6]
+    min     a8,a8,a2                    # [7]
+    sub     a8,a8,a9                    # [8]
+    add.n   a15,a15,a8                  # [9]
+
+.Lt_0_30722:    # 0x39f
+#<loop> Part of loop body line 61, head labeled .Lt_0_30466
+    // Next filter row: advance the row offset by input_wd until a13 reaches a6.
+    add.n   a10,a10,a11                 # [0]
+    addi.n  a13,a13,1               # [1]
+    bne     a6,a13,.Lt_0_30466          # [2]
+
+// Averaging, clamping and store for one group of 8 channels.
+// On entry q3/q2 hold eight 32-bit sums and a15 the number of accumulated
+// taps. Each lane is moved to a general register, a2 = a15 >> 1 is added when
+// the sum is >= 1 and subtracted otherwise, the result is divided by a15 with
+// quos and written back to the lane.
+// NOTE(review): a15 is 0 when no filter tap was accumulated (for example when
+// entered from .Lt_0_51458); quos then divides by zero. Confirm that callers
+// never produce such a window.
+.Lt_0_29954:    # 0x3a6
+    srai            a2,a15,1                    # [3]
+
+// move data to general purpose registers and average
+    ee.movi.32.a    q3,a9,0             # [0]
+    ee.movi.32.a    q3,a4,1             # [0]
+
+    blti            a9,1,.Lt_0_32258            # [4]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_32002                     # [2]
+.Lt_0_32258:    # 0x45e
+    sub             a9,a9,a2                    # [0]
+.Lt_0_32002:    # 0x3b9
+
+    blti            a4,1,.Lt_0_32770            # [1]
+    add.n           a4,a2,a4                    # [0]
+    j               .Lt_0_32514                     # [2]
+.Lt_0_32770:
+    sub             a4,a4,a2                    # [0]
+.Lt_0_32514:    # 0x3c4
+
+    quos            a9,a9,a15                   # [1]
+    quos            a4,a4,a15                   # [1]
+    ee.movi.32.q    q3,a9,0             # [0]
+    ee.movi.32.q    q3,a4,1             # [1]
+
+    ee.movi.32.a    q3,a9,2             # [2]
+    ee.movi.32.a    q3,a14,3            # [0]
+
+    blti            a9,1,.Lt_0_33282            # [3]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_33026                     # [2]
+.Lt_0_33282:    # 0x470
+    sub             a9,a9,a2                    # [0]
+.Lt_0_33026:    # 0x3d5
+
+    blti            a14,1,.Lt_0_33794           # [1]
+    add.n           a14,a2,a14                  # [0]
+    j               .Lt_0_33538                     # [2]
+.Lt_0_33794:    # 0x479
+    sub             a14,a14,a2                      # [0]
+.Lt_0_33538:    # 0x3e0
+
+    quos            a9,a9,a15                   # [1]
+    quos            a14,a14,a15                 # [1]
+    ee.movi.32.q    q3,a9,2             # [0]
+    ee.movi.32.q    q3,a14,3            # [1]
+
+
+    ee.movi.32.a    q2,a9,0             # [0]
+    ee.movi.32.a    q2,a4,1             # [0]
+
+    blti            a9,1,.Lt_0_34306            # [3]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_34050                     # [2]
+.Lt_0_34306:    # 0x482
+    sub             a9,a9,a2                    # [0]
+.Lt_0_34050:    # 0x3f1
+
+    blti            a4,1,.Lt_0_34818            # [1]
+    add.n           a4,a2,a4                    # [0]
+    j               .Lt_0_34562                     # [2]
+.Lt_0_34818:    # 0x48b
+    sub             a4,a4,a2                    # [0]
+.Lt_0_34562:    # 0x3fc
+
+    quos            a9,a9,a15                   # [1]
+    quos            a4,a4,a15                   # [1]
+    ee.movi.32.q    q2,a9,0             # [0]
+    ee.movi.32.q    q2,a4,1             # [1]
+
+    ee.movi.32.a    q2,a9,2             # [2]
+    ee.movi.32.a    q2,a14,3            # [0]
+
+    blti            a9,1,.Lt_0_35330            # [3]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_35074                     # [2]
+.Lt_0_35330:    # 0x494
+    sub             a9,a9,a2                    # [0]
+.Lt_0_35074:    # 0x40d
+
+    blti            a14,1,.Lt_0_35842           # [1]
+    add.n           a14,a2,a14                  # [0]
+    j               .Lt_0_35586                     # [2]
+.Lt_0_35842:    # 0x49d
+    sub             a14,a14,a2                      # [0]
+.Lt_0_35586:    # 0x418
+
+    quos            a9,a9,a15                   # [1]
+    quos            a14,a14,a15                 # [1]
+    ee.movi.32.q    q2,a9,2             # [0]
+    ee.movi.32.q    q2,a14,3            # [1]
+
+
+    // Clamp both halves to [q7, q6] (activation_min / activation_max), narrow
+    // 32 -> 16 -> 8 bits with the unzips and store 8 result bytes at the write
+    // pointer (a1+148). The channel offset (a1+24) and the write pointer both
+    // advance by 8; the pixel is finished when the pointer reaches a1+180.
+    // NOTE(review): presumably the 64-bit store expects an 8-byte aligned
+    // output address; confirm against the ESP32-S3 instruction reference.
+    l32i            a9,a1,180                   # [0]  gra_spill_temp_43
+    l32i            a14,a1,24                   # [1]  gra_spill_temp_4
+    l32i            a13,a1,148                  # [2]  gra_spill_temp_45
+    ee.vmin.s32     q1,q3,q6            # [4]
+    ee.vmax.s32     q1,q1,q7            # [5]
+    ee.vmin.s32     q5,q2,q6            # [8]
+    addi.n          a14,a14,8               # [9]
+    s32i            a14,a1,24                   # [10]  gra_spill_temp_4
+    ee.vmax.s32     q5,q5,q7            # [11]
+    addi.n          a8,a13,8                    # [12]
+    s32i            a8,a1,148                   # [13]  gra_spill_temp_45
+    ee.vunzip.16    q1,q5               # [14]
+    ee.vunzip.8     q1,q5               # [15]
+    ee.vst.l.64.ip  q1,a13,0        # [16]  id:678
+    bge             a8,a9,.Lt_0_29186           # [17]
+
+// Start of one channel group: clear the accumulators q3/q2 and enter the
+// filter-row loop when the first filter row (a1+160) is below the bound in a6.
+.Lt_0_29698:    # 0x44b
+#<loop> Loop body line 16, nesting depth: 3, estimated iterations: 252
+    mv.qr   q3,q4                       # [0]
+    l32i    a15,a1,160                  # [1]  gra_spill_temp_38
+    mv.qr   q2,q4                       # [2]
+    mov.n   a13,a15                     # [3]
+    blt a15,a6,.LBB13_esp_nn_avg_pool_s8_esp32s3    # [4]
+
+// No filter row to visit: go straight to the averaging code with a tap count
+// of zero.
+.Lt_0_51458:    # 0x459
+#<loop> Part of loop body line 16, head labeled .Lt_0_29698
+    movi.n  a15,0                   # [0]
+    j   .Lt_0_29954                     # [1]
+
+
+// 4-channel path setup (channels % 4 == 0 but not % 8): initialise the
+// per-row running values in the frame, then enter the output-row loop at
+// .Lt_0_37890.
+//   a1+64  = pad_ht               (running copy, stepped down by stride_ht)
+//   a1+192 = -pad_ht              (input row of the window origin)
+//   a1+28  = input_ht + pad_ht    (input_ht minus the window origin row)
+//   a1+32  = 0                    (output offset of the current row)
+//   a1+8   = 0                    (output row counter)
+//   a1+16  = output row pitch in bytes
+//   a1+104 = -pad_wd, a1+12 = input_wd + pad_wd (per-row start values)
+.LBB52_esp_nn_avg_pool_s8_esp32s3:  # 0x4a6 // if (channels % 4 == 0)
+
+    l16ui   a7,a1,256                   # [1]  id:671 pad_wd+0x0
+    l16ui   a13,a1,260                  # [5]  id:670 pad_ht+0x0
+    s32i    a13,a1,64                   # [8]  gra_spill_temp_4
+    // The column loop writes output_wd pixels of `channels` bytes per row, so
+    // the row pitch is output_wd * channels. output_wd was saved at a1+128 by
+    // the prologue; a1+72 holds output_ht, which is only the loop bound and
+    // would give a wrong pitch for non-square outputs.
+    l32i    a8,a1,128                   # [12]  output_wd
+    movi.n  a15,0                   # [13]
+    movi.n  a9,0                    # [14]
+    neg     a13,a13                     # [15]
+    s32i    a13,a1,192                  # [16]  gra_spill_temp_36
+    s32i    a9,a1,32                    # [17]  gra_spill_temp_6
+    s32i.n  a15,a1,8                # [18]  gra_spill_temp_0
+    sub     a10,a4,a13                  # [19]
+    s32i    a10,a1,28                   # [20]  gra_spill_temp_5
+    mul16u  a8,a8,a5                # [21]  row pitch = output_wd * channels
+    neg     a14,a7                      # [22]
+    s32i    a14,a1,104                  # [23]  gra_spill_temp_24
+    s32i.n  a8,a1,16                # [24]  gra_spill_temp_2
+    sub     a14,a3,a14                  # [25]
+    s32i.n  a14,a1,12               # [26]  gra_spill_temp_1
+    j   .Lt_0_37890                     # [27]
+
+// End of one output row (4-channel path). Steps the per-row running values by
+// stride_ht (a1+192 goes up, a1+64 and a1+28 go down), adds the row pitch
+// (a1+16) to the output row offset (a1+32), increments the row counter (a1+8)
+// and returns through .Lt_0_44546 once the counter equals the value saved at
+// a1+72; otherwise falls through into the next row.
+.Lt_0_38146:    # 0x50b
+#<loop> Part of loop body line 161, head labeled .Lt_0_37890
+    l32i    a15,a1,64                   # [0]  gra_spill_temp_4
+    l32i    a14,a1,192                  # [1]  gra_spill_temp_36
+    l32i.n  a9,a1,16                # [2]  gra_spill_temp_2
+    l32i    a13,a1,244                  # [3]  stride_ht
+    l32i    a10,a1,28                   # [4]  gra_spill_temp_5
+    l32i    a8,a1,32                    # [5]  gra_spill_temp_6
+    sub     a10,a10,a13                 # [6]
+    add.n   a8,a8,a9                    # [7]
+    add.n   a14,a14,a13                 # [8]
+    sub     a15,a15,a13                 # [9]
+    s32i    a15,a1,64                   # [10]  gra_spill_temp_4
+    s32i    a14,a1,192                  # [11]  gra_spill_temp_36
+    s32i    a8,a1,32                    # [12]  gra_spill_temp_6
+    s32i    a10,a1,28                   # [13]  gra_spill_temp_5
+    l32i.n  a8,a1,8                 # [14]  gra_spill_temp_0
+    l32i    a9,a1,72                    # [15]  gra_spill_temp_16
+    addi.n  a8,a8,1                 # [16]
+    s32i.n  a8,a1,8                 # [17]  gra_spill_temp_0
+    sub     a8,a8,a9                    # [18]
+    beqz    a8,.Lt_0_44546              # [19]
+
+// Head of the output-row loop (4-channel path). If the value saved at a1+128
+// (output_wd) is zero the row is skipped. Otherwise the per-column running
+// values are reset for the new row:
+//   a1+96  = pad_wd, a1+100 = -pad_wd, a1+144 = input_wd + pad_wd,
+//   a1+148 = 0, a1+88 = 0 (column counter),
+//   a1+108 = output offset of the row (copied from a1+32),
+//   a1+168 = max(running pad_ht, 0)          (first filter row to visit)
+//   a6     = min(filter_ht, value at a1+28)  (filter row bound)
+.Lt_0_37890:    # 0x541
+#<loop> Loop body line 161, nesting depth: 1, estimated iterations: 252
+    l32i    a10,a1,128                  # [0]  gra_spill_temp_32
+    beqz.n  a10,.Lt_0_38146         # [2]
+
+#<loop> Part of loop body line 161, head labeled .Lt_0_37890
+    s32i    a7,a1,96                    # [0]  gra_spill_temp_22
+    movi.n  a10,0                   # [1]
+    l32i    a9,a1,64                    # [2]  gra_spill_temp_4
+    l32i.n  a6,a1,12                # [3]  gra_spill_temp_1
+    l32i    a8,a1,32                    # [4]  gra_spill_temp_6
+    movi.n  a13,0                   # [5]
+    l32i    a15,a1,104                  # [6]  gra_spill_temp_24
+    s32i    a15,a1,100                  # [7]  gra_spill_temp_23
+    s32i    a13,a1,148                  # [8]  gra_spill_temp_35
+    s32i    a8,a1,108                   # [9]  gra_spill_temp_25
+    s32i    a6,a1,144                   # [10]  gra_spill_temp_34
+    l32i    a8,a1,28                    # [11]  gra_spill_temp_5
+    l32i    a6,a1,252                   # [12]  filter_ht
+    max     a9,a9,a10                   # [14]
+    s32i    a9,a1,168                   # [15]  gra_spill_temp_40
+    s32i    a13,a1,88                   # [16]  gra_spill_temp_20
+    min     a6,a6,a8                    # [17]
+    bnez.n  a5,.LBB59_esp_nn_avg_pool_s8_esp32s3    # [18]
+
+// End of one output pixel (4-channel path). Advances the output offset
+// (a1+108) by `channels`, steps the per-column running values by stride_wd
+// (a1+100 and a1+148 go up, a1+96 and a1+144 go down), increments the column
+// counter (a1+88) and leaves the row via .Lt_0_38146 when the counter equals
+// the value at a1+128. A zero channel count (a5) skips the pixel body.
+.Lt_0_38914:    # 0x57a
+#<loop> Loop body line 163
+    l32i    a8,a1,100                   # [0]  gra_spill_temp_23
+    l32i    a15,a1,144                  # [1]  gra_spill_temp_34
+    l32i    a9,a1,148                   # [2]  gra_spill_temp_35
+    l32i    a14,a1,240                  # [3]  stride_wd
+    l32i    a10,a1,108                  # [4]  gra_spill_temp_25
+    l32i    a13,a1,96                   # [5]  gra_spill_temp_22
+    add.n   a10,a10,a5                  # [6]
+    s32i    a10,a1,108                  # [7]  gra_spill_temp_25
+    sub     a13,a13,a14                 # [8]
+    add.n   a9,a9,a14                   # [9]
+    sub     a15,a15,a14                 # [10]
+    add.n   a8,a8,a14                   # [11]
+    s32i    a8,a1,100                   # [12]  gra_spill_temp_23
+    s32i    a15,a1,144                  # [13]  gra_spill_temp_34
+    s32i    a9,a1,148                   # [14]  gra_spill_temp_35
+    s32i    a13,a1,96                   # [15]  gra_spill_temp_22
+    l32i    a9,a1,88                    # [16]  gra_spill_temp_20
+    l32i    a10,a1,128                  # [17]  gra_spill_temp_32
+    addi.n  a9,a9,1                 # [18]
+    s32i    a9,a1,88                    # [19]  gra_spill_temp_20
+    beq     a9,a10,.Lt_0_38146          # [20]
+
+    beqz.n  a5,.Lt_0_38914          # [0]
+
+// Per-pixel setup and accumulation (4-channel path).
+// .LBB59 prepares one output pixel:
+//   a1+216 = 0                          (channel offset inside the pixel)
+//   a1+164 = output + pixel offset      (write pointer)
+//   a1+172 = write pointer + channels   (end pointer for the channel loop)
+//   a1+132 = min(filter_wd, value at a1+144)   (filter column bound)
+//   a1+208 = max(value at a1+96, 0)            (first filter column)
+//   a1+60  = value at a1+100 + first filter column (first input column)
+//   a1+52  = column bound - first column       (taps per filter row)
+// .LBB62 .. .Lt_0_40450 then walk the filter rows for one group of 4
+// channels: each row accumulates its taps into q2 as 32-bit sums and adds the
+// number of taps to a15, which the averaging code uses as the divisor.
+.LBB59_esp_nn_avg_pool_s8_esp32s3:  # 0x5b7
+#<loop> Part of loop body line 161, head labeled .Lt_0_37890
+    l32i    a14,a1,144                  # [0]  gra_spill_temp_34
+    l32i    a13,a1,248                  # [1]  filter_wd
+    l32i    a9,a1,136                   # [2]  gra_spill_temp_30
+    l32i    a8,a1,108                   # [3]  gra_spill_temp_25
+    movi.n  a15,0                   # [4]
+    s32i    a15,a1,216                  # [5]  gra_spill_temp_52
+    add.n   a10,a8,a5                   # [6]
+    add.n   a8,a8,a9                    # [8]
+    min     a13,a13,a14                 # [9]
+    add.n   a10,a9,a10                  # [10]
+    s32i    a10,a1,172                  # [11]  gra_spill_temp_41
+    s32i    a13,a1,132                  # [12]  gra_spill_temp_31
+    l32i    a14,a1,96                   # [13]  gra_spill_temp_22
+    s32i    a8,a1,164                   # [14]  gra_spill_temp_39
+    max     a14,a14,a15                 # [15]
+    l32i    a15,a1,100                  # [16]  gra_spill_temp_23
+    s32i    a14,a1,208                  # [17]  gra_spill_temp_50
+    add.n   a8,a15,a14                  # [18]
+    s32i    a8,a1,60                    # [19]  gra_spill_temp_13
+    add.n   a15,a15,a13                 # [20]
+    s32i    a15,a1,196                  # [21]  gra_spill_temp_37
+    sub     a13,a13,a14                 # [22]
+    s32i    a13,a1,52                   # [23]  gra_spill_temp_11
+    j       .Lt_0_39426                     # [24]
+
+.LBB62_esp_nn_avg_pool_s8_esp32s3:  # 0x5fb
+#<loop> Part of loop body line 173, head labeled .Lt_0_39426
+    // a10 = (window origin row + first filter row) * input_wd; a15 restarts
+    // as the tap counter for this channel group.
+    l32i    a10,a1,192                  # [0]  gra_spill_temp_36
+    l32i    a14,a1,196                  # [1]  gra_spill_temp_37
+    add.n   a10,a10,a15                 # [2]
+    mull    a10,a11,a10                 # [3]
+    movi.n  a15,0                   # [4]
+    add.n   a14,a10,a14                 # [5]
+
+.Lt_0_40194:    # 0x60a
+#<loop> Loop body line 178, nesting depth: 4, estimated iterations: 252
+    // Skip the row when the first filter column is not below the column bound.
+    l32i    a9,a1,132                   # [0]  gra_spill_temp_31
+    l32i    a8,a1,208                   # [1]  gra_spill_temp_50
+    add.n   a14,a14,a11                 # [2]
+    bge a8,a9,.Lt_0_40450           # [3]
+
+.LBB65_esp_nn_avg_pool_s8_esp32s3:  # 0x615
+#<loop> Part of loop body line 178, head labeled .Lt_0_40194
+    // a2 = input + (row * input_wd + first column) * channels + channel offset
+    l32i    a3,a1,60                    # [0]  gra_spill_temp_13
+    l32i    a2,a1,216                   # [1]  gra_spill_temp_52
+    add.n   a3,a3,a10                   # [2]
+    mull    a3,a3,a5                    # [3]
+    l32i    a4,a1,52                    # [4]  gra_spill_temp_11
+    add.n   a2,a2,a3                    # [5]
+    add.n   a2,a12,a2                   # [6]
+    loopgtz a4,.LBB155_esp_nn_avg_pool_s8_esp32s3   # [7]
+
+    // Load one 32-bit word (4 channels), step to the next input pixel
+    // (a5 = channels), widen s8 -> s16 -> s32 by zipping with the sign masks
+    // produced by the compares against zero (q4), then accumulate into q2.
+    ee.vldbc.32.xp  q0,a2,a5                # [0*II+0]  id:684
+    ee.vcmp.lt.s8   q1,q0,q4            # [0*II+2]
+    ee.vzip.8       q0,q1                   # [0*II+3]
+    ee.vcmp.lt.s16  q1,q0,q4        # [0*II+4]
+    ee.vzip.16      q0,q1               # [0*II+5]
+    ee.vadds.s32    q2,q2,q0            # [0*II+6]
+
+.LBB155_esp_nn_avg_pool_s8_esp32s3: # 0x63e
+#<loop> Part of loop body line 178, head labeled .Lt_0_40194
+    // a15 += min(filter_wd, input_wd - (x - pad_wd)) - max(pad_wd - x, 0),
+    // with x the value at a1+148.
+    l32i    a2,a1,148                   # [0]  gra_spill_temp_35
+    movi.n  a8,0                    # [1]
+    sub     a9,a7,a2                    # [2]
+    sub     a2,a2,a7                    # [3]
+    max     a9,a9,a8                    # [4]
+    l32i    a8,a1,248                   # [5]  filter_wd
+    sub     a2,a11,a2                   # [6]
+    min     a8,a8,a2                    # [7]
+    sub     a8,a8,a9                    # [8]
+    add.n   a15,a15,a8                  # [9]
+
+.Lt_0_40450:    # 0x65a
+#<loop> Part of loop body line 178, head labeled .Lt_0_40194
+    // Next filter row: advance the row offset by input_wd until a13 reaches a6.
+    add.n   a10,a10,a11                 # [0]
+    addi.n  a13,a13,1               # [1]
+    bne     a6,a13,.Lt_0_40194          # [2]
+
+// Averaging, clamping and store for one group of 4 channels.
+// On entry q2 holds four 32-bit sums and a15 the number of accumulated taps.
+// Each lane is moved to a general register, a2 = a15 >> 1 is added when the
+// sum is >= 1 and subtracted otherwise, the result is divided by a15 with
+// quos and written back to the lane.
+// NOTE(review): a15 is 0 when no filter tap was accumulated (for example when
+// entered from .Lt_0_52738); quos then divides by zero. Confirm that callers
+// never produce such a window.
+.Lt_0_39682:    # 0x661
+#<loop> Part of loop body line 173, head labeled .Lt_0_39426
+    srai            a2,a15,1                    # [5]
+
+// move to gp registers and average
+
+    ee.movi.32.a    q2,a9,0             # [0]
+    ee.movi.32.a    q2,a4,1             # [0]
+
+    blti            a9,1,.Lt_0_41986            # [3]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_41730                     # [2]
+.Lt_0_41986:    # 0x482
+    sub             a9,a9,a2                    # [0]
+.Lt_0_41730:    # 0x3f1
+
+    blti            a4,1,.Lt_0_42498            # [1]
+    add.n           a4,a2,a4                    # [0]
+    j               .Lt_0_42242                     # [2]
+.Lt_0_42498:    # 0x48b
+    sub             a4,a4,a2                    # [0]
+.Lt_0_42242:    # 0x3fc
+
+
+    quos            a9,a9,a15                   # [1]
+    quos            a4,a4,a15                   # [1]
+    ee.movi.32.q    q2,a9,0             # [0]
+    ee.movi.32.q    q2,a4,1             # [1]
+
+    ee.movi.32.a    q2,a9,2             # [2]
+    ee.movi.32.a    q2,a14,3            # [0]
+
+    blti            a9,1,.Lt_0_43010            # [3]
+    add.n           a9,a9,a2                    # [0]
+    j               .Lt_0_42754                     # [2]
+.Lt_0_43010:    # 0x494
+    sub             a9,a9,a2                    # [0]
+.Lt_0_42754:    # 0x40d
+
+
+    blti            a14,1,.Lt_0_43522           # [1]
+    add.n           a14,a2,a14                  # [0]
+    j               .Lt_0_43266                     # [2]
+.Lt_0_43522:    # 0x49d
+    sub             a14,a14,a2                      # [0]
+.Lt_0_43266:    # 0x418
+
+    quos            a9,a9,a15                   # [1]
+    quos            a14,a14,a15                 # [1]
+    ee.movi.32.q    q2,a9,2             # [0]
+    ee.movi.32.q    q2,a14,3            # [1]
+
+
+    // Clamp to [q7, q6] (activation_min / activation_max), narrow
+    // 32 -> 16 -> 8 bits with the unzips, spill the low 64 bits of q2 to the
+    // frame at a1+0 and copy the first 32-bit word (4 result bytes) to the
+    // write pointer (a1+164). The spill overwrites the activation_min/max
+    // frame slots, which were already broadcast into q7/q6 by the prologue.
+    // The channel offset (a1+216) and the write pointer both advance by 4;
+    // the pixel is finished when the pointer reaches a1+172.
+    // NOTE(review): the word store presumably expects a 4-byte aligned output
+    // address; confirm.
+    l32i            a9,a1,172                   # [0]  gra_spill_temp_41
+    l32i            a8,a1,164                   # [1]  gra_spill_temp_39
+    l32i            a14,a1,216                  # [2]  gra_spill_temp_52
+    addi.n          a14,a14,4               # [5]
+    ee.vmin.s32     q2,q2,q6            # [6]
+    s32i            a14,a1,216                  # [7]  gra_spill_temp_52
+    ee.vmax.s32     q2,q2,q7            # [8]
+    ee.vunzip.16    q2,q1               # [9]
+    ee.vunzip.8     q2,q1               # [10]
+    ee.vst.l.64.ip  q2,a1,0         # [11]  id:691
+    l32i.n          a13,a1,0                # [12]  id:692
+    s32i.n          a13,a8,0                # [13]  id:693
+    addi.n          a8,a8,4                 # [14]
+    s32i            a8,a1,164                   # [15]  gra_spill_temp_39
+    bge             a8,a9,.Lt_0_38914           # [16]
+
+// Start of one channel group: clear the accumulator q2 and enter the
+// filter-row loop when the first filter row (a1+168) is below the bound in a6.
+.Lt_0_39426:    # 0x6cb
+    l32i    a15,a1,168                  # [0]  gra_spill_temp_40
+    mv.qr   q2,q4                       # [1]
+    mov.n   a13,a15                     # [2]
+    blt     a15,a6,.LBB62_esp_nn_avg_pool_s8_esp32s3    # [3]
+
+// No filter row to visit: go straight to the averaging code with a tap count
+// of zero.
+.Lt_0_52738:    # 0x6d6
+    movi.n  a15,0                   # [0]
+    j       .Lt_0_39682                     # [1]
+
+    .size   esp_nn_avg_pool_s8_esp32s3, . - esp_nn_avg_pool_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S b/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S
new file mode 100644
index 0000000..722e0db
--- /dev/null
+++ b/edge-impulse-sdk/porting/espressif/ESP-NN/src/pooling/esp_nn_max_pool_s8_esp32s3.S
@@ -0,0 +1,449 @@
+#include "edge-impulse-sdk/classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
+// Copyright 2021-2022 Espressif Systems (Shanghai) PTE LTD
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+    .text
+    .align  4
+    .literal_position
+
+    # Program Unit: esp_nn_max_pool_s8_esp32s3
+    .type   esp_nn_max_pool_s8_esp32s3, @function
+    .align   4
+    .global esp_nn_max_pool_s8_esp32s3
+
+// no of channels must be multiple of 4
+
+esp_nn_max_pool_s8_esp32s3: # 0x4
+    # int8_min = 0
+    # gra_spill_temp_0 = 4
+    # gra_spill_temp_1 = 8
+    # gra_spill_temp_2 = 12
+    # gra_spill_temp_3 = 16
+    # gra_spill_temp_4 = 20
+    # gra_spill_temp_5 = 24
+    # gra_spill_temp_6 = 28
+    # gra_spill_temp_7 = 32
+    # gra_spill_temp_8 = 36
+    # gra_spill_temp_9 = 40
+    # gra_spill_temp_10 = 44
+    # gra_spill_temp_11 = 48
+    # gra_spill_temp_12 = 52
+    # gra_spill_temp_13 = 56
+    # gra_spill_temp_14 = 60
+    # gra_spill_temp_15 = 64
+    # gra_spill_temp_16 = 68
+    # gra_spill_temp_17 = 72
+    # gra_spill_temp_18 = 76
+    # gra_spill_temp_19 = 80
+    # gra_spill_temp_20 = 84
+    # gra_spill_temp_21 = 88
+    # gra_spill_temp_22 = 92
+    # gra_spill_temp_23 = 96
+
+// a2: input
+// a3: input_wd
+// a4: input_ht
+// a5: output
+// a6: output_wd
+// a7: output_ht
+// on stack: stride_wd = 120
+// on stack: stride_ht = 124
+// on stack: filter_wd = 128
+// on stack: filter_ht = 132
+// on stack: pad_wd = 136
+// on stack: pad_ht = 140
+// on stack: activation_min = 144
+// on stack: activation_max = 148
+// on stack: channels = 152
+
+
+    entry   a1,120                      #
+    mov.n   a12,a2                      # [0]
+    s32i    a6,a1,4                 # [2]  gra_spill_temp_0
+    s32i    a7,a1,68                    # [3]  gra_spill_temp_16
+    mov.n   a11,a3                      # [4]
+    s32i    a5,a1,96                    # [5]  gra_spill_temp_23
+
+    l16ui   a5,a1,152                   # [6]  id:465 channels+0x0
+    movi    a3,-128                     # [7]
+    s32i.n  a3,a1,0                 # [1]  int8_min
+
+    addi.n      a9,a1,148                   # [0]  activation_max
+    addi.n      a15,a1,144                  # [1]  activation_min
+    ee.vldbc.8  q3,a1               # [7]  id:473 int8_min+0x0
+    ee.vldbc.8  q5,a15                  # [8]  id:470 activation_min+0x0
+    ee.vldbc.8  q4,a9               # [9]  id:471 activation_max+0x0
+
+    extui   a8,a5,0,3                   # [8]
+    beqz.n  a8,.LBB3_esp_nn_max_pool_s8_esp32s3     # [9] // if (channels % 8 == 0)
+
+    extui   a14,a5,0,2                  # [0]
+    beqz.n  a14,.LBB25_esp_nn_max_pool_s8_esp32s3   # [1] // if (channels % 4 == 0)
+
+    retw.n                          # [0]   // exit
+
+.LBB3_esp_nn_max_pool_s8_esp32s3:   # 0x1c5 // if (channels % 8 == 0)
+
+    l16ui   a15,a1,136                  # [1]  id:475 pad_wd+0x0
+    l16ui   a14,a1,140                  # [4]  id:474 pad_ht+0x0
+    movi.n  a8,0                    # [13]
+    movi.n  a10,0                   # [15]
+    s32i    a14,a1,44                   # [7]  gra_spill_temp_10
+    neg     a15,a15                     # [12]
+    mul16u  a9,a6,a5                # [14]
+    neg     a14,a14                     # [16]
+    s32i    a14,a1,92                   # [17]  gra_spill_temp_22
+    s32i    a10,a1,52                   # [18]  gra_spill_temp_12
+    s32i    a9,a1,60                    # [19]  gra_spill_temp_14
+    s32i.n  a8,a1,36                # [16]  gra_spill_temp_8
+    s32i    a15,a1,56                   # [21]  gra_spill_temp_13
+    sub     a13,a4,a14                  # [22]
+    s32i    a13,a1,48                   # [23]  gra_spill_temp_11
+    sub     a15,a11,a15                 # [24]
+    s32i.n  a15,a1,40               # [25]  gra_spill_temp_9
+
+.Lt_0_21506:    # 0x229
+    l32i    a8,a1,4                 # [0]  gra_spill_temp_0
+    beqz.n  a8,.Lt_0_21762          # [2]
+
+    movi.n  a10,0                   # [0]
+    l32i    a9,a1,44                    # [1]  gra_spill_temp_10
+    l32i.n  a15,a1,40               # [2]  gra_spill_temp_9
+    l32i    a8,a1,52                    # [3]  gra_spill_temp_12
+    l32i.n  a13,a1,136                  # [4]  ,pad_wd
+    l32i    a14,a1,56                   # [5]  gra_spill_temp_13
+    s32i    a14,a1,80                   # [6]  gra_spill_temp_19
+    s32i    a13,a1,76                   # [7]  gra_spill_temp_18
+    s32i    a8,a1,88                    # [8]  gra_spill_temp_21
+    s32i    a15,a1,84                   # [9]  gra_spill_temp_20
+    l32i    a8,a1,48                    # [10]  gra_spill_temp_11
+    max     a9,a9,a10                   # [11]
+    l32i    a15,a1,132                  # [12]  filter_ht
+    s32i    a9,a1,8                 # [13]  gra_spill_temp_1
+    movi.n  a9,0                    # [14]
+    min     a15,a15,a8                  # [15]
+    s32i    a9,a1,64                    # [16]  gra_spill_temp_15
+
+.Lt_0_22274:    # 0x25d
+    beqz.n  a5,.Lt_0_22530          # [0]
+
+.LBB10_esp_nn_max_pool_s8_esp32s3:  # 0x25f
+#<loop> Part of loop body line 46, head labeled .Lt_0_22274
+    l32i    a6,a1,76                    # [0]  gra_spill_temp_18
+    l32i    a13,a1,96                   # [1]  gra_spill_temp_23
+    l32i    a8,a1,84                    # [2]  gra_spill_temp_20
+    l32i    a7,a1,128                   # [3]  filter_wd
+    l32i    a10,a1,88                   # [4]  gra_spill_temp_21
+    movi.n  a9,0                    # [5]
+    s32i    a9,a1,20                    # [6]  gra_spill_temp_4
+    add.n   a14,a10,a5                  # [7]
+    min     a7,a7,a8                    # [8]
+    add.n   a10,a10,a13                 # [9]
+    add.n   a14,a13,a14                 # [10]
+    s32i    a14,a1,12                   # [11]  gra_spill_temp_2
+    s32i    a10,a1,16                   # [12]  gra_spill_temp_3
+    movi.n  a8,0                    # [13]
+    l32i    a10,a1,80                   # [14]  gra_spill_temp_19
+    max     a6,a6,a8                    # [15]
+    sub     a9,a7,a6                    # [16]
+    s32i    a9,a1,28                    # [17]  gra_spill_temp_6
+    add.n   a13,a10,a6                  # [18]
+    s32i    a13,a1,24                   # [19]  gra_spill_temp_5
+    add.n   a10,a10,a7                  # [16]
+    s32i    a10,a1,72                   # [21]  gra_spill_temp_17
+
+.Lt_0_23042:    # 0x29a
+    l32i    a8,a1,8                 # [0]  gra_spill_temp_1
+    mv.qr   q1,q3                       # [1]
+    mov.n   a13,a8                      # [2]
+    bge     a8,a15,.Lt_0_23298          # [3]
+
+.LBB13_esp_nn_max_pool_s8_esp32s3:  # 0x2a5
+#<loop> Part of loop body line 40, head labeled .Lt_0_23042
+    l32i    a10,a1,92                   # [0]  gra_spill_temp_22
+    l32i    a14,a1,72                   # [1]  gra_spill_temp_17
+    add.n   a10,a10,a8                  # [2]
+    mull    a10,a11,a10                 # [3]
+    add.n   a14,a10,a14                 # [5]
+
+.Lt_0_23810:    # 0x2b2
+    add.n   a14,a14,a11                 # [0]
+    addi.n  a13,a13,1               # [1]
+    bge     a6,a7,.Lt_0_24066           # [2]
+
+.LBB16_esp_nn_max_pool_s8_esp32s3:  # 0x2b9
+    l32i    a3,a1,24                    # [0]  gra_spill_temp_5
+    l32i    a2,a1,20                    # [1]  gra_spill_temp_4
+    add.n   a3,a3,a10                   # [2]
+    mull    a3,a3,a5                    # [3]
+    add.n   a2,a2,a3                    # [5]
+    l32i    a3,a1,28                    # [6]  gra_spill_temp_6
+    add.n   a2,a12,a2                   # [7]
+    loopgtz a3,.LBB93_esp_nn_max_pool_s8_esp32s3    # [8]
+
+    ee.vld.l.64.ip  q0,a2,0         # [0*II+1]  id:481
+    add.n           a2,a2,a5                    # [0*II+2]
+    ee.vmax.s8      q1,q1,q0            # [0*II+3]
+.LBB93_esp_nn_max_pool_s8_esp32s3:  # 0x2d8
+
+.Lt_0_24066:    # 0x2d8
+    add.n   a10,a10,a11                 # [0]
+    bne     a15,a13,.Lt_0_23810         # [1]
+
+.Lt_0_23298:    # 0x2dd
+    l32i    a9,a1,12                    # [0]  gra_spill_temp_2
+    l32i    a13,a1,20                   # [1]  gra_spill_temp_4
+    l32i    a8,a1,16                    # [2]  gra_spill_temp_3
+    ee.vmin.s8  q2,q1,q4            # [3]
+    ee.vmax.s8  q2,q2,q5            # [4]
+    mov.n   a10,a8                      # [5]
+    addi.n  a13,a13,8               # [6]
+    s32i    a13,a1,20                   # [7]  gra_spill_temp_4
+    ee.vst.l.64.ip  q2,a10,0        # [8]  id:482
+    addi.n  a8,a8,8                 # [9]
+    s32i    a8,a1,16                    # [10]  gra_spill_temp_3
+    blt     a8,a9,.Lt_0_23042           # [11]
+
+.Lt_0_22530:    # 0x2fe
+    l32i    a13,a1,84                   # [0]  gra_spill_temp_20
+    l32i    a14,a1,80                   # [1]  gra_spill_temp_19
+    l32i    a10,a1,120                  # [2]  stride_wd
+    l32i    a8,a1,88                    # [3]  gra_spill_temp_21
+    l32i    a9,a1,76                    # [4]  gra_spill_temp_18
+    add.n   a8,a8,a5                    # [5]
+    s32i    a8,a1,88                    # [6]  gra_spill_temp_21
+    sub     a9,a9,a10                   # [7]
+    add.n   a14,a14,a10                 # [8]
+    sub     a13,a13,a10                 # [9]
+    s32i    a13,a1,84                   # [10]  gra_spill_temp_20
+    s32i    a14,a1,80                   # [11]  gra_spill_temp_19
+    s32i    a9,a1,76                    # [12]  gra_spill_temp_18
+    l32i    a14,a1,64                   # [13]  gra_spill_temp_15
+    l32i    a8,a1,4                 # [14]  gra_spill_temp_0
+    addi.n  a14,a14,1               # [15]
+    s32i    a14,a1,64                   # [16]  gra_spill_temp_15
+    sub     a14,a14,a8                  # [17]
+    bnez    a14,.Lt_0_22274             # [18]
+
+.Lt_0_21762:    # 0x334
+#<loop> Part of loop body line 20, head labeled .Lt_0_21506
+    l32i    a8,a1,44                    # [0]  gra_spill_temp_10
+    l32i    a15,a1,92                   # [1]  gra_spill_temp_22
+    l32i    a10,a1,60                   # [2]  gra_spill_temp_14
+    l32i    a14,a1,124                  # [3]  stride_ht
+    l32i    a13,a1,48                   # [4]  gra_spill_temp_11
+    l32i    a9,a1,52                    # [5]  gra_spill_temp_12
+    sub     a13,a13,a14                 # [6]
+    add.n   a9,a9,a10                   # [7]
+    add.n   a15,a15,a14                 # [8]
+    sub     a8,a8,a14                   # [9]
+    s32i    a8,a1,44                    # [10]  gra_spill_temp_10
+    s32i    a15,a1,92                   # [11]  gra_spill_temp_22
+    s32i    a9,a1,52                    # [12]  gra_spill_temp_12
+    s32i    a13,a1,48                   # [13]  gra_spill_temp_11
+    l32i.n  a9,a1,36                # [14]  gra_spill_temp_8
+    l32i    a10,a1,68                   # [15]  gra_spill_temp_16
+    addi.n  a9,a9,1                 # [16]
+    s32i.n  a9,a1,36                # [17]  gra_spill_temp_8
+    sub     a9,a9,a10                   # [18]
+    bnez    a9,.Lt_0_21506              # [19]
+
+    retw.n                          # [0] // exit
+
+.LBB25_esp_nn_max_pool_s8_esp32s3:  # 0x36d // if (channels % 4 == 0)
+
+    l16ui   a10,a1,136                  # [1]  id:475 pad_wd+0x0
+    l16ui   a9,a1,140                   # [4]  id:474 pad_ht+0x0
+    movi.n  a13,0                   # [13]
+    movi.n  a15,0                   # [15]
+    neg     a10,a10                     # [12]
+    s32i    a9,a1,44                    # [7]  gra_spill_temp_10
+    mul16u  a14,a6,a5               # [14]
+    neg     a9,a9                       # [16]
+    s32i    a9,a1,92                    # [17]  gra_spill_temp_22
+    s32i    a15,a1,52                   # [18]  gra_spill_temp_12
+    s32i    a14,a1,60                   # [19]  gra_spill_temp_14
+    s32i.n  a13,a1,36               # [16]  gra_spill_temp_8
+    s32i    a10,a1,56                   # [21]  gra_spill_temp_13
+    sub     a8,a4,a9                    # [22]
+    s32i    a8,a1,48                    # [23]  gra_spill_temp_11
+    sub     a10,a11,a10                 # [24]
+    s32i.n  a10,a1,40               # [25]  gra_spill_temp_9
+
+.Lt_0_27138:    # 0x3d5
+    l32i    a13,a1,4                # [0]  gra_spill_temp_0
+    beqz.n  a13,.Lt_0_27394         # [2]
+
+.LBB29_esp_nn_max_pool_s8_esp32s3:  # 0x3da
+#<loop> Part of loop body line 107, head labeled .Lt_0_27138
+    movi.n  a10,0                   # [0]
+    l32i    a9,a1,44                    # [1]  gra_spill_temp_10
+    l32i.n  a15,a1,40               # [2]  gra_spill_temp_9
+    l32i    a8,a1,52                    # [3]  gra_spill_temp_12
+    l32i    a14,a1,56                   # [4]  gra_spill_temp_13
+    l32i.n  a13,a1,136                  # [5]  pad_wd
+    s32i    a13,a1,76                   # [6]  gra_spill_temp_18
+    s32i    a14,a1,80                   # [7]  gra_spill_temp_19
+    s32i    a8,a1,88                    # [8]  gra_spill_temp_21
+    s32i    a15,a1,84                   # [9]  gra_spill_temp_20
+    l32i    a8,a1,48                    # [10]  gra_spill_temp_11
+    l32i    a15,a1,132                  # [11]  filter_ht
+    movi.n  a14,0                   # [12]
+    max     a9,a9,a10                   # [13]
+    s32i    a9,a1,8                 # [14]  gra_spill_temp_1
+    s32i    a14,a1,64                   # [15]  gra_spill_temp_15
+    min     a15,a15,a8                  # [16]
+
+.Lt_0_27906:    # 0x409
+#<loop> Loop body line 109, nesting depth: 2, estimated iterations: 56
+    beqz.n  a5,.Lt_0_28162          # [0]
+
+.LBB32_esp_nn_max_pool_s8_esp32s3:  # 0x40b
+#<loop> Part of loop body line 109, head labeled .Lt_0_27906
+    l32i    a6,a1,76                    # [0]  gra_spill_temp_18
+    l32i    a13,a1,96                   # [1]  gra_spill_temp_23
+    l32i    a8,a1,84                    # [2]  gra_spill_temp_20
+    l32i    a7,a1,128                   # [3]  filter_wd
+    l32i    a10,a1,88                   # [4]  gra_spill_temp_21
+    movi.n  a9,0                    # [5]
+    s32i    a9,a1,32                    # [6]  gra_spill_temp_7
+    add.n   a14,a10,a5                  # [7]
+    min     a7,a7,a8                    # [8]
+    add.n   a10,a10,a13                 # [9]
+    add.n   a14,a13,a14                 # [10]
+    s32i    a14,a1,12                   # [11]  gra_spill_temp_2
+    s32i    a10,a1,16                   # [12]  gra_spill_temp_3
+    movi.n  a8,0                    # [13]
+    l32i    a10,a1,80                   # [14]  gra_spill_temp_19
+    max     a6,a6,a8                    # [15]
+    sub     a9,a7,a6                    # [16]
+    s32i    a9,a1,28                    # [17]  gra_spill_temp_6
+    add.n   a13,a10,a6                  # [18]
+    s32i    a13,a1,24                   # [19]  gra_spill_temp_5
+    add.n   a10,a10,a7                  # [16]
+    s32i    a10,a1,72                   # [21]  gra_spill_temp_17
+
+.Lt_0_28674:    # 0x446
+#<loop> Loop body line 8, nesting depth: 3, estimated iterations: 56
+    l32i    a8,a1,8                 # [0]  gra_spill_temp_1
+    mv.qr   q1,q3                       # [1]
+    mov.n   a13,a8                      # [2]
+    bge     a8,a15,.Lt_0_28930          # [3]
+
+.LBB35_esp_nn_max_pool_s8_esp32s3:  # 0x451
+#<loop> Part of loop body line 8, head labeled .Lt_0_28674
+    l32i    a10,a1,92                   # [0]  gra_spill_temp_22
+    l32i    a14,a1,72                   # [1]  gra_spill_temp_17
+    add.n   a10,a10,a8                  # [2]
+    mull    a10,a11,a10                 # [3]
+    add.n   a14,a10,a14                 # [5]
+
+.Lt_0_29442:    # 0x45e
+    add.n   a14,a14,a11                 # [0]
+    addi.n  a13,a13,1               # [1]
+    bge     a6,a7,.Lt_0_29698           # [2]
+
+.LBB38_esp_nn_max_pool_s8_esp32s3:  # 0x465
+    l32i    a3,a1,24                    # [0]  gra_spill_temp_5
+    l32i    a2,a1,32                    # [1]  gra_spill_temp_7
+    add.n   a3,a3,a10                   # [2]
+    mull    a3,a3,a5                    # [3]
+    l32i    a4,a1,28                    # [4]  gra_spill_temp_6
+    add.n   a2,a2,a3                    # [5]
+    add.n   a2,a12,a2                   # [6]
+    loopgtz a4,.LBB108_esp_nn_max_pool_s8_esp32s3   # [7]
+
+    ee.vldbc.32 q0,a2               # [0*II+0]  id:489
+    add.n       a2,a2,a5                    # [0*II+1]
+    ee.vmax.s8  q1,q1,q0            # [0*II+2]
+.LBB108_esp_nn_max_pool_s8_esp32s3: # 0x482
+
+.Lt_0_29698:    # 0x482
+    add.n   a10,a10,a11                 # [0]
+    bne     a15,a13,.Lt_0_29442         # [1]
+
+.Lt_0_28930:    # 0x487
+#<loop> Part of loop body line 8, head labeled .Lt_0_28674
+    l32i            a9,a1,12                    # [0]  gra_spill_temp_2 (output end address for this pixel)
+    l32i            a8,a1,16                    # [1]  gra_spill_temp_3 (current output address)
+    l32i            a10,a1,32                   # [3]  gra_spill_temp_7 (current channel offset)
+    // clamp window maximum (q1) to [activation_min (q5), activation_max (q4)]; q2 is scratch so q5 is preserved
+    ee.vmin.s8      q2,q1,q4            # [4]  q2 = min(q1, activation_max)
+    ee.vmax.s8      q2,q2,q5            # [5]  q2 = max(q2, activation_min)
+    addi.n          a10,a10,4               # [6]  next group of 4 channels
+    ee.movi.32.a    q2,a13,0            #      a13 = 32-bit lane 0 of q2 (4 clamped output bytes)
+    s32i            a10,a1,32                   # [9]  gra_spill_temp_7
+    s32i.n          a13,a8,0                # [10]  id:492 store 4 output bytes
+    addi.n          a8,a8,4                 # [11]
+    s32i            a8,a1,16                    # [12]  gra_spill_temp_3
+    blt             a8,a9,.Lt_0_28674           # [13]  loop until all channels of this pixel are written
+
+.Lt_0_28162:    # 0x4ad
+#<loop> Part of loop body line 109, head labeled .Lt_0_27906
+    l32i    a13,a1,84                   # [0]  gra_spill_temp_20
+    l32i    a14,a1,80                   # [1]  gra_spill_temp_19
+    l32i    a10,a1,120                  # [2]  stride_wd
+    l32i    a8,a1,88                    # [3]  gra_spill_temp_21
+    l32i    a9,a1,76                    # [4]  gra_spill_temp_18
+    add.n   a8,a8,a5                    # [5]
+    s32i    a8,a1,88                    # [6]  gra_spill_temp_21
+    sub     a9,a9,a10                   # [7]
+    add.n   a14,a14,a10                 # [8]
+    sub     a13,a13,a10                 # [9]
+    s32i    a13,a1,84                   # [10]  gra_spill_temp_20
+    s32i    a14,a1,80                   # [11]  gra_spill_temp_19
+    s32i    a9,a1,76                    # [12]  gra_spill_temp_18
+    l32i    a14,a1,64                   # [13]  gra_spill_temp_15
+    l32i    a8,a1,4                 # [14]  gra_spill_temp_0
+    addi.n  a14,a14,1               # [15]
+    s32i    a14,a1,64                   # [16]  gra_spill_temp_15
+    sub     a14,a14,a8                  # [17]
+    bnez    a14,.Lt_0_27906             # [18]
+
+.Lt_0_27394:    # 0x4e3
+#<loop> Part of loop body line 107, head labeled .Lt_0_27138
+    l32i    a8,a1,44                    # [0]  gra_spill_temp_10
+    l32i    a15,a1,92                   # [1]  gra_spill_temp_22
+    l32i    a10,a1,60                   # [2]  gra_spill_temp_14
+    l32i    a14,a1,124                  # [3]  stride_ht
+    l32i    a13,a1,48                   # [4]  gra_spill_temp_11
+    l32i    a9,a1,52                    # [5]  gra_spill_temp_12
+    sub     a13,a13,a14                 # [6]
+    add.n   a9,a9,a10                   # [7]
+    add.n   a15,a15,a14                 # [8]
+    sub     a8,a8,a14                   # [9]
+    s32i    a8,a1,44                    # [10]  gra_spill_temp_10
+    s32i    a15,a1,92                   # [11]  gra_spill_temp_22
+    s32i    a9,a1,52                    # [12]  gra_spill_temp_12
+    s32i    a13,a1,48                   # [13]  gra_spill_temp_11
+    l32i.n  a9,a1,36                # [14]  gra_spill_temp_8
+    l32i    a10,a1,68                   # [15]  gra_spill_temp_16
+    addi.n  a9,a9,1                 # [16]
+    s32i.n  a9,a1,36                # [17]  gra_spill_temp_8
+    sub     a9,a9,a10                   # [18]
+    bnez    a9,.Lt_0_27138              # [19]
+
+    retw.n                          # [0] // exit
+
+    .size   esp_nn_max_pool_s8_esp32s3, . - esp_nn_max_pool_s8_esp32s3
+
+#elif defined(WIO_TERMINAL)
+// dummy code, added for old ARM toolchain
+.syntax unified
+.thumb
+.cpu cortex-m0
+
+.section .text
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN && EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN_S3
diff --git a/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp b/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp
index dd25c07..840ebe6 100644
--- a/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp
+++ b/edge-impulse-sdk/porting/espressif/ei_classifier_porting.cpp
@@ -21,12 +21,17 @@
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <cstring>
 // Include FreeRTOS for delay
 #include <freertos/FreeRTOS.h>
 #include <freertos/task.h>
 
 // for millis and micros
 #include "esp_timer.h"
+#include "esp_idf_version.h"
+
+// memory handling
+#include "esp_heap_caps.h"
 
 #define EI_WEAK_FN __attribute__((weak))
 
@@ -73,11 +78,33 @@ __attribute__((weak)) void ei_printf_float(float f) {
     ei_printf("%f", f);
 }
 
+// On ESP32-S3 we use 16-byte aligned allocation instead of regular malloc
+// due to https://github.com/espressif/esp-nn/issues/7
 __attribute__((weak)) void *ei_malloc(size_t size) {
+#if defined(CONFIG_IDF_TARGET_ESP32S3)
+#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 2, 1)
+    return heap_caps_aligned_alloc(16, size, MALLOC_CAP_DEFAULT);
+#else
+    return aligned_alloc(16, size);
+#endif // ESP_IDF_VERSION >= 5.2.1
+#endif // CONFIG_IDF_TARGET_ESP32S3
     return malloc(size);
 }
 
 __attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
+#if defined(CONFIG_IDF_TARGET_ESP32S3)
+#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(5, 2, 1)
+    // Zero-filled and 16-byte aligned like ei_malloc (https://github.com/espressif/esp-nn/issues/7)
+    return heap_caps_aligned_calloc(16, nitems, size, MALLOC_CAP_DEFAULT);
+#else
+    // Pre-5.2.1 IDF: allocate aligned, then zero by hand. Like calloc(),
+    // fail instead of under-allocating when nitems * size overflows size_t.
+    if (size != 0 && nitems > static_cast<size_t>(-1) / size) return nullptr;
+    void *p = aligned_alloc(16, nitems * size);
+    if (p != nullptr) memset(p, '\0', nitems * size);
+    return p;
+#endif
+#endif
     return calloc(nitems, size);
 }
 
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/.clang-format b/edge-impulse-sdk/porting/ethos-core-driver/.clang-format
deleted file mode 100644
index 9a7ae82..0000000
--- a/edge-impulse-sdk/porting/ethos-core-driver/.clang-format
+++ /dev/null
@@ -1,144 +0,0 @@
-#
-# Copyright (c) 2019-2020 Arm Limited. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the License); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an AS IS BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
----
-Language:        Cpp
-AccessModifierOffset: -2
-AlignAfterOpenBracket: Align
-AlignConsecutiveMacros: false
-AlignConsecutiveAssignments: true
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Right
-AlignOperands:   true
-AlignTrailingComments: true
-AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Empty
-AllowShortLambdasOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Never
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: false
-AlwaysBreakTemplateDeclarations: MultiLine
-BinPackArguments: false
-BinPackParameters: false
-BraceWrapping:
-  AfterCaseLabel:  true
-  AfterClass:      true
-  AfterControlStatement: true
-  AfterEnum:       true
-  AfterFunction:   true
-  AfterNamespace:  true
-  AfterObjCDeclaration: true
-  AfterStruct:     true
-  AfterUnion:      true
-  AfterExternBlock: false
-  BeforeCatch:     true
-  BeforeElse:      true
-  IndentBraces:    false
-  SplitEmptyFunction: true
-  SplitEmptyRecord: true
-  SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Custom
-BreakBeforeInheritanceComma: true
-BreakInheritanceList: AfterColon
-BreakBeforeTernaryOperators: false
-BreakConstructorInitializersBeforeComma: true
-BreakConstructorInitializers: AfterColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit:     120
-CommentPragmas:  '^ IWYU pragma:'
-CompactNamespaces: false
-ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-DisableFormat:   false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
-ForEachMacros:
-  - foreach
-  - Q_FOREACH
-  - BOOST_FOREACH
-IncludeBlocks:   Preserve
-IncludeCategories:
-  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
-    Priority:        2
-  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
-    Priority:        3
-  - Regex:           '.*'
-    Priority:        1
-IncludeIsMainRegex: '(Test)?$'
-IndentCaseLabels: false
-IndentPPDirectives: None
-IndentWidth:     4
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: true
-MacroBlockBegin: ''
-MacroBlockEnd:   ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBinPackProtocolList: Auto
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 19
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyBreakTemplateDeclaration: 10
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 1000000
-PointerAlignment: Right
-ReflowComments:  true
-SortIncludes:    true
-SortUsingDeclarations: true
-SpaceAfterCStyleCast: false
-SpaceAfterLogicalNot: false
-SpaceAfterTemplateKeyword: true
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeCpp11BracedList: false
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 1
-SpacesInAngles:  false
-SpacesInContainerLiterals: false
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard:        Cpp11
-StatementMacros:
-  - Q_UNUSED
-  - QT_REQUIRE_VERSION
-TabWidth:        8
-UseTab:          Never
-...
-
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt b/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt
index 1dd1e54..e7cd9e1 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt
+++ b/edge-impulse-sdk/porting/ethos-core-driver/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2020 Arm Limited. All rights reserved.
+# Copyright (c) 2019-2021 Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -18,12 +18,6 @@
 
 cmake_minimum_required(VERSION 3.15.6)
 
-macro(test_and_convert_error_level log_level ethos_level)
-    if(${log_level} STREQUAL ${LOG_LEVEL})
-        set(ETHOSU_LOG_SEVERITY ${ethos_level})
-    endif()
-endmacro()
-
 project(ethosu_core_driver VERSION 0.0.1)
 
 #
@@ -34,17 +28,10 @@ option(DRIVER_PMU_AUTOINIT "Enable PMU boot auto-initialization" OFF)
 
 set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmsis" CACHE PATH "Path to CMSIS.")
 
-set(LOG_NAMES emerg alert crit err warning notice info debug)
-if(DEFINED LOG_LEVEL)
-    test_and_convert_error_level( LOG_LEVEL_ERROR err )
-    test_and_convert_error_level( LOG_LEVEL_DEBUG debug )
-    test_and_convert_error_level( LOG_LEVEL_TRACE debug )
-    test_and_convert_error_level( LOG_LEVEL_INFO info )
-    test_and_convert_error_level( LOG_LEVEL_WARN warning )
-else()
-    set(ETHOSU_LOG_SEVERITY "info" CACHE STRING "Driver log severity level ${LOG_NAMES}")
-    set_property(CACHE ETHOSU_LOG_SEVERITY PROPERTY STRINGS ${LOG_NAMES})
-endif()
+set(LOG_NAMES err warning info debug)
+set(ETHOSU_LOG_SEVERITY "warning" CACHE STRING "Driver log severity level ${LOG_NAMES} (Defaults to 'warning')")
+set(ETHOSU_TARGET_NPU_CONFIG "ethos-u55-128" CACHE STRING "Default NPU configuration")
+set_property(CACHE ETHOSU_LOG_SEVERITY PROPERTY STRINGS ${LOG_NAMES})
 
 #
 # Global settings
@@ -67,7 +54,26 @@ include_directories(${CMSIS_PATH}/CMSIS/Core/Include)
 # Build driver library
 add_library(ethosu_core_driver STATIC)
 target_include_directories(ethosu_core_driver PUBLIC include)
-target_sources(ethosu_core_driver PRIVATE src/ethosu_driver.c src/ethosu_device.c src/ethosu_pmu.c)
+target_sources(ethosu_core_driver PRIVATE src/ethosu_driver.c src/ethosu_pmu.c)
+
+string(TOLOWER "${ETHOSU_TARGET_NPU_CONFIG}" ETHOSU_TARGET_NPU_CONFIG)  # quoted so an empty value reaches the check below
+if(ETHOSU_TARGET_NPU_CONFIG MATCHES "^ethos-(u[0-9]+|uz)-([0-9]+$)")
+    set(ETHOSU_ARCH ${CMAKE_MATCH_1})  # architecture part, e.g. "u55" for "ethos-u55-128"
+    set(ETHOSU_MACS ${CMAKE_MATCH_2})  # MAC count part, e.g. "128" for "ethos-u55-128"
+else()
+    message(FATAL_ERROR "Invalid Ethos-U target configuration '${ETHOSU_TARGET_NPU_CONFIG}'")
+endif()
+
+target_compile_definitions(ethosu_core_driver PRIVATE
+    ETHOSU_ARCH=${ETHOSU_ARCH}
+    ETHOS$<UPPER_CASE:${ETHOSU_ARCH}>)
+
+if (ETHOSU_ARCH STREQUAL "u55" OR ETHOSU_ARCH STREQUAL "u65")
+    target_sources(ethosu_core_driver PRIVATE src/ethosu_device_u55_u65.c)
+else()
+    message(FATAL_ERROR "Invalid NPU configuration")
+endif()
+
 
 # Set the log level for the target
 target_compile_definitions(ethosu_core_driver PRIVATE ETHOSU_LOG_SEVERITY=${LOG_SEVERITY})
@@ -83,6 +89,7 @@ install(FILES include/ethosu_device.h include/ethosu_driver.h include/pmu_ethosu
 
 message(STATUS "*******************************************************")
 message(STATUS "PROJECT_NAME                           : ${PROJECT_NAME}")
+message(STATUS "ETHOSU_TARGET_NPU_CONFIG               : ${ETHOSU_TARGET_NPU_CONFIG}")
 message(STATUS "CMAKE_SYSTEM_PROCESSOR                 : ${CMAKE_SYSTEM_PROCESSOR}")
 message(STATUS "CMSIS_PATH                             : ${CMSIS_PATH}")
 message(STATUS "ETHOSU_LOG_SEVERITY                    : ${ETHOSU_LOG_SEVERITY}")
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/README.md b/edge-impulse-sdk/porting/ethos-core-driver/README.md
index e23ed2e..9e077b3 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/README.md
+++ b/edge-impulse-sdk/porting/ethos-core-driver/README.md
@@ -4,27 +4,136 @@ This repository contains a device driver for the Arm(R) Ethos(TM)-U NPU.
 
 ## Building
 
-The source code comes with a CMake based build system. The driver is expeced to
-be cross compiled for any of the supported Arm Cortex(R)-M CPUs, which will
-require the user to setup a custom toolchain file.
+The source code comes with a CMake based build system. The driver is expected to
+be cross compiled for any of the supported Arm Cortex(R)-M CPUs, which requires
+the user to configure the build to match their system configuration.
 
-The user is also required to define `CMAKE_SYSTEM_PROCESSOR` for the target CPU,
-for example cortex-m55+nodsp+nofp. This can be done either in the toolchain
-file or on the command line.
 
+One such requirement is to define the target CPU, normally by setting
+`CMAKE_SYSTEM_PROCESSOR`. **Note** that when using the toolchain files provided
+in [core_platform](https://git.mlplatform.org/ml/ethos-u/ethos-u-core-platform.git),
+the variable `TARGET_CPU` must be used instead of `CMAKE_SYSTEM_PROCESSOR`.
+
+Target CPU is specified in the form "cortex-m<nr><features>", for example:
+"cortex-m55+nodsp+nofp".
+
+Similarly the target NPU configuration is
+controlled by setting `ETHOSU_TARGET_NPU_CONFIG`, for example "ethos-u55-128".
+
+The build configuration can be defined either in the toolchain file or
+by passing options on the command line.
+
+```[bash]
+$ cmake -B build  \
+    -DCMAKE_TOOLCHAIN_FILE=<toolchain> \
+    -DCMAKE_SYSTEM_PROCESSOR=cortex-m<nr><features> \
+    -DETHOSU_TARGET_NPU_CONFIG=ethos-u<nr>-<macs>
+$ cmake --build build
+```
+
+or when using toolchain files from [core_platform](https://git.mlplatform.org/ml/ethos-u/ethos-u-core-platform.git)
+
+```[bash]
+$ cmake -B build  \
+    -DCMAKE_TOOLCHAIN_FILE=<core_platform_toolchain> \
+    -DTARGET_CPU=cortex-m<nr><features> \
+    -DETHOSU_TARGET_NPU_CONFIG=ethos-u<nr>-<macs>
+$ cmake --build build
 ```
-$ mkdir build
-$ cd build
-$ cmake .. -DCMAKE_TOOLCHAIN_FILE=<toolchain> -DCMAKE_SYSTEM_PROCESSOR=cortex-m<nr><features>
-$ make
+
+## Driver APIs
+
+The driver APIs are defined in `include/ethosu_driver.h` and the related types
+in `include/ethosu_types.h`. Inferences can be invoked in two manners:
+synchronously or asynchronously. The two types of invocation can be freely mixed
+in a single application.
+
+### Synchronous invocation
+
+A typical usage of the driver can be the following:
+
+```[C]
+// reserve a driver to be used (this call could block until a driver is available)
+struct ethosu_driver *drv = ethosu_reserve_driver();
+...
+// run one or more inferences
+int result = ethosu_invoke(drv,
+                           custom_data_ptr,
+                           custom_data_size,
+                           base_addr,
+                           base_addr_size,
+                           num_base_addr);
+...
+// release the driver for others to use
+ethosu_release_driver(drv);
 ```
 
-For running the driver on Arm CPUs which are configured with datacache, the
+### Asynchronous invocation
+
+A typical usage of the driver can be the following:
+
+```[C]
+// reserve a driver to be used (this call could block until a driver is available)
+struct ethosu_driver *drv = ethosu_reserve_driver();
+...
+// run one or more inferences
+int result = ethosu_invoke_async(drv,
+                                 custom_data_ptr,
+                                 custom_data_size,
+                                 base_addr,
+                                 base_addr_size,
+                                 num_base_addr,
+                                 user_arg);
+...
+// do some other work
+...
+int ret;
+do {
+    // true = blocking, false = non-blocking
+    // ret > 0 means inference not completed (only for non-blocking mode)
+    ret = ethosu_wait(drv, <true|false>);
+} while(ret > 0);
+...
+// release the driver for others to use
+ethosu_release_driver(drv);
+```
+
+Note that if `ethosu_wait` is invoked from a different thread and concurrently
+with `ethosu_invoke_async`, the user is responsible to guarantee that
+`ethosu_wait` is called after a successful completion of `ethosu_invoke_async`.
+Otherwise `ethosu_wait` might fail and not actually wait for the inference
+completion.
+
+### Driver initialization
+
+In order to use a driver it first needs to be initialized by calling the `init`
+function, which will also register the handle in the list of available drivers.
+A driver can be torn down by using the `deinit` function, which also removes the
+driver from the list.
+
+The correct mapping is one driver per NPU device. Note that all NPUs must have
+the same configuration, because the driver supports only a single NPU
+configuration, which is defined at compile time.
+
+## Implementation design
+
+The driver is structured in two main parts: the driver part, which is
+responsible for providing a unified API to the user; and the device part, which
+deals with the details at the hardware level.
+
+In order to do its task the driver needs a device implementation. There could be
+multiple device implementations for different hardware models and/or
+configurations. Note that the driver can be compiled to target only one NPU
+configuration by specializing the device part at compile time.
+
+## Data caching
+
+For running the driver on Arm CPUs which are configured with data cache, the
 cache maintenance functions in the driver are exported with weakly linked
-symbols that should be overriden. An example implementation using the CMSIS
+symbols that should be overridden. An example implementation using the CMSIS
 primitives found in cachel1_armv7.h could be as below:
 
-```
+```[C++]
 extern "C" {
 void ethosu_flush_dcache(uint32_t *p, size_t bytes) {
     if (p)
@@ -42,12 +151,81 @@ void ethosu_invalidate_dcache(uint32_t *p, size_t bytes) {
 }
 ```
 
-# License
+## Mutex and semaphores
+
+To ensure the correct functionality of the driver, mutexes and semaphores are
+used internally. The default implementations of mutexes and semaphores are
+designed for a single-threaded baremetal environment. Hence, for integration in
+environments where multi-threading is possible, e.g., an RTOS, the user is
+responsible for providing implementations of the mutexes and semaphores to be
+used by the driver.
+
+The mutex and semaphore APIs are defined as weakly linked functions that can be
+overridden by the user. The APIs are the usual ones and described below:
+
+```[C]
+// create a mutex by returning back a handle
+void *ethosu_mutex_create(void);
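+// lock the given mutex (returns 0 on success, else a negative error code)
+int ethosu_mutex_lock(void *mutex);
+// unlock the given mutex (returns 0 on success, else a negative error code)
+int ethosu_mutex_unlock(void *mutex);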
+
+// create a (binary) semaphore by returning back a handle
+void *ethosu_semaphore_create(void);
+// take from the given semaphore
+void ethosu_semaphore_take(void *sem);
+// give to the given semaphore
+void ethosu_semaphore_give(void *sem);
+```
+
+## Begin/End inference callbacks
+
+The driver provides weakly linked functions as hooks to receive callbacks
+whenever an inference begins and ends. The user can override such functions when
+needed. To avoid memory leaks, any allocations done in `ethosu_inference_begin()`
+must be balanced by a corresponding free of the memory in the
+`ethosu_inference_end()` callback.
+
+```[C]
+void ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg);
+void ethosu_inference_end(struct ethosu_driver *drv, void *user_arg);
+```
+
+Note that the `void *user_arg` pointer passed to the invoke() function is the
+same pointer passed to the begin() and end() callbacks. For example:
+
+```[C]
+void my_function() {
+    ...
+    struct my_data data = {...};
+    int result = ethosu_invoke_v3(drv,
+                                  custom_data_ptr,
+                                  custom_data_size,
+                                  base_addr,
+                                  base_addr_size,
+                                  num_base_addr,
+                                  (void *)&data);
+    ....
+}
+
+void ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg) {
+        struct my_data *data = (struct my_data*) user_arg;
+        // use drv and data here
+}
+
+void ethosu_inference_end(struct ethosu_driver *drv, void *user_arg) {
+        struct my_data *data = (struct my_data*) user_arg;
+        // use drv and data here
+}
+```
+
+## License
 
 The Arm Ethos-U core driver is provided under an Apache-2.0 license. Please see
 [LICENSE.txt](LICENSE.txt) for more information.
 
-# Contributions
+## Contributions
 
 The Arm Ethos-U project welcomes contributions under the Apache-2.0 license.
 
@@ -61,14 +239,14 @@ commit message. You must use your real name, no pseudonyms or anonymous
 contributions are accepted. If there are more than one contributor, everyone
 adds their name and e-mail to the commit message.
 
-```
+```[]
 Author: John Doe \<john.doe@example.org\>
 Date:   Mon Feb 29 12:12:12 2016 +0000
 
 Title of the commit
 
 Short description of the change.
-   
+
 Signed-off-by: John Doe john.doe@example.org
 Signed-off-by: Foo Bar foo.bar@example.org
 ```
@@ -76,11 +254,18 @@ Signed-off-by: Foo Bar foo.bar@example.org
 The contributions will be code reviewed by Arm before they can be accepted into
 the repository.
 
-# Security
+In order to submit a contribution push your patch to
+`ssh://<GITHUB_USER_ID>@review.mlplatform.org:29418/ml/ethos-u/ethos-u-core-driver`.
+To do this you will need to sign-in to
+[review.mlplatform.org](https://review.mlplatform.org) using a GitHub account
+and add your SSH key under your settings. If there is a problem adding the SSH
+key make sure there is a valid email address in the Email Addresses field.
+
+## Security
 
 Please see [Security](SECURITY.md).
 
-# Trademark notice
+## Trademark notice
 
 Arm, Cortex and Ethos are registered trademarks of Arm Limited (or its
 subsidiaries) in the US and/or elsewhere.
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h b/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h
deleted file mode 100644
index c3bcecb..0000000
--- a/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_device.h
+++ /dev/null
@@ -1,430 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied. See the License for the specific language
- * governing permissions and limitations under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-
-#ifndef ETHOSU_DEVICE_H
-#define ETHOSU_DEVICE_H
-
-/******************************************************************************
- * Includes
- ******************************************************************************/
-
-#include <stdbool.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/******************************************************************************
- * Defines
- ******************************************************************************/
-
-#define ETHOSU_DRIVER_VERSION_MAJOR 0  ///< Driver major version
-#define ETHOSU_DRIVER_VERSION_MINOR 16 ///< Driver minor version
-#define ETHOSU_DRIVER_VERSION_PATCH 0  ///< Driver patch version
-#define ETHOSU_DRIVER_BASEP_INDEXES 8  ///< Number of base pointer indexes
-
-#ifndef ETHOSU_PMU_NCOUNTERS
-#define ETHOSU_PMU_NCOUNTERS 4
-#endif
-
-/******************************************************************************
- * Types
- ******************************************************************************/
-
-enum ethosu_error_codes
-{
-    ETHOSU_SUCCESS         = 0,  ///< Success
-    ETHOSU_GENERIC_FAILURE = -1, ///< Generic failure
-    ETHOSU_INVALID_PARAM   = -2  ///< Invalid parameter
-};
-
-struct ethosu_device
-{
-    volatile uintptr_t base_address;
-    uint32_t proto;
-    uint32_t pmcr;
-    uint32_t pmccntr[2];
-    uint32_t pmcnten;
-    uint32_t pmint;
-    uint32_t pmccntr_cfg;
-    uint32_t pmu_evcntr[ETHOSU_PMU_NCOUNTERS];
-    uint32_t pmu_evtypr[ETHOSU_PMU_NCOUNTERS];
-    uint32_t secure;
-    uint32_t privileged;
-};
-
-struct ethosu_id
-{
-    uint32_t version_status; ///< Version status
-    uint32_t version_minor;  ///< Version minor
-    uint32_t version_major;  ///< Version major
-    uint32_t product_major;  ///< Product major
-    uint32_t arch_patch_rev; ///< Architecture version patch
-    uint32_t arch_minor_rev; ///< Architecture version minor
-    uint32_t arch_major_rev; ///< Architecture version major
-};
-
-struct ethosu_config
-{
-    struct
-    {
-        uint32_t macs_per_cc;        ///< MACs per clock cycle
-        uint32_t cmd_stream_version; ///< NPU command stream version
-        uint32_t shram_size;         ///< SHRAM size
-        uint32_t custom_dma;         ///< Custom DMA enabled
-    };
-};
-
-/**
- * Memory type parameter for set_regioncfg_reg:
- *   Counter{0,1}: Outstanding transactions for
- *   AXI port 0 for memory type/region a=0,b=1
- *   Counter{2,3}: Outstanding transactions for
- *   AXI port 1 for memory type/region a=2,b=3
- */
-enum ethosu_memory_type
-{
-    ETHOSU_AXI0_OUTSTANDING_COUNTER0 = 0, ///< NPU axi0_outstanding_counter0
-    ETHOSU_AXI0_OUTSTANDING_COUNTER1 = 1, ///< NPU axi0_outstanding_counter1
-    ETHOSU_AXI1_OUTSTANDING_COUNTER2 = 2, ///< NPU axi1_outstanding_counter2
-    ETHOSU_AXI1_OUTSTANDING_COUNTER3 = 3  ///< NPU axi1_outstanding_counter3
-};
-
-enum ethosu_axi_limit_beats
-{
-    ETHOSU_AXI_LIMIT_64_BYTES  = 0, ///< NPU AXI limit 64 byte burst split alignment.
-    ETHOSU_AXI_LIMIT_128_BYTES = 1, ///< NPU AXI limit 128 byte burst split alignment.
-    ETHOSU_AXI_LIMIT_256_BYTES = 2  ///< NPU AXI limit 256 byte burst split alignment.
-};
-
-enum ethosu_axi_limit_mem_type
-{
-    ETHOSU_MEM_TYPE_DEVICE_NON_BUFFERABLE                 = 0,
-    ETHOSU_MEM_TYPE_DEVICE_BUFFERABLE                     = 1,
-    ETHOSU_MEM_TYPE_NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 2,
-    ETHOSU_MEM_TYPE_NORMAL_NON_CACHEABLE_BUFFERABLE       = 3,
-    ETHOSU_MEM_TYPE_WRITE_THROUGH_NO_ALLOCATE             = 4,
-    ETHOSU_MEM_TYPE_WRITE_THROUGH_READ_ALLOCATE           = 5,
-    ETHOSU_MEM_TYPE_WRITE_THROUGH_WRITE_ALLOCATE          = 6,
-    ETHOSU_MEM_TYPE_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
-    ETHOSU_MEM_TYPE_WRITE_BACK_NO_ALLOCATE                = 8,
-    ETHOSU_MEM_TYPE_WRITE_BACK_READ_ALLOCATE              = 9,
-    ETHOSU_MEM_TYPE_WRITE_BACK_WRITE_ALLOCATE             = 10,
-    ETHOSU_MEM_TYPE_WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 11
-};
-
-enum ethosu_clock_q_request
-{
-    ETHOSU_CLOCK_Q_DISABLE = 0, ///< Disble NPU signal ready for clock off.
-    ETHOSU_CLOCK_Q_ENABLE  = 1  ///< Enable NPU signal ready for clock off when stop+idle state reached.
-};
-
-enum ethosu_power_q_request
-{
-    ETHOSU_POWER_Q_DISABLE = 0, ///< Disble NPU signal ready for power off.
-    ETHOSU_POWER_Q_ENABLE  = 1  ///< Enable NPU signal ready for power off when stop+idle state reached.
-};
-
-/******************************************************************************
- * Prototypes
- ******************************************************************************/
-
-/**
- * Initialize the device.
- */
-enum ethosu_error_codes ethosu_dev_init(struct ethosu_device *dev,
-                                        const void *base_address,
-                                        uint32_t secure_enable,
-                                        uint32_t privilege_enable);
-
-/**
- * Get device id.
- */
-enum ethosu_error_codes ethosu_get_id(struct ethosu_device *dev, struct ethosu_id *id);
-
-/**
- * Get device configuration.
- */
-enum ethosu_error_codes ethosu_get_config(struct ethosu_device *dev, struct ethosu_config *config);
-
-/**
- * Execute a given command stream on NPU.
- * \param[in] cmd_stream_ptr   Pointer to the command stream
- * \param[in] cms_length       Command stream length
- * \param[in] base_addr        Pointer to array of base addresses
- *                             - 0: weight tensor
- *                             - 1: scratch tensor
- *                             - All input tensors
- *                             - All output tensors
- * \param[in] num_base_addr    Number of base addresses.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_run_command_stream(struct ethosu_device *dev,
-                                                  const uint8_t *cmd_stream_ptr,
-                                                  uint32_t cms_length,
-                                                  const uint64_t *base_addr,
-                                                  int num_base_addr);
-
-/**
- * Check if IRQ is raised.
- * \param[out] irq_status      Pointer to IRQ status
- *                             - 0 IRQ not raised
- *                             - 1 IRQ raised
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_is_irq_raised(struct ethosu_device *dev, uint8_t *irq_status);
-
-/**
- * Clear IRQ status.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_clear_irq_status(struct ethosu_device *dev);
-
-/**
- * Get the 16 bit status mask.
- * \param[out] irq_status_mask     Pointer to the status mask.
- *                                 The lower 16 bits of status reg are returned.
- *                                 bit0: state
- *                                 bit1: irq_raised
- *                                 bit2: bus_status
- *                                 bit3: reset_status
- *                                 bit4: cmd_parse_error
- *                                 bit5: cmd_end_reached
- *                                 bit6: pmu_irq_raised
- *                                 bit7-15: reserved
- * \return                         \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_get_status_mask(struct ethosu_device *dev, uint16_t *status_mask);
-
-/**
- * Get the 16 bit IRQ history mask.
- * \param[out] irq_history_mask    Pointer to the IRQ history mask.
- * \return                         \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_get_irq_history_mask(struct ethosu_device *dev, uint16_t *irq_history_mask);
-
-/**
- * Clear the given bits in the
- *                                     IRQ history mask.
- * \param[in] irq_history_clear_mask   16 bit mask indicating which bits to
- *                                     clear in the IRQ history mask.
- * \return                             \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_clear_irq_history_mask(struct ethosu_device *dev, uint16_t irq_history_clear_mask);
-
-/**
- * Perform a NPU soft reset.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_soft_reset(struct ethosu_device *dev);
-
-/**
- * Wait for reset ready.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_wait_for_reset(struct ethosu_device *dev);
-
-/**
- * Read and return the content of a given NPU APB
- *                             register range.
- * \param[in] start_address    Start address.
- * \param[in] num_reg          Number of registers to read.
- * \param[out] reg_p           Pointer to a output area, allocated by the
- *                             caller, where the register content shall be
- *                             written.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_read_apb_reg(struct ethosu_device *dev,
-                                            uint32_t start_address,
-                                            uint16_t num_reg,
-                                            uint32_t *reg_p);
-
-/**
- * Set qconfig register. I.e.
- *                             AXI configuration for the command stream.
- * \param[in] memory_type      Memory_type to use for command stream:
- *                             enum ethosu_memory_type.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_qconfig(struct ethosu_device *dev, enum ethosu_memory_type memory_type);
-
-/**
- * Set register REGIONCFG.
- *                             Base pointer configuration.
- *                             Bits[2*k+1:2*k] give the memory type for BASEP[k].
- * \param[in] region           Region field to set: 0 - 7.
- * \param[in] memory_type      Memory_type to use for region: enum ethosu_memory_type.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_regioncfg(struct ethosu_device *dev,
-                                             uint8_t region,
-                                             enum ethosu_memory_type memory_type);
-
-/**
- * Set AXI limit parameters for port 0 counter 0.
- * \param[in] max_beats        Burst split alignment, \ref ethosu_axi_limit_beats.
- * \param[in] memtype          Cache policy \ref ethosu_axi_limit_mem_type
- * \param[in] max_reads        Maximum number of outstanding reads.
- * \param[in] max_writes       Maximum number of outstanding writes.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_axi_limit0(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes);
-/**
- * Set AXI limit parameters for port 0 counter 1.
- * \param[in] max_beats        Burst split alignment, \ref ethosu_axi_limit_beats.
- * \param[in] memtype          Cache policy \ref ethosu_axi_limit_mem_type
- * \param[in] max_reads        Maximum number of outstanding reads.
- * \param[in] max_writes       Maximum number of outstanding writes.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_axi_limit1(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes);
-/**
- * Set AXI limit parameters for port 1 counter 2.
- * \param[in] max_beats        Burst split alignment, \ref ethosu_axi_limit_beats.
- * \param[in] memtype          Cache policy \ref ethosu_axi_limit_mem_type
- * \param[in] max_reads        Maximum number of outstanding reads.
- * \param[in] max_writes       Maximum number of outstanding writes.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_axi_limit2(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes);
-/**
- * Set AXI limit parameters for port 1 counter 3.
- * \param[in] max_beats        Burst split alignment, \ref ethosu_axi_limit_beats.
- * \param[in] memtype          Cache policy \ref ethosu_axi_limit_mem_type
- * \param[in] max_reads        Maximum number of outstanding reads.
- * \param[in] max_writes       Maximum number of outstanding writes.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_axi_limit3(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes);
-
-/**
- * Get current command stream queue read position.
- * \param[out] qread           Pointer to queue read.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_get_qread(struct ethosu_device *dev, uint32_t *qread);
-
-/**
- * Get revision of NPU
- * \param[out] revision        Pointer to revision read.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_get_revision(struct ethosu_device *dev, uint32_t *revision);
-
-/**
- * Issue run command for the currently programmed
- *                             command stream, starting at current queue read
- *                             position.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_command_run(struct ethosu_device *dev);
-
-/**
- * Dump a 1KB section of SHRAM.
- * \param[in] section          Section offset to 1KB section in SHRAM.
- * \param[out] shram_p         Pointer to a output area, allocated by the
- *                             caller, where the SHRAM content shall be
- *                             written.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_get_shram_data(struct ethosu_device *dev, int section, uint32_t *shram_p);
-
-/**
- * Set clock and power q request enable bits.
- * \param[in] clock_q          Clock q ENABLE/DISABLE \ref clock_q_request.
- * \param[in] power_q          Power q ENABLE/DISABLE \ref power_q_request.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_set_clock_and_power(struct ethosu_device *dev,
-                                                   enum ethosu_clock_q_request clock_q,
-                                                   enum ethosu_power_q_request power_q);
-
-/**
- * Read register.
- * \param[in] address          Address to read.
- * \return                     Register value.
- */
-uint32_t ethosu_read_reg(struct ethosu_device *dev, uint32_t address);
-
-/**
- * Write register.
- * \param[in] address          Address to read.
- * \param[in] value            Value to be written.
- */
-void ethosu_write_reg(struct ethosu_device *dev, uint32_t address, uint32_t value);
-
-/**
- * Write register with shadow variable.
- * \param[in] address          Address to read.
- * \param[in] value            Value to be written.
- */
-void ethosu_write_reg_shadow(struct ethosu_device *dev, uint32_t address, uint32_t value, uint32_t *shadow);
-
-/**
- * Save the PMU configuration to ethosu_device struct.
- * \param[in] dev              Ethos-U device where the PMU configuration is
- *                             saved.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_save_pmu_config(struct ethosu_device *dev);
-
-/**
- * Restore the PMU configuration from a ethosu_device struct.
- * \param[in] dev              Ethos-U device where the PMU configuration is
- *                             stored.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_restore_pmu_config(struct ethosu_device *dev);
-
-/**
- * Save PMU counters to shadow variables in memory.
- * \param[in] dev              Ethos-U device where the PMU configuration is
- *                             stored.
- * \return                     \ref ethosu_error_codes
- */
-enum ethosu_error_codes ethosu_save_pmu_counters(struct ethosu_device *dev);
-
-/**
- * Check if the STATUS register has any error bits set or not.
- * \param[in] dev              Ethos-U device to check.
- * \return                     true if any error bits set,
- *                             false otherwise.
- */
-bool ethosu_status_has_error(struct ethosu_device *dev);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // ETHOSU_DEVICE_H
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h b/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h
index ff040fc..9c9f173 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h
+++ b/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_driver.h
@@ -1,18 +1,19 @@
 /*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied. See the License for the specific language
- * governing permissions and limitations under the License.
+ * www.apache.org/licenses/LICENSE-2.0
  *
- * SPDX-License-Identifier: Apache-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #ifndef ETHOSU_DRIVER_H
@@ -22,7 +23,7 @@
  * Includes
  ******************************************************************************/
 
-#include "ethosu_device.h"
+#include "ethosu_types.h"
 
 #include <stdbool.h>
 #include <stddef.h>
@@ -32,55 +33,57 @@
 extern "C" {
 #endif
 
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+#define ETHOSU_DRIVER_VERSION_MAJOR 0  ///< Driver major version
+#define ETHOSU_DRIVER_VERSION_MINOR 16 ///< Driver minor version
+#define ETHOSU_DRIVER_VERSION_PATCH 0  ///< Driver patch version
+
 /******************************************************************************
  * Types
  ******************************************************************************/
 
-struct ethosu_driver
+// Forward declare
+struct ethosu_device;
+
+enum ethosu_job_state
 {
-    struct ethosu_device dev;
-    bool abort_inference;
-    uint64_t fast_memory;
-    size_t fast_memory_size;
-    bool status_error;
-    bool dev_power_always_on;
-    struct ethosu_driver *next;
-    bool reserved;
-    volatile bool irq_triggered;
-    void *semaphore;
-    uint8_t clock_request;
-    uint8_t power_request;
+    ETHOSU_JOB_IDLE = 0,
+    ETHOSU_JOB_RUNNING,
+    ETHOSU_JOB_DONE
 };
 
-struct ethosu_version_id
+struct ethosu_job
 {
-    // Ethos-U id
-    uint8_t version_status;
-    uint8_t version_minor;
-    uint8_t version_major;
-    uint8_t product_major;
-    uint8_t arch_patch_rev;
-    uint8_t arch_minor_rev;
-    uint8_t arch_major_rev;
-
-    // Driver Version
-    uint8_t driver_patch_rev;
-    uint8_t driver_minor_rev;
-    uint8_t driver_major_rev;
+    volatile enum ethosu_job_state state;
+    const void *custom_data_ptr;
+    int custom_data_size;
+    const uint64_t *base_addr;
+    const size_t *base_addr_size;
+    int num_base_addr;
+    void *user_arg;
 };
 
-struct ethosu_version_config
+struct ethosu_driver
 {
-    uint8_t macs_per_cc;
-    uint8_t cmd_stream_version;
-    uint8_t shram_size;
-    uint8_t custom_dma;
+    struct ethosu_device *dev;
+    struct ethosu_driver *next;
+    struct ethosu_job job;
+    void *semaphore;
+    uint64_t fast_memory;
+    size_t fast_memory_size;
+    uint32_t power_request_counter;
+    bool status_error;
+    bool reserved;
 };
 
-struct ethosu_version
+struct ethosu_driver_version
 {
-    struct ethosu_version_id id;
-    struct ethosu_version_config cfg;
+    uint8_t major;
+    uint8_t minor;
+    uint8_t patch;
 };
 
 enum ethosu_request_clients
@@ -90,10 +93,111 @@ enum ethosu_request_clients
 };
 
 /******************************************************************************
- * Variables
+ * Prototypes (weak functions in driver)
  ******************************************************************************/
 
-extern struct ethosu_driver ethosu_drv;
+/**
+ * Interrupt handler to be called on IRQ from Ethos-U
+ *
+ * @param drv       Pointer to driver handle
+ */
+void ethosu_irq_handler(struct ethosu_driver *drv);
+
+/**
+ * Flush/clean the data cache by address and size. Passing NULL as p argument
+ * expects the whole cache to be flushed.
+ *
+ * Addresses passed to this function must be 16 byte aligned.
+ *
+ * @param p         16 byte aligned address
+ * @param bytes     Size of memory block in bytes
+ */
+void ethosu_flush_dcache(uint32_t *p, size_t bytes);
+
+/**
+ * Invalidate the data cache by address and size. Passing NULL as p argument
+ * expects the whole cache to be invalidated.
+ *
+ * Addresses passed to this function must be 16 byte aligned.
+ *
+ * @param p         16 byte aligned address
+ * @param bytes     Size in bytes
+ */
+void ethosu_invalidate_dcache(uint32_t *p, size_t bytes);
+
+/**
+ * Minimal mutex implementation for baremetal applications. See
+ * ethosu_driver.c.
+ *
+ * @return Pointer to mutex handle
+ */
+void *ethosu_mutex_create(void);
+
+/**
+ * Minimal semaphore implementation for baremetal applications. See
+ * ethosu_driver.c.
+ *
+ * @return Pointer to semaphore handle
+ */
+void *ethosu_semaphore_create(void);
+
+/**
+ * Lock mutex.
+ *
+ * @param mutex     Pointer to mutex handle
+ * @returns 0 on success, else negative error code
+ */
+int ethosu_mutex_lock(void *mutex);
+
+/**
+ * Unlock mutex.
+ *
+ * @param mutex     Pointer to mutex handle
+ * @returns 0 on success, else negative error code
+ */
+int ethosu_mutex_unlock(void *mutex);
+
+/**
+ * Take semaphore.
+ *
+ * @param sem       Pointer to semaphore handle
+ * @returns 0 on success, else negative error code
+ */
+int ethosu_semaphore_take(void *sem);
+
+/**
+ * Give semaphore.
+ *
+ * @param sem       Pointer to semaphore handle
+ * @returns 0 on success, else negative error code
+ */
+int ethosu_semaphore_give(void *sem);
+
+/**
+ * Callback invoked just before the inference is started.
+ *
+ * @param drv       Pointer to driver handle
+ * @param user_arg  User argument provided to ethosu_invoke_*()
+ */
+void ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg);
+
+/**
+ * Callback invoked just after the inference has completed.
+ *
+ * @param drv       Pointer to driver handle
+ * @param user_arg  User argument provided to ethosu_invoke_*()
+ */
+void ethosu_inference_end(struct ethosu_driver *drv, void *user_arg);
+
+/**
+ * Remapping command stream and base pointer addresses.
+ *
+ * @param address   Address to be remapped.
+ * @param index     -1 command stream, 0-n base address index
+ *
+ * @return Remapped address
+ */
+uint64_t ethosu_address_remap(uint64_t address, int index);
 
 /******************************************************************************
  * Prototypes
@@ -101,83 +205,151 @@ extern struct ethosu_driver ethosu_drv;
 
 /**
  * Initialize the Ethos-U driver.
+ *
+ * @param drv               Pointer to driver handle
+ * @param base_address      NPU register base address
+ * @param fast_memory       Fast memory area, used for Ethos-U65 with spilling
+ * @param fast_memory_size  Size in bytes of fast memory area
+ * @param secure_enable     Configure NPU in secure- or non-secure mode
+ * @param privilege_enable  Configure NPU in privileged- or non-privileged mode
+ * @return 0 on success, else negative error code
  */
 int ethosu_init(struct ethosu_driver *drv,
-                const void *base_address,
+                void *const base_address,
                 const void *fast_memory,
                 const size_t fast_memory_size,
                 uint32_t secure_enable,
                 uint32_t privilege_enable);
 
 /**
- * Get Ethos-U version.
+ * Deinitialize the Ethos-U driver.
+ *
+ * @param drv       Pointer to driver handle
  */
-int ethosu_get_version(struct ethosu_driver *drv, struct ethosu_version *version);
+void ethosu_deinit(struct ethosu_driver *drv);
 
 /**
- * Invoke Vela command stream.
+ * Soft resets the Ethos-U device.
+ *
+ * @param drv       Pointer to driver handle
+ * @return 0 on success, else negative error code
  */
-int ethosu_invoke(struct ethosu_driver *drv,
-                  const void *custom_data_ptr,
-                  const int custom_data_size,
-                  const uint64_t *base_addr,
-                  const size_t *base_addr_size,
-                  const int num_base_addr);
+int ethosu_soft_reset(struct ethosu_driver *drv);
 
 /**
- * Abort Ethos-U inference.
+ * Request to disable Q-channel power gating of the Ethos-U device.
+ * Power requests are ref.counted. Increases count.
+ * (Note: clock gating is made to follow power gating)
+ *
+ * @param drv       Pointer to driver handle
+ * @return 0 on success, else negative error code
  */
-void ethosu_abort(struct ethosu_driver *drv);
+int ethosu_request_power(struct ethosu_driver *drv);
 
 /**
- * Interrupt handler do be called on IRQ from Ethos-U
+ * Release disable request for Q-channel power gating of the Ethos-U device.
+ * Power requests are ref.counted. Decreases count.
+ *
+ * @param drv       Pointer to driver handle
  */
-void ethosu_irq_handler(struct ethosu_driver *drv);
+void ethosu_release_power(struct ethosu_driver *drv);
 
 /**
- * Set Ethos-U power mode.
+ * Get Ethos-U driver version.
+ *
+ * @param ver       Driver version struct
  */
-void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on);
+void ethosu_get_driver_version(struct ethosu_driver_version *ver);
 
 /**
- *  Register a driver for multiNPU usage
+ * Get Ethos-U hardware information.
+ *
+ * @param drv       Pointer to driver handle
+ * @param hw        Hardware information struct
  */
-int ethosu_register_driver(struct ethosu_driver *drv);
+void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw);
 
 /**
- * Deregister a driver from multiNPU usage
+ * Invoke command stream.
+ *
+ * @param drv               Pointer to driver handle
+ * @param custom_data_ptr   Custom data payload
+ * @param custom_data_size  Size in bytes of custom data
+ * @param base_addr         Array of base address pointers
+ * @param base_addr_size    Size in bytes of each address in base_addr
+ * @param num_base_addr     Number of elements in base_addr array
+ * @param user_arg          User argument, will be passed to
+ *                          ethosu_inference_begin() and ethosu_inference_end()
+ * @return 0 on success, else negative error code
  */
-int ethosu_deregister_driver(struct ethosu_driver *drv);
+int ethosu_invoke_v3(struct ethosu_driver *drv,
+                     const void *custom_data_ptr,
+                     const int custom_data_size,
+                     uint64_t *const base_addr,
+                     const size_t *base_addr_size,
+                     const int num_base_addr,
+                     void *user_arg);
+
+#define ethosu_invoke(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr)                \
+    ethosu_invoke_v3(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, 0)
 
 /**
- * Reserves a driver to execute inference with
+ * Invoke command stream using async interface.
+ * Must be followed by call(s) to ethosu_wait() upon successful return.
+ *
+ * @see ethosu_invoke_v3 for documentation.
  */
-struct ethosu_driver *ethosu_reserve_driver(void);
+int ethosu_invoke_async(struct ethosu_driver *drv,
+                        const void *custom_data_ptr,
+                        const int custom_data_size,
+                        uint64_t *const base_addr,
+                        const size_t *base_addr_size,
+                        const int num_base_addr,
+                        void *user_arg);
 
 /**
- * Change driver status to available
+ * Wait for inference to complete (block=true)
+ * Poll status or finish up if inference is complete (block=false)
+ * (This function is only intended to be used in conjunction with ethosu_invoke_async)
+ *
+ * @param drv       Pointer to driver handle
+ * @param block     If call should block if inference is running
+ * @return -2 on inference not invoked, -1 on inference error, 0 on success, 1 on inference running
  */
-void ethosu_release_driver(struct ethosu_driver *drv);
+int ethosu_wait(struct ethosu_driver *drv, bool block);
 
 /**
- * Set clock and power request bits
+ * Reserves a driver to execute inference with. Call will block until a driver
+ * is available.
+ *
+ * @return Pointer to driver handle.
  */
-enum ethosu_error_codes set_clock_and_power_request(struct ethosu_driver *drv,
-                                                    enum ethosu_request_clients client,
-                                                    enum ethosu_clock_q_request clock_request,
-                                                    enum ethosu_power_q_request power_request);
+struct ethosu_driver *ethosu_reserve_driver(void);
 
 /**
- * Static inline for backwards-compatibility
+ * Release driver that was previously reserved with @see ethosu_reserve_driver.
+ *
+ * @param drv       Pointer to driver handle
+ */
+void ethosu_release_driver(struct ethosu_driver *drv);
+
+/**
+ * Static inline for backwards-compatibility.
+ *
+ * @see ethosu_invoke_v3 for documentation.
  */
 static inline int ethosu_invoke_v2(const void *custom_data_ptr,
                                    const int custom_data_size,
-                                   const uint64_t *base_addr,
+                                   uint64_t *const base_addr,
                                    const size_t *base_addr_size,
                                    const int num_base_addr)
 {
     struct ethosu_driver *drv = ethosu_reserve_driver();
-    int result = ethosu_invoke(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr);
+    if (!drv)
+    {
+        return -1;
+    }
+    int result = ethosu_invoke_v3(drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, 0);
     ethosu_release_driver(drv);
     return result;
 }
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_types.h b/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_types.h
new file mode 100644
index 0000000..a8062dd
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/include/ethosu_types.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_TYPES_H
+#define ETHOSU_TYPES_H
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+
+#include <stdint.h>
+
+/******************************************************************************
+ * Types
+ ******************************************************************************/
+
+enum ethosu_error_codes
+{
+    ETHOSU_SUCCESS         = 0,  ///< Success
+    ETHOSU_GENERIC_FAILURE = -1, ///< Generic failure
+    ETHOSU_INVALID_PARAM   = -2  ///< Invalid parameter
+};
+
+enum ethosu_clock_q_request
+{
+    ETHOSU_CLOCK_Q_DISABLE   = 0, ///< Disable NPU signal ready for clock off.
+    ETHOSU_CLOCK_Q_ENABLE    = 1, ///< Enable NPU signal ready for clock off when stop+idle state reached.
+    ETHOSU_CLOCK_Q_UNCHANGED = 2  ///< Keep current clock q setting
+};
+
+enum ethosu_power_q_request
+{
+    ETHOSU_POWER_Q_DISABLE   = 0, ///< Disable NPU signal ready for power off.
+    ETHOSU_POWER_Q_ENABLE    = 1, ///< Enable NPU signal ready for power off when stop+idle state reached.
+    ETHOSU_POWER_Q_UNCHANGED = 2  ///< Keep current power q setting
+};
+
+struct ethosu_id
+{
+    uint32_t version_status; ///< Version status
+    uint32_t version_minor;  ///< Version minor
+    uint32_t version_major;  ///< Version major
+    uint32_t product_major;  ///< Product major
+    uint32_t arch_patch_rev; ///< Architecture version patch
+    uint32_t arch_minor_rev; ///< Architecture version minor
+    uint32_t arch_major_rev; ///< Architecture version major
+};
+
+struct ethosu_config
+{
+    uint32_t macs_per_cc;        ///< MACs per clock cycle
+    uint32_t cmd_stream_version; ///< NPU command stream version
+    uint32_t custom_dma;         ///< Custom DMA enabled
+};
+
+struct ethosu_hw_info
+{
+    struct ethosu_id version;
+    struct ethosu_config cfg;
+};
+#endif // ETHOSU_TYPES_H
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h b/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h
index acd2a94..b717130 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h
+++ b/edge-impulse-sdk/porting/ethos-core-driver/include/pmu_ethosu.h
@@ -1,18 +1,19 @@
 /*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied. See the License for the specific language
- * governing permissions and limitations under the License.
+ * www.apache.org/licenses/LICENSE-2.0
  *
- * SPDX-License-Identifier: Apache-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #ifndef PMU_ETHOSU_H
@@ -104,6 +105,15 @@ enum ethosu_pmu_event_type
     ETHOSU_PMU_AXI0_ENABLED_CYCLES,
     ETHOSU_PMU_AXI0_RD_STALL_LIMIT,
     ETHOSU_PMU_AXI0_WR_STALL_LIMIT,
+    ETHOSU_PMU_AXI_LATENCY_ANY,
+    ETHOSU_PMU_AXI_LATENCY_32,
+    ETHOSU_PMU_AXI_LATENCY_64,
+    ETHOSU_PMU_AXI_LATENCY_128,
+    ETHOSU_PMU_AXI_LATENCY_256,
+    ETHOSU_PMU_AXI_LATENCY_512,
+    ETHOSU_PMU_AXI_LATENCY_1024,
+    ETHOSU_PMU_ECC_DMA,
+    ETHOSU_PMU_ECC_SB0,
     ETHOSU_PMU_AXI1_RD_TRANS_ACCEPTED,
     ETHOSU_PMU_AXI1_RD_TRANS_COMPLETED,
     ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED,
@@ -117,15 +127,6 @@ enum ethosu_pmu_event_type
     ETHOSU_PMU_AXI1_ENABLED_CYCLES,
     ETHOSU_PMU_AXI1_RD_STALL_LIMIT,
     ETHOSU_PMU_AXI1_WR_STALL_LIMIT,
-    ETHOSU_PMU_AXI_LATENCY_ANY,
-    ETHOSU_PMU_AXI_LATENCY_32,
-    ETHOSU_PMU_AXI_LATENCY_64,
-    ETHOSU_PMU_AXI_LATENCY_128,
-    ETHOSU_PMU_AXI_LATENCY_256,
-    ETHOSU_PMU_AXI_LATENCY_512,
-    ETHOSU_PMU_AXI_LATENCY_1024,
-    ETHOSU_PMU_ECC_DMA,
-    ETHOSU_PMU_ECC_SB0,
     ETHOSU_PMU_ECC_SB1,
 
     ETHOSU_PMU_SENTINEL // End-marker (not event)
@@ -152,6 +153,12 @@ void ETHOSU_PMU_Disable(struct ethosu_driver *drv);
  */
 void ETHOSU_PMU_Set_EVTYPER(struct ethosu_driver *drv, uint32_t num, enum ethosu_pmu_event_type type);
 
+/**
+ * \brief   Get number of PMU event counters
+ * \return                Number of event counters
+ */
+uint32_t ETHOSU_PMU_Get_NumEventCounters(void);
+
 /**
  * \brief   Get event to count for PMU eventer counter
  * \param [in]    num     Event counter (0-ETHOSU_PMU_NCOUNTERS) to configure
@@ -302,6 +309,16 @@ void ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(struct ethosu_driver *drv, enum etho
  */
 void ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type stop_event);
 
+/**
+ * \brief   Read qread register
+ */
+uint32_t ETHOSU_PMU_Get_QREAD(struct ethosu_driver *drv);
+
+/**
+ * \brief   Read status register
+ */
+uint32_t ETHOSU_PMU_Get_STATUS(struct ethosu_driver *drv);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ehtosu_config_u65.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ehtosu_config_u65.h
new file mode 100644
index 0000000..b115f43
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ehtosu_config_u65.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019-2020,2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_CONFIG_H
+#define ETHOSU_CONFIG_H
+
+/* Set default values if not manually overridden */
+
+#ifndef NPU_QCONFIG
+#define NPU_QCONFIG 2
+#endif
+
+#ifndef NPU_REGIONCFG_0
+#define NPU_REGIONCFG_0 3
+#endif
+
+#ifndef NPU_REGIONCFG_1
+#define NPU_REGIONCFG_1 2
+#endif
+
+#ifndef NPU_REGIONCFG_2
+#define NPU_REGIONCFG_2 1
+#endif
+
+#ifndef NPU_REGIONCFG_3
+#define NPU_REGIONCFG_3 1
+#endif
+
+#ifndef NPU_REGIONCFG_4
+#define NPU_REGIONCFG_4 1
+#endif
+
+#ifndef NPU_REGIONCFG_5
+#define NPU_REGIONCFG_5 1
+#endif
+
+#ifndef NPU_REGIONCFG_6
+#define NPU_REGIONCFG_6 1
+#endif
+
+#ifndef NPU_REGIONCFG_7
+#define NPU_REGIONCFG_7 1
+#endif
+
+#ifndef AXI_LIMIT0_MAX_BEATS_BYTES
+#define AXI_LIMIT0_MAX_BEATS_BYTES 0x0
+#endif
+
+#ifndef AXI_LIMIT0_MEM_TYPE
+#define AXI_LIMIT0_MEM_TYPE 0x0
+#endif
+
+#ifndef AXI_LIMIT0_MAX_OUTSTANDING_READS
+#define AXI_LIMIT0_MAX_OUTSTANDING_READS 64
+#endif
+
+#ifndef AXI_LIMIT0_MAX_OUTSTANDING_WRITES
+#define AXI_LIMIT0_MAX_OUTSTANDING_WRITES 32
+#endif
+
+#ifndef AXI_LIMIT1_MAX_BEATS_BYTES
+#define AXI_LIMIT1_MAX_BEATS_BYTES 0x0
+#endif
+
+#ifndef AXI_LIMIT1_MEM_TYPE
+#define AXI_LIMIT1_MEM_TYPE 0x0
+#endif
+
+#ifndef AXI_LIMIT1_MAX_OUTSTANDING_READS
+#define AXI_LIMIT1_MAX_OUTSTANDING_READS 64
+#endif
+
+#ifndef AXI_LIMIT1_MAX_OUTSTANDING_WRITES
+#define AXI_LIMIT1_MAX_OUTSTANDING_WRITES 32
+#endif
+
+#ifndef AXI_LIMIT2_MAX_BEATS_BYTES
+#define AXI_LIMIT2_MAX_BEATS_BYTES 0x0
+#endif
+
+#ifndef AXI_LIMIT2_MEM_TYPE
+#define AXI_LIMIT2_MEM_TYPE 0x0
+#endif
+
+#ifndef AXI_LIMIT2_MAX_OUTSTANDING_READS
+#define AXI_LIMIT2_MAX_OUTSTANDING_READS 64
+#endif
+
+#ifndef AXI_LIMIT2_MAX_OUTSTANDING_WRITES
+#define AXI_LIMIT2_MAX_OUTSTANDING_WRITES 32
+#endif
+
+#ifndef AXI_LIMIT3_MAX_BEATS_BYTES
+#define AXI_LIMIT3_MAX_BEATS_BYTES 0x0
+#endif
+
+#ifndef AXI_LIMIT3_MEM_TYPE
+#define AXI_LIMIT3_MEM_TYPE 0x0
+#endif
+
+#ifndef AXI_LIMIT3_MAX_OUTSTANDING_READS
+#define AXI_LIMIT3_MAX_OUTSTANDING_READS 64
+#endif
+
+#ifndef AXI_LIMIT3_MAX_OUTSTANDING_WRITES
+#define AXI_LIMIT3_MAX_OUTSTANDING_WRITES 32
+#endif
+
+#endif /* #ifndef ETHOSU_CONFIG_H */
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h
index 0d1ee6c..9c0d230 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu55_interface.h
@@ -1,5 +1,6 @@
+
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -35,15 +36,32 @@
 #define STRUCT struct
 #else
 #define STRUCT
-#include <stdexcept>
 #endif
 
+#if defined(__cplusplus) && defined(NPU_DISASSEMBLE)
+#include <iomanip>
+#include <sstream>
+#include <vector>
+#endif
+
+#if defined(__cplusplus) && !defined(NPU_NAMESPACE)
+#define NPU_NAMESPACE npu
+#endif
+
+#ifdef __cplusplus
+#include <cstring>
+#include <limits>
+#endif
+
+#ifdef __cplusplus
+namespace NPU_NAMESPACE
+{
+#endif
 #define NNX_ARCH_VERSION_MAJOR 1
-#define NNX_ARCH_VERSION_MINOR 0
-#define NNX_ARCH_VERSION_PATCH 6
+#define NNX_ARCH_VERSION_MINOR 1
+#define NNX_ARCH_VERSION_PATCH 0
 
 // Register offsets
-
 //
 // Register subpage BASE
 //
@@ -51,8 +69,8 @@
 #define NPU_REG_STATUS 0x0004
 #define NPU_REG_CMD 0x0008
 #define NPU_REG_RESET 0x000C
-#define NPU_REG_QBASE0 0x0010
-#define NPU_REG_QBASE1 0x0014
+#define NPU_REG_QBASE 0x0010
+#define NPU_REG_QBASE_HI 0x0014
 #define NPU_REG_QREAD 0x0018
 #define NPU_REG_QCONFIG 0x001C
 #define NPU_REG_QSIZE 0x0020
@@ -64,28 +82,14 @@
 #define NPU_REG_AXI_LIMIT1 0x0044
 #define NPU_REG_AXI_LIMIT2 0x0048
 #define NPU_REG_AXI_LIMIT3 0x004C
-#define BASE_REGISTERS_SIZE 0x0050
+#define BASE_REGISTERS_SIZE 0x0080
 
 //
 // Register subpage BASE_POINTERS
 //
-#define NPU_REG_BASEP0 0x0080
-#define NPU_REG_BASEP1 0x0084
-#define NPU_REG_BASEP2 0x0088
-#define NPU_REG_BASEP3 0x008C
-#define NPU_REG_BASEP4 0x0090
-#define NPU_REG_BASEP5 0x0094
-#define NPU_REG_BASEP6 0x0098
-#define NPU_REG_BASEP7 0x009C
-#define NPU_REG_BASEP8 0x00A0
-#define NPU_REG_BASEP9 0x00A4
-#define NPU_REG_BASEP10 0x00A8
-#define NPU_REG_BASEP11 0x00AC
-#define NPU_REG_BASEP12 0x00B0
-#define NPU_REG_BASEP13 0x00B4
-#define NPU_REG_BASEP14 0x00B8
-#define NPU_REG_BASEP15 0x00BC
-#define BASE_POINTERS_REGISTERS_SIZE 0x00C0
+#define NPU_REG_BASEP_BASE 0x0080
+#define NPU_REG_BASEP_ARRLEN 0x0008
+#define BASE_POINTERS_REGISTERS_SIZE 0x0100
 
 //
 // Register subpage DEBUG
@@ -98,27 +102,8 @@
 #define NPU_REG_CLKFORCE 0x0140
 #define NPU_REG_DEBUG_ADDRESS 0x0144
 #define NPU_REG_DEBUG_MISC 0x0148
-#define NPU_REG_DEBUGCORE 0x014C
 #define NPU_REG_DEBUG_BLOCK 0x0150
-#define DEBUG_REGISTERS_SIZE 0x0154
-
-//
-// Register subpage ID
-//
-#define NPU_REG_REVISION 0x0FC0
-#define NPU_REG_PID4 0x0FD0
-#define NPU_REG_PID5 0x0FD4
-#define NPU_REG_PID6 0x0FD8
-#define NPU_REG_PID7 0x0FDC
-#define NPU_REG_PID0 0x0FE0
-#define NPU_REG_PID1 0x0FE4
-#define NPU_REG_PID2 0x0FE8
-#define NPU_REG_PID3 0x0FEC
-#define NPU_REG_CID0 0x0FF0
-#define NPU_REG_CID1 0x0FF4
-#define NPU_REG_CID2 0x0FF8
-#define NPU_REG_CID3 0x0FFC
-#define ID_REGISTERS_SIZE 0x1000
+#define DEBUG_REGISTERS_SIZE 0x0180
 
 //
 // Register subpage PMU
@@ -130,283 +115,72 @@
 #define NPU_REG_PMOVSCLR 0x0190
 #define NPU_REG_PMINTSET 0x0194
 #define NPU_REG_PMINTCLR 0x0198
-#define NPU_REG_PMCCNTR_LO 0x01A0
+#define NPU_REG_PMCCNTR 0x01A0
 #define NPU_REG_PMCCNTR_HI 0x01A4
 #define NPU_REG_PMCCNTR_CFG 0x01A8
 #define NPU_REG_PMCAXI_CHAN 0x01AC
-#define NPU_REG_PMEVCNTR0 0x0300
-#define NPU_REG_PMEVCNTR1 0x0304
-#define NPU_REG_PMEVCNTR2 0x0308
-#define NPU_REG_PMEVCNTR3 0x030C
-#define NPU_REG_PMEVTYPER0 0x0380
-#define NPU_REG_PMEVTYPER1 0x0384
-#define NPU_REG_PMEVTYPER2 0x0388
-#define NPU_REG_PMEVTYPER3 0x038C
-#define PMU_REGISTERS_SIZE 0x0390
+#define PMU_REGISTERS_SIZE 0x0200
+
+//
+// Register subpage TSU_DEBUG
+//
+#define NPU_REG_KERNEL_X 0x0200
+#define NPU_REG_KERNEL_Y 0x0204
+#define NPU_REG_KERNEL_W_M1 0x0208
+#define NPU_REG_KERNEL_H_M1 0x020C
+#define NPU_REG_OFM_CBLK_WIDTH_M1 0x0210
+#define NPU_REG_OFM_CBLK_HEIGHT_M1 0x0214
+#define NPU_REG_OFM_CBLK_DEPTH_M1 0x0218
+#define NPU_REG_IFM_CBLK_DEPTH_M1 0x021C
+#define NPU_REG_OFM_X 0x0220
+#define NPU_REG_OFM_Y 0x0224
+#define NPU_REG_OFM_Z 0x0228
+#define NPU_REG_IFM_Z 0x022C
+#define NPU_REG_PAD_TOP 0x0230
+#define NPU_REG_PAD_LEFT 0x0234
+#define NPU_REG_IFM_CBLK_WIDTH 0x0238
+#define NPU_REG_IFM_CBLK_HEIGHT 0x023C
+#define NPU_REG_DMA_IFM_SRC 0x0240
+#define NPU_REG_DMA_IFM_SRC_HI 0x0244
+#define NPU_REG_DMA_IFM_DST 0x0248
+#define NPU_REG_DMA_OFM_SRC 0x024C
+#define NPU_REG_DMA_OFM_DST 0x0250
+#define NPU_REG_DMA_OFM_DST_HI 0x0254
+#define NPU_REG_DMA_WEIGHT_SRC 0x0258
+#define NPU_REG_DMA_WEIGHT_SRC_HI 0x025C
+#define NPU_REG_DMA_CMD_SRC 0x0260
+#define NPU_REG_DMA_CMD_SRC_HI 0x0264
+#define NPU_REG_DMA_CMD_SIZE 0x0268
+#define NPU_REG_DMA_M2M_SRC 0x026C
+#define NPU_REG_DMA_M2M_SRC_HI 0x0270
+#define NPU_REG_DMA_M2M_DST 0x0274
+#define NPU_REG_DMA_M2M_DST_HI 0x0278
+#define NPU_REG_CURRENT_QREAD 0x027C
+#define NPU_REG_DMA_SCALE_SRC 0x0280
+#define NPU_REG_DMA_SCALE_SRC_HI 0x0284
+#define NPU_REG_CURRENT_BLOCK 0x02B4
+#define NPU_REG_CURRENT_OP 0x02B8
+#define NPU_REG_CURRENT_CMD 0x02BC
+#define TSU_DEBUG_REGISTERS_SIZE 0x02C0
+
+//
+// Register subpage PMU_COUNTERS
+//
+#define NPU_REG_PMEVCNTR_BASE 0x0300
+#define NPU_REG_PMEVCNTR_ARRLEN 0x0004
+#define NPU_REG_PMEVTYPER_BASE 0x0380
+#define NPU_REG_PMEVTYPER_ARRLEN 0x0004
+#define PMU_COUNTERS_REGISTERS_SIZE 0x0400
 
 //
 // Register subpage SHARED_BUFFER
 //
-#define NPU_REG_SHARED_BUFFER0 0x0400
-#define NPU_REG_SHARED_BUFFER1 0x0404
-#define NPU_REG_SHARED_BUFFER2 0x0408
-#define NPU_REG_SHARED_BUFFER3 0x040C
-#define NPU_REG_SHARED_BUFFER4 0x0410
-#define NPU_REG_SHARED_BUFFER5 0x0414
-#define NPU_REG_SHARED_BUFFER6 0x0418
-#define NPU_REG_SHARED_BUFFER7 0x041C
-#define NPU_REG_SHARED_BUFFER8 0x0420
-#define NPU_REG_SHARED_BUFFER9 0x0424
-#define NPU_REG_SHARED_BUFFER10 0x0428
-#define NPU_REG_SHARED_BUFFER11 0x042C
-#define NPU_REG_SHARED_BUFFER12 0x0430
-#define NPU_REG_SHARED_BUFFER13 0x0434
-#define NPU_REG_SHARED_BUFFER14 0x0438
-#define NPU_REG_SHARED_BUFFER15 0x043C
-#define NPU_REG_SHARED_BUFFER16 0x0440
-#define NPU_REG_SHARED_BUFFER17 0x0444
-#define NPU_REG_SHARED_BUFFER18 0x0448
-#define NPU_REG_SHARED_BUFFER19 0x044C
-#define NPU_REG_SHARED_BUFFER20 0x0450
-#define NPU_REG_SHARED_BUFFER21 0x0454
-#define NPU_REG_SHARED_BUFFER22 0x0458
-#define NPU_REG_SHARED_BUFFER23 0x045C
-#define NPU_REG_SHARED_BUFFER24 0x0460
-#define NPU_REG_SHARED_BUFFER25 0x0464
-#define NPU_REG_SHARED_BUFFER26 0x0468
-#define NPU_REG_SHARED_BUFFER27 0x046C
-#define NPU_REG_SHARED_BUFFER28 0x0470
-#define NPU_REG_SHARED_BUFFER29 0x0474
-#define NPU_REG_SHARED_BUFFER30 0x0478
-#define NPU_REG_SHARED_BUFFER31 0x047C
-#define NPU_REG_SHARED_BUFFER32 0x0480
-#define NPU_REG_SHARED_BUFFER33 0x0484
-#define NPU_REG_SHARED_BUFFER34 0x0488
-#define NPU_REG_SHARED_BUFFER35 0x048C
-#define NPU_REG_SHARED_BUFFER36 0x0490
-#define NPU_REG_SHARED_BUFFER37 0x0494
-#define NPU_REG_SHARED_BUFFER38 0x0498
-#define NPU_REG_SHARED_BUFFER39 0x049C
-#define NPU_REG_SHARED_BUFFER40 0x04A0
-#define NPU_REG_SHARED_BUFFER41 0x04A4
-#define NPU_REG_SHARED_BUFFER42 0x04A8
-#define NPU_REG_SHARED_BUFFER43 0x04AC
-#define NPU_REG_SHARED_BUFFER44 0x04B0
-#define NPU_REG_SHARED_BUFFER45 0x04B4
-#define NPU_REG_SHARED_BUFFER46 0x04B8
-#define NPU_REG_SHARED_BUFFER47 0x04BC
-#define NPU_REG_SHARED_BUFFER48 0x04C0
-#define NPU_REG_SHARED_BUFFER49 0x04C4
-#define NPU_REG_SHARED_BUFFER50 0x04C8
-#define NPU_REG_SHARED_BUFFER51 0x04CC
-#define NPU_REG_SHARED_BUFFER52 0x04D0
-#define NPU_REG_SHARED_BUFFER53 0x04D4
-#define NPU_REG_SHARED_BUFFER54 0x04D8
-#define NPU_REG_SHARED_BUFFER55 0x04DC
-#define NPU_REG_SHARED_BUFFER56 0x04E0
-#define NPU_REG_SHARED_BUFFER57 0x04E4
-#define NPU_REG_SHARED_BUFFER58 0x04E8
-#define NPU_REG_SHARED_BUFFER59 0x04EC
-#define NPU_REG_SHARED_BUFFER60 0x04F0
-#define NPU_REG_SHARED_BUFFER61 0x04F4
-#define NPU_REG_SHARED_BUFFER62 0x04F8
-#define NPU_REG_SHARED_BUFFER63 0x04FC
-#define NPU_REG_SHARED_BUFFER64 0x0500
-#define NPU_REG_SHARED_BUFFER65 0x0504
-#define NPU_REG_SHARED_BUFFER66 0x0508
-#define NPU_REG_SHARED_BUFFER67 0x050C
-#define NPU_REG_SHARED_BUFFER68 0x0510
-#define NPU_REG_SHARED_BUFFER69 0x0514
-#define NPU_REG_SHARED_BUFFER70 0x0518
-#define NPU_REG_SHARED_BUFFER71 0x051C
-#define NPU_REG_SHARED_BUFFER72 0x0520
-#define NPU_REG_SHARED_BUFFER73 0x0524
-#define NPU_REG_SHARED_BUFFER74 0x0528
-#define NPU_REG_SHARED_BUFFER75 0x052C
-#define NPU_REG_SHARED_BUFFER76 0x0530
-#define NPU_REG_SHARED_BUFFER77 0x0534
-#define NPU_REG_SHARED_BUFFER78 0x0538
-#define NPU_REG_SHARED_BUFFER79 0x053C
-#define NPU_REG_SHARED_BUFFER80 0x0540
-#define NPU_REG_SHARED_BUFFER81 0x0544
-#define NPU_REG_SHARED_BUFFER82 0x0548
-#define NPU_REG_SHARED_BUFFER83 0x054C
-#define NPU_REG_SHARED_BUFFER84 0x0550
-#define NPU_REG_SHARED_BUFFER85 0x0554
-#define NPU_REG_SHARED_BUFFER86 0x0558
-#define NPU_REG_SHARED_BUFFER87 0x055C
-#define NPU_REG_SHARED_BUFFER88 0x0560
-#define NPU_REG_SHARED_BUFFER89 0x0564
-#define NPU_REG_SHARED_BUFFER90 0x0568
-#define NPU_REG_SHARED_BUFFER91 0x056C
-#define NPU_REG_SHARED_BUFFER92 0x0570
-#define NPU_REG_SHARED_BUFFER93 0x0574
-#define NPU_REG_SHARED_BUFFER94 0x0578
-#define NPU_REG_SHARED_BUFFER95 0x057C
-#define NPU_REG_SHARED_BUFFER96 0x0580
-#define NPU_REG_SHARED_BUFFER97 0x0584
-#define NPU_REG_SHARED_BUFFER98 0x0588
-#define NPU_REG_SHARED_BUFFER99 0x058C
-#define NPU_REG_SHARED_BUFFER100 0x0590
-#define NPU_REG_SHARED_BUFFER101 0x0594
-#define NPU_REG_SHARED_BUFFER102 0x0598
-#define NPU_REG_SHARED_BUFFER103 0x059C
-#define NPU_REG_SHARED_BUFFER104 0x05A0
-#define NPU_REG_SHARED_BUFFER105 0x05A4
-#define NPU_REG_SHARED_BUFFER106 0x05A8
-#define NPU_REG_SHARED_BUFFER107 0x05AC
-#define NPU_REG_SHARED_BUFFER108 0x05B0
-#define NPU_REG_SHARED_BUFFER109 0x05B4
-#define NPU_REG_SHARED_BUFFER110 0x05B8
-#define NPU_REG_SHARED_BUFFER111 0x05BC
-#define NPU_REG_SHARED_BUFFER112 0x05C0
-#define NPU_REG_SHARED_BUFFER113 0x05C4
-#define NPU_REG_SHARED_BUFFER114 0x05C8
-#define NPU_REG_SHARED_BUFFER115 0x05CC
-#define NPU_REG_SHARED_BUFFER116 0x05D0
-#define NPU_REG_SHARED_BUFFER117 0x05D4
-#define NPU_REG_SHARED_BUFFER118 0x05D8
-#define NPU_REG_SHARED_BUFFER119 0x05DC
-#define NPU_REG_SHARED_BUFFER120 0x05E0
-#define NPU_REG_SHARED_BUFFER121 0x05E4
-#define NPU_REG_SHARED_BUFFER122 0x05E8
-#define NPU_REG_SHARED_BUFFER123 0x05EC
-#define NPU_REG_SHARED_BUFFER124 0x05F0
-#define NPU_REG_SHARED_BUFFER125 0x05F4
-#define NPU_REG_SHARED_BUFFER126 0x05F8
-#define NPU_REG_SHARED_BUFFER127 0x05FC
-#define NPU_REG_SHARED_BUFFER128 0x0600
-#define NPU_REG_SHARED_BUFFER129 0x0604
-#define NPU_REG_SHARED_BUFFER130 0x0608
-#define NPU_REG_SHARED_BUFFER131 0x060C
-#define NPU_REG_SHARED_BUFFER132 0x0610
-#define NPU_REG_SHARED_BUFFER133 0x0614
-#define NPU_REG_SHARED_BUFFER134 0x0618
-#define NPU_REG_SHARED_BUFFER135 0x061C
-#define NPU_REG_SHARED_BUFFER136 0x0620
-#define NPU_REG_SHARED_BUFFER137 0x0624
-#define NPU_REG_SHARED_BUFFER138 0x0628
-#define NPU_REG_SHARED_BUFFER139 0x062C
-#define NPU_REG_SHARED_BUFFER140 0x0630
-#define NPU_REG_SHARED_BUFFER141 0x0634
-#define NPU_REG_SHARED_BUFFER142 0x0638
-#define NPU_REG_SHARED_BUFFER143 0x063C
-#define NPU_REG_SHARED_BUFFER144 0x0640
-#define NPU_REG_SHARED_BUFFER145 0x0644
-#define NPU_REG_SHARED_BUFFER146 0x0648
-#define NPU_REG_SHARED_BUFFER147 0x064C
-#define NPU_REG_SHARED_BUFFER148 0x0650
-#define NPU_REG_SHARED_BUFFER149 0x0654
-#define NPU_REG_SHARED_BUFFER150 0x0658
-#define NPU_REG_SHARED_BUFFER151 0x065C
-#define NPU_REG_SHARED_BUFFER152 0x0660
-#define NPU_REG_SHARED_BUFFER153 0x0664
-#define NPU_REG_SHARED_BUFFER154 0x0668
-#define NPU_REG_SHARED_BUFFER155 0x066C
-#define NPU_REG_SHARED_BUFFER156 0x0670
-#define NPU_REG_SHARED_BUFFER157 0x0674
-#define NPU_REG_SHARED_BUFFER158 0x0678
-#define NPU_REG_SHARED_BUFFER159 0x067C
-#define NPU_REG_SHARED_BUFFER160 0x0680
-#define NPU_REG_SHARED_BUFFER161 0x0684
-#define NPU_REG_SHARED_BUFFER162 0x0688
-#define NPU_REG_SHARED_BUFFER163 0x068C
-#define NPU_REG_SHARED_BUFFER164 0x0690
-#define NPU_REG_SHARED_BUFFER165 0x0694
-#define NPU_REG_SHARED_BUFFER166 0x0698
-#define NPU_REG_SHARED_BUFFER167 0x069C
-#define NPU_REG_SHARED_BUFFER168 0x06A0
-#define NPU_REG_SHARED_BUFFER169 0x06A4
-#define NPU_REG_SHARED_BUFFER170 0x06A8
-#define NPU_REG_SHARED_BUFFER171 0x06AC
-#define NPU_REG_SHARED_BUFFER172 0x06B0
-#define NPU_REG_SHARED_BUFFER173 0x06B4
-#define NPU_REG_SHARED_BUFFER174 0x06B8
-#define NPU_REG_SHARED_BUFFER175 0x06BC
-#define NPU_REG_SHARED_BUFFER176 0x06C0
-#define NPU_REG_SHARED_BUFFER177 0x06C4
-#define NPU_REG_SHARED_BUFFER178 0x06C8
-#define NPU_REG_SHARED_BUFFER179 0x06CC
-#define NPU_REG_SHARED_BUFFER180 0x06D0
-#define NPU_REG_SHARED_BUFFER181 0x06D4
-#define NPU_REG_SHARED_BUFFER182 0x06D8
-#define NPU_REG_SHARED_BUFFER183 0x06DC
-#define NPU_REG_SHARED_BUFFER184 0x06E0
-#define NPU_REG_SHARED_BUFFER185 0x06E4
-#define NPU_REG_SHARED_BUFFER186 0x06E8
-#define NPU_REG_SHARED_BUFFER187 0x06EC
-#define NPU_REG_SHARED_BUFFER188 0x06F0
-#define NPU_REG_SHARED_BUFFER189 0x06F4
-#define NPU_REG_SHARED_BUFFER190 0x06F8
-#define NPU_REG_SHARED_BUFFER191 0x06FC
-#define NPU_REG_SHARED_BUFFER192 0x0700
-#define NPU_REG_SHARED_BUFFER193 0x0704
-#define NPU_REG_SHARED_BUFFER194 0x0708
-#define NPU_REG_SHARED_BUFFER195 0x070C
-#define NPU_REG_SHARED_BUFFER196 0x0710
-#define NPU_REG_SHARED_BUFFER197 0x0714
-#define NPU_REG_SHARED_BUFFER198 0x0718
-#define NPU_REG_SHARED_BUFFER199 0x071C
-#define NPU_REG_SHARED_BUFFER200 0x0720
-#define NPU_REG_SHARED_BUFFER201 0x0724
-#define NPU_REG_SHARED_BUFFER202 0x0728
-#define NPU_REG_SHARED_BUFFER203 0x072C
-#define NPU_REG_SHARED_BUFFER204 0x0730
-#define NPU_REG_SHARED_BUFFER205 0x0734
-#define NPU_REG_SHARED_BUFFER206 0x0738
-#define NPU_REG_SHARED_BUFFER207 0x073C
-#define NPU_REG_SHARED_BUFFER208 0x0740
-#define NPU_REG_SHARED_BUFFER209 0x0744
-#define NPU_REG_SHARED_BUFFER210 0x0748
-#define NPU_REG_SHARED_BUFFER211 0x074C
-#define NPU_REG_SHARED_BUFFER212 0x0750
-#define NPU_REG_SHARED_BUFFER213 0x0754
-#define NPU_REG_SHARED_BUFFER214 0x0758
-#define NPU_REG_SHARED_BUFFER215 0x075C
-#define NPU_REG_SHARED_BUFFER216 0x0760
-#define NPU_REG_SHARED_BUFFER217 0x0764
-#define NPU_REG_SHARED_BUFFER218 0x0768
-#define NPU_REG_SHARED_BUFFER219 0x076C
-#define NPU_REG_SHARED_BUFFER220 0x0770
-#define NPU_REG_SHARED_BUFFER221 0x0774
-#define NPU_REG_SHARED_BUFFER222 0x0778
-#define NPU_REG_SHARED_BUFFER223 0x077C
-#define NPU_REG_SHARED_BUFFER224 0x0780
-#define NPU_REG_SHARED_BUFFER225 0x0784
-#define NPU_REG_SHARED_BUFFER226 0x0788
-#define NPU_REG_SHARED_BUFFER227 0x078C
-#define NPU_REG_SHARED_BUFFER228 0x0790
-#define NPU_REG_SHARED_BUFFER229 0x0794
-#define NPU_REG_SHARED_BUFFER230 0x0798
-#define NPU_REG_SHARED_BUFFER231 0x079C
-#define NPU_REG_SHARED_BUFFER232 0x07A0
-#define NPU_REG_SHARED_BUFFER233 0x07A4
-#define NPU_REG_SHARED_BUFFER234 0x07A8
-#define NPU_REG_SHARED_BUFFER235 0x07AC
-#define NPU_REG_SHARED_BUFFER236 0x07B0
-#define NPU_REG_SHARED_BUFFER237 0x07B4
-#define NPU_REG_SHARED_BUFFER238 0x07B8
-#define NPU_REG_SHARED_BUFFER239 0x07BC
-#define NPU_REG_SHARED_BUFFER240 0x07C0
-#define NPU_REG_SHARED_BUFFER241 0x07C4
-#define NPU_REG_SHARED_BUFFER242 0x07C8
-#define NPU_REG_SHARED_BUFFER243 0x07CC
-#define NPU_REG_SHARED_BUFFER244 0x07D0
-#define NPU_REG_SHARED_BUFFER245 0x07D4
-#define NPU_REG_SHARED_BUFFER246 0x07D8
-#define NPU_REG_SHARED_BUFFER247 0x07DC
-#define NPU_REG_SHARED_BUFFER248 0x07E0
-#define NPU_REG_SHARED_BUFFER249 0x07E4
-#define NPU_REG_SHARED_BUFFER250 0x07E8
-#define NPU_REG_SHARED_BUFFER251 0x07EC
-#define NPU_REG_SHARED_BUFFER252 0x07F0
-#define NPU_REG_SHARED_BUFFER253 0x07F4
-#define NPU_REG_SHARED_BUFFER254 0x07F8
-#define NPU_REG_SHARED_BUFFER255 0x07FC
+#define NPU_REG_SHARED_BUFFER_BASE 0x0400
+#define NPU_REG_SHARED_BUFFER_ARRLEN 0x0100
 #define SHARED_BUFFER_REGISTERS_SIZE 0x0800
 
 //
-// Register subpage TSU
+// Register subpage TSU_IFM
 //
 #define NPU_REG_IFM_PAD_TOP 0x0800
 #define NPU_REG_IFM_PAD_LEFT 0x0804
@@ -421,6 +195,11 @@
 #define NPU_REG_IFM_HEIGHT1_M1 0x0830
 #define NPU_REG_IFM_IB_END 0x0834
 #define NPU_REG_IFM_REGION 0x083C
+#define TSU_IFM_REGISTERS_SIZE 0x0840
+
+//
+// Register subpage TSU_OFM
+//
 #define NPU_REG_OFM_WIDTH_M1 0x0844
 #define NPU_REG_OFM_HEIGHT_M1 0x0848
 #define NPU_REG_OFM_DEPTH_M1 0x084C
@@ -433,10 +212,14 @@
 #define NPU_REG_OFM_HEIGHT0_M1 0x086C
 #define NPU_REG_OFM_HEIGHT1_M1 0x0870
 #define NPU_REG_OFM_REGION 0x087C
+#define TSU_OFM_REGISTERS_SIZE 0x0880
+
+//
+// Register subpage TSU_KERNEL
+//
 #define NPU_REG_KERNEL_WIDTH_M1 0x0880
 #define NPU_REG_KERNEL_HEIGHT_M1 0x0884
 #define NPU_REG_KERNEL_STRIDE 0x0888
-#define NPU_REG_PARALLEL_MODE 0x088C
 #define NPU_REG_ACC_FORMAT 0x0890
 #define NPU_REG_ACTIVATION 0x0894
 #define NPU_REG_ACTIVATION_MIN 0x0898
@@ -445,10 +228,20 @@
 #define NPU_REG_SCALE_REGION 0x08A4
 #define NPU_REG_AB_START 0x08B4
 #define NPU_REG_BLOCKDEP 0x08BC
+#define TSU_KERNEL_REGISTERS_SIZE 0x08C0
+
+//
+// Register subpage TSU_DMA
+//
 #define NPU_REG_DMA0_SRC_REGION 0x08C0
 #define NPU_REG_DMA0_DST_REGION 0x08C4
 #define NPU_REG_DMA0_SIZE0 0x08C8
 #define NPU_REG_DMA0_SIZE1 0x08CC
+#define TSU_DMA_REGISTERS_SIZE 0x0900
+
+//
+// Register subpage TSU_IFM2
+//
 #define NPU_REG_IFM2_BROADCAST 0x0900
 #define NPU_REG_IFM2_SCALAR 0x0904
 #define NPU_REG_IFM2_PRECISION 0x0914
@@ -458,6 +251,11 @@
 #define NPU_REG_IFM2_HEIGHT1_M1 0x0930
 #define NPU_REG_IFM2_IB_START 0x0934
 #define NPU_REG_IFM2_REGION 0x093C
+#define TSU_IFM2_REGISTERS_SIZE 0x0940
+
+//
+// Register subpage TSU_IFM_BASE
+//
 #define NPU_REG_IFM_BASE0 0x0A00
 #define NPU_REG_IFM_BASE0_HI 0x0A04
 #define NPU_REG_IFM_BASE1 0x0A08
@@ -472,6 +270,11 @@
 #define NPU_REG_IFM_STRIDE_Y_HI 0x0A2C
 #define NPU_REG_IFM_STRIDE_C 0x0A30
 #define NPU_REG_IFM_STRIDE_C_HI 0x0A34
+#define TSU_IFM_BASE_REGISTERS_SIZE 0x0A40
+
+//
+// Register subpage TSU_OFM_BASE
+//
 #define NPU_REG_OFM_BASE0 0x0A40
 #define NPU_REG_OFM_BASE0_HI 0x0A44
 #define NPU_REG_OFM_BASE1 0x0A48
@@ -486,27 +289,40 @@
 #define NPU_REG_OFM_STRIDE_Y_HI 0x0A6C
 #define NPU_REG_OFM_STRIDE_C 0x0A70
 #define NPU_REG_OFM_STRIDE_C_HI 0x0A74
+#define TSU_OFM_BASE_REGISTERS_SIZE 0x0A80
+
+//
+// Register subpage TSU_WS_BASE
+//
 #define NPU_REG_WEIGHT_BASE 0x0A80
 #define NPU_REG_WEIGHT_BASE_HI 0x0A84
 #define NPU_REG_WEIGHT_LENGTH 0x0A88
+#define NPU_REG_WEIGHT_LENGTH_HI 0x0A8C
 #define NPU_REG_SCALE_BASE 0x0A90
 #define NPU_REG_SCALE_BASE_HI 0x0A94
 #define NPU_REG_SCALE_LENGTH 0x0A98
+#define NPU_REG_SCALE_LENGTH_HI 0x0A9C
 #define NPU_REG_OFM_SCALE 0x0AA0
 #define NPU_REG_OFM_SCALE_SHIFT 0x0AA4
 #define NPU_REG_OPA_SCALE 0x0AA8
 #define NPU_REG_OPA_SCALE_SHIFT 0x0AAC
 #define NPU_REG_OPB_SCALE 0x0AB0
+#define TSU_WS_BASE_REGISTERS_SIZE 0x0AC0
+
+//
+// Register subpage TSU_DMA_BASE
+//
 #define NPU_REG_DMA0_SRC 0x0AC0
 #define NPU_REG_DMA0_SRC_HI 0x0AC4
 #define NPU_REG_DMA0_DST 0x0AC8
 #define NPU_REG_DMA0_DST_HI 0x0ACC
 #define NPU_REG_DMA0_LEN 0x0AD0
 #define NPU_REG_DMA0_LEN_HI 0x0AD4
-#define NPU_REG_DMA0_SKIP0 0x0AD8
-#define NPU_REG_DMA0_SKIP0_HI 0x0ADC
-#define NPU_REG_DMA0_SKIP1 0x0AE0
-#define NPU_REG_DMA0_SKIP1_HI 0x0AE4
+#define TSU_DMA_BASE_REGISTERS_SIZE 0x0B00
+
+//
+// Register subpage TSU_IFM2_BASE
+//
 #define NPU_REG_IFM2_BASE0 0x0B00
 #define NPU_REG_IFM2_BASE0_HI 0x0B04
 #define NPU_REG_IFM2_BASE1 0x0B08
@@ -521,97 +337,53 @@
 #define NPU_REG_IFM2_STRIDE_Y_HI 0x0B2C
 #define NPU_REG_IFM2_STRIDE_C 0x0B30
 #define NPU_REG_IFM2_STRIDE_C_HI 0x0B34
-#define NPU_REG_WEIGHT1_BASE 0x0B40
-#define NPU_REG_WEIGHT1_BASE_HI 0x0B44
-#define NPU_REG_WEIGHT1_LENGTH 0x0B48
-#define NPU_REG_SCALE1_BASE 0x0B50
-#define NPU_REG_SCALE1_BASE_HI 0x0B54
-#define NPU_REG_SCALE1_LENGTH 0x0B58
-#define TSU_REGISTERS_SIZE 0x0B5C
+#define TSU_IFM2_BASE_REGISTERS_SIZE 0x0B40
 
 //
-// Register subpage TSU_DEBUG
+// Register subpage TSU_WS1_BASE
 //
-#define NPU_REG_KERNEL_X 0x0200
-#define NPU_REG_KERNEL_Y 0x0204
-#define NPU_REG_KERNEL_W_M1 0x0208
-#define NPU_REG_KERNEL_H_M1 0x020C
-#define NPU_REG_OFM_CBLK_WIDTH_M1 0x0210
-#define NPU_REG_OFM_CBLK_HEIGHT_M1 0x0214
-#define NPU_REG_OFM_CBLK_DEPTH_M1 0x0218
-#define NPU_REG_IFM_CBLK_DEPTH_M1 0x021C
-#define NPU_REG_OFM_X 0x0220
-#define NPU_REG_OFM_Y 0x0224
-#define NPU_REG_OFM_Z 0x0228
-#define NPU_REG_IFM_Z 0x022C
-#define NPU_REG_PAD_TOP 0x0230
-#define NPU_REG_PAD_LEFT 0x0234
-#define NPU_REG_IFM_CBLK_WIDTH 0x0238
-#define NPU_REG_IFM_CBLK_HEIGHT 0x023C
-#define NPU_REG_DMA_IFM_SRC 0x0240
-#define NPU_REG_DMA_IFM_SRC_HI 0x0244
-#define NPU_REG_DMA_IFM_DST 0x0248
-#define NPU_REG_DMA_OFM_SRC 0x024C
-#define NPU_REG_DMA_OFM_DST 0x0250
-#define NPU_REG_DMA_OFM_DST_HI 0x0254
-#define NPU_REG_DMA_WEIGHT_SRC 0x0258
-#define NPU_REG_DMA_WEIGHT_SRC_HI 0x025C
-#define NPU_REG_DMA_CMD_SRC 0x0260
-#define NPU_REG_DMA_CMD_SRC_HI 0x0264
-#define NPU_REG_DMA_CMD_SIZE 0x0268
-#define NPU_REG_DMA_M2M_SRC 0x026C
-#define NPU_REG_DMA_M2M_SRC_HI 0x0270
-#define NPU_REG_DMA_M2M_DST 0x0274
-#define NPU_REG_DMA_M2M_DST_HI 0x0278
-#define NPU_REG_CURRENT_QREAD 0x027C
-#define NPU_REG_DMA_SCALE_SRC 0x0280
-#define NPU_REG_DMA_SCALE_SRC_HI 0x0284
-#define NPU_REG_CURRENT_BLOCK 0x02B4
-#define NPU_REG_CURRENT_OP 0x02B8
-#define NPU_REG_CURRENT_CMD 0x02BC
-#define TSU_DEBUG_REGISTERS_SIZE 0x02C0
-
-#ifdef __cplusplus
+#define TSU_WS1_BASE_REGISTERS_SIZE 0x0B80
 
-// Enum types
+//
+// Register subpage TSU_USER_BASE
+//
+#define NPU_REG_USER_DEFINED_BASE 0x0B80
+#define NPU_REG_USER_DEFINED_ARRLEN 0x0008
+#define TSU_USER_BASE_REGISTERS_SIZE 0x0BC0
 
-enum class acc_format : uint16_t
-{
-    INT_32BIT = 0,
-    INT_40BIT = 1,
-    FP_S5_10  = 2,
-};
+//
+// Register subpage TSU_DMA_EBASE
+//
+#define TSU_DMA_EBASE_REGISTERS_SIZE 0x0C00
 
-enum class activation : uint16_t
-{
-    NONE      = 0,
-    TANH      = 3,
-    SIGMOID   = 4,
-    LUT_START = 16,
-    LUT_END   = 23,
-};
+//
+// Register subpage ID
+//
+#define NPU_REG_REVISION 0x0FC0
+#define NPU_REG_PID4 0x0FD0
+#define NPU_REG_PID5 0x0FD4
+#define NPU_REG_PID6 0x0FD8
+#define NPU_REG_PID7 0x0FDC
+#define NPU_REG_PID0 0x0FE0
+#define NPU_REG_PID1 0x0FE4
+#define NPU_REG_PID2 0x0FE8
+#define NPU_REG_PID3 0x0FEC
+#define NPU_REG_CID0 0x0FF0
+#define NPU_REG_CID1 0x0FF4
+#define NPU_REG_CID2 0x0FF8
+#define NPU_REG_CID3 0x0FFC
+#define ID_REGISTERS_SIZE 0x1000
 
-enum class axi_mem_encoding_type : uint8_t
+#ifdef __cplusplus
+// Enum types
+enum class acc_format : uint8_t
 {
-    DEVICE_NON_BUFFERABLE                 = 0x0,
-    DEVICE_BUFFERABLE                     = 0x1,
-    NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 0x2,
-    NORMAL_NON_CACHEABLE_BUFFERABLE       = 0x3,
-    WRITE_THROUGH_NO_ALLOCATE             = 0x4,
-    WRITE_THROUGH_READ_ALLOCATE           = 0x5,
-    WRITE_THROUGH_WRITE_ALLOCATE          = 0x6,
-    WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 0x7,
-    WRITE_BACK_NO_ALLOCATE                = 0x8,
-    WRITE_BACK_READ_ALLOCATE              = 0x9,
-    WRITE_BACK_WRITE_ALLOCATE             = 0xA,
-    WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 0xB,
-    RESERVED_12                           = 0xC,
-    RESERVED_13                           = 0xD,
-    RESERVED_14                           = 0xE,
-    RESERVED_15                           = 0xF,
+    I32 = 0,
+    I40 = 1,
+    F16 = 2,
 };
 
-enum class clip_range : uint8_t
+enum class activation_clip_range : uint8_t
 {
     OFM_PRECISION = 0,
     FORCE_UINT8   = 2,
@@ -619,656 +391,2100 @@ enum class clip_range : uint8_t
     FORCE_INT16   = 5,
 };
 
-enum class cmd0 : uint16_t
-{
-    NPU_OP_STOP               = 0x000,
-    NPU_OP_IRQ                = 0x001,
-    NPU_OP_CONV               = 0x002,
-    NPU_OP_DEPTHWISE          = 0x003,
-    NPU_OP_POOL               = 0x005,
-    NPU_OP_ELEMENTWISE        = 0x006,
-    NPU_OP_DMA_START          = 0x010,
-    NPU_OP_DMA_WAIT           = 0x011,
-    NPU_OP_KERNEL_WAIT        = 0x012,
-    NPU_OP_PMU_MASK           = 0x013,
-    NPU_SET_IFM_PAD_TOP       = 0x100,
-    NPU_SET_IFM_PAD_LEFT      = 0x101,
-    NPU_SET_IFM_PAD_RIGHT     = 0x102,
-    NPU_SET_IFM_PAD_BOTTOM    = 0x103,
-    NPU_SET_IFM_DEPTH_M1      = 0x104,
-    NPU_SET_IFM_PRECISION     = 0x105,
-    NPU_SET_IFM_UPSCALE       = 0x107,
-    NPU_SET_IFM_ZERO_POINT    = 0x109,
-    NPU_SET_IFM_WIDTH0_M1     = 0x10A,
-    NPU_SET_IFM_HEIGHT0_M1    = 0x10B,
-    NPU_SET_IFM_HEIGHT1_M1    = 0x10C,
-    NPU_SET_IFM_IB_END        = 0x10D,
-    NPU_SET_IFM_REGION        = 0x10F,
-    NPU_SET_OFM_WIDTH_M1      = 0x111,
-    NPU_SET_OFM_HEIGHT_M1     = 0x112,
-    NPU_SET_OFM_DEPTH_M1      = 0x113,
-    NPU_SET_OFM_PRECISION     = 0x114,
-    NPU_SET_OFM_BLK_WIDTH_M1  = 0x115,
-    NPU_SET_OFM_BLK_HEIGHT_M1 = 0x116,
-    NPU_SET_OFM_BLK_DEPTH_M1  = 0x117,
-    NPU_SET_OFM_ZERO_POINT    = 0x118,
-    NPU_SET_OFM_WIDTH0_M1     = 0x11A,
-    NPU_SET_OFM_HEIGHT0_M1    = 0x11B,
-    NPU_SET_OFM_HEIGHT1_M1    = 0x11C,
-    NPU_SET_OFM_REGION        = 0x11F,
-    NPU_SET_KERNEL_WIDTH_M1   = 0x120,
-    NPU_SET_KERNEL_HEIGHT_M1  = 0x121,
-    NPU_SET_KERNEL_STRIDE     = 0x122,
-    NPU_SET_PARALLEL_MODE     = 0x123,
-    NPU_SET_ACC_FORMAT        = 0x124,
-    NPU_SET_ACTIVATION        = 0x125,
-    NPU_SET_ACTIVATION_MIN    = 0x126,
-    NPU_SET_ACTIVATION_MAX    = 0x127,
-    NPU_SET_WEIGHT_REGION     = 0x128,
-    NPU_SET_SCALE_REGION      = 0x129,
-    NPU_SET_AB_START          = 0x12D,
-    NPU_SET_BLOCKDEP          = 0x12F,
-    NPU_SET_DMA0_SRC_REGION   = 0x130,
-    NPU_SET_DMA0_DST_REGION   = 0x131,
-    NPU_SET_DMA0_SIZE0        = 0x132,
-    NPU_SET_DMA0_SIZE1        = 0x133,
-    NPU_SET_IFM2_BROADCAST    = 0x180,
-    NPU_SET_IFM2_SCALAR       = 0x181,
-    NPU_SET_IFM2_PRECISION    = 0x185,
-    NPU_SET_IFM2_ZERO_POINT   = 0x189,
-    NPU_SET_IFM2_WIDTH0_M1    = 0x18A,
-    NPU_SET_IFM2_HEIGHT0_M1   = 0x18B,
-    NPU_SET_IFM2_HEIGHT1_M1   = 0x18C,
-    NPU_SET_IFM2_IB_START     = 0x18D,
-    NPU_SET_IFM2_REGION       = 0x18F,
-};
-
-enum class cmd1 : uint16_t
-{
-    NPU_SET_IFM_BASE0      = 0x000,
-    NPU_SET_IFM_BASE1      = 0x001,
-    NPU_SET_IFM_BASE2      = 0x002,
-    NPU_SET_IFM_BASE3      = 0x003,
-    NPU_SET_IFM_STRIDE_X   = 0x004,
-    NPU_SET_IFM_STRIDE_Y   = 0x005,
-    NPU_SET_IFM_STRIDE_C   = 0x006,
-    NPU_SET_OFM_BASE0      = 0x010,
-    NPU_SET_OFM_BASE1      = 0x011,
-    NPU_SET_OFM_BASE2      = 0x012,
-    NPU_SET_OFM_BASE3      = 0x013,
-    NPU_SET_OFM_STRIDE_X   = 0x014,
-    NPU_SET_OFM_STRIDE_Y   = 0x015,
-    NPU_SET_OFM_STRIDE_C   = 0x016,
-    NPU_SET_WEIGHT_BASE    = 0x020,
-    NPU_SET_WEIGHT_LENGTH  = 0x021,
-    NPU_SET_SCALE_BASE     = 0x022,
-    NPU_SET_SCALE_LENGTH   = 0x023,
-    NPU_SET_OFM_SCALE      = 0x024,
-    NPU_SET_OPA_SCALE      = 0x025,
-    NPU_SET_OPB_SCALE      = 0x026,
-    NPU_SET_DMA0_SRC       = 0x030,
-    NPU_SET_DMA0_DST       = 0x031,
-    NPU_SET_DMA0_LEN       = 0x032,
-    NPU_SET_DMA0_SKIP0     = 0x033,
-    NPU_SET_DMA0_SKIP1     = 0x034,
-    NPU_SET_IFM2_BASE0     = 0x080,
-    NPU_SET_IFM2_BASE1     = 0x081,
-    NPU_SET_IFM2_BASE2     = 0x082,
-    NPU_SET_IFM2_BASE3     = 0x083,
-    NPU_SET_IFM2_STRIDE_X  = 0x084,
-    NPU_SET_IFM2_STRIDE_Y  = 0x085,
-    NPU_SET_IFM2_STRIDE_C  = 0x086,
-    NPU_SET_WEIGHT1_BASE   = 0x090,
-    NPU_SET_WEIGHT1_LENGTH = 0x091,
-    NPU_SET_SCALE1_BASE    = 0x092,
-    NPU_SET_SCALE1_LENGTH  = 0x093,
-};
-
-enum class data_format : uint8_t
+enum class activation_format : uint8_t
 {
     NHWC    = 0,
     NHCWB16 = 1,
 };
 
-enum class elementwise_mode : uint16_t
+enum class activation_function : uint8_t
 {
-    MUL   = 0,
-    ADD   = 1,
-    SUB   = 2,
-    MIN   = 3,
-    MAX   = 4,
-    LRELU = 5,
-    ABS   = 6,
-    CLZ   = 7,
-    SHR   = 8,
-    SHL   = 9,
+    RELU    = 0,
+    TANH    = 3,
+    SIGMOID = 4,
+    TABLE_0 = 16,
+    TABLE_1 = 17,
+    TABLE_2 = 18,
+    TABLE_3 = 19,
+    TABLE_4 = 20,
+    TABLE_5 = 21,
+    TABLE_6 = 22,
+    TABLE_7 = 23,
 };
 
-enum class ifm_precision : uint8_t
+enum class activation_precision : uint8_t
 {
-    U8  = 0,
-    S8  = 1,
-    U16 = 4,
-    S16 = 5,
-    S32 = 9,
+    B8  = 0,
+    B16 = 1,
+    B32 = 2,
+    B64 = 3,
 };
 
-enum class ifm_scale_mode : uint8_t
+enum class activation_type : uint8_t
 {
-    SCALE_16BIT     = 0,
-    SCALE_OPA_32BIT = 1,
-    SCALE_OPB_32BIT = 2,
+    UNSIGNED = 0,
+    SIGNED   = 1,
 };
 
-enum class macs_per_cc : uint8_t
+enum class axi_mem_encoding : uint8_t
 {
-    MACS_PER_CC_IS_5 = 0x5,
-    MACS_PER_CC_IS_6 = 0x6,
-    MACS_PER_CC_IS_7 = 0x7,
-    MACS_PER_CC_IS_8 = 0x8,
+    DEVICE_NON_BUFFERABLE                 = 0,
+    DEVICE_BUFFERABLE                     = 1,
+    NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 2,
+    NORMAL_NON_CACHEABLE_BUFFERABLE       = 3,
+    WRITE_THROUGH_NO_ALLOCATE             = 4,
+    WRITE_THROUGH_READ_ALLOCATE           = 5,
+    WRITE_THROUGH_WRITE_ALLOCATE          = 6,
+    WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    WRITE_BACK_NO_ALLOCATE                = 8,
+    WRITE_BACK_READ_ALLOCATE              = 9,
+    WRITE_BACK_WRITE_ALLOCATE             = 10,
+    WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 11,
 };
 
-enum class memory_type : uint8_t
+enum class broadcast_mode : uint8_t
 {
-    AXI0_OUTSTANDING_COUNTER0 = 0,
-    AXI0_OUTSTANDING_COUNTER1 = 1,
-    AXI1_OUTSTANDING_COUNTER2 = 2,
-    AXI1_OUTSTANDING_COUNTER3 = 3,
+    DISABLE = 0,
+    ENABLE  = 1,
 };
 
-enum class ofm_precision : uint8_t
-{
-    U8  = 0,
-    S8  = 1,
-    U16 = 2,
-    S16 = 3,
-    S32 = 5,
-};
-
-enum class pmu_event_type : uint16_t
-{
-    NO_EVENT                     = 0x00,
-    CYCLE                        = 0x11,
-    NPU_IDLE                     = 0x20,
-    CC_STALLED_ON_BLOCKDEP       = 0x21,
-    CC_STALLED_ON_SHRAM_RECONFIG = 0x22,
-    NPU_ACTIVE                   = 0x23,
-    MAC_ACTIVE                   = 0x30,
-    MAC_ACTIVE_8BIT              = 0x31,
-    MAC_ACTIVE_16BIT             = 0x32,
-    MAC_DPU_ACTIVE               = 0x33,
-    MAC_STALLED_BY_WD_ACC        = 0x34,
-    MAC_STALLED_BY_WD            = 0x35,
-    MAC_STALLED_BY_ACC           = 0x36,
-    MAC_STALLED_BY_IB            = 0x37,
-    MAC_ACTIVE_32BIT             = 0x38,
-    MAC_STALLED_BY_INT_W         = 0x39,
-    MAC_STALLED_BY_INT_ACC       = 0x3A,
-    AO_ACTIVE                    = 0x40,
-    AO_ACTIVE_8BIT               = 0x41,
-    AO_ACTIVE_16BIT              = 0x42,
-    AO_STALLED_BY_OFMP_OB        = 0x43,
-    AO_STALLED_BY_OFMP           = 0x44,
-    AO_STALLED_BY_OB             = 0x45,
-    AO_STALLED_BY_ACC_IB         = 0x46,
-    AO_STALLED_BY_ACC            = 0x47,
-    AO_STALLED_BY_IB             = 0x48,
-    WD_ACTIVE                    = 0x50,
-    WD_STALLED                   = 0x51,
-    WD_STALLED_BY_WS             = 0x52,
-    WD_STALLED_BY_WD_BUF         = 0x53,
-    WD_PARSE_ACTIVE              = 0x54,
-    WD_PARSE_STALLED             = 0x55,
-    WD_PARSE_STALLED_IN          = 0x56,
-    WD_PARSE_STALLED_OUT         = 0x57,
-    WD_TRANS_WS                  = 0x58,
-    WD_TRANS_WB                  = 0x59,
-    WD_TRANS_DW0                 = 0x5a,
-    WD_TRANS_DW1                 = 0x5b,
-    AXI0_RD_TRANS_ACCEPTED       = 0x80,
-    AXI0_RD_TRANS_COMPLETED      = 0x81,
-    AXI0_RD_DATA_BEAT_RECEIVED   = 0x82,
-    AXI0_RD_TRAN_REQ_STALLED     = 0x83,
-    AXI0_WR_TRANS_ACCEPTED       = 0x84,
-    AXI0_WR_TRANS_COMPLETED_M    = 0x85,
-    AXI0_WR_TRANS_COMPLETED_S    = 0x86,
-    AXI0_WR_DATA_BEAT_WRITTEN    = 0x87,
-    AXI0_WR_TRAN_REQ_STALLED     = 0x88,
-    AXI0_WR_DATA_BEAT_STALLED    = 0x89,
-    AXI0_ENABLED_CYCLES          = 0x8c,
-    AXI0_RD_STALL_LIMIT          = 0x8e,
-    AXI0_WR_STALL_LIMIT          = 0x8f,
-    AXI1_RD_TRANS_ACCEPTED       = 0x180,
-    AXI1_RD_TRANS_COMPLETED      = 0x181,
-    AXI1_RD_DATA_BEAT_RECEIVED   = 0x182,
-    AXI1_RD_TRAN_REQ_STALLED     = 0x183,
-    AXI1_WR_TRANS_ACCEPTED       = 0x184,
-    AXI1_WR_TRANS_COMPLETED_M    = 0x185,
-    AXI1_WR_TRANS_COMPLETED_S    = 0x186,
-    AXI1_WR_DATA_BEAT_WRITTEN    = 0x187,
-    AXI1_WR_TRAN_REQ_STALLED     = 0x188,
-    AXI1_WR_DATA_BEAT_STALLED    = 0x189,
-    AXI1_ENABLED_CYCLES          = 0x18c,
-    AXI1_RD_STALL_LIMIT          = 0x18e,
-    AXI1_WR_STALL_LIMIT          = 0x18f,
-    AXI_LATENCY_ANY              = 0xa0,
-    AXI_LATENCY_32               = 0xa1,
-    AXI_LATENCY_64               = 0xa2,
-    AXI_LATENCY_128              = 0xa3,
-    AXI_LATENCY_256              = 0xa4,
-    AXI_LATENCY_512              = 0xa5,
-    AXI_LATENCY_1024             = 0xa6,
-    ECC_DMA                      = 0xb0,
-    ECC_SB0                      = 0xb1,
-    ECC_SB1                      = 0x1b1,
-};
-
-enum class pooling_mode : uint16_t
+enum class cmd0_opcode : uint16_t
 {
-    MAX        = 0,
-    AVERAGE    = 1,
-    REDUCE_SUM = 2,
+    NPU_OP_STOP               = 0,
+    NPU_OP_IRQ                = 1,
+    NPU_OP_CONV               = 2,
+    NPU_OP_DEPTHWISE          = 3,
+    NPU_OP_POOL               = 5,
+    NPU_OP_ELEMENTWISE        = 6,
+    NPU_OP_DMA_START          = 16,
+    NPU_OP_DMA_WAIT           = 17,
+    NPU_OP_KERNEL_WAIT        = 18,
+    NPU_OP_PMU_MASK           = 19,
+    NPU_SET_IFM_PAD_TOP       = 256,
+    NPU_SET_IFM_PAD_LEFT      = 257,
+    NPU_SET_IFM_PAD_RIGHT     = 258,
+    NPU_SET_IFM_PAD_BOTTOM    = 259,
+    NPU_SET_IFM_DEPTH_M1      = 260,
+    NPU_SET_IFM_PRECISION     = 261,
+    NPU_SET_IFM_UPSCALE       = 263,
+    NPU_SET_IFM_ZERO_POINT    = 265,
+    NPU_SET_IFM_WIDTH0_M1     = 266,
+    NPU_SET_IFM_HEIGHT0_M1    = 267,
+    NPU_SET_IFM_HEIGHT1_M1    = 268,
+    NPU_SET_IFM_IB_END        = 269,
+    NPU_SET_IFM_REGION        = 271,
+    NPU_SET_OFM_WIDTH_M1      = 273,
+    NPU_SET_OFM_HEIGHT_M1     = 274,
+    NPU_SET_OFM_DEPTH_M1      = 275,
+    NPU_SET_OFM_PRECISION     = 276,
+    NPU_SET_OFM_BLK_WIDTH_M1  = 277,
+    NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    NPU_SET_OFM_BLK_DEPTH_M1  = 279,
+    NPU_SET_OFM_ZERO_POINT    = 280,
+    NPU_SET_OFM_WIDTH0_M1     = 282,
+    NPU_SET_OFM_HEIGHT0_M1    = 283,
+    NPU_SET_OFM_HEIGHT1_M1    = 284,
+    NPU_SET_OFM_REGION        = 287,
+    NPU_SET_KERNEL_WIDTH_M1   = 288,
+    NPU_SET_KERNEL_HEIGHT_M1  = 289,
+    NPU_SET_KERNEL_STRIDE     = 290,
+    NPU_SET_ACC_FORMAT        = 292,
+    NPU_SET_ACTIVATION        = 293,
+    NPU_SET_ACTIVATION_MIN    = 294,
+    NPU_SET_ACTIVATION_MAX    = 295,
+    NPU_SET_WEIGHT_REGION     = 296,
+    NPU_SET_SCALE_REGION      = 297,
+    NPU_SET_AB_START          = 301,
+    NPU_SET_BLOCKDEP          = 303,
+    NPU_SET_DMA0_SRC_REGION   = 304,
+    NPU_SET_DMA0_DST_REGION   = 305,
+    NPU_SET_DMA0_SIZE0        = 306,
+    NPU_SET_DMA0_SIZE1        = 307,
+    NPU_SET_IFM2_BROADCAST    = 384,
+    NPU_SET_IFM2_SCALAR       = 385,
+    NPU_SET_IFM2_PRECISION    = 389,
+    NPU_SET_IFM2_ZERO_POINT   = 393,
+    NPU_SET_IFM2_WIDTH0_M1    = 394,
+    NPU_SET_IFM2_HEIGHT0_M1   = 395,
+    NPU_SET_IFM2_HEIGHT1_M1   = 396,
+    NPU_SET_IFM2_IB_START     = 397,
+    NPU_SET_IFM2_REGION       = 399,
 };
 
-enum class privilege_level : uint8_t
+enum class cmd1_opcode : uint16_t
 {
-    USER       = 0,
-    PRIVILEGED = 1,
+    NPU_SET_IFM_BASE0     = 0,
+    NPU_SET_IFM_BASE1     = 1,
+    NPU_SET_IFM_BASE2     = 2,
+    NPU_SET_IFM_BASE3     = 3,
+    NPU_SET_IFM_STRIDE_X  = 4,
+    NPU_SET_IFM_STRIDE_Y  = 5,
+    NPU_SET_IFM_STRIDE_C  = 6,
+    NPU_SET_OFM_BASE0     = 16,
+    NPU_SET_OFM_BASE1     = 17,
+    NPU_SET_OFM_BASE2     = 18,
+    NPU_SET_OFM_BASE3     = 19,
+    NPU_SET_OFM_STRIDE_X  = 20,
+    NPU_SET_OFM_STRIDE_Y  = 21,
+    NPU_SET_OFM_STRIDE_C  = 22,
+    NPU_SET_WEIGHT_BASE   = 32,
+    NPU_SET_WEIGHT_LENGTH = 33,
+    NPU_SET_SCALE_BASE    = 34,
+    NPU_SET_SCALE_LENGTH  = 35,
+    NPU_SET_OFM_SCALE     = 36,
+    NPU_SET_OPA_SCALE     = 37,
+    NPU_SET_OPB_SCALE     = 38,
+    NPU_SET_DMA0_SRC      = 48,
+    NPU_SET_DMA0_DST      = 49,
+    NPU_SET_DMA0_LEN      = 50,
+    NPU_SET_IFM2_BASE0    = 128,
+    NPU_SET_IFM2_BASE1    = 129,
+    NPU_SET_IFM2_BASE2    = 130,
+    NPU_SET_IFM2_BASE3    = 131,
+    NPU_SET_IFM2_STRIDE_X = 132,
+    NPU_SET_IFM2_STRIDE_Y = 133,
+    NPU_SET_IFM2_STRIDE_C = 134,
+    NPU_SET_USER_DEFINED0 = 160,
+    NPU_SET_USER_DEFINED1 = 161,
+    NPU_SET_USER_DEFINED2 = 162,
+    NPU_SET_USER_DEFINED3 = 163,
+    NPU_SET_USER_DEFINED4 = 164,
+    NPU_SET_USER_DEFINED5 = 165,
+    NPU_SET_USER_DEFINED6 = 166,
+    NPU_SET_USER_DEFINED7 = 167,
 };
 
-enum class resampling_mode : uint8_t
+enum class cmd_ctrl : uint8_t
 {
-    NONE      = 0,
-    NEAREST   = 1,
-    TRANSPOSE = 2,
+    CMD0_CTRL = 0,
+    CMD1_CTRL = 1,
 };
 
-enum class rounding : uint8_t
+enum class custom_dma_cs : uint8_t
 {
-    TFL      = 0,
-    TRUNCATE = 1,
-    NATURAL  = 2,
+    DISABLE = 0,
+    ENABLE  = 1,
 };
 
-enum class security_level : uint8_t
+enum class custom_dma : uint8_t
 {
-    SECURE     = 0,
-    NON_SECURE = 1,
+    NOT_IMPLEMENTED = 0,
+    IMPLEMENTED     = 1,
 };
 
-enum class shram_size : uint8_t
+enum class dma_fault_src : uint8_t
 {
-    SHRAM_96KB = 0x60,
-    SHRAM_48KB = 0x30,
-    SHRAM_24KB = 0x18,
-    SHRAM_16KB = 0x10,
+    AXI_M0 = 0,
+    AXI_M1 = 1,
 };
 
-enum class state : uint8_t
+enum class dma_region_mode : uint8_t
 {
-    STOPPED = 0,
-    RUNNING = 1,
+    EXTERNAL = 0,
+    INTERNAL = 1,
 };
 
-enum class stride_mode : uint8_t
+enum class dma_stride_mode : uint8_t
 {
-    STRIDE_MODE_1D = 0,
-    STRIDE_MODE_2D = 1,
-    STRIDE_MODE_3D = 2,
+    D1 = 0,
 };
 
-#else
+enum class elementwise_mode : uint8_t
+{
+    MUL   = 0,
+    ADD   = 1,
+    SUB   = 2,
+    MIN   = 3,
+    MAX   = 4,
+    LRELU = 5,
+    ABS   = 6,
+    CLZ   = 7,
+    SHR   = 8,
+    SHL   = 9,
+};
 
-enum acc_format
+enum class functional_safety : uint8_t
 {
-    ACC_FORMAT_INT_32BIT = 0,
-    ACC_FORMAT_INT_40BIT = 1,
-    ACC_FORMAT_FP_S5_10  = 2,
-};
-
-enum activation
-{
-    ACTIVATION_NONE      = 0,
-    ACTIVATION_TANH      = 3,
-    ACTIVATION_SIGMOID   = 4,
-    ACTIVATION_LUT_START = 16,
-    ACTIVATION_LUT_END   = 23,
-};
-
-enum axi_mem_encoding_type
-{
-    AXI_MEM_ENCODING_TYPE_DEVICE_NON_BUFFERABLE                 = 0x0,
-    AXI_MEM_ENCODING_TYPE_DEVICE_BUFFERABLE                     = 0x1,
-    AXI_MEM_ENCODING_TYPE_NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 0x2,
-    AXI_MEM_ENCODING_TYPE_NORMAL_NON_CACHEABLE_BUFFERABLE       = 0x3,
-    AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_NO_ALLOCATE             = 0x4,
-    AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_READ_ALLOCATE           = 0x5,
-    AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_WRITE_ALLOCATE          = 0x6,
-    AXI_MEM_ENCODING_TYPE_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 0x7,
-    AXI_MEM_ENCODING_TYPE_WRITE_BACK_NO_ALLOCATE                = 0x8,
-    AXI_MEM_ENCODING_TYPE_WRITE_BACK_READ_ALLOCATE              = 0x9,
-    AXI_MEM_ENCODING_TYPE_WRITE_BACK_WRITE_ALLOCATE             = 0xA,
-    AXI_MEM_ENCODING_TYPE_WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 0xB,
-    AXI_MEM_ENCODING_TYPE_RESERVED_12                           = 0xC,
-    AXI_MEM_ENCODING_TYPE_RESERVED_13                           = 0xD,
-    AXI_MEM_ENCODING_TYPE_RESERVED_14                           = 0xE,
-    AXI_MEM_ENCODING_TYPE_RESERVED_15                           = 0xF,
-};
-
-enum clip_range
-{
-    CLIP_RANGE_OFM_PRECISION = 0,
-    CLIP_RANGE_FORCE_UINT8   = 2,
-    CLIP_RANGE_FORCE_INT8    = 3,
-    CLIP_RANGE_FORCE_INT16   = 5,
-};
-
-enum cmd0
-{
-    CMD0_NPU_OP_STOP               = 0x000,
-    CMD0_NPU_OP_IRQ                = 0x001,
-    CMD0_NPU_OP_CONV               = 0x002,
-    CMD0_NPU_OP_DEPTHWISE          = 0x003,
-    CMD0_NPU_OP_POOL               = 0x005,
-    CMD0_NPU_OP_ELEMENTWISE        = 0x006,
-    CMD0_NPU_OP_DMA_START          = 0x010,
-    CMD0_NPU_OP_DMA_WAIT           = 0x011,
-    CMD0_NPU_OP_KERNEL_WAIT        = 0x012,
-    CMD0_NPU_OP_PMU_MASK           = 0x013,
-    CMD0_NPU_SET_IFM_PAD_TOP       = 0x100,
-    CMD0_NPU_SET_IFM_PAD_LEFT      = 0x101,
-    CMD0_NPU_SET_IFM_PAD_RIGHT     = 0x102,
-    CMD0_NPU_SET_IFM_PAD_BOTTOM    = 0x103,
-    CMD0_NPU_SET_IFM_DEPTH_M1      = 0x104,
-    CMD0_NPU_SET_IFM_PRECISION     = 0x105,
-    CMD0_NPU_SET_IFM_UPSCALE       = 0x107,
-    CMD0_NPU_SET_IFM_ZERO_POINT    = 0x109,
-    CMD0_NPU_SET_IFM_WIDTH0_M1     = 0x10A,
-    CMD0_NPU_SET_IFM_HEIGHT0_M1    = 0x10B,
-    CMD0_NPU_SET_IFM_HEIGHT1_M1    = 0x10C,
-    CMD0_NPU_SET_IFM_IB_END        = 0x10D,
-    CMD0_NPU_SET_IFM_REGION        = 0x10F,
-    CMD0_NPU_SET_OFM_WIDTH_M1      = 0x111,
-    CMD0_NPU_SET_OFM_HEIGHT_M1     = 0x112,
-    CMD0_NPU_SET_OFM_DEPTH_M1      = 0x113,
-    CMD0_NPU_SET_OFM_PRECISION     = 0x114,
-    CMD0_NPU_SET_OFM_BLK_WIDTH_M1  = 0x115,
-    CMD0_NPU_SET_OFM_BLK_HEIGHT_M1 = 0x116,
-    CMD0_NPU_SET_OFM_BLK_DEPTH_M1  = 0x117,
-    CMD0_NPU_SET_OFM_ZERO_POINT    = 0x118,
-    CMD0_NPU_SET_OFM_WIDTH0_M1     = 0x11A,
-    CMD0_NPU_SET_OFM_HEIGHT0_M1    = 0x11B,
-    CMD0_NPU_SET_OFM_HEIGHT1_M1    = 0x11C,
-    CMD0_NPU_SET_OFM_REGION        = 0x11F,
-    CMD0_NPU_SET_KERNEL_WIDTH_M1   = 0x120,
-    CMD0_NPU_SET_KERNEL_HEIGHT_M1  = 0x121,
-    CMD0_NPU_SET_KERNEL_STRIDE     = 0x122,
-    CMD0_NPU_SET_PARALLEL_MODE     = 0x123,
-    CMD0_NPU_SET_ACC_FORMAT        = 0x124,
-    CMD0_NPU_SET_ACTIVATION        = 0x125,
-    CMD0_NPU_SET_ACTIVATION_MIN    = 0x126,
-    CMD0_NPU_SET_ACTIVATION_MAX    = 0x127,
-    CMD0_NPU_SET_WEIGHT_REGION     = 0x128,
-    CMD0_NPU_SET_SCALE_REGION      = 0x129,
-    CMD0_NPU_SET_AB_START          = 0x12D,
-    CMD0_NPU_SET_BLOCKDEP          = 0x12F,
-    CMD0_NPU_SET_DMA0_SRC_REGION   = 0x130,
-    CMD0_NPU_SET_DMA0_DST_REGION   = 0x131,
-    CMD0_NPU_SET_DMA0_SIZE0        = 0x132,
-    CMD0_NPU_SET_DMA0_SIZE1        = 0x133,
-    CMD0_NPU_SET_IFM2_BROADCAST    = 0x180,
-    CMD0_NPU_SET_IFM2_SCALAR       = 0x181,
-    CMD0_NPU_SET_IFM2_PRECISION    = 0x185,
-    CMD0_NPU_SET_IFM2_ZERO_POINT   = 0x189,
-    CMD0_NPU_SET_IFM2_WIDTH0_M1    = 0x18A,
-    CMD0_NPU_SET_IFM2_HEIGHT0_M1   = 0x18B,
-    CMD0_NPU_SET_IFM2_HEIGHT1_M1   = 0x18C,
-    CMD0_NPU_SET_IFM2_IB_START     = 0x18D,
-    CMD0_NPU_SET_IFM2_REGION       = 0x18F,
-};
-
-enum cmd1
-{
-    CMD1_NPU_SET_IFM_BASE0      = 0x000,
-    CMD1_NPU_SET_IFM_BASE1      = 0x001,
-    CMD1_NPU_SET_IFM_BASE2      = 0x002,
-    CMD1_NPU_SET_IFM_BASE3      = 0x003,
-    CMD1_NPU_SET_IFM_STRIDE_X   = 0x004,
-    CMD1_NPU_SET_IFM_STRIDE_Y   = 0x005,
-    CMD1_NPU_SET_IFM_STRIDE_C   = 0x006,
-    CMD1_NPU_SET_OFM_BASE0      = 0x010,
-    CMD1_NPU_SET_OFM_BASE1      = 0x011,
-    CMD1_NPU_SET_OFM_BASE2      = 0x012,
-    CMD1_NPU_SET_OFM_BASE3      = 0x013,
-    CMD1_NPU_SET_OFM_STRIDE_X   = 0x014,
-    CMD1_NPU_SET_OFM_STRIDE_Y   = 0x015,
-    CMD1_NPU_SET_OFM_STRIDE_C   = 0x016,
-    CMD1_NPU_SET_WEIGHT_BASE    = 0x020,
-    CMD1_NPU_SET_WEIGHT_LENGTH  = 0x021,
-    CMD1_NPU_SET_SCALE_BASE     = 0x022,
-    CMD1_NPU_SET_SCALE_LENGTH   = 0x023,
-    CMD1_NPU_SET_OFM_SCALE      = 0x024,
-    CMD1_NPU_SET_OPA_SCALE      = 0x025,
-    CMD1_NPU_SET_OPB_SCALE      = 0x026,
-    CMD1_NPU_SET_DMA0_SRC       = 0x030,
-    CMD1_NPU_SET_DMA0_DST       = 0x031,
-    CMD1_NPU_SET_DMA0_LEN       = 0x032,
-    CMD1_NPU_SET_DMA0_SKIP0     = 0x033,
-    CMD1_NPU_SET_DMA0_SKIP1     = 0x034,
-    CMD1_NPU_SET_IFM2_BASE0     = 0x080,
-    CMD1_NPU_SET_IFM2_BASE1     = 0x081,
-    CMD1_NPU_SET_IFM2_BASE2     = 0x082,
-    CMD1_NPU_SET_IFM2_BASE3     = 0x083,
-    CMD1_NPU_SET_IFM2_STRIDE_X  = 0x084,
-    CMD1_NPU_SET_IFM2_STRIDE_Y  = 0x085,
-    CMD1_NPU_SET_IFM2_STRIDE_C  = 0x086,
-    CMD1_NPU_SET_WEIGHT1_BASE   = 0x090,
-    CMD1_NPU_SET_WEIGHT1_LENGTH = 0x091,
-    CMD1_NPU_SET_SCALE1_BASE    = 0x092,
-    CMD1_NPU_SET_SCALE1_LENGTH  = 0x093,
-};
-
-enum data_format
-{
-    DATA_FORMAT_NHWC    = 0,
-    DATA_FORMAT_NHCWB16 = 1,
+    NOT_IMPLEMENTED = 0,
+    IMPLEMENTED     = 1,
 };
 
-enum elementwise_mode
+enum class ifm2_operand_order : uint8_t
 {
-    ELEMENTWISE_MODE_MUL   = 0,
-    ELEMENTWISE_MODE_ADD   = 1,
-    ELEMENTWISE_MODE_SUB   = 2,
-    ELEMENTWISE_MODE_MIN   = 3,
-    ELEMENTWISE_MODE_MAX   = 4,
-    ELEMENTWISE_MODE_LRELU = 5,
-    ELEMENTWISE_MODE_ABS   = 6,
-    ELEMENTWISE_MODE_CLZ   = 7,
-    ELEMENTWISE_MODE_SHR   = 8,
-    ELEMENTWISE_MODE_SHL   = 9,
+    ORDER_B = 0,
+    ORDER_A = 1,
 };
 
-enum ifm_precision
+enum class ifm_scale_mode : uint8_t
 {
-    IFM_PRECISION_U8  = 0,
-    IFM_PRECISION_S8  = 1,
-    IFM_PRECISION_U16 = 4,
-    IFM_PRECISION_S16 = 5,
-    IFM_PRECISION_S32 = 9,
+    OPA_OPB_16 = 0,
+    OPA_32     = 1,
+    OPB_32     = 2,
 };
 
-enum ifm_scale_mode
+enum class ifm_upscale_mode : uint8_t
 {
-    IFM_SCALE_MODE_SCALE_16BIT     = 0,
-    IFM_SCALE_MODE_SCALE_OPA_32BIT = 1,
-    IFM_SCALE_MODE_SCALE_OPB_32BIT = 2,
-};
-
-enum macs_per_cc
-{
-    MACS_PER_CC_MACS_PER_CC_IS_5 = 0x5,
-    MACS_PER_CC_MACS_PER_CC_IS_6 = 0x6,
-    MACS_PER_CC_MACS_PER_CC_IS_7 = 0x7,
-    MACS_PER_CC_MACS_PER_CC_IS_8 = 0x8,
-};
-
-enum memory_type
-{
-    MEMORY_TYPE_AXI0_OUTSTANDING_COUNTER0 = 0,
-    MEMORY_TYPE_AXI0_OUTSTANDING_COUNTER1 = 1,
-    MEMORY_TYPE_AXI1_OUTSTANDING_COUNTER2 = 2,
-    MEMORY_TYPE_AXI1_OUTSTANDING_COUNTER3 = 3,
-};
-
-enum ofm_precision
-{
-    OFM_PRECISION_U8  = 0,
-    OFM_PRECISION_S8  = 1,
-    OFM_PRECISION_U16 = 2,
-    OFM_PRECISION_S16 = 3,
-    OFM_PRECISION_S32 = 5,
-};
-
-enum pmu_event_type
-{
-    PMU_EVENT_TYPE_NO_EVENT                     = 0x00,
-    PMU_EVENT_TYPE_CYCLE                        = 0x11,
-    PMU_EVENT_TYPE_NPU_IDLE                     = 0x20,
-    PMU_EVENT_TYPE_CC_STALLED_ON_BLOCKDEP       = 0x21,
-    PMU_EVENT_TYPE_CC_STALLED_ON_SHRAM_RECONFIG = 0x22,
-    PMU_EVENT_TYPE_NPU_ACTIVE                   = 0x23,
-    PMU_EVENT_TYPE_MAC_ACTIVE                   = 0x30,
-    PMU_EVENT_TYPE_MAC_ACTIVE_8BIT              = 0x31,
-    PMU_EVENT_TYPE_MAC_ACTIVE_16BIT             = 0x32,
-    PMU_EVENT_TYPE_MAC_DPU_ACTIVE               = 0x33,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_WD_ACC        = 0x34,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_WD            = 0x35,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_ACC           = 0x36,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_IB            = 0x37,
-    PMU_EVENT_TYPE_MAC_ACTIVE_32BIT             = 0x38,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_INT_W         = 0x39,
-    PMU_EVENT_TYPE_MAC_STALLED_BY_INT_ACC       = 0x3A,
-    PMU_EVENT_TYPE_AO_ACTIVE                    = 0x40,
-    PMU_EVENT_TYPE_AO_ACTIVE_8BIT               = 0x41,
-    PMU_EVENT_TYPE_AO_ACTIVE_16BIT              = 0x42,
-    PMU_EVENT_TYPE_AO_STALLED_BY_OFMP_OB        = 0x43,
-    PMU_EVENT_TYPE_AO_STALLED_BY_OFMP           = 0x44,
-    PMU_EVENT_TYPE_AO_STALLED_BY_OB             = 0x45,
-    PMU_EVENT_TYPE_AO_STALLED_BY_ACC_IB         = 0x46,
-    PMU_EVENT_TYPE_AO_STALLED_BY_ACC            = 0x47,
-    PMU_EVENT_TYPE_AO_STALLED_BY_IB             = 0x48,
-    PMU_EVENT_TYPE_WD_ACTIVE                    = 0x50,
-    PMU_EVENT_TYPE_WD_STALLED                   = 0x51,
-    PMU_EVENT_TYPE_WD_STALLED_BY_WS             = 0x52,
-    PMU_EVENT_TYPE_WD_STALLED_BY_WD_BUF         = 0x53,
-    PMU_EVENT_TYPE_WD_PARSE_ACTIVE              = 0x54,
-    PMU_EVENT_TYPE_WD_PARSE_STALLED             = 0x55,
-    PMU_EVENT_TYPE_WD_PARSE_STALLED_IN          = 0x56,
-    PMU_EVENT_TYPE_WD_PARSE_STALLED_OUT         = 0x57,
-    PMU_EVENT_TYPE_WD_TRANS_WS                  = 0x58,
-    PMU_EVENT_TYPE_WD_TRANS_WB                  = 0x59,
-    PMU_EVENT_TYPE_WD_TRANS_DW0                 = 0x5a,
-    PMU_EVENT_TYPE_WD_TRANS_DW1                 = 0x5b,
-    PMU_EVENT_TYPE_AXI0_RD_TRANS_ACCEPTED       = 0x80,
-    PMU_EVENT_TYPE_AXI0_RD_TRANS_COMPLETED      = 0x81,
-    PMU_EVENT_TYPE_AXI0_RD_DATA_BEAT_RECEIVED   = 0x82,
-    PMU_EVENT_TYPE_AXI0_RD_TRAN_REQ_STALLED     = 0x83,
-    PMU_EVENT_TYPE_AXI0_WR_TRANS_ACCEPTED       = 0x84,
-    PMU_EVENT_TYPE_AXI0_WR_TRANS_COMPLETED_M    = 0x85,
-    PMU_EVENT_TYPE_AXI0_WR_TRANS_COMPLETED_S    = 0x86,
-    PMU_EVENT_TYPE_AXI0_WR_DATA_BEAT_WRITTEN    = 0x87,
-    PMU_EVENT_TYPE_AXI0_WR_TRAN_REQ_STALLED     = 0x88,
-    PMU_EVENT_TYPE_AXI0_WR_DATA_BEAT_STALLED    = 0x89,
-    PMU_EVENT_TYPE_AXI0_ENABLED_CYCLES          = 0x8c,
-    PMU_EVENT_TYPE_AXI0_RD_STALL_LIMIT          = 0x8e,
-    PMU_EVENT_TYPE_AXI0_WR_STALL_LIMIT          = 0x8f,
-    PMU_EVENT_TYPE_AXI1_RD_TRANS_ACCEPTED       = 0x180,
-    PMU_EVENT_TYPE_AXI1_RD_TRANS_COMPLETED      = 0x181,
-    PMU_EVENT_TYPE_AXI1_RD_DATA_BEAT_RECEIVED   = 0x182,
-    PMU_EVENT_TYPE_AXI1_RD_TRAN_REQ_STALLED     = 0x183,
-    PMU_EVENT_TYPE_AXI1_WR_TRANS_ACCEPTED       = 0x184,
-    PMU_EVENT_TYPE_AXI1_WR_TRANS_COMPLETED_M    = 0x185,
-    PMU_EVENT_TYPE_AXI1_WR_TRANS_COMPLETED_S    = 0x186,
-    PMU_EVENT_TYPE_AXI1_WR_DATA_BEAT_WRITTEN    = 0x187,
-    PMU_EVENT_TYPE_AXI1_WR_TRAN_REQ_STALLED     = 0x188,
-    PMU_EVENT_TYPE_AXI1_WR_DATA_BEAT_STALLED    = 0x189,
-    PMU_EVENT_TYPE_AXI1_ENABLED_CYCLES          = 0x18c,
-    PMU_EVENT_TYPE_AXI1_RD_STALL_LIMIT          = 0x18e,
-    PMU_EVENT_TYPE_AXI1_WR_STALL_LIMIT          = 0x18f,
-    PMU_EVENT_TYPE_AXI_LATENCY_ANY              = 0xa0,
-    PMU_EVENT_TYPE_AXI_LATENCY_32               = 0xa1,
-    PMU_EVENT_TYPE_AXI_LATENCY_64               = 0xa2,
-    PMU_EVENT_TYPE_AXI_LATENCY_128              = 0xa3,
-    PMU_EVENT_TYPE_AXI_LATENCY_256              = 0xa4,
-    PMU_EVENT_TYPE_AXI_LATENCY_512              = 0xa5,
-    PMU_EVENT_TYPE_AXI_LATENCY_1024             = 0xa6,
-    PMU_EVENT_TYPE_ECC_DMA                      = 0xb0,
-    PMU_EVENT_TYPE_ECC_SB0                      = 0xb1,
-    PMU_EVENT_TYPE_ECC_SB1                      = 0x1b1,
+    NONE    = 0,
+    NEAREST = 1,
+    ZEROS   = 2,
 };
 
-enum pooling_mode
+enum class kernel_decomposition : uint8_t
 {
-    POOLING_MODE_MAX        = 0,
-    POOLING_MODE_AVERAGE    = 1,
-    POOLING_MODE_REDUCE_SUM = 2,
+    D8X8 = 0,
+    D4X4 = 1,
 };
 
-enum privilege_level
+enum class kernel_dilation : uint8_t
 {
-    PRIVILEGE_LEVEL_USER       = 0,
-    PRIVILEGE_LEVEL_PRIVILEGED = 1,
+    NONE = 0,
+    X2   = 1,
 };
 
-enum resampling_mode
+enum class max_beats : uint8_t
 {
-    RESAMPLING_MODE_NONE      = 0,
-    RESAMPLING_MODE_NEAREST   = 1,
-    RESAMPLING_MODE_TRANSPOSE = 2,
+    B64  = 0,
+    B128 = 1,
+    B256 = 2,
 };
 
-enum rounding
+enum class mem_attr : uint8_t
 {
-    ROUNDING_TFL      = 0,
-    ROUNDING_TRUNCATE = 1,
-    ROUNDING_NATURAL  = 2,
+    AXI0_OUTSTANDING_COUNTER0 = 0,
+    AXI0_OUTSTANDING_COUNTER1 = 1,
+    AXI1_OUTSTANDING_COUNTER2 = 2,
+    AXI1_OUTSTANDING_COUNTER3 = 3,
 };
 
-enum security_level
+enum class ofm_scale_mode : uint8_t
 {
-    SECURITY_LEVEL_SECURE     = 0,
-    SECURITY_LEVEL_NON_SECURE = 1,
+    PER_CHANNEL = 0,
+    GLOBAL      = 1,
 };
 
-enum shram_size
+enum class pmu_axi_channel : uint8_t
 {
-    SHRAM_SIZE_SHRAM_96KB = 0x60,
-    SHRAM_SIZE_SHRAM_48KB = 0x30,
-    SHRAM_SIZE_SHRAM_24KB = 0x18,
-    SHRAM_SIZE_SHRAM_16KB = 0x10,
+    RD_CMD        = 0,
+    RD_IFM        = 1,
+    RD_WEIGHTS    = 2,
+    RD_SCALE_BIAS = 3,
+    RD_MEM2MEM    = 4,
+    WR_OFM        = 8,
+    WR_MEM2MEM    = 9,
 };
 
-enum state
+enum class pmu_event : uint16_t
 {
-    STATE_STOPPED = 0,
-    STATE_RUNNING = 1,
+    NO_EVENT                     = 0,
+    CYCLE                        = 17,
+    NPU_IDLE                     = 32,
+    CC_STALLED_ON_BLOCKDEP       = 33,
+    CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    NPU_ACTIVE                   = 35,
+    MAC_ACTIVE                   = 48,
+    MAC_ACTIVE_8BIT              = 49,
+    MAC_ACTIVE_16BIT             = 50,
+    MAC_DPU_ACTIVE               = 51,
+    MAC_STALLED_BY_WD_ACC        = 52,
+    MAC_STALLED_BY_WD            = 53,
+    MAC_STALLED_BY_ACC           = 54,
+    MAC_STALLED_BY_IB            = 55,
+    MAC_ACTIVE_32BIT             = 56,
+    MAC_STALLED_BY_INT_W         = 57,
+    MAC_STALLED_BY_INT_ACC       = 58,
+    AO_ACTIVE                    = 64,
+    AO_ACTIVE_8BIT               = 65,
+    AO_ACTIVE_16BIT              = 66,
+    AO_STALLED_BY_OFMP_OB        = 67,
+    AO_STALLED_BY_OFMP           = 68,
+    AO_STALLED_BY_OB             = 69,
+    AO_STALLED_BY_ACC_IB         = 70,
+    AO_STALLED_BY_ACC            = 71,
+    AO_STALLED_BY_IB             = 72,
+    WD_ACTIVE                    = 80,
+    WD_STALLED                   = 81,
+    WD_STALLED_BY_WS             = 82,
+    WD_STALLED_BY_WD_BUF         = 83,
+    WD_PARSE_ACTIVE              = 84,
+    WD_PARSE_STALLED             = 85,
+    WD_PARSE_STALLED_IN          = 86,
+    WD_PARSE_STALLED_OUT         = 87,
+    WD_TRANS_WS                  = 88,
+    WD_TRANS_WB                  = 89,
+    WD_TRANS_DW0                 = 90,
+    WD_TRANS_DW1                 = 91,
+    AXI0_RD_TRANS_ACCEPTED       = 128,
+    AXI0_RD_TRANS_COMPLETED      = 129,
+    AXI0_RD_DATA_BEAT_RECEIVED   = 130,
+    AXI0_RD_TRAN_REQ_STALLED     = 131,
+    AXI0_WR_TRANS_ACCEPTED       = 132,
+    AXI0_WR_TRANS_COMPLETED_M    = 133,
+    AXI0_WR_TRANS_COMPLETED_S    = 134,
+    AXI0_WR_DATA_BEAT_WRITTEN    = 135,
+    AXI0_WR_TRAN_REQ_STALLED     = 136,
+    AXI0_WR_DATA_BEAT_STALLED    = 137,
+    AXI0_ENABLED_CYCLES          = 140,
+    AXI0_RD_STALL_LIMIT          = 142,
+    AXI0_WR_STALL_LIMIT          = 143,
+    AXI_LATENCY_ANY              = 160,
+    AXI_LATENCY_32               = 161,
+    AXI_LATENCY_64               = 162,
+    AXI_LATENCY_128              = 163,
+    AXI_LATENCY_256              = 164,
+    AXI_LATENCY_512              = 165,
+    AXI_LATENCY_1024             = 166,
+    ECC_DMA                      = 176,
+    ECC_SB0                      = 177,
+    AXI1_RD_TRANS_ACCEPTED       = 384,
+    AXI1_RD_TRANS_COMPLETED      = 385,
+    AXI1_RD_DATA_BEAT_RECEIVED   = 386,
+    AXI1_RD_TRAN_REQ_STALLED     = 387,
+    AXI1_WR_TRANS_ACCEPTED       = 388,
+    AXI1_WR_TRANS_COMPLETED_M    = 389,
+    AXI1_WR_TRANS_COMPLETED_S    = 390,
+    AXI1_WR_DATA_BEAT_WRITTEN    = 391,
+    AXI1_WR_TRAN_REQ_STALLED     = 392,
+    AXI1_WR_DATA_BEAT_STALLED    = 393,
+    AXI1_ENABLED_CYCLES          = 396,
+    AXI1_RD_STALL_LIMIT          = 398,
+    AXI1_WR_STALL_LIMIT          = 399,
+    ECC_SB1                      = 433,
 };
 
-enum stride_mode
+enum class pooling_mode : uint8_t
 {
-    STRIDE_MODE_STRIDE_MODE_1D = 0,
-    STRIDE_MODE_STRIDE_MODE_2D = 1,
-    STRIDE_MODE_STRIDE_MODE_3D = 2,
+    MAX        = 0,
+    AVERAGE    = 1,
+    REDUCE_SUM = 2,
 };
 
-#endif
+enum class privilege_level : uint8_t
+{
+    USER       = 0,
+    PRIVILEGED = 1,
+};
 
-// id_r - ID register
-struct id_r
+enum class round_mode : uint8_t
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+    DBL      = 0,
+    TRUNCATE = 1,
+    NATURAL  = 2,
+};
+
+enum class security_level : uint8_t
+{
+    SECURE     = 0,
+    NON_SECURE = 1,
+};
+
+enum class state : uint8_t
+{
+    STOPPED = 0,
+    RUNNING = 1,
+};
+
+enum class wd_core_slice_state : uint8_t
+{
+    HEADER  = 0,
+    PALETTE = 1,
+    WEIGHTS = 2,
+};
+
+enum class wd_ctrl_state : uint8_t
+{
+    IDLE     = 0,
+    DRAIN    = 1,
+    OFD_INIT = 2,
+    OFD_RUN  = 3,
+};
+
+enum class weight_order : uint8_t
+{
+    DEPTH_FIRST       = 0,
+    PART_KERNEL_FIRST = 1,
+};
+
+#else
+
+enum acc_format
+{
+    ACC_FORMAT_I32 = 0,
+    ACC_FORMAT_I40 = 1,
+    ACC_FORMAT_F16 = 2,
+};
+
+enum activation_clip_range
+{
+    ACTIVATION_CLIP_RANGE_OFM_PRECISION = 0,
+    ACTIVATION_CLIP_RANGE_FORCE_UINT8   = 2,
+    ACTIVATION_CLIP_RANGE_FORCE_INT8    = 3,
+    ACTIVATION_CLIP_RANGE_FORCE_INT16   = 5,
+};
+
+enum activation_format
+{
+    ACTIVATION_FORMAT_NHWC    = 0,
+    ACTIVATION_FORMAT_NHCWB16 = 1,
+};
+
+enum activation_function
+{
+    ACTIVATION_FUNCTION_RELU    = 0,
+    ACTIVATION_FUNCTION_TANH    = 3,
+    ACTIVATION_FUNCTION_SIGMOID = 4,
+    ACTIVATION_FUNCTION_TABLE_0 = 16,
+    ACTIVATION_FUNCTION_TABLE_1 = 17,
+    ACTIVATION_FUNCTION_TABLE_2 = 18,
+    ACTIVATION_FUNCTION_TABLE_3 = 19,
+    ACTIVATION_FUNCTION_TABLE_4 = 20,
+    ACTIVATION_FUNCTION_TABLE_5 = 21,
+    ACTIVATION_FUNCTION_TABLE_6 = 22,
+    ACTIVATION_FUNCTION_TABLE_7 = 23,
+};
+
+enum activation_precision
+{
+    ACTIVATION_PRECISION_B8  = 0,
+    ACTIVATION_PRECISION_B16 = 1,
+    ACTIVATION_PRECISION_B32 = 2,
+    ACTIVATION_PRECISION_B64 = 3,
+};
+
+enum activation_type
+{
+    ACTIVATION_TYPE_UNSIGNED = 0,
+    ACTIVATION_TYPE_SIGNED   = 1,
+};
+
+enum axi_mem_encoding
+{
+    AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE                 = 0,
+    AXI_MEM_ENCODING_DEVICE_BUFFERABLE                     = 1,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 2,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE       = 3,
+    AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE             = 4,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE           = 5,
+    AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE          = 6,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE                = 8,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE              = 9,
+    AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE             = 10,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 11,
+};
+
+enum broadcast_mode
+{
+    BROADCAST_MODE_DISABLE = 0,
+    BROADCAST_MODE_ENABLE  = 1,
+};
+
+enum cmd0_opcode
+{
+    CMD0_OPCODE_NPU_OP_STOP               = 0,
+    CMD0_OPCODE_NPU_OP_IRQ                = 1,
+    CMD0_OPCODE_NPU_OP_CONV               = 2,
+    CMD0_OPCODE_NPU_OP_DEPTHWISE          = 3,
+    CMD0_OPCODE_NPU_OP_POOL               = 5,
+    CMD0_OPCODE_NPU_OP_ELEMENTWISE        = 6,
+    CMD0_OPCODE_NPU_OP_DMA_START          = 16,
+    CMD0_OPCODE_NPU_OP_DMA_WAIT           = 17,
+    CMD0_OPCODE_NPU_OP_KERNEL_WAIT        = 18,
+    CMD0_OPCODE_NPU_OP_PMU_MASK           = 19,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_TOP       = 256,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT      = 257,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT     = 258,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM    = 259,
+    CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1      = 260,
+    CMD0_OPCODE_NPU_SET_IFM_PRECISION     = 261,
+    CMD0_OPCODE_NPU_SET_IFM_UPSCALE       = 263,
+    CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT    = 265,
+    CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1     = 266,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1    = 267,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1    = 268,
+    CMD0_OPCODE_NPU_SET_IFM_IB_END        = 269,
+    CMD0_OPCODE_NPU_SET_IFM_REGION        = 271,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1      = 273,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1     = 274,
+    CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1      = 275,
+    CMD0_OPCODE_NPU_SET_OFM_PRECISION     = 276,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1  = 277,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1  = 279,
+    CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT    = 280,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1     = 282,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1    = 283,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1    = 284,
+    CMD0_OPCODE_NPU_SET_OFM_REGION        = 287,
+    CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1   = 288,
+    CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1  = 289,
+    CMD0_OPCODE_NPU_SET_KERNEL_STRIDE     = 290,
+    CMD0_OPCODE_NPU_SET_ACC_FORMAT        = 292,
+    CMD0_OPCODE_NPU_SET_ACTIVATION        = 293,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MIN    = 294,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MAX    = 295,
+    CMD0_OPCODE_NPU_SET_WEIGHT_REGION     = 296,
+    CMD0_OPCODE_NPU_SET_SCALE_REGION      = 297,
+    CMD0_OPCODE_NPU_SET_AB_START          = 301,
+    CMD0_OPCODE_NPU_SET_BLOCKDEP          = 303,
+    CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION   = 304,
+    CMD0_OPCODE_NPU_SET_DMA0_DST_REGION   = 305,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE0        = 306,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE1        = 307,
+    CMD0_OPCODE_NPU_SET_IFM2_BROADCAST    = 384,
+    CMD0_OPCODE_NPU_SET_IFM2_SCALAR       = 385,
+    CMD0_OPCODE_NPU_SET_IFM2_PRECISION    = 389,
+    CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT   = 393,
+    CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1    = 394,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1   = 395,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1   = 396,
+    CMD0_OPCODE_NPU_SET_IFM2_IB_START     = 397,
+    CMD0_OPCODE_NPU_SET_IFM2_REGION       = 399,
+};
+
+enum cmd1_opcode
+{
+    CMD1_OPCODE_NPU_SET_IFM_BASE0     = 0,
+    CMD1_OPCODE_NPU_SET_IFM_BASE1     = 1,
+    CMD1_OPCODE_NPU_SET_IFM_BASE2     = 2,
+    CMD1_OPCODE_NPU_SET_IFM_BASE3     = 3,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_X  = 4,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y  = 5,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_C  = 6,
+    CMD1_OPCODE_NPU_SET_OFM_BASE0     = 16,
+    CMD1_OPCODE_NPU_SET_OFM_BASE1     = 17,
+    CMD1_OPCODE_NPU_SET_OFM_BASE2     = 18,
+    CMD1_OPCODE_NPU_SET_OFM_BASE3     = 19,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_X  = 20,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y  = 21,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_C  = 22,
+    CMD1_OPCODE_NPU_SET_WEIGHT_BASE   = 32,
+    CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH = 33,
+    CMD1_OPCODE_NPU_SET_SCALE_BASE    = 34,
+    CMD1_OPCODE_NPU_SET_SCALE_LENGTH  = 35,
+    CMD1_OPCODE_NPU_SET_OFM_SCALE     = 36,
+    CMD1_OPCODE_NPU_SET_OPA_SCALE     = 37,
+    CMD1_OPCODE_NPU_SET_OPB_SCALE     = 38,
+    CMD1_OPCODE_NPU_SET_DMA0_SRC      = 48,
+    CMD1_OPCODE_NPU_SET_DMA0_DST      = 49,
+    CMD1_OPCODE_NPU_SET_DMA0_LEN      = 50,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE0    = 128,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE1    = 129,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE2    = 130,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE3    = 131,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X = 132,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y = 133,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C = 134,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED0 = 160,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED1 = 161,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED2 = 162,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED3 = 163,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED4 = 164,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED5 = 165,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED6 = 166,
+    CMD1_OPCODE_NPU_SET_USER_DEFINED7 = 167,
+};
+
+enum cmd_ctrl
+{
+    CMD_CTRL_CMD0_CTRL = 0,
+    CMD_CTRL_CMD1_CTRL = 1,
+};
+
+enum custom_dma_cs
+{
+    CUSTOM_DMA_CS_DISABLE = 0,
+    CUSTOM_DMA_CS_ENABLE  = 1,
+};
+
+enum custom_dma
+{
+    CUSTOM_DMA_NOT_IMPLEMENTED = 0,
+    CUSTOM_DMA_IMPLEMENTED     = 1,
+};
+
+enum dma_fault_src
+{
+    DMA_FAULT_SRC_AXI_M0 = 0,
+    DMA_FAULT_SRC_AXI_M1 = 1,
+};
+
+enum dma_region_mode
+{
+    DMA_REGION_MODE_EXTERNAL = 0,
+    DMA_REGION_MODE_INTERNAL = 1,
+};
+
+enum dma_stride_mode
+{
+    DMA_STRIDE_MODE_D1 = 0,
+};
+
+enum elementwise_mode
+{
+    ELEMENTWISE_MODE_MUL   = 0,
+    ELEMENTWISE_MODE_ADD   = 1,
+    ELEMENTWISE_MODE_SUB   = 2,
+    ELEMENTWISE_MODE_MIN   = 3,
+    ELEMENTWISE_MODE_MAX   = 4,
+    ELEMENTWISE_MODE_LRELU = 5,
+    ELEMENTWISE_MODE_ABS   = 6,
+    ELEMENTWISE_MODE_CLZ   = 7,
+    ELEMENTWISE_MODE_SHR   = 8,
+    ELEMENTWISE_MODE_SHL   = 9,
+};
+
+enum functional_safety
+{
+    FUNCTIONAL_SAFETY_NOT_IMPLEMENTED = 0,
+    FUNCTIONAL_SAFETY_IMPLEMENTED     = 1,
+};
+
+enum ifm2_operand_order
+{
+    IFM2_OPERAND_ORDER_ORDER_B = 0,
+    IFM2_OPERAND_ORDER_ORDER_A = 1,
+};
+
+enum ifm_scale_mode
+{
+    IFM_SCALE_MODE_OPA_OPB_16 = 0,
+    IFM_SCALE_MODE_OPA_32     = 1,
+    IFM_SCALE_MODE_OPB_32     = 2,
+};
+
+enum ifm_upscale_mode
+{
+    IFM_UPSCALE_MODE_NONE    = 0,
+    IFM_UPSCALE_MODE_NEAREST = 1,
+    IFM_UPSCALE_MODE_ZEROS   = 2,
+};
+
+enum kernel_decomposition
+{
+    KERNEL_DECOMPOSITION_D8X8 = 0,
+    KERNEL_DECOMPOSITION_D4X4 = 1,
+};
+
+enum kernel_dilation
+{
+    KERNEL_DILATION_NONE = 0,
+    KERNEL_DILATION_X2   = 1,
+};
+
+enum max_beats
+{
+    MAX_BEATS_B64  = 0,
+    MAX_BEATS_B128 = 1,
+    MAX_BEATS_B256 = 2,
+};
+
+enum mem_attr
+{
+    MEM_ATTR_AXI0_OUTSTANDING_COUNTER0 = 0,
+    MEM_ATTR_AXI0_OUTSTANDING_COUNTER1 = 1,
+    MEM_ATTR_AXI1_OUTSTANDING_COUNTER2 = 2,
+    MEM_ATTR_AXI1_OUTSTANDING_COUNTER3 = 3,
+};
+
+enum ofm_scale_mode
+{
+    OFM_SCALE_MODE_PER_CHANNEL = 0,
+    OFM_SCALE_MODE_GLOBAL      = 1,
+};
+
+enum pmu_axi_channel
+{
+    PMU_AXI_CHANNEL_RD_CMD        = 0,
+    PMU_AXI_CHANNEL_RD_IFM        = 1,
+    PMU_AXI_CHANNEL_RD_WEIGHTS    = 2,
+    PMU_AXI_CHANNEL_RD_SCALE_BIAS = 3,
+    PMU_AXI_CHANNEL_RD_MEM2MEM    = 4,
+    PMU_AXI_CHANNEL_WR_OFM        = 8,
+    PMU_AXI_CHANNEL_WR_MEM2MEM    = 9,
+};
+
+enum pmu_event
+{
+    PMU_EVENT_NO_EVENT                     = 0,
+    PMU_EVENT_CYCLE                        = 17,
+    PMU_EVENT_NPU_IDLE                     = 32,
+    PMU_EVENT_CC_STALLED_ON_BLOCKDEP       = 33,
+    PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    PMU_EVENT_NPU_ACTIVE                   = 35,
+    PMU_EVENT_MAC_ACTIVE                   = 48,
+    PMU_EVENT_MAC_ACTIVE_8BIT              = 49,
+    PMU_EVENT_MAC_ACTIVE_16BIT             = 50,
+    PMU_EVENT_MAC_DPU_ACTIVE               = 51,
+    PMU_EVENT_MAC_STALLED_BY_WD_ACC        = 52,
+    PMU_EVENT_MAC_STALLED_BY_WD            = 53,
+    PMU_EVENT_MAC_STALLED_BY_ACC           = 54,
+    PMU_EVENT_MAC_STALLED_BY_IB            = 55,
+    PMU_EVENT_MAC_ACTIVE_32BIT             = 56,
+    PMU_EVENT_MAC_STALLED_BY_INT_W         = 57,
+    PMU_EVENT_MAC_STALLED_BY_INT_ACC       = 58,
+    PMU_EVENT_AO_ACTIVE                    = 64,
+    PMU_EVENT_AO_ACTIVE_8BIT               = 65,
+    PMU_EVENT_AO_ACTIVE_16BIT              = 66,
+    PMU_EVENT_AO_STALLED_BY_OFMP_OB        = 67,
+    PMU_EVENT_AO_STALLED_BY_OFMP           = 68,
+    PMU_EVENT_AO_STALLED_BY_OB             = 69,
+    PMU_EVENT_AO_STALLED_BY_ACC_IB         = 70,
+    PMU_EVENT_AO_STALLED_BY_ACC            = 71,
+    PMU_EVENT_AO_STALLED_BY_IB             = 72,
+    PMU_EVENT_WD_ACTIVE                    = 80,
+    PMU_EVENT_WD_STALLED                   = 81,
+    PMU_EVENT_WD_STALLED_BY_WS             = 82,
+    PMU_EVENT_WD_STALLED_BY_WD_BUF         = 83,
+    PMU_EVENT_WD_PARSE_ACTIVE              = 84,
+    PMU_EVENT_WD_PARSE_STALLED             = 85,
+    PMU_EVENT_WD_PARSE_STALLED_IN          = 86,
+    PMU_EVENT_WD_PARSE_STALLED_OUT         = 87,
+    PMU_EVENT_WD_TRANS_WS                  = 88,
+    PMU_EVENT_WD_TRANS_WB                  = 89,
+    PMU_EVENT_WD_TRANS_DW0                 = 90,
+    PMU_EVENT_WD_TRANS_DW1                 = 91,
+    PMU_EVENT_AXI0_RD_TRANS_ACCEPTED       = 128,
+    PMU_EVENT_AXI0_RD_TRANS_COMPLETED      = 129,
+    PMU_EVENT_AXI0_RD_DATA_BEAT_RECEIVED   = 130,
+    PMU_EVENT_AXI0_RD_TRAN_REQ_STALLED     = 131,
+    PMU_EVENT_AXI0_WR_TRANS_ACCEPTED       = 132,
+    PMU_EVENT_AXI0_WR_TRANS_COMPLETED_M    = 133,
+    PMU_EVENT_AXI0_WR_TRANS_COMPLETED_S    = 134,
+    PMU_EVENT_AXI0_WR_DATA_BEAT_WRITTEN    = 135,
+    PMU_EVENT_AXI0_WR_TRAN_REQ_STALLED     = 136,
+    PMU_EVENT_AXI0_WR_DATA_BEAT_STALLED    = 137,
+    PMU_EVENT_AXI0_ENABLED_CYCLES          = 140,
+    PMU_EVENT_AXI0_RD_STALL_LIMIT          = 142,
+    PMU_EVENT_AXI0_WR_STALL_LIMIT          = 143,
+    PMU_EVENT_AXI_LATENCY_ANY              = 160,
+    PMU_EVENT_AXI_LATENCY_32               = 161,
+    PMU_EVENT_AXI_LATENCY_64               = 162,
+    PMU_EVENT_AXI_LATENCY_128              = 163,
+    PMU_EVENT_AXI_LATENCY_256              = 164,
+    PMU_EVENT_AXI_LATENCY_512              = 165,
+    PMU_EVENT_AXI_LATENCY_1024             = 166,
+    PMU_EVENT_ECC_DMA                      = 176,
+    PMU_EVENT_ECC_SB0                      = 177,
+    PMU_EVENT_AXI1_RD_TRANS_ACCEPTED       = 384,
+    PMU_EVENT_AXI1_RD_TRANS_COMPLETED      = 385,
+    PMU_EVENT_AXI1_RD_DATA_BEAT_RECEIVED   = 386,
+    PMU_EVENT_AXI1_RD_TRAN_REQ_STALLED     = 387,
+    PMU_EVENT_AXI1_WR_TRANS_ACCEPTED       = 388,
+    PMU_EVENT_AXI1_WR_TRANS_COMPLETED_M    = 389,
+    PMU_EVENT_AXI1_WR_TRANS_COMPLETED_S    = 390,
+    PMU_EVENT_AXI1_WR_DATA_BEAT_WRITTEN    = 391,
+    PMU_EVENT_AXI1_WR_TRAN_REQ_STALLED     = 392,
+    PMU_EVENT_AXI1_WR_DATA_BEAT_STALLED    = 393,
+    PMU_EVENT_AXI1_ENABLED_CYCLES          = 396,
+    PMU_EVENT_AXI1_RD_STALL_LIMIT          = 398,
+    PMU_EVENT_AXI1_WR_STALL_LIMIT          = 399,
+    PMU_EVENT_ECC_SB1                      = 433,
+};
+
+enum pooling_mode
+{
+    POOLING_MODE_MAX        = 0,
+    POOLING_MODE_AVERAGE    = 1,
+    POOLING_MODE_REDUCE_SUM = 2,
+};
+
+enum privilege_level
+{
+    PRIVILEGE_LEVEL_USER       = 0,
+    PRIVILEGE_LEVEL_PRIVILEGED = 1,
+};
+
+enum round_mode
+{
+    ROUND_MODE_DBL      = 0,
+    ROUND_MODE_TRUNCATE = 1,
+    ROUND_MODE_NATURAL  = 2,
+};
+
+enum security_level
+{
+    SECURITY_LEVEL_SECURE     = 0,
+    SECURITY_LEVEL_NON_SECURE = 1,
+};
+
+enum state
+{
+    STATE_STOPPED = 0,
+    STATE_RUNNING = 1,
+};
+
+enum wd_core_slice_state
+{
+    WD_CORE_SLICE_STATE_HEADER  = 0,
+    WD_CORE_SLICE_STATE_PALETTE = 1,
+    WD_CORE_SLICE_STATE_WEIGHTS = 2,
+};
+
+enum wd_ctrl_state
+{
+    WD_CTRL_STATE_IDLE     = 0,
+    WD_CTRL_STATE_DRAIN    = 1,
+    WD_CTRL_STATE_OFD_INIT = 2,
+    WD_CTRL_STATE_OFD_RUN  = 3,
+};
+
+enum weight_order
+{
+    WEIGHT_ORDER_DEPTH_FIRST       = 0,
+    WEIGHT_ORDER_PART_KERNEL_FIRST = 1,
+};
+
+#endif
+
+#ifdef NPU_DISASSEMBLE
+
+static const char *acc_format_str[] = {
+    "ACC_FORMAT_I32",
+    "ACC_FORMAT_I40",
+    "ACC_FORMAT_F16",
+};
+
+static const char *activation_clip_range_str[] = {
+    "ACTIVATION_CLIP_RANGE_OFM_PRECISION",
+    "****",
+    "ACTIVATION_CLIP_RANGE_FORCE_UINT8",
+    "ACTIVATION_CLIP_RANGE_FORCE_INT8",
+    "****",
+    "ACTIVATION_CLIP_RANGE_FORCE_INT16",
+};
+
+static const char *activation_format_str[] = {
+    "ACTIVATION_FORMAT_NHWC",
+    "ACTIVATION_FORMAT_NHCWB16",
+};
+
+static const char *activation_function_str[] = {
+    "ACTIVATION_FUNCTION_RELU",
+    "****",
+    "****",
+    "ACTIVATION_FUNCTION_TANH",
+    "ACTIVATION_FUNCTION_SIGMOID",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "ACTIVATION_FUNCTION_TABLE_0",
+    "ACTIVATION_FUNCTION_TABLE_1",
+    "ACTIVATION_FUNCTION_TABLE_2",
+    "ACTIVATION_FUNCTION_TABLE_3",
+    "ACTIVATION_FUNCTION_TABLE_4",
+    "ACTIVATION_FUNCTION_TABLE_5",
+    "ACTIVATION_FUNCTION_TABLE_6",
+    "ACTIVATION_FUNCTION_TABLE_7",
+};
+
+static const char *activation_precision_str[] = {
+    "ACTIVATION_PRECISION_B8",
+    "ACTIVATION_PRECISION_B16",
+    "ACTIVATION_PRECISION_B32",
+    "ACTIVATION_PRECISION_B64",
+};
+
+static const char *activation_type_str[] = {
+    "ACTIVATION_TYPE_UNSIGNED",
+    "ACTIVATION_TYPE_SIGNED",
+};
+
+static const char *axi_mem_encoding_str[] = {
+    "AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_DEVICE_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE",
+};
+
+static const char *broadcast_mode_str[] = {
+    "BROADCAST_MODE_DISABLE",
+    "BROADCAST_MODE_ENABLE",
+};
+
+static const char *cmd0_opcode_str[] = {
+    "CMD0_OPCODE_NPU_OP_STOP",
+    "CMD0_OPCODE_NPU_OP_IRQ",
+    "CMD0_OPCODE_NPU_OP_CONV",
+    "CMD0_OPCODE_NPU_OP_DEPTHWISE",
+    "****",
+    "CMD0_OPCODE_NPU_OP_POOL",
+    "CMD0_OPCODE_NPU_OP_ELEMENTWISE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_OP_DMA_START",
+    "CMD0_OPCODE_NPU_OP_DMA_WAIT",
+    "CMD0_OPCODE_NPU_OP_KERNEL_WAIT",
+    "CMD0_OPCODE_NPU_OP_PMU_MASK",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_TOP",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM",
+    "CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_PRECISION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_UPSCALE",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_IB_END",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_REGION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_PRECISION",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_REGION",
+    "CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_STRIDE",
+    "****",
+    "CMD0_OPCODE_NPU_SET_ACC_FORMAT",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MIN",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MAX",
+    "CMD0_OPCODE_NPU_SET_WEIGHT_REGION",
+    "CMD0_OPCODE_NPU_SET_SCALE_REGION",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_AB_START",
+    "****",
+    "CMD0_OPCODE_NPU_SET_BLOCKDEP",
+    "CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_DST_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE0",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_BROADCAST",
+    "CMD0_OPCODE_NPU_SET_IFM2_SCALAR",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_PRECISION",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_IB_START",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_REGION",
+};
+
+static const char *cmd1_opcode_str[] = {
+    "CMD1_OPCODE_NPU_SET_IFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH",
+    "CMD1_OPCODE_NPU_SET_SCALE_BASE",
+    "CMD1_OPCODE_NPU_SET_SCALE_LENGTH",
+    "CMD1_OPCODE_NPU_SET_OFM_SCALE",
+    "CMD1_OPCODE_NPU_SET_OPA_SCALE",
+    "CMD1_OPCODE_NPU_SET_OPB_SCALE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_DMA0_SRC",
+    "CMD1_OPCODE_NPU_SET_DMA0_DST",
+    "CMD1_OPCODE_NPU_SET_DMA0_LEN",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED0",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED1",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED2",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED3",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED4",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED5",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED6",
+    "CMD1_OPCODE_NPU_SET_USER_DEFINED7",
+};
+
+static const char *cmd_ctrl_str[] = { // Printable names for cmd_ctrl values; position presumably equals the enum value (TODO confirm)
+    "CMD_CTRL_CMD0_CTRL", // [0]
+    "CMD_CTRL_CMD1_CTRL", // [1]
+};
+
+static const char *custom_dma_cs_str[] = { // Printable names for custom_dma_cs values; position presumably equals the enum value (TODO confirm)
+    "CUSTOM_DMA_CS_DISABLE", // [0]
+    "CUSTOM_DMA_CS_ENABLE",  // [1]
+};
+
+static const char *custom_dma_str[] = { // Printable names for custom_dma values; position presumably equals the enum value (TODO confirm)
+    "CUSTOM_DMA_NOT_IMPLEMENTED", // [0]
+    "CUSTOM_DMA_IMPLEMENTED",     // [1]
+};
+
+static const char *dma_fault_src_str[] = { // Printable names for dma_fault_src values; position presumably equals the enum value (TODO confirm)
+    "DMA_FAULT_SRC_AXI_M0", // [0]
+    "DMA_FAULT_SRC_AXI_M1", // [1]
+};
+
+static const char *dma_region_mode_str[] = { // Printable names for dma_region_mode values; position presumably equals the enum value (TODO confirm)
+    "DMA_REGION_MODE_EXTERNAL", // [0]
+    "DMA_REGION_MODE_INTERNAL", // [1]
+};
+
+static const char *dma_stride_mode_str[] = { // Printable names for dma_stride_mode values; this table has a single entry
+    "DMA_STRIDE_MODE_D1", // [0]
+};
+
+static const char *elementwise_mode_str[] = { // Printable names for elementwise_mode values; 10 entries, position presumably equals the enum value (TODO confirm)
+    "ELEMENTWISE_MODE_MUL",   // [0]
+    "ELEMENTWISE_MODE_ADD",   // [1]
+    "ELEMENTWISE_MODE_SUB",   // [2]
+    "ELEMENTWISE_MODE_MIN",   // [3]
+    "ELEMENTWISE_MODE_MAX",   // [4]
+    "ELEMENTWISE_MODE_LRELU", // [5]
+    "ELEMENTWISE_MODE_ABS",   // [6]
+    "ELEMENTWISE_MODE_CLZ",   // [7]
+    "ELEMENTWISE_MODE_SHR",   // [8]
+    "ELEMENTWISE_MODE_SHL",   // [9]
+};
+
+static const char *functional_safety_str[] = { // Printable names for functional_safety values; position presumably equals the enum value (TODO confirm)
+    "FUNCTIONAL_SAFETY_NOT_IMPLEMENTED", // [0]
+    "FUNCTIONAL_SAFETY_IMPLEMENTED",     // [1]
+};
+
+static const char *ifm2_operand_order_str[] = { // Printable names for ifm2_operand_order values; note ORDER_B is listed before ORDER_A
+    "IFM2_OPERAND_ORDER_ORDER_B", // [0]
+    "IFM2_OPERAND_ORDER_ORDER_A", // [1]
+};
+
+static const char *ifm_scale_mode_str[] = { // Printable names for ifm_scale_mode values; position presumably equals the enum value (TODO confirm)
+    "IFM_SCALE_MODE_OPA_OPB_16", // [0]
+    "IFM_SCALE_MODE_OPA_32",     // [1]
+    "IFM_SCALE_MODE_OPB_32",     // [2]
+};
+
+static const char *ifm_upscale_mode_str[] = { // Printable names for ifm_upscale_mode values; position presumably equals the enum value (TODO confirm)
+    "IFM_UPSCALE_MODE_NONE",    // [0]
+    "IFM_UPSCALE_MODE_NEAREST", // [1]
+    "IFM_UPSCALE_MODE_ZEROS",   // [2]
+};
+
+static const char *kernel_decomposition_str[] = { // Printable names for kernel_decomposition values; position presumably equals the enum value (TODO confirm)
+    "KERNEL_DECOMPOSITION_D8X8", // [0]
+    "KERNEL_DECOMPOSITION_D4X4", // [1]
+};
+
+static const char *kernel_dilation_str[] = { // Printable names for kernel_dilation values; position presumably equals the enum value (TODO confirm)
+    "KERNEL_DILATION_NONE", // [0]
+    "KERNEL_DILATION_X2",   // [1]
+};
+
+static const char *max_beats_str[] = { // Printable names for max_beats values; position presumably equals the enum value (TODO confirm)
+    "MAX_BEATS_B64",  // [0]
+    "MAX_BEATS_B128", // [1]
+    "MAX_BEATS_B256", // [2]
+};
+
+static const char *mem_attr_str[] = { // Printable names for mem_attr values; position presumably equals the enum value (TODO confirm)
+    "MEM_ATTR_AXI0_OUTSTANDING_COUNTER0", // [0]
+    "MEM_ATTR_AXI0_OUTSTANDING_COUNTER1", // [1]
+    "MEM_ATTR_AXI1_OUTSTANDING_COUNTER2", // [2]
+    "MEM_ATTR_AXI1_OUTSTANDING_COUNTER3", // [3]
+};
+
+static const char *ofm_scale_mode_str[] = { // Printable names for ofm_scale_mode values; position presumably equals the enum value (TODO confirm)
+    "OFM_SCALE_MODE_PER_CHANNEL", // [0]
+    "OFM_SCALE_MODE_GLOBAL",      // [1]
+};
+
+static const char *pmu_axi_channel_str[] = { // Printable names for pmu_axi_channel values: RD_* names sit at [0]..[4], WR_* names at [8]..[9]
+    "PMU_AXI_CHANNEL_RD_CMD",        // [0]
+    "PMU_AXI_CHANNEL_RD_IFM",        // [1]
+    "PMU_AXI_CHANNEL_RD_WEIGHTS",    // [2]
+    "PMU_AXI_CHANNEL_RD_SCALE_BIAS", // [3]
+    "PMU_AXI_CHANNEL_RD_MEM2MEM",    // [4]
+    "****", // [5]..[7]: placeholder text, no named channel at these positions
+    "****",
+    "****",
+    "PMU_AXI_CHANNEL_WR_OFM",     // [8]
+    "PMU_AXI_CHANNEL_WR_MEM2MEM", // [9]
+};
+
+static const char *pmu_event_str[] = {
+    "PMU_EVENT_NO_EVENT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_CYCLE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_NPU_IDLE",
+    "PMU_EVENT_CC_STALLED_ON_BLOCKDEP",
+    "PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG",
+    "PMU_EVENT_NPU_ACTIVE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_MAC_ACTIVE",
+    "PMU_EVENT_MAC_ACTIVE_8BIT",
+    "PMU_EVENT_MAC_ACTIVE_16BIT",
+    "PMU_EVENT_MAC_DPU_ACTIVE",
+    "PMU_EVENT_MAC_STALLED_BY_WD_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_WD",
+    "PMU_EVENT_MAC_STALLED_BY_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_IB",
+    "PMU_EVENT_MAC_ACTIVE_32BIT",
+    "PMU_EVENT_MAC_STALLED_BY_INT_W",
+    "PMU_EVENT_MAC_STALLED_BY_INT_ACC",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AO_ACTIVE",
+    "PMU_EVENT_AO_ACTIVE_8BIT",
+    "PMU_EVENT_AO_ACTIVE_16BIT",
+    "PMU_EVENT_AO_STALLED_BY_OFMP_OB",
+    "PMU_EVENT_AO_STALLED_BY_OFMP",
+    "PMU_EVENT_AO_STALLED_BY_OB",
+    "PMU_EVENT_AO_STALLED_BY_ACC_IB",
+    "PMU_EVENT_AO_STALLED_BY_ACC",
+    "PMU_EVENT_AO_STALLED_BY_IB",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_WD_ACTIVE",
+    "PMU_EVENT_WD_STALLED",
+    "PMU_EVENT_WD_STALLED_BY_WS",
+    "PMU_EVENT_WD_STALLED_BY_WD_BUF",
+    "PMU_EVENT_WD_PARSE_ACTIVE",
+    "PMU_EVENT_WD_PARSE_STALLED",
+    "PMU_EVENT_WD_PARSE_STALLED_IN",
+    "PMU_EVENT_WD_PARSE_STALLED_OUT",
+    "PMU_EVENT_WD_TRANS_WS",
+    "PMU_EVENT_WD_TRANS_WB",
+    "PMU_EVENT_WD_TRANS_DW0",
+    "PMU_EVENT_WD_TRANS_DW1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI0_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI0_RD_TRANS_COMPLETED",
+    "PMU_EVENT_AXI0_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_AXI0_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI0_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI0_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_AXI0_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_AXI0_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_AXI0_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI0_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_AXI0_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_AXI0_RD_STALL_LIMIT",
+    "PMU_EVENT_AXI0_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI_LATENCY_ANY",
+    "PMU_EVENT_AXI_LATENCY_32",
+    "PMU_EVENT_AXI_LATENCY_64",
+    "PMU_EVENT_AXI_LATENCY_128",
+    "PMU_EVENT_AXI_LATENCY_256",
+    "PMU_EVENT_AXI_LATENCY_512",
+    "PMU_EVENT_AXI_LATENCY_1024",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_ECC_DMA",
+    "PMU_EVENT_ECC_SB0",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI1_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI1_RD_TRANS_COMPLETED",
+    "PMU_EVENT_AXI1_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_AXI1_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI1_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI1_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_AXI1_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_AXI1_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_AXI1_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI1_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_AXI1_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_AXI1_RD_STALL_LIMIT",
+    "PMU_EVENT_AXI1_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_ECC_SB1",
+};
+
+static const char *pooling_mode_str[] = { // Printable names for pooling_mode values; position presumably equals the enum value (TODO confirm)
+    "POOLING_MODE_MAX",        // [0]
+    "POOLING_MODE_AVERAGE",    // [1]
+    "POOLING_MODE_REDUCE_SUM", // [2]
+};
+
+static const char *privilege_level_str[] = { // Printable names for privilege_level values; position presumably equals the enum value (TODO confirm)
+    "PRIVILEGE_LEVEL_USER",       // [0]
+    "PRIVILEGE_LEVEL_PRIVILEGED", // [1]
+};
+
+static const char *round_mode_str[] = { // Printable names for round_mode values; position presumably equals the enum value (TODO confirm)
+    "ROUND_MODE_DBL",      // [0]
+    "ROUND_MODE_TRUNCATE", // [1]
+    "ROUND_MODE_NATURAL",  // [2]
+};
+
+static const char *security_level_str[] = { // Printable names for security_level values; note SECURE is listed first
+    "SECURITY_LEVEL_SECURE",     // [0]
+    "SECURITY_LEVEL_NON_SECURE", // [1]
+};
+
+static const char *state_str[] = { // Printable names for state values; position presumably equals the enum value (TODO confirm)
+    "STATE_STOPPED", // [0]
+    "STATE_RUNNING", // [1]
+};
+
+static const char *wd_core_slice_state_str[] = { // Printable names for wd_core_slice_state values; position presumably equals the enum value (TODO confirm)
+    "WD_CORE_SLICE_STATE_HEADER",  // [0]
+    "WD_CORE_SLICE_STATE_PALETTE", // [1]
+    "WD_CORE_SLICE_STATE_WEIGHTS", // [2]
+};
+
+static const char *wd_ctrl_state_str[] = { // Printable names for wd_ctrl_state values; position presumably equals the enum value (TODO confirm)
+    "WD_CTRL_STATE_IDLE",     // [0]
+    "WD_CTRL_STATE_DRAIN",    // [1]
+    "WD_CTRL_STATE_OFD_INIT", // [2]
+    "WD_CTRL_STATE_OFD_RUN",  // [3]
+};
+
+static const char *weight_order_str[] = { // Printable names for weight_order values; position presumably equals the enum value (TODO confirm)
+    "WEIGHT_ORDER_DEPTH_FIRST",       // [0]
+    "WEIGHT_ORDER_PART_KERNEL_FIRST", // [1]
+};
+
+#endif
+
+// Register type structs
+// id_r - ID register
+struct id_r // 32-bit ID register: C builds see a bitfield union, C++ builds see one private word plus mask/shift accessors
+{
+#ifndef __cplusplus // C view: anonymous union of the bitfields and the raw word
     union
     {
         struct
@@ -1276,7 +2492,7 @@ struct id_r
             uint32_t version_status : 4; // This is the version of the product
             uint32_t version_minor : 4;  // This is the n for the P part of an RnPn release number
             uint32_t version_major : 4;  // This is the n for the R part of an RnPn release number
-            uint32_t product_major : 4;  // This is the X part of the ML00X product number
+            uint32_t product_major : 4;  // Product major ID number (unique per base product)
             uint32_t arch_patch_rev : 4; // This is the patch number of the architecture version a.b
             uint32_t
                 arch_minor_rev : 8; // This is the minor architecture version number, b in the architecture version a.b
@@ -1285,31 +2501,28 @@ struct id_r
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else // C++ view: fields are packed into word0 and reached only through the accessors below
+  private:
+    uint32_t word0; // raw register value; field offsets are 0, 4, 8, 12, 16, 20 (8 bits wide) and 28
+
   public:
-    CONSTEXPR id_r() :
-        version_status(static_cast<uint32_t>(1)), version_minor(static_cast<uint32_t>(0x0)),
-        version_major(static_cast<uint32_t>(0x1)), product_major(static_cast<uint32_t>(4)),
-        arch_patch_rev(static_cast<uint32_t>(6)), arch_minor_rev(static_cast<uint32_t>(0)),
-        arch_major_rev(static_cast<uint32_t>(1))
-    {
-    }
-    CONSTEXPR id_r(uint32_t init) : word(init) {}
+    CONSTEXPR id_r() : word0(269500929) {} // 0x10104201: arch 1.1.0, product_major 4, version_major 2, version_minor 0, version_status 1
+    CONSTEXPR id_r(uint32_t init) : word0(init) {} // wrap an already-encoded register value
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     id_r copy() volatile
     {
@@ -1317,118 +2530,151 @@ struct id_r
     }
     CONSTEXPR uint32_t get_version_status() const
     {
-        uint32_t value = static_cast<uint32_t>(version_status);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0); // bits [3:0]
         return value;
     }
     uint32_t get_version_status() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(version_status);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
         return value;
     }
     CONSTEXPR id_r &set_version_status(uint32_t value)
     {
-        version_status = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 0)) & word0) | ((((1U << 4) - 1) & value) << 0); // shift the field mask first, then invert, so only bits [3:0] are replaced
+        return *this;
+    }
+    volatile id_r &set_version_status(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 4) - 1) << 0)) & word0) | ((((1U << 4) - 1) & value) << 0); // replaces bits [3:0] only
         return *this;
     }
     CONSTEXPR uint32_t get_version_minor() const
     {
-        uint32_t value = static_cast<uint32_t>(version_minor);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4); // bits [7:4]
         return value;
     }
     uint32_t get_version_minor() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(version_minor);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
         return value;
     }
     CONSTEXPR id_r &set_version_minor(uint32_t value)
     {
-        version_minor = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 4)) & word0) | ((((1U << 4) - 1) & value) << 4); // replaces bits [7:4] only; lower fields are kept
+        return *this;
+    }
+    volatile id_r &set_version_minor(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 4) - 1) << 4)) & word0) | ((((1U << 4) - 1) & value) << 4); // replaces bits [7:4] only; lower fields are kept
         return *this;
     }
     CONSTEXPR uint32_t get_version_major() const
     {
-        uint32_t value = static_cast<uint32_t>(version_major);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 8); // bits [11:8]
         return value;
     }
     uint32_t get_version_major() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(version_major);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 8);
         return value;
     }
     CONSTEXPR id_r &set_version_major(uint32_t value)
     {
-        version_major = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 8)) & word0) | ((((1U << 4) - 1) & value) << 8); // replaces bits [11:8] only; lower fields are kept
         return *this;
     }
-    CONSTEXPR uint32_t get_product_major() const
+    volatile id_r &set_version_major(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(product_major);
+        word0 = ((~(((1U << 4) - 1) << 8)) & word0) | ((((1U << 4) - 1) & value) << 8); // replaces bits [11:8] only; lower fields are kept
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product_major() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12); // bits [15:12]
         return value;
     }
     uint32_t get_product_major() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(product_major);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
         return value;
     }
     CONSTEXPR id_r &set_product_major(uint32_t value)
     {
-        product_major = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 12)) & word0) | ((((1U << 4) - 1) & value) << 12); // replaces bits [15:12] only; lower fields are kept
+        return *this;
+    }
+    volatile id_r &set_product_major(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 4) - 1) << 12)) & word0) | ((((1U << 4) - 1) & value) << 12); // replaces bits [15:12] only; lower fields are kept
         return *this;
     }
     CONSTEXPR uint32_t get_arch_patch_rev() const
     {
-        uint32_t value = static_cast<uint32_t>(arch_patch_rev);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 16); // bits [19:16]
         return value;
     }
     uint32_t get_arch_patch_rev() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(arch_patch_rev);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR id_r &set_arch_patch_rev(uint32_t value)
     {
-        arch_patch_rev = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 16)) & word0) | ((((1U << 4) - 1) & value) << 16); // replaces bits [19:16] only; lower fields are kept
+        return *this;
+    }
+    volatile id_r &set_arch_patch_rev(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 4) - 1) << 16)) & word0) | ((((1U << 4) - 1) & value) << 16); // replaces bits [19:16] only; lower fields are kept
         return *this;
     }
     CONSTEXPR uint32_t get_arch_minor_rev() const
     {
-        uint32_t value = static_cast<uint32_t>(arch_minor_rev);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 20); // bits [27:20]
         return value;
     }
     uint32_t get_arch_minor_rev() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(arch_minor_rev);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 20);
         return value;
     }
     CONSTEXPR id_r &set_arch_minor_rev(uint32_t value)
     {
-        arch_minor_rev = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 8) - 1) << 20)) & word0) | ((((1U << 8) - 1) & value) << 20); // replaces bits [27:20] only; lower fields are kept
+        return *this;
+    }
+    volatile id_r &set_arch_minor_rev(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 8) - 1) << 20)) & word0) | ((((1U << 8) - 1) & value) << 20); // replaces bits [27:20] only; lower fields are kept
         return *this;
     }
     CONSTEXPR uint32_t get_arch_major_rev() const
     {
-        uint32_t value = static_cast<uint32_t>(arch_major_rev);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28); // bits [31:28]
         return value;
     }
     uint32_t get_arch_major_rev() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(arch_major_rev);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
         return value;
     }
     CONSTEXPR id_r &set_arch_major_rev(uint32_t value)
     {
-        arch_major_rev = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 4) - 1) << 28)) & word0) | ((((1U << 4) - 1) & value) << 28); // replaces bits [31:28] only; lower fields are kept
+        return *this;
+    }
+    volatile id_r &set_arch_major_rev(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 4) - 1) << 28)) & word0) | ((((1U << 4) - 1) & value) << 28); // replaces bits [31:28] only; lower fields are kept
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // status_r - Register describes the current operating status of the NPU
 struct status_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
@@ -1452,235 +2698,287 @@ struct status_r
             uint32_t ecc_fault : 1; // ECC state for internal RAMs: 0=no fault 1=ECC fault signalled. Can only be
                                     // cleared by reset
             uint32_t reserved0 : 2;
-            uint32_t faulting_interface : 1; // Faulting interface on bus abort. 0=AXI-M0 1=AXI-M1
+            uint32_t faulting_interface : 1; // Faulting interface on bus abort
             uint32_t faulting_channel : 4;  // Faulting channel on a bus abort. Read: 0=Cmd 1=IFM 2=Weights 3=Scale+Bias
                                             // 4=Mem2Mem; Write: 8=OFM 9=Mem2Mem
             uint32_t irq_history_mask : 16; // IRQ History mask
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR status_r() :
-        state(static_cast<uint32_t>(::state::STOPPED)), irq_raised(static_cast<uint32_t>(0x0)),
-        bus_status(static_cast<uint32_t>(0x0)), reset_status(static_cast<uint32_t>(0x1)),
-        cmd_parse_error(static_cast<uint32_t>(0x0)), cmd_end_reached(static_cast<uint32_t>(0x0)),
-        pmu_irq_raised(static_cast<uint32_t>(0x0)), wd_fault(static_cast<uint32_t>(0x0)),
-        ecc_fault(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        faulting_interface(static_cast<uint32_t>(0x0)), faulting_channel(static_cast<uint32_t>(0x0)),
-        irq_history_mask(static_cast<uint32_t>(0x0))
-    {
-    }
-    CONSTEXPR status_r(uint32_t init) : word(init) {}
+    CONSTEXPR status_r() : word0(8) {}
+    CONSTEXPR status_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     status_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::state get_state() const
+    CONSTEXPR NPU_NAMESPACE::state get_state() const
     {
-        ::state value = static_cast<::state>(state);
+        NPU_NAMESPACE::state value = static_cast<NPU_NAMESPACE::state>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    ::state get_state() const volatile
+    NPU_NAMESPACE::state get_state() const volatile
     {
-        ::state value = static_cast<::state>(state);
+        NPU_NAMESPACE::state value = static_cast<NPU_NAMESPACE::state>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR status_r &set_state(::state value)
+    CONSTEXPR status_r &set_state(NPU_NAMESPACE::state value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile status_r &set_state(NPU_NAMESPACE::state value) volatile
     {
-        state = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
     CONSTEXPR uint32_t get_irq_raised() const
     {
-        uint32_t value = static_cast<uint32_t>(irq_raised);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
     uint32_t get_irq_raised() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(irq_raised);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
     CONSTEXPR status_r &set_irq_raised(uint32_t value)
     {
-        irq_raised = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile status_r &set_irq_raised(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
     CONSTEXPR uint32_t get_bus_status() const
     {
-        uint32_t value = static_cast<uint32_t>(bus_status);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
     uint32_t get_bus_status() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(bus_status);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
     CONSTEXPR status_r &set_bus_status(uint32_t value)
     {
-        bus_status = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile status_r &set_bus_status(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
     CONSTEXPR uint32_t get_reset_status() const
     {
-        uint32_t value = static_cast<uint32_t>(reset_status);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
     uint32_t get_reset_status() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(reset_status);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
     CONSTEXPR status_r &set_reset_status(uint32_t value)
     {
-        reset_status = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile status_r &set_reset_status(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
     CONSTEXPR uint32_t get_cmd_parse_error() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_parse_error);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
     uint32_t get_cmd_parse_error() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_parse_error);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
     CONSTEXPR status_r &set_cmd_parse_error(uint32_t value)
     {
-        cmd_parse_error = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile status_r &set_cmd_parse_error(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
     CONSTEXPR uint32_t get_cmd_end_reached() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_end_reached);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
     uint32_t get_cmd_end_reached() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_end_reached);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
     CONSTEXPR status_r &set_cmd_end_reached(uint32_t value)
     {
-        cmd_end_reached = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 5) & word0) | ((((1U << 1) - 1) & value) << 5); // clear only bit 5, keep the rest
+        return *this;
+    }
+    volatile status_r &set_cmd_end_reached(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 5) & word0) | ((((1U << 1) - 1) & value) << 5); // clear only bit 5, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_pmu_irq_raised() const
     {
-        uint32_t value = static_cast<uint32_t>(pmu_irq_raised);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
     uint32_t get_pmu_irq_raised() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(pmu_irq_raised);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
     CONSTEXPR status_r &set_pmu_irq_raised(uint32_t value)
     {
-        pmu_irq_raised = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 6) & word0) | ((((1U << 1) - 1) & value) << 6); // clear only bit 6, keep the rest
+        return *this;
+    }
+    volatile status_r &set_pmu_irq_raised(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 6) & word0) | ((((1U << 1) - 1) & value) << 6); // clear only bit 6, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_wd_fault() const
     {
-        uint32_t value = static_cast<uint32_t>(wd_fault);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
     uint32_t get_wd_fault() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wd_fault);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
     CONSTEXPR status_r &set_wd_fault(uint32_t value)
     {
-        wd_fault = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 7) & word0) | ((((1U << 1) - 1) & value) << 7); // clear only bit 7, keep the rest
+        return *this;
+    }
+    volatile status_r &set_wd_fault(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 7) & word0) | ((((1U << 1) - 1) & value) << 7); // clear only bit 7, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_ecc_fault() const
     {
-        uint32_t value = static_cast<uint32_t>(ecc_fault);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
     uint32_t get_ecc_fault() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(ecc_fault);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
     CONSTEXPR status_r &set_ecc_fault(uint32_t value)
     {
-        ecc_fault = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 8) & word0) | ((((1U << 1) - 1) & value) << 8); // clear only bit 8, keep the rest
+        return *this;
+    }
+    volatile status_r &set_ecc_fault(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 8) & word0) | ((((1U << 1) - 1) & value) << 8); // clear only bit 8, keep the rest
         return *this;
     }
-    CONSTEXPR uint32_t get_faulting_interface() const
+    CONSTEXPR NPU_NAMESPACE::dma_fault_src get_faulting_interface() const
     {
-        uint32_t value = static_cast<uint32_t>(faulting_interface);
+        NPU_NAMESPACE::dma_fault_src value = static_cast<NPU_NAMESPACE::dma_fault_src>(((1U << 1) - 1) & (word0 >> 11));
         return value;
     }
-    uint32_t get_faulting_interface() const volatile
+    NPU_NAMESPACE::dma_fault_src get_faulting_interface() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(faulting_interface);
+        NPU_NAMESPACE::dma_fault_src value = static_cast<NPU_NAMESPACE::dma_fault_src>(((1U << 1) - 1) & (word0 >> 11));
         return value;
     }
-    CONSTEXPR status_r &set_faulting_interface(uint32_t value)
+    CONSTEXPR status_r &set_faulting_interface(NPU_NAMESPACE::dma_fault_src value)
+    {
+        word0 = (~(((1U << 1) - 1) << 11) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 11); // clear only bit 11
+        return *this;
+    }
+    volatile status_r &set_faulting_interface(NPU_NAMESPACE::dma_fault_src value) volatile
     {
-        faulting_interface = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 11) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 11); // clear only bit 11
         return *this;
     }
     CONSTEXPR uint32_t get_faulting_channel() const
     {
-        uint32_t value = static_cast<uint32_t>(faulting_channel);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
         return value;
     }
     uint32_t get_faulting_channel() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(faulting_channel);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
         return value;
     }
     CONSTEXPR status_r &set_faulting_channel(uint32_t value)
     {
-        faulting_channel = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 4) - 1) << 12) & word0) | ((((1U << 4) - 1) & value) << 12); // clear only bits [15:12]
+        return *this;
+    }
+    volatile status_r &set_faulting_channel(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 12) & word0) | ((((1U << 4) - 1) & value) << 12); // clear only bits [15:12]
         return *this;
     }
     CONSTEXPR uint32_t get_irq_history_mask() const
     {
-        uint32_t value = static_cast<uint32_t>(irq_history_mask);
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_irq_history_mask() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(irq_history_mask);
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR status_r &set_irq_history_mask(uint32_t value)
     {
-        irq_history_mask = ((1u << 16) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 16) - 1) << 16) & word0) | ((((1U << 16) - 1) & value) << 16); // clear only bits [31:16]
+        return *this;
+    }
+    volatile status_r &set_irq_history_mask(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 16) - 1) << 16) & word0) | ((((1U << 16) - 1) & value) << 16); // clear only bits [31:16]
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // cmd_r - Command register, reads as last written command
 struct cmd_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
@@ -1689,7 +2987,7 @@ struct cmd_r
                                                       // no effect
             uint32_t clear_irq : 1; // Write 1 to clear the IRQ status in the STATUS register. Writing 0 has no effect
             uint32_t clock_q_enable : 1; // Write 1 to this bit to enable clock off using clock q-interface and enable
-                                         // the master clock gate
+                                         // the requester clock gate
             uint32_t power_q_enable : 1; // Write 1 to this bit to enable power off using power q-interface
             uint32_t
                 stop_request : 1; // Write 1 to this bit to request STOP after completing any already-started commands
@@ -1698,31 +2996,28 @@ struct cmd_r
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR cmd_r() :
-        transition_to_running_state(static_cast<uint32_t>(0x0)), clear_irq(static_cast<uint32_t>(0x0)),
-        clock_q_enable(static_cast<uint32_t>(0x1)), power_q_enable(static_cast<uint32_t>(0x1)),
-        stop_request(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        clear_irq_history(static_cast<uint32_t>(0x0))
-    {
-    }
-    CONSTEXPR cmd_r(uint32_t init) : word(init) {}
+    CONSTEXPR cmd_r() : word0(12) {}
+    CONSTEXPR cmd_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     cmd_r copy() volatile
     {
@@ -1730,103 +3025,131 @@ struct cmd_r
     }
     CONSTEXPR uint32_t get_transition_to_running_state() const
     {
-        uint32_t value = static_cast<uint32_t>(transition_to_running_state);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
     uint32_t get_transition_to_running_state() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(transition_to_running_state);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
     CONSTEXPR cmd_r &set_transition_to_running_state(uint32_t value)
     {
-        transition_to_running_state = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    volatile cmd_r &set_transition_to_running_state(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
     CONSTEXPR uint32_t get_clear_irq() const
     {
-        uint32_t value = static_cast<uint32_t>(clear_irq);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
     uint32_t get_clear_irq() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(clear_irq);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
     CONSTEXPR cmd_r &set_clear_irq(uint32_t value)
     {
-        clear_irq = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & value) << 1); // clear only bit 1, keep the rest
+        return *this;
+    }
+    volatile cmd_r &set_clear_irq(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & value) << 1); // clear only bit 1, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_clock_q_enable() const
     {
-        uint32_t value = static_cast<uint32_t>(clock_q_enable);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
     uint32_t get_clock_q_enable() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(clock_q_enable);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
     CONSTEXPR cmd_r &set_clock_q_enable(uint32_t value)
     {
-        clock_q_enable = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 2) & word0) | ((((1U << 1) - 1) & value) << 2); // clear only bit 2, keep the rest
+        return *this;
+    }
+    volatile cmd_r &set_clock_q_enable(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 2) & word0) | ((((1U << 1) - 1) & value) << 2); // clear only bit 2, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_power_q_enable() const
     {
-        uint32_t value = static_cast<uint32_t>(power_q_enable);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
     uint32_t get_power_q_enable() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(power_q_enable);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
     CONSTEXPR cmd_r &set_power_q_enable(uint32_t value)
     {
-        power_q_enable = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 3) & word0) | ((((1U << 1) - 1) & value) << 3); // clear only bit 3, keep the rest
+        return *this;
+    }
+    volatile cmd_r &set_power_q_enable(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 3) & word0) | ((((1U << 1) - 1) & value) << 3); // clear only bit 3, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_stop_request() const
     {
-        uint32_t value = static_cast<uint32_t>(stop_request);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
     uint32_t get_stop_request() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(stop_request);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
     CONSTEXPR cmd_r &set_stop_request(uint32_t value)
     {
-        stop_request = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 4) & word0) | ((((1U << 1) - 1) & value) << 4); // clear only bit 4, keep the rest
+        return *this;
+    }
+    volatile cmd_r &set_stop_request(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 4) & word0) | ((((1U << 1) - 1) & value) << 4); // clear only bit 4, keep the rest
         return *this;
     }
     CONSTEXPR uint32_t get_clear_irq_history() const
     {
-        uint32_t value = static_cast<uint32_t>(clear_irq_history);
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_clear_irq_history() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(clear_irq_history);
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR cmd_r &set_clear_irq_history(uint32_t value)
     {
-        clear_irq_history = ((1u << 16) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 16) - 1) << 16) & word0) | ((((1U << 16) - 1) & value) << 16); // clear only bits [31:16]
+        return *this;
+    }
+    volatile cmd_r &set_clear_irq_history(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 16) - 1) << 16) & word0) | ((((1U << 16) - 1) & value) << 16); // clear only bits [31:16]
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // reset_r - Request Reset and new security mode
 struct reset_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
@@ -1837,203 +3160,164 @@ struct reset_r
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR reset_r() :
-        pending_CPL(static_cast<uint32_t>(::privilege_level::USER)),
-        pending_CSL(static_cast<uint32_t>(::security_level::SECURE)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR reset_r(uint32_t init) : word(init) {}
+    CONSTEXPR reset_r() : word0(0) {}
+    CONSTEXPR reset_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     reset_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::privilege_level get_pending_CPL() const
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_pending_CPL() const
     {
-        ::privilege_level value = static_cast<::privilege_level>(pending_CPL);
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    ::privilege_level get_pending_CPL() const volatile
+    NPU_NAMESPACE::privilege_level get_pending_CPL() const volatile
     {
-        ::privilege_level value = static_cast<::privilege_level>(pending_CPL);
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR reset_r &set_pending_CPL(::privilege_level value)
+    CONSTEXPR reset_r &set_pending_CPL(NPU_NAMESPACE::privilege_level value)
     {
-        pending_CPL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::security_level get_pending_CSL() const
-    {
-        ::security_level value = static_cast<::security_level>(pending_CSL);
-        return value;
-    }
-    ::security_level get_pending_CSL() const volatile
-    {
-        ::security_level value = static_cast<::security_level>(pending_CSL);
-        return value;
-    }
-    CONSTEXPR reset_r &set_pending_CSL(::security_level value)
+    volatile reset_r &set_pending_CPL(NPU_NAMESPACE::privilege_level value) volatile
     {
-        pending_CSL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// qbase0_r - Base address of command queue bits [31:0]. The address is 4 byte aligned
-struct qbase0_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t QBASE0; // The 4 byte aligned lower bytes of the base address value for the command stream
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR qbase0_r() : QBASE0(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR qbase0_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
-    }
-    void operator=(uint32_t value) volatile
-    {
-        word = value;
-    }
-    CONSTEXPR operator uint32_t()
-    {
-        return word;
-    }
-    operator uint32_t() volatile
-    {
-        return word;
-    }
-    qbase0_r copy() volatile
+    CONSTEXPR NPU_NAMESPACE::security_level get_pending_CSL() const
     {
-        return *this;
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
+        return value;
     }
-    CONSTEXPR uint32_t get_QBASE0() const
+    NPU_NAMESPACE::security_level get_pending_CSL() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(QBASE0);
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
         return value;
     }
-    uint32_t get_QBASE0() const volatile
+    CONSTEXPR reset_r &set_pending_CSL(NPU_NAMESPACE::security_level value)
     {
-        uint32_t value = static_cast<uint32_t>(QBASE0);
-        return value;
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1); // clear only bit 1
+        return *this;
     }
-    CONSTEXPR qbase0_r &set_QBASE0(uint32_t value)
+    volatile reset_r &set_pending_CSL(NPU_NAMESPACE::security_level value) volatile
     {
-        QBASE0 = static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1); // clear only bit 1
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// qbase1_r - Address extension bits [47:32] bits for queue base
-struct qbase1_r
+// qbase_r - Base address of the command stream in bytes
+struct qbase_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t QBASE1; // The 4 byte aligned upper bytes of the base address value for the command stream
-        uint32_t word;
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
   public:
-    CONSTEXPR qbase1_r() : QBASE1(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR qbase1_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
-    }
-    void operator=(uint32_t value) volatile
-    {
-        word = value;
-    }
-    CONSTEXPR operator uint32_t()
+    CONSTEXPR qbase_r() : word0(0), word1(0) {}
+    CONSTEXPR qbase_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
     {
-        return word;
     }
-    operator uint32_t() volatile
+    CONSTEXPR void operator=(uint64_t value)
     {
-        return word;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    qbase1_r copy() volatile
+    void operator=(uint64_t value) volatile
     {
-        return *this;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    CONSTEXPR uint32_t get_QBASE1() const
+    CONSTEXPR operator uint64_t()
     {
-        uint32_t value = static_cast<uint32_t>(QBASE1);
-        return value;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    uint32_t get_QBASE1() const volatile
+    operator uint64_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(QBASE1);
-        return value;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    CONSTEXPR qbase1_r &set_QBASE1(uint32_t value)
+    qbase_r copy() volatile
     {
-        QBASE1 = static_cast<uint32_t>(value);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // qread_r - Read offset in the command stream in bytes. Multiple of 4 in the range 0 to 16 MB
 struct qread_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t QREAD; // The read offset of the current command under execution
+        struct
+        {
+            uint32_t QREAD : 32; // The read offset of the current command under execution
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR qread_r() : QREAD(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR qread_r(uint32_t init) : word(init) {}
+    CONSTEXPR qread_r() : word0(0) {}
+    CONSTEXPR qread_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     qread_r copy() volatile
     {
@@ -2041,105 +3325,124 @@ struct qread_r
     }
     CONSTEXPR uint32_t get_QREAD() const
     {
-        uint32_t value = static_cast<uint32_t>(QREAD);
+        uint32_t value = word0;
         return value;
     }
     uint32_t get_QREAD() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(QREAD);
+        uint32_t value = word0;
         return value;
     }
     CONSTEXPR qread_r &set_QREAD(uint32_t value)
     {
-        QREAD = static_cast<uint32_t>(value);
+        word0 = value;
+        return *this;
+    }
+    volatile qread_r &set_QREAD(uint32_t value) volatile
+    {
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // qconfig_r - AXI configuration for the command stream in the range 0-3. Same encoding as for REGIONCFG
 struct qconfig_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t QCONFIG; // AXI configuration for the command stream in the range 0-3
+        struct
+        {
+            uint32_t cmd_region0 : 2; // Command region configuration
+            uint32_t reserved0 : 30;
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR qconfig_r() : QCONFIG(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR qconfig_r(uint32_t init) : word(init) {}
+    CONSTEXPR qconfig_r() : word0(0) {}
+    CONSTEXPR qconfig_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     qconfig_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_QCONFIG() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_cmd_region0() const
     {
-        uint32_t value = static_cast<uint32_t>(QCONFIG);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_QCONFIG() const volatile
+    NPU_NAMESPACE::mem_attr get_cmd_region0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(QCONFIG);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR qconfig_r &set_QCONFIG(uint32_t value)
+    CONSTEXPR qconfig_r &set_cmd_region0(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile qconfig_r &set_cmd_region0(NPU_NAMESPACE::mem_attr value) volatile
     {
-        QCONFIG = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // qsize_r - Size of the command stream in bytes. Multiple of 4 in the range 0 to 16 MB
 struct qsize_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t QSIZE; // Size of the next command stream to be executed by the NPU
+        struct
+        {
+            uint32_t QSIZE : 32; // Size of the next command stream to be executed by the NPU
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR qsize_r() : QSIZE(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR qsize_r(uint32_t init) : word(init) {}
+    CONSTEXPR qsize_r() : word0(0) {}
+    CONSTEXPR qsize_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     qsize_r copy() volatile
     {
@@ -2147,28 +3450,31 @@ struct qsize_r
     }
     CONSTEXPR uint32_t get_QSIZE() const
     {
-        uint32_t value = static_cast<uint32_t>(QSIZE);
+        uint32_t value = word0;
         return value;
     }
     uint32_t get_QSIZE() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(QSIZE);
+        uint32_t value = word0;
         return value;
     }
     CONSTEXPR qsize_r &set_QSIZE(uint32_t value)
     {
-        QSIZE = static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+    volatile qsize_r &set_QSIZE(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
 };
 
-// prot_r - Protection level configured for the NPU when acting as an AXI master
+// prot_r - Protection level configured for the NPU when acting as an AXI requester
 struct prot_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
@@ -2179,208 +3485,284 @@ struct prot_r
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR prot_r() :
-        active_CPL(static_cast<uint32_t>(::privilege_level::USER)),
-        active_CSL(static_cast<uint32_t>(::security_level::SECURE)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR prot_r(uint32_t init) : word(init) {}
+    CONSTEXPR prot_r() : word0(0) {}
+    CONSTEXPR prot_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     prot_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::privilege_level get_active_CPL() const
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_active_CPL() const
     {
-        ::privilege_level value = static_cast<::privilege_level>(active_CPL);
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    ::privilege_level get_active_CPL() const volatile
+    NPU_NAMESPACE::privilege_level get_active_CPL() const volatile
     {
-        ::privilege_level value = static_cast<::privilege_level>(active_CPL);
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR prot_r &set_active_CPL(::privilege_level value)
+    CONSTEXPR prot_r &set_active_CPL(NPU_NAMESPACE::privilege_level value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile prot_r &set_active_CPL(NPU_NAMESPACE::privilege_level value) volatile
     {
-        active_CPL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::security_level get_active_CSL() const
+    CONSTEXPR NPU_NAMESPACE::security_level get_active_CSL() const
     {
-        ::security_level value = static_cast<::security_level>(active_CSL);
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
         return value;
     }
-    ::security_level get_active_CSL() const volatile
+    NPU_NAMESPACE::security_level get_active_CSL() const volatile
     {
-        ::security_level value = static_cast<::security_level>(active_CSL);
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
         return value;
     }
-    CONSTEXPR prot_r &set_active_CSL(::security_level value)
+    CONSTEXPR prot_r &set_active_CSL(NPU_NAMESPACE::security_level value)
     {
-        active_CSL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1); // clear only bit 1
         return *this;
     }
-#endif //__cplusplus
+    volatile prot_r &set_active_CSL(NPU_NAMESPACE::security_level value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 1) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1); // clear only bit 1
+        return *this;
+    }
+#endif
 };
 
 // config_r - RTL configuration
 struct config_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t macs_per_cc : 4;        // The log2(macs/clock cycle). Valid encoding range is 5 to 8 for 32 to 256
-                                             // MACs/clock cycle.
-            uint32_t cmd_stream_version : 4; // command stream version accepted by this NPU.
-            uint32_t shram_size : 8;         // Size in KB of SHRAM in the range 8 to 48.
-            uint32_t reserved0 : 11;
-            uint32_t custom_dma : 1; // Custom DMA enable bit.
-            uint32_t product : 4;    // Product configuration
+            uint32_t macs_per_cc : 4;        // The log2(macs/clock cycle)
+            uint32_t cmd_stream_version : 4; // command stream version accepted by this NPU
+            uint32_t shram_size : 8;         // Total size in KB of internal SHRAM
+            uint32_t reserved0 : 10;
+            uint32_t functional_safety : 1; // Functional safety configuration
+            uint32_t custom_dma : 1;        // Custom DMA configuration
+            uint32_t product : 4;           // Product configuration
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR config_r() :
-        macs_per_cc(static_cast<uint32_t>(0)), cmd_stream_version(static_cast<uint32_t>(0x0)),
-        shram_size(static_cast<uint32_t>(0)), reserved0(static_cast<uint32_t>(0)), product(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR config_r(uint32_t init) : word(init) {}
+    CONSTEXPR config_r() : word0(0) {}
+    CONSTEXPR config_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     config_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::macs_per_cc get_macs_per_cc() const
+    CONSTEXPR uint32_t get_macs_per_cc() const
     {
-        ::macs_per_cc value = static_cast<::macs_per_cc>(macs_per_cc);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
         return value;
     }
-    ::macs_per_cc get_macs_per_cc() const volatile
+    uint32_t get_macs_per_cc() const volatile
     {
-        ::macs_per_cc value = static_cast<::macs_per_cc>(macs_per_cc);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR config_r &set_macs_per_cc(::macs_per_cc value)
+    CONSTEXPR config_r &set_macs_per_cc(uint32_t value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    volatile config_r &set_macs_per_cc(uint32_t value) volatile
     {
-        macs_per_cc = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
         return *this;
     }
     CONSTEXPR uint32_t get_cmd_stream_version() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_stream_version);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
         return value;
     }
     uint32_t get_cmd_stream_version() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_stream_version);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
         return value;
     }
     CONSTEXPR config_r &set_cmd_stream_version(uint32_t value)
     {
-        cmd_stream_version = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & value) << 4); // clear only bits [7:4]
+        return *this;
+    }
+    volatile config_r &set_cmd_stream_version(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & value) << 4); // clear only bits [7:4]
         return *this;
     }
-    CONSTEXPR ::shram_size get_shram_size() const
+    CONSTEXPR uint32_t get_shram_size() const
     {
-        ::shram_size value = static_cast<::shram_size>(shram_size);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 8);
         return value;
     }
-    ::shram_size get_shram_size() const volatile
+    uint32_t get_shram_size() const volatile
     {
-        ::shram_size value = static_cast<::shram_size>(shram_size);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 8);
         return value;
     }
-    CONSTEXPR config_r &set_shram_size(::shram_size value)
+    CONSTEXPR config_r &set_shram_size(uint32_t value)
     {
-        shram_size = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 8) - 1) << 8) & word0) | ((((1U << 8) - 1) & value) << 8); // clear only bits [15:8]
         return *this;
     }
-    CONSTEXPR uint32_t get_product() const
+    volatile config_r &set_shram_size(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 8) - 1) << 8) & word0) | ((((1U << 8) - 1) & value) << 8); // clear only bits [15:8]
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::functional_safety get_functional_safety() const
     {
-        uint32_t value = static_cast<uint32_t>(product);
+        NPU_NAMESPACE::functional_safety value =
+            static_cast<NPU_NAMESPACE::functional_safety>(((1U << 1) - 1) & (word0 >> 26));
         return value;
     }
-    uint32_t get_product() const volatile
+    NPU_NAMESPACE::functional_safety get_functional_safety() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(product);
+        NPU_NAMESPACE::functional_safety value =
+            static_cast<NPU_NAMESPACE::functional_safety>(((1U << 1) - 1) & (word0 >> 26));
         return value;
     }
-    CONSTEXPR config_r &set_product(uint32_t value)
+    CONSTEXPR config_r &set_functional_safety(NPU_NAMESPACE::functional_safety value)
     {
-        product = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 1) - 1) << 26) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 26); // clear only bit 26
         return *this;
     }
-#endif //__cplusplus
-};
-
-// lock_r - Lock register. This register is designed for driver use and does not affect NPU functionality
-struct lock_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t LOCK; // 32 bit value for LOCK configuration
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR lock_r() : LOCK(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR lock_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile config_r &set_functional_safety(NPU_NAMESPACE::functional_safety value) volatile
     {
-        word = value;
+        word0 = (~(((1U << 1) - 1) << 26) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 26); // clear only bit 26
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR NPU_NAMESPACE::custom_dma get_custom_dma() const
     {
-        word = value;
+        NPU_NAMESPACE::custom_dma value = static_cast<NPU_NAMESPACE::custom_dma>(((1U << 1) - 1) & (word0 >> 27));
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    NPU_NAMESPACE::custom_dma get_custom_dma() const volatile
     {
-        return word;
+        NPU_NAMESPACE::custom_dma value = static_cast<NPU_NAMESPACE::custom_dma>(((1U << 1) - 1) & (word0 >> 27));
+        return value;
+    }
+    CONSTEXPR config_r &set_custom_dma(NPU_NAMESPACE::custom_dma value)
+    {
+        word0 = (~(((1U << 1) - 1) << 27) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 27); // clear only bit 27
+        return *this;
+    }
+    volatile config_r &set_custom_dma(NPU_NAMESPACE::custom_dma value) volatile
+    {
+        word0 = (~(((1U << 1) - 1) << 27) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 27); // clear only bit 27
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    uint32_t get_product() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    CONSTEXPR config_r &set_product(uint32_t value)
+    {
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28); // clear only bits [31:28]
+        return *this;
+    }
+    volatile config_r &set_product(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28); // clear only bits [31:28]
+        return *this;
+    }
+#endif
+};
+
+// lock_r - Lock register. This register is designed for driver use and does not affect NPU functionality
+struct lock_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t LOCK : 32; // 32 bit value for LOCK configuration
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR lock_r() : word0(0) {}
+    CONSTEXPR lock_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     lock_r copy() volatile
     {
@@ -2388,6371 +3770,12576 @@ struct lock_r
     }
     CONSTEXPR uint32_t get_LOCK() const
     {
-        uint32_t value = static_cast<uint32_t>(LOCK);
+        uint32_t value = word0;
         return value;
     }
     uint32_t get_LOCK() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(LOCK);
+        uint32_t value = word0;
         return value;
     }
     CONSTEXPR lock_r &set_LOCK(uint32_t value)
     {
-        LOCK = static_cast<uint32_t>(value);
+        word0 = value;
+        return *this;
+    }
+    volatile lock_r &set_LOCK(uint32_t value) volatile
+    {
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // regioncfg_r - Region memory type configuration. Bits[2*k+1:2*k] give the memory type for REGION[k]
 struct regioncfg_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t region0 : 2; // Bits for Region0 Configurion
-            uint32_t region1 : 2; // Bits for Region1 Configurion
-            uint32_t region2 : 2; // Bits for Region2 Configurion
-            uint32_t region3 : 2; // Bits for Region3 Configurion
-            uint32_t region4 : 2; // Bits for Region4 Configurion
-            uint32_t region5 : 2; // Bits for Region5 Configurion
-            uint32_t region6 : 2; // Bits for Region6 Configurion
-            uint32_t region7 : 2; // Bits for Region7 Configurion
+            uint32_t region0 : 2; // Bits for Region0 Configuration
+            uint32_t region1 : 2; // Bits for Region1 Configuration
+            uint32_t region2 : 2; // Bits for Region2 Configuration
+            uint32_t region3 : 2; // Bits for Region3 Configuration
+            uint32_t region4 : 2; // Bits for Region4 Configuration
+            uint32_t region5 : 2; // Bits for Region5 Configuration
+            uint32_t region6 : 2; // Bits for Region6 Configuration
+            uint32_t region7 : 2; // Bits for Region7 Configuration
             uint32_t reserved0 : 16;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR regioncfg_r() :
-        region0(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region1(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region2(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region3(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region4(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region5(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region6(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)),
-        region7(static_cast<uint32_t>(::memory_type::AXI0_OUTSTANDING_COUNTER0)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR regioncfg_r(uint32_t init) : word(init) {}
+    CONSTEXPR regioncfg_r() : word0(0) {}
+    CONSTEXPR regioncfg_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     regioncfg_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::memory_type get_region0() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region0() const
     {
-        ::memory_type value = static_cast<::memory_type>(region0);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    ::memory_type get_region0() const volatile
+    NPU_NAMESPACE::mem_attr get_region0() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region0);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region0(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region0(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile regioncfg_r &set_region0(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region0 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::memory_type get_region1() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region1() const
     {
-        ::memory_type value = static_cast<::memory_type>(region1);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 2));
         return value;
     }
-    ::memory_type get_region1() const volatile
+    NPU_NAMESPACE::mem_attr get_region1() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region1);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 2));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region1(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region1(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (~(((1U << 2) - 1) << 2) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 2); // clear only bits [3:2]
+        return *this;
+    }
+    volatile regioncfg_r &set_region1(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region1 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 2) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 2); // clear only bits [3:2]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region2() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region2() const
     {
-        ::memory_type value = static_cast<::memory_type>(region2);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 4));
         return value;
     }
-    ::memory_type get_region2() const volatile
+    NPU_NAMESPACE::mem_attr get_region2() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region2);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 4));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region2(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region2(NPU_NAMESPACE::mem_attr value)
     {
-        region2 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 4) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 4); // clear only bits [5:4]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region3() const
+    volatile regioncfg_r &set_region2(NPU_NAMESPACE::mem_attr value) volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region3);
+        word0 = (~(((1U << 2) - 1) << 4) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 4); // clear only bits [5:4]
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region3() const
+    {
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 6));
         return value;
     }
-    ::memory_type get_region3() const volatile
+    NPU_NAMESPACE::mem_attr get_region3() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region3);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 6));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region3(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region3(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (~(((1U << 2) - 1) << 6) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 6); // clear only bits [7:6]
+        return *this;
+    }
+    volatile regioncfg_r &set_region3(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region3 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 6) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 6); // clear only bits [7:6]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region4() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region4() const
     {
-        ::memory_type value = static_cast<::memory_type>(region4);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 8));
         return value;
     }
-    ::memory_type get_region4() const volatile
+    NPU_NAMESPACE::mem_attr get_region4() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region4);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 8));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region4(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region4(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (~(((1U << 2) - 1) << 8) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 8); // clear only bits [9:8]
+        return *this;
+    }
+    volatile regioncfg_r &set_region4(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region4 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 8) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 8); // clear only bits [9:8]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region5() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region5() const
     {
-        ::memory_type value = static_cast<::memory_type>(region5);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 10));
         return value;
     }
-    ::memory_type get_region5() const volatile
+    NPU_NAMESPACE::mem_attr get_region5() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region5);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 10));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region5(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region5(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (~(((1U << 2) - 1) << 10) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 10); // clear only bits [11:10]
+        return *this;
+    }
+    volatile regioncfg_r &set_region5(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region5 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 10) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 10); // clear only bits [11:10]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region6() const
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region6() const
     {
-        ::memory_type value = static_cast<::memory_type>(region6);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 12));
         return value;
     }
-    ::memory_type get_region6() const volatile
+    NPU_NAMESPACE::mem_attr get_region6() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region6);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 12));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region6(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region6(NPU_NAMESPACE::mem_attr value)
     {
-        region6 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 12) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 12); // clear only bits [13:12]
         return *this;
     }
-    CONSTEXPR ::memory_type get_region7() const
+    volatile regioncfg_r &set_region6(NPU_NAMESPACE::mem_attr value) volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region7);
+        word0 = (~(((1U << 2) - 1) << 12) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 12); // clear only bits [13:12]
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region7() const
+    {
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 14));
         return value;
     }
-    ::memory_type get_region7() const volatile
+    NPU_NAMESPACE::mem_attr get_region7() const volatile
     {
-        ::memory_type value = static_cast<::memory_type>(region7);
+        NPU_NAMESPACE::mem_attr value = static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 14));
         return value;
     }
-    CONSTEXPR regioncfg_r &set_region7(::memory_type value)
+    CONSTEXPR regioncfg_r &set_region7(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (~(((1U << 2) - 1) << 14) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 14); // clear only bits [15:14]
+        return *this;
+    }
+    volatile regioncfg_r &set_region7(NPU_NAMESPACE::mem_attr value) volatile
     {
-        region7 = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 2) - 1) << 14) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 14); // clear only bits [15:14]
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // axi_limit0_r - AXI limits for port 0 counter 0
 struct axi_limit0_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved
+            uint32_t max_beats : 2; // Burst split alignment
             uint32_t reserved0 : 2;
             uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
             uint32_t reserved1 : 8;
             uint32_t
-                max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
-            uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range
+                max_outstanding_read_m1 : 5; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
+            uint32_t reserved2 : 3;
+            uint32_t max_outstanding_write_m1 : 4; // Maximum number of outstanding AXI write transactions - 1 in range
                                                    // 0 to 15
+            uint32_t reserved3 : 4;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR axi_limit0_r() :
-        max_beats(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        memtype(static_cast<uint32_t>(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)),
-        reserved1(static_cast<uint32_t>(0)), max_outstanding_read_m1(static_cast<uint32_t>(0x00)),
-        max_outstanding_write_m1(static_cast<uint32_t>(0x00))
-    {
-    }
-    CONSTEXPR axi_limit0_r(uint32_t init) : word(init) {}
+    CONSTEXPR axi_limit0_r() : word0(0) {}
+    CONSTEXPR axi_limit0_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     axi_limit0_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_max_beats() const
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_max_beats() const volatile
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR axi_limit0_r &set_max_beats(uint32_t value)
+    CONSTEXPR axi_limit0_r &set_max_beats(NPU_NAMESPACE::max_beats value)
     {
-        max_beats = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::axi_mem_encoding_type get_memtype() const
+    volatile axi_limit0_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    ::axi_mem_encoding_type get_memtype() const volatile
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    CONSTEXPR axi_limit0_r &set_memtype(::axi_mem_encoding_type value)
+    CONSTEXPR axi_limit0_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    {
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4); // clear only bits [7:4]
+        return *this;
+    }
+    volatile axi_limit0_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
     {
-        memtype = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4); // clear only bits [7:4]
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_read_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_max_outstanding_read_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR axi_limit0_r &set_max_outstanding_read_m1(uint32_t value)
     {
-        max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (~(((1U << 5) - 1) << 16) & word0) | ((((1U << 5) - 1) & value) << 16); // clear only bits [20:16]
+        return *this;
+    }
+    volatile axi_limit0_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_write_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     uint32_t get_max_outstanding_write_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     CONSTEXPR axi_limit0_r &set_max_outstanding_write_m1(uint32_t value)
     {
-        max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit0_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // axi_limit1_r - AXI limits for port 0 counter 1
 struct axi_limit1_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved
+            uint32_t max_beats : 2; // Burst split alignment
             uint32_t reserved0 : 2;
             uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
             uint32_t reserved1 : 8;
             uint32_t
-                max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
-            uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range
+                max_outstanding_read_m1 : 5; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
+            uint32_t reserved2 : 3;
+            uint32_t max_outstanding_write_m1 : 4; // Maximum number of outstanding AXI write transactions - 1 in range
                                                    // 0 to 15
+            uint32_t reserved3 : 4;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR axi_limit1_r() :
-        max_beats(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        memtype(static_cast<uint32_t>(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)),
-        reserved1(static_cast<uint32_t>(0)), max_outstanding_read_m1(static_cast<uint32_t>(0x00)),
-        max_outstanding_write_m1(static_cast<uint32_t>(0x00))
-    {
-    }
-    CONSTEXPR axi_limit1_r(uint32_t init) : word(init) {}
+    CONSTEXPR axi_limit1_r() : word0(0) {}
+    CONSTEXPR axi_limit1_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     axi_limit1_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_max_beats() const
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_max_beats() const volatile
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR axi_limit1_r &set_max_beats(uint32_t value)
+    CONSTEXPR axi_limit1_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
     {
-        max_beats = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::axi_mem_encoding_type get_memtype() const
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    ::axi_mem_encoding_type get_memtype() const volatile
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    CONSTEXPR axi_limit1_r &set_memtype(::axi_mem_encoding_type value)
+    CONSTEXPR axi_limit1_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit1_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
     {
-        memtype = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_read_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_max_outstanding_read_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR axi_limit1_r &set_max_outstanding_read_m1(uint32_t value)
     {
-        max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_write_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     uint32_t get_max_outstanding_write_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     CONSTEXPR axi_limit1_r &set_max_outstanding_write_m1(uint32_t value)
     {
-        max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // axi_limit2_r - AXI limits for port 1 counter 2
 struct axi_limit2_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved
+            uint32_t max_beats : 2; // Burst split alignment
             uint32_t reserved0 : 2;
             uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
             uint32_t reserved1 : 8;
             uint32_t
-                max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
-            uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range
+                max_outstanding_read_m1 : 5; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
+            uint32_t reserved2 : 3;
+            uint32_t max_outstanding_write_m1 : 4; // Maximum number of outstanding AXI write transactions - 1 in range
                                                    // 0 to 15
+            uint32_t reserved3 : 4;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR axi_limit2_r() :
-        max_beats(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        memtype(static_cast<uint32_t>(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)),
-        reserved1(static_cast<uint32_t>(0)), max_outstanding_read_m1(static_cast<uint32_t>(0x00)),
-        max_outstanding_write_m1(static_cast<uint32_t>(0x00))
-    {
-    }
-    CONSTEXPR axi_limit2_r(uint32_t init) : word(init) {}
+    CONSTEXPR axi_limit2_r() : word0(0) {}
+    CONSTEXPR axi_limit2_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     axi_limit2_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_max_beats() const
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_max_beats() const volatile
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR axi_limit2_r &set_max_beats(uint32_t value)
+    CONSTEXPR axi_limit2_r &set_max_beats(NPU_NAMESPACE::max_beats value)
     {
-        max_beats = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::axi_mem_encoding_type get_memtype() const
+    volatile axi_limit2_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    ::axi_mem_encoding_type get_memtype() const volatile
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    CONSTEXPR axi_limit2_r &set_memtype(::axi_mem_encoding_type value)
+    CONSTEXPR axi_limit2_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit2_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
     {
-        memtype = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_read_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_max_outstanding_read_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR axi_limit2_r &set_max_outstanding_read_m1(uint32_t value)
     {
-        max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit2_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_write_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     uint32_t get_max_outstanding_write_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     CONSTEXPR axi_limit2_r &set_max_outstanding_write_m1(uint32_t value)
     {
-        max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit2_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
 // axi_limit3_r - AXI limits for port 1 counter 3
 struct axi_limit3_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t max_beats : 2; // Burst split alignment: 0=64 bytes, 1=128 bytes, 2=256 bytes, 3=reserved
+            uint32_t max_beats : 2; // Burst split alignment
             uint32_t reserved0 : 2;
             uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
             uint32_t reserved1 : 8;
             uint32_t
-                max_outstanding_read_m1 : 8; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
-            uint32_t max_outstanding_write_m1 : 8; // Maximum number of outstanding AXI write transactions - 1 in range
+                max_outstanding_read_m1 : 5; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 31
+            uint32_t reserved2 : 3;
+            uint32_t max_outstanding_write_m1 : 4; // Maximum number of outstanding AXI write transactions - 1 in range
                                                    // 0 to 15
+            uint32_t reserved3 : 4;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR axi_limit3_r() :
-        max_beats(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        memtype(static_cast<uint32_t>(::axi_mem_encoding_type::DEVICE_NON_BUFFERABLE)),
-        reserved1(static_cast<uint32_t>(0)), max_outstanding_read_m1(static_cast<uint32_t>(0x00)),
-        max_outstanding_write_m1(static_cast<uint32_t>(0x00))
-    {
-    }
-    CONSTEXPR axi_limit3_r(uint32_t init) : word(init) {}
+    CONSTEXPR axi_limit3_r() : word0(0) {}
+    CONSTEXPR axi_limit3_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
     axi_limit3_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_max_beats() const
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_max_beats() const volatile
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_beats);
+        NPU_NAMESPACE::max_beats value = static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR axi_limit3_r &set_max_beats(uint32_t value)
+    CONSTEXPR axi_limit3_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    {
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
     {
-        max_beats = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR ::axi_mem_encoding_type get_memtype() const
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    ::axi_mem_encoding_type get_memtype() const volatile
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
     {
-        ::axi_mem_encoding_type value = static_cast<::axi_mem_encoding_type>(memtype);
+        NPU_NAMESPACE::axi_mem_encoding value =
+            static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
         return value;
     }
-    CONSTEXPR axi_limit3_r &set_memtype(::axi_mem_encoding_type value)
+    CONSTEXPR axi_limit3_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit3_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
     {
-        memtype = ((1u << 4) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_read_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_max_outstanding_read_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_read_m1);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 16);
         return value;
     }
     CONSTEXPR axi_limit3_r &set_max_outstanding_read_m1(uint32_t value)
     {
-        max_outstanding_read_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 5) - 1)) << 16) & word0) | ((((1U << 5) - 1) & value) << 16);
         return *this;
     }
     CONSTEXPR uint32_t get_max_outstanding_write_m1() const
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     uint32_t get_max_outstanding_write_m1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(max_outstanding_write_m1);
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 24);
         return value;
     }
     CONSTEXPR axi_limit3_r &set_max_outstanding_write_m1(uint32_t value)
     {
-        max_outstanding_write_m1 = ((1u << 8) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 24) & word0) | ((((1U << 4) - 1) & value) << 24);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// basep0_r - Lower 32 bits of the Base pointer for region index 0
-struct basep0_r
+// basep_r - The driver can use this address to relocate the command stream on region 0. If the region contains data
+// requiring A-byte alignment then the base must be a multiple of A
+struct basep_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
   public:
-    CONSTEXPR basep0_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep0_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
-    }
-    void operator=(uint32_t value) volatile
-    {
-        word = value;
-    }
-    CONSTEXPR operator uint32_t()
+    CONSTEXPR basep_r() : word0(0), word1(0) {}
+    CONSTEXPR basep_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
     {
-        return word;
     }
-    operator uint32_t() volatile
+    CONSTEXPR void operator=(uint64_t value)
     {
-        return word;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    basep0_r copy() volatile
+    void operator=(uint64_t value) volatile
     {
-        return *this;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR operator uint64_t()
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
-        return value;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    uint32_t get_addr_word() const volatile
+    operator uint64_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
-        return value;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    CONSTEXPR basep0_r &set_addr_word(uint32_t value)
+    basep_r copy() volatile
     {
-        addr_word = static_cast<uint32_t>(value);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// basep1_r - Upper 32 bits of the Base pointer for region index 0
-struct basep1_r
+// wd_status_r - WD_STATUS
+struct wd_status_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t addr_word; // The high word of the 64-bit address
+        struct
+        {
+            uint32_t core_slice_state : 2; // WD core slice parser state
+            uint32_t core_idle : 1;        // Core idle
+            uint32_t ctrl_state : 2;       // WD control state
+            uint32_t ctrl_idle : 1;        // All stripe jobs idle (all weights consumed)
+            uint32_t write_buf_index0 : 3; // current write index for next data from core
+            uint32_t write_buf_valid0 : 1; // write buf valid (full)
+            uint32_t write_buf_idle0 : 1;  // write buf idle (empty)
+            uint32_t write_buf_index1 : 3; // current write index for next data from core
+            uint32_t write_buf_valid1 : 1; // write buf valid (full)
+            uint32_t write_buf_idle1 : 1;  // write buf idle (empty)
+            uint32_t events : 12;          // WD events mapped as appendix A
+            uint32_t reserved0 : 4;
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR basep1_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep1_r(uint32_t init) : word(init) {}
+    CONSTEXPR wd_status_r() : word0(0) {}
+    CONSTEXPR wd_status_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    basep1_r copy() volatile
+    wd_status_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR NPU_NAMESPACE::wd_core_slice_state get_core_slice_state() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        NPU_NAMESPACE::wd_core_slice_state value =
+            static_cast<NPU_NAMESPACE::wd_core_slice_state>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    NPU_NAMESPACE::wd_core_slice_state get_core_slice_state() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        NPU_NAMESPACE::wd_core_slice_state value =
+            static_cast<NPU_NAMESPACE::wd_core_slice_state>(((1U << 2) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR basep1_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_core_slice_state(NPU_NAMESPACE::wd_core_slice_state value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep2_r - Lower 32 bits of the Base pointer for region index 1
-struct basep2_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep2_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep2_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile wd_status_r &set_core_slice_state(NPU_NAMESPACE::wd_core_slice_state value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 2) - 1)) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_core_idle() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_core_idle() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR wd_status_r &set_core_idle(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    basep2_r copy() volatile
+    volatile wd_status_r &set_core_idle(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR NPU_NAMESPACE::wd_ctrl_state get_ctrl_state() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        NPU_NAMESPACE::wd_ctrl_state value = static_cast<NPU_NAMESPACE::wd_ctrl_state>(((1U << 2) - 1) & (word0 >> 3));
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    NPU_NAMESPACE::wd_ctrl_state get_ctrl_state() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        NPU_NAMESPACE::wd_ctrl_state value = static_cast<NPU_NAMESPACE::wd_ctrl_state>(((1U << 2) - 1) & (word0 >> 3));
         return value;
     }
-    CONSTEXPR basep2_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_ctrl_state(NPU_NAMESPACE::wd_ctrl_state value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 3) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 3);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep3_r - Upper 32 bits of the Base pointer for region index 1
-struct basep3_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
+    volatile wd_status_r &set_ctrl_state(NPU_NAMESPACE::wd_ctrl_state value) volatile
     {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep3_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep3_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
+        word0 = (((~((1U << 2) - 1)) << 3) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 3);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_ctrl_idle() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_ctrl_idle() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR wd_status_r &set_ctrl_idle(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
     }
-    basep3_r copy() volatile
+    volatile wd_status_r &set_ctrl_idle(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_write_buf_index0() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 3) - 1) & (word0 >> 6);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_write_buf_index0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 3) - 1) & (word0 >> 6);
         return value;
     }
-    CONSTEXPR basep3_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_write_buf_index0(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 3) - 1)) << 6) & word0) | ((((1U << 3) - 1) & value) << 6);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep4_r - Lower 32 bits of the Base pointer for region index 2
-struct basep4_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep4_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep4_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile wd_status_r &set_write_buf_index0(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 3) - 1)) << 6) & word0) | ((((1U << 3) - 1) & value) << 6);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_write_buf_valid0() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_write_buf_valid0() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR wd_status_r &set_write_buf_valid0(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
     }
-    basep4_r copy() volatile
+    volatile wd_status_r &set_write_buf_valid0(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_write_buf_idle0() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_write_buf_idle0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    CONSTEXPR basep4_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_write_buf_idle0(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep5_r - Upper 32 bits of the Base pointer for region index 2
-struct basep5_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep5_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep5_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile wd_status_r &set_write_buf_idle0(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_write_buf_index1() const
     {
-        word = value;
+        uint32_t value = ((1U << 3) - 1) & (word0 >> 11);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_write_buf_index1() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 3) - 1) & (word0 >> 11);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR wd_status_r &set_write_buf_index1(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 3) - 1)) << 11) & word0) | ((((1U << 3) - 1) & value) << 11);
+        return *this;
     }
-    basep5_r copy() volatile
+    volatile wd_status_r &set_write_buf_index1(uint32_t value) volatile
     {
+        word0 = (((~((1U << 3) - 1)) << 11) & word0) | ((((1U << 3) - 1) & value) << 11);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_write_buf_valid1() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_write_buf_valid1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
         return value;
     }
-    CONSTEXPR basep5_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_write_buf_valid1(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep6_r - Lower 32 bits of the Base pointer for region index 3
-struct basep6_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep6_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep6_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile wd_status_r &set_write_buf_valid1(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_write_buf_idle1() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_write_buf_idle1() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR wd_status_r &set_write_buf_idle1(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
     }
-    basep6_r copy() volatile
+    volatile wd_status_r &set_write_buf_idle1(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_events() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 12) - 1) & (word0 >> 16);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_events() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 12) - 1) & (word0 >> 16);
         return value;
     }
-    CONSTEXPR basep6_r &set_addr_word(uint32_t value)
+    CONSTEXPR wd_status_r &set_events(uint32_t value)
+    {
+        word0 = (((~((1U << 12) - 1)) << 16) & word0) | ((((1U << 12) - 1) & value) << 16);
+        return *this;
+    }
+    volatile wd_status_r &set_events(uint32_t value) volatile
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 12) - 1)) << 16) & word0) | ((((1U << 12) - 1) & value) << 16);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// basep7_r - Upper 32 bits of the Base pointer for region index 3
-struct basep7_r
+// mac_status_r - MAC_STATUS
+struct mac_status_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t addr_word; // The high word of the 64-bit address
+        struct
+        {
+            uint32_t block_cfg_valid : 1;     // MAC has a valid block configuration
+            uint32_t trav_en : 1;             // MAC is doing block traversal
+            uint32_t wait_for_ib : 1;         // MAC is waiting for an Input Buffer to become available
+            uint32_t wait_for_acc_buf : 1;    // MAC is waiting for an Accumulator Buffer to become available
+            uint32_t wait_for_weights : 1;    // MAC is waiting for a Weight Block to become available
+            uint32_t stall_stripe : 1;        // MAC is stalling between two stripes
+            uint32_t dw_sel : 1;              // Currently used weight interface in MAC AI
+            uint32_t wait_for_dw0_ready : 1;  // MAC AI is waiting for MAC DPU to send dw0_ready to WD
+            uint32_t wait_for_dw1_ready : 1;  // MAC AI is waiting for MAC DPU to send dw1_ready to WD
+            uint32_t acc_buf_sel_ai : 1;      // Currently used AccBuf interface in MAC AI
+            uint32_t wait_for_acc0_ready : 1; // MAC AI is waiting for acc0_ready from AO
+            uint32_t wait_for_acc1_ready : 1; // MAC AI is waiting for acc1_ready from AO
+            uint32_t acc_buf_sel_aa : 1;      // Currently used AccBuf interface in MAC ADDER_ARRAY
+            uint32_t acc0_valid : 1;          // MAC outgoing value of acc0_valid
+            uint32_t acc1_valid : 1;          // MAC outgoing value of acc1_valid
+            uint32_t reserved0 : 1;
+            uint32_t events : 11; // Mapped to MAC events described in Appendix A
+            uint32_t reserved1 : 5;
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR basep7_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep7_r(uint32_t init) : word(init) {}
+    CONSTEXPR mac_status_r() : word0(0) {}
+    CONSTEXPR mac_status_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    basep7_r copy() volatile
+    mac_status_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_block_cfg_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_block_cfg_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR basep7_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_block_cfg_valid(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep8_r - Lower 32 bits of the Base pointer for region index 4
-struct basep8_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep8_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep8_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_block_cfg_valid(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_trav_en() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_trav_en() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_trav_en(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    basep8_r copy() volatile
+    volatile mac_status_r &set_trav_en(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_wait_for_ib() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_wait_for_ib() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR basep8_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_wait_for_ib(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep9_r - Upper 32 bits of the Base pointer for region index 4
-struct basep9_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep9_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep9_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_wait_for_ib(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_wait_for_acc_buf() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_wait_for_acc_buf() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_wait_for_acc_buf(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    basep9_r copy() volatile
+    volatile mac_status_r &set_wait_for_acc_buf(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_wait_for_weights() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_wait_for_weights() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    CONSTEXPR basep9_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_wait_for_weights(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep10_r - Lower 32 bits of the Base pointer for region index 5
-struct basep10_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep10_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep10_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_wait_for_weights(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_stall_stripe() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_stall_stripe() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_stall_stripe(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
     }
-    basep10_r copy() volatile
+    volatile mac_status_r &set_stall_stripe(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_dw_sel() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_dw_sel() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    CONSTEXPR basep10_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_dw_sel(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep11_r - Upper 32 bits of the Base pointer for region index 5
-struct basep11_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep11_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep11_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_dw_sel(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_wait_for_dw0_ready() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_wait_for_dw0_ready() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_wait_for_dw0_ready(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
     }
-    basep11_r copy() volatile
+    volatile mac_status_r &set_wait_for_dw0_ready(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_wait_for_dw1_ready() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_wait_for_dw1_ready() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    CONSTEXPR basep11_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_wait_for_dw1_ready(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep12_r - Lower 32 bits of the Base pointer for region index 6
-struct basep12_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep12_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep12_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_wait_for_dw1_ready(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_acc_buf_sel_ai() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_acc_buf_sel_ai() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_acc_buf_sel_ai(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
     }
-    basep12_r copy() volatile
+    volatile mac_status_r &set_acc_buf_sel_ai(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_wait_for_acc0_ready() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_wait_for_acc0_ready() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    CONSTEXPR basep12_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_wait_for_acc0_ready(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep13_r - Upper 32 bits of the Base pointer for region index 6
-struct basep13_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep13_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep13_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
-    }
-    void operator=(uint32_t value) volatile
-    {
-        word = value;
-    }
-    CONSTEXPR operator uint32_t()
-    {
-        return word;
-    }
-    operator uint32_t() volatile
-    {
-        return word;
-    }
-    basep13_r copy() volatile
+    volatile mac_status_r &set_wait_for_acc0_ready(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_wait_for_acc1_ready() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_wait_for_acc1_ready() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    CONSTEXPR basep13_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_wait_for_acc1_ready(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep14_r - Lower 32 bits of the Base pointer for region index 7
-struct basep14_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
+    volatile mac_status_r &set_wait_for_acc1_ready(uint32_t value) volatile
     {
-        uint32_t addr_word; // The low word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep14_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep14_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
-    {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_acc_buf_sel_aa() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_acc_buf_sel_aa() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_acc_buf_sel_aa(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
     }
-    basep14_r copy() volatile
+    volatile mac_status_r &set_acc_buf_sel_aa(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_acc0_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_acc0_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
         return value;
     }
-    CONSTEXPR basep14_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_acc0_valid(uint32_t value)
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// basep15_r - Upper 32 bits of the Base pointer for region index 7
-struct basep15_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t addr_word; // The high word of the 64-bit address
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR basep15_r() : addr_word(static_cast<uint32_t>(0)) {}
-    CONSTEXPR basep15_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile mac_status_r &set_acc0_valid(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_acc1_valid() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_acc1_valid() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR mac_status_r &set_acc1_valid(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
     }
-    basep15_r copy() volatile
+    volatile mac_status_r &set_acc1_valid(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
         return *this;
     }
-    CONSTEXPR uint32_t get_addr_word() const
+    CONSTEXPR uint32_t get_events() const
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 11) - 1) & (word0 >> 16);
         return value;
     }
-    uint32_t get_addr_word() const volatile
+    uint32_t get_events() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(addr_word);
+        uint32_t value = ((1U << 11) - 1) & (word0 >> 16);
         return value;
     }
-    CONSTEXPR basep15_r &set_addr_word(uint32_t value)
+    CONSTEXPR mac_status_r &set_events(uint32_t value)
+    {
+        word0 = (((~((1U << 11) - 1)) << 16) & word0) | ((((1U << 11) - 1) & value) << 16);
+        return *this;
+    }
+    volatile mac_status_r &set_events(uint32_t value) volatile
     {
-        addr_word = static_cast<uint32_t>(value);
+        word0 = (((~((1U << 11) - 1)) << 16) & word0) | ((((1U << 11) - 1) & value) << 16);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// wd_status_r - WD_STATUS of core DEBUGCORE
-struct wd_status_r
+// ao_status_r - AO_STATUS
+struct ao_status_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t core_slice_state : 2; // STATE_HEADER=0, STATE_PALETTE=1, STATE_WEIGHTS=2
-            uint32_t core_idle : 1;        // Core idle
-            uint32_t ctrl_state : 2;       // IDLE=0, DRAIN=1, OFD_INIT=2, OFD_RUN=3
-            uint32_t ctrl_idle : 1;        // All stripe jobs idle (all weights consumed)
-            uint32_t write_buf_index0 : 3; // current write index for next data from core
-            uint32_t write_buf_valid0 : 1; // write buf valid (full)
-            uint32_t write_buf_idle0 : 1;  // write buf idle (empty)
-            uint32_t write_buf_index1 : 3; // current write index for next data from core
-            uint32_t write_buf_valid1 : 1; // write buf valid (full)
-            uint32_t write_buf_idle1 : 1;  // write buf idle (empty)
-            uint32_t events : 12;          // WD events mapped as appendix A
-            uint32_t reserved0 : 4;
+            uint32_t cmd_sbw_valid : 1; // Block command to shared buffer write module is valid
+            uint32_t cmd_act_valid : 1; // Block command to activation function module is valid
+            uint32_t cmd_ctl_valid : 1; // Block command to control module is valid
+            uint32_t cmd_scl_valid : 1; // Block command to scale module is valid
+            uint32_t cmd_sbr_valid : 1; // Block command to shared buffer read module is valid
+            uint32_t cmd_ofm_valid : 1; // Block command to ofm parameter module is valid
+            uint32_t blk_cmd_ready : 1; // Ready to accept block command
+            uint32_t blk_cmd_valid : 1; // Block command from CC is valid
+            uint32_t reserved0 : 8;
+            uint32_t events : 8; // Mapped to AO events described in Appendix A
+            uint32_t reserved1 : 8;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR wd_status_r() :
-        core_slice_state(static_cast<uint32_t>(0)), core_idle(static_cast<uint32_t>(0)),
-        ctrl_state(static_cast<uint32_t>(0)), ctrl_idle(static_cast<uint32_t>(0)),
-        write_buf_index0(static_cast<uint32_t>(0)), write_buf_valid0(static_cast<uint32_t>(0)),
-        write_buf_idle0(static_cast<uint32_t>(0)), write_buf_index1(static_cast<uint32_t>(0)),
-        write_buf_valid1(static_cast<uint32_t>(0)), write_buf_idle1(static_cast<uint32_t>(0)),
-        events(static_cast<uint32_t>(0)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR wd_status_r(uint32_t init) : word(init) {}
+    CONSTEXPR ao_status_r() : word0(0) {}
+    CONSTEXPR ao_status_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    wd_status_r copy() volatile
+    ao_status_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_core_slice_state() const
+    CONSTEXPR uint32_t get_cmd_sbw_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(core_slice_state);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_core_slice_state() const volatile
+    uint32_t get_cmd_sbw_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(core_slice_state);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR wd_status_r &set_core_slice_state(uint32_t value)
+    CONSTEXPR ao_status_r &set_cmd_sbw_valid(uint32_t value)
     {
-        core_slice_state = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_core_idle() const
+    volatile ao_status_r &set_cmd_sbw_valid(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(core_idle);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    uint32_t get_core_idle() const volatile
+    CONSTEXPR uint32_t get_cmd_act_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(core_idle);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR wd_status_r &set_core_idle(uint32_t value)
-    {
-        core_idle = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_ctrl_state() const
+    uint32_t get_cmd_act_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(ctrl_state);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_ctrl_state() const volatile
+    CONSTEXPR ao_status_r &set_cmd_act_valid(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(ctrl_state);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    CONSTEXPR wd_status_r &set_ctrl_state(uint32_t value)
+    volatile ao_status_r &set_cmd_act_valid(uint32_t value) volatile
     {
-        ctrl_state = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_ctrl_idle() const
+    CONSTEXPR uint32_t get_cmd_ctl_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(ctrl_idle);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_ctrl_idle() const volatile
+    uint32_t get_cmd_ctl_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(ctrl_idle);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR wd_status_r &set_ctrl_idle(uint32_t value)
+    CONSTEXPR ao_status_r &set_cmd_ctl_valid(uint32_t value)
     {
-        ctrl_idle = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_write_buf_index0() const
+    volatile ao_status_r &set_cmd_ctl_valid(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_index0);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    uint32_t get_write_buf_index0() const volatile
+    CONSTEXPR uint32_t get_cmd_scl_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_index0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR wd_status_r &set_write_buf_index0(uint32_t value)
-    {
-        write_buf_index0 = ((1u << 3) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_write_buf_valid0() const
+    uint32_t get_cmd_scl_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_valid0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_write_buf_valid0() const volatile
+    CONSTEXPR ao_status_r &set_cmd_scl_valid(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_valid0);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    CONSTEXPR wd_status_r &set_write_buf_valid0(uint32_t value)
+    volatile ao_status_r &set_cmd_scl_valid(uint32_t value) volatile
     {
-        write_buf_valid0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_write_buf_idle0() const
+    CONSTEXPR uint32_t get_cmd_sbr_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_idle0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    uint32_t get_write_buf_idle0() const volatile
+    uint32_t get_cmd_sbr_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_idle0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    CONSTEXPR wd_status_r &set_write_buf_idle0(uint32_t value)
+    CONSTEXPR ao_status_r &set_cmd_sbr_valid(uint32_t value)
     {
-        write_buf_idle0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
-    CONSTEXPR uint32_t get_write_buf_index1() const
+    volatile ao_status_r &set_cmd_sbr_valid(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_ofm_valid() const
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_index1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    uint32_t get_write_buf_index1() const volatile
+    uint32_t get_cmd_ofm_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_index1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    CONSTEXPR wd_status_r &set_write_buf_index1(uint32_t value)
+    CONSTEXPR ao_status_r &set_cmd_ofm_valid(uint32_t value)
     {
-        write_buf_index1 = ((1u << 3) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_write_buf_valid1() const
+    volatile ao_status_r &set_cmd_ofm_valid(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_cmd_ready() const
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_valid1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    uint32_t get_write_buf_valid1() const volatile
+    uint32_t get_blk_cmd_ready() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_valid1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    CONSTEXPR wd_status_r &set_write_buf_valid1(uint32_t value)
+    CONSTEXPR ao_status_r &set_blk_cmd_ready(uint32_t value)
     {
-        write_buf_valid1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
         return *this;
     }
-    CONSTEXPR uint32_t get_write_buf_idle1() const
+    volatile ao_status_r &set_blk_cmd_ready(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_idle1);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_blk_cmd_valid() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    uint32_t get_write_buf_idle1() const volatile
+    uint32_t get_blk_cmd_valid() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(write_buf_idle1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    CONSTEXPR wd_status_r &set_write_buf_idle1(uint32_t value)
+    CONSTEXPR ao_status_r &set_blk_cmd_valid(uint32_t value)
     {
-        write_buf_idle1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    volatile ao_status_r &set_blk_cmd_valid(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
         return *this;
     }
     CONSTEXPR uint32_t get_events() const
     {
-        uint32_t value = static_cast<uint32_t>(events);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 16);
         return value;
     }
     uint32_t get_events() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(events);
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 16);
         return value;
     }
-    CONSTEXPR wd_status_r &set_events(uint32_t value)
+    CONSTEXPR ao_status_r &set_events(uint32_t value)
+    {
+        word0 = (((~((1U << 8) - 1)) << 16) & word0) | ((((1U << 8) - 1) & value) << 16);
+        return *this;
+    }
+    volatile ao_status_r &set_events(uint32_t value) volatile
     {
-        events = ((1u << 12) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 8) - 1)) << 16) & word0) | ((((1U << 8) - 1) & value) << 16);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// mac_status_r - MAC_STATUS of core DEBUGCORE
-struct mac_status_r
+// dma_status0_r - DMA_STATUS0
+struct dma_status0_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t block_cfg_valid : 1;     // MAC has a valid block configuration
-            uint32_t trav_en : 1;             // MAC is doing block traversal
-            uint32_t wait_for_ib : 1;         // MAC is waiting for an Input Buffer to become available
-            uint32_t wait_for_acc_buf : 1;    // MAC is waiting for an Accumulator Buffer to become available
-            uint32_t wait_for_weights : 1;    // MAC is waiting for a Weight Block to become available
-            uint32_t stall_stripe : 1;        // MAC is stalling between two stripes
-            uint32_t dw_sel : 1;              // Currently used weight interface in MAC AI
-            uint32_t wait_for_dw0_ready : 1;  // MAC AI is waiting for MAC DPU to send dw0_ready to WD
-            uint32_t wait_for_dw1_ready : 1;  // MAC AI is waiting for MAC DPU to send dw1_ready to WD
-            uint32_t acc_buf_sel_ai : 1;      // Currently used AccBuf interface in MAC AI
-            uint32_t wait_for_acc0_ready : 1; // MAC AI is waiting for acc0_ready from AO
-            uint32_t wait_for_acc1_ready : 1; // MAC AI is waiting for acc1_ready from AO
-            uint32_t acc_buf_sel_aa : 1;      // Currently used AccBuf interface in MAC ADDER_ARRAY
-            uint32_t acc0_valid : 1;          // MAC outgoing value of acc0_valid
-            uint32_t acc1_valid : 1;          // MAC outgoing value of acc1_valid
-            uint32_t reserved0 : 1;
-            uint32_t events : 11; // Mapped to MAC events described in Appendix A
-            uint32_t reserved1 : 5;
+            uint32_t cmd_idle : 1; // When this bit is high means that the CMD block is not busy in generating addresses
+                                   // for a CMD job
+            uint32_t ifm_idle : 1; // When this bit is high means that there are no ongoing IFM jobs
+            uint32_t wgt_idle_c0 : 1; // When this bit is high means that the WGT block is not busy in generating
+                                      // addresses for a WGT job
+            uint32_t bas_idle_c0 : 1; // When this bit is high means that the BAS block is not busy in generating
+                                      // addresses for a BAS job
+            uint32_t m2m_idle : 1;    // When this bit is high means that there are no ongoing M2M jobs
+            uint32_t ofm_idle : 1;    // When this bit is high means that there are no ongoing OFM jobs
+            uint32_t halt_req : 1;    // CPM has requested to HALT AXI bus before soft reset
+            uint32_t halt_ack : 1;    // DMA is in condition to halt the AXI bus since there are no pending transactions
+            uint32_t pause_req : 1;   // CC has requested to pause the AXI
+            uint32_t pause_ack : 1; // DMA is in condition to pause the AXI bus since there are no pending transactions
+            uint32_t ib0_ai_valid_c0 : 1;       // Data for AI to be read in IFM input buffer 0 - Core 0
+            uint32_t ib0_ai_ready_c0 : 1;       // Data consumed from AI in IFM input buffer 0 - Core 0
+            uint32_t ib1_ai_valid_c0 : 1;       // Data for AI to be read in IFM input buffer 1 - Core 0
+            uint32_t ib1_ai_ready_c0 : 1;       // Data consumed from AI in IFM input buffer 1 - Core 0
+            uint32_t ib0_ao_valid_c0 : 1;       // Data for AO to be read in IFM input buffer 0 - Core 0
+            uint32_t ib0_ao_ready_c0 : 1;       // Data consumed from AO in IFM input buffer 0 - Core 0
+            uint32_t ib1_ao_valid_c0 : 1;       // Data for AO to be read in IFM input buffer 1 - Core 0
+            uint32_t ib1_ao_ready_c0 : 1;       // Data consumed from AO in IFM input buffer 1 - Core 0
+            uint32_t ob0_valid_c0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 0 -  Core 0
+            uint32_t ob0_ready_c0 : 1;          // Data consumed from DMA in OFM output buffer 0 - Core 0
+            uint32_t ob1_valid_c0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 1 -  Core 0
+            uint32_t ob1_ready_c0 : 1;          // Data consumed from DMA in OFM output buffer 1 - Core 0
+            uint32_t cmd_valid : 1;             // New command word for CC to be consumed
+            uint32_t cmd_ready : 1;             // command word consumed by CC
+            uint32_t wd_bitstream_valid_c0 : 1; // New weight word for WD to be consumed - Core 0
+            uint32_t wd_bitstream_ready_c0 : 1; // Weight word consumed by WD - Core 0
+            uint32_t bs_bitstream_valid_c0 : 1; // New BaS word for AO to be consumed - Core 0
+            uint32_t bs_bitstream_ready_c0 : 1; // BaS word consumed by AO - Core 0
+            uint32_t axi0_ar_stalled : 1; // Read transfer request stalled on arready low AXI0 (due to memory system)
+            uint32_t axi0_rd_limit_stall : 1; // Read stalled due to one AXI0 limit counter being reached
+            uint32_t axi0_aw_stalled : 1; // Write transfer request stalled on awready low AXI0 (due to memory system)
+            uint32_t axi0_w_stalled : 1;  // Write transfer stalled on wready low AXI0 (due to memory system)
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR mac_status_r() :
-        block_cfg_valid(static_cast<uint32_t>(0)), trav_en(static_cast<uint32_t>(0)),
-        wait_for_ib(static_cast<uint32_t>(0)), wait_for_acc_buf(static_cast<uint32_t>(0)),
-        wait_for_weights(static_cast<uint32_t>(0)), stall_stripe(static_cast<uint32_t>(0)),
-        dw_sel(static_cast<uint32_t>(0)), wait_for_dw0_ready(static_cast<uint32_t>(0)),
-        wait_for_dw1_ready(static_cast<uint32_t>(0)), acc_buf_sel_ai(static_cast<uint32_t>(0)),
-        wait_for_acc0_ready(static_cast<uint32_t>(0)), wait_for_acc1_ready(static_cast<uint32_t>(0)),
-        acc_buf_sel_aa(static_cast<uint32_t>(0)), acc0_valid(static_cast<uint32_t>(0)),
-        acc1_valid(static_cast<uint32_t>(0)), reserved0(static_cast<uint32_t>(0)), events(static_cast<uint32_t>(0)),
-        reserved1(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR mac_status_r(uint32_t init) : word(init) {}
+    CONSTEXPR dma_status0_r() : word0(0) {}
+    CONSTEXPR dma_status0_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    mac_status_r copy() volatile
+    dma_status0_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_block_cfg_valid() const
+    CONSTEXPR uint32_t get_cmd_idle() const
     {
-        uint32_t value = static_cast<uint32_t>(block_cfg_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_block_cfg_valid() const volatile
+    uint32_t get_cmd_idle() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(block_cfg_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR mac_status_r &set_block_cfg_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_cmd_idle(uint32_t value)
     {
-        block_cfg_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_trav_en() const
+    volatile dma_status0_r &set_cmd_idle(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ifm_idle() const
     {
-        uint32_t value = static_cast<uint32_t>(trav_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_trav_en() const volatile
+    uint32_t get_ifm_idle() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(trav_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR mac_status_r &set_trav_en(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ifm_idle(uint32_t value)
     {
-        trav_en = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_ib() const
+    volatile dma_status0_r &set_ifm_idle(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_ib);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wgt_idle_c0() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_wait_for_ib() const volatile
+    uint32_t get_wgt_idle_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_ib);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_ib(uint32_t value)
+    CONSTEXPR dma_status0_r &set_wgt_idle_c0(uint32_t value)
     {
-        wait_for_ib = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_acc_buf() const
+    volatile dma_status0_r &set_wgt_idle_c0(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bas_idle_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc_buf);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_wait_for_acc_buf() const volatile
+    uint32_t get_bas_idle_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc_buf);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_acc_buf(uint32_t value)
+    CONSTEXPR dma_status0_r &set_bas_idle_c0(uint32_t value)
     {
-        wait_for_acc_buf = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_weights() const
+    volatile dma_status0_r &set_bas_idle_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_weights);
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_m2m_idle() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    uint32_t get_wait_for_weights() const volatile
+    uint32_t get_m2m_idle() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_weights);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_weights(uint32_t value)
+    CONSTEXPR dma_status0_r &set_m2m_idle(uint32_t value)
     {
-        wait_for_weights = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
-    CONSTEXPR uint32_t get_stall_stripe() const
+    volatile dma_status0_r &set_m2m_idle(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ofm_idle() const
     {
-        uint32_t value = static_cast<uint32_t>(stall_stripe);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    uint32_t get_stall_stripe() const volatile
+    uint32_t get_ofm_idle() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(stall_stripe);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    CONSTEXPR mac_status_r &set_stall_stripe(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ofm_idle(uint32_t value)
     {
-        stall_stripe = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_dw_sel() const
+    volatile dma_status0_r &set_ofm_idle(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(dw_sel);
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_halt_req() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    uint32_t get_dw_sel() const volatile
+    uint32_t get_halt_req() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(dw_sel);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    CONSTEXPR mac_status_r &set_dw_sel(uint32_t value)
+    CONSTEXPR dma_status0_r &set_halt_req(uint32_t value)
     {
-        dw_sel = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_dw0_ready() const
+    volatile dma_status0_r &set_halt_req(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_halt_ack() const
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_dw0_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    uint32_t get_wait_for_dw0_ready() const volatile
+    uint32_t get_halt_ack() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_dw0_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_dw0_ready(uint32_t value)
+    CONSTEXPR dma_status0_r &set_halt_ack(uint32_t value)
     {
-        wait_for_dw0_ready = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_dw1_ready() const
+    volatile dma_status0_r &set_halt_ack(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_dw1_ready);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
     }
-    uint32_t get_wait_for_dw1_ready() const volatile
+    CONSTEXPR uint32_t get_pause_req() const
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_dw1_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_dw1_ready(uint32_t value)
-    {
-        wait_for_dw1_ready = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_acc_buf_sel_ai() const
+    uint32_t get_pause_req() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(acc_buf_sel_ai);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    uint32_t get_acc_buf_sel_ai() const volatile
+    CONSTEXPR dma_status0_r &set_pause_req(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(acc_buf_sel_ai);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
     }
-    CONSTEXPR mac_status_r &set_acc_buf_sel_ai(uint32_t value)
+    volatile dma_status0_r &set_pause_req(uint32_t value) volatile
     {
-        acc_buf_sel_ai = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_acc0_ready() const
+    CONSTEXPR uint32_t get_pause_ack() const
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc0_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
         return value;
     }
-    uint32_t get_wait_for_acc0_ready() const volatile
+    uint32_t get_pause_ack() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc0_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_acc0_ready(uint32_t value)
+    CONSTEXPR dma_status0_r &set_pause_ack(uint32_t value)
     {
-        wait_for_acc0_ready = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
         return *this;
     }
-    CONSTEXPR uint32_t get_wait_for_acc1_ready() const
+    volatile dma_status0_r &set_pause_ack(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc1_ready);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
     }
-    uint32_t get_wait_for_acc1_ready() const volatile
+    CONSTEXPR uint32_t get_ib0_ai_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(wait_for_acc1_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    CONSTEXPR mac_status_r &set_wait_for_acc1_ready(uint32_t value)
-    {
-        wait_for_acc1_ready = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_acc_buf_sel_aa() const
+    uint32_t get_ib0_ai_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(acc_buf_sel_aa);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    uint32_t get_acc_buf_sel_aa() const volatile
+    CONSTEXPR dma_status0_r &set_ib0_ai_valid_c0(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(acc_buf_sel_aa);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
     }
-    CONSTEXPR mac_status_r &set_acc_buf_sel_aa(uint32_t value)
+    volatile dma_status0_r &set_ib0_ai_valid_c0(uint32_t value) volatile
     {
-        acc_buf_sel_aa = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-    CONSTEXPR uint32_t get_acc0_valid() const
+    CONSTEXPR uint32_t get_ib0_ai_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(acc0_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    uint32_t get_acc0_valid() const volatile
+    uint32_t get_ib0_ai_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(acc0_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    CONSTEXPR mac_status_r &set_acc0_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ib0_ai_ready_c0(uint32_t value)
     {
-        acc0_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
         return *this;
     }
-    CONSTEXPR uint32_t get_acc1_valid() const
+    volatile dma_status0_r &set_ib0_ai_ready_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(acc1_valid);
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ai_valid_c0() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
         return value;
     }
-    uint32_t get_acc1_valid() const volatile
+    uint32_t get_ib1_ai_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(acc1_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
         return value;
     }
-    CONSTEXPR mac_status_r &set_acc1_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ib1_ai_valid_c0(uint32_t value)
     {
-        acc1_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
         return *this;
     }
-    CONSTEXPR uint32_t get_events() const
+    volatile dma_status0_r &set_ib1_ai_valid_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(events);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
     }
-    uint32_t get_events() const volatile
+    CONSTEXPR uint32_t get_ib1_ai_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(events);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
         return value;
     }
-    CONSTEXPR mac_status_r &set_events(uint32_t value)
+    uint32_t get_ib1_ai_ready_c0() const volatile
     {
-        events = ((1u << 11) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
+        return value;
     }
-#endif //__cplusplus
-};
-
-// ao_status_r - AO_STATUS of core DEBUGCORE
-struct ao_status_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        struct
-        {
-            uint32_t cmd_sbw_valid : 1; // Block command to shared buffer write module is valid.
-            uint32_t cmd_act_valid : 1; // Block command to activation function module is valid.
-            uint32_t cmd_ctl_valid : 1; // Block command to control module is valid.
-            uint32_t cmd_scl_valid : 1; // Block command to scale module is valid.
-            uint32_t cmd_sbr_valid : 1; // Block command to shared buffer read module is valid.
-            uint32_t cmd_ofm_valid : 1; // Block command to ofm parameter module is valid.
-            uint32_t blk_cmd_ready : 1; // Ready to accept block command.
-            uint32_t blk_cmd_valid : 1; // Block command from CC is valid.
-            uint32_t reserved0 : 8;
-            uint32_t events : 8; // Mapped to AO events described in Appendix A.
-            uint32_t reserved1 : 8;
-        };
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR ao_status_r() :
-        cmd_sbw_valid(static_cast<uint32_t>(0)), cmd_act_valid(static_cast<uint32_t>(0)),
-        cmd_ctl_valid(static_cast<uint32_t>(0)), cmd_scl_valid(static_cast<uint32_t>(0)),
-        cmd_sbr_valid(static_cast<uint32_t>(0)), cmd_ofm_valid(static_cast<uint32_t>(0)),
-        blk_cmd_ready(static_cast<uint32_t>(0)), blk_cmd_valid(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), events(static_cast<uint32_t>(0)), reserved1(static_cast<uint32_t>(0))
+    CONSTEXPR dma_status0_r &set_ib1_ai_ready_c0(uint32_t value)
     {
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
     }
-    CONSTEXPR ao_status_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile dma_status0_r &set_ib1_ai_ready_c0(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_ib0_ao_valid_c0() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_ib0_ao_valid_c0() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR dma_status0_r &set_ib0_ao_valid_c0(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
     }
-    ao_status_r copy() volatile
+    volatile dma_status0_r &set_ib0_ao_valid_c0(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
         return *this;
     }
-    CONSTEXPR uint32_t get_cmd_sbw_valid() const
+    CONSTEXPR uint32_t get_ib0_ao_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_sbw_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
         return value;
     }
-    uint32_t get_cmd_sbw_valid() const volatile
+    uint32_t get_ib0_ao_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_sbw_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
         return value;
     }
-    CONSTEXPR ao_status_r &set_cmd_sbw_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ib0_ao_ready_c0(uint32_t value)
     {
-        cmd_sbw_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
         return *this;
     }
-    CONSTEXPR uint32_t get_cmd_act_valid() const
+    volatile dma_status0_r &set_ib0_ao_ready_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_act_valid);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
     }
-    uint32_t get_cmd_act_valid() const volatile
+    CONSTEXPR uint32_t get_ib1_ao_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_act_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 16);
         return value;
     }
-    CONSTEXPR ao_status_r &set_cmd_act_valid(uint32_t value)
-    {
-        cmd_act_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_cmd_ctl_valid() const
+    uint32_t get_ib1_ao_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_ctl_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 16);
         return value;
     }
-    uint32_t get_cmd_ctl_valid() const volatile
+    CONSTEXPR dma_status0_r &set_ib1_ao_valid_c0(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(cmd_ctl_valid);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 16) & word0) | ((((1U << 1) - 1) & value) << 16);
+        return *this;
     }
-    CONSTEXPR ao_status_r &set_cmd_ctl_valid(uint32_t value)
+    volatile dma_status0_r &set_ib1_ao_valid_c0(uint32_t value) volatile
     {
-        cmd_ctl_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 16) & word0) | ((((1U << 1) - 1) & value) << 16);
         return *this;
     }
-    CONSTEXPR uint32_t get_cmd_scl_valid() const
+    CONSTEXPR uint32_t get_ib1_ao_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_scl_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 17);
         return value;
     }
-    uint32_t get_cmd_scl_valid() const volatile
+    uint32_t get_ib1_ao_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_scl_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 17);
         return value;
     }
-    CONSTEXPR ao_status_r &set_cmd_scl_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ib1_ao_ready_c0(uint32_t value)
     {
-        cmd_scl_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 17) & word0) | ((((1U << 1) - 1) & value) << 17);
         return *this;
     }
-    CONSTEXPR uint32_t get_cmd_sbr_valid() const
+    volatile dma_status0_r &set_ib1_ao_ready_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_sbr_valid);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 17) & word0) | ((((1U << 1) - 1) & value) << 17);
+        return *this;
     }
-    uint32_t get_cmd_sbr_valid() const volatile
+    CONSTEXPR uint32_t get_ob0_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(cmd_sbr_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 18);
         return value;
     }
-    CONSTEXPR ao_status_r &set_cmd_sbr_valid(uint32_t value)
-    {
-        cmd_sbr_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_cmd_ofm_valid() const
+    uint32_t get_ob0_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cmd_ofm_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 18);
         return value;
     }
-    uint32_t get_cmd_ofm_valid() const volatile
+    CONSTEXPR dma_status0_r &set_ob0_valid_c0(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(cmd_ofm_valid);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 18) & word0) | ((((1U << 1) - 1) & value) << 18);
+        return *this;
     }
-    CONSTEXPR ao_status_r &set_cmd_ofm_valid(uint32_t value)
+    volatile dma_status0_r &set_ob0_valid_c0(uint32_t value) volatile
     {
-        cmd_ofm_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 18) & word0) | ((((1U << 1) - 1) & value) << 18);
         return *this;
     }
-    CONSTEXPR uint32_t get_blk_cmd_ready() const
+    CONSTEXPR uint32_t get_ob0_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(blk_cmd_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 19);
         return value;
     }
-    uint32_t get_blk_cmd_ready() const volatile
+    uint32_t get_ob0_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(blk_cmd_ready);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 19);
         return value;
     }
-    CONSTEXPR ao_status_r &set_blk_cmd_ready(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ob0_ready_c0(uint32_t value)
     {
-        blk_cmd_ready = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 19) & word0) | ((((1U << 1) - 1) & value) << 19);
         return *this;
     }
-    CONSTEXPR uint32_t get_blk_cmd_valid() const
+    volatile dma_status0_r &set_ob0_ready_c0(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 19) & word0) | ((((1U << 1) - 1) & value) << 19);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ob1_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(blk_cmd_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 20);
         return value;
     }
-    uint32_t get_blk_cmd_valid() const volatile
+    uint32_t get_ob1_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(blk_cmd_valid);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 20);
         return value;
     }
-    CONSTEXPR ao_status_r &set_blk_cmd_valid(uint32_t value)
+    CONSTEXPR dma_status0_r &set_ob1_valid_c0(uint32_t value)
     {
-        blk_cmd_valid = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 20) & word0) | ((((1U << 1) - 1) & value) << 20);
         return *this;
     }
-    CONSTEXPR uint32_t get_events() const
+    volatile dma_status0_r &set_ob1_valid_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(events);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 20) & word0) | ((((1U << 1) - 1) & value) << 20);
+        return *this;
     }
-    uint32_t get_events() const volatile
+    CONSTEXPR uint32_t get_ob1_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(events);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 21);
         return value;
     }
-    CONSTEXPR ao_status_r &set_events(uint32_t value)
+    uint32_t get_ob1_ready_c0() const volatile
     {
-        events = ((1u << 8) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 21);
+        return value;
     }
-#endif //__cplusplus
-};
-
-// dma_status0_r - DMA_STATUS0 of core DEBUGCORE
-struct dma_status0_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        struct
-        {
-            uint32_t CMD_IDLE : 1; // When this bit is high means that the CMD block is not busy in generating addresses
-                                   // for a CMD job.
-            uint32_t IFM_IDLE : 1; // When this bit is high means that there are no ongoing IFM jobs
-            uint32_t WGT_IDLE_C0 : 1; // When this bit is high means that the WGT block is not busy in generating
-                                      // addresses for a WGT job
-            uint32_t BAS_IDLE_C0 : 1; // When this bit is high means that the BAS block is not busy in generating
-                                      // addresses for a BAS job
-            uint32_t M2M_IDLE : 1;    // When this bit is high means that there are no ongoing M2M jobs
-            uint32_t OFM_IDLE : 1;    // When this bit is high means that there are no ongoing OFM jobs
-            uint32_t HALT_REQ : 1;    // CPM has requested to HALT AXI bus before soft reset
-            uint32_t HALT_ACK : 1;    // DMA is in condition to halt the AXI bus since there are no pending transactions
-            uint32_t PAUSE_REQ : 1;   // CC has requested to pause the AXI
-            uint32_t PAUSE_ACK : 1; // DMA is in condition to pause the AXI bus since there are no pending transactions
-            uint32_t IB0_AI_VALID_C0 : 1;       // Data for AI to be read in IFM input buffer 0 - Core 0
-            uint32_t IB0_AI_READY_C0 : 1;       // Data consumed from AI in IFM input buffer 0 - Core 0
-            uint32_t IB1_AI_VALID_C0 : 1;       // Data for AI to be read in IFM input buffer 1 - Core 0
-            uint32_t IB1_AI_READY_C0 : 1;       // Data consumed from AI in IFM input buffer 1 - Core 0
-            uint32_t IB0_AO_VALID_C0 : 1;       // Data for AO to be read in IFM input buffer 0 - Core 0
-            uint32_t IB0_AO_READY_C0 : 1;       // Data consumed from AO in IFM input buffer 0 - Core 0
-            uint32_t IB1_AO_VALID_C0 : 1;       // Data for AO to be read in IFM input buffer 0 - Core 0
-            uint32_t IB1_AO_READY_C0 : 1;       // Data consumed from AO in IFM input buffer 1 - Core 0
-            uint32_t OB0_VALID_C0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 0 -  Core 0
-            uint32_t OB0_READY_C0 : 1;          // Data consumed from DMA in OFM output buffer 0 - Core 0
-            uint32_t OB1_VALID_C0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 1 -  Core 0
-            uint32_t OB1_READY_C0 : 1;          // Data consumed from DMA in OFM output buffer 1 - Core 0
-            uint32_t CMD_VALID : 1;             // New command word for CC to be consumed
-            uint32_t CMD_READY : 1;             // command word consumed by CC
-            uint32_t WD_BITSTREAM_VALID_C0 : 1; // New weight word for WD to be consumed - Core 0
-            uint32_t WD_BITSTREAM_READY_C0 : 1; // Weight word consumed by WD - Core 0
-            uint32_t BS_BITSTREAM_VALID_C0 : 1; // New BaS word for AO to be consumed - Core 0
-            uint32_t BS_BITSTREAM_READY_C0 : 1; // BaS word consumed by AO - Core 0
-            uint32_t AXI0_AR_STALLED : 1; // Read transfer request stalled on arready low AXI0 (due to memory system)
-            uint32_t AXI0_RD_LIMIT_STALL : 1; // Read stalled due to one AXI0 limit counter being reached
-            uint32_t AXI0_AW_STALLED : 1; // Write transfer request stalled on awready low AXI0 (due to memory system)
-            uint32_t AXI0_W_STALLED : 1;  // Write transfer stalled on awready low AXI0 (due to memory system)
-        };
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR dma_status0_r() :
-        CMD_IDLE(static_cast<uint32_t>(0)), IFM_IDLE(static_cast<uint32_t>(0)), WGT_IDLE_C0(static_cast<uint32_t>(0)),
-        BAS_IDLE_C0(static_cast<uint32_t>(0)), M2M_IDLE(static_cast<uint32_t>(0)), OFM_IDLE(static_cast<uint32_t>(0)),
-        HALT_REQ(static_cast<uint32_t>(0)), HALT_ACK(static_cast<uint32_t>(0)), PAUSE_REQ(static_cast<uint32_t>(0)),
-        PAUSE_ACK(static_cast<uint32_t>(0)), IB0_AI_VALID_C0(static_cast<uint32_t>(0)),
-        IB0_AI_READY_C0(static_cast<uint32_t>(0)), IB1_AI_VALID_C0(static_cast<uint32_t>(0)),
-        IB1_AI_READY_C0(static_cast<uint32_t>(0)), IB0_AO_VALID_C0(static_cast<uint32_t>(0)),
-        IB0_AO_READY_C0(static_cast<uint32_t>(0)), IB1_AO_VALID_C0(static_cast<uint32_t>(0)),
-        IB1_AO_READY_C0(static_cast<uint32_t>(0)), OB0_VALID_C0(static_cast<uint32_t>(0)),
-        OB0_READY_C0(static_cast<uint32_t>(0)), OB1_VALID_C0(static_cast<uint32_t>(0)),
-        OB1_READY_C0(static_cast<uint32_t>(0)), CMD_VALID(static_cast<uint32_t>(0)),
-        CMD_READY(static_cast<uint32_t>(0)), WD_BITSTREAM_VALID_C0(static_cast<uint32_t>(0)),
-        WD_BITSTREAM_READY_C0(static_cast<uint32_t>(0)), BS_BITSTREAM_VALID_C0(static_cast<uint32_t>(0)),
-        BS_BITSTREAM_READY_C0(static_cast<uint32_t>(0)), AXI0_AR_STALLED(static_cast<uint32_t>(0)),
-        AXI0_RD_LIMIT_STALL(static_cast<uint32_t>(0)), AXI0_AW_STALLED(static_cast<uint32_t>(0)),
-        AXI0_W_STALLED(static_cast<uint32_t>(0))
+    CONSTEXPR dma_status0_r &set_ob1_ready_c0(uint32_t value)
     {
+        word0 = (((~((1U << 1) - 1)) << 21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
     }
-    CONSTEXPR dma_status0_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile dma_status0_r &set_ob1_ready_c0(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_cmd_valid() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 22);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_cmd_valid() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 22);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR dma_status0_r &set_cmd_valid(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 22) & word0) | ((((1U << 1) - 1) & value) << 22);
+        return *this;
     }
-    dma_status0_r copy() volatile
+    volatile dma_status0_r &set_cmd_valid(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 22) & word0) | ((((1U << 1) - 1) & value) << 22);
         return *this;
     }
-    CONSTEXPR uint32_t get_CMD_IDLE() const
+    CONSTEXPR uint32_t get_cmd_ready() const
     {
-        uint32_t value = static_cast<uint32_t>(CMD_IDLE);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 23);
         return value;
     }
-    uint32_t get_CMD_IDLE() const volatile
+    uint32_t get_cmd_ready() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CMD_IDLE);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 23);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_CMD_IDLE(uint32_t value)
+    CONSTEXPR dma_status0_r &set_cmd_ready(uint32_t value)
     {
-        CMD_IDLE = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 23) & word0) | ((((1U << 1) - 1) & value) << 23);
         return *this;
     }
-    CONSTEXPR uint32_t get_IFM_IDLE() const
+    volatile dma_status0_r &set_cmd_ready(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IFM_IDLE);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 23) & word0) | ((((1U << 1) - 1) & value) << 23);
+        return *this;
     }
-    uint32_t get_IFM_IDLE() const volatile
+    CONSTEXPR uint32_t get_wd_bitstream_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(IFM_IDLE);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 24);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IFM_IDLE(uint32_t value)
+    uint32_t get_wd_bitstream_valid_c0() const volatile
     {
-        IFM_IDLE = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_WGT_IDLE_C0() const
-    {
-        uint32_t value = static_cast<uint32_t>(WGT_IDLE_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 24);
         return value;
     }
-    uint32_t get_WGT_IDLE_C0() const volatile
+    CONSTEXPR dma_status0_r &set_wd_bitstream_valid_c0(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(WGT_IDLE_C0);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 24) & word0) | ((((1U << 1) - 1) & value) << 24);
+        return *this;
     }
-    CONSTEXPR dma_status0_r &set_WGT_IDLE_C0(uint32_t value)
+    volatile dma_status0_r &set_wd_bitstream_valid_c0(uint32_t value) volatile
     {
-        WGT_IDLE_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 24) & word0) | ((((1U << 1) - 1) & value) << 24);
         return *this;
     }
-    CONSTEXPR uint32_t get_BAS_IDLE_C0() const
+    CONSTEXPR uint32_t get_wd_bitstream_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(BAS_IDLE_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 25);
         return value;
     }
-    uint32_t get_BAS_IDLE_C0() const volatile
+    uint32_t get_wd_bitstream_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(BAS_IDLE_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 25);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_BAS_IDLE_C0(uint32_t value)
+    CONSTEXPR dma_status0_r &set_wd_bitstream_ready_c0(uint32_t value)
     {
-        BAS_IDLE_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 25) & word0) | ((((1U << 1) - 1) & value) << 25);
         return *this;
     }
-    CONSTEXPR uint32_t get_M2M_IDLE() const
+    volatile dma_status0_r &set_wd_bitstream_ready_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(M2M_IDLE);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 25) & word0) | ((((1U << 1) - 1) & value) << 25);
+        return *this;
     }
-    uint32_t get_M2M_IDLE() const volatile
+    CONSTEXPR uint32_t get_bs_bitstream_valid_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(M2M_IDLE);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 26);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_M2M_IDLE(uint32_t value)
-    {
-        M2M_IDLE = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_OFM_IDLE() const
+    uint32_t get_bs_bitstream_valid_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OFM_IDLE);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 26);
         return value;
     }
-    uint32_t get_OFM_IDLE() const volatile
+    CONSTEXPR dma_status0_r &set_bs_bitstream_valid_c0(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(OFM_IDLE);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 26) & word0) | ((((1U << 1) - 1) & value) << 26);
+        return *this;
     }
-    CONSTEXPR dma_status0_r &set_OFM_IDLE(uint32_t value)
+    volatile dma_status0_r &set_bs_bitstream_valid_c0(uint32_t value) volatile
     {
-        OFM_IDLE = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 26) & word0) | ((((1U << 1) - 1) & value) << 26);
         return *this;
     }
-    CONSTEXPR uint32_t get_HALT_REQ() const
+    CONSTEXPR uint32_t get_bs_bitstream_ready_c0() const
     {
-        uint32_t value = static_cast<uint32_t>(HALT_REQ);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 27);
         return value;
     }
-    uint32_t get_HALT_REQ() const volatile
+    uint32_t get_bs_bitstream_ready_c0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(HALT_REQ);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 27);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_HALT_REQ(uint32_t value)
+    CONSTEXPR dma_status0_r &set_bs_bitstream_ready_c0(uint32_t value)
     {
-        HALT_REQ = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 27) & word0) | ((((1U << 1) - 1) & value) << 27);
         return *this;
     }
-    CONSTEXPR uint32_t get_HALT_ACK() const
+    volatile dma_status0_r &set_bs_bitstream_ready_c0(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(HALT_ACK);
+        word0 = (((~((1U << 1) - 1)) << 27) & word0) | ((((1U << 1) - 1) & value) << 27);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi0_ar_stalled() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 28);
         return value;
     }
-    uint32_t get_HALT_ACK() const volatile
+    uint32_t get_axi0_ar_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(HALT_ACK);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 28);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_HALT_ACK(uint32_t value)
+    CONSTEXPR dma_status0_r &set_axi0_ar_stalled(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 28) & word0) | ((((1U << 1) - 1) & value) << 28);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_ar_stalled(uint32_t value) volatile
     {
-        HALT_ACK = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 28) & word0) | ((((1U << 1) - 1) & value) << 28);
         return *this;
     }
-    CONSTEXPR uint32_t get_PAUSE_REQ() const
+    CONSTEXPR uint32_t get_axi0_rd_limit_stall() const
     {
-        uint32_t value = static_cast<uint32_t>(PAUSE_REQ);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 29);
         return value;
     }
-    uint32_t get_PAUSE_REQ() const volatile
+    uint32_t get_axi0_rd_limit_stall() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PAUSE_REQ);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 29);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_PAUSE_REQ(uint32_t value)
+    CONSTEXPR dma_status0_r &set_axi0_rd_limit_stall(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 29) & word0) | ((((1U << 1) - 1) & value) << 29);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_rd_limit_stall(uint32_t value) volatile
     {
-        PAUSE_REQ = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 29) & word0) | ((((1U << 1) - 1) & value) << 29);
         return *this;
     }
-    CONSTEXPR uint32_t get_PAUSE_ACK() const
+    CONSTEXPR uint32_t get_axi0_aw_stalled() const
     {
-        uint32_t value = static_cast<uint32_t>(PAUSE_ACK);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 30);
         return value;
     }
-    uint32_t get_PAUSE_ACK() const volatile
+    uint32_t get_axi0_aw_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PAUSE_ACK);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 30);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_PAUSE_ACK(uint32_t value)
+    CONSTEXPR dma_status0_r &set_axi0_aw_stalled(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 30) & word0) | ((((1U << 1) - 1) & value) << 30);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_aw_stalled(uint32_t value) volatile
     {
-        PAUSE_ACK = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 30) & word0) | ((((1U << 1) - 1) & value) << 30);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AI_VALID_C0() const
+    CONSTEXPR uint32_t get_axi0_w_stalled() const
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_IB0_AI_VALID_C0() const volatile
+    uint32_t get_axi0_w_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB0_AI_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status0_r &set_axi0_w_stalled(uint32_t value)
     {
-        IB0_AI_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 31) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AI_READY_C0() const
+    volatile dma_status0_r &set_axi0_w_stalled(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_READY_C0);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 31) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
     }
-    uint32_t get_IB0_AI_READY_C0() const volatile
+#endif
+};
+
+// dma_status1_r - DMA_STATUS1
+struct dma_status1_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_READY_C0);
-        return value;
+        struct
+        {
+            uint32_t axi0_wr_limit_stall : 1; // Write stalled due to one AXI0 limit counter being reached
+            uint32_t axi1_ar_stalled : 1; // Read transfer request stalled on arready low AXI1 (due to memory system)
+            uint32_t axi1_rd_limit_stall : 1; // Read stalled due to one AXI1 limit counter being reached
+            uint32_t axi1_wr_stalled : 1; // Write transfer request stalled on awready low AXI1 (due to memory system)
+            uint32_t axi1_w_stalled : 1;  // Write transfer stalled on wready low AXI1 (due to memory system)
+            uint32_t axi1_wr_limit_stall : 1; // Write stalled due to one AXI1 limit counter being reached
+            uint32_t wgt_idle_c1 : 1;     // When this bit is high, the WGT block is not busy generating
+                                          // addresses for a WGT job
+            uint32_t bas_idle_c1 : 1;     // When this bit is high, the BAS block is not busy generating
+                                          // addresses for a BAS job
+            uint32_t ib0_ai_valid_c1 : 1; // Data for AI to be read in IFM input buffer 0 - Core 1
+            uint32_t ib0_ai_ready_c1 : 1; // Data consumed from AI in IFM input buffer 0 - Core 1
+            uint32_t ib1_ai_valid_c1 : 1; // Data for AI to be read in IFM input buffer 1 - Core 1
+            uint32_t ib1_ai_ready_c1 : 1; // Data consumed from AI in IFM input buffer 1 - Core 1
+            uint32_t ib0_ao_valid_c1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1
+            uint32_t ib0_ao_ready_c1 : 1; // Data consumed from AO in IFM input buffer 0 - Core 1
+            uint32_t ib1_ao_valid_c1 : 1; // Data for AO to be read in IFM input buffer 1 - Core 1
+            uint32_t ib1_ao_ready_c1 : 1; // Data consumed from AO in IFM input buffer 1 - Core 1
+            uint32_t ob0_valid_c1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 0 - Core 1
+            uint32_t ob0_ready_c1 : 1;    // Data consumed from DMA in OFM output buffer 0 - Core 1
+            uint32_t ob1_valid_c1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 1 - Core 1
+            uint32_t ob1_ready_c1 : 1;    // Data consumed from DMA in OFM output buffer 1 - Core 1
+            uint32_t wd_bitstream_valid_c1 : 1; // New weight word for WD to be consumed - Core 1
+            uint32_t wd_bitstream_ready_c1 : 1; // Weight word consumed by WD - Core 1
+            uint32_t bs_bitstream_valid_c1 : 1; // New BaS word for AO to be consumed - Core 1
+            uint32_t bs_bitstream_ready_c1 : 1; // BaS word consumed by AO - Core 1
+            uint32_t reserved0 : 8;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR dma_status1_r() : word0(0) {}
+    CONSTEXPR dma_status1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR dma_status0_r &set_IB0_AI_READY_C0(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        IB0_AI_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_IB1_AI_VALID_C0() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_VALID_C0);
-        return value;
+        return word0;
     }
-    uint32_t get_IB1_AI_VALID_C0() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_VALID_C0);
-        return value;
+        return word0;
     }
-    CONSTEXPR dma_status0_r &set_IB1_AI_VALID_C0(uint32_t value)
+    dma_status1_r copy() volatile
     {
-        IB1_AI_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AI_READY_C0() const
+    CONSTEXPR uint32_t get_axi0_wr_limit_stall() const
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_IB1_AI_READY_C0() const volatile
+    uint32_t get_axi0_wr_limit_stall() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB1_AI_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi0_wr_limit_stall(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi0_wr_limit_stall(uint32_t value) volatile
     {
-        IB1_AI_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AO_VALID_C0() const
+    CONSTEXPR uint32_t get_axi1_ar_stalled() const
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_IB0_AO_VALID_C0() const volatile
+    uint32_t get_axi1_ar_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB0_AO_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi1_ar_stalled(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi1_ar_stalled(uint32_t value) volatile
     {
-        IB0_AO_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AO_READY_C0() const
+    CONSTEXPR uint32_t get_axi1_rd_limit_stall() const
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_IB0_AO_READY_C0() const volatile
+    uint32_t get_axi1_rd_limit_stall() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB0_AO_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi1_rd_limit_stall(uint32_t value)
     {
-        IB0_AO_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AO_VALID_C0() const
+    volatile dma_status1_r &set_axi1_rd_limit_stall(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_VALID_C0);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_axi1_wr_stalled() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_IB1_AO_VALID_C0() const volatile
+    uint32_t get_axi1_wr_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB1_AO_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi1_wr_stalled(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi1_wr_stalled(uint32_t value) volatile
     {
-        IB1_AO_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AO_READY_C0() const
+    CONSTEXPR uint32_t get_axi1_w_stalled() const
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    uint32_t get_IB1_AO_READY_C0() const volatile
+    uint32_t get_axi1_w_stalled() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_IB1_AO_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi1_w_stalled(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi1_w_stalled(uint32_t value) volatile
     {
-        IB1_AO_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 4) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
-    CONSTEXPR uint32_t get_OB0_VALID_C0() const
+    CONSTEXPR uint32_t get_axi1_wr_limit_stall() const
     {
-        uint32_t value = static_cast<uint32_t>(OB0_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    uint32_t get_OB0_VALID_C0() const volatile
+    uint32_t get_axi1_wr_limit_stall() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB0_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_OB0_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_axi1_wr_limit_stall(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi1_wr_limit_stall(uint32_t value) volatile
     {
-        OB0_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 5) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_OB0_READY_C0() const
+    CONSTEXPR uint32_t get_wgt_idle_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(OB0_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    uint32_t get_OB0_READY_C0() const volatile
+    uint32_t get_wgt_idle_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB0_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_OB0_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_wgt_idle_c1(uint32_t value)
     {
-        OB0_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
         return *this;
     }
-    CONSTEXPR uint32_t get_OB1_VALID_C0() const
+    volatile dma_status1_r &set_wgt_idle_c1(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB1_VALID_C0);
+        word0 = (((~((1U << 1) - 1)) << 6) & word0) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bas_idle_c1() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    uint32_t get_OB1_VALID_C0() const volatile
+    uint32_t get_bas_idle_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB1_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_OB1_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_bas_idle_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    volatile dma_status1_r &set_bas_idle_c1(uint32_t value) volatile
     {
-        OB1_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 7) & word0) | ((((1U << 1) - 1) & value) << 7);
         return *this;
     }
-    CONSTEXPR uint32_t get_OB1_READY_C0() const
+    CONSTEXPR uint32_t get_ib0_ai_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(OB1_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    uint32_t get_OB1_READY_C0() const volatile
+    uint32_t get_ib0_ai_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB1_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_OB1_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib0_ai_valid_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ai_valid_c1(uint32_t value) volatile
     {
-        OB1_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 8) & word0) | ((((1U << 1) - 1) & value) << 8);
         return *this;
     }
-    CONSTEXPR uint32_t get_CMD_VALID() const
+    CONSTEXPR uint32_t get_ib0_ai_ready_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(CMD_VALID);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
         return value;
     }
-    uint32_t get_CMD_VALID() const volatile
+    uint32_t get_ib0_ai_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CMD_VALID);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 9);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_CMD_VALID(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib0_ai_ready_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ai_ready_c1(uint32_t value) volatile
     {
-        CMD_VALID = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 9) & word0) | ((((1U << 1) - 1) & value) << 9);
         return *this;
     }
-    CONSTEXPR uint32_t get_CMD_READY() const
+    CONSTEXPR uint32_t get_ib1_ai_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(CMD_READY);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    uint32_t get_CMD_READY() const volatile
+    uint32_t get_ib1_ai_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CMD_READY);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_CMD_READY(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib1_ai_valid_c1(uint32_t value)
     {
-        CMD_READY = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-    CONSTEXPR uint32_t get_WD_BITSTREAM_VALID_C0() const
+    volatile dma_status1_r &set_ib1_ai_valid_c1(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_VALID_C0);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ai_ready_c1() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    uint32_t get_WD_BITSTREAM_VALID_C0() const volatile
+    uint32_t get_ib1_ai_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 11);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_WD_BITSTREAM_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib1_ai_ready_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ai_ready_c1(uint32_t value) volatile
     {
-        WD_BITSTREAM_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 11) & word0) | ((((1U << 1) - 1) & value) << 11);
         return *this;
     }
-    CONSTEXPR uint32_t get_WD_BITSTREAM_READY_C0() const
+    CONSTEXPR uint32_t get_ib0_ao_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
         return value;
     }
-    uint32_t get_WD_BITSTREAM_READY_C0() const volatile
+    uint32_t get_ib0_ao_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 12);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_WD_BITSTREAM_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib0_ao_valid_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ao_valid_c1(uint32_t value) volatile
     {
-        WD_BITSTREAM_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 12) & word0) | ((((1U << 1) - 1) & value) << 12);
         return *this;
     }
-    CONSTEXPR uint32_t get_BS_BITSTREAM_VALID_C0() const
+    CONSTEXPR uint32_t get_ib0_ao_ready_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
         return value;
     }
-    uint32_t get_BS_BITSTREAM_VALID_C0() const volatile
+    uint32_t get_ib0_ao_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_VALID_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 13);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_BS_BITSTREAM_VALID_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib0_ao_ready_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ao_ready_c1(uint32_t value) volatile
     {
-        BS_BITSTREAM_VALID_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 13) & word0) | ((((1U << 1) - 1) & value) << 13);
         return *this;
     }
-    CONSTEXPR uint32_t get_BS_BITSTREAM_READY_C0() const
+    CONSTEXPR uint32_t get_ib1_ao_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
         return value;
     }
-    uint32_t get_BS_BITSTREAM_READY_C0() const volatile
+    uint32_t get_ib1_ao_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_READY_C0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 14);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_BS_BITSTREAM_READY_C0(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib1_ao_valid_c1(uint32_t value)
     {
-        BS_BITSTREAM_READY_C0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI0_AR_STALLED() const
+    volatile dma_status1_r &set_ib1_ao_valid_c1(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_AR_STALLED);
+        word0 = (((~((1U << 1) - 1)) << 14) & word0) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ao_ready_c1() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
         return value;
     }
-    uint32_t get_AXI0_AR_STALLED() const volatile
+    uint32_t get_ib1_ao_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_AR_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 15);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_AXI0_AR_STALLED(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ib1_ao_ready_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ao_ready_c1(uint32_t value) volatile
     {
-        AXI0_AR_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 15) & word0) | ((((1U << 1) - 1) & value) << 15);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI0_RD_LIMIT_STALL() const
+    CONSTEXPR uint32_t get_ob0_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_RD_LIMIT_STALL);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 16);
         return value;
     }
-    uint32_t get_AXI0_RD_LIMIT_STALL() const volatile
+    uint32_t get_ob0_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_RD_LIMIT_STALL);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 16);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_AXI0_RD_LIMIT_STALL(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ob0_valid_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 16) & word0) | ((((1U << 1) - 1) & value) << 16);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob0_valid_c1(uint32_t value) volatile
     {
-        AXI0_RD_LIMIT_STALL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 16) & word0) | ((((1U << 1) - 1) & value) << 16);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI0_AW_STALLED() const
+    CONSTEXPR uint32_t get_ob0_ready_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_AW_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 17);
         return value;
     }
-    uint32_t get_AXI0_AW_STALLED() const volatile
+    uint32_t get_ob0_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_AW_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 17);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_AXI0_AW_STALLED(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ob0_ready_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 17) & word0) | ((((1U << 1) - 1) & value) << 17);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob0_ready_c1(uint32_t value) volatile
     {
-        AXI0_AW_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 17) & word0) | ((((1U << 1) - 1) & value) << 17);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI0_W_STALLED() const
+    CONSTEXPR uint32_t get_ob1_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_W_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 18);
         return value;
     }
-    uint32_t get_AXI0_W_STALLED() const volatile
+    uint32_t get_ob1_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_W_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 18);
         return value;
     }
-    CONSTEXPR dma_status0_r &set_AXI0_W_STALLED(uint32_t value)
+    CONSTEXPR dma_status1_r &set_ob1_valid_c1(uint32_t value)
     {
-        AXI0_W_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 18) & word0) | ((((1U << 1) - 1) & value) << 18);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// dma_status1_r - DMA_STATUS1 of core DEBUGCORE
-struct dma_status1_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        struct
-        {
-            uint32_t AXI0_WR_LIMIT_STALL : 1; // Write stalled due to one AXI0 limit counter being reached
-            uint32_t AXI1_AR_STALLED : 1; // Read transfer request stalled on arready low AXI1 (due to memory system)
-            uint32_t AXI1_RD_LIMIT_STALL : 1; // Read stalled due to one AXI1 limit counter being reached
-            uint32_t AXI1_WR_STALLED : 1; // Write transfer request stalled on awready low AXI1 (due to memory system)
-            uint32_t AXI1_W_STALLED : 1;  // Write transfer stalled on wready low AXI1 (due to memory system)
-            uint32_t AXI1_WR_LIMIT_STALL : 1; // Write stalled due to one AXI1 limit counter being reached
-            uint32_t WGT_IDLE_C1 : 1;     // When this bit is high means that the WGT block is not busy in generating
-                                          // addresses for a WGT job
-            uint32_t BAS_IDLE_C1 : 1;     // When this bit is high means that the BAS block is not busy in generating
-                                          // addresses for a BAS job.
-            uint32_t IB0_AI_VALID_C1 : 1; // Data for AI to be read in IFM input buffer 0 - Core 1
-            uint32_t IB0_AI_READY_C1 : 1; // Data consumed from AI in IFM input buffer 0 - Core 1
-            uint32_t IB1_AI_VALID_C1 : 1; // Data for AI to be read in IFM input buffer 1 - Core 1
-            uint32_t IB1_AI_READY_C1 : 1; // Data consumed from AI in IFM input buffer 1 - Core 1
-            uint32_t IB0_AO_VALID_C1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1
-            uint32_t IB0_AO_READY_C1 : 1; // Data consumed from AO in IFM input buffer 0 - Core 1
-            uint32_t IB1_AO_VALID_C1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1
-            uint32_t IB1_AO_READY_C1 : 1; // Data consumed from AO in IFM input buffer 1 - Core 1
-            uint32_t OB0_VALID_C1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 0 - Core 1
-            uint32_t OB0_READY_C1 : 1;    // Data consumed from DMA in OFM output buffer 0 - Core 1
-            uint32_t OB1_VALID_C1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 1 - Core 1
-            uint32_t OB1_READY_C1 : 1;    // Data consumed from DMA in OFM output buffer 1 - Core 1
-            uint32_t WD_BITSTREAM_VALID_C1 : 1; // New weight word for WD to be consumed - Core 1
-            uint32_t WD_BITSTREAM_READY_C1 : 1; // Weight word consumed by WD - Core 1
-            uint32_t BS_BITSTREAM_VALID_C1 : 1; // New BaS word for AO to be consumed - Core 1
-            uint32_t BS_BITSTREAM_READY_C1 : 1; // BaS word consumed by AO - Core 1
-            uint32_t reserved0 : 8;
-        };
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR dma_status1_r() :
-        AXI0_WR_LIMIT_STALL(static_cast<uint32_t>(0)), AXI1_AR_STALLED(static_cast<uint32_t>(0)),
-        AXI1_RD_LIMIT_STALL(static_cast<uint32_t>(0)), AXI1_WR_STALLED(static_cast<uint32_t>(0)),
-        AXI1_W_STALLED(static_cast<uint32_t>(0)), AXI1_WR_LIMIT_STALL(static_cast<uint32_t>(0)),
-        WGT_IDLE_C1(static_cast<uint32_t>(0)), BAS_IDLE_C1(static_cast<uint32_t>(0)),
-        IB0_AI_VALID_C1(static_cast<uint32_t>(0)), IB0_AI_READY_C1(static_cast<uint32_t>(0)),
-        IB1_AI_VALID_C1(static_cast<uint32_t>(0)), IB1_AI_READY_C1(static_cast<uint32_t>(0)),
-        IB0_AO_VALID_C1(static_cast<uint32_t>(0)), IB0_AO_READY_C1(static_cast<uint32_t>(0)),
-        IB1_AO_VALID_C1(static_cast<uint32_t>(0)), IB1_AO_READY_C1(static_cast<uint32_t>(0)),
-        OB0_VALID_C1(static_cast<uint32_t>(0)), OB0_READY_C1(static_cast<uint32_t>(0)),
-        OB1_VALID_C1(static_cast<uint32_t>(0)), OB1_READY_C1(static_cast<uint32_t>(0)),
-        WD_BITSTREAM_VALID_C1(static_cast<uint32_t>(0)), WD_BITSTREAM_READY_C1(static_cast<uint32_t>(0)),
-        BS_BITSTREAM_VALID_C1(static_cast<uint32_t>(0)), BS_BITSTREAM_READY_C1(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR dma_status1_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile dma_status1_r &set_ob1_valid_c1(uint32_t value) volatile
     {
-        word = value;
+        word0 = (((~((1U << 1) - 1)) << 18) & word0) | ((((1U << 1) - 1) & value) << 18);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_ob1_ready_c1() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 19);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_ob1_ready_c1() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 19);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR dma_status1_r &set_ob1_ready_c1(uint32_t value)
     {
-        return word;
+        word0 = (((~((1U << 1) - 1)) << 19) & word0) | ((((1U << 1) - 1) & value) << 19);
+        return *this;
     }
-    dma_status1_r copy() volatile
+    volatile dma_status1_r &set_ob1_ready_c1(uint32_t value) volatile
     {
+        word0 = (((~((1U << 1) - 1)) << 19) & word0) | ((((1U << 1) - 1) & value) << 19);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI0_WR_LIMIT_STALL() const
+    CONSTEXPR uint32_t get_wd_bitstream_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_WR_LIMIT_STALL);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 20);
         return value;
     }
-    uint32_t get_AXI0_WR_LIMIT_STALL() const volatile
+    uint32_t get_wd_bitstream_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI0_WR_LIMIT_STALL);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 20);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_AXI0_WR_LIMIT_STALL(uint32_t value)
+    CONSTEXPR dma_status1_r &set_wd_bitstream_valid_c1(uint32_t value)
     {
-        AXI0_WR_LIMIT_STALL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 20) & word0) | ((((1U << 1) - 1) & value) << 20);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI1_AR_STALLED() const
+    volatile dma_status1_r &set_wd_bitstream_valid_c1(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_AR_STALLED);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 20) & word0) | ((((1U << 1) - 1) & value) << 20);
+        return *this;
     }
-    uint32_t get_AXI1_AR_STALLED() const volatile
+    CONSTEXPR uint32_t get_wd_bitstream_ready_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_AR_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 21);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_AXI1_AR_STALLED(uint32_t value)
-    {
-        AXI1_AR_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_AXI1_RD_LIMIT_STALL() const
+    uint32_t get_wd_bitstream_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_RD_LIMIT_STALL);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 21);
         return value;
     }
-    uint32_t get_AXI1_RD_LIMIT_STALL() const volatile
+    CONSTEXPR dma_status1_r &set_wd_bitstream_ready_c1(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_RD_LIMIT_STALL);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 21) & word0) | ((((1U << 1) - 1) & value) << 21);
+        return *this;
     }
-    CONSTEXPR dma_status1_r &set_AXI1_RD_LIMIT_STALL(uint32_t value)
+    volatile dma_status1_r &set_wd_bitstream_ready_c1(uint32_t value) volatile
     {
-        AXI1_RD_LIMIT_STALL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 21) & word0) | ((((1U << 1) - 1) & value) << 21);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI1_WR_STALLED() const
+    CONSTEXPR uint32_t get_bs_bitstream_valid_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_WR_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 22);
         return value;
     }
-    uint32_t get_AXI1_WR_STALLED() const volatile
+    uint32_t get_bs_bitstream_valid_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_WR_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 22);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_AXI1_WR_STALLED(uint32_t value)
+    CONSTEXPR dma_status1_r &set_bs_bitstream_valid_c1(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 22) & word0) | ((((1U << 1) - 1) & value) << 22);
+        return *this;
+    }
+    volatile dma_status1_r &set_bs_bitstream_valid_c1(uint32_t value) volatile
     {
-        AXI1_WR_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 22) & word0) | ((((1U << 1) - 1) & value) << 22);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI1_W_STALLED() const
+    CONSTEXPR uint32_t get_bs_bitstream_ready_c1() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_W_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 23);
         return value;
     }
-    uint32_t get_AXI1_W_STALLED() const volatile
+    uint32_t get_bs_bitstream_ready_c1() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_W_STALLED);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 23);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_AXI1_W_STALLED(uint32_t value)
+    CONSTEXPR dma_status1_r &set_bs_bitstream_ready_c1(uint32_t value)
     {
-        AXI1_W_STALLED = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 23) & word0) | ((((1U << 1) - 1) & value) << 23);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI1_WR_LIMIT_STALL() const
+    volatile dma_status1_r &set_bs_bitstream_ready_c1(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_WR_LIMIT_STALL);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 23) & word0) | ((((1U << 1) - 1) & value) << 23);
+        return *this;
     }
-    uint32_t get_AXI1_WR_LIMIT_STALL() const volatile
+#endif
+};
+
+// clkforce_r - Force clocks on for clock gating; six single-bit force enables occupy bits 0-5 of the word
+struct clkforce_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(AXI1_WR_LIMIT_STALL);
-        return value;
+        struct
+        {
+            uint32_t top_level_clk : 1; // set to 1 to force on TOP level clock
+            uint32_t cc_clk : 1;        // set to 1 to force on CC clock
+            uint32_t dma_clk : 1;       // set to 1 to force on DMA clock
+            uint32_t mac_clk : 1;       // set to 1 to force on MAC clock
+            uint32_t ao_clk : 1;        // set to 1 to force on AO clock
+            uint32_t wd_clk : 1;        // set to 1 to force on WD clock
+            uint32_t reserved0 : 26;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; the accessors below shift and mask individual fields out of it
+
+  public:
+    CONSTEXPR clkforce_r() : word0(0) {}
+    CONSTEXPR clkforce_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR dma_status1_r &set_AXI1_WR_LIMIT_STALL(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        AXI1_WR_LIMIT_STALL = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_WGT_IDLE_C1() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(WGT_IDLE_C1);
-        return value;
+        return word0;
     }
-    uint32_t get_WGT_IDLE_C1() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(WGT_IDLE_C1);
-        return value;
+        return word0;
     }
-    CONSTEXPR dma_status1_r &set_WGT_IDLE_C1(uint32_t value)
+    clkforce_r copy() volatile // returns a non-volatile copy of the current value
     {
-        WGT_IDLE_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_BAS_IDLE_C1() const
+    CONSTEXPR uint32_t get_top_level_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(BAS_IDLE_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_BAS_IDLE_C1() const volatile
+    uint32_t get_top_level_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(BAS_IDLE_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_BAS_IDLE_C1(uint32_t value)
+    CONSTEXPR clkforce_r &set_top_level_clk(uint32_t value) // rewrites bit 0 only; all other bits are kept
     {
-        BAS_IDLE_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AI_VALID_C1() const
+    volatile clkforce_r &set_top_level_clk(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_VALID_C1);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    uint32_t get_IB0_AI_VALID_C1() const volatile
+    CONSTEXPR uint32_t get_cc_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_IB0_AI_VALID_C1(uint32_t value)
-    {
-        IB0_AI_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_IB0_AI_READY_C1() const
+    uint32_t get_cc_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_READY_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_IB0_AI_READY_C1() const volatile
+    CONSTEXPR clkforce_r &set_cc_clk(uint32_t value) // rewrites bit 1 only; all other bits are kept
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AI_READY_C1);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    CONSTEXPR dma_status1_r &set_IB0_AI_READY_C1(uint32_t value)
+    volatile clkforce_r &set_cc_clk(uint32_t value) volatile
     {
-        IB0_AI_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AI_VALID_C1() const
+    CONSTEXPR uint32_t get_dma_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_IB1_AI_VALID_C1() const volatile
+    uint32_t get_dma_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_IB1_AI_VALID_C1(uint32_t value)
+    CONSTEXPR clkforce_r &set_dma_clk(uint32_t value) // rewrites bit 2 only; all other bits are kept
     {
-        IB1_AI_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AI_READY_C1() const
+    volatile clkforce_r &set_dma_clk(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_READY_C1);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    uint32_t get_IB1_AI_READY_C1() const volatile
+    CONSTEXPR uint32_t get_mac_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AI_READY_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_IB1_AI_READY_C1(uint32_t value)
-    {
-        IB1_AI_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_IB0_AO_VALID_C1() const
+    uint32_t get_mac_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_IB0_AO_VALID_C1() const volatile
+    CONSTEXPR clkforce_r &set_mac_clk(uint32_t value) // rewrites bit 3 only; all other bits are kept
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_VALID_C1);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    CONSTEXPR dma_status1_r &set_IB0_AO_VALID_C1(uint32_t value)
+    volatile clkforce_r &set_mac_clk(uint32_t value) volatile
     {
-        IB0_AO_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB0_AO_READY_C1() const
+    CONSTEXPR uint32_t get_ao_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_READY_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    uint32_t get_IB0_AO_READY_C1() const volatile
+    uint32_t get_ao_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB0_AO_READY_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_IB0_AO_READY_C1(uint32_t value)
+    CONSTEXPR clkforce_r &set_ao_clk(uint32_t value) // rewrites bit 4 only; all other bits are kept
+    {
+        word0 = ((~(((1U << 1) - 1) << 4)) & word0) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile clkforce_r &set_ao_clk(uint32_t value) volatile
     {
-        IB0_AO_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 4)) & word0) | ((((1U << 1) - 1) & value) << 4);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AO_VALID_C1() const
+    CONSTEXPR uint32_t get_wd_clk() const
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    uint32_t get_IB1_AO_VALID_C1() const volatile
+    uint32_t get_wd_clk() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_VALID_C1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
         return value;
     }
-    CONSTEXPR dma_status1_r &set_IB1_AO_VALID_C1(uint32_t value)
+    CONSTEXPR clkforce_r &set_wd_clk(uint32_t value) // rewrites bit 5 only; all other bits are kept
     {
-        IB1_AO_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 5)) & word0) | ((((1U << 1) - 1) & value) << 5);
         return *this;
     }
-    CONSTEXPR uint32_t get_IB1_AO_READY_C1() const
+    volatile clkforce_r &set_wd_clk(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_READY_C1);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 5)) & word0) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
     }
-    uint32_t get_IB1_AO_READY_C1() const volatile
+#endif
+};
+
+// debug_address_r - Set debug address for register reads 0x400-0x7FF. The address must be 1KB aligned
+struct debug_address_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(IB1_AO_READY_C1);
-        return value;
+        struct
+        {
+            uint32_t addr : 32; // Register address
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; the single 32-bit addr field occupies the whole word
+
+  public:
+    CONSTEXPR debug_address_r() : word0(0) {}
+    CONSTEXPR debug_address_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR dma_status1_r &set_IB1_AO_READY_C1(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        IB1_AO_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_OB0_VALID_C1() const
+    CONSTEXPR operator uint32_t() // implicit read of the raw register value
     {
-        uint32_t value = static_cast<uint32_t>(OB0_VALID_C1);
-        return value;
+        return word0;
     }
-    uint32_t get_OB0_VALID_C1() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB0_VALID_C1);
-        return value;
+        return word0;
     }
-    CONSTEXPR dma_status1_r &set_OB0_VALID_C1(uint32_t value)
+    debug_address_r copy() volatile // returns a non-volatile copy of the current value
     {
-        OB0_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_OB0_READY_C1() const
+    CONSTEXPR uint32_t get_addr() const // addr spans all 32 bits, so no shift or mask is applied
     {
-        uint32_t value = static_cast<uint32_t>(OB0_READY_C1);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_OB0_READY_C1() const volatile
+    uint32_t get_addr() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB0_READY_C1);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR dma_status1_r &set_OB0_READY_C1(uint32_t value)
+    CONSTEXPR debug_address_r &set_addr(uint32_t value) // overwrites the whole word; returns *this for chaining
     {
-        OB0_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_OB1_VALID_C1() const
+    volatile debug_address_r &set_addr(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB1_VALID_C1);
-        return value;
+        word0 = value;
+        return *this;
     }
-    uint32_t get_OB1_VALID_C1() const volatile
+#endif
+};
+
+// debug_misc_r - 32-bit read/write register for driver debug use. This does not affect NPU function
+struct debug_misc_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(OB1_VALID_C1);
-        return value;
+        struct
+        {
+            uint32_t misc : 32; // Debug misc
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; the single 32-bit misc field occupies the whole word
+
+  public:
+    CONSTEXPR debug_misc_r() : word0(0) {}
+    CONSTEXPR debug_misc_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR dma_status1_r &set_OB1_VALID_C1(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        OB1_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_OB1_READY_C1() const
+    CONSTEXPR operator uint32_t() // implicit read of the raw register value
     {
-        uint32_t value = static_cast<uint32_t>(OB1_READY_C1);
-        return value;
+        return word0;
     }
-    uint32_t get_OB1_READY_C1() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(OB1_READY_C1);
-        return value;
+        return word0;
     }
-    CONSTEXPR dma_status1_r &set_OB1_READY_C1(uint32_t value)
+    debug_misc_r copy() volatile // returns a non-volatile copy of the current value
     {
-        OB1_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_WD_BITSTREAM_VALID_C1() const
+    CONSTEXPR uint32_t get_misc() const // misc spans all 32 bits, so no shift or mask is applied
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_VALID_C1);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_WD_BITSTREAM_VALID_C1() const volatile
+    uint32_t get_misc() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_VALID_C1);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR dma_status1_r &set_WD_BITSTREAM_VALID_C1(uint32_t value)
+    CONSTEXPR debug_misc_r &set_misc(uint32_t value) // overwrites the whole word; returns *this for chaining
     {
-        WD_BITSTREAM_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_WD_BITSTREAM_READY_C1() const
+    volatile debug_misc_r &set_misc(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_READY_C1);
-        return value;
+        word0 = value;
+        return *this;
     }
-    uint32_t get_WD_BITSTREAM_READY_C1() const volatile
+#endif
+};
+
+// debug_block_r - Set from which of four block banks the TSU registers are read. 0 = read from the current bank;
+// 256+n = force to read from bank n, where n is in the range 0 to 3
+struct debug_block_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(WD_BITSTREAM_READY_C1);
-        return value;
+        struct
+        {
+            uint32_t block : 32; // Debug block
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; the single 32-bit block field occupies the whole word
+
+  public:
+    CONSTEXPR debug_block_r() : word0(0) {}
+    CONSTEXPR debug_block_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR dma_status1_r &set_WD_BITSTREAM_READY_C1(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        WD_BITSTREAM_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_BS_BITSTREAM_VALID_C1() const
+    CONSTEXPR operator uint32_t() // implicit read of the raw register value
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_VALID_C1);
-        return value;
+        return word0;
     }
-    uint32_t get_BS_BITSTREAM_VALID_C1() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_VALID_C1);
-        return value;
+        return word0;
     }
-    CONSTEXPR dma_status1_r &set_BS_BITSTREAM_VALID_C1(uint32_t value)
+    debug_block_r copy() volatile // returns a non-volatile copy of the current value
     {
-        BS_BITSTREAM_VALID_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_BS_BITSTREAM_READY_C1() const
+    CONSTEXPR uint32_t get_block() const // block spans all 32 bits, so no shift or mask is applied
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_READY_C1);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_BS_BITSTREAM_READY_C1() const volatile
+    uint32_t get_block() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(BS_BITSTREAM_READY_C1);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR dma_status1_r &set_BS_BITSTREAM_READY_C1(uint32_t value)
+    CONSTEXPR debug_block_r &set_block(uint32_t value) // overwrites the whole word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile debug_block_r &set_block(uint32_t value) volatile
     {
-        BS_BITSTREAM_READY_C1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// clkforce_r - Force clocks on for clock gating
-struct clkforce_r
+// pmcr_r - PMU Register control; control bits sit at bits 0-3 and the 5-bit num_event_cnt field at bits 11-15
+struct pmcr_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t top_level_clk : 1; // set to 1 to force on TOP level clock
-            uint32_t cc_clk : 1;        // set to 1 to force on CC clock
-            uint32_t dma_clk : 1;       // set to 1 to force on DMA clock
-            uint32_t mac_clk : 1;       // set to 1 to force on MAC clock
-            uint32_t ao_clk : 1;        // set to 1 to force on AO clock
-            uint32_t wd_clk : 1;        // set to 1 to force on WD clock
-            uint32_t reserved0 : 26;
+            uint32_t cnt_en : 1;        // Enable counter
+            uint32_t event_cnt_rst : 1; // Reset event counter
+            uint32_t cycle_cnt_rst : 1; // Reset cycle counter
+            uint32_t mask_en : 1;       // PMU can be enabled/disabled by command stream operation NPU_OP_PMU_MASK
+            uint32_t reserved0 : 7;
+            uint32_t num_event_cnt : 5; // Number of event counters
+            uint32_t reserved1 : 16;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // raw register value; the accessors below shift and mask individual fields out of it
+
   public:
-    CONSTEXPR clkforce_r() :
-        top_level_clk(static_cast<uint32_t>(0)), cc_clk(static_cast<uint32_t>(0)), dma_clk(static_cast<uint32_t>(0)),
-        mac_clk(static_cast<uint32_t>(0)), ao_clk(static_cast<uint32_t>(0)), wd_clk(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR clkforce_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmcr_r() : word0(8192) {} // 8192 == 4 << 11, so num_event_cnt reads back as 4 by default
+    CONSTEXPR pmcr_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    clkforce_r copy() volatile
+    pmcr_r copy() volatile // returns a non-volatile copy of the current value
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_top_level_clk() const
+    CONSTEXPR uint32_t get_cnt_en() const
     {
-        uint32_t value = static_cast<uint32_t>(top_level_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_top_level_clk() const volatile
+    uint32_t get_cnt_en() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(top_level_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR clkforce_r &set_top_level_clk(uint32_t value)
+    CONSTEXPR pmcr_r &set_cnt_en(uint32_t value) // rewrites bit 0 only; all other bits are kept
     {
-        top_level_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_cc_clk() const
+    volatile pmcr_r &set_cnt_en(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_event_cnt_rst() const
     {
-        uint32_t value = static_cast<uint32_t>(cc_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_cc_clk() const volatile
+    uint32_t get_event_cnt_rst() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cc_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR clkforce_r &set_cc_clk(uint32_t value)
+    CONSTEXPR pmcr_r &set_event_cnt_rst(uint32_t value) // rewrites bit 1 only; all other bits are kept
     {
-        cc_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_dma_clk() const
+    volatile pmcr_r &set_event_cnt_rst(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cycle_cnt_rst() const
     {
-        uint32_t value = static_cast<uint32_t>(dma_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_dma_clk() const volatile
+    uint32_t get_cycle_cnt_rst() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(dma_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR clkforce_r &set_dma_clk(uint32_t value)
+    CONSTEXPR pmcr_r &set_cycle_cnt_rst(uint32_t value) // rewrites bit 2 only; all other bits are kept
     {
-        dma_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_mac_clk() const
+    volatile pmcr_r &set_cycle_cnt_rst(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mask_en() const
     {
-        uint32_t value = static_cast<uint32_t>(mac_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_mac_clk() const volatile
+    uint32_t get_mask_en() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(mac_clk);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR clkforce_r &set_mac_clk(uint32_t value)
+    CONSTEXPR pmcr_r &set_mask_en(uint32_t value) // rewrites bit 3 only; all other bits are kept
     {
-        mac_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_ao_clk() const
+    volatile pmcr_r &set_mask_en(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(ao_clk);
-        return value;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    uint32_t get_ao_clk() const volatile
+    CONSTEXPR uint32_t get_num_event_cnt() const
     {
-        uint32_t value = static_cast<uint32_t>(ao_clk);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 11);
         return value;
     }
-    CONSTEXPR clkforce_r &set_ao_clk(uint32_t value)
-    {
-        ao_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_wd_clk() const
+    uint32_t get_num_event_cnt() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(wd_clk);
+        uint32_t value = ((1U << 5) - 1) & (word0 >> 11);
         return value;
     }
-    uint32_t get_wd_clk() const volatile
+    CONSTEXPR pmcr_r &set_num_event_cnt(uint32_t value) // rewrites bits 11-15 only; all other bits are kept
     {
-        uint32_t value = static_cast<uint32_t>(wd_clk);
-        return value;
+        word0 = ((~(((1U << 5) - 1) << 11)) & word0) | ((((1U << 5) - 1) & value) << 11);
+        return *this;
     }
-    CONSTEXPR clkforce_r &set_wd_clk(uint32_t value)
+    volatile pmcr_r &set_num_event_cnt(uint32_t value) volatile
     {
-        wd_clk = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 5) - 1) << 11)) & word0) | ((((1U << 5) - 1) & value) << 11);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pid4_r - Peripheral ID byte 4 (Arm=code 4)
-struct pid4_r
+// pmcntenset_r - Count enable set register; event counter bits 0-3 plus the cycle counter bit 31
+struct pmcntenset_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t PID4; // Byte 4 of Peripheral ID (Lower 8 bits valid)
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // Event counter enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // Event counter enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // Event counter enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // Event counter enable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT : 1; // PMCCNTR enable bit
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // raw register value; the accessors below shift and mask individual fields out of it
+
   public:
-    CONSTEXPR pid4_r() : PID4(static_cast<uint32_t>(0x04)) {}
-    CONSTEXPR pid4_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmcntenset_r() : word0(0) {}
+    CONSTEXPR pmcntenset_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pid4_r copy() volatile
+    pmcntenset_r copy() volatile // returns a non-volatile copy of the current value
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_PID4() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
     {
-        uint32_t value = static_cast<uint32_t>(PID4);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_PID4() const volatile
+    uint32_t get_EVENT_CNT_0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID4);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR pid4_r &set_PID4(uint32_t value)
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_0(uint32_t value) // rewrites bit 0 only; all other bits are kept
     {
-        PID4 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// pid5_r - Peripheral ID byte 5 (reserved)
-struct pid5_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t PID5; // Byte 5 of Peripheral ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR pid5_r() : PID5(static_cast<uint32_t>(0x00)) {}
-    CONSTEXPR pid5_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmcntenset_r &set_EVENT_CNT_0(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_1() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_1(uint32_t value) // rewrites bit 1 only; all other bits are kept
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    pid5_r copy() volatile
+    volatile pmcntenset_r &set_EVENT_CNT_1(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_PID5() const
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
     {
-        uint32_t value = static_cast<uint32_t>(PID5);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_PID5() const volatile
+    uint32_t get_EVENT_CNT_2() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID5);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR pid5_r &set_PID5(uint32_t value)
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_2(uint32_t value) // rewrites bit 2 only; all other bits are kept
     {
-        PID5 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// pid6_r - Peripheral ID byte 6 (reserved)
-struct pid6_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t PID6; // Byte 6 of Peripheral ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR pid6_r() : PID6(static_cast<uint32_t>(0x00)) {}
-    CONSTEXPR pid6_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmcntenset_r &set_EVENT_CNT_2(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_3() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_3(uint32_t value) // rewrites bit 3 only; all other bits are kept
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    pid6_r copy() volatile
+    volatile pmcntenset_r &set_EVENT_CNT_3(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_PID6() const
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
     {
-        uint32_t value = static_cast<uint32_t>(PID6);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_PID6() const volatile
+    uint32_t get_CYCLE_CNT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID6);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR pid6_r &set_PID6(uint32_t value)
+    CONSTEXPR pmcntenset_r &set_CYCLE_CNT(uint32_t value) // rewrites bit 31 only; all other bits are kept
     {
-        PID6 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+    volatile pmcntenset_r &set_CYCLE_CNT(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+#endif
 };
 
-// pid7_r - Peripheral ID byte 7 (reserved)
-struct pid7_r
+// pmcntenclr_r - Count enable clear register (every set_* accessor rewrites only its own bit of word0)
+struct pmcntenclr_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t PID7; // Byte 7 of Peripheral ID (Lower 8 bits valid)
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // Event counter disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // Event counter disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // Event counter disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // Event counter disable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT : 1; // PMCCNTR disable bit
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // Raw register image: EVENT_CNT_0..3 at bits 0..3, CYCLE_CNT at bit 31
+
   public:
-    CONSTEXPR pid7_r() : PID7(static_cast<uint32_t>(0x00)) {}
-    CONSTEXPR pid7_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmcntenclr_r() : word0(0) {}
+    CONSTEXPR pmcntenclr_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pid7_r copy() volatile
+    pmcntenclr_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_PID7() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
     {
-        uint32_t value = static_cast<uint32_t>(PID7);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_PID7() const volatile
+    uint32_t get_EVENT_CNT_0() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID7);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR pid7_r &set_PID7(uint32_t value)
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_0(uint32_t value)
     {
-        PID7 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// pid0_r - Peripheral ID byte 0. This is bits[7:0] of the part number.
-struct pid0_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t PID0; // Byte 0 of Peripheral ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR pid0_r() : PID0(static_cast<uint32_t>(0x80)) {}
-    CONSTEXPR pid0_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmcntenclr_r &set_EVENT_CNT_0(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_1() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_1(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    pid0_r copy() volatile
+    volatile pmcntenclr_r &set_EVENT_CNT_1(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_PID0() const
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
     {
-        uint32_t value = static_cast<uint32_t>(PID0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_PID0() const volatile
+    uint32_t get_EVENT_CNT_2() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR pid0_r &set_PID0(uint32_t value)
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_2(uint32_t value)
     {
-        PID0 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// pid1_r - Peripheral ID byte 1. This is bits[11:8] of the part number in bits[3:0], and bits[3:0] of the Arm ID in
-// bits[7:4].
-struct pid1_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t PID1; // Byte 1 of Peripheral ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR pid1_r() : PID1(static_cast<uint32_t>(0xB5)) {}
-    CONSTEXPR pid1_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmcntenclr_r &set_EVENT_CNT_2(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_3() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_3(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    pid1_r copy() volatile
+    volatile pmcntenclr_r &set_EVENT_CNT_3(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_PID1() const
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
     {
-        uint32_t value = static_cast<uint32_t>(PID1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_PID1() const volatile
+    uint32_t get_CYCLE_CNT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR pid1_r &set_PID1(uint32_t value)
+    CONSTEXPR pmcntenclr_r &set_CYCLE_CNT(uint32_t value)
+    {
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_CYCLE_CNT(uint32_t value) volatile
     {
-        PID1 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pid2_r - Peripheral ID byte 2. This is bits[6:4] of the Arm ID in bits[2:0], and bit 3 indicates format B.
-struct pid2_r
+// pmovsset_r - Overflow flag status set register (every set_* accessor rewrites only its own bit of word0)
+struct pmovsset_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t PID2; // Byte 2 of Peripheral ID (Lower 8 bits valid)
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow set bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow set bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow set bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow set bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow set bit
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // Raw register image: EVENT_CNT_0..3_OVF at bits 0..3, CYCLE_CNT_OVF at bit 31
+
   public:
-    CONSTEXPR pid2_r() : PID2(static_cast<uint32_t>(0x0B)) {}
-    CONSTEXPR pid2_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmovsset_r() : word0(0) {}
+    CONSTEXPR pmovsset_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pid2_r copy() volatile
+    pmovsset_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_PID2() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(PID2);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_PID2() const volatile
+    uint32_t get_EVENT_CNT_0_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID2);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR pid2_r &set_PID2(uint32_t value)
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value)
     {
-        PID2 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// pid3_r - Peripheral ID byte 3.
-struct pid3_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t PID3; // Byte 1 of Peripheral ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR pid3_r() : PID3(static_cast<uint32_t>(0x0)) {}
-    CONSTEXPR pid3_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_1_OVF() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    pid3_r copy() volatile
+    volatile pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_PID3() const
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(PID3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_PID3() const volatile
+    uint32_t get_EVENT_CNT_2_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(PID3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR pid3_r &set_PID3(uint32_t value)
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value)
     {
-        PID3 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// cid0_r - Component ID byte 0.
-struct cid0_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t CID0; // Byte 0 of Component ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR cid0_r() : CID0(static_cast<uint32_t>(0x0D)) {}
-    CONSTEXPR cid0_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_3_OVF() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    cid0_r copy() volatile
+    volatile pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_CID0() const
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(CID0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_CID0() const volatile
+    uint32_t get_CYCLE_CNT_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CID0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR cid0_r &set_CID0(uint32_t value)
+    CONSTEXPR pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value)
     {
-        CID0 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+    volatile pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+#endif
 };
 
-// cid1_r - Component ID byte 1.
-struct cid1_r
+// pmovsclr_r - Overflow flag status clear register (every set_* accessor rewrites only its own bit of word0)
+struct pmovsclr_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t CID1; // Byte 1 of Component ID (Lower 8 bits valid)
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow clear bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow clear bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow clear bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow clear bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow clear bit
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // Raw register image: EVENT_CNT_0..3_OVF at bits 0..3, CYCLE_CNT_OVF at bit 31
+
   public:
-    CONSTEXPR cid1_r() : CID1(static_cast<uint32_t>(0xF0)) {}
-    CONSTEXPR cid1_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmovsclr_r() : word0(0) {}
+    CONSTEXPR pmovsclr_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    cid1_r copy() volatile
+    pmovsclr_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_CID1() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(CID1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_CID1() const volatile
+    uint32_t get_EVENT_CNT_0_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CID1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR cid1_r &set_CID1(uint32_t value)
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value)
     {
-        CID1 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// cid2_r - Component ID byte 2.
-struct cid2_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t CID2; // Byte 2 of Component ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR cid2_r() : CID2(static_cast<uint32_t>(0x05)) {}
-    CONSTEXPR cid2_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_1_OVF() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    cid2_r copy() volatile
+    volatile pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_CID2() const
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(CID2);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_CID2() const volatile
+    uint32_t get_EVENT_CNT_2_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CID2);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR cid2_r &set_CID2(uint32_t value)
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value)
     {
-        CID2 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-#endif //__cplusplus
-};
-
-// cid3_r - Component ID byte 3.
-struct cid3_r
-{
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
-    union
-    {
-        uint32_t CID3; // Byte 3 of Component ID (Lower 8 bits valid)
-        uint32_t word;
-    };
-#ifdef __cplusplus
-  public:
-    CONSTEXPR cid3_r() : CID3(static_cast<uint32_t>(0xB1)) {}
-    CONSTEXPR cid3_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    volatile pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value) volatile
     {
-        word = value;
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    void operator=(uint32_t value) volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
     {
-        word = value;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    CONSTEXPR operator uint32_t()
+    uint32_t get_EVENT_CNT_3_OVF() const volatile
     {
-        return word;
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
     }
-    operator uint32_t() volatile
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value)
     {
-        return word;
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
     }
-    cid3_r copy() volatile
+    volatile pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value) volatile
     {
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_CID3() const
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
     {
-        uint32_t value = static_cast<uint32_t>(CID3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_CID3() const volatile
+    uint32_t get_CYCLE_CNT_OVF() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CID3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR cid3_r &set_CID3(uint32_t value)
+    CONSTEXPR pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value)
+    {
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+    volatile pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value) volatile
     {
-        CID3 = static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmcr_r - PMU Register control
-struct pmcr_r
+// pmintset_r - Interrupt enable set register (every set_* accessor rewrites only its own bit of word0)
+struct pmintset_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t cnt_en : 1;        // Enable counter
-            uint32_t event_cnt_rst : 1; // Reset event counter
-            uint32_t cycle_cnt_rst : 1; // Reset cycle counter
-            uint32_t mask_en : 1;       // PMU can be enabled/disabled by command stream operation NPU_OP_PMU_MASK
-            uint32_t reserved0 : 7;
-            uint32_t num_event_cnt : 5; // Number of event counters
-            uint32_t reserved1 : 16;
+            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request enable bit
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0; // Raw register image: EVENT_CNT_0..3_INT at bits 0..3, CYCLE_CNT_INT at bit 31
+
   public:
-    CONSTEXPR pmcr_r() :
-        cnt_en(static_cast<uint32_t>(0x0)), event_cnt_rst(static_cast<uint32_t>(0)),
-        cycle_cnt_rst(static_cast<uint32_t>(0)), mask_en(static_cast<uint32_t>(0x0)),
-        reserved0(static_cast<uint32_t>(0)), num_event_cnt(static_cast<uint32_t>(0x04)),
-        reserved1(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmcr_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmintset_r() : word0(0) {}
+    CONSTEXPR pmintset_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmcr_r copy() volatile
+    pmintset_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_cnt_en() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(cnt_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_cnt_en() const volatile
+    uint32_t get_EVENT_CNT_0_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cnt_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR pmcr_r &set_cnt_en(uint32_t value)
+    CONSTEXPR pmintset_r &set_EVENT_CNT_0_INT(uint32_t value)
     {
-        cnt_en = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_event_cnt_rst() const
+    volatile pmintset_r &set_EVENT_CNT_0_INT(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 0)) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(event_cnt_rst);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_event_cnt_rst() const volatile
+    uint32_t get_EVENT_CNT_1_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(event_cnt_rst);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR pmcr_r &set_event_cnt_rst(uint32_t value)
+    CONSTEXPR pmintset_r &set_EVENT_CNT_1_INT(uint32_t value)
     {
-        event_cnt_rst = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_cycle_cnt_rst() const
+    volatile pmintset_r &set_EVENT_CNT_1_INT(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 1)) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(cycle_cnt_rst);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_cycle_cnt_rst() const volatile
+    uint32_t get_EVENT_CNT_2_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(cycle_cnt_rst);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR pmcr_r &set_cycle_cnt_rst(uint32_t value)
+    CONSTEXPR pmintset_r &set_EVENT_CNT_2_INT(uint32_t value)
     {
-        cycle_cnt_rst = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_mask_en() const
+    volatile pmintset_r &set_EVENT_CNT_2_INT(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 2)) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(mask_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    uint32_t get_mask_en() const volatile
+    uint32_t get_EVENT_CNT_3_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(mask_en);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR pmcr_r &set_mask_en(uint32_t value)
+    CONSTEXPR pmintset_r &set_EVENT_CNT_3_INT(uint32_t value)
     {
-        mask_en = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
         return *this;
     }
-    CONSTEXPR uint32_t get_num_event_cnt() const
+    volatile pmintset_r &set_EVENT_CNT_3_INT(uint32_t value) volatile
+    {
+        word0 = ((~(((1U << 1) - 1) << 3)) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(num_event_cnt);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    uint32_t get_num_event_cnt() const volatile
+    uint32_t get_CYCLE_CNT_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(num_event_cnt);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
         return value;
     }
-    CONSTEXPR pmcr_r &set_num_event_cnt(uint32_t value)
+    CONSTEXPR pmintset_r &set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+    volatile pmintset_r &set_CYCLE_CNT_INT(uint32_t value) volatile
     {
-        num_event_cnt = ((1u << 5) - 1) & static_cast<uint32_t>(value);
+        word0 = ((~(((1U << 1) - 1) << 31)) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmcntenset_r - Count enable set register
-struct pmcntenset_r
+// pmintclr_r - Interrupt enable clear register
+struct pmintclr_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0 : 1; // Event counter enable bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1 : 1; // Event counter enable bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2 : 1; // Event counter enable bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3 : 1; // Event counter enable bit for PMEVCNTR3
+            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR3
             uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT : 1; // PMCCNTR enable bit
+            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request disable bit
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmcntenset_r() :
-        EVENT_CNT_0(static_cast<uint32_t>(0)), EVENT_CNT_1(static_cast<uint32_t>(0)),
-        EVENT_CNT_2(static_cast<uint32_t>(0)), EVENT_CNT_3(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmcntenset_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmintclr_r() : word0(0) {}
+    CONSTEXPR pmintclr_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmcntenset_r copy() volatile
+    pmintclr_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    uint32_t get_EVENT_CNT_0() const volatile
+    uint32_t get_EVENT_CNT_0_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
         return value;
     }
-    CONSTEXPR pmcntenset_r &set_EVENT_CNT_0(uint32_t value)
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value)
     {
-        EVENT_CNT_0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    volatile pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 0) & word0) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
     }
-    uint32_t get_EVENT_CNT_1() const volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    CONSTEXPR pmcntenset_r &set_EVENT_CNT_1(uint32_t value)
-    {
-        EVENT_CNT_1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    uint32_t get_EVENT_CNT_1_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
         return value;
     }
-    uint32_t get_EVENT_CNT_2() const volatile
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
     }
-    CONSTEXPR pmcntenset_r &set_EVENT_CNT_2(uint32_t value)
+    volatile pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value) volatile
     {
-        EVENT_CNT_2 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 1) & word0) | ((((1U << 1) - 1) & value) << 1);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    uint32_t get_EVENT_CNT_3() const volatile
+    uint32_t get_EVENT_CNT_2_INT() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
         return value;
     }
-    CONSTEXPR pmcntenset_r &set_EVENT_CNT_3(uint32_t value)
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value)
     {
-        EVENT_CNT_3 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    volatile pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 2) & word0) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
     }
-    uint32_t get_CYCLE_CNT() const volatile
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
         return value;
     }
-    CONSTEXPR pmcntenset_r &set_CYCLE_CNT(uint32_t value)
+    uint32_t get_EVENT_CNT_3_INT() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
+    }
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 3) & word0) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
     {
-        CYCLE_CNT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
+        return value;
+    }
+    uint32_t get_CYCLE_CNT_INT() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 31);
+        return value;
+    }
+    CONSTEXPR pmintclr_r &set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = (((~((1U << 1) - 1)) << 31) & word0) | ((((1U << 1) - 1) & value) << 31);
         return *this;
     }
-#endif //__cplusplus
+    volatile pmintclr_r &set_CYCLE_CNT_INT(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 1) - 1)) << 31) & word0) | ((((1U << 1) - 1) & value) << 31);
+        return *this;
+    }
+#endif
 };
 
-// pmcntenclr_r - Count enable clear register
-struct pmcntenclr_r
+// pmccntr_r - Performance monitor cycle count register
+struct pmccntr_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0 : 1; // Event counter disable bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1 : 1; // Event counter disable bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2 : 1; // Event counter disable bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3 : 1; // Event counter disable bit for PMEVCNTR3
-            uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT : 1; // PMCCNTR disable bit
+            uint32_t CYCLE_CNT_LO : 32; // Cycle count - LSB
+            uint32_t CYCLE_CNT_HI : 16; // Cycle count - MSB
+            uint32_t reserved0 : 16;
         };
-        uint32_t word;
+        uint32_t word[2];
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
   public:
-    CONSTEXPR pmcntenclr_r() :
-        EVENT_CNT_0(static_cast<uint32_t>(0)), EVENT_CNT_1(static_cast<uint32_t>(0)),
-        EVENT_CNT_2(static_cast<uint32_t>(0)), EVENT_CNT_3(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT(static_cast<uint32_t>(0))
+    CONSTEXPR pmccntr_r() : word0(0), word1(0) {}
+    CONSTEXPR pmccntr_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
     {
     }
-    CONSTEXPR pmcntenclr_r(uint32_t init) : word(init) {}
-    CONSTEXPR void operator=(uint32_t value)
+    CONSTEXPR void operator=(uint64_t value)
     {
-        word = value;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    void operator=(uint32_t value) volatile
+    void operator=(uint64_t value) volatile
     {
-        word = value;
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
     }
-    CONSTEXPR operator uint32_t()
+    CONSTEXPR operator uint64_t()
     {
-        return word;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    operator uint32_t() volatile
+    operator uint64_t() volatile
     {
-        return word;
+        return (static_cast<uint64_t>(word1) << 32) | word0;
     }
-    pmcntenclr_r copy() volatile
+    pmccntr_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+#endif
+};
+
+// pmccntr_cfg_r - Set start/stop event on the cycle counter
+struct pmccntr_cfg_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0);
-        return value;
-    }
-    uint32_t get_EVENT_CNT_0() const volatile
+        struct
+        {
+            uint32_t CYCLE_CNT_CFG_START : 10; // Cycle counter start event
+            uint32_t reserved0 : 6;
+            uint32_t CYCLE_CNT_CFG_STOP : 10; // Cycle counter stop event
+            uint32_t reserved1 : 6;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR pmccntr_cfg_r() : word0(0) {}
+    CONSTEXPR pmccntr_cfg_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0);
-        return value;
+        word0 = value;
     }
-    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_0(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        EVENT_CNT_0 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1);
-        return value;
+        return word0;
     }
-    uint32_t get_EVENT_CNT_1() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1);
-        return value;
+        return word0;
     }
-    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_1(uint32_t value)
+    pmccntr_cfg_r copy() volatile
     {
-        EVENT_CNT_1 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_START() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2);
+        NPU_NAMESPACE::pmu_event value = static_cast<NPU_NAMESPACE::pmu_event>(((1U << 10) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_EVENT_CNT_2() const volatile
+    NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_START() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2);
+        NPU_NAMESPACE::pmu_event value = static_cast<NPU_NAMESPACE::pmu_event>(((1U << 10) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_2(uint32_t value)
+    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(NPU_NAMESPACE::pmu_event value)
     {
-        EVENT_CNT_2 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 10) - 1)) << 0) & word0) | ((((1U << 10) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    volatile pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(NPU_NAMESPACE::pmu_event value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3);
-        return value;
+        word0 = (((~((1U << 10) - 1)) << 0) & word0) | ((((1U << 10) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
     }
-    uint32_t get_EVENT_CNT_3() const volatile
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_STOP() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3);
+        NPU_NAMESPACE::pmu_event value = static_cast<NPU_NAMESPACE::pmu_event>(((1U << 10) - 1) & (word0 >> 16));
         return value;
     }
-    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_3(uint32_t value)
-    {
-        EVENT_CNT_3 = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_STOP() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT);
+        NPU_NAMESPACE::pmu_event value = static_cast<NPU_NAMESPACE::pmu_event>(((1U << 10) - 1) & (word0 >> 16));
         return value;
     }
-    uint32_t get_CYCLE_CNT() const volatile
+    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(NPU_NAMESPACE::pmu_event value)
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT);
-        return value;
+        word0 = (((~((1U << 10) - 1)) << 16) & word0) | ((((1U << 10) - 1) & static_cast<uint32_t>(value)) << 16);
+        return *this;
     }
-    CONSTEXPR pmcntenclr_r &set_CYCLE_CNT(uint32_t value)
+    volatile pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(NPU_NAMESPACE::pmu_event value) volatile
     {
-        CYCLE_CNT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 10) - 1)) << 16) & word0) | ((((1U << 10) - 1) & static_cast<uint32_t>(value)) << 16);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmovsset_r - Overflow flag status set register
-struct pmovsset_r
+// pmcaxi_chan_r - Set which AXI channel to monitor for latency measurements in PMU
+struct pmcaxi_chan_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow set bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow set bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow set bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow set bit for PMEVCNTR3
-            uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow set bit
+            uint32_t CH_SEL : 4; // Channel select for latency measurements
+            uint32_t reserved0 : 4;
+            uint32_t AXI_CNT_SEL : 2;  // AXI counter to monitor for latency measurements
+            uint32_t BW_CH_SEL_EN : 1; // Bandwidth channel selector
+            uint32_t reserved1 : 21;
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmovsset_r() :
-        EVENT_CNT_0_OVF(static_cast<uint32_t>(0)), EVENT_CNT_1_OVF(static_cast<uint32_t>(0)),
-        EVENT_CNT_2_OVF(static_cast<uint32_t>(0)), EVENT_CNT_3_OVF(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT_OVF(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmovsset_r(uint32_t init) : word(init) {}
+    CONSTEXPR pmcaxi_chan_r() : word0(0) {}
+    CONSTEXPR pmcaxi_chan_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmovsset_r copy() volatile
+    pmcaxi_chan_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    CONSTEXPR NPU_NAMESPACE::pmu_axi_channel get_CH_SEL() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_OVF);
+        NPU_NAMESPACE::pmu_axi_channel value =
+            static_cast<NPU_NAMESPACE::pmu_axi_channel>(((1U << 4) - 1) & (word0 >> 0));
         return value;
     }
-    uint32_t get_EVENT_CNT_0_OVF() const volatile
+    NPU_NAMESPACE::pmu_axi_channel get_CH_SEL() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_OVF);
+        NPU_NAMESPACE::pmu_axi_channel value =
+            static_cast<NPU_NAMESPACE::pmu_axi_channel>(((1U << 4) - 1) & (word0 >> 0));
         return value;
     }
-    CONSTEXPR pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value)
+    CONSTEXPR pmcaxi_chan_r &set_CH_SEL(NPU_NAMESPACE::pmu_axi_channel value)
     {
-        EVENT_CNT_0_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
-    {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_OVF);
-        return value;
-    }
-    uint32_t get_EVENT_CNT_1_OVF() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_OVF);
-        return value;
-    }
-    CONSTEXPR pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value)
+    volatile pmcaxi_chan_r &set_CH_SEL(NPU_NAMESPACE::pmu_axi_channel value) volatile
     {
-        EVENT_CNT_1_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 0);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    CONSTEXPR uint32_t get_AXI_CNT_SEL() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_OVF);
+        uint32_t value = ((1U << 2) - 1) & (word0 >> 8);
         return value;
     }
-    uint32_t get_EVENT_CNT_2_OVF() const volatile
+    uint32_t get_AXI_CNT_SEL() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_OVF);
+        uint32_t value = ((1U << 2) - 1) & (word0 >> 8);
         return value;
     }
-    CONSTEXPR pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value)
+    CONSTEXPR pmcaxi_chan_r &set_AXI_CNT_SEL(uint32_t value)
     {
-        EVENT_CNT_2_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 2) - 1)) << 8) & word0) | ((((1U << 2) - 1) & value) << 8);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    volatile pmcaxi_chan_r &set_AXI_CNT_SEL(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_OVF);
-        return value;
+        word0 = (((~((1U << 2) - 1)) << 8) & word0) | ((((1U << 2) - 1) & value) << 8);
+        return *this;
     }
-    uint32_t get_EVENT_CNT_3_OVF() const volatile
+    CONSTEXPR uint32_t get_BW_CH_SEL_EN() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_OVF);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    CONSTEXPR pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value)
-    {
-        EVENT_CNT_3_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
+    uint32_t get_BW_CH_SEL_EN() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_OVF);
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 10);
         return value;
     }
-    uint32_t get_CYCLE_CNT_OVF() const volatile
+    CONSTEXPR pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value)
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_OVF);
-        return value;
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
     }
-    CONSTEXPR pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value)
+    volatile pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value) volatile
     {
-        CYCLE_CNT_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = (((~((1U << 1) - 1)) << 10) & word0) | ((((1U << 1) - 1) & value) << 10);
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmovsclr_r - Overflow flag status clear register
-struct pmovsclr_r
+// kernel_x_r - Kernel X offset in kernel decomposition
+struct kernel_x_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow clear bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow clear bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow clear bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow clear bit for PMEVCNTR3
-            uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow clear bit
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmovsclr_r() :
-        EVENT_CNT_0_OVF(static_cast<uint32_t>(0)), EVENT_CNT_1_OVF(static_cast<uint32_t>(0)),
-        EVENT_CNT_2_OVF(static_cast<uint32_t>(0)), EVENT_CNT_3_OVF(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT_OVF(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmovsclr_r(uint32_t init) : word(init) {}
+    CONSTEXPR kernel_x_r() : word0(0) {}
+    CONSTEXPR kernel_x_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmovsclr_r copy() volatile
+    kernel_x_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_OVF);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_0_OVF() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_OVF);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value)
+    CONSTEXPR kernel_x_r &set_value(uint32_t value)
     {
-        EVENT_CNT_0_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
+    volatile kernel_x_r &set_value(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_OVF);
-        return value;
+        word0 = value;
+        return *this;
     }
-    uint32_t get_EVENT_CNT_1_OVF() const volatile
+#endif
+};
+
+// kernel_y_r - Kernel Y offset in kernel decomposition
+struct kernel_y_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_OVF);
-        return value;
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR kernel_y_r() : word0(0) {}
+    CONSTEXPR kernel_y_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        EVENT_CNT_1_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_OVF);
-        return value;
+        return word0;
     }
-    uint32_t get_EVENT_CNT_2_OVF() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_OVF);
-        return value;
+        return word0;
     }
-    CONSTEXPR pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value)
+    kernel_y_r copy() volatile
     {
-        EVENT_CNT_2_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_OVF);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_3_OVF() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_OVF);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value)
+    CONSTEXPR kernel_y_r &set_value(uint32_t value)
     {
-        EVENT_CNT_3_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_OVF);
-        return value;
-    }
-    uint32_t get_CYCLE_CNT_OVF() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_OVF);
-        return value;
-    }
-    CONSTEXPR pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value)
+    volatile kernel_y_r &set_value(uint32_t value) volatile
     {
-        CYCLE_CNT_OVF = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmintset_r - Interrupt enable set register
-struct pmintset_r
+// kernel_w_m1_r - Kernel (width-1) of current block
+struct kernel_w_m1_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR3
-            uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request enable bit
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmintset_r() :
-        EVENT_CNT_0_INT(static_cast<uint32_t>(0)), EVENT_CNT_1_INT(static_cast<uint32_t>(0)),
-        EVENT_CNT_2_INT(static_cast<uint32_t>(0)), EVENT_CNT_3_INT(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT_INT(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmintset_r(uint32_t init) : word(init) {}
+    CONSTEXPR kernel_w_m1_r() : word0(0) {}
+    CONSTEXPR kernel_w_m1_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmintset_r copy() volatile
+    kernel_w_m1_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_INT);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_0_INT() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_INT);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmintset_r &set_EVENT_CNT_0_INT(uint32_t value)
+    CONSTEXPR kernel_w_m1_r &set_value(uint32_t value)
     {
-        EVENT_CNT_0_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    volatile kernel_w_m1_r &set_value(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_INT);
-        return value;
+        word0 = value;
+        return *this;
     }
-    uint32_t get_EVENT_CNT_1_INT() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_INT);
-        return value;
+#endif
+};
+
+// kernel_h_m1_r - Kernel (height-1) of current block
+struct kernel_h_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR kernel_h_m1_r() : word0(0) {}
+    CONSTEXPR kernel_h_m1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR pmintset_r &set_EVENT_CNT_1_INT(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        EVENT_CNT_1_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_INT);
-        return value;
+        return word0;
     }
-    uint32_t get_EVENT_CNT_2_INT() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_INT);
-        return value;
+        return word0;
     }
-    CONSTEXPR pmintset_r &set_EVENT_CNT_2_INT(uint32_t value)
+    kernel_h_m1_r copy() volatile
     {
-        EVENT_CNT_2_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_INT);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_3_INT() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_INT);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmintset_r &set_EVENT_CNT_3_INT(uint32_t value)
+    CONSTEXPR kernel_h_m1_r &set_value(uint32_t value)
     {
-        EVENT_CNT_3_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_INT);
-        return value;
-    }
-    uint32_t get_CYCLE_CNT_INT() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_INT);
-        return value;
-    }
-    CONSTEXPR pmintset_r &set_CYCLE_CNT_INT(uint32_t value)
+    volatile kernel_h_m1_r &set_value(uint32_t value) volatile
     {
-        CYCLE_CNT_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmintclr_r - Interrupt enable clear register
-struct pmintclr_r
+// ofm_cblk_width_m1_r - OFM current block (width-1)
+struct ofm_cblk_width_m1_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR0
-            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR1
-            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR2
-            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR3
-            uint32_t reserved0 : 27;
-            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request disable bit
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmintclr_r() :
-        EVENT_CNT_0_INT(static_cast<uint32_t>(0)), EVENT_CNT_1_INT(static_cast<uint32_t>(0)),
-        EVENT_CNT_2_INT(static_cast<uint32_t>(0)), EVENT_CNT_3_INT(static_cast<uint32_t>(0)),
-        reserved0(static_cast<uint32_t>(0)), CYCLE_CNT_INT(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmintclr_r(uint32_t init) : word(init) {}
+    CONSTEXPR ofm_cblk_width_m1_r() : word0(0) {}
+    CONSTEXPR ofm_cblk_width_m1_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmintclr_r copy() volatile
+    ofm_cblk_width_m1_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_INT);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_0_INT() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_0_INT);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value)
+    CONSTEXPR ofm_cblk_width_m1_r &set_value(uint32_t value)
     {
-        EVENT_CNT_0_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    volatile ofm_cblk_width_m1_r &set_value(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_INT);
-        return value;
+        word0 = value;
+        return *this;
     }
-    uint32_t get_EVENT_CNT_1_INT() const volatile
+#endif
+};
+
+// ofm_cblk_height_m1_r - OFM current block (height-1)
+struct ofm_cblk_height_m1_r
+{
+#ifndef __cplusplus
+    union
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_1_INT);
-        return value;
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR ofm_cblk_height_m1_r() : word0(0) {}
+    CONSTEXPR ofm_cblk_height_m1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    CONSTEXPR pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value)
+    void operator=(uint32_t value) volatile
     {
-        EVENT_CNT_1_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
-        return *this;
+        word0 = value;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    CONSTEXPR operator uint32_t()
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_INT);
-        return value;
+        return word0;
     }
-    uint32_t get_EVENT_CNT_2_INT() const volatile
+    operator uint32_t() volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_2_INT);
-        return value;
+        return word0;
     }
-    CONSTEXPR pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value)
+    ofm_cblk_height_m1_r copy() volatile
     {
-        EVENT_CNT_2_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_INT);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_EVENT_CNT_3_INT() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(EVENT_CNT_3_INT);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value)
+    CONSTEXPR ofm_cblk_height_m1_r &set_value(uint32_t value)
     {
-        EVENT_CNT_3_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_INT);
-        return value;
-    }
-    uint32_t get_CYCLE_CNT_INT() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_INT);
-        return value;
-    }
-    CONSTEXPR pmintclr_r &set_CYCLE_CNT_INT(uint32_t value)
+    volatile ofm_cblk_height_m1_r &set_value(uint32_t value) volatile
     {
-        CYCLE_CNT_INT = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmccntr_lo_r - Performance monitor cycle count low register
-struct pmccntr_lo_r
+// ofm_cblk_depth_m1_r - OFM current block (depth-1)
+struct ofm_cblk_depth_m1_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
-        uint32_t CYCLE_CNT_LO; // Cycle count low
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmccntr_lo_r() : CYCLE_CNT_LO(static_cast<uint32_t>(0x00000000)) {}
-    CONSTEXPR pmccntr_lo_r(uint32_t init) : word(init) {}
+    CONSTEXPR ofm_cblk_depth_m1_r() : word0(0) {}
+    CONSTEXPR ofm_cblk_depth_m1_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmccntr_lo_r copy() volatile
+    ofm_cblk_depth_m1_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_LO() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_LO);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_CYCLE_CNT_LO() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_LO);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmccntr_lo_r &set_CYCLE_CNT_LO(uint32_t value)
+    CONSTEXPR ofm_cblk_depth_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_cblk_depth_m1_r &set_value(uint32_t value) volatile
     {
-        CYCLE_CNT_LO = static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmccntr_hi_r - Performance monitor cycle count high register
-struct pmccntr_hi_r
+// ifm_cblk_depth_m1_r - IFM current block (depth-1)
+struct ifm_cblk_depth_m1_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t CYCLE_CNT_HI : 16; // Cycle count high
-            uint32_t reserved0 : 16;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmccntr_hi_r() : CYCLE_CNT_HI(static_cast<uint32_t>(0x0000)), reserved0(static_cast<uint32_t>(0)) {}
-    CONSTEXPR pmccntr_hi_r(uint32_t init) : word(init) {}
+    CONSTEXPR ifm_cblk_depth_m1_r() : word0(0) {}
+    CONSTEXPR ifm_cblk_depth_m1_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmccntr_hi_r copy() volatile
+    ifm_cblk_depth_m1_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_HI() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_HI);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_CYCLE_CNT_HI() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_HI);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmccntr_hi_r &set_CYCLE_CNT_HI(uint32_t value)
+    CONSTEXPR ifm_cblk_depth_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_depth_m1_r &set_value(uint32_t value) volatile
     {
-        CYCLE_CNT_HI = ((1u << 16) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmccntr_cfg_r - Set start/stop event on the cycle counter
-struct pmccntr_cfg_r
+// ofm_x_r - Block X coordinate in OFM
+struct ofm_x_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t CYCLE_CNT_CFG_START : 10; // Cycle counter start event
-            uint32_t reserved0 : 6;
-            uint32_t CYCLE_CNT_CFG_STOP : 10; // Cycle counter stop event
-            uint32_t reserved1 : 6;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmccntr_cfg_r() :
-        CYCLE_CNT_CFG_START(static_cast<uint32_t>(0x00)), reserved0(static_cast<uint32_t>(0)),
-        CYCLE_CNT_CFG_STOP(static_cast<uint32_t>(0x00)), reserved1(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmccntr_cfg_r(uint32_t init) : word(init) {}
+    CONSTEXPR ofm_x_r() : word0(0) {}
+    CONSTEXPR ofm_x_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmccntr_cfg_r copy() volatile
+    ofm_x_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_CFG_START() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_CFG_START);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_CYCLE_CNT_CFG_START() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_CFG_START);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(uint32_t value)
+    CONSTEXPR ofm_x_r &set_value(uint32_t value)
     {
-        CYCLE_CNT_CFG_START = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_CYCLE_CNT_CFG_STOP() const
-    {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_CFG_STOP);
-        return value;
-    }
-    uint32_t get_CYCLE_CNT_CFG_STOP() const volatile
+    volatile ofm_x_r &set_value(uint32_t value) volatile
     {
-        uint32_t value = static_cast<uint32_t>(CYCLE_CNT_CFG_STOP);
-        return value;
-    }
-    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(uint32_t value)
-    {
-        CYCLE_CNT_CFG_STOP = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmcaxi_chan_r - Set which AXI channel to monitor for latency measurements in PMU
-struct pmcaxi_chan_r
+// ofm_y_r - Block Y coordinate in OFM
+struct ofm_y_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t CH_SEL : 4; // Channel number to monitor for latency measurements (Read: 0=Cmd 1=IFM 2=Weights
-                                 // 3=Scale+Bias 4=Mem2Mem; Write: 8=OFM 9=Mem2Mem)
-            uint32_t reserved0 : 4;
-            uint32_t AXI_CNT_SEL : 2;  // AXI counter to monitor for latency measurements (0=AXI0 counter0, 1=AXI0
-                                       // counter1, 2=AXI1 counter 2, 3=AXI counter3)
-            uint32_t BW_CH_SEL_EN : 1; // Bandwidth channel selector enable: {0=AXI bw events measured for all channels,
-                                       // 1=AXI bw events measured for channel specified by CH_SEL
-            uint32_t reserved1 : 21;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmcaxi_chan_r() :
-        CH_SEL(static_cast<uint32_t>(0x0)), reserved0(static_cast<uint32_t>(0)),
-        AXI_CNT_SEL(static_cast<uint32_t>(0x000000)), BW_CH_SEL_EN(static_cast<uint32_t>(0x000000)),
-        reserved1(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmcaxi_chan_r(uint32_t init) : word(init) {}
+    CONSTEXPR ofm_y_r() : word0(0) {}
+    CONSTEXPR ofm_y_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
-    }
-    pmcaxi_chan_r copy() volatile
-    {
-        return *this;
-    }
-    CONSTEXPR uint32_t get_CH_SEL() const
-    {
-        uint32_t value = static_cast<uint32_t>(CH_SEL);
-        return value;
-    }
-    uint32_t get_CH_SEL() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(CH_SEL);
-        return value;
+        return word0;
     }
-    CONSTEXPR pmcaxi_chan_r &set_CH_SEL(uint32_t value)
+    ofm_y_r copy() volatile
     {
-        CH_SEL = ((1u << 4) - 1) & static_cast<uint32_t>(value);
         return *this;
     }
-    CONSTEXPR uint32_t get_AXI_CNT_SEL() const
+    CONSTEXPR uint32_t get_value() const
     {
-        uint32_t value = static_cast<uint32_t>(AXI_CNT_SEL);
+        uint32_t value = word0;
         return value;
     }
-    uint32_t get_AXI_CNT_SEL() const volatile
+    uint32_t get_value() const volatile
     {
-        uint32_t value = static_cast<uint32_t>(AXI_CNT_SEL);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmcaxi_chan_r &set_AXI_CNT_SEL(uint32_t value)
+    CONSTEXPR ofm_y_r &set_value(uint32_t value)
     {
-        AXI_CNT_SEL = ((1u << 2) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-    CONSTEXPR uint32_t get_BW_CH_SEL_EN() const
-    {
-        uint32_t value = static_cast<uint32_t>(BW_CH_SEL_EN);
-        return value;
-    }
-    uint32_t get_BW_CH_SEL_EN() const volatile
-    {
-        uint32_t value = static_cast<uint32_t>(BW_CH_SEL_EN);
-        return value;
-    }
-    CONSTEXPR pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value)
+    volatile ofm_y_r &set_value(uint32_t value) volatile
     {
-        BW_CH_SEL_EN = ((1u << 1) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmevtyper0_r - Performance monitor event type register 0
-struct pmevtyper0_r
+// ofm_z_r - Block Z (channel) coordinate in OFM
+struct ofm_z_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EV_TYPE : 10; // Event Type
-            uint32_t reserved0 : 22;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmevtyper0_r() :
-        EV_TYPE(static_cast<uint32_t>(::pmu_event_type::NO_EVENT)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmevtyper0_r(uint32_t init) : word(init) {}
+    CONSTEXPR ofm_z_r() : word0(0) {}
+    CONSTEXPR ofm_z_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmevtyper0_r copy() volatile
+    ofm_z_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    CONSTEXPR uint32_t get_value() const
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    ::pmu_event_type get_EV_TYPE() const volatile
+    uint32_t get_value() const volatile
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmevtyper0_r &set_EV_TYPE(::pmu_event_type value)
+    CONSTEXPR ofm_z_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_z_r &set_value(uint32_t value) volatile
     {
-        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmevtyper1_r - Performance monitor event type register 1
-struct pmevtyper1_r
+// ifm_z_r - Block Z (channel) coordinate in IFM
+struct ifm_z_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EV_TYPE : 10; // Event Type
-            uint32_t reserved0 : 22;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmevtyper1_r() :
-        EV_TYPE(static_cast<uint32_t>(::pmu_event_type::NO_EVENT)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmevtyper1_r(uint32_t init) : word(init) {}
+    CONSTEXPR ifm_z_r() : word0(0) {}
+    CONSTEXPR ifm_z_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmevtyper1_r copy() volatile
+    ifm_z_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    CONSTEXPR uint32_t get_value() const
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    ::pmu_event_type get_EV_TYPE() const volatile
+    uint32_t get_value() const volatile
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmevtyper1_r &set_EV_TYPE(::pmu_event_type value)
+    CONSTEXPR ifm_z_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_z_r &set_value(uint32_t value) volatile
     {
-        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmevtyper2_r - Performance monitor event type register 2
-struct pmevtyper2_r
+// pad_top_r - Block top pad
+struct pad_top_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EV_TYPE : 10; // Event Type
-            uint32_t reserved0 : 22;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmevtyper2_r() :
-        EV_TYPE(static_cast<uint32_t>(::pmu_event_type::NO_EVENT)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmevtyper2_r(uint32_t init) : word(init) {}
+    CONSTEXPR pad_top_r() : word0(0) {}
+    CONSTEXPR pad_top_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmevtyper2_r copy() volatile
+    pad_top_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    CONSTEXPR uint32_t get_value() const
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    ::pmu_event_type get_EV_TYPE() const volatile
+    uint32_t get_value() const volatile
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmevtyper2_r &set_EV_TYPE(::pmu_event_type value)
+    CONSTEXPR pad_top_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pad_top_r &set_value(uint32_t value) volatile
     {
-        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+#endif
 };
 
-// pmevtyper3_r - Performance monitor event type register 3
-struct pmevtyper3_r
+// pad_left_r - Block left pad
+struct pad_left_r
 {
-#ifdef __cplusplus
-  private:
-#endif //__cplusplus
+#ifndef __cplusplus
     union
     {
         struct
         {
-            uint32_t EV_TYPE : 10; // Event Type
-            uint32_t reserved0 : 22;
+            uint32_t value : 32; // 32-bit register value
         };
         uint32_t word;
     };
-#ifdef __cplusplus
+#else
+  private:
+    uint32_t word0;
+
   public:
-    CONSTEXPR pmevtyper3_r() :
-        EV_TYPE(static_cast<uint32_t>(::pmu_event_type::NO_EVENT)), reserved0(static_cast<uint32_t>(0))
-    {
-    }
-    CONSTEXPR pmevtyper3_r(uint32_t init) : word(init) {}
+    CONSTEXPR pad_left_r() : word0(0) {}
+    CONSTEXPR pad_left_r(uint32_t init) : word0(init) {}
     CONSTEXPR void operator=(uint32_t value)
     {
-        word = value;
+        word0 = value;
     }
     void operator=(uint32_t value) volatile
     {
-        word = value;
+        word0 = value;
     }
     CONSTEXPR operator uint32_t()
     {
-        return word;
+        return word0;
     }
     operator uint32_t() volatile
     {
-        return word;
+        return word0;
     }
-    pmevtyper3_r copy() volatile
+    pad_left_r copy() volatile
     {
         return *this;
     }
-    CONSTEXPR ::pmu_event_type get_EV_TYPE() const
+    CONSTEXPR uint32_t get_value() const
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    ::pmu_event_type get_EV_TYPE() const volatile
+    uint32_t get_value() const volatile
     {
-        ::pmu_event_type value = static_cast<::pmu_event_type>(EV_TYPE);
+        uint32_t value = word0;
         return value;
     }
-    CONSTEXPR pmevtyper3_r &set_EV_TYPE(::pmu_event_type value)
+    CONSTEXPR pad_left_r &set_value(uint32_t value)
     {
-        EV_TYPE = ((1u << 10) - 1) & static_cast<uint32_t>(value);
+        word0 = value;
         return *this;
     }
-#endif //__cplusplus
+    volatile pad_left_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
 };
 
-struct NPU_REG
+// ifm_cblk_width_r - IFM current block derived width
+struct ifm_cblk_width_r
 {
-    STRUCT id_r ID;           // 0x0
-    STRUCT status_r STATUS;   // 0x4
-    STRUCT cmd_r CMD;         // 0x8
-    STRUCT reset_r RESET;     // 0xc
-    STRUCT qbase0_r QBASE0;   // 0x10
-    STRUCT qbase1_r QBASE1;   // 0x14
-    STRUCT qread_r QREAD;     // 0x18
-    STRUCT qconfig_r QCONFIG; // 0x1c
-    STRUCT qsize_r QSIZE;     // 0x20
-    STRUCT prot_r PROT;       // 0x24
-    STRUCT config_r CONFIG;   // 0x28
-    STRUCT lock_r LOCK;       // 0x2c
-    uint32_t unused0[3];
-    STRUCT regioncfg_r REGIONCFG;   // 0x3c
-    STRUCT axi_limit0_r AXI_LIMIT0; // 0x40
-    STRUCT axi_limit1_r AXI_LIMIT1; // 0x44
-    STRUCT axi_limit2_r AXI_LIMIT2; // 0x48
-    STRUCT axi_limit3_r AXI_LIMIT3; // 0x4c
-    uint32_t unused1[12];
-    STRUCT basep0_r BASEP0;   // 0x80
-    STRUCT basep1_r BASEP1;   // 0x84
-    STRUCT basep2_r BASEP2;   // 0x88
-    STRUCT basep3_r BASEP3;   // 0x8c
-    STRUCT basep4_r BASEP4;   // 0x90
-    STRUCT basep5_r BASEP5;   // 0x94
-    STRUCT basep6_r BASEP6;   // 0x98
-    STRUCT basep7_r BASEP7;   // 0x9c
-    STRUCT basep8_r BASEP8;   // 0xa0
-    STRUCT basep9_r BASEP9;   // 0xa4
-    STRUCT basep10_r BASEP10; // 0xa8
-    STRUCT basep11_r BASEP11; // 0xac
-    STRUCT basep12_r BASEP12; // 0xb0
-    STRUCT basep13_r BASEP13; // 0xb4
-    STRUCT basep14_r BASEP14; // 0xb8
-    STRUCT basep15_r BASEP15; // 0xbc
-    uint32_t unused2[16];
-    STRUCT wd_status_r WD_STATUS;   // 0x100
-    STRUCT mac_status_r MAC_STATUS; // 0x104
-    STRUCT ao_status_r AO_STATUS;   // 0x108
-    uint32_t unused3[1];
-    STRUCT dma_status0_r DMA_STATUS0; // 0x110
-    STRUCT dma_status1_r DMA_STATUS1; // 0x114
-    uint32_t unused4[10];
-    STRUCT clkforce_r CLKFORCE; // 0x140
-    uint32_t DEBUG_ADDRESS;     // 0x144
-    uint32_t DEBUG_MISC;        // 0x148
-    uint32_t DEBUGCORE;         // 0x14c
-    uint32_t DEBUG_BLOCK;       // 0x150
-    uint32_t unused5[11];
-    STRUCT pmcr_r PMCR;             // 0x180
-    STRUCT pmcntenset_r PMCNTENSET; // 0x184
-    STRUCT pmcntenclr_r PMCNTENCLR; // 0x188
-    STRUCT pmovsset_r PMOVSSET;     // 0x18c
-    STRUCT pmovsclr_r PMOVSCLR;     // 0x190
-    STRUCT pmintset_r PMINTSET;     // 0x194
-    STRUCT pmintclr_r PMINTCLR;     // 0x198
-    uint32_t unused6[1];
-    STRUCT pmccntr_lo_r PMCCNTR_LO;   // 0x1a0
-    STRUCT pmccntr_hi_r PMCCNTR_HI;   // 0x1a4
-    STRUCT pmccntr_cfg_r PMCCNTR_CFG; // 0x1a8
-    STRUCT pmcaxi_chan_r PMCAXI_CHAN; // 0x1ac
-    uint32_t unused7[20];
-    uint32_t KERNEL_X;           // 0x200
-    uint32_t KERNEL_Y;           // 0x204
-    uint32_t KERNEL_W_M1;        // 0x208
-    uint32_t KERNEL_H_M1;        // 0x20c
-    uint32_t OFM_CBLK_WIDTH_M1;  // 0x210
-    uint32_t OFM_CBLK_HEIGHT_M1; // 0x214
-    uint32_t OFM_CBLK_DEPTH_M1;  // 0x218
-    uint32_t IFM_CBLK_DEPTH_M1;  // 0x21c
-    uint32_t OFM_X;              // 0x220
-    uint32_t OFM_Y;              // 0x224
-    uint32_t OFM_Z;              // 0x228
-    uint32_t IFM_Z;              // 0x22c
-    uint32_t PAD_TOP;            // 0x230
-    uint32_t PAD_LEFT;           // 0x234
-    uint32_t IFM_CBLK_WIDTH;     // 0x238
-    uint32_t IFM_CBLK_HEIGHT;    // 0x23c
-    uint32_t DMA_IFM_SRC;        // 0x240
-    uint32_t DMA_IFM_SRC_HI;     // 0x244
-    uint32_t DMA_IFM_DST;        // 0x248
-    uint32_t DMA_OFM_SRC;        // 0x24c
-    uint32_t DMA_OFM_DST;        // 0x250
-    uint32_t DMA_OFM_DST_HI;     // 0x254
-    uint32_t DMA_WEIGHT_SRC;     // 0x258
-    uint32_t DMA_WEIGHT_SRC_HI;  // 0x25c
-    uint32_t DMA_CMD_SRC;        // 0x260
-    uint32_t DMA_CMD_SRC_HI;     // 0x264
-    uint32_t DMA_CMD_SIZE;       // 0x268
-    uint32_t DMA_M2M_SRC;        // 0x26c
-    uint32_t DMA_M2M_SRC_HI;     // 0x270
-    uint32_t DMA_M2M_DST;        // 0x274
-    uint32_t DMA_M2M_DST_HI;     // 0x278
-    uint32_t CURRENT_QREAD;      // 0x27c
-    uint32_t DMA_SCALE_SRC;      // 0x280
-    uint32_t DMA_SCALE_SRC_HI;   // 0x284
-    uint32_t unused8[11];
-    uint32_t CURRENT_BLOCK; // 0x2b4
-    uint32_t CURRENT_OP;    // 0x2b8
-    uint32_t CURRENT_CMD;   // 0x2bc
-    uint32_t unused9[16];
-    uint32_t PMEVCNTR[4]; // 0x300
-    uint32_t unused10[28];
-    STRUCT pmevtyper0_r PMEVTYPER[4]; // 0x380
-    uint32_t unused11[28];
-    uint32_t SHARED_BUFFER[256]; // 0x400
-    uint32_t IFM_PAD_TOP;        // 0x800
-    uint32_t IFM_PAD_LEFT;       // 0x804
-    uint32_t IFM_PAD_RIGHT;      // 0x808
-    uint32_t IFM_PAD_BOTTOM;     // 0x80c
-    uint32_t IFM_DEPTH_M1;       // 0x810
-    uint32_t IFM_PRECISION;      // 0x814
-    uint32_t unused12[1];
-    uint32_t IFM_UPSCALE; // 0x81c
-    uint32_t unused13[1];
-    uint32_t IFM_ZERO_POINT; // 0x824
-    uint32_t IFM_WIDTH0_M1;  // 0x828
-    uint32_t IFM_HEIGHT0_M1; // 0x82c
-    uint32_t IFM_HEIGHT1_M1; // 0x830
-    uint32_t IFM_IB_END;     // 0x834
-    uint32_t unused14[1];
-    uint32_t IFM_REGION; // 0x83c
-    uint32_t unused15[1];
-    uint32_t OFM_WIDTH_M1;      // 0x844
-    uint32_t OFM_HEIGHT_M1;     // 0x848
-    uint32_t OFM_DEPTH_M1;      // 0x84c
-    uint32_t OFM_PRECISION;     // 0x850
-    uint32_t OFM_BLK_WIDTH_M1;  // 0x854
-    uint32_t OFM_BLK_HEIGHT_M1; // 0x858
-    uint32_t OFM_BLK_DEPTH_M1;  // 0x85c
-    uint32_t OFM_ZERO_POINT;    // 0x860
-    uint32_t unused16[1];
-    uint32_t OFM_WIDTH0_M1;  // 0x868
-    uint32_t OFM_HEIGHT0_M1; // 0x86c
-    uint32_t OFM_HEIGHT1_M1; // 0x870
-    uint32_t unused17[2];
-    uint32_t OFM_REGION;       // 0x87c
-    uint32_t KERNEL_WIDTH_M1;  // 0x880
-    uint32_t KERNEL_HEIGHT_M1; // 0x884
-    uint32_t KERNEL_STRIDE;    // 0x888
-    uint32_t PARALLEL_MODE;    // 0x88c
-    uint32_t ACC_FORMAT;       // 0x890
-    uint32_t ACTIVATION;       // 0x894
-    uint32_t ACTIVATION_MIN;   // 0x898
-    uint32_t ACTIVATION_MAX;   // 0x89c
-    uint32_t WEIGHT_REGION;    // 0x8a0
-    uint32_t SCALE_REGION;     // 0x8a4
-    uint32_t unused18[3];
-    uint32_t AB_START; // 0x8b4
-    uint32_t unused19[1];
-    uint32_t BLOCKDEP;        // 0x8bc
-    uint32_t DMA0_SRC_REGION; // 0x8c0
-    uint32_t DMA0_DST_REGION; // 0x8c4
-    uint32_t DMA0_SIZE0;      // 0x8c8
-    uint32_t DMA0_SIZE1;      // 0x8cc
-    uint32_t unused20[12];
-    uint32_t IFM2_BROADCAST; // 0x900
-    uint32_t IFM2_SCALAR;    // 0x904
-    uint32_t unused21[3];
-    uint32_t IFM2_PRECISION; // 0x914
-    uint32_t unused22[3];
-    uint32_t IFM2_ZERO_POINT; // 0x924
-    uint32_t IFM2_WIDTH0_M1;  // 0x928
-    uint32_t IFM2_HEIGHT0_M1; // 0x92c
-    uint32_t IFM2_HEIGHT1_M1; // 0x930
-    uint32_t IFM2_IB_START;   // 0x934
-    uint32_t unused23[1];
-    uint32_t IFM2_REGION; // 0x93c
-    uint32_t unused24[48];
-    uint32_t IFM_BASE0;       // 0xa00
-    uint32_t IFM_BASE0_HI;    // 0xa04
-    uint32_t IFM_BASE1;       // 0xa08
-    uint32_t IFM_BASE1_HI;    // 0xa0c
-    uint32_t IFM_BASE2;       // 0xa10
-    uint32_t IFM_BASE2_HI;    // 0xa14
-    uint32_t IFM_BASE3;       // 0xa18
-    uint32_t IFM_BASE3_HI;    // 0xa1c
-    uint32_t IFM_STRIDE_X;    // 0xa20
-    uint32_t IFM_STRIDE_X_HI; // 0xa24
-    uint32_t IFM_STRIDE_Y;    // 0xa28
-    uint32_t IFM_STRIDE_Y_HI; // 0xa2c
-    uint32_t IFM_STRIDE_C;    // 0xa30
-    uint32_t IFM_STRIDE_C_HI; // 0xa34
-    uint32_t unused25[2];
-    uint32_t OFM_BASE0;       // 0xa40
-    uint32_t OFM_BASE0_HI;    // 0xa44
-    uint32_t OFM_BASE1;       // 0xa48
-    uint32_t OFM_BASE1_HI;    // 0xa4c
-    uint32_t OFM_BASE2;       // 0xa50
-    uint32_t OFM_BASE2_HI;    // 0xa54
-    uint32_t OFM_BASE3;       // 0xa58
-    uint32_t OFM_BASE3_HI;    // 0xa5c
-    uint32_t OFM_STRIDE_X;    // 0xa60
-    uint32_t OFM_STRIDE_X_HI; // 0xa64
-    uint32_t OFM_STRIDE_Y;    // 0xa68
-    uint32_t OFM_STRIDE_Y_HI; // 0xa6c
-    uint32_t OFM_STRIDE_C;    // 0xa70
-    uint32_t OFM_STRIDE_C_HI; // 0xa74
-    uint32_t unused26[2];
-    uint32_t WEIGHT_BASE;    // 0xa80
-    uint32_t WEIGHT_BASE_HI; // 0xa84
-    uint32_t WEIGHT_LENGTH;  // 0xa88
-    uint32_t unused27[1];
-    uint32_t SCALE_BASE;    // 0xa90
-    uint32_t SCALE_BASE_HI; // 0xa94
-    uint32_t SCALE_LENGTH;  // 0xa98
-    uint32_t unused28[1];
-    uint32_t OFM_SCALE;       // 0xaa0
-    uint32_t OFM_SCALE_SHIFT; // 0xaa4
-    uint32_t OPA_SCALE;       // 0xaa8
-    uint32_t OPA_SCALE_SHIFT; // 0xaac
-    uint32_t OPB_SCALE;       // 0xab0
-    uint32_t unused29[3];
-    uint32_t DMA0_SRC;      // 0xac0
-    uint32_t DMA0_SRC_HI;   // 0xac4
-    uint32_t DMA0_DST;      // 0xac8
-    uint32_t DMA0_DST_HI;   // 0xacc
-    uint32_t DMA0_LEN;      // 0xad0
-    uint32_t DMA0_LEN_HI;   // 0xad4
-    uint32_t DMA0_SKIP0;    // 0xad8
-    uint32_t DMA0_SKIP0_HI; // 0xadc
-    uint32_t DMA0_SKIP1;    // 0xae0
-    uint32_t DMA0_SKIP1_HI; // 0xae4
-    uint32_t unused30[6];
-    uint32_t IFM2_BASE0;       // 0xb00
-    uint32_t IFM2_BASE0_HI;    // 0xb04
-    uint32_t IFM2_BASE1;       // 0xb08
-    uint32_t IFM2_BASE1_HI;    // 0xb0c
-    uint32_t IFM2_BASE2;       // 0xb10
-    uint32_t IFM2_BASE2_HI;    // 0xb14
-    uint32_t IFM2_BASE3;       // 0xb18
-    uint32_t IFM2_BASE3_HI;    // 0xb1c
-    uint32_t IFM2_STRIDE_X;    // 0xb20
-    uint32_t IFM2_STRIDE_X_HI; // 0xb24
-    uint32_t IFM2_STRIDE_Y;    // 0xb28
-    uint32_t IFM2_STRIDE_Y_HI; // 0xb2c
-    uint32_t IFM2_STRIDE_C;    // 0xb30
-    uint32_t IFM2_STRIDE_C_HI; // 0xb34
-    uint32_t unused31[2];
-    uint32_t WEIGHT1_BASE;    // 0xb40
-    uint32_t WEIGHT1_BASE_HI; // 0xb44
-    uint32_t WEIGHT1_LENGTH;  // 0xb48
-    uint32_t unused32[1];
-    uint32_t SCALE1_BASE;    // 0xb50
-    uint32_t SCALE1_BASE_HI; // 0xb54
-    uint32_t SCALE1_LENGTH;  // 0xb58
-    uint32_t unused33[281];
-    uint32_t REVISION; // 0xfc0
-    uint32_t unused34[3];
-    STRUCT pid4_r PID4; // 0xfd0
-    STRUCT pid5_r PID5; // 0xfd4
-    STRUCT pid6_r PID6; // 0xfd8
-    STRUCT pid7_r PID7; // 0xfdc
-    STRUCT pid0_r PID0; // 0xfe0
-    STRUCT pid1_r PID1; // 0xfe4
-    STRUCT pid2_r PID2; // 0xfe8
-    STRUCT pid3_r PID3; // 0xfec
-    STRUCT cid0_r CID0; // 0xff0
-    STRUCT cid1_r CID1; // 0xff4
-    STRUCT cid2_r CID2; // 0xff8
-    STRUCT cid3_r CID3; // 0xffc
-#ifdef __cplusplus
-    NPU_REG()
+#ifndef __cplusplus
+    union
     {
-        reset();
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR ifm_cblk_width_r() : word0(0) {}
+    CONSTEXPR ifm_cblk_width_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
     }
-    void reset()
+    void operator=(uint32_t value) volatile
     {
-        ID                 = 268845313;
-        STATUS             = 8;
-        CMD                = 12;
-        RESET              = 0;
-        QBASE0             = 0;
-        QBASE1             = 0;
-        QREAD              = 0;
-        QCONFIG            = 0;
-        QSIZE              = 0;
-        PROT               = 0;
-        CONFIG             = 0;
-        LOCK               = 0;
-        REGIONCFG          = 0;
-        AXI_LIMIT0         = 0;
-        AXI_LIMIT1         = 0;
-        AXI_LIMIT2         = 0;
-        AXI_LIMIT3         = 0;
-        BASEP0             = 0;
-        BASEP1             = 0;
-        BASEP2             = 0;
-        BASEP3             = 0;
-        BASEP4             = 0;
-        BASEP5             = 0;
-        BASEP6             = 0;
-        BASEP7             = 0;
-        BASEP8             = 0;
-        BASEP9             = 0;
-        BASEP10            = 0;
-        BASEP11            = 0;
-        BASEP12            = 0;
-        BASEP13            = 0;
-        BASEP14            = 0;
-        BASEP15            = 0;
-        REVISION           = 0;
-        PID4               = 4;
-        PID5               = 0;
-        PID6               = 0;
-        PID7               = 0;
-        PID0               = 128;
-        PID1               = 181;
-        PID2               = 11;
-        PID3               = 0;
-        CID0               = 13;
-        CID1               = 240;
-        CID2               = 5;
-        CID3               = 177;
-        WD_STATUS          = 0;
-        MAC_STATUS         = 0;
-        AO_STATUS          = 0;
-        DMA_STATUS0        = 0;
-        DMA_STATUS1        = 0;
-        CLKFORCE           = 0;
-        DEBUG_ADDRESS      = 0;
-        DEBUG_MISC         = 0;
-        DEBUGCORE          = 0;
-        DEBUG_BLOCK        = 0;
-        KERNEL_X           = 0;
-        KERNEL_Y           = 0;
-        KERNEL_W_M1        = 0;
-        KERNEL_H_M1        = 0;
-        OFM_CBLK_WIDTH_M1  = 0;
-        OFM_CBLK_HEIGHT_M1 = 0;
-        OFM_CBLK_DEPTH_M1  = 0;
-        IFM_CBLK_DEPTH_M1  = 0;
-        OFM_X              = 0;
-        OFM_Y              = 0;
-        OFM_Z              = 0;
-        IFM_Z              = 0;
-        PAD_TOP            = 0;
-        PAD_LEFT           = 0;
-        IFM_CBLK_WIDTH     = 0;
-        IFM_CBLK_HEIGHT    = 0;
-        DMA_IFM_SRC        = 0;
-        DMA_IFM_SRC_HI     = 0;
-        DMA_IFM_DST        = 0;
-        DMA_OFM_SRC        = 0;
-        DMA_OFM_DST        = 0;
-        DMA_OFM_DST_HI     = 0;
-        DMA_WEIGHT_SRC     = 0;
-        DMA_WEIGHT_SRC_HI  = 0;
-        DMA_CMD_SRC        = 0;
-        DMA_CMD_SRC_HI     = 0;
-        DMA_CMD_SIZE       = 0;
-        DMA_M2M_SRC        = 0;
-        DMA_M2M_SRC_HI     = 0;
-        DMA_M2M_DST        = 0;
-        DMA_M2M_DST_HI     = 0;
-        CURRENT_QREAD      = 0;
-        DMA_SCALE_SRC      = 0;
-        DMA_SCALE_SRC_HI   = 0;
-        CURRENT_BLOCK      = 0;
-        CURRENT_OP         = 0;
-        CURRENT_CMD        = 0;
-        IFM_PAD_TOP        = 0;
-        IFM_PAD_LEFT       = 0;
-        IFM_PAD_RIGHT      = 0;
-        IFM_PAD_BOTTOM     = 0;
-        IFM_DEPTH_M1       = 0;
-        IFM_PRECISION      = 0;
-        IFM_UPSCALE        = 0;
-        IFM_ZERO_POINT     = 0;
-        IFM_WIDTH0_M1      = 0;
-        IFM_HEIGHT0_M1     = 0;
-        IFM_HEIGHT1_M1     = 0;
-        IFM_IB_END         = 0;
-        IFM_REGION         = 0;
-        OFM_WIDTH_M1       = 0;
-        OFM_HEIGHT_M1      = 0;
-        OFM_DEPTH_M1       = 0;
-        OFM_PRECISION      = 0;
-        OFM_BLK_WIDTH_M1   = 0;
-        OFM_BLK_HEIGHT_M1  = 0;
-        OFM_BLK_DEPTH_M1   = 0;
-        OFM_ZERO_POINT     = 0;
-        OFM_WIDTH0_M1      = 0;
-        OFM_HEIGHT0_M1     = 0;
-        OFM_HEIGHT1_M1     = 0;
-        OFM_REGION         = 0;
-        KERNEL_WIDTH_M1    = 0;
-        KERNEL_HEIGHT_M1   = 0;
-        KERNEL_STRIDE      = 0;
-        PARALLEL_MODE      = 0;
-        ACC_FORMAT         = 0;
-        ACTIVATION         = 0;
-        ACTIVATION_MIN     = 0;
-        ACTIVATION_MAX     = 0;
-        WEIGHT_REGION      = 0;
-        SCALE_REGION       = 0;
-        AB_START           = 0;
-        BLOCKDEP           = 0;
-        DMA0_SRC_REGION    = 0;
-        DMA0_DST_REGION    = 0;
-        DMA0_SIZE0         = 0;
-        DMA0_SIZE1         = 0;
-        IFM2_BROADCAST     = 0;
-        IFM2_SCALAR        = 0;
-        IFM2_PRECISION     = 0;
-        IFM2_ZERO_POINT    = 0;
-        IFM2_WIDTH0_M1     = 0;
-        IFM2_HEIGHT0_M1    = 0;
-        IFM2_HEIGHT1_M1    = 0;
-        IFM2_IB_START      = 0;
-        IFM2_REGION        = 0;
-        IFM_BASE0          = 0;
-        IFM_BASE0_HI       = 0;
-        IFM_BASE1          = 0;
-        IFM_BASE1_HI       = 0;
-        IFM_BASE2          = 0;
-        IFM_BASE2_HI       = 0;
-        IFM_BASE3          = 0;
-        IFM_BASE3_HI       = 0;
-        IFM_STRIDE_X       = 0;
-        IFM_STRIDE_X_HI    = 0;
-        IFM_STRIDE_Y       = 0;
-        IFM_STRIDE_Y_HI    = 0;
-        IFM_STRIDE_C       = 0;
-        IFM_STRIDE_C_HI    = 0;
-        OFM_BASE0          = 0;
-        OFM_BASE0_HI       = 0;
-        OFM_BASE1          = 0;
-        OFM_BASE1_HI       = 0;
-        OFM_BASE2          = 0;
-        OFM_BASE2_HI       = 0;
-        OFM_BASE3          = 0;
-        OFM_BASE3_HI       = 0;
-        OFM_STRIDE_X       = 0;
-        OFM_STRIDE_X_HI    = 0;
-        OFM_STRIDE_Y       = 0;
-        OFM_STRIDE_Y_HI    = 0;
-        OFM_STRIDE_C       = 0;
-        OFM_STRIDE_C_HI    = 0;
-        WEIGHT_BASE        = 0;
-        WEIGHT_BASE_HI     = 0;
-        WEIGHT_LENGTH      = 0;
-        SCALE_BASE         = 0;
-        SCALE_BASE_HI      = 0;
-        SCALE_LENGTH       = 0;
-        OFM_SCALE          = 0;
-        OFM_SCALE_SHIFT    = 0;
-        OPA_SCALE          = 0;
-        OPA_SCALE_SHIFT    = 0;
-        OPB_SCALE          = 0;
-        DMA0_SRC           = 0;
-        DMA0_SRC_HI        = 0;
-        DMA0_DST           = 0;
-        DMA0_DST_HI        = 0;
-        DMA0_LEN           = 0;
-        DMA0_LEN_HI        = 0;
-        DMA0_SKIP0         = 0;
-        DMA0_SKIP0_HI      = 0;
-        DMA0_SKIP1         = 0;
-        DMA0_SKIP1_HI      = 0;
-        IFM2_BASE0         = 0;
-        IFM2_BASE0_HI      = 0;
-        IFM2_BASE1         = 0;
-        IFM2_BASE1_HI      = 0;
-        IFM2_BASE2         = 0;
-        IFM2_BASE2_HI      = 0;
-        IFM2_BASE3         = 0;
-        IFM2_BASE3_HI      = 0;
-        IFM2_STRIDE_X      = 0;
-        IFM2_STRIDE_X_HI   = 0;
-        IFM2_STRIDE_Y      = 0;
-        IFM2_STRIDE_Y_HI   = 0;
-        IFM2_STRIDE_C      = 0;
-        IFM2_STRIDE_C_HI   = 0;
-        WEIGHT1_BASE       = 0;
-        WEIGHT1_BASE_HI    = 0;
-        WEIGHT1_LENGTH     = 0;
-        SCALE1_BASE        = 0;
-        SCALE1_BASE_HI     = 0;
-        SCALE1_LENGTH      = 0;
-        PMCR               = 8192;
-        PMCNTENSET         = 0;
-        PMCNTENCLR         = 0;
-        PMOVSSET           = 0;
-        PMOVSCLR           = 0;
-        PMINTSET           = 0;
-        PMINTCLR           = 0;
-        PMCCNTR_LO         = 0;
-        PMCCNTR_HI         = 0;
-        PMCCNTR_CFG        = 0;
-        PMCAXI_CHAN        = 0;
-        for (size_t i = 0; i < (sizeof(PMEVCNTR) / sizeof(PMEVCNTR[0])); ++i)
-            PMEVCNTR[i] = 0;
-        for (size_t i = 0; i < (sizeof(PMEVTYPER) / sizeof(PMEVTYPER[0])); ++i)
-            PMEVTYPER[i] = 0;
-        for (size_t i = 0; i < (sizeof(SHARED_BUFFER) / sizeof(SHARED_BUFFER[0])); ++i)
-            SHARED_BUFFER[i] = 0;
+        word0 = value;
     }
-    uint32_t &operator[](const int addr_offset)
+    CONSTEXPR operator uint32_t()
     {
-        return reinterpret_cast<uint32_t *>(this)[addr_offset / 4];
+        return word0;
     }
-    enum class access_type_t : bool
+    operator uint32_t() volatile
     {
-        RO,
-        RW
+        return word0;
+    }
+    ifm_cblk_width_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_cblk_width_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_width_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_cblk_height_r - IFM current block derived height
+struct ifm_cblk_height_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
     };
-    access_type_t get_access_type(uint32_t offset)
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR ifm_cblk_height_r() : word0(0) {}
+    CONSTEXPR ifm_cblk_height_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
     {
-        switch (offset)
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    ifm_cblk_height_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_cblk_height_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_height_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ifm_src_r - DMA IFM channel source position on AXI
+struct dma_ifm_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
         {
-        case 0:
-            return access_type_t::RO;
-        case 4:
-            return access_type_t::RO;
-        case 8:
-            return access_type_t::RW;
-        case 12:
-            return access_type_t::RW;
-        case 16:
-            return access_type_t::RW;
-        case 20:
-            return access_type_t::RW;
-        case 24:
-            return access_type_t::RO;
-        case 28:
-            return access_type_t::RW;
-        case 32:
-            return access_type_t::RW;
-        case 36:
-            return access_type_t::RO;
-        case 40:
-            return access_type_t::RO;
-        case 44:
-            return access_type_t::RW;
-        case 60:
-            return access_type_t::RW;
-        case 64:
-            return access_type_t::RW;
-        case 68:
-            return access_type_t::RW;
-        case 72:
-            return access_type_t::RW;
-        case 76:
-            return access_type_t::RW;
-        case 128:
-            return access_type_t::RW;
-        case 132:
-            return access_type_t::RW;
-        case 136:
-            return access_type_t::RW;
-        case 140:
-            return access_type_t::RW;
-        case 144:
-            return access_type_t::RW;
-        case 148:
-            return access_type_t::RW;
-        case 152:
-            return access_type_t::RW;
-        case 156:
-            return access_type_t::RW;
-        case 160:
-            return access_type_t::RW;
-        case 164:
-            return access_type_t::RW;
-        case 168:
-            return access_type_t::RW;
-        case 172:
-            return access_type_t::RW;
-        case 176:
-            return access_type_t::RW;
-        case 180:
-            return access_type_t::RW;
-        case 184:
-            return access_type_t::RW;
-        case 188:
-            return access_type_t::RW;
-        case 4032:
-            return access_type_t::RO;
-        case 4048:
-            return access_type_t::RO;
-        case 4052:
-            return access_type_t::RO;
-        case 4056:
-            return access_type_t::RO;
-        case 4060:
-            return access_type_t::RO;
-        case 4064:
-            return access_type_t::RO;
-        case 4068:
-            return access_type_t::RO;
-        case 4072:
-            return access_type_t::RO;
-        case 4076:
-            return access_type_t::RO;
-        case 4080:
-            return access_type_t::RO;
-        case 4084:
-            return access_type_t::RO;
-        case 4088:
-            return access_type_t::RO;
-        case 4092:
-            return access_type_t::RO;
-        case 256:
-            return access_type_t::RO;
-        case 260:
-            return access_type_t::RO;
-        case 264:
-            return access_type_t::RO;
-        case 272:
-            return access_type_t::RO;
-        case 276:
-            return access_type_t::RO;
-        case 320:
-            return access_type_t::RW;
-        case 324:
-            return access_type_t::RW;
-        case 328:
-            return access_type_t::RW;
-        case 332:
-            return access_type_t::RW;
-        case 336:
-            return access_type_t::RW;
-        case 512:
-            return access_type_t::RO;
-        case 516:
-            return access_type_t::RO;
-        case 520:
-            return access_type_t::RO;
-        case 524:
-            return access_type_t::RO;
-        case 528:
-            return access_type_t::RO;
-        case 532:
-            return access_type_t::RO;
-        case 536:
-            return access_type_t::RO;
-        case 540:
-            return access_type_t::RO;
-        case 544:
-            return access_type_t::RO;
-        case 548:
-            return access_type_t::RO;
-        case 552:
-            return access_type_t::RO;
-        case 556:
-            return access_type_t::RO;
-        case 560:
-            return access_type_t::RO;
-        case 564:
-            return access_type_t::RO;
-        case 568:
-            return access_type_t::RO;
-        case 572:
-            return access_type_t::RO;
-        case 576:
-            return access_type_t::RO;
-        case 580:
-            return access_type_t::RO;
-        case 584:
-            return access_type_t::RO;
-        case 588:
-            return access_type_t::RO;
-        case 592:
-            return access_type_t::RO;
-        case 596:
-            return access_type_t::RO;
-        case 600:
-            return access_type_t::RO;
-        case 604:
-            return access_type_t::RO;
-        case 608:
-            return access_type_t::RO;
-        case 612:
-            return access_type_t::RO;
-        case 616:
-            return access_type_t::RO;
-        case 620:
-            return access_type_t::RO;
-        case 624:
-            return access_type_t::RO;
-        case 628:
-            return access_type_t::RO;
-        case 632:
-            return access_type_t::RO;
-        case 636:
-            return access_type_t::RO;
-        case 640:
-            return access_type_t::RO;
-        case 644:
-            return access_type_t::RO;
-        case 692:
-            return access_type_t::RO;
-        case 696:
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_ifm_src_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_ifm_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_ifm_src_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma_ifm_dst_r - DMA IFM channel destination position in SHRAM
+struct dma_ifm_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR dma_ifm_dst_r() : word0(0) {}
+    CONSTEXPR dma_ifm_dst_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    dma_ifm_dst_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_ifm_dst_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_ifm_dst_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ofm_src_r - DMA OFM channel source position in SHRAM
+struct dma_ofm_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR dma_ofm_src_r() : word0(0) {}
+    CONSTEXPR dma_ofm_src_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    dma_ofm_src_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_ofm_src_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_ofm_src_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ofm_dst_r - DMA OFM channel destination position on AXI
+struct dma_ofm_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_ofm_dst_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_ofm_dst_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_ofm_dst_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma_weight_src_r - DMA weight channel source position on AXI
+struct dma_weight_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_weight_src_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_weight_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_weight_src_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma_cmd_src_r - DMA command channel source position on AXI
+struct dma_cmd_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_cmd_src_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_cmd_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_cmd_src_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma_cmd_size_r - DMA command channel number of bytes buffered
+struct dma_cmd_size_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR dma_cmd_size_r() : word0(0) {}
+    CONSTEXPR dma_cmd_size_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    dma_cmd_size_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_cmd_size_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_cmd_size_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_m2m_src_r - DMA memory to memory source position on AXI
+struct dma_m2m_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_m2m_src_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_m2m_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_m2m_src_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma_m2m_dst_r - DMA memory to memory destination position on AXI
+struct dma_m2m_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0;
+    uint32_t word1;
+
+  public:
+    CONSTEXPR dma_m2m_dst_r() : word0(0), word1(0) {}
+    CONSTEXPR dma_m2m_dst_r(uint64_t init) :
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())),
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max()))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_m2m_dst_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// current_qread_r - QREAD position being issued (rather than completed)
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct current_qread_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR current_qread_r() : word0(0U) {}
+    CONSTEXPR current_qread_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    current_qread_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR current_qread_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile current_qread_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// dma_scale_src_r - DMA scale and bias channel source position on AXI
+//
+// Wraps a 64-bit register held as two 32-bit words (word0 = low half,
+// word1 = high half). C++ builds get a class; C builds get a union.
+struct dma_scale_src_r
+{
+#ifdef __cplusplus
+  private:
+    uint32_t word0; // bits 31..0
+    uint32_t word1; // bits 63..32
+
+  public:
+    // Construct zeroed, or split a 64-bit value across both words.
+    CONSTEXPR dma_scale_src_r() : word0(0U), word1(0U) {}
+    CONSTEXPR dma_scale_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init)), word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+
+    // Assign a 64-bit value: low half to word0, high half to word1.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>(value);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+
+    // Reassemble the 64-bit value from the two words.
+    CONSTEXPR operator uint64_t() { return (static_cast<uint64_t>(word1) << 32) | word0; }
+    operator uint64_t() volatile { return (static_cast<uint64_t>(word1) << 32) | word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    dma_scale_src_r copy() volatile { return *this; }
+#else
+    union
+    {
+        struct
+        {
+            uint32_t offset : 32; // Offset
+            uint32_t reserved0 : 32;
+        };
+        uint32_t word[2];
+    };
+#endif
+};
+
+// current_block_r - 0-3. Current block bank being executed by the TSU or last one executed if TSU is stopped
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct current_block_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR current_block_r() : word0(0U) {}
+    CONSTEXPR current_block_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    current_block_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR current_block_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile current_block_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// current_op_r - Current NPU OP command being executed by the TSU
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct current_op_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR current_op_r() : word0(0U) {}
+    CONSTEXPR current_op_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    current_op_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR current_op_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile current_op_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// current_cmd_r - Current 32-bit command being parsed by the command stream parser
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct current_cmd_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR current_cmd_r() : word0(0U) {}
+    CONSTEXPR current_cmd_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    current_cmd_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR current_cmd_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile current_cmd_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// pmevcntr_r - Performance monitor event 0 count register
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct pmevcntr_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR pmevcntr_r() : word0(0U) {}
+    CONSTEXPR pmevcntr_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    pmevcntr_r copy() volatile { return *this; }
+
+    // Read the 'count' field, which spans the whole word.
+    CONSTEXPR uint32_t get_count() const
+    {
+        return word0;
+    }
+    uint32_t get_count() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'count' field; returns *this so calls can be chained.
+    CONSTEXPR pmevcntr_r &set_count(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pmevcntr_r &set_count(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t count : 32; // Count word
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// pmevtyper_r - Performance monitor event type register 0
+//
+// Wraps a single 32-bit register word whose low 10 bits hold EV_TYPE.
+// C++ builds get a class with accessor methods; C builds get a union
+// that overlays the named bit-fields on the raw word.
+struct pmevtyper_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR pmevtyper_r() : word0(0U) {}
+    CONSTEXPR pmevtyper_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    pmevtyper_r copy() volatile { return *this; }
+
+    // Read EV_TYPE: bits 9..0 of the word, cast to pmu_event.
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_EV_TYPE() const
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>(word0 & 0x3FFU);
+    }
+    NPU_NAMESPACE::pmu_event get_EV_TYPE() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>(word0 & 0x3FFU);
+    }
+
+    // Write EV_TYPE into bits 9..0, preserving bits 31..10; returns *this.
+    CONSTEXPR pmevtyper_r &set_EV_TYPE(NPU_NAMESPACE::pmu_event value)
+    {
+        word0 = (word0 & ~0x3FFU) | (static_cast<uint32_t>(value) & 0x3FFU);
+        return *this;
+    }
+    volatile pmevtyper_r &set_EV_TYPE(NPU_NAMESPACE::pmu_event value) volatile
+    {
+        word0 = (word0 & ~0x3FFU) | (static_cast<uint32_t>(value) & 0x3FFU);
+        return *this;
+    }
+#else
+    // C view: named bit-fields overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t EV_TYPE : 10; // Event Type
+            uint32_t reserved0 : 22;
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// shared_buffer_r - Shared buffer debug access. Only valid in STOPPED state
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct shared_buffer_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR shared_buffer_r() : word0(0U) {}
+    CONSTEXPR shared_buffer_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    shared_buffer_r copy() volatile { return *this; }
+
+    // Read the 'mem_word' field, which spans the whole word.
+    CONSTEXPR uint32_t get_mem_word() const
+    {
+        return word0;
+    }
+    uint32_t get_mem_word() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'mem_word' field; returns *this so calls can be chained.
+    CONSTEXPR shared_buffer_r &set_mem_word(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile shared_buffer_r &set_mem_word(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t mem_word : 32; // Memory word
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_pad_top_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_pad_top_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_pad_top_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_top_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_pad_top_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_pad_top_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_top_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_pad_left_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_pad_left_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_pad_left_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_left_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_pad_left_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_pad_left_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_left_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_pad_right_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_pad_right_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_pad_right_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_right_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_pad_right_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_pad_right_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_right_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_pad_bottom_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_pad_bottom_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_pad_bottom_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_bottom_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_pad_bottom_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_pad_bottom_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_bottom_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_depth_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_depth_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_depth_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_depth_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_depth_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_depth_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_depth_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_precision_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_precision_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_precision_r() : word0(0U) {}
+    CONSTEXPR ifm_precision_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_precision_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_precision_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_precision_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_upscale_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_upscale_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_upscale_r() : word0(0U) {}
+    CONSTEXPR ifm_upscale_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_upscale_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_upscale_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_upscale_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_zero_point_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_zero_point_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_zero_point_r() : word0(0U) {}
+    CONSTEXPR ifm_zero_point_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_zero_point_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_zero_point_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_zero_point_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_width0_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_width0_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_width0_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_width0_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_width0_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_width0_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_width0_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_height0_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_height0_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_height0_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_height0_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_height0_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_height0_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_height0_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_height1_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_height1_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_height1_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_height1_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_height1_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_height1_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_height1_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_ib_end_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_ib_end_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_ib_end_r() : word0(0U) {}
+    CONSTEXPR ifm_ib_end_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_ib_end_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_ib_end_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_ib_end_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ifm_region_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ifm_region_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ifm_region_r() : word0(0U) {}
+    CONSTEXPR ifm_region_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ifm_region_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ifm_region_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_region_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_width_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ofm_width_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ofm_width_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_width_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ofm_width_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ofm_width_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_width_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_height_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ofm_height_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ofm_height_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_height_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ofm_height_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ofm_height_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_height_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_depth_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ofm_depth_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ofm_depth_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_depth_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ofm_depth_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ofm_depth_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_depth_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_precision_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ofm_precision_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ofm_precision_r() : word0(0U) {}
+    CONSTEXPR ofm_precision_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ofm_precision_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ofm_precision_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_precision_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_blk_width_m1_r - None
+//
+// Wraps a single 32-bit register word. C++ builds get a class with
+// accessor methods; C builds get a union that overlays the named
+// bit-field on the raw word.
+struct ofm_blk_width_m1_r
+{
+#ifdef __cplusplus
+  private:
+    // Backing storage: the raw 32-bit register word.
+    uint32_t word0;
+
+  public:
+    // Construct zeroed, or from a raw register word.
+    CONSTEXPR ofm_blk_width_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_blk_width_m1_r(uint32_t init) : word0(init) {}
+
+    // Assign a raw register word (plain and volatile objects).
+    CONSTEXPR void operator=(uint32_t value) { word0 = value; }
+    void operator=(uint32_t value) volatile { word0 = value; }
+
+    // Convert back to the raw register word.
+    CONSTEXPR operator uint32_t() { return word0; }
+    operator uint32_t() volatile { return word0; }
+
+    // Return a non-volatile by-value copy of this (possibly volatile) object.
+    ofm_blk_width_m1_r copy() volatile { return *this; }
+
+    // Read the 'value' field, which spans the whole word.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+
+    // Write the 'value' field; returns *this so calls can be chained.
+    CONSTEXPR ofm_blk_width_m1_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_width_m1_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#else
+    // C view: named bit-field overlaid on the raw word.
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#endif
+};
+
+// ofm_blk_height_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_blk_height_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_blk_height_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_blk_height_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_blk_height_m1_r copy() volatile // returns this object by value as a non-volatile ofm_blk_height_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_blk_height_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_height_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_blk_depth_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_blk_depth_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_blk_depth_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_blk_depth_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_blk_depth_m1_r copy() volatile // returns this object by value as a non-volatile ofm_blk_depth_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_blk_depth_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_depth_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_zero_point_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_zero_point_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_zero_point_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_zero_point_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_zero_point_r copy() volatile // returns this object by value as a non-volatile ofm_zero_point_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_zero_point_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_zero_point_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_width0_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_width0_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_width0_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_width0_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_width0_m1_r copy() volatile // returns this object by value as a non-volatile ofm_width0_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_width0_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_width0_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_height0_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_height0_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_height0_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_height0_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_height0_m1_r copy() volatile // returns this object by value as a non-volatile ofm_height0_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_height0_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_height0_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_height1_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_height1_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_height1_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_height1_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_height1_m1_r copy() volatile // returns this object by value as a non-volatile ofm_height1_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_height1_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_height1_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ofm_region_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ofm_region_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ofm_region_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ofm_region_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_region_r copy() volatile // returns this object by value as a non-volatile ofm_region_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_region_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_region_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// kernel_width_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct kernel_width_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR kernel_width_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR kernel_width_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_width_m1_r copy() volatile // returns this object by value as a non-volatile kernel_width_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_width_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_width_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// kernel_height_m1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct kernel_height_m1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR kernel_height_m1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR kernel_height_m1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_height_m1_r copy() volatile // returns this object by value as a non-volatile kernel_height_m1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_height_m1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_height_m1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// kernel_stride_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct kernel_stride_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR kernel_stride_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR kernel_stride_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_stride_r copy() volatile // returns this object by value as a non-volatile kernel_stride_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_stride_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_stride_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// acc_format_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct acc_format_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR acc_format_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR acc_format_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    acc_format_r copy() volatile // returns this object by value as a non-volatile acc_format_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR acc_format_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile acc_format_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// activation_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct activation_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR activation_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR activation_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_r copy() volatile // returns this object by value as a non-volatile activation_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// activation_min_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct activation_min_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR activation_min_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR activation_min_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_min_r copy() volatile // returns this object by value as a non-volatile activation_min_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_min_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_min_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// activation_max_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct activation_max_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR activation_max_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR activation_max_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_max_r copy() volatile // returns this object by value as a non-volatile activation_max_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_max_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_max_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// weight_region_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct weight_region_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR weight_region_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR weight_region_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    weight_region_r copy() volatile // returns this object by value as a non-volatile weight_region_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR weight_region_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile weight_region_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// scale_region_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct scale_region_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR scale_region_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR scale_region_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    scale_region_r copy() volatile // returns this object by value as a non-volatile scale_region_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR scale_region_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile scale_region_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ab_start_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ab_start_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ab_start_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ab_start_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ab_start_r copy() volatile // returns this object by value as a non-volatile ab_start_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ab_start_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ab_start_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// blockdep_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct blockdep_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR blockdep_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR blockdep_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    blockdep_r copy() volatile // returns this object by value as a non-volatile blockdep_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR blockdep_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile blockdep_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma0_src_region_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct dma0_src_region_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR dma0_src_region_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR dma0_src_region_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_src_region_r copy() volatile // returns this object by value as a non-volatile dma0_src_region_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_src_region_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_src_region_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma0_dst_region_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct dma0_dst_region_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR dma0_dst_region_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR dma0_dst_region_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_dst_region_r copy() volatile // returns this object by value as a non-volatile dma0_dst_region_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_dst_region_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_dst_region_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma0_size0_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct dma0_size0_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR dma0_size0_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR dma0_size0_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_size0_r copy() volatile // returns this object by value as a non-volatile dma0_size0_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_size0_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_size0_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// dma0_size1_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct dma0_size1_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR dma0_size1_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR dma0_size1_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_size1_r copy() volatile // returns this object by value as a non-volatile dma0_size1_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_size1_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_size1_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm2_broadcast_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ifm2_broadcast_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ifm2_broadcast_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ifm2_broadcast_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_broadcast_r copy() volatile // returns this object by value as a non-volatile ifm2_broadcast_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_broadcast_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_broadcast_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm2_scalar_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ifm2_scalar_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ifm2_scalar_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ifm2_scalar_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_scalar_r copy() volatile // returns this object by value as a non-volatile ifm2_scalar_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_scalar_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_scalar_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm2_precision_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ifm2_precision_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ifm2_precision_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ifm2_precision_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_precision_r copy() volatile // returns this object by value as a non-volatile ifm2_precision_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_precision_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_precision_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm2_zero_point_r - one 32-bit register word exposed as a single 'value' field (no description given)
+struct ifm2_zero_point_r
+{
+#ifndef __cplusplus // C builds: plain data layout only
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits viewed as a whole word
+    };
+#else // C++ builds: private word plus accessor methods
+  private:
+    uint32_t word0; // raw 32-bit register contents
+
+  public:
+    CONSTEXPR ifm2_zero_point_r() : word0(0) {} // starts with all bits clear
+    CONSTEXPR ifm2_zero_point_r(uint32_t init) : word0(init) {} // starts from a raw word
+    CONSTEXPR void operator=(uint32_t value) // store a raw word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // read the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_zero_point_r copy() volatile // returns this object by value as a non-volatile ifm2_zero_point_r
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the whole word unmasked
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_zero_point_r &set_value(uint32_t value) // overwrites the whole word; returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_zero_point_r &set_value(uint32_t value) volatile // same write, for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif // __cplusplus
+};
+
+// ifm2_width0_m1_r - wraps one 32-bit register word (source description was "None")
+struct ifm2_width0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw-word view sharing storage with the bitfield above
+    };
+#else
+  private:
+    uint32_t word0; // only data member: the raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_width0_m1_r() : word0(0) {} // default: word0 = 0
+    CONSTEXPR ifm2_width0_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_width0_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // named getter: all 32 bits of word0
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // getter overload for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_width0_m1_r &set_value(uint32_t value) // named setter; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_width0_m1_r &set_value(uint32_t value) volatile // chaining setter for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height0_m1_r - wraps one 32-bit register word (source description was "None")
+struct ifm2_height0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw-word view sharing storage with the bitfield above
+    };
+#else
+  private:
+    uint32_t word0; // only data member: the raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_height0_m1_r() : word0(0) {} // default: word0 = 0
+    CONSTEXPR ifm2_height0_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_height0_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // named getter: all 32 bits of word0
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // getter overload for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_height0_m1_r &set_value(uint32_t value) // named setter; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_height0_m1_r &set_value(uint32_t value) volatile // chaining setter for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height1_m1_r - wraps one 32-bit register word (source description was "None")
+struct ifm2_height1_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw-word view sharing storage with the bitfield above
+    };
+#else
+  private:
+    uint32_t word0; // only data member: the raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_height1_m1_r() : word0(0) {} // default: word0 = 0
+    CONSTEXPR ifm2_height1_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_height1_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // named getter: all 32 bits of word0
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // getter overload for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_height1_m1_r &set_value(uint32_t value) // named setter; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_height1_m1_r &set_value(uint32_t value) volatile // chaining setter for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_ib_start_r - wraps one 32-bit register word (source description was "None")
+struct ifm2_ib_start_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw-word view sharing storage with the bitfield above
+    };
+#else
+  private:
+    uint32_t word0; // only data member: the raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_ib_start_r() : word0(0) {} // default: word0 = 0
+    CONSTEXPR ifm2_ib_start_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_ib_start_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // named getter: all 32 bits of word0
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // getter overload for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_ib_start_r &set_value(uint32_t value) // named setter; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_ib_start_r &set_value(uint32_t value) volatile // chaining setter for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_region_r - wraps one 32-bit register word (source description was "None")
+struct ifm2_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw-word view sharing storage with the bitfield above
+    };
+#else
+  private:
+    uint32_t word0; // only data member: the raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_region_r() : word0(0) {} // default: word0 = 0
+    CONSTEXPR ifm2_region_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // named getter: all 32 bits of word0
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // getter overload for volatile-qualified objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_region_r &set_value(uint32_t value) // named setter; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_region_r &set_value(uint32_t value) volatile // chaining setter for volatile-qualified objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_base0_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_base0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base0_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_base0_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base0_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base1_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_base1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base1_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_base1_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base1_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base2_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_base2_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base2_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_base2_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base2_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base3_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_base3_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base3_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_base3_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base3_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_x_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_stride_x_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_x_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_stride_x_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_x_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_y_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_stride_y_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_y_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_stride_y_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_y_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_c_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ifm_stride_c_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_c_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ifm_stride_c_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_c_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base0_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_base0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base0_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_base0_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base0_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base1_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_base1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base1_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_base1_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base1_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base2_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_base2_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base2_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_base2_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base2_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base3_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_base3_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base3_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_base3_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base3_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_x_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_stride_x_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_x_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_stride_x_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_x_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_y_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_stride_y_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_y_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_stride_y_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_y_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_c_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct ofm_stride_c_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_c_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR ofm_stride_c_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_c_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight_base_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct weight_base_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR weight_base_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR weight_base_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight_base_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight_length_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct weight_length_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR weight_length_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR weight_length_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight_length_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale_base_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct scale_base_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR scale_base_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR scale_base_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale_base_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale_length_r - wraps a 64-bit register value as two 32-bit words (source description was "None")
+struct scale_length_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2]; // raw two-word view sharing storage with the bitfields above
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR scale_length_r() : word0(0), word1(0) {} // default: both words zero
+    CONSTEXPR scale_length_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask changes nothing; the cast keeps the low 32 bits
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high 32 bits
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same two 32-bit stores (word0, then word1) for volatile-qualified objects
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles (word1 << 32) | word0; not const-qualified
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for volatile-qualified objects; each word is read separately
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale_length_r copy() volatile // returns a by-value, non-volatile copy of a volatile-qualified object
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_scale_r - single 32-bit register value; no register description is given here
+struct ofm_scale_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR ofm_scale_r() : word0(0) {} // value starts at zero
+    CONSTEXPR ofm_scale_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    ofm_scale_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_scale_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_scale_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_scale_shift_r - single 32-bit register value; no register description is given here
+struct ofm_scale_shift_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR ofm_scale_shift_r() : word0(0) {} // value starts at zero
+    CONSTEXPR ofm_scale_shift_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    ofm_scale_shift_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_scale_shift_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_scale_shift_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opa_scale_r - single 32-bit register value; no register description is given here
+struct opa_scale_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR opa_scale_r() : word0(0) {} // value starts at zero
+    CONSTEXPR opa_scale_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    opa_scale_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR opa_scale_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opa_scale_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opa_scale_shift_r - single 32-bit register value; no register description is given here
+struct opa_scale_shift_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR opa_scale_shift_r() : word0(0) {} // value starts at zero
+    CONSTEXPR opa_scale_shift_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    opa_scale_shift_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR opa_scale_shift_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opa_scale_shift_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opb_scale_r - single 32-bit register value; no register description is given here
+struct opb_scale_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR opb_scale_r() : word0(0) {} // value starts at zero
+    CONSTEXPR opb_scale_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    opb_scale_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR opb_scale_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opb_scale_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_src_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct dma0_src_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR dma0_src_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR dma0_src_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_src_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_dst_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct dma0_dst_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR dma0_dst_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR dma0_dst_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_dst_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_len_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct dma0_len_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR dma0_len_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR dma0_len_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_len_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base0_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_base0_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_base0_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_base0_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base0_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base1_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_base1_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_base1_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_base1_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base1_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base2_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_base2_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_base2_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_base2_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base2_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base3_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_base3_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_base3_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_base3_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base3_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_x_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_stride_x_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_stride_x_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_stride_x_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_x_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_y_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_stride_y_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_stride_y_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_stride_y_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_y_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_c_r - 64-bit register value stored as two 32-bit words; no register description is given here
+struct ifm2_stride_c_r
+{
+#ifndef __cplusplus
+    union // C view: the LO/HI fields share storage with word[2]
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB
+            uint32_t value_HI : 32; // 64-bit register value - MSB
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // low 32 bits of the 64-bit value
+    uint32_t word1; // high 32 bits of the 64-bit value
+
+  public:
+    CONSTEXPR ifm2_stride_c_r() : word0(0), word1(0) {} // both words start at zero
+    CONSTEXPR ifm2_stride_c_r(uint64_t init) : // non-explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // not const-qualified, so unavailable on const objects
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0; // word1 forms bits 63:32, word0 bits 31:0
+    }
+    operator uint64_t() volatile // volatile overload: word0 and word1 are read as two separate 32-bit accesses
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_c_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// revision_r - Internal FPGA build revision: first 32 bits of the Ultan Git hash used for the build
+struct revision_r
+{
+#ifndef __cplusplus
+    union // C view: the 'value' field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR revision_r() : word0(0) {} // value starts at zero
+    CONSTEXPR revision_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    revision_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the field spans all 32 bits, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR revision_r &set_value(uint32_t value) // returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile revision_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid4_r - Peripheral ID byte 4 (Arm=code 4)
+struct pid4_r
+{
+#ifndef __cplusplus
+    union // C view: the PID4 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID4 : 32; // Byte 4 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid4_r() : word0(4) {} // default-constructed value is 4 (0x04)
+    CONSTEXPR pid4_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid4_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID4() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID4() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid4_r &set_PID4(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid4_r &set_PID4(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid5_r - Peripheral ID byte 5 (reserved)
+struct pid5_r
+{
+#ifndef __cplusplus
+    union // C view: the PID5 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID5 : 32; // Byte 5 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid5_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid5_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid5_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID5() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID5() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid5_r &set_PID5(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid5_r &set_PID5(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid6_r - Peripheral ID byte 6 (reserved)
+struct pid6_r
+{
+#ifndef __cplusplus
+    union // C view: the PID6 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID6 : 32; // Byte 6 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid6_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid6_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid6_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID6() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID6() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid6_r &set_PID6(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid6_r &set_PID6(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid7_r - Peripheral ID byte 7 (reserved)
+struct pid7_r
+{
+#ifndef __cplusplus
+    union // C view: the PID7 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID7 : 32; // Byte 7 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid7_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid7_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid7_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID7() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID7() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid7_r &set_PID7(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid7_r &set_PID7(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid0_r - Peripheral ID byte 0. This is bits[7:0] of the part number
+struct pid0_r
+{
+#ifndef __cplusplus
+    union // C view: the PID0 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID0 : 32; // Byte 0 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid0_r() : word0(128) {} // default-constructed value is 128 (0x80)
+    CONSTEXPR pid0_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid0_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID0() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID0() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid0_r &set_PID0(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid0_r &set_PID0(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid1_r - Peripheral ID byte 1. This is bits[11:8] of the part number in bits[3:0], and bits[3:0] of the Arm ID in
+// bits[7:4]
+struct pid1_r
+{
+#ifndef __cplusplus
+    union // C view: the PID1 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID1 : 32; // Byte 1 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid1_r() : word0(181) {} // default-constructed value is 181 (0xB5)
+    CONSTEXPR pid1_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid1_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID1() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID1() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid1_r &set_PID1(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid1_r &set_PID1(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid2_r - Peripheral ID byte 2. This is bits[6:4] of the Arm ID in bits[2:0], and bit 3 indicates format B
+struct pid2_r
+{
+#ifndef __cplusplus
+    union // C view: the PID2 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID2 : 32; // Byte 2 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid2_r() : word0(11) {} // default-constructed value is 11 (0x0B)
+    CONSTEXPR pid2_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid2_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID2() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID2() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid2_r &set_PID2(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid2_r &set_PID2(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid3_r - Peripheral ID byte 3
+struct pid3_r
+{
+#ifndef __cplusplus
+    union // C view: the PID3 field shares storage with 'word'
+    {
+        struct
+        {
+            uint32_t PID3 : 32; // Byte 3 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // the whole 32-bit register value
+
+  public:
+    CONSTEXPR pid3_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid3_r(uint32_t init) : word0(init) {} // non-explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // returns void, so assignments cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile overload of the assignment above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // not const-qualified, so unavailable on const objects
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile overload of the conversion above
+    {
+        return word0;
+    }
+    pid3_r copy() volatile // returns a plain (non-volatile) copy by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID3() const // returns all 32 bits of word0; nothing masks it down to the low byte
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID3() const volatile // same read, callable on const volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid3_r &set_PID3(uint32_t value) // stores all 32 bits unmasked; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid3_r &set_PID3(uint32_t value) volatile // volatile overload; returns a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid0_r - Component ID byte 0
+struct cid0_r // Wraps a single uint32_t: a union in C builds, a private word with accessors in C++ builds
+{
+#ifndef __cplusplus
+    union // C view: the CID0 bitfield and the raw word share the same storage
+    {
+        struct
+        {
+            uint32_t CID0 : 32; // Byte 0 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // whole-register access
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; reachable only through the members below
+
+  public:
+    CONSTEXPR cid0_r() : word0(13) {} // default value 13 (0x0D)
+    CONSTEXPR cid0_r(uint32_t init) : word0(init) {} // construct from a raw register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same assignment, for volatile objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit conversion to the raw value
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same conversion, for volatile objects
+    {
+        return word0;
+    }
+    cid0_r copy() volatile // returns a by-value, non-volatile cid0_r made from this volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID0() const // CID0 is declared 32 bits wide, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID0() const volatile // same read, for volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid0_r &set_CID0(uint32_t value) // replaces the whole word and returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid0_r &set_CID0(uint32_t value) volatile // same write, for volatile objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid1_r - Component ID byte 1
+struct cid1_r // Wraps a single uint32_t: a union in C builds, a private word with accessors in C++ builds
+{
+#ifndef __cplusplus
+    union // C view: the CID1 bitfield and the raw word share the same storage
+    {
+        struct
+        {
+            uint32_t CID1 : 32; // Byte 1 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // whole-register access
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; reachable only through the members below
+
+  public:
+    CONSTEXPR cid1_r() : word0(240) {} // default value 240 (0xF0)
+    CONSTEXPR cid1_r(uint32_t init) : word0(init) {} // construct from a raw register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same assignment, for volatile objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit conversion to the raw value
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same conversion, for volatile objects
+    {
+        return word0;
+    }
+    cid1_r copy() volatile // returns a by-value, non-volatile cid1_r made from this volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID1() const // CID1 is declared 32 bits wide, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID1() const volatile // same read, for volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid1_r &set_CID1(uint32_t value) // replaces the whole word and returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid1_r &set_CID1(uint32_t value) volatile // same write, for volatile objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid2_r - Component ID byte 2
+struct cid2_r // Wraps a single uint32_t: a union in C builds, a private word with accessors in C++ builds
+{
+#ifndef __cplusplus
+    union // C view: the CID2 bitfield and the raw word share the same storage
+    {
+        struct
+        {
+            uint32_t CID2 : 32; // Byte 2 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // whole-register access
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; reachable only through the members below
+
+  public:
+    CONSTEXPR cid2_r() : word0(5) {} // default value 5 (0x05)
+    CONSTEXPR cid2_r(uint32_t init) : word0(init) {} // construct from a raw register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same assignment, for volatile objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit conversion to the raw value
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same conversion, for volatile objects
+    {
+        return word0;
+    }
+    cid2_r copy() volatile // returns a by-value, non-volatile cid2_r made from this volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID2() const // CID2 is declared 32 bits wide, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID2() const volatile // same read, for volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid2_r &set_CID2(uint32_t value) // replaces the whole word and returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid2_r &set_CID2(uint32_t value) volatile // same write, for volatile objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid3_r - Component ID byte 3
+struct cid3_r // Wraps a single uint32_t: a union in C builds, a private word with accessors in C++ builds
+{
+#ifndef __cplusplus
+    union // C view: the CID3 bitfield and the raw word share the same storage
+    {
+        struct
+        {
+            uint32_t CID3 : 32; // Byte 3 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // whole-register access
+    };
+#else
+  private:
+    uint32_t word0; // raw register value; reachable only through the members below
+
+  public:
+    CONSTEXPR cid3_r() : word0(177) {} // default value 177 (0xB1)
+    CONSTEXPR cid3_r(uint32_t init) : word0(init) {} // construct from a raw register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same assignment, for volatile objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit conversion to the raw value
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same conversion, for volatile objects
+    {
+        return word0;
+    }
+    cid3_r copy() volatile // returns a by-value, non-volatile cid3_r made from this volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID3() const // CID3 is declared 32 bits wide, so no mask or shift is applied
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID3() const volatile // same read, for volatile objects
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid3_r &set_CID3(uint32_t value) // replaces the whole word and returns *this
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid3_r &set_CID3(uint32_t value) volatile // same write, for volatile objects
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+struct NPU_REG
+{
+    STRUCT id_r ID;           // 0x0000
+    STRUCT status_r STATUS;   // 0x0004
+    STRUCT cmd_r CMD;         // 0x0008
+    STRUCT reset_r RESET;     // 0x000C
+    STRUCT qbase_r QBASE;     // 0x0010
+    STRUCT qread_r QREAD;     // 0x0018
+    STRUCT qconfig_r QCONFIG; // 0x001C
+    STRUCT qsize_r QSIZE;     // 0x0020
+    STRUCT prot_r PROT;       // 0x0024
+    STRUCT config_r CONFIG;   // 0x0028
+    STRUCT lock_r LOCK;       // 0x002C
+    uint32_t unused0[3];
+    STRUCT regioncfg_r REGIONCFG;   // 0x003C
+    STRUCT axi_limit0_r AXI_LIMIT0; // 0x0040
+    STRUCT axi_limit1_r AXI_LIMIT1; // 0x0044
+    STRUCT axi_limit2_r AXI_LIMIT2; // 0x0048
+    STRUCT axi_limit3_r AXI_LIMIT3; // 0x004C
+    uint32_t unused1[12];
+    STRUCT basep_r BASEP[8]; // 0x0080
+    uint32_t unused2[16];
+    STRUCT wd_status_r WD_STATUS;   // 0x0100
+    STRUCT mac_status_r MAC_STATUS; // 0x0104
+    STRUCT ao_status_r AO_STATUS;   // 0x0108
+    uint32_t unused3[1];
+    STRUCT dma_status0_r DMA_STATUS0; // 0x0110
+    STRUCT dma_status1_r DMA_STATUS1; // 0x0114
+    uint32_t unused4[10];
+    STRUCT clkforce_r CLKFORCE;           // 0x0140
+    STRUCT debug_address_r DEBUG_ADDRESS; // 0x0144
+    STRUCT debug_misc_r DEBUG_MISC;       // 0x0148
+    uint32_t unused5[1];
+    STRUCT debug_block_r DEBUG_BLOCK; // 0x0150
+    uint32_t unused6[11];
+    STRUCT pmcr_r PMCR;             // 0x0180
+    STRUCT pmcntenset_r PMCNTENSET; // 0x0184
+    STRUCT pmcntenclr_r PMCNTENCLR; // 0x0188
+    STRUCT pmovsset_r PMOVSSET;     // 0x018C
+    STRUCT pmovsclr_r PMOVSCLR;     // 0x0190
+    STRUCT pmintset_r PMINTSET;     // 0x0194
+    STRUCT pmintclr_r PMINTCLR;     // 0x0198
+    uint32_t unused7[1];
+    STRUCT pmccntr_r PMCCNTR;         // 0x01A0
+    STRUCT pmccntr_cfg_r PMCCNTR_CFG; // 0x01A8
+    STRUCT pmcaxi_chan_r PMCAXI_CHAN; // 0x01AC
+    uint32_t unused8[20];
+    STRUCT kernel_x_r KERNEL_X;                     // 0x0200
+    STRUCT kernel_y_r KERNEL_Y;                     // 0x0204
+    STRUCT kernel_w_m1_r KERNEL_W_M1;               // 0x0208
+    STRUCT kernel_h_m1_r KERNEL_H_M1;               // 0x020C
+    STRUCT ofm_cblk_width_m1_r OFM_CBLK_WIDTH_M1;   // 0x0210
+    STRUCT ofm_cblk_height_m1_r OFM_CBLK_HEIGHT_M1; // 0x0214
+    STRUCT ofm_cblk_depth_m1_r OFM_CBLK_DEPTH_M1;   // 0x0218
+    STRUCT ifm_cblk_depth_m1_r IFM_CBLK_DEPTH_M1;   // 0x021C
+    STRUCT ofm_x_r OFM_X;                           // 0x0220
+    STRUCT ofm_y_r OFM_Y;                           // 0x0224
+    STRUCT ofm_z_r OFM_Z;                           // 0x0228
+    STRUCT ifm_z_r IFM_Z;                           // 0x022C
+    STRUCT pad_top_r PAD_TOP;                       // 0x0230
+    STRUCT pad_left_r PAD_LEFT;                     // 0x0234
+    STRUCT ifm_cblk_width_r IFM_CBLK_WIDTH;         // 0x0238
+    STRUCT ifm_cblk_height_r IFM_CBLK_HEIGHT;       // 0x023C
+    STRUCT dma_ifm_src_r DMA_IFM_SRC;               // 0x0240
+    STRUCT dma_ifm_dst_r DMA_IFM_DST;               // 0x0248
+    STRUCT dma_ofm_src_r DMA_OFM_SRC;               // 0x024C
+    STRUCT dma_ofm_dst_r DMA_OFM_DST;               // 0x0250
+    STRUCT dma_weight_src_r DMA_WEIGHT_SRC;         // 0x0258
+    STRUCT dma_cmd_src_r DMA_CMD_SRC;               // 0x0260
+    STRUCT dma_cmd_size_r DMA_CMD_SIZE;             // 0x0268
+    STRUCT dma_m2m_src_r DMA_M2M_SRC;               // 0x026C
+    STRUCT dma_m2m_dst_r DMA_M2M_DST;               // 0x0274
+    STRUCT current_qread_r CURRENT_QREAD;           // 0x027C
+    STRUCT dma_scale_src_r DMA_SCALE_SRC;           // 0x0280
+    uint32_t unused9[11];
+    STRUCT current_block_r CURRENT_BLOCK; // 0x02B4
+    STRUCT current_op_r CURRENT_OP;       // 0x02B8
+    STRUCT current_cmd_r CURRENT_CMD;     // 0x02BC
+    uint32_t unused10[16];
+    STRUCT pmevcntr_r PMEVCNTR[4]; // 0x0300
+    uint32_t unused11[28];
+    STRUCT pmevtyper_r PMEVTYPER[4]; // 0x0380
+    uint32_t unused12[28];
+    STRUCT shared_buffer_r SHARED_BUFFER[256]; // 0x0400
+    STRUCT ifm_pad_top_r IFM_PAD_TOP;          // 0x0800
+    STRUCT ifm_pad_left_r IFM_PAD_LEFT;        // 0x0804
+    STRUCT ifm_pad_right_r IFM_PAD_RIGHT;      // 0x0808
+    STRUCT ifm_pad_bottom_r IFM_PAD_BOTTOM;    // 0x080C
+    STRUCT ifm_depth_m1_r IFM_DEPTH_M1;        // 0x0810
+    STRUCT ifm_precision_r IFM_PRECISION;      // 0x0814
+    uint32_t unused13[1];
+    STRUCT ifm_upscale_r IFM_UPSCALE; // 0x081C
+    uint32_t unused14[1];
+    STRUCT ifm_zero_point_r IFM_ZERO_POINT; // 0x0824
+    STRUCT ifm_width0_m1_r IFM_WIDTH0_M1;   // 0x0828
+    STRUCT ifm_height0_m1_r IFM_HEIGHT0_M1; // 0x082C
+    STRUCT ifm_height1_m1_r IFM_HEIGHT1_M1; // 0x0830
+    STRUCT ifm_ib_end_r IFM_IB_END;         // 0x0834
+    uint32_t unused15[1];
+    STRUCT ifm_region_r IFM_REGION; // 0x083C
+    uint32_t unused16[1];
+    STRUCT ofm_width_m1_r OFM_WIDTH_M1;           // 0x0844
+    STRUCT ofm_height_m1_r OFM_HEIGHT_M1;         // 0x0848
+    STRUCT ofm_depth_m1_r OFM_DEPTH_M1;           // 0x084C
+    STRUCT ofm_precision_r OFM_PRECISION;         // 0x0850
+    STRUCT ofm_blk_width_m1_r OFM_BLK_WIDTH_M1;   // 0x0854
+    STRUCT ofm_blk_height_m1_r OFM_BLK_HEIGHT_M1; // 0x0858
+    STRUCT ofm_blk_depth_m1_r OFM_BLK_DEPTH_M1;   // 0x085C
+    STRUCT ofm_zero_point_r OFM_ZERO_POINT;       // 0x0860
+    uint32_t unused17[1];
+    STRUCT ofm_width0_m1_r OFM_WIDTH0_M1;   // 0x0868
+    STRUCT ofm_height0_m1_r OFM_HEIGHT0_M1; // 0x086C
+    STRUCT ofm_height1_m1_r OFM_HEIGHT1_M1; // 0x0870
+    uint32_t unused18[2];
+    STRUCT ofm_region_r OFM_REGION;             // 0x087C
+    STRUCT kernel_width_m1_r KERNEL_WIDTH_M1;   // 0x0880
+    STRUCT kernel_height_m1_r KERNEL_HEIGHT_M1; // 0x0884
+    STRUCT kernel_stride_r KERNEL_STRIDE;       // 0x0888
+    uint32_t unused19[1];
+    STRUCT acc_format_r ACC_FORMAT;         // 0x0890
+    STRUCT activation_r ACTIVATION;         // 0x0894
+    STRUCT activation_min_r ACTIVATION_MIN; // 0x0898
+    STRUCT activation_max_r ACTIVATION_MAX; // 0x089C
+    STRUCT weight_region_r WEIGHT_REGION;   // 0x08A0
+    STRUCT scale_region_r SCALE_REGION;     // 0x08A4
+    uint32_t unused20[3];
+    STRUCT ab_start_r AB_START; // 0x08B4
+    uint32_t unused21[1];
+    STRUCT blockdep_r BLOCKDEP;               // 0x08BC
+    STRUCT dma0_src_region_r DMA0_SRC_REGION; // 0x08C0
+    STRUCT dma0_dst_region_r DMA0_DST_REGION; // 0x08C4
+    STRUCT dma0_size0_r DMA0_SIZE0;           // 0x08C8
+    STRUCT dma0_size1_r DMA0_SIZE1;           // 0x08CC
+    uint32_t unused22[12];
+    STRUCT ifm2_broadcast_r IFM2_BROADCAST; // 0x0900
+    STRUCT ifm2_scalar_r IFM2_SCALAR;       // 0x0904
+    uint32_t unused23[3];
+    STRUCT ifm2_precision_r IFM2_PRECISION; // 0x0914
+    uint32_t unused24[3];
+    STRUCT ifm2_zero_point_r IFM2_ZERO_POINT; // 0x0924
+    STRUCT ifm2_width0_m1_r IFM2_WIDTH0_M1;   // 0x0928
+    STRUCT ifm2_height0_m1_r IFM2_HEIGHT0_M1; // 0x092C
+    STRUCT ifm2_height1_m1_r IFM2_HEIGHT1_M1; // 0x0930
+    STRUCT ifm2_ib_start_r IFM2_IB_START;     // 0x0934
+    uint32_t unused25[1];
+    STRUCT ifm2_region_r IFM2_REGION; // 0x093C
+    uint32_t unused26[48];
+    STRUCT ifm_base0_r IFM_BASE0;       // 0x0A00
+    STRUCT ifm_base1_r IFM_BASE1;       // 0x0A08
+    STRUCT ifm_base2_r IFM_BASE2;       // 0x0A10
+    STRUCT ifm_base3_r IFM_BASE3;       // 0x0A18
+    STRUCT ifm_stride_x_r IFM_STRIDE_X; // 0x0A20
+    STRUCT ifm_stride_y_r IFM_STRIDE_Y; // 0x0A28
+    STRUCT ifm_stride_c_r IFM_STRIDE_C; // 0x0A30
+    uint32_t unused27[2];
+    STRUCT ofm_base0_r OFM_BASE0;       // 0x0A40
+    STRUCT ofm_base1_r OFM_BASE1;       // 0x0A48
+    STRUCT ofm_base2_r OFM_BASE2;       // 0x0A50
+    STRUCT ofm_base3_r OFM_BASE3;       // 0x0A58
+    STRUCT ofm_stride_x_r OFM_STRIDE_X; // 0x0A60
+    STRUCT ofm_stride_y_r OFM_STRIDE_Y; // 0x0A68
+    STRUCT ofm_stride_c_r OFM_STRIDE_C; // 0x0A70
+    uint32_t unused28[2];
+    STRUCT weight_base_r WEIGHT_BASE;         // 0x0A80
+    STRUCT weight_length_r WEIGHT_LENGTH;     // 0x0A88
+    STRUCT scale_base_r SCALE_BASE;           // 0x0A90
+    STRUCT scale_length_r SCALE_LENGTH;       // 0x0A98
+    STRUCT ofm_scale_r OFM_SCALE;             // 0x0AA0
+    STRUCT ofm_scale_shift_r OFM_SCALE_SHIFT; // 0x0AA4
+    STRUCT opa_scale_r OPA_SCALE;             // 0x0AA8
+    STRUCT opa_scale_shift_r OPA_SCALE_SHIFT; // 0x0AAC
+    STRUCT opb_scale_r OPB_SCALE;             // 0x0AB0
+    uint32_t unused29[3];
+    STRUCT dma0_src_r DMA0_SRC; // 0x0AC0
+    STRUCT dma0_dst_r DMA0_DST; // 0x0AC8
+    STRUCT dma0_len_r DMA0_LEN; // 0x0AD0
+    uint32_t unused30[10];
+    STRUCT ifm2_base0_r IFM2_BASE0;       // 0x0B00
+    STRUCT ifm2_base1_r IFM2_BASE1;       // 0x0B08
+    STRUCT ifm2_base2_r IFM2_BASE2;       // 0x0B10
+    STRUCT ifm2_base3_r IFM2_BASE3;       // 0x0B18
+    STRUCT ifm2_stride_x_r IFM2_STRIDE_X; // 0x0B20
+    STRUCT ifm2_stride_y_r IFM2_STRIDE_Y; // 0x0B28
+    STRUCT ifm2_stride_c_r IFM2_STRIDE_C; // 0x0B30
+    uint32_t unused31[18];
+    uint32_t USER_DEFINED[16]; // 0x0B80
+    uint32_t unused32[256];
+    STRUCT revision_r REVISION; // 0x0FC0
+    uint32_t unused33[3];
+    STRUCT pid4_r PID4; // 0x0FD0
+    STRUCT pid5_r PID5; // 0x0FD4
+    STRUCT pid6_r PID6; // 0x0FD8
+    STRUCT pid7_r PID7; // 0x0FDC
+    STRUCT pid0_r PID0; // 0x0FE0
+    STRUCT pid1_r PID1; // 0x0FE4
+    STRUCT pid2_r PID2; // 0x0FE8
+    STRUCT pid3_r PID3; // 0x0FEC
+    STRUCT cid0_r CID0; // 0x0FF0
+    STRUCT cid1_r CID1; // 0x0FF4
+    STRUCT cid2_r CID2; // 0x0FF8
+    STRUCT cid3_r CID3; // 0x0FFC
+
+#ifdef __cplusplus
+    enum class access_type_t : uint8_t
+    {
+        RW,
+        RO,
+        WO
+    };
+    NPU_REG() // Default constructor (C++ builds only): initialises the register members by calling reset()
+    {
+        reset(); // all default values live in reset(); none are set here directly
+    }
+    void reset() // Assigns fixed default values to the register members listed below; no unusedN padding array is written here
+    {
+        ID         = 269500929; // 0x10104201
+        STATUS     = 8;
+        CMD        = 12; // 0x0C
+        RESET      = 0;
+        QBASE      = 0;
+        QREAD      = 0;
+        QCONFIG    = 0;
+        QSIZE      = 0;
+        PROT       = 0;
+        CONFIG     = 0;
+        LOCK       = 0;
+        REGIONCFG  = 0;
+        AXI_LIMIT0 = 0;
+        AXI_LIMIT1 = 0;
+        AXI_LIMIT2 = 0;
+        AXI_LIMIT3 = 0;
+        for (size_t i = 0; i < (sizeof(BASEP) / sizeof(BASEP[0])); ++i) // element count is derived from the array itself
+            BASEP[i] = 0;
+        WD_STATUS          = 0;
+        MAC_STATUS         = 0;
+        AO_STATUS          = 0;
+        DMA_STATUS0        = 0;
+        DMA_STATUS1        = 0;
+        CLKFORCE           = 0;
+        DEBUG_ADDRESS      = 0;
+        DEBUG_MISC         = 0;
+        DEBUG_BLOCK        = 0;
+        PMCR               = 8192; // 0x2000
+        PMCNTENSET         = 0;
+        PMCNTENCLR         = 0;
+        PMOVSSET           = 0;
+        PMOVSCLR           = 0;
+        PMINTSET           = 0;
+        PMINTCLR           = 0;
+        PMCCNTR            = 0;
+        PMCCNTR_CFG        = 0;
+        PMCAXI_CHAN        = 0;
+        KERNEL_X           = 0;
+        KERNEL_Y           = 0;
+        KERNEL_W_M1        = 0;
+        KERNEL_H_M1        = 0;
+        OFM_CBLK_WIDTH_M1  = 0;
+        OFM_CBLK_HEIGHT_M1 = 0;
+        OFM_CBLK_DEPTH_M1  = 0;
+        IFM_CBLK_DEPTH_M1  = 0;
+        OFM_X              = 0;
+        OFM_Y              = 0;
+        OFM_Z              = 0;
+        IFM_Z              = 0;
+        PAD_TOP            = 0;
+        PAD_LEFT           = 0;
+        IFM_CBLK_WIDTH     = 0;
+        IFM_CBLK_HEIGHT    = 0;
+        DMA_IFM_SRC        = 0;
+        DMA_IFM_DST        = 0;
+        DMA_OFM_SRC        = 0;
+        DMA_OFM_DST        = 0;
+        DMA_WEIGHT_SRC     = 0;
+        DMA_CMD_SRC        = 0;
+        DMA_CMD_SIZE       = 0;
+        DMA_M2M_SRC        = 0;
+        DMA_M2M_DST        = 0;
+        CURRENT_QREAD      = 0;
+        DMA_SCALE_SRC      = 0;
+        CURRENT_BLOCK      = 0;
+        CURRENT_OP         = 0;
+        CURRENT_CMD        = 0;
+        for (size_t i = 0; i < (sizeof(PMEVCNTR) / sizeof(PMEVCNTR[0])); ++i) // zero every PMEVCNTR element
+            PMEVCNTR[i] = 0;
+        for (size_t i = 0; i < (sizeof(PMEVTYPER) / sizeof(PMEVTYPER[0])); ++i) // zero every PMEVTYPER element
+            PMEVTYPER[i] = 0;
+        for (size_t i = 0; i < (sizeof(SHARED_BUFFER) / sizeof(SHARED_BUFFER[0])); ++i) // zero every SHARED_BUFFER element
+            SHARED_BUFFER[i] = 0;
+        IFM_PAD_TOP       = 0;
+        IFM_PAD_LEFT      = 0;
+        IFM_PAD_RIGHT     = 0;
+        IFM_PAD_BOTTOM    = 0;
+        IFM_DEPTH_M1      = 0;
+        IFM_PRECISION     = 0;
+        IFM_UPSCALE       = 0;
+        IFM_ZERO_POINT    = 0;
+        IFM_WIDTH0_M1     = 0;
+        IFM_HEIGHT0_M1    = 0;
+        IFM_HEIGHT1_M1    = 0;
+        IFM_IB_END        = 0;
+        IFM_REGION        = 0;
+        OFM_WIDTH_M1      = 0;
+        OFM_HEIGHT_M1     = 0;
+        OFM_DEPTH_M1      = 0;
+        OFM_PRECISION     = 0;
+        OFM_BLK_WIDTH_M1  = 0;
+        OFM_BLK_HEIGHT_M1 = 0;
+        OFM_BLK_DEPTH_M1  = 0;
+        OFM_ZERO_POINT    = 0;
+        OFM_WIDTH0_M1     = 0;
+        OFM_HEIGHT0_M1    = 0;
+        OFM_HEIGHT1_M1    = 0;
+        OFM_REGION        = 0;
+        KERNEL_WIDTH_M1   = 0;
+        KERNEL_HEIGHT_M1  = 0;
+        KERNEL_STRIDE     = 0;
+        ACC_FORMAT        = 0;
+        ACTIVATION        = 0;
+        ACTIVATION_MIN    = 0;
+        ACTIVATION_MAX    = 0;
+        WEIGHT_REGION     = 0;
+        SCALE_REGION      = 0;
+        AB_START          = 0;
+        BLOCKDEP          = 0;
+        DMA0_SRC_REGION   = 0;
+        DMA0_DST_REGION   = 0;
+        DMA0_SIZE0        = 0;
+        DMA0_SIZE1        = 0;
+        IFM2_BROADCAST    = 0;
+        IFM2_SCALAR       = 0;
+        IFM2_PRECISION    = 0;
+        IFM2_ZERO_POINT   = 0;
+        IFM2_WIDTH0_M1    = 0;
+        IFM2_HEIGHT0_M1   = 0;
+        IFM2_HEIGHT1_M1   = 0;
+        IFM2_IB_START     = 0;
+        IFM2_REGION       = 0;
+        IFM_BASE0         = 0;
+        IFM_BASE1         = 0;
+        IFM_BASE2         = 0;
+        IFM_BASE3         = 0;
+        IFM_STRIDE_X      = 0;
+        IFM_STRIDE_Y      = 0;
+        IFM_STRIDE_C      = 0;
+        OFM_BASE0         = 0;
+        OFM_BASE1         = 0;
+        OFM_BASE2         = 0;
+        OFM_BASE3         = 0;
+        OFM_STRIDE_X      = 0;
+        OFM_STRIDE_Y      = 0;
+        OFM_STRIDE_C      = 0;
+        WEIGHT_BASE       = 0;
+        WEIGHT_LENGTH     = 0;
+        SCALE_BASE        = 0;
+        SCALE_LENGTH      = 0;
+        OFM_SCALE         = 0;
+        OFM_SCALE_SHIFT   = 0;
+        OPA_SCALE         = 0;
+        OPA_SCALE_SHIFT   = 0;
+        OPB_SCALE         = 0;
+        DMA0_SRC          = 0;
+        DMA0_DST          = 0;
+        DMA0_LEN          = 0;
+        IFM2_BASE0        = 0;
+        IFM2_BASE1        = 0;
+        IFM2_BASE2        = 0;
+        IFM2_BASE3        = 0;
+        IFM2_STRIDE_X     = 0;
+        IFM2_STRIDE_Y     = 0;
+        IFM2_STRIDE_C     = 0;
+        for (size_t i = 0; i < (sizeof(USER_DEFINED) / sizeof(USER_DEFINED[0])); ++i) // zero every USER_DEFINED word
+            USER_DEFINED[i] = 0;
+        REVISION = 0;
+        PID4     = 4;
+        PID5     = 0;
+        PID6     = 0;
+        PID7     = 0;
+        PID0     = 128; // 0x80
+        PID1     = 181; // 0xB5
+        PID2     = 11;  // 0x0B, same value as the pid2_r default constructor
+        PID3     = 0;   // same value as the pid3_r default constructor
+        CID0     = 13;  // 0x0D, same value as the cid0_r default constructor
+        CID1     = 240; // 0xF0, same value as the cid1_r default constructor
+        CID2     = 5;   // 0x05, same value as the cid2_r default constructor
+        CID3     = 177; // 0xB1, same value as the cid3_r default constructor
+    }
+    uint32_t &operator[](const int addr_offset) // addr_offset is a byte offset from the start of this struct; returns a reference to the 32-bit word there
+    {
+        return reinterpret_cast<uint32_t *>(this)[addr_offset / 4]; // integer division by 4 picks the word index; no range or alignment check is made
+    }
+    access_type_t get_access_type(uint32_t offset)
+    {
+        switch (offset)
+        {
+        case 0:
             return access_type_t::RO;
-        case 700:
+        case 4:
             return access_type_t::RO;
-        case 2048:
-            return access_type_t::RW;
-        case 2052:
-            return access_type_t::RW;
-        case 2056:
-            return access_type_t::RW;
-        case 2060:
-            return access_type_t::RW;
-        case 2064:
-            return access_type_t::RW;
-        case 2068:
-            return access_type_t::RW;
-        case 2076:
-            return access_type_t::RW;
-        case 2084:
-            return access_type_t::RW;
-        case 2088:
-            return access_type_t::RW;
-        case 2092:
-            return access_type_t::RW;
-        case 2096:
-            return access_type_t::RW;
-        case 2100:
-            return access_type_t::RW;
-        case 2108:
-            return access_type_t::RW;
-        case 2116:
-            return access_type_t::RW;
-        case 2120:
-            return access_type_t::RW;
-        case 2124:
-            return access_type_t::RW;
-        case 2128:
-            return access_type_t::RW;
-        case 2132:
-            return access_type_t::RW;
-        case 2136:
-            return access_type_t::RW;
-        case 2140:
-            return access_type_t::RW;
-        case 2144:
-            return access_type_t::RW;
-        case 2152:
-            return access_type_t::RW;
-        case 2156:
-            return access_type_t::RW;
-        case 2160:
-            return access_type_t::RW;
-        case 2172:
-            return access_type_t::RW;
-        case 2176:
-            return access_type_t::RW;
-        case 2180:
-            return access_type_t::RW;
-        case 2184:
-            return access_type_t::RW;
-        case 2188:
-            return access_type_t::RW;
-        case 2192:
-            return access_type_t::RW;
-        case 2196:
-            return access_type_t::RW;
-        case 2200:
-            return access_type_t::RW;
-        case 2204:
-            return access_type_t::RW;
-        case 2208:
-            return access_type_t::RW;
-        case 2212:
-            return access_type_t::RW;
-        case 2228:
-            return access_type_t::RW;
-        case 2236:
-            return access_type_t::RW;
-        case 2240:
-            return access_type_t::RW;
-        case 2244:
-            return access_type_t::RW;
-        case 2248:
-            return access_type_t::RW;
-        case 2252:
-            return access_type_t::RW;
-        case 2304:
-            return access_type_t::RW;
-        case 2308:
-            return access_type_t::RW;
-        case 2324:
-            return access_type_t::RW;
-        case 2340:
-            return access_type_t::RW;
-        case 2344:
-            return access_type_t::RW;
-        case 2348:
-            return access_type_t::RW;
-        case 2352:
-            return access_type_t::RW;
-        case 2356:
-            return access_type_t::RW;
-        case 2364:
-            return access_type_t::RW;
-        case 2560:
-            return access_type_t::RW;
-        case 2564:
-            return access_type_t::RW;
-        case 2568:
-            return access_type_t::RW;
-        case 2572:
-            return access_type_t::RW;
-        case 2576:
-            return access_type_t::RW;
-        case 2580:
-            return access_type_t::RW;
-        case 2584:
-            return access_type_t::RW;
-        case 2588:
-            return access_type_t::RW;
-        case 2592:
-            return access_type_t::RW;
-        case 2596:
-            return access_type_t::RW;
-        case 2600:
-            return access_type_t::RW;
-        case 2604:
-            return access_type_t::RW;
-        case 2608:
-            return access_type_t::RW;
-        case 2612:
-            return access_type_t::RW;
-        case 2624:
-            return access_type_t::RW;
-        case 2628:
-            return access_type_t::RW;
-        case 2632:
-            return access_type_t::RW;
-        case 2636:
-            return access_type_t::RW;
-        case 2640:
-            return access_type_t::RW;
-        case 2644:
-            return access_type_t::RW;
-        case 2648:
-            return access_type_t::RW;
-        case 2652:
-            return access_type_t::RW;
-        case 2656:
-            return access_type_t::RW;
-        case 2660:
-            return access_type_t::RW;
-        case 2664:
-            return access_type_t::RW;
-        case 2668:
-            return access_type_t::RW;
-        case 2672:
-            return access_type_t::RW;
-        case 2676:
-            return access_type_t::RW;
-        case 2688:
-            return access_type_t::RW;
-        case 2692:
-            return access_type_t::RW;
-        case 2696:
-            return access_type_t::RW;
-        case 2704:
-            return access_type_t::RW;
-        case 2708:
-            return access_type_t::RW;
-        case 2712:
-            return access_type_t::RW;
-        case 2720:
-            return access_type_t::RW;
-        case 2724:
-            return access_type_t::RW;
-        case 2728:
-            return access_type_t::RW;
-        case 2732:
-            return access_type_t::RW;
-        case 2736:
-            return access_type_t::RW;
-        case 2752:
-            return access_type_t::RW;
-        case 2756:
-            return access_type_t::RW;
-        case 2760:
-            return access_type_t::RW;
-        case 2764:
-            return access_type_t::RW;
-        case 2768:
-            return access_type_t::RW;
-        case 2772:
-            return access_type_t::RW;
-        case 2776:
-            return access_type_t::RW;
-        case 2780:
+        case 8:
             return access_type_t::RW;
-        case 2784:
+        case 12:
             return access_type_t::RW;
-        case 2788:
+        case 16:
             return access_type_t::RW;
-        case 2816:
+        case 24:
+            return access_type_t::RO;
+        case 28:
             return access_type_t::RW;
-        case 2820:
+        case 32:
             return access_type_t::RW;
-        case 2824:
+        case 36:
+            return access_type_t::RO;
+        case 40:
+            return access_type_t::RO;
+        case 44:
             return access_type_t::RW;
-        case 2828:
+        case 60:
             return access_type_t::RW;
-        case 2832:
+        case 64:
             return access_type_t::RW;
-        case 2836:
+        case 68:
             return access_type_t::RW;
-        case 2840:
+        case 72:
             return access_type_t::RW;
-        case 2844:
+        case 76:
             return access_type_t::RW;
-        case 2848:
+        case 128:
             return access_type_t::RW;
-        case 2852:
+        case 136:
             return access_type_t::RW;
-        case 2856:
+        case 144:
             return access_type_t::RW;
-        case 2860:
+        case 152:
             return access_type_t::RW;
-        case 2864:
+        case 160:
             return access_type_t::RW;
-        case 2868:
+        case 168:
             return access_type_t::RW;
-        case 2880:
+        case 176:
             return access_type_t::RW;
-        case 2884:
+        case 184:
             return access_type_t::RW;
-        case 2888:
+        case 256:
+            return access_type_t::RO;
+        case 260:
+            return access_type_t::RO;
+        case 264:
+            return access_type_t::RO;
+        case 272:
+            return access_type_t::RO;
+        case 276:
+            return access_type_t::RO;
+        case 320:
             return access_type_t::RW;
-        case 2896:
+        case 324:
             return access_type_t::RW;
-        case 2900:
+        case 328:
             return access_type_t::RW;
-        case 2904:
+        case 336:
             return access_type_t::RW;
         case 384:
             return access_type_t::RW;
@@ -8770,12 +16357,70 @@ struct NPU_REG
             return access_type_t::RW;
         case 416:
             return access_type_t::RW;
-        case 420:
-            return access_type_t::RW;
         case 424:
             return access_type_t::RW;
         case 428:
             return access_type_t::RW;
+        case 512:
+            return access_type_t::RO;
+        case 516:
+            return access_type_t::RO;
+        case 520:
+            return access_type_t::RO;
+        case 524:
+            return access_type_t::RO;
+        case 528:
+            return access_type_t::RO;
+        case 532:
+            return access_type_t::RO;
+        case 536:
+            return access_type_t::RO;
+        case 540:
+            return access_type_t::RO;
+        case 544:
+            return access_type_t::RO;
+        case 548:
+            return access_type_t::RO;
+        case 552:
+            return access_type_t::RO;
+        case 556:
+            return access_type_t::RO;
+        case 560:
+            return access_type_t::RO;
+        case 564:
+            return access_type_t::RO;
+        case 568:
+            return access_type_t::RO;
+        case 572:
+            return access_type_t::RO;
+        case 576:
+            return access_type_t::RO;
+        case 584:
+            return access_type_t::RO;
+        case 588:
+            return access_type_t::RO;
+        case 592:
+            return access_type_t::RO;
+        case 600:
+            return access_type_t::RO;
+        case 608:
+            return access_type_t::RO;
+        case 616:
+            return access_type_t::RO;
+        case 620:
+            return access_type_t::RO;
+        case 628:
+            return access_type_t::RO;
+        case 636:
+            return access_type_t::RO;
+        case 640:
+            return access_type_t::RO;
+        case 692:
+            return access_type_t::RO;
+        case 696:
+            return access_type_t::RO;
+        case 700:
+            return access_type_t::RO;
         case 768:
             return access_type_t::RW;
         case 772:
@@ -9304,4492 +16949,8880 @@ struct NPU_REG
             return access_type_t::RW;
         case 2044:
             return access_type_t::RW;
+        case 2048:
+            return access_type_t::RW;
+        case 2052:
+            return access_type_t::RW;
+        case 2056:
+            return access_type_t::RW;
+        case 2060:
+            return access_type_t::RW;
+        case 2064:
+            return access_type_t::RW;
+        case 2068:
+            return access_type_t::RW;
+        case 2076:
+            return access_type_t::RW;
+        case 2084:
+            return access_type_t::RW;
+        case 2088:
+            return access_type_t::RW;
+        case 2092:
+            return access_type_t::RW;
+        case 2096:
+            return access_type_t::RW;
+        case 2100:
+            return access_type_t::RW;
+        case 2108:
+            return access_type_t::RW;
+        case 2116:
+            return access_type_t::RW;
+        case 2120:
+            return access_type_t::RW;
+        case 2124:
+            return access_type_t::RW;
+        case 2128:
+            return access_type_t::RW;
+        case 2132:
+            return access_type_t::RW;
+        case 2136:
+            return access_type_t::RW;
+        case 2140:
+            return access_type_t::RW;
+        case 2144:
+            return access_type_t::RW;
+        case 2152:
+            return access_type_t::RW;
+        case 2156:
+            return access_type_t::RW;
+        case 2160:
+            return access_type_t::RW;
+        case 2172:
+            return access_type_t::RW;
+        case 2176:
+            return access_type_t::RW;
+        case 2180:
+            return access_type_t::RW;
+        case 2184:
+            return access_type_t::RW;
+        case 2192:
+            return access_type_t::RW;
+        case 2196:
+            return access_type_t::RW;
+        case 2200:
+            return access_type_t::RW;
+        case 2204:
+            return access_type_t::RW;
+        case 2208:
+            return access_type_t::RW;
+        case 2212:
+            return access_type_t::RW;
+        case 2228:
+            return access_type_t::RW;
+        case 2236:
+            return access_type_t::RW;
+        case 2240:
+            return access_type_t::RW;
+        case 2244:
+            return access_type_t::RW;
+        case 2248:
+            return access_type_t::RW;
+        case 2252:
+            return access_type_t::RW;
+        case 2304:
+            return access_type_t::RW;
+        case 2308:
+            return access_type_t::RW;
+        case 2324:
+            return access_type_t::RW;
+        case 2340:
+            return access_type_t::RW;
+        case 2344:
+            return access_type_t::RW;
+        case 2348:
+            return access_type_t::RW;
+        case 2352:
+            return access_type_t::RW;
+        case 2356:
+            return access_type_t::RW;
+        case 2364:
+            return access_type_t::RW;
+        case 2560:
+            return access_type_t::RW;
+        case 2568:
+            return access_type_t::RW;
+        case 2576:
+            return access_type_t::RW;
+        case 2584:
+            return access_type_t::RW;
+        case 2592:
+            return access_type_t::RW;
+        case 2600:
+            return access_type_t::RW;
+        case 2608:
+            return access_type_t::RW;
+        case 2624:
+            return access_type_t::RW;
+        case 2632:
+            return access_type_t::RW;
+        case 2640:
+            return access_type_t::RW;
+        case 2648:
+            return access_type_t::RW;
+        case 2656:
+            return access_type_t::RW;
+        case 2664:
+            return access_type_t::RW;
+        case 2672:
+            return access_type_t::RW;
+        case 2688:
+            return access_type_t::RW;
+        case 2696:
+            return access_type_t::RW;
+        case 2704:
+            return access_type_t::RW;
+        case 2712:
+            return access_type_t::RW;
+        case 2720:
+            return access_type_t::RW;
+        case 2724:
+            return access_type_t::RW;
+        case 2728:
+            return access_type_t::RW;
+        case 2732:
+            return access_type_t::RW;
+        case 2736:
+            return access_type_t::RW;
+        case 2752:
+            return access_type_t::RW;
+        case 2760:
+            return access_type_t::RW;
+        case 2768:
+            return access_type_t::RW;
+        case 2816:
+            return access_type_t::RW;
+        case 2824:
+            return access_type_t::RW;
+        case 2832:
+            return access_type_t::RW;
+        case 2840:
+            return access_type_t::RW;
+        case 2848:
+            return access_type_t::RW;
+        case 2856:
+            return access_type_t::RW;
+        case 2864:
+            return access_type_t::RW;
+        case 2944:
+            return access_type_t::RW;
+        case 2952:
+            return access_type_t::RW;
+        case 2960:
+            return access_type_t::RW;
+        case 2968:
+            return access_type_t::RW;
+        case 2976:
+            return access_type_t::RW;
+        case 2984:
+            return access_type_t::RW;
+        case 2992:
+            return access_type_t::RW;
+        case 3000:
+            return access_type_t::RW;
+        case 4032:
+            return access_type_t::RO;
+        case 4048:
+            return access_type_t::RO;
+        case 4052:
+            return access_type_t::RO;
+        case 4056:
+            return access_type_t::RO;
+        case 4060:
+            return access_type_t::RO;
+        case 4064:
+            return access_type_t::RO;
+        case 4068:
+            return access_type_t::RO;
+        case 4072:
+            return access_type_t::RO;
+        case 4076:
+            return access_type_t::RO;
+        case 4080:
+            return access_type_t::RO;
+        case 4084:
+            return access_type_t::RO;
+        case 4088:
+            return access_type_t::RO;
+        case 4092:
+            return access_type_t::RO;
         default:
-            throw std::runtime_error("invalid register address");
+            return access_type_t::RO;
+        }
+    }
+#endif
+};
+
+#ifdef __cplusplus
+struct isa
+{
+#ifdef NPU_DISASSEMBLE
+    static int disassemble(const uint32_t *in,
+                           std::string &op,
+                           std::vector<std::pair<std::string, std::string>> &fields)
+    {
+        switch (*in & 0xffff)
+        {
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP):
+        {
+            const npu_op_stop_t &v = *reinterpret_cast<const npu_op_stop_t *>(in);
+            op                     = "NPU_OP_STOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ):
+        {
+            const npu_op_irq_t &v = *reinterpret_cast<const npu_op_irq_t *>(in);
+            op                    = "NPU_OP_IRQ";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV):
+        {
+            const npu_op_conv_t &v = *reinterpret_cast<const npu_op_conv_t *>(in);
+            op                     = "NPU_OP_CONV";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE):
+        {
+            const npu_op_depthwise_t &v = *reinterpret_cast<const npu_op_depthwise_t *>(in);
+            op                          = "NPU_OP_DEPTHWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL):
+        {
+            const npu_op_pool_t &v = *reinterpret_cast<const npu_op_pool_t *>(in);
+            op                     = "NPU_OP_POOL";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE):
+        {
+            const npu_op_elementwise_t &v = *reinterpret_cast<const npu_op_elementwise_t *>(in);
+            op                            = "NPU_OP_ELEMENTWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START):
+        {
+            const npu_op_dma_start_t &v = *reinterpret_cast<const npu_op_dma_start_t *>(in);
+            op                          = "NPU_OP_DMA_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT):
+        {
+            const npu_op_dma_wait_t &v = *reinterpret_cast<const npu_op_dma_wait_t *>(in);
+            op                         = "NPU_OP_DMA_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT):
+        {
+            const npu_op_kernel_wait_t &v = *reinterpret_cast<const npu_op_kernel_wait_t *>(in);
+            op                            = "NPU_OP_KERNEL_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK):
+        {
+            const npu_op_pmu_mask_t &v = *reinterpret_cast<const npu_op_pmu_mask_t *>(in);
+            op                         = "NPU_OP_PMU_MASK";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP):
+        {
+            const npu_set_ifm_pad_top_t &v = *reinterpret_cast<const npu_set_ifm_pad_top_t *>(in);
+            op                             = "NPU_SET_IFM_PAD_TOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT):
+        {
+            const npu_set_ifm_pad_left_t &v = *reinterpret_cast<const npu_set_ifm_pad_left_t *>(in);
+            op                              = "NPU_SET_IFM_PAD_LEFT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT):
+        {
+            const npu_set_ifm_pad_right_t &v = *reinterpret_cast<const npu_set_ifm_pad_right_t *>(in);
+            op                               = "NPU_SET_IFM_PAD_RIGHT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM):
+        {
+            const npu_set_ifm_pad_bottom_t &v = *reinterpret_cast<const npu_set_ifm_pad_bottom_t *>(in);
+            op                                = "NPU_SET_IFM_PAD_BOTTOM";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1):
+        {
+            const npu_set_ifm_depth_m1_t &v = *reinterpret_cast<const npu_set_ifm_depth_m1_t *>(in);
+            op                              = "NPU_SET_IFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION):
+        {
+            const npu_set_ifm_precision_t &v = *reinterpret_cast<const npu_set_ifm_precision_t *>(in);
+            op                               = "NPU_SET_IFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE):
+        {
+            const npu_set_ifm_upscale_t &v = *reinterpret_cast<const npu_set_ifm_upscale_t *>(in);
+            op                             = "NPU_SET_IFM_UPSCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT):
+        {
+            const npu_set_ifm_zero_point_t &v = *reinterpret_cast<const npu_set_ifm_zero_point_t *>(in);
+            op                                = "NPU_SET_IFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1):
+        {
+            const npu_set_ifm_width0_m1_t &v = *reinterpret_cast<const npu_set_ifm_width0_m1_t *>(in);
+            op                               = "NPU_SET_IFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1):
+        {
+            const npu_set_ifm_height0_m1_t &v = *reinterpret_cast<const npu_set_ifm_height0_m1_t *>(in);
+            op                                = "NPU_SET_IFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1):
+        {
+            const npu_set_ifm_height1_m1_t &v = *reinterpret_cast<const npu_set_ifm_height1_m1_t *>(in);
+            op                                = "NPU_SET_IFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END):
+        {
+            const npu_set_ifm_ib_end_t &v = *reinterpret_cast<const npu_set_ifm_ib_end_t *>(in);
+            op                            = "NPU_SET_IFM_IB_END";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION):
+        {
+            const npu_set_ifm_region_t &v = *reinterpret_cast<const npu_set_ifm_region_t *>(in);
+            op                            = "NPU_SET_IFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1):
+        {
+            const npu_set_ofm_width_m1_t &v = *reinterpret_cast<const npu_set_ofm_width_m1_t *>(in);
+            op                              = "NPU_SET_OFM_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1):
+        {
+            const npu_set_ofm_height_m1_t &v = *reinterpret_cast<const npu_set_ofm_height_m1_t *>(in);
+            op                               = "NPU_SET_OFM_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1):
+        {
+            const npu_set_ofm_depth_m1_t &v = *reinterpret_cast<const npu_set_ofm_depth_m1_t *>(in);
+            op                              = "NPU_SET_OFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION):
+        {
+            const npu_set_ofm_precision_t &v = *reinterpret_cast<const npu_set_ofm_precision_t *>(in);
+            op                               = "NPU_SET_OFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1):
+        {
+            const npu_set_ofm_blk_width_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_width_m1_t *>(in);
+            op                                  = "NPU_SET_OFM_BLK_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1):
+        {
+            const npu_set_ofm_blk_height_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_height_m1_t *>(in);
+            op                                   = "NPU_SET_OFM_BLK_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1):
+        {
+            const npu_set_ofm_blk_depth_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_depth_m1_t *>(in);
+            op                                  = "NPU_SET_OFM_BLK_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT):
+        {
+            const npu_set_ofm_zero_point_t &v = *reinterpret_cast<const npu_set_ofm_zero_point_t *>(in);
+            op                                = "NPU_SET_OFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1):
+        {
+            const npu_set_ofm_width0_m1_t &v = *reinterpret_cast<const npu_set_ofm_width0_m1_t *>(in);
+            op                               = "NPU_SET_OFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1):
+        {
+            const npu_set_ofm_height0_m1_t &v = *reinterpret_cast<const npu_set_ofm_height0_m1_t *>(in);
+            op                                = "NPU_SET_OFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1):
+        {
+            const npu_set_ofm_height1_m1_t &v = *reinterpret_cast<const npu_set_ofm_height1_m1_t *>(in);
+            op                                = "NPU_SET_OFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION):
+        {
+            const npu_set_ofm_region_t &v = *reinterpret_cast<const npu_set_ofm_region_t *>(in);
+            op                            = "NPU_SET_OFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1):
+        {
+            const npu_set_kernel_width_m1_t &v = *reinterpret_cast<const npu_set_kernel_width_m1_t *>(in);
+            op                                 = "NPU_SET_KERNEL_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1):
+        {
+            const npu_set_kernel_height_m1_t &v = *reinterpret_cast<const npu_set_kernel_height_m1_t *>(in);
+            op                                  = "NPU_SET_KERNEL_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE):
+        {
+            const npu_set_kernel_stride_t &v = *reinterpret_cast<const npu_set_kernel_stride_t *>(in);
+            op                               = "NPU_SET_KERNEL_STRIDE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT):
+        {
+            const npu_set_acc_format_t &v = *reinterpret_cast<const npu_set_acc_format_t *>(in);
+            op                            = "NPU_SET_ACC_FORMAT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION):
+        {
+            const npu_set_activation_t &v = *reinterpret_cast<const npu_set_activation_t *>(in);
+            op                            = "NPU_SET_ACTIVATION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN):
+        {
+            const npu_set_activation_min_t &v = *reinterpret_cast<const npu_set_activation_min_t *>(in);
+            op                                = "NPU_SET_ACTIVATION_MIN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX):
+        {
+            const npu_set_activation_max_t &v = *reinterpret_cast<const npu_set_activation_max_t *>(in);
+            op                                = "NPU_SET_ACTIVATION_MAX";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION):
+        {
+            const npu_set_weight_region_t &v = *reinterpret_cast<const npu_set_weight_region_t *>(in);
+            op                               = "NPU_SET_WEIGHT_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION):
+        {
+            const npu_set_scale_region_t &v = *reinterpret_cast<const npu_set_scale_region_t *>(in);
+            op                              = "NPU_SET_SCALE_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START):
+        {
+            const npu_set_ab_start_t &v = *reinterpret_cast<const npu_set_ab_start_t *>(in);
+            op                          = "NPU_SET_AB_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP):
+        {
+            const npu_set_blockdep_t &v = *reinterpret_cast<const npu_set_blockdep_t *>(in);
+            op                          = "NPU_SET_BLOCKDEP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION):
+        {
+            const npu_set_dma0_src_region_t &v = *reinterpret_cast<const npu_set_dma0_src_region_t *>(in);
+            op                                 = "NPU_SET_DMA0_SRC_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION):
+        {
+            const npu_set_dma0_dst_region_t &v = *reinterpret_cast<const npu_set_dma0_dst_region_t *>(in);
+            op                                 = "NPU_SET_DMA0_DST_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0):
+        {
+            const npu_set_dma0_size0_t &v = *reinterpret_cast<const npu_set_dma0_size0_t *>(in);
+            op                            = "NPU_SET_DMA0_SIZE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1):
+        {
+            const npu_set_dma0_size1_t &v = *reinterpret_cast<const npu_set_dma0_size1_t *>(in);
+            op                            = "NPU_SET_DMA0_SIZE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST):
+        {
+            const npu_set_ifm2_broadcast_t &v = *reinterpret_cast<const npu_set_ifm2_broadcast_t *>(in);
+            op                                = "NPU_SET_IFM2_BROADCAST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR):
+        {
+            const npu_set_ifm2_scalar_t &v = *reinterpret_cast<const npu_set_ifm2_scalar_t *>(in);
+            op                             = "NPU_SET_IFM2_SCALAR";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION):
+        {
+            const npu_set_ifm2_precision_t &v = *reinterpret_cast<const npu_set_ifm2_precision_t *>(in);
+            op                                = "NPU_SET_IFM2_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT):
+        {
+            const npu_set_ifm2_zero_point_t &v = *reinterpret_cast<const npu_set_ifm2_zero_point_t *>(in);
+            op                                 = "NPU_SET_IFM2_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1):
+        {
+            const npu_set_ifm2_width0_m1_t &v = *reinterpret_cast<const npu_set_ifm2_width0_m1_t *>(in);
+            op                                = "NPU_SET_IFM2_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1):
+        {
+            const npu_set_ifm2_height0_m1_t &v = *reinterpret_cast<const npu_set_ifm2_height0_m1_t *>(in);
+            op                                 = "NPU_SET_IFM2_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1):
+        {
+            const npu_set_ifm2_height1_m1_t &v = *reinterpret_cast<const npu_set_ifm2_height1_m1_t *>(in);
+            op                                 = "NPU_SET_IFM2_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START):
+        {
+            const npu_set_ifm2_ib_start_t &v = *reinterpret_cast<const npu_set_ifm2_ib_start_t *>(in);
+            op                               = "NPU_SET_IFM2_IB_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION):
+        {
+            const npu_set_ifm2_region_t &v = *reinterpret_cast<const npu_set_ifm2_region_t *>(in);
+            op                             = "NPU_SET_IFM2_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0):
+        {
+            const npu_set_ifm_base0_t &v = *reinterpret_cast<const npu_set_ifm_base0_t *>(in);
+            op                           = "NPU_SET_IFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1):
+        {
+            const npu_set_ifm_base1_t &v = *reinterpret_cast<const npu_set_ifm_base1_t *>(in);
+            op                           = "NPU_SET_IFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2):
+        {
+            const npu_set_ifm_base2_t &v = *reinterpret_cast<const npu_set_ifm_base2_t *>(in);
+            op                           = "NPU_SET_IFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3):
+        {
+            const npu_set_ifm_base3_t &v = *reinterpret_cast<const npu_set_ifm_base3_t *>(in);
+            op                           = "NPU_SET_IFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X):
+        {
+            const npu_set_ifm_stride_x_t &v = *reinterpret_cast<const npu_set_ifm_stride_x_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y):
+        {
+            const npu_set_ifm_stride_y_t &v = *reinterpret_cast<const npu_set_ifm_stride_y_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C):
+        {
+            const npu_set_ifm_stride_c_t &v = *reinterpret_cast<const npu_set_ifm_stride_c_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0):
+        {
+            const npu_set_ofm_base0_t &v = *reinterpret_cast<const npu_set_ofm_base0_t *>(in);
+            op                           = "NPU_SET_OFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1):
+        {
+            const npu_set_ofm_base1_t &v = *reinterpret_cast<const npu_set_ofm_base1_t *>(in);
+            op                           = "NPU_SET_OFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2):
+        {
+            const npu_set_ofm_base2_t &v = *reinterpret_cast<const npu_set_ofm_base2_t *>(in);
+            op                           = "NPU_SET_OFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3):
+        {
+            const npu_set_ofm_base3_t &v = *reinterpret_cast<const npu_set_ofm_base3_t *>(in);
+            op                           = "NPU_SET_OFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X):
+        {
+            const npu_set_ofm_stride_x_t &v = *reinterpret_cast<const npu_set_ofm_stride_x_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y):
+        {
+            const npu_set_ofm_stride_y_t &v = *reinterpret_cast<const npu_set_ofm_stride_y_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C):
+        {
+            const npu_set_ofm_stride_c_t &v = *reinterpret_cast<const npu_set_ofm_stride_c_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE):
+        {
+            const npu_set_weight_base_t &v = *reinterpret_cast<const npu_set_weight_base_t *>(in);
+            op                             = "NPU_SET_WEIGHT_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH):
+        {
+            const npu_set_weight_length_t &v = *reinterpret_cast<const npu_set_weight_length_t *>(in);
+            op                               = "NPU_SET_WEIGHT_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE):
+        {
+            const npu_set_scale_base_t &v = *reinterpret_cast<const npu_set_scale_base_t *>(in);
+            op                            = "NPU_SET_SCALE_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH):
+        {
+            const npu_set_scale_length_t &v = *reinterpret_cast<const npu_set_scale_length_t *>(in);
+            op                              = "NPU_SET_SCALE_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE):
+        {
+            const npu_set_ofm_scale_t &v = *reinterpret_cast<const npu_set_ofm_scale_t *>(in);
+            op                           = "NPU_SET_OFM_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE):
+        {
+            const npu_set_opa_scale_t &v = *reinterpret_cast<const npu_set_opa_scale_t *>(in);
+            op                           = "NPU_SET_OPA_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE):
+        {
+            const npu_set_opb_scale_t &v = *reinterpret_cast<const npu_set_opb_scale_t *>(in);
+            op                           = "NPU_SET_OPB_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC):
+        {
+            const npu_set_dma0_src_t &v = *reinterpret_cast<const npu_set_dma0_src_t *>(in);
+            op                          = "NPU_SET_DMA0_SRC";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST):
+        {
+            const npu_set_dma0_dst_t &v = *reinterpret_cast<const npu_set_dma0_dst_t *>(in);
+            op                          = "NPU_SET_DMA0_DST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN):
+        {
+            const npu_set_dma0_len_t &v = *reinterpret_cast<const npu_set_dma0_len_t *>(in);
+            op                          = "NPU_SET_DMA0_LEN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0):
+        {
+            const npu_set_ifm2_base0_t &v = *reinterpret_cast<const npu_set_ifm2_base0_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1):
+        {
+            const npu_set_ifm2_base1_t &v = *reinterpret_cast<const npu_set_ifm2_base1_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2):
+        {
+            const npu_set_ifm2_base2_t &v = *reinterpret_cast<const npu_set_ifm2_base2_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3):
+        {
+            const npu_set_ifm2_base3_t &v = *reinterpret_cast<const npu_set_ifm2_base3_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X):
+        {
+            const npu_set_ifm2_stride_x_t &v = *reinterpret_cast<const npu_set_ifm2_stride_x_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y):
+        {
+            const npu_set_ifm2_stride_y_t &v = *reinterpret_cast<const npu_set_ifm2_stride_y_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C):
+        {
+            const npu_set_ifm2_stride_c_t &v = *reinterpret_cast<const npu_set_ifm2_stride_c_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED0):
+        {
+            const npu_set_user_defined0_t &v = *reinterpret_cast<const npu_set_user_defined0_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED1):
+        {
+            const npu_set_user_defined1_t &v = *reinterpret_cast<const npu_set_user_defined1_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED2):
+        {
+            const npu_set_user_defined2_t &v = *reinterpret_cast<const npu_set_user_defined2_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED3):
+        {
+            const npu_set_user_defined3_t &v = *reinterpret_cast<const npu_set_user_defined3_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED4):
+        {
+            const npu_set_user_defined4_t &v = *reinterpret_cast<const npu_set_user_defined4_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED4";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED5):
+        {
+            const npu_set_user_defined5_t &v = *reinterpret_cast<const npu_set_user_defined5_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED5";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED6):
+        {
+            const npu_set_user_defined6_t &v = *reinterpret_cast<const npu_set_user_defined6_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED6";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED7):
+        {
+            const npu_set_user_defined7_t &v = *reinterpret_cast<const npu_set_user_defined7_t *>(in);
+            op                               = "NPU_SET_USER_DEFINED7";
+            v.disassemble(fields);
+            break;
+        }
+        }
+        return (*in & (3 << 14)) != 0 ? 2 : 1;
+    }
+#endif
+#endif
+    // Signal the end of command stream (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 16-bit mask)
+    struct npu_op_stop_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructors write 0 here
+        uint32_t control : 2; //  control
+        uint32_t mask : 16;   //  Encoding for 16-bit mask value
+#ifdef __cplusplus
+      public:
+        npu_op_stop_t(uint32_t _mask) : // stores only the low 16 bits of _mask; opcode/control are fixed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(_mask & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_op_stop_t() : // default form: same opcode/control, mask = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_STOP and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; mask and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_stop_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_stop_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_mask() const
+        {
+            return static_cast<uint32_t>(mask);
+        }
+        CONSTEXPR npu_op_stop_t &set_mask(uint32_t value) // returns *this for chaining
+        {
+            mask = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits of value
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("mask", value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+        }
+#endif
+#endif
+    };
+    // Raises an IRQ to the host (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 16-bit mask)
+    struct npu_op_irq_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructors write 0 here
+        uint32_t control : 2; //  control
+        uint32_t mask : 16;   //  Encoding for 16-bit mask value
+#ifdef __cplusplus
+      public:
+        npu_op_irq_t(uint32_t _mask) : // stores only the low 16 bits of _mask; opcode/control are fixed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(_mask & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_op_irq_t() : // default form: same opcode/control, mask = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_IRQ and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; mask and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_irq_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_irq_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_mask() const
+        {
+            return static_cast<uint32_t>(mask);
+        }
+        CONSTEXPR npu_op_irq_t &set_mask(uint32_t value) // returns *this for chaining
+        {
+            mask = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits of value
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("mask", value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+        }
+#endif
+#endif
+    };
+    // 2D convolution (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 16 reserved; no operand field)
+    struct npu_op_conv_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructor writes 0 here
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16; //  reserved; the constructor writes 0 here
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_conv_t() : // fixed opcode/control, both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_CONV and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_conv_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_conv_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // intentionally empty: nothing is appended to fields
+#endif
+#endif
+    };
+    // Depth-wise 2D convolution (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 16 reserved; no operand field)
+    struct npu_op_depthwise_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructor writes 0 here
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16; //  reserved; the constructor writes 0 here
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_depthwise_t() : // fixed opcode/control, both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_DEPTHWISE and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_depthwise_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_depthwise_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // intentionally empty: nothing is appended to fields
+#endif
+#endif
+    };
+    // Pooling (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 3-bit pooling_mode, 13 reserved)
+    struct npu_op_pool_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructors write 0 here
+        uint32_t control : 2;      //  control
+        uint32_t pooling_mode : 3; //  Pooling mode
+        uint32_t reserved1 : 13; //  reserved; the constructors write 0 here
+#ifdef __cplusplus
+      public:
+        npu_op_pool_t(NPU_NAMESPACE::pooling_mode _pooling_mode) : // stores the low 3 bits of _pooling_mode
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            pooling_mode(static_cast<uint8_t>(_pooling_mode) & ((1U << 3) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_pool_t() : // default form: same opcode/control, pooling_mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pooling_mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_POOL and control is CMD0_CTRL; pooling_mode is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pooling_mode and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_pool_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_pool_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::pooling_mode get_pooling_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::pooling_mode>(pooling_mode);
+        }
+        CONSTEXPR npu_op_pool_t &set_pooling_mode(NPU_NAMESPACE::pooling_mode value) // returns *this for chaining
+        {
+            pooling_mode = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep the low 3 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("pooling_mode", name) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "pooling_mode",
+                (pooling_mode < (sizeof(pooling_mode_str) / sizeof(pooling_mode_str[0])) ? // bounds check against the name table
+                     pooling_mode_str[pooling_mode] :
+                     "****"))); // placeholder used when the value has no entry in pooling_mode_str
+        }
+#endif
+#endif
+    };
+    // Elementwise operation (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 6-bit elementwise_mode, 10 reserved)
+    struct npu_op_elementwise_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructors write 0 here
+        uint32_t control : 2;          //  control
+        uint32_t elementwise_mode : 6; //  Elementwise mode
+        uint32_t reserved1 : 10; //  reserved; the constructors write 0 here
+#ifdef __cplusplus
+      public:
+        npu_op_elementwise_t(NPU_NAMESPACE::elementwise_mode _elementwise_mode) : // stores the low 6 bits of _elementwise_mode
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            elementwise_mode(static_cast<uint8_t>(_elementwise_mode) & ((1U << 6) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_elementwise_t() : // default form: same opcode/control, elementwise_mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), elementwise_mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_ELEMENTWISE and control is CMD0_CTRL; mode is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; elementwise_mode and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::elementwise_mode get_elementwise_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::elementwise_mode>(elementwise_mode);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_elementwise_mode(NPU_NAMESPACE::elementwise_mode value) // returns *this for chaining
+        {
+            elementwise_mode = static_cast<uint8_t>(value) & ((1U << 6) - 1); // keep the low 6 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("elementwise_mode", name) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "elementwise_mode",
+                (elementwise_mode < (sizeof(elementwise_mode_str) / sizeof(elementwise_mode_str[0])) ? // bounds check against the name table
+                     elementwise_mode_str[elementwise_mode] :
+                     "****"))); // placeholder used when the value has no entry in elementwise_mode_str
+        }
+#endif
+#endif
+    };
+    // Queue new DMA for the given channel (bitfields: 10-bit opcode, 4 reserved, 2-bit control, 16 reserved)
+    struct npu_op_dma_start_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved; the constructor writes 0 here
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16; //  reserved; the constructor writes 0 here
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_dma_start_t() : // fixed opcode/control, both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_DMA_START and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's bytes copied into a uint32_t
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of sizeof(uint32_t) bytes from this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_dma_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_dma_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // intentionally empty: nothing is appended to fields
+#endif
+#endif
+    };
+    // Wait for the DMA channel to have k or fewer active descriptors outstanding
+    struct npu_op_dma_wait_t // Command word for NPU_OP_DMA_WAIT; bit-fields total 10 + 4 + 2 + 4 + 12 = 32 bits
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t k : 4;       //  Number of outstanding descriptors
+        uint32_t reserved1 : 12; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_dma_wait_t(uint32_t _k) : // Stores only the low 4 bits of _k. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), k(_k & ((1U << 4) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_dma_wait_t() : // Same opcode/control as above with k = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), k(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_OP_DMA_WAIT and control is CMD0_CTRL; k is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; k and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_k() const // Current k field, 0-15
+        {
+            return static_cast<uint32_t>(k);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_k(uint32_t value) // Out-of-range values are silently truncated, not rejected
+        {
+            k = static_cast<uint8_t>(value) & ((1U << 4) - 1); // keep the low 4 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("k", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("k", std::to_string(k)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Wait for n or fewer kernel operations to be remaining
+    struct npu_op_kernel_wait_t // Command word for NPU_OP_KERNEL_WAIT; bit-fields total 10 + 4 + 2 + 2 + 14 = 32 bits
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t n : 2;       //  Number of kernel operations in range 0-3
+        uint32_t reserved1 : 14; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_kernel_wait_t(uint32_t _n) : // Stores only the low 2 bits of _n. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), n(_n & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_kernel_wait_t() : // Same opcode/control as above with n = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), n(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_OP_KERNEL_WAIT and control is CMD0_CTRL; n is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; n and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_n() const // Current n field, 0-3
+        {
+            return static_cast<uint32_t>(n);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_n(uint32_t value) // Out-of-range values are silently truncated, not rejected
+        {
+            n = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("n", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("n", std::to_string(n)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Enable or disable PMU counting (debug feature only)
+    struct npu_op_pmu_mask_t // Command word for NPU_OP_PMU_MASK; bit-fields total 10 + 4 + 2 + 1 + 15 = 32 bits
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t enable : 1;  //  Enable or disable PMU mask
+        uint32_t reserved1 : 15; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_pmu_mask_t(uint32_t _enable) : // Stores only bit 0 of _enable. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), enable(_enable & ((1U << 1) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_pmu_mask_t() : // Same opcode/control as above with enable = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), enable(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_OP_PMU_MASK and control is CMD0_CTRL; enable is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; enable and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_enable() const // Current enable bit, 0 or 1
+        {
+            return static_cast<uint32_t>(enable);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_enable(uint32_t value) // Only bit 0 of value is kept, so e.g. 2 stores 0
+        {
+            enable = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the low bit (field width)
+            return *this;
         }
-    }
-#endif //__cplusplus
-};
-
-// Data structure for commands without payload
-struct command_no_payload_t
-{
-    uint32_t cmd_code : 10;
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
-#ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR command_no_payload_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR command_no_payload_t &set_param(uint32_t value)
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("enable", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("enable", std::to_string(enable)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM top pad
+    struct npu_set_ifm_pad_top_t // Command word for NPU_SET_IFM_PAD_TOP; bit-fields total 10 + 4 + 2 + 7 + 9 = 32 bits
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Data structure for commands with payload
-struct command_with_payload_t
-{
-    uint32_t cmd_code : 10;
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t param : 16;
-    uint32_t data : 32;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return must_be_zero == 0 && payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR command_with_payload_t &set_cmd_code(::cmd1 value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t pad : 7;     //  IFM top pad
+        uint32_t reserved1 : 9; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_top_t(uint32_t _pad) : // Stores only the low 7 bits of _pad. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 7) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t() : // Same opcode/control as above with pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_PAD_TOP and control is CMD0_CTRL; pad is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; pad and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // Current pad field, 0-127
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_pad(uint32_t value) // Out-of-range values are silently truncated, not rejected
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 7) - 1); // keep the low 7 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("pad", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM left pad
+    struct npu_set_ifm_pad_left_t // Command word for NPU_SET_IFM_PAD_LEFT; bit-fields total 10 + 4 + 2 + 7 + 9 = 32 bits
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t pad : 7;     //  IFM left pad
+        uint32_t reserved1 : 9; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_left_t(uint32_t _pad) : // Stores only the low 7 bits of _pad. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 7) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t() : // Same opcode/control as above with pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_PAD_LEFT and control is CMD0_CTRL; pad is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; pad and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // Current pad field, 0-127
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_pad(uint32_t value) // Out-of-range values are silently truncated, not rejected
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 7) - 1); // keep the low 7 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("pad", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM right pad
+    struct npu_set_ifm_pad_right_t // Command word for NPU_SET_IFM_PAD_RIGHT; bit-fields total 10 + 4 + 2 + 8 + 8 = 32 bits
     {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR command_with_payload_t &set_data(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t pad : 8;     //  IFM right pad. Max value is 128
+        uint32_t reserved1 : 8; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_right_t(uint32_t _pad) : // Stores the low 8 bits of _pad; the documented maximum of 128 is not enforced here. NOTE(review): implicit and not CONSTEXPR
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 8) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t() : // Same opcode/control as above with pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_PAD_RIGHT and control is CMD0_CTRL; pad is not range-checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; pad and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // Current pad field as stored (0-255 representable)
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_pad(uint32_t value) // Accepts 0-255 after truncation; values above the documented 128 are not rejected
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 8) - 1); // keep the low 8 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("pad", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM bottom pad
+    struct npu_set_ifm_pad_bottom_t // Command word for NPU_SET_IFM_PAD_BOTTOM; bit-fields total 10 + 4 + 2 + 8 + 8 = 32 bits
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control
+        uint32_t pad : 8;     //  IFM bottom pad. Max value is 128
+        uint32_t reserved1 : 8; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_bottom_t(uint32_t _pad) : // Stores the low 8 bits of _pad; the documented maximum of 128 is not enforced here. NOTE(review): implicit and not CONSTEXPR
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 8) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t() : // Same opcode/control as above with pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_PAD_BOTTOM and control is CMD0_CTRL; pad is not range-checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; pad and the reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // Current pad field as stored (0-255 representable)
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_pad(uint32_t value) // Accepts 0-255 after truncation; values above the documented 128 are not rejected
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 8) - 1); // keep the low 8 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("pad", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Number of input channels for convolution
+    struct npu_set_ifm_depth_m1_t // Command word for NPU_SET_IFM_DEPTH_M1; bit-fields total 10 + 4 + 2 + 16 = 32 bits
     {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR command_with_payload_t &set_param(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2;   //  control
+        uint32_t depth_m1 : 16; //  Number of input channels for convolution
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_depth_m1_t(uint32_t _depth_m1) : // Stores only the low 16 bits of _depth_m1. NOTE(review): implicit (non-explicit) and not CONSTEXPR, unlike the default constructor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t() : // Same opcode/control as above with depth_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_DEPTH_M1 and control is CMD0_CTRL; depth_m1 is not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; depth_m1 and reserved0 are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const // Raw field value; presumably "depth minus one" per the _m1 suffix - confirm against the NPU specification
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_depth_m1(uint32_t value) // Out-of-range values are silently truncated, not rejected
+        {
+            depth_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one ("depth_m1", decimal value) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Precision
+    struct npu_set_ifm_precision_t // Command word for NPU_SET_IFM_PRECISION; bit-fields total 10+4+2+1+1+2+2+2+2+4+2 = 32 bits
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2;         //  control
+        uint32_t activation_type : 1; //  IFM type
+        uint32_t reserved1 : 1; //  zeroed by both constructors
+        uint32_t activation_precision : 2; //  IFM precision
+        uint32_t reserved2 : 2; //  zeroed by both constructors
+        uint32_t activation_format : 2; //  IFM format
+        uint32_t scale_mode : 2;        //  IFM scale mode
+        uint32_t reserved3 : 4; //  zeroed by both constructors
+        uint32_t round_mode : 2; //  IFM round mode
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_precision_t(NPU_NAMESPACE::activation_type _activation_type, // Each enum argument is cast to uint8_t and masked to its field width
+                                NPU_NAMESPACE::activation_precision _activation_precision,
+                                NPU_NAMESPACE::activation_format _activation_format,
+                                NPU_NAMESPACE::ifm_scale_mode _scale_mode,
+                                NPU_NAMESPACE::round_mode _round_mode) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)), reserved1(0),
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved2(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)),
+            scale_mode(static_cast<uint8_t>(_scale_mode) & ((1U << 2) - 1)), reserved3(0),
+            round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 2) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_precision_t() : // Same opcode/control as above with every payload field set to 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0), reserved1(0),
+            activation_precision(0), reserved2(0), activation_format(0), scale_mode(0), reserved3(0), round_mode(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True only if opcode is NPU_SET_IFM_PRECISION and control is CMD0_CTRL; payload fields are not checked
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; payload and reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Raw encoding of the command. NOTE(review): not const-qualified, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint32_t) bytes; presumably the whole struct - confirm bit-field packing on the target compiler
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // Stored opcode bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Stored control bits reinterpreted as the enum, no range check
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Returns *this so setters can be chained
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const // Stored bit reinterpreted as the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value) // Returns *this so setters can be chained
+        {
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the low bit (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const // Stored bits reinterpreted as the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value) // Returns *this so setters can be chained
+        {
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const // Stored bits reinterpreted as the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value) // Returns *this so setters can be chained
+        {
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm_scale_mode get_scale_mode() const // Stored bits reinterpreted as the enum
+        {
+            return static_cast<NPU_NAMESPACE::ifm_scale_mode>(scale_mode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_scale_mode(NPU_NAMESPACE::ifm_scale_mode value) // Returns *this so setters can be chained
+        {
+            scale_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::round_mode get_round_mode() const // Stored bits reinterpreted as the enum
+        {
+            return static_cast<NPU_NAMESPACE::round_mode>(round_mode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_round_mode(NPU_NAMESPACE::round_mode value) // Returns *this so setters can be chained
+        {
+            round_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits (field width)
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends five (field name, text) pairs to fields, in declaration order
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ? // bounds-check the raw value against the name table
+                     activation_type_str[activation_type] :
+                     "****"))); // placeholder when the value has no entry in the table
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "scale_mode",
+                (scale_mode < (sizeof(ifm_scale_mode_str) / sizeof(ifm_scale_mode_str[0])) ?
+                     ifm_scale_mode_str[scale_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "round_mode",
+                (round_mode < (sizeof(round_mode_str) / sizeof(round_mode_str[0])) ? round_mode_str[round_mode] :
+                                                                                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_UPSCALE command: bit-fields totalling 32 bits that carry opcode, control and the IFM upscale mode
+    struct npu_set_ifm_upscale_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR command_with_payload_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_UPSCALE
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t mode : 2;    //  IFM upscale mode (an ifm_upscale_mode value masked to 2 bits)
+        uint32_t reserved1 : 14; //  reserved; zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_upscale_t(NPU_NAMESPACE::ifm_upscale_mode _mode) : // _mode is masked to 2 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            mode(static_cast<uint8_t>(_mode) & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_upscale_t() : // default: mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm_upscale_mode get_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::ifm_upscale_mode>(mode);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_mode(NPU_NAMESPACE::ifm_upscale_mode value)
+        {
+            mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("mode", name) to fields; values past the name table print as "****"
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "mode",
+                (mode < (sizeof(ifm_upscale_mode_str) / sizeof(ifm_upscale_mode_str[0])) ? ifm_upscale_mode_str[mode] :
+                                                                                           "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_ZERO_POINT command: bit-fields totalling 32 bits that carry opcode, control and the IFM zero point
+    struct npu_set_ifm_zero_point_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Move to stopped state once all commands to this point are done. Raise IRQ to the host and logically OR the mask into
-// the status register upper 16 bits (see the status register)
-struct npu_op_stop_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_STOP
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t mask : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_STOP) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_STOP);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_stop_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_mask() const
-    {
-        return static_cast<uint32_t>(mask);
-    }
-    CONSTEXPR npu_op_stop_t &set_mask(uint32_t value)
-    {
-        mask = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Raise IRQ to the host and logically OR the mask into the status register upper 16 bits (see the status register)
-struct npu_op_irq_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_IRQ
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t mask : 16;
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_ZERO_POINT
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2;     //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t zero_point : 16; //  16-bit zero point offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_IRQ) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_IRQ);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_irq_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_mask() const
-    {
-        return static_cast<uint32_t>(mask);
-    }
-    CONSTEXPR npu_op_irq_t &set_mask(uint32_t value)
+      public:
+        npu_set_ifm_zero_point_t(uint32_t _zero_point) : // _zero_point is masked to 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t() : // default: zero_point = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const
+        {
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_zero_point(uint32_t value)
+        {
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("zero_point", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_WIDTH0_M1 command: bit-fields totalling 32 bits; the payload is the IFM Tile 0 and tile 2 width
+    struct npu_set_ifm_width0_m1_t
     {
-        mask = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start stripe with full convolution or deconvolution
-struct npu_op_conv_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_CONV
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t reserved0 : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_CONV) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_CONV);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_conv_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start stripe width depth-wise convolution or deconvolution operation
-struct npu_op_depthwise_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_DEPTHWISE
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t reserved0 : 16;
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_WIDTH0_M1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2;   //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t width_m1 : 16; //  IFM Tile 0 and tile 2 width (presumably width - 1, going by the _m1 name; confirm)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_DEPTHWISE) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_DEPTHWISE);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_depthwise_t &set_cmd_code(::cmd0 value)
+      public:
+        npu_set_ifm_width0_m1_t(uint32_t _width_m1) : // _width_m1 is masked to 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t() : // default: width_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("width_m1", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_HEIGHT0_M1 command: bit-fields totalling 32 bits; the payload is the IFM Tile 0 height
+    struct npu_set_ifm_height0_m1_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start stripe with pooling operation
-struct npu_op_pool_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_POOL
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t mode : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_POOL) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_POOL);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_HEIGHT0_M1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2;    //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t height_m1 : 16; //  IFM Tile 0 height (presumably height - 1, going by the _m1 name; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_height0_m1_t(uint32_t _height_m1) : // _height_m1 is masked to 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t() : // default: height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("height_m1", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_HEIGHT1_M1 command: bit-fields totalling 32 bits; the payload is the IFM Tile 1 height
+    struct npu_set_ifm_height1_m1_t
     {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_pool_t &set_cmd_code(::cmd0 value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_HEIGHT1_M1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2;    //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t height_m1 : 16; //  IFM Tile 1 height (presumably height - 1, going by the _m1 name; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_height1_m1_t(uint32_t _height_m1) : // _height_m1 is masked to 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t() : // default: height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("height_m1", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_IB_END command: bit-fields totalling 32 bits; the payload is the end of the IB0,IB1 buffers
+    struct npu_set_ifm_ib_end_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::pooling_mode get_mode() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_IB_END
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t ib_end : 6;  //  End of IB0,IB1 buffers in the SHRAM in KB units. Multiple of 2 (not checked in this struct)
+        uint32_t reserved1 : 10; //  reserved; zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_ib_end_t(uint32_t _ib_end) : // _ib_end is masked to 6 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_end(_ib_end & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t() : // default: ib_end = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_end(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ib_end() const
+        {
+            return static_cast<uint32_t>(ib_end);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_ib_end(uint32_t value)
+        {
+            ib_end = static_cast<uint8_t>(value) & ((1U << 6) - 1); // keep the low 6 bits only
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("ib_end", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ib_end", std::to_string(ib_end)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_IFM_REGION command: bit-fields totalling 32 bits; payload is region index n for IFM access + DMA select
+    struct npu_set_ifm_region_t
     {
-        return static_cast<::pooling_mode>(mode);
-    }
-    CONSTEXPR npu_op_pool_t &set_mode(::pooling_mode value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_IFM_REGION
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t region : 3;  //  Region number n (3 bits)
+        uint32_t reserved1 : 12; //  reserved; zeroed by both constructors
+        uint32_t custom_dma_cs : 1; //  Custom DMA select (a custom_dma_cs value masked to 1 bit)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_region_t(uint32_t _region, NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) : // args masked to 3 / 1 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0), custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_region_t() : // default: region = 0, custom_dma_cs = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep the low 3 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value)
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the lowest bit only
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends region (decimal) and custom_dma_cs (name, or "****" past the table)
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ?
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // NPU_SET_OFM_WIDTH_M1 command: bit-fields totalling 32 bits; the payload is the output feature map width
+    struct npu_set_ofm_width_m1_t
     {
-        mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start stripe with pointwise operation
-struct npu_op_elementwise_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_ELEMENTWISE
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t mode : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_ELEMENTWISE) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_ELEMENTWISE);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_elementwise_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::elementwise_mode get_mode() const
-    {
-        return static_cast<::elementwise_mode>(mode);
-    }
-    CONSTEXPR npu_op_elementwise_t &set_mode(::elementwise_mode value)
-    {
-        mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Queue new DMA for the given channel with the given mode. Mode bit 0 specifies the source address type 0=external,
-// 1=internal Mode bit 1 specifies the destination address type 0=external, 1=internal In Ethos-U55 there is only one
-// user channel so channel=0. If the channel is fully in use then the command blocks until a new DMA can start
-struct npu_op_dma_start_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_DMA_START
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t channel_mode : 16;
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  10-bit command opcode; constructors and init() set it to NPU_SET_OFM_WIDTH_M1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2;   //  2-bit control field; constructors and init() set it to CMD0_CTRL
+        uint32_t width_m1 : 16; //  Output feature map width (presumably width - 1, going by the _m1 name; confirm)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_DMA_START) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_DMA_START);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR uint32_t get_channel_mode() const
-    {
-        return static_cast<uint32_t>(channel_mode);
-    }
-    CONSTEXPR npu_op_dma_start_t &set_channel_mode(uint32_t value)
-    {
-        channel_mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_dma_start_t &set_cmd_code(::cmd0 value)
+      public:
+        npu_set_ofm_width_m1_t(uint32_t _width_m1) : // _width_m1 is masked to 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t() : // default: width_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true only if opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields keep their current values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // byte-copies sizeof(uint32_t) bytes of this object into a raw word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits only
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits only
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassemble() appends ("width_m1", decimal text of the field) to fields
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Output feature map height
+    struct npu_set_ofm_height_m1_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Wait for the DMA channel to have k or fewer active descriptors outstanding. In Ethos-U55 there is only one user
-// channel so channel=0. In Ethos-U55 there is only one descriptor per channel so k=0 and the command waits for the
-// single DMA to be complete.
-struct npu_op_dma_wait_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_DMA_WAIT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t reserved0 : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_DMA_WAIT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_DMA_WAIT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_dma_wait_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Wait for n or fewer kernel operations to be remaining (not complete) before starting the next command. A kernel
-// operation is Conv, Depthwise, Pool, VectorProd Elementwise. This command is typically placed before an
-// NPU_OP_DMA_START command to prevent the DMA from starting until a previous kernel operation reading the memory has
-// completed.
-struct npu_op_kernel_wait_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_KERNEL_WAIT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  Output feature map height
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_KERNEL_WAIT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_KERNEL_WAIT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_kernel_wait_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_op_kernel_wait_t &set_param(uint32_t value)
+      public:
+        npu_set_ofm_height_m1_t(uint32_t _height_m1) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif
+#endif
+    };
+    // Output feature map depth
+    struct npu_set_ofm_depth_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Enable or disable PMU counting (debug feature only).
-struct npu_op_pmu_mask_t
-{
-    uint32_t cmd_code : 10;     // NPU_OP_PMU_MASK
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_OP_PMU_MASK) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_OP_PMU_MASK);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_op_pmu_mask_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_op_pmu_mask_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM top pad
-struct npu_set_ifm_pad_top_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_PAD_TOP
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;   //  control
+        uint32_t depth_m1 : 16; //  Output feature map depth
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_TOP) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_TOP);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_pad_top_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_pad_top_t &set_param(uint32_t value)
+      public:
+        npu_set_ofm_depth_m1_t(uint32_t _depth_m1) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_depth_m1(uint32_t value)
+        {
+            depth_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM Precision
+    struct npu_set_ofm_precision_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM left pad
-struct npu_set_ifm_pad_left_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_PAD_LEFT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_LEFT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_LEFT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_pad_left_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_pad_left_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM right pad
-struct npu_set_ifm_pad_right_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_PAD_RIGHT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;              //  control
+        uint32_t activation_type : 1;      //  OFM type
+        uint32_t activation_precision : 2; //  OFM precision
+        uint32_t reserved1 : 3;
+        uint32_t activation_format : 2; //  OFM format
+        uint32_t scale_mode : 1;        //  OFM scale mode
+        uint32_t reserved2 : 5;
+        uint32_t round_mode : 2; //  OFM round mode
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_RIGHT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_RIGHT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_pad_right_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_pad_right_t &set_param(uint32_t value)
+      public:
+        npu_set_ofm_precision_t(NPU_NAMESPACE::activation_type _activation_type,
+                                NPU_NAMESPACE::activation_precision _activation_precision,
+                                NPU_NAMESPACE::activation_format _activation_format,
+                                NPU_NAMESPACE::ofm_scale_mode _scale_mode,
+                                NPU_NAMESPACE::round_mode _round_mode) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)),
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved1(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)),
+            scale_mode(static_cast<uint8_t>(_scale_mode) & ((1U << 1) - 1)), reserved2(0),
+            round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 2) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_precision_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0),
+            activation_precision(0), reserved1(0), activation_format(0), scale_mode(0), reserved2(0), round_mode(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value)
+        {
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value)
+        {
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value)
+        {
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ofm_scale_mode get_scale_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::ofm_scale_mode>(scale_mode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_scale_mode(NPU_NAMESPACE::ofm_scale_mode value)
+        {
+            scale_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::round_mode get_round_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::round_mode>(round_mode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_round_mode(NPU_NAMESPACE::round_mode value)
+        {
+            round_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ?
+                     activation_type_str[activation_type] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "scale_mode",
+                (scale_mode < (sizeof(ofm_scale_mode_str) / sizeof(ofm_scale_mode_str[0])) ?
+                     ofm_scale_mode_str[scale_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "round_mode",
+                (round_mode < (sizeof(round_mode_str) / sizeof(round_mode_str[0])) ? round_mode_str[round_mode] :
+                                                                                     "****")));
+        }
+#endif
+#endif
+    };
+    // OFM block width
+    struct npu_set_ofm_blk_width_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM bottom pad
-struct npu_set_ifm_pad_bottom_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_PAD_BOTTOM
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_BOTTOM) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_PAD_BOTTOM);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_pad_bottom_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_pad_bottom_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Number of input channels - 1
-struct npu_set_ifm_depth_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_DEPTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;  //  control
+        uint32_t width_m1 : 6; //  OFM block width
+        uint32_t reserved1 : 10;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_DEPTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_DEPTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_depth_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_depth_m1_t &set_param(uint32_t value)
+      public:
+        npu_set_ofm_blk_width_m1_t(uint32_t _width_m1) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint8_t>(value) & ((1U << 6) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM block height
+    struct npu_set_ofm_blk_height_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM precision
-struct npu_set_ifm_precision_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_PRECISION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t precision : 4;
-    uint32_t reserved0 : 2;
-    uint32_t format : 2;
-    uint32_t scale_mode : 2;
-    uint32_t reserved1 : 4;
-    uint32_t round_mode : 2;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_PRECISION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_PRECISION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_precision_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::data_format get_format() const
-    {
-        return static_cast<::data_format>(format);
-    }
-    CONSTEXPR npu_set_ifm_precision_t &set_format(::data_format value)
-    {
-        format = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::ifm_precision get_precision() const
-    {
-        return static_cast<::ifm_precision>(precision);
-    }
-    CONSTEXPR npu_set_ifm_precision_t &set_precision(::ifm_precision value)
-    {
-        precision = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::rounding get_round_mode() const
-    {
-        return static_cast<::rounding>(round_mode);
-    }
-    CONSTEXPR npu_set_ifm_precision_t &set_round_mode(::rounding value)
-    {
-        round_mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::ifm_scale_mode get_scale_mode() const
-    {
-        return static_cast<::ifm_scale_mode>(scale_mode);
-    }
-    CONSTEXPR npu_set_ifm_precision_t &set_scale_mode(::ifm_scale_mode value)
-    {
-        scale_mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// b[1:0] = upscale mode (0=none, 1=2x2 nearest, 2=2x2 transpose)
-struct npu_set_ifm_upscale_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_UPSCALE
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t mode : 2;
-    uint32_t reserved0 : 14;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;   //  control
+        uint32_t height_m1 : 5; //  OFM block height
+        uint32_t reserved1 : 11;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_UPSCALE) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_UPSCALE);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_upscale_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::resampling_mode get_mode() const
-    {
-        return static_cast<::resampling_mode>(mode);
-    }
-    CONSTEXPR npu_set_ifm_upscale_t &set_mode(::resampling_mode value)
+      public:
+        npu_set_ofm_blk_height_m1_t(uint32_t _height_m1) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 5) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint8_t>(value) & ((1U << 5) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM block depth
+    struct npu_set_ofm_blk_depth_m1_t
     {
-        mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Zero point offset (so value that 0 is encoded as)
-struct npu_set_ifm_zero_point_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_ZERO_POINT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_ZERO_POINT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_ZERO_POINT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_zero_point_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_zero_point_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM Tile 0 and tile 2 (width-1)
-struct npu_set_ifm_width0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_WIDTH0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;  //  control
+        uint32_t depth_m1 : 7; //  OFM block depth
+        uint32_t reserved1 : 9;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_WIDTH0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_WIDTH0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_width0_m1_t &set_cmd_code(::cmd0 value)
+      public:
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t(uint32_t _depth_m1) : // _depth_m1 is masked to the 7-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 7) - 1)),
+            reserved1(0)
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_depth_m1(uint32_t value)
+        {
+            depth_m1 = static_cast<uint8_t>(value) & ((1U << 7) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM zero point
+    struct npu_set_ofm_zero_point_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;     //  control
+        uint32_t zero_point : 16; //  Zero point offset
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_set_ofm_zero_point_t(uint32_t _zero_point) : // _zero_point is masked to the 16-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1))
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const
+        {
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_zero_point(uint32_t value)
+        {
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 0 and tile 2 width
+    struct npu_set_ofm_width0_m1_t
     {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_width0_m1_t &set_param(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;   //  control
+        uint32_t width_m1 : 16; //  OFM Tile 0 and tile 2 width
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_set_ofm_width0_m1_t(uint32_t _width_m1) : // _width_m1 is masked to the 16-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 0 height
+    struct npu_set_ofm_height0_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM Tile 0 (height-1)
-struct npu_set_ifm_height0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_HEIGHT0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_HEIGHT0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_HEIGHT0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_height0_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_height0_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM Tile 1 (height-1)
-struct npu_set_ifm_height1_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_HEIGHT1_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  OFM Tile 0 height
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_HEIGHT1_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_HEIGHT1_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_height1_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_height1_m1_t &set_param(uint32_t value)
+      public:
+        CONSTEXPR npu_set_ofm_height0_m1_t(uint32_t _height_m1) : // _height_m1 is masked to the 16-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 1 height
+    struct npu_set_ofm_height1_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// End of IB0,IB1 buffers in the SHRAM in KB units. Multiple of 2.
-struct npu_set_ifm_ib_end_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_IB_END
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_IB_END) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_IB_END);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_ib_end_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_ib_end_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Index n for IFM access: BasePointer[n] is added to all IFM offsets
-struct npu_set_ifm_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  OFM Tile 1 height
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm_region_t &set_param(uint32_t value)
+      public:
+        CONSTEXPR npu_set_ofm_height1_m1_t(uint32_t _height_m1) : // _height_m1 is masked to the 16-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif
+#endif
+    };
+    // Index n for OFM access
+    struct npu_set_ofm_region_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Output feature map width -1 (for the stripe to process)
-struct npu_set_ofm_width_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_WIDTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_WIDTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_WIDTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_width_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_width_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Output feature map height -1 (for the stripe to process)
-struct npu_set_ofm_height_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_HEIGHT_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Index n for OFM access
+        uint32_t reserved1 : 12;
+        uint32_t custom_dma_cs : 1; //  Custom DMA select
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_height_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_height_m1_t &set_param(uint32_t value)
+      public:
+        CONSTEXPR npu_set_ofm_region_t(uint32_t _region, NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0), custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1))
+        { // CONSTEXPR, matching the default constructor; _region is masked to 3 bits, _custom_dma_cs to 1 bit
+        }
+        CONSTEXPR npu_set_ofm_region_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value)
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ?
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****")));
+        }
+#endif
+#endif
+    };
+    // Kernel width
+    struct npu_set_kernel_width_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Output feature map depth -1 (for the stripe to process)
-struct npu_set_ofm_depth_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_DEPTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_DEPTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_DEPTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_depth_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_depth_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM precision
-struct npu_set_ofm_precision_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_PRECISION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t precision : 3;
-    uint32_t reserved0 : 3;
-    uint32_t format : 2;
-    uint32_t scaling : 1; // 0=Per channel scale/bias 1=Global scale (SET_OFM_SCALE), no bias
-    uint32_t reserved1 : 5;
-    uint32_t rounding : 2; // 0=TFL rounding 1=truncate towards zero 2=natural rounding
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;   //  control
+        uint32_t width_m1 : 16; //  Kernel width
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_PRECISION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_PRECISION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_precision_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::data_format get_format() const
-    {
-        return static_cast<::data_format>(format);
-    }
-    CONSTEXPR npu_set_ofm_precision_t &set_format(::data_format value)
-    {
-        format = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::ofm_precision get_precision() const
-    {
-        return static_cast<::ofm_precision>(precision);
-    }
-    CONSTEXPR npu_set_ofm_precision_t &set_precision(::ofm_precision value)
-    {
-        precision = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::rounding get_rounding() const
-    {
-        return static_cast<::rounding>(rounding);
-    }
-    CONSTEXPR npu_set_ofm_precision_t &set_rounding(::rounding value)
-    {
-        rounding = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_scaling() const
-    {
-        return static_cast<uint32_t>(scaling);
-    }
-    CONSTEXPR npu_set_ofm_precision_t &set_scaling(uint32_t value)
+      public:
+        CONSTEXPR npu_set_kernel_width_m1_t(uint32_t _width_m1) : // _width_m1 is masked to the 16-bit field
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        { // CONSTEXPR, matching the default constructor; opcode/control are fixed, reserved bits are zeroed
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() const // const-qualified: the conversion only reads the object
+        {
+            uint32_t word; // receives the raw bytes of this command
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the first sizeof(uint32_t) bytes
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif
+#endif
+    };
+    // Kernel height
+    struct npu_set_kernel_height_m1_t
     {
-        scaling = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// TSU block width - 1 (provided sufficient data remaining)
-struct npu_set_ofm_blk_width_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_BLK_WIDTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_WIDTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_WIDTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_blk_width_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_blk_width_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// TSU block height -1 (provided sufficient data remaining)
-struct npu_set_ofm_blk_height_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_BLK_HEIGHT_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  Kernel height
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_HEIGHT_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_HEIGHT_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_blk_height_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_blk_height_m1_t &set_param(uint32_t value)
+      public:
+        npu_set_kernel_height_m1_t(uint32_t _height_m1) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif
+#endif
+    };
+    // Kernel stride
+    struct npu_set_kernel_stride_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// TSU block depth -1 (provided sufficient data remaining)
-struct npu_set_ofm_blk_depth_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_BLK_DEPTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_DEPTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_BLK_DEPTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Zero point offset (so value that 0 is encoded as)
-struct npu_set_ofm_zero_point_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_ZERO_POINT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;       //  control
+        uint32_t stride_x_lsb : 1;  //  Stride x LSB. (kernel_x_stride - 1)[0]
+        uint32_t stride_y_lsb : 1;  //  Stride y LSB. (kernel_y_stride - 1)[0]
+        uint32_t weight_order : 1;  //  Weight ordering mode
+        uint32_t dilation_x : 1;    //  Kernel x dilation
+        uint32_t dilation_y : 1;    //  Kernel y dilation
+        uint32_t decomposition : 1; //  Kernel decomposition
+        uint32_t stride_x_msb : 1;  //  Stride x MSB. (kernel_x_stride - 1) >> 1
+        uint32_t reserved1 : 2;
+        uint32_t stride_y_msb : 1; //  Stride y MSB. (kernel_y_stride - 1) >> 1
+        uint32_t reserved2 : 6;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_ZERO_POINT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_ZERO_POINT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_zero_point_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_zero_point_t &set_param(uint32_t value)
+      public:
+        npu_set_kernel_stride_t(uint32_t _stride_x_lsb,
+                                uint32_t _stride_y_lsb,
+                                NPU_NAMESPACE::weight_order _weight_order,
+                                NPU_NAMESPACE::kernel_dilation _dilation_x,
+                                NPU_NAMESPACE::kernel_dilation _dilation_y,
+                                NPU_NAMESPACE::kernel_decomposition _decomposition,
+                                uint32_t _stride_x_msb,
+                                uint32_t _stride_y_msb) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            stride_x_lsb(_stride_x_lsb & ((1U << 1) - 1)), stride_y_lsb(_stride_y_lsb & ((1U << 1) - 1)),
+            weight_order(static_cast<uint8_t>(_weight_order) & ((1U << 1) - 1)),
+            dilation_x(static_cast<uint8_t>(_dilation_x) & ((1U << 1) - 1)),
+            dilation_y(static_cast<uint8_t>(_dilation_y) & ((1U << 1) - 1)),
+            decomposition(static_cast<uint8_t>(_decomposition) & ((1U << 1) - 1)),
+            stride_x_msb(_stride_x_msb & ((1U << 1) - 1)), reserved1(0), stride_y_msb(_stride_y_msb & ((1U << 1) - 1)),
+            reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_kernel_stride_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), stride_x_lsb(0), stride_y_lsb(0),
+            weight_order(0), dilation_x(0), dilation_y(0), decomposition(0), stride_x_msb(0), reserved1(0),
+            stride_y_msb(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_x_lsb() const
+        {
+            return static_cast<uint32_t>(stride_x_lsb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_x_lsb(uint32_t value)
+        {
+            stride_x_lsb = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_y_lsb() const
+        {
+            return static_cast<uint32_t>(stride_y_lsb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_y_lsb(uint32_t value)
+        {
+            stride_y_lsb = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::weight_order get_weight_order() const
+        {
+            return static_cast<NPU_NAMESPACE::weight_order>(weight_order);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_weight_order(NPU_NAMESPACE::weight_order value)
+        {
+            weight_order = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_x() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_x);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_dilation_x(NPU_NAMESPACE::kernel_dilation value)
+        {
+            dilation_x = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_y() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_y);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_dilation_y(NPU_NAMESPACE::kernel_dilation value)
+        {
+            dilation_y = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_decomposition get_decomposition() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_decomposition>(decomposition);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_decomposition(NPU_NAMESPACE::kernel_decomposition value)
+        {
+            decomposition = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_x_msb() const
+        {
+            return static_cast<uint32_t>(stride_x_msb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_x_msb(uint32_t value)
+        {
+            stride_x_msb = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_y_msb() const
+        {
+            return static_cast<uint32_t>(stride_y_msb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_y_msb(uint32_t value)
+        {
+            stride_y_msb = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("stride_x_lsb", std::to_string(stride_x_lsb)));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_y_lsb", std::to_string(stride_y_lsb)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "weight_order",
+                (weight_order < (sizeof(weight_order_str) / sizeof(weight_order_str[0])) ?
+                     weight_order_str[weight_order] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "dilation_x",
+                (dilation_x < (sizeof(kernel_dilation_str) / sizeof(kernel_dilation_str[0])) ?
+                     kernel_dilation_str[dilation_x] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "dilation_y",
+                (dilation_y < (sizeof(kernel_dilation_str) / sizeof(kernel_dilation_str[0])) ?
+                     kernel_dilation_str[dilation_y] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "decomposition",
+                (decomposition < (sizeof(kernel_decomposition_str) / sizeof(kernel_decomposition_str[0])) ?
+                     kernel_decomposition_str[decomposition] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_x_msb", std::to_string(stride_x_msb)));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_y_msb", std::to_string(stride_y_msb)));
+        }
+#endif
+#endif
+    };
+    // Accumulator format
+    struct npu_set_acc_format_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// OFM Tile 0 and tile 2 (width-1)
-struct npu_set_ofm_width0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_WIDTH0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_WIDTH0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_WIDTH0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_width0_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_width0_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// OFM Tile 0 (height-1)
-struct npu_set_ofm_height0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_HEIGHT0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t acc_format : 2; //  Accumulator format
+        uint32_t reserved1 : 14;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_height0_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_height0_m1_t &set_param(uint32_t value)
+      public:
+        npu_set_acc_format_t(NPU_NAMESPACE::acc_format _acc_format) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            acc_format(static_cast<uint8_t>(_acc_format) & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_acc_format_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), acc_format(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::acc_format get_acc_format() const
+        {
+            return static_cast<NPU_NAMESPACE::acc_format>(acc_format);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_acc_format(NPU_NAMESPACE::acc_format value)
+        {
+            acc_format = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "acc_format",
+                (acc_format < (sizeof(acc_format_str) / sizeof(acc_format_str[0])) ? acc_format_str[acc_format] :
+                                                                                     "****")));
+        }
+#endif
+#endif
+    };
+    // Activation function and clip range
+    struct npu_set_activation_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// OFM Tile 1 (height-1)
-struct npu_set_ofm_height1_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_HEIGHT1_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT1_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_HEIGHT1_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_height1_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_height1_m1_t &set_param(uint32_t value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;             //  control
+        uint32_t activation_function : 5; //  Activation function (before table lookup)
+        uint32_t reserved1 : 7;
+        uint32_t activation_clip_range : 3; //  Activation clip range. This must be set to 0 if table lookup is not used
+        uint32_t reserved2 : 1;
+#ifdef __cplusplus
+      public:
+        npu_set_activation_t(NPU_NAMESPACE::activation_function _activation_function,
+                             NPU_NAMESPACE::activation_clip_range _activation_clip_range) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_function(static_cast<uint8_t>(_activation_function) & ((1U << 5) - 1)), reserved1(0),
+            activation_clip_range(static_cast<uint8_t>(_activation_clip_range) & ((1U << 3) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_activation_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_function(0), reserved1(0),
+            activation_clip_range(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_function get_activation_function() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_function>(activation_function);
+        }
+        CONSTEXPR npu_set_activation_t &set_activation_function(NPU_NAMESPACE::activation_function value)
+        {
+            activation_function = static_cast<uint8_t>(value) & ((1U << 5) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_clip_range get_activation_clip_range() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_clip_range>(activation_clip_range);
+        }
+        CONSTEXPR npu_set_activation_t &set_activation_clip_range(NPU_NAMESPACE::activation_clip_range value)
+        {
+            activation_clip_range = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_function",
+                (activation_function < (sizeof(activation_function_str) / sizeof(activation_function_str[0])) ?
+                     activation_function_str[activation_function] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_clip_range",
+                (activation_clip_range < (sizeof(activation_clip_range_str) / sizeof(activation_clip_range_str[0])) ?
+                     activation_clip_range_str[activation_clip_range] :
+                     "****")));
+        }
+#endif
+#endif
+    };
+    // Lower bound clip
+    struct npu_set_activation_min_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Index n for OFM access: BasePointer[n] is added to all OFM offsets
-struct npu_set_ofm_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_OFM_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_OFM_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_OFM_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ofm_region_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set kernel width - 1
-struct npu_set_kernel_width_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_KERNEL_WIDTH_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;        //  control
+        uint32_t clip_boundary : 16; //  Clip boundary for OFM activations
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_WIDTH_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_WIDTH_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_kernel_width_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_kernel_width_m1_t &set_param(uint32_t value)
+      public:
+        npu_set_activation_min_t(uint32_t _clip_boundary) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            clip_boundary(_clip_boundary & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_activation_min_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), clip_boundary(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_clip_boundary() const
+        {
+            return static_cast<uint32_t>(clip_boundary);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_clip_boundary(uint32_t value)
+        {
+            clip_boundary = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+        }
+#endif
+#endif
+    };
+    // Upper bound clip for OFM activations (cmd0 command NPU_SET_ACTIVATION_MAX)
+    struct npu_set_activation_max_t // bit-field widths sum to 32 (10 + 4 + 2 + 16)
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set kernel height - 1
-struct npu_set_kernel_height_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_KERNEL_HEIGHT_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_HEIGHT_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_HEIGHT_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_kernel_height_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_kernel_height_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Kernel stride b0=(X stride-1)&1, b1=(Y stride-1)&1, b2=weight order (0=depth, 1=kernel) b3 = kernel_x_dilation - 1
-// (0=no x dilation, 1=x dilation of x2) b4 = kernel_y_dilation -1 (0=no y dilation, 1=y dilation of x2) b5 = kernel
-// decomposition size (0 for kernel_split_size=8, 1 for kernel_split_size=4) b[8:6] = (X stride-1)>>1 b[11:9] = (Y
-// stride-1)>>1
-struct npu_set_kernel_stride_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_KERNEL_STRIDE
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode (valid() expects NPU_SET_ACTIVATION_MAX)
+        uint32_t reserved0 : 4; // reserved; both constructors write 0
+        uint32_t control : 2;        //  control (valid() expects CMD0_CTRL)
+        uint32_t clip_boundary : 16; //  Clip boundary for OFM activations
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_STRIDE) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_KERNEL_STRIDE);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_kernel_stride_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_kernel_stride_t &set_param(uint32_t value)
+      public:
+        npu_set_activation_max_t(uint32_t _clip_boundary) : // not CONSTEXPR and not explicit, unlike the default ctor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            clip_boundary(_clip_boundary & ((1U << 16) - 1)) // only the low 16 bits of the argument are stored
+        {
+        }
+        CONSTEXPR npu_set_activation_max_t() : // default: fixed opcode/control, clip_boundary = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), clip_boundary(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks opcode and control only; reserved0 and clip_boundary are not inspected
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved0 and clip_boundary keep their values
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; non-const, so it cannot be used on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies the first sizeof(uint32_t) bytes of the object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_clip_boundary() const
+        {
+            return static_cast<uint32_t>(clip_boundary);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_clip_boundary(uint32_t value)
+        {
+            clip_boundary = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+        }
+#endif
+#endif
+    };
+    // Index n for weight stream access (cmd0 command NPU_SET_WEIGHT_REGION)
+    struct npu_set_weight_region_t // bit-field widths sum to 32 (10 + 4 + 2 + 3 + 12 + 1)
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// 0=1-core, 1=2-core depth (this command is Ethos-U65 only and UNPREDICTABLE for Ethos-U55)
-struct npu_set_parallel_mode_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_PARALLEL_MODE
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_PARALLEL_MODE) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_PARALLEL_MODE);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_parallel_mode_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_parallel_mode_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set accumulator format
-struct npu_set_acc_format_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_ACC_FORMAT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode (valid() expects NPU_SET_WEIGHT_REGION)
+        uint32_t reserved0 : 4; // reserved; both constructors write 0
+        uint32_t control : 2; //  control (valid() expects CMD0_CTRL)
+        uint32_t region : 3;  //  Index n for weight stream access
+        uint32_t reserved1 : 12; // reserved; both constructors write 0
+        uint32_t custom_dma_cs : 1; //  Custom DMA select
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_ACC_FORMAT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_ACC_FORMAT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_acc_format_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::acc_format get_param() const
-    {
-        return static_cast<::acc_format>(param);
-    }
-    CONSTEXPR npu_set_acc_format_t &set_param(::acc_format value)
+      public:
+        npu_set_weight_region_t(uint32_t _region, NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) : // not CONSTEXPR
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0), custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1)) // 1-bit select
+        {
+        }
+        CONSTEXPR npu_set_weight_region_t() : // default: fixed opcode/control, region = 0, custom_dma_cs = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks opcode and control only; payload and reserved fields are not inspected
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; every other field keeps its value
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; non-const, so it cannot be used on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies the first sizeof(uint32_t) bytes of the object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep the low 3 bits, so values wrap into 0..7
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value)
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the lowest bit
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ? // bounds-check the table
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****"))); // placeholder when the value has no entry in custom_dma_cs_str
+        }
+#endif
+#endif
+    };
+    // Index n for scale stream access (cmd0 command NPU_SET_SCALE_REGION)
+    struct npu_set_scale_region_t // bit-field widths sum to 32 (10 + 4 + 2 + 3 + 12 + 1)
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set activation
-struct npu_set_activation_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_ACTIVATION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t type : 12;
-    uint32_t act_clip_range : 4;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::clip_range get_act_clip_range() const
-    {
-        return static_cast<::clip_range>(act_clip_range);
-    }
-    CONSTEXPR npu_set_activation_t &set_act_clip_range(::clip_range value)
-    {
-        act_clip_range = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_activation_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::activation get_type() const
-    {
-        return static_cast<::activation>(type);
-    }
-    CONSTEXPR npu_set_activation_t &set_type(::activation value)
-    {
-        type = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Lower bound clip for OFM activations – range is the OFM type range
-struct npu_set_activation_min_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_ACTIVATION_MIN
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode (valid() expects NPU_SET_SCALE_REGION)
+        uint32_t reserved0 : 4; // reserved; both constructors write 0
+        uint32_t control : 2; //  control (valid() expects CMD0_CTRL)
+        uint32_t region : 3;  //  Index n for scale stream access
+        uint32_t reserved1 : 12; // reserved; both constructors write 0
+        uint32_t custom_dma_cs : 1; //  Custom DMA select
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION_MIN) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION_MIN);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_activation_min_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_activation_min_t &set_param(uint32_t value)
+      public:
+        npu_set_scale_region_t(uint32_t _region, NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) : // not CONSTEXPR
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0), custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1)) // 1-bit select
+        {
+        }
+        CONSTEXPR npu_set_scale_region_t() : // default: fixed opcode/control, region = 0, custom_dma_cs = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks opcode and control only; payload and reserved fields are not inspected
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; every other field keeps its value
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; non-const, so it cannot be used on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies the first sizeof(uint32_t) bytes of the object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep the low 3 bits, so values wrap into 0..7
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value)
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the lowest bit
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ? // bounds-check the table
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****"))); // placeholder when the value has no entry in custom_dma_cs_str
+        }
+#endif
+#endif
+    };
+    // Start of ACC0,ACC1 buffers (cmd0 command NPU_SET_AB_START)
+    struct npu_set_ab_start_t // bit-field widths sum to 32 (10 + 4 + 2 + 6 + 10)
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Upper bound clip for OFM activations – range is the OFM type range
-struct npu_set_activation_max_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_ACTIVATION_MAX
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION_MAX) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_ACTIVATION_MAX);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_activation_max_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_activation_max_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Index n for weight access: BasePointer[n] is added to all Weight stream offsets
-struct npu_set_weight_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_WEIGHT_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode (valid() expects NPU_SET_AB_START)
+        uint32_t reserved0 : 4; // reserved; both constructors write 0
+        uint32_t control : 2;  //  control (valid() expects CMD0_CTRL)
+        uint32_t ab_start : 6; //  Start of ACC0,ACC1 buffers in the SHRAM in KB units. Multiple of 2
+        uint32_t reserved1 : 10; // reserved; both constructors write 0
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_WEIGHT_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_WEIGHT_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_weight_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_weight_region_t &set_param(uint32_t value)
+      public:
+        npu_set_ab_start_t(uint32_t _ab_start) : // not CONSTEXPR and not explicit, unlike the default ctor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ab_start(_ab_start & ((1U << 6) - 1)),
+            reserved1(0) // _ab_start above is masked to 6 bits; the "multiple of 2" rule is not checked
+        {
+        }
+        CONSTEXPR npu_set_ab_start_t() : // default: fixed opcode/control, ab_start = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ab_start(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks opcode and control only; ab_start and reserved fields are not inspected
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; every other field keeps its value
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; non-const, so it cannot be used on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies the first sizeof(uint32_t) bytes of the object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ab_start() const
+        {
+            return static_cast<uint32_t>(ab_start);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_ab_start(uint32_t value)
+        {
+            ab_start = static_cast<uint8_t>(value) & ((1U << 6) - 1); // keep the low 6 bits; evenness is not enforced
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ab_start", std::to_string(ab_start)));
+        }
+#endif
+#endif
+    };
+    // Number of blocks of dependency between kernel operations (cmd0 command NPU_SET_BLOCKDEP)
+    struct npu_set_blockdep_t // bit-field widths sum to 32 (10 + 4 + 2 + 2 + 14)
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Index n for weight access: BasePointer[n] is added to all scale stream offsets
-struct npu_set_scale_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_SCALE_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_SCALE_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_SCALE_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_scale_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_scale_region_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start of ACC0,ACC1 buffers in the SHRAM in KB units. Multiple of 4.)
-struct npu_set_ab_start_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_AB_START
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode (valid() expects NPU_SET_BLOCKDEP)
+        uint32_t reserved0 : 4; // reserved; both constructors write 0
+        uint32_t control : 2;  //  control (valid() expects CMD0_CTRL)
+        uint32_t blockdep : 2; //  Block number of blocks dependency between kernel operations
+        uint32_t reserved1 : 14; // reserved; both constructors write 0
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_AB_START) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_AB_START);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ab_start_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ab_start_t &set_param(uint32_t value)
+      public:
+        npu_set_blockdep_t(uint32_t _blockdep) : // not CONSTEXPR and not explicit, unlike the default ctor
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), blockdep(_blockdep & ((1U << 2) - 1)),
+            reserved1(0) // _blockdep above is masked to 2 bits, so only 0..3 can be stored
+        {
+        }
+        CONSTEXPR npu_set_blockdep_t() : // default: fixed opcode/control, blockdep = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), blockdep(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks opcode and control only; blockdep and reserved fields are not inspected
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; every other field keeps its value
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; non-const, so it cannot be used on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies the first sizeof(uint32_t) bytes of the object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_blockdep() const
+        {
+            return static_cast<uint32_t>(blockdep);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_blockdep(uint32_t value)
+        {
+            blockdep = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits, so values wrap into 0..3
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("blockdep", std::to_string(blockdep)));
+        }
+#endif
+#endif
+    };
+    // DMA0 source region
+    struct npu_set_dma0_src_region_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set block number of blocks dependency between kernel operations
-struct npu_set_blockdep_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_BLOCKDEP
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_BLOCKDEP) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_BLOCKDEP);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_blockdep_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_blockdep_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// DMA0 SRC region bitmap
-struct npu_set_dma0_src_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_DMA0_SRC_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t region : 8;        // If Bit[8]=0, Bit[7:0]=Region number in the range [0, 8) of SRC offset. If Bit[8]=1,
-                                // Bit[7:0]=Core number (0 or 1) to read.
-    uint32_t internal : 1;      // Must be 0 (external)
-    uint32_t stride_mode : 2;   // stride mode 0/1/2=1D/2D/3D
-    uint32_t reserved0 : 5;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Region number
+        uint32_t reserved1 : 5;
+        uint32_t region_mode : 1; //  Region mode
+        uint32_t stride_mode : 2; //  Stride mode
+        uint32_t reserved2 : 4;
+        uint32_t custom_dma_cs : 1; //  Custom DMA select
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SRC_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SRC_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_src_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_internal() const
-    {
-        return static_cast<uint32_t>(internal);
-    }
-    CONSTEXPR npu_set_dma0_src_region_t &set_internal(uint32_t value)
-    {
-        internal = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_region() const
-    {
-        return static_cast<uint32_t>(region);
-    }
-    CONSTEXPR npu_set_dma0_src_region_t &set_region(uint32_t value)
-    {
-        region = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::stride_mode get_stride_mode() const
+      public:
+        npu_set_dma0_src_region_t(uint32_t _region,
+                                  NPU_NAMESPACE::dma_region_mode _region_mode,
+                                  NPU_NAMESPACE::dma_stride_mode _stride_mode,
+                                  NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            region(_region & ((1U << 3) - 1)), reserved1(0),
+            region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1) - 1)),
+            stride_mode(static_cast<uint8_t>(_stride_mode) & ((1U << 2) - 1)), reserved2(0),
+            custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1))
+        {
+        }
+        CONSTEXPR npu_set_dma0_src_region_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), region_mode(0),
+            stride_mode(0), reserved2(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_region_mode(NPU_NAMESPACE::dma_region_mode value)
+        {
+            region_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_stride_mode get_stride_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::dma_stride_mode>(stride_mode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_stride_mode(NPU_NAMESPACE::dma_stride_mode value)
+        {
+            stride_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value)
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "region_mode",
+                (region_mode < (sizeof(dma_region_mode_str) / sizeof(dma_region_mode_str[0])) ?
+                     dma_region_mode_str[region_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "stride_mode",
+                (stride_mode < (sizeof(dma_stride_mode_str) / sizeof(dma_stride_mode_str[0])) ?
+                     dma_stride_mode_str[stride_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ?
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****")));
+        }
+#endif
+#endif
+    };
+    // DMA0 destination region: payload of cmd0 opcode NPU_SET_DMA0_DST_REGION (the bitfields below sum to 32 bits)
+    struct npu_set_dma0_dst_region_t
     {
-        return static_cast<::stride_mode>(stride_mode);
-    }
-    CONSTEXPR npu_set_dma0_src_region_t &set_stride_mode(::stride_mode value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (stores a cmd0_opcode value)
+        uint32_t reserved0 : 4; //  reserved; written as 0 by both constructors
+        uint32_t control : 2; //  control (stores a cmd_ctrl value)
+        uint32_t region : 3; //  Region number if region_mode is region_mode_external. Else core mask to write to (bit k
+                             //  set for core k=0,1)
+        uint32_t reserved1 : 5; //  reserved; written as 0 by both constructors
+        uint32_t region_mode : 1; //  Region mode (stores a dma_region_mode value)
+        uint32_t stride_mode : 2; //  Stride mode (stores a dma_stride_mode value)
+        uint32_t reserved2 : 4; //  reserved; written as 0 by both constructors
+        uint32_t custom_dma_cs : 1; //  Custom DMA select (stores a custom_dma_cs value)
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_dst_region_t(uint32_t _region, // Full constructor: every argument is masked to its field width
+                                  NPU_NAMESPACE::dma_region_mode _region_mode,
+                                  NPU_NAMESPACE::dma_stride_mode _stride_mode,
+                                  NPU_NAMESPACE::custom_dma_cs _custom_dma_cs) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            region(_region & ((1U << 3) - 1)), reserved1(0),
+            region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1) - 1)),
+            stride_mode(static_cast<uint8_t>(_stride_mode) & ((1U << 2) - 1)), reserved2(0),
+            custom_dma_cs(static_cast<uint8_t>(_custom_dma_cs) & ((1U << 1) - 1))
+        {
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t() : // Default constructor: opcode/control preset, all other fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), region_mode(0),
+            stride_mode(0), reserved2(0), custom_dma_cs(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode and control both match this command
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; the remaining fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Returns the first sizeof(uint32_t) bytes of this object, copied out with memcpy
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setter calls be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_region(uint32_t value) // Keeps the low 3 bits of value
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_region_mode(NPU_NAMESPACE::dma_region_mode value) // Keeps the low bit
+        {
+            region_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_stride_mode get_stride_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::dma_stride_mode>(stride_mode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_stride_mode(NPU_NAMESPACE::dma_stride_mode value) // Keeps low 2 bits
+        {
+            stride_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::custom_dma_cs get_custom_dma_cs() const
+        {
+            return static_cast<NPU_NAMESPACE::custom_dma_cs>(custom_dma_cs);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_custom_dma_cs(NPU_NAMESPACE::custom_dma_cs value) // Keeps the low bit
+        {
+            custom_dma_cs = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends (name, text) pairs
+        { // Only the payload fields are reported; opcode and control are not appended here
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "region_mode",
+                (region_mode < (sizeof(dma_region_mode_str) / sizeof(dma_region_mode_str[0])) ?
+                     dma_region_mode_str[region_mode] :
+                     "****"))); // placeholder text when the raw value is outside the name table
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "stride_mode",
+                (stride_mode < (sizeof(dma_stride_mode_str) / sizeof(dma_stride_mode_str[0])) ?
+                     dma_stride_mode_str[stride_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "custom_dma_cs",
+                (custom_dma_cs < (sizeof(custom_dma_cs_str) / sizeof(custom_dma_cs_str[0])) ?
+                     custom_dma_cs_str[custom_dma_cs] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Size of second dimension for 2D/3D transfers: payload of cmd0 opcode NPU_SET_DMA0_SIZE0 (fields sum to 32 bits)
+    struct npu_set_dma0_size0_t
     {
-        stride_mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// DMA0 DST region bitmap
-struct npu_set_dma0_dst_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_DMA0_DST_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t region : 8;        // If Bit[8]=0, Bit[7:0]=Region number in the range [0, 8) of DST offset. If Bit[8]=1,
-                                // Bit[7:0]=Core mask to write to (bit k set for core k=0,1).
-    uint32_t internal : 1;      // Select external/internal=0/1
-    uint32_t stride_mode : 2;   // stride mode 0/1/2=1D/2D/3D
-    uint32_t reserved0 : 5;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_DST_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_DST_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_dst_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_internal() const
-    {
-        return static_cast<uint32_t>(internal);
-    }
-    CONSTEXPR npu_set_dma0_dst_region_t &set_internal(uint32_t value)
-    {
-        internal = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_region() const
-    {
-        return static_cast<uint32_t>(region);
-    }
-    CONSTEXPR npu_set_dma0_dst_region_t &set_region(uint32_t value)
-    {
-        region = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::stride_mode get_stride_mode() const
-    {
-        return static_cast<::stride_mode>(stride_mode);
-    }
-    CONSTEXPR npu_set_dma0_dst_region_t &set_stride_mode(::stride_mode value)
-    {
-        stride_mode = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Inner size for 2D/3D mode.
-struct npu_set_dma0_size0_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_DMA0_SIZE0
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (stores a cmd0_opcode value)
+        uint32_t reserved0 : 4; //  reserved; written as 0 by both constructors
+        uint32_t control : 2; //  control (stores a cmd_ctrl value)
+        uint32_t size : 16;   //  Size of second dimension for 2D/3D transfers
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE0) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE0);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_size0_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_dma0_size0_t &set_param(uint32_t value)
+      public:
+        npu_set_dma0_size0_t(uint32_t _size) : // Full constructor: _size is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(_size & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_dma0_size0_t() : // Default constructor: opcode/control preset, size 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode and control both match this command
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; size is left as it is
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Returns the first sizeof(uint32_t) bytes of this object, copied out with memcpy
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setter calls be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_size() const
+        {
+            return static_cast<uint32_t>(size);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_size(uint32_t value) // Keeps the low 16 bits of value
+        {
+            size = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends ("size", text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Size of third dimension for 3D transfers: payload of cmd0 opcode NPU_SET_DMA0_SIZE1 (fields sum to 32 bits)
+    struct npu_set_dma0_size1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Outer size for 3D mode.
-struct npu_set_dma0_size1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_DMA0_SIZE1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_DMA0_SIZE1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_size1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_dma0_size1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 Broadcast mode
-struct npu_set_ifm2_broadcast_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_BROADCAST
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t broadcast_height : 1;
-    uint32_t broadcast_width : 1;
-    uint32_t broadcast_depth : 1;
-    uint32_t reserved0 : 3;
-    uint32_t operand_order : 1;
-    uint32_t broadcast_scalar : 1;
-    uint32_t reserved1 : 8;
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (stores a cmd0_opcode value)
+        uint32_t reserved0 : 4; //  reserved; written as 0 by both constructors
+        uint32_t control : 2; //  control (stores a cmd_ctrl value)
+        uint32_t size : 16;   //  Size of third dimension for 3D transfers
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_BROADCAST) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_BROADCAST);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR uint32_t get_broadcast_depth() const
-    {
-        return static_cast<uint32_t>(broadcast_depth);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_depth(uint32_t value)
-    {
-        broadcast_depth = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_broadcast_height() const
-    {
-        return static_cast<uint32_t>(broadcast_height);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_height(uint32_t value)
-    {
-        broadcast_height = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_broadcast_scalar() const
-    {
-        return static_cast<uint32_t>(broadcast_scalar);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_scalar(uint32_t value)
-    {
-        broadcast_scalar = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_broadcast_width() const
-    {
-        return static_cast<uint32_t>(broadcast_width);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_width(uint32_t value)
-    {
-        broadcast_width = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
+      public:
+        npu_set_dma0_size1_t(uint32_t _size) : // Full constructor: _size is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(_size & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_dma0_size1_t() : // Default constructor: opcode/control preset, size 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode and control both match this command
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; size is left as it is
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Returns the first sizeof(uint32_t) bytes of this object, copied out with memcpy
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setter calls be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_size() const
+        {
+            return static_cast<uint32_t>(size);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_size(uint32_t value) // Keeps the low 16 bits of value
+        {
+            size = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends ("size", text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 broadcast configuration: payload of cmd0 opcode NPU_SET_IFM2_BROADCAST (the bitfields sum to 32 bits)
+    struct npu_set_ifm2_broadcast_t
     {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_cmd_code(::cmd0 value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (stores a cmd0_opcode value)
+        uint32_t reserved0 : 4; //  reserved; written as 0 by both constructors
+        uint32_t control : 2; //  control (stores a cmd_ctrl value)
+        uint32_t
+            broadcast_h : 1; //  Broadcast H dimension (if set then any accesses to IFM2 sets y=0 and IFM2 height=1)
+        uint32_t broadcast_w : 1; //  Broadcast W dimension (if set then any accesses to IFM2 sets x=0 and IFM2 width=1)
+        uint32_t broadcast_c : 1; //  Broadcast C dimension (if set then any accesses to IFM2 sets c=0 and IFM2 depth=1)
+        uint32_t reserved1 : 3; //  reserved; written as 0 by both constructors
+        uint32_t operand_order : 1;      //  Operand order
+        uint32_t broadcast_constant : 1; //  Broadcast constant given by NPU_SET_IFM2_SCALAR and so ignore BH, BW and BC
+        uint32_t reserved2 : 8; //  reserved; written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_broadcast_t(NPU_NAMESPACE::broadcast_mode _broadcast_h, // Full constructor: each argument is
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_w, // masked down to its 1-bit field
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_c,
+                                 NPU_NAMESPACE::ifm2_operand_order _operand_order,
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_constant) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            broadcast_h(static_cast<uint8_t>(_broadcast_h) & ((1U << 1) - 1)),
+            broadcast_w(static_cast<uint8_t>(_broadcast_w) & ((1U << 1) - 1)),
+            broadcast_c(static_cast<uint8_t>(_broadcast_c) & ((1U << 1) - 1)), reserved1(0),
+            operand_order(static_cast<uint8_t>(_operand_order) & ((1U << 1) - 1)),
+            broadcast_constant(static_cast<uint8_t>(_broadcast_constant) & ((1U << 1) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t() : // Default constructor: opcode/control preset, all other fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), broadcast_h(0), broadcast_w(0),
+            broadcast_c(0), reserved1(0), operand_order(0), broadcast_constant(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode and control both match this command
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; the remaining fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Returns the first sizeof(uint32_t) bytes of this object, copied out with memcpy
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setter calls be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_h() const
+        {
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_h);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_h(NPU_NAMESPACE::broadcast_mode value) // Keeps the low bit
+        {
+            broadcast_h = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_w() const
+        {
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_w);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_w(NPU_NAMESPACE::broadcast_mode value) // Keeps the low bit
+        {
+            broadcast_w = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_c() const
+        {
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_c);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_c(NPU_NAMESPACE::broadcast_mode value) // Keeps the low bit
+        {
+            broadcast_c = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm2_operand_order get_operand_order() const
+        {
+            return static_cast<NPU_NAMESPACE::ifm2_operand_order>(operand_order);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_operand_order(NPU_NAMESPACE::ifm2_operand_order value) // Low bit only
+        {
+            operand_order = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_constant() const
+        {
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_constant);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_constant(NPU_NAMESPACE::broadcast_mode value) // Low bit only
+        {
+            broadcast_constant = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends (name, text) pairs
+        { // Only the payload fields are reported; opcode and control are not appended here
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_h",
+                (broadcast_h < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_h] :
+                     "****"))); // placeholder text when the raw value is outside the name table
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_w",
+                (broadcast_w < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_w] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_c",
+                (broadcast_c < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_c] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "operand_order",
+                (operand_order < (sizeof(ifm2_operand_order_str) / sizeof(ifm2_operand_order_str[0])) ?
+                     ifm2_operand_order_str[operand_order] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_constant",
+                (broadcast_constant < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_constant] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 scalar value: payload of cmd0 opcode NPU_SET_IFM2_SCALAR (the bitfields below sum to 32 bits)
+    struct npu_set_ifm2_scalar_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_operand_order() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (stores a cmd0_opcode value)
+        uint32_t reserved0 : 4; //  reserved; written as 0 by both constructors
+        uint32_t control : 2; //  control (stores a cmd_ctrl value)
+        uint32_t scalar : 16; //  int16 or uint16 depending on ifm2_precision.type
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_scalar_t(uint32_t _scalar) : // Full constructor: _scalar is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), scalar(_scalar & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t() : // Default constructor: opcode/control preset, scalar 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), scalar(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode and control both match this command
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // Rewrites opcode and control only; scalar is left as it is
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // Returns the first sizeof(uint32_t) bytes of this object, copied out with memcpy
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // Keeps the low 10 bits
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setter calls be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Keeps the low 2 bits
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scalar() const // Returns the raw 16-bit field zero-extended; no sign interpretation here
+        {
+            return static_cast<uint32_t>(scalar);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_scalar(uint32_t value) // Keeps the low 16 bits of value
+        {
+            scalar = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends ("scalar", text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("scalar", std::to_string(scalar)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Precision
+    struct npu_set_ifm2_precision_t
     {
-        return static_cast<uint32_t>(operand_order);
-    }
-    CONSTEXPR npu_set_ifm2_broadcast_t &set_operand_order(uint32_t value)
+#ifdef __cplusplus // C++ only: the raw bitfields are private and reached through the accessors below
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;         //  control
+        uint32_t activation_type : 1; //  IFM type - MUST MATCH IFM
+        uint32_t reserved1 : 1;
+        uint32_t activation_precision : 2; //  IFM precision - MUST MATCH IFM
+        uint32_t reserved2 : 2;
+        uint32_t activation_format : 2; //  IFM format
+        uint32_t reserved3 : 8; // declared field widths add up to 32 bits
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_precision_t(NPU_NAMESPACE::activation_type _activation_type, // not CONSTEXPR, unlike the default ctor
+                                 NPU_NAMESPACE::activation_precision _activation_precision,
+                                 NPU_NAMESPACE::activation_format _activation_format) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)), reserved1(0), // each argument is masked to its field width
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved2(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)), reserved3(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_precision_t() : // default: fixed header set, every payload and reserved field zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0), reserved1(0),
+            activation_precision(0), reserved2(0), activation_format(0), reserved3(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved fields are not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; payload and reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const // stored 1-bit field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value)
+        {
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep only the lowest bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const // stored 2-bit field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value)
+        {
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const // stored 2-bit field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value)
+        {
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, text) pair per payload field
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ? // bounds-check against the name table
+                     activation_type_str[activation_type] :
+                     "****"))); // placeholder text when the value has no entry in the table
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 zero point
+    struct npu_set_ifm2_zero_point_t
     {
-        operand_order = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM2 scalar value at range IFM_PRECISION
-struct npu_set_ifm2_scalar_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_SCALAR
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_SCALAR) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_SCALAR);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_scalar_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_scalar_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set activation
-struct npu_set_ifm2_precision_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_PRECISION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t precision : 4;
-    uint32_t reserved0 : 2;
-    uint32_t format : 2;
-    uint32_t reserved1 : 8;
+      private: // C++ only (inside the preceding #ifdef __cplusplus): fields are reached through the accessors
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;     //  control
+        uint32_t zero_point : 16; //  Zero point offset; declared field widths add up to 32 bits
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_PRECISION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_PRECISION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_precision_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::data_format get_format() const
-    {
-        return static_cast<::data_format>(format);
-    }
-    CONSTEXPR npu_set_ifm2_precision_t &set_format(::data_format value)
-    {
-        format = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR ::ifm_precision get_precision() const
-    {
-        return static_cast<::ifm_precision>(precision);
-    }
-    CONSTEXPR npu_set_ifm2_precision_t &set_precision(::ifm_precision value)
+      public:
+        npu_set_ifm2_zero_point_t(uint32_t _zero_point) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1)) // only the low 16 bits of the argument are kept
+        {
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t() : // default: fixed header set, zero_point = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved0 is not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; zero_point and reserved0 are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const // stored 16-bit field, widened to uint32_t
+        {
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_zero_point(uint32_t value)
+        {
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1); // upper 16 bits of value are discarded
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 and tile 2 width, stored as (width - 1)
+    struct npu_set_ifm2_width0_m1_t
     {
-        precision = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Zero point offset (so value that 0 is encoded as) at range IFM_PRECISION
-struct npu_set_ifm2_zero_point_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_ZERO_POINT
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_ZERO_POINT) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_ZERO_POINT);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_zero_point_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_zero_point_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM2 Tile 0 and tile 2 (width-1)
-struct npu_set_ifm2_width0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_WIDTH0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private: // C++ only (inside the preceding #ifdef __cplusplus): fields are reached through the accessors
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;   //  control
+        uint32_t width_m1 : 16; //  IFM2 Tile 0 and tile 2 width (presumably width-1, per the _m1 suffix)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_WIDTH0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_WIDTH0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_width0_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_width0_m1_t &set_param(uint32_t value)
+      public:
+        npu_set_ifm2_width0_m1_t(uint32_t _width_m1) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t() : // default: fixed header set, width_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved0 is not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; width_m1 and reserved0 are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const // stored 16-bit field, widened to uint32_t; no +1 is applied
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // upper 16 bits of value are discarded
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 height, stored as (height - 1)
+    struct npu_set_ifm2_height0_m1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM2 Tile 0 (height-1)
-struct npu_set_ifm2_height0_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_HEIGHT0_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_HEIGHT0_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_HEIGHT0_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_height0_m1_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_height0_m1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// IFM2 Tile 1 (height-1)
-struct npu_set_ifm2_height1_m1_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_HEIGHT1_M1
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private: // C++ only (inside the preceding #ifdef __cplusplus): fields are reached through the accessors
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  IFM2 Tile 0 height (presumably height-1, per the _m1 suffix)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_HEIGHT1_M1) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_HEIGHT1_M1);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_height1_m1_t &set_cmd_code(::cmd0 value)
+      public:
+        npu_set_ifm2_height0_m1_t(uint32_t _height_m1) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t() : // default: fixed header set, height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved0 is not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; height_m1 and reserved0 are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const // stored 16-bit field, widened to uint32_t; no +1 is applied
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // upper 16 bits of value are discarded
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 1 height, stored as (height - 1)
+    struct npu_set_ifm2_height1_m1_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
+#ifdef __cplusplus // C++ only: the raw bitfields are private and reached through the accessors below
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  IFM2 Tile 1 height (presumably height-1, per the _m1 suffix)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_height1_m1_t(uint32_t _height_m1) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t() : // default: fixed header set, height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved0 is not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; height_m1 and reserved0 are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const // stored 16-bit field, widened to uint32_t; no +1 is applied
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // upper 16 bits of value are discarded
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Start of IB0,IB1 buffers for IFM2
+    struct npu_set_ifm2_ib_start_t
     {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_height1_m1_t &set_param(uint32_t value)
+#ifdef __cplusplus // C++ only: the raw bitfields are private and reached through the accessors below
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2;  //  control
+        uint32_t ib_start : 6; //  Start of IB0,IB1 buffers for IFM2 in the SHRAM in KB units. Multiple of 2
+        uint32_t reserved1 : 10; // declared field widths add up to 32 bits
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_ib_start_t(uint32_t _ib_start) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_start(_ib_start & ((1U << 6) - 1)), // low 6 bits kept
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t() : // default: fixed header set, ib_start = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_start(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved fields and ib_start are not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; ib_start and reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ib_start() const // stored 6-bit field, widened to uint32_t
+        {
+            return static_cast<uint32_t>(ib_start);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_ib_start(uint32_t value)
+        {
+            ib_start = static_cast<uint8_t>(value) & ((1U << 6) - 1); // masks to 6 bits; the "multiple of 2" rule is not enforced here
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ib_start", std::to_string(ib_start))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for IFM2 access
+    struct npu_set_ifm2_region_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Start of IB0, IB1 buffers for IFM2 in SHRAM. In KB units, multiple of 2.
-struct npu_set_ifm2_ib_start_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_IB_START
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_IB_START) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_IB_START);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_ib_start_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_ib_start_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Index n for IFM2 access: Region[n] is added to all IFM2 addresses
-struct npu_set_ifm2_region_t
-{
-    uint32_t cmd_code : 10;     // NPU_SET_IFM2_REGION
-    uint32_t must_be_zero0 : 6; // 0
-    uint32_t param : 16;
+      private: // C++ only (inside the preceding #ifdef __cplusplus): fields are reached through the accessors
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Index n for IFM2 access
+        uint32_t reserved1 : 13; // declared field widths add up to 32 bits
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd0::NPU_SET_IFM2_REGION) && must_be_zero0 == 0;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code      = static_cast<uint32_t>(cmd0::NPU_SET_IFM2_REGION);
-        must_be_zero0 = 0;
-    }
-    CONSTEXPR ::cmd0 get_cmd_code() const
-    {
-        return static_cast<::cmd0>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_region_t &set_cmd_code(::cmd0 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_ifm2_region_t &set_param(uint32_t value)
+      public:
+        npu_set_ifm2_region_t(uint32_t _region) : // not CONSTEXPR and not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)), // low 3 bits kept
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_region_t() : // default: fixed header set, region = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control still hold this command's fixed header values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL); // reserved fields and region are not checked
+        }
+        CONSTEXPR void init() // re-stamps opcode/control only; region and reserved fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the command as a raw word; note the operator is not const-qualified
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the first sizeof(uint32_t) bytes of this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // setters return *this so calls can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control field, cast back to the enum
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const // stored 3-bit field, widened to uint32_t
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep only the low 3 bits, i.e. values 0..7
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // disassembly helper is compiled only when NPU_DISASSEMBLE is defined
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region))); // appends one (name, decimal value) pair
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Tile 0 address
+    struct npu_set_ifm_base0_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM base address (top left tile)
-struct npu_set_ifm_base0_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_BASE0
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM base address (top left tile)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE0) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE0);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_base0_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_base0_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_base0_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM base address (top right tile)
-struct npu_set_ifm_base1_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_BASE1
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM base address (top right tile)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE1) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE1);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_base1_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_base1_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_base1_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm_base0_t(uint32_t _addr) : // NPU_SET_IFM_BASE0 command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_base0_t() : // Default NPU_SET_IFM_BASE0 command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_BASE0 and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_base0_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_base0_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_base0_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM Tile 1 address
+    struct npu_set_ifm_base1_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM base address (bottom left tile)
-struct npu_set_ifm_base2_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_BASE2
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM base address (bottom left tile)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE2) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE2);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_base2_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_base2_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_base2_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM base address (bottom right tile)
-struct npu_set_ifm_base3_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_BASE3
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM base address (bottom right tile)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE3) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_BASE3);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_base3_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_base3_t &set_data(uint32_t value)
+      public:
+        npu_set_ifm_base1_t(uint32_t _addr) : // NPU_SET_IFM_BASE1 command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_base1_t() : // Default NPU_SET_IFM_BASE1 command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_BASE1 and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_base1_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_base1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_base1_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM Tile 2 address
+    struct npu_set_ifm_base2_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_base2_t(uint32_t _addr) : // NPU_SET_IFM_BASE2 command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_base2_t() : // Default NPU_SET_IFM_BASE2 command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_BASE2 and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_base2_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_base2_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_base2_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM Tile 3 address
+    struct npu_set_ifm_base3_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_base3_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_base3_t(uint32_t _addr) : // NPU_SET_IFM_BASE3 command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_base3_t() : // Default NPU_SET_IFM_BASE3 command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_BASE3 and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_base3_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_base3_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_base3_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM byte stride between horizontal values
+    struct npu_set_ifm_stride_x_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM byte stride between horizontal values
-struct npu_set_ifm_stride_x_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_STRIDE_X
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM byte stride between horizontal values
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_X) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_X);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_stride_x_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_stride_x_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_stride_x_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM byte stride between vertical values
-struct npu_set_ifm_stride_y_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_STRIDE_Y
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM byte stride between vertical values
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_Y) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_Y);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_stride_y_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_stride_y_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_stride_y_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm_stride_x_t(uint32_t _addr) : // NPU_SET_IFM_STRIDE_X command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t() : // Default NPU_SET_IFM_STRIDE_X command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_STRIDE_X and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM byte stride between vertical values
+    struct npu_set_ifm_stride_y_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM byte stride between channel blocks (of 16 bytes each block)
-struct npu_set_ifm_stride_c_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM_STRIDE_C
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM byte stride between channel blocks (of 16 bytes each block)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_C) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM_STRIDE_C);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm_stride_c_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm_stride_c_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm_stride_c_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM base address (top left tile)
-struct npu_set_ofm_base0_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_BASE0
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM base address (top left tile)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE0) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE0);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_base0_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_base0_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_base0_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm_stride_y_t(uint32_t _addr) : // NPU_SET_IFM_STRIDE_Y command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t() : // Default NPU_SET_IFM_STRIDE_Y command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_STRIDE_Y and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // Returns the addr field.
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t &set_addr(uint32_t value) // Chainable setter; returns *this.
+        {
+            addr = value; // stored as-is, no masking
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // Appends one name/value pair to fields.
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // addr rendered as "0x" followed by hex digits
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // IFM byte stride between channel blocks (of 16 bytes each block)
+    struct npu_set_ifm_stride_c_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM base address (top right tile)
-struct npu_set_ofm_base1_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_BASE1
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM base address (top right tile)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE1) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE1);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_base1_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_base1_t &set_data(uint32_t value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_stride_c_t(uint32_t _addr) : // NPU_SET_IFM_STRIDE_C command with addr = _addr; reserved bits zeroed.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t() : // Default NPU_SET_IFM_STRIDE_C command: CMD1_CTRL control, addr = 0.
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // True when opcode is NPU_SET_IFM_STRIDE_C and control is 1 or 2.
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // Re-stamps opcode and control only; addr and reserved bits are not assigned.
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // Raw encoding: the first sizeof(uint64_t) bytes of this object.
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // Returns the opcode field as a cmd1_opcode.
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // Chainable setter; returns *this.
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // Returns the control field as a cmd_ctrl.
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // Chainable setter; returns *this.
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 0 address (cmd1 command NPU_SET_OFM_BASE0)
+    struct npu_set_ofm_base0_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_BASE0
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_base0_t(uint32_t _addr) : // command carrying the given address offset
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_base0_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE0 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_base0_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_base0_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_base0_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 1 address (cmd1 command NPU_SET_OFM_BASE1)
+    struct npu_set_ofm_base1_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_base1_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_BASE1
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_base1_t(uint32_t _addr) : // command carrying the given address offset
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_base1_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE1 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_base1_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_base1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_base1_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 2 address (cmd1 command NPU_SET_OFM_BASE2)
+    struct npu_set_ofm_base2_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM base address (bottom left tile)
-struct npu_set_ofm_base2_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_BASE2
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM base address (bottom left tile)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE2) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE2);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_base2_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_base2_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_base2_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM base address (bottom right tile)
-struct npu_set_ofm_base3_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_BASE3
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM base address (bottom right tile)
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_BASE2
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE3) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_BASE3);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_base3_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_base3_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_base3_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ofm_base2_t(uint32_t _addr) : // command carrying the given address offset
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_base2_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE2 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_base2_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_base2_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_base2_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 3 address (cmd1 command NPU_SET_OFM_BASE3)
+    struct npu_set_ofm_base3_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM byte stride between horizontal values
-struct npu_set_ofm_stride_x_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_STRIDE_X
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM byte stride between horizontal values
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_X) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_X);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_stride_x_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_stride_x_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_stride_x_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM byte stride between vertical values
-struct npu_set_ofm_stride_y_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_STRIDE_Y
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM byte stride between vertical values
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_BASE3
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_Y) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_Y);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_stride_y_t &set_cmd_code(::cmd1 value)
+      public:
+        npu_set_ofm_base3_t(uint32_t _addr) : // command carrying the given address offset
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_base3_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE3 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_base3_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_base3_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_base3_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM byte stride between horizontal values (cmd1 command NPU_SET_OFM_STRIDE_X)
+    struct npu_set_ofm_stride_x_t
     {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_STRIDE_X
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset (presumably carries the byte stride named in the struct comment)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_x_t(uint32_t _addr) : // command carrying the given addr value
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_X and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM byte stride between vertical values (cmd1 command NPU_SET_OFM_STRIDE_Y)
+    struct npu_set_ofm_stride_y_t
     {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_stride_y_t &set_data(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_STRIDE_Y
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset (presumably carries the byte stride named in the struct comment)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_y_t(uint32_t _addr) : // command carrying the given addr value
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_Y and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM byte stride between channel blocks (of 16 bytes each block) (cmd1 command NPU_SET_OFM_STRIDE_C)
+    struct npu_set_ofm_stride_c_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_OFM_STRIDE_C
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset (presumably carries the byte stride named in the struct comment)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_c_t(uint32_t _addr) : // command carrying the given addr value
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_C and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Weight stream byte offset in WEIGHT_REGION (cmd1 command NPU_SET_WEIGHT_BASE)
+    struct npu_set_weight_base_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_stride_y_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd1_opcode::NPU_SET_WEIGHT_BASE
+        uint32_t reserved0 : 4; //  reserved; constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; constructors write 0
+        uint32_t addr : 32; //  address offset
+#ifdef __cplusplus
+      public:
+        npu_set_weight_base_t(uint32_t _addr) : // command carrying the given address offset
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_weight_base_t() : // command with addr = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_WEIGHT_BASE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and addr are untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // byte-wise copy of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field as a cmd1_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight_base_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps only the low 10 bits of value; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight_base_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps only the low 2 bits of value; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const // current addr field
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_weight_base_t &set_addr(uint32_t value) // stores value as-is; returns *this
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x" + hex addr) to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Weight stream byte length
+    struct npu_set_weight_length_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set OFM byte stride between channel blocks (of 16 bytes each block)
-struct npu_set_ofm_stride_c_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_STRIDE_C
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // OFM byte stride between channel blocks (of 16 bytes each block)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_C) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_STRIDE_C);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_stride_c_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_stride_c_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_stride_c_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Weight stream input base address
-struct npu_set_weight_base_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_WEIGHT_BASE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Weight stream input base address
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t length : 32; //  Weight stream byte length
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT_BASE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT_BASE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_weight_base_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_weight_base_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_weight_base_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_weight_length_t(uint32_t _length) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(_length)
+        {
+        }
+        CONSTEXPR npu_set_weight_length_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_length(uint32_t value)
+        {
+            length = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif
+#endif
+    };
+    // Scale and bias stream input byte offset from SCALE_REGION
+    struct npu_set_scale_base_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Weight stream length
-struct npu_set_weight_length_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_WEIGHT_LENGTH
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Weight stream length
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT_LENGTH) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT_LENGTH);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_weight_length_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_weight_length_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_weight_length_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Scale and bias stream input base address
-struct npu_set_scale_base_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_SCALE_BASE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Scale and bias stream input base address
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_SCALE_BASE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_SCALE_BASE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_scale_base_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_scale_base_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_scale_base_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_scale_base_t(uint32_t _addr) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_scale_base_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale_base_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale_base_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_scale_base_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // Scale and bias stream input byte length
+    struct npu_set_scale_length_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Scale and bias stream input length
-struct npu_set_scale_length_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_SCALE_LENGTH
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Scale and bias stream input length
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_SCALE_LENGTH) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_SCALE_LENGTH);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_scale_length_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_scale_length_t &set_data(uint32_t value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t length : 20; //  Scale and bias stream byte length
+        uint32_t reserved2 : 12;
+#ifdef __cplusplus
+      public:
+        npu_set_scale_length_t(uint32_t _length) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0),
+            length(_length & ((1U << 20) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_scale_length_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_length(uint32_t value)
+        {
+            length = value & ((1U << 20) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif
+#endif
+    };
+    // OFM scale
+    struct npu_set_ofm_scale_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t shift : 6;   //  Shift
+        uint32_t reserved1 : 10;
+        uint32_t scale : 32; //  Scale. Not applied for 32-bit operations
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_scale_t(uint32_t _shift, uint32_t _scale) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(_shift & ((1U << 6) - 1)),
+            reserved1(0), scale(_scale)
+        {
+        }
+        CONSTEXPR npu_set_ofm_scale_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(0), reserved1(0), scale(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_shift() const
+        {
+            return static_cast<uint32_t>(shift);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_shift(uint32_t value)
+        {
+            shift = static_cast<uint8_t>(value) & ((1U << 6) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_scale(uint32_t value)
+        {
+            scale = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif
+#endif
+    };
+    // Input operand A scale
+    struct npu_set_opa_scale_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_scale_length_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t shift : 6;   //  Shift. Ignored if IFM scale mode is 0
+        uint32_t reserved1 : 10;
+        uint32_t scale : 32; //  Scale. 16-bit if IFM scale mode is 0
+#ifdef __cplusplus
+      public:
+        npu_set_opa_scale_t(uint32_t _shift, uint32_t _scale) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(_shift & ((1U << 6) - 1)),
+            reserved1(0), scale(_scale)
+        {
+        }
+        CONSTEXPR npu_set_opa_scale_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(0), reserved1(0), scale(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_shift() const
+        {
+            return static_cast<uint32_t>(shift);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_shift(uint32_t value)
+        {
+            shift = static_cast<uint8_t>(value) & ((1U << 6) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_scale(uint32_t value)
+        {
+            scale = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif
+#endif
+    };
+    // Input operand B scale
+    struct npu_set_opb_scale_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set scale (32-bit). Used by average pool with pad=0, elementwise MUL, ADD, SUB
-struct npu_set_ofm_scale_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OFM_SCALE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t shift : 16;
-    uint32_t data : 32; // scale (32-bit). Used by average pool with pad=0, elementwise MUL, ADD, SUB
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OFM_SCALE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OFM_SCALE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ofm_scale_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ofm_scale_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ofm_scale_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_shift() const
-    {
-        return static_cast<uint32_t>(shift);
-    }
-    CONSTEXPR npu_set_ofm_scale_t &set_shift(uint32_t value)
-    {
-        shift = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set scale (32-bit) used for elementwise ADD/SUB OPA prescale. If IFM scale mode is 0 then shift is ignored and scale
-// is 16-bit. If IFM scale mode is 1 or 2 then shift is 6-bit and scale is 32-bit
-struct npu_set_opa_scale_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OPA_SCALE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t shift : 16;
-    uint32_t
-        data : 32; // scale (32-bit) used for elementwise ADD/SUB OPA prescale. If IFM scale mode is 0 then shift is
-                   // ignored and scale is 16-bit. If IFM scale mode is 1 or 2 then shift is 6-bit and scale is 32-bit
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t scale : 16; //  Scale. Not used if IFM scale mode is 1 or 2
+        uint32_t reserved2 : 16;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OPA_SCALE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OPA_SCALE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_opa_scale_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_opa_scale_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_opa_scale_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_shift() const
-    {
-        return static_cast<uint32_t>(shift);
-    }
-    CONSTEXPR npu_set_opa_scale_t &set_shift(uint32_t value)
+      public:
+        npu_set_opb_scale_t(uint32_t _scale) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0),
+            scale(_scale & ((1U << 16) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_opb_scale_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), scale(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_scale(uint32_t value)
+        {
+            scale = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif
+#endif
+    };
+    // DMA user channel 0 source byte offset from DMA0_SRC_REGION
+    struct npu_set_dma0_src_t
     {
-        shift = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set scale (16-bit) used for elementwise ADD/SUB OPB prescale. If IFM scale mode is 0 then scale is 16-bit. If IFM
-// scale mode is 1 or 2 then this register is not used
-struct npu_set_opb_scale_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_OPB_SCALE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // scale (16-bit) used for elementwise ADD/SUB OPB prescale. If IFM scale mode is 0 then scale
-                        // is 16-bit. If IFM scale mode is 1 or 2 then this register is not used
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_OPB_SCALE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_OPB_SCALE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_opb_scale_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_opb_scale_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_opb_scale_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set DMA source address
-struct npu_set_dma0_src_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_DMA0_SRC
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32;
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SRC) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SRC);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_src_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_dma0_src_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_dma0_src_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_dma0_src_t(uint32_t _addr) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_dma0_src_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t()
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_src_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits of value
+            return *this; // returned so setters can be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control); // stored control viewed as cmd_ctrl
+        }
+        CONSTEXPR npu_set_dma0_src_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits of value
+            return *this; // returned so setters can be chained
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr); // payload field, unmodified
+        }
+        CONSTEXPR npu_set_dma0_src_t &set_addr(uint32_t value)
+        {
+            addr = value; // stored as given, no masking
+            return *this; // returned so setters can be chained
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str())); // appended; fields is not cleared
+        }
+#endif
+#endif
+    };
+    // DMA user channel 0 destination byte offset from DMA0_DST_REGION (cmd1 command NPU_SET_DMA0_DST)
+    struct npu_set_dma0_dst_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set DMA destination address
-struct npu_set_dma0_dst_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_DMA0_DST
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32;
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_DMA0_DST) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_DMA0_DST);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_dst_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_dma0_dst_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_dma0_dst_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set DMA length
-struct npu_set_dma0_len_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_DMA0_LEN
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // DMA length
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_DMA0_DST
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload: destination byte offset (per the struct comment)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_DMA0_LEN) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_DMA0_LEN);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_len_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_dma0_len_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+      public:
+        npu_set_dma0_dst_t(uint32_t _addr) : // NPU_SET_DMA0_DST command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_dma0_dst_t() : // NPU_SET_DMA0_DST command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_DST and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_dst_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_dst_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_dma0_dst_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA user channel 0 transfer length in bytes for each 1D transfer (cmd1 command NPU_SET_DMA0_LEN)
+    struct npu_set_dma0_len_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_dma0_len_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_DMA0_LEN
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload word; presumably the transfer length in bytes despite the name -- TODO confirm
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_len_t(uint32_t _addr) : // NPU_SET_DMA0_LEN command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_dma0_len_t() : // NPU_SET_DMA0_LEN command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_LEN and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_len_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_len_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_dma0_len_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 address (cmd1 command NPU_SET_IFM2_BASE0)
+    struct npu_set_ifm2_base0_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Byte distance to skip after inner size (2D/3D mode)
-struct npu_set_dma0_skip0_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_DMA0_SKIP0
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t param : 16;
-    uint32_t data : 32; // Byte distance to skip after inner size (2D/3D mode)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SKIP0) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SKIP0);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_skip0_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_dma0_skip0_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_dma0_skip0_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_dma0_skip0_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Byte distance to skip after outer size (3D mode)
-struct npu_set_dma0_skip1_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_DMA0_SKIP1
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t param : 16;
-    uint32_t data : 32; // Byte distance to skip after outer size (3D mode)
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_IFM2_BASE0
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload: IFM2 tile 0 address (per the struct comment)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SKIP1) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_DMA0_SKIP1);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_dma0_skip1_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_dma0_skip1_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_dma0_skip1_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_dma0_skip1_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm2_base0_t(uint32_t _addr) : // NPU_SET_IFM2_BASE0 command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_base0_t() : // NPU_SET_IFM2_BASE0 command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE0 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_base0_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_base0_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_base0_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 1 address (cmd1 command NPU_SET_IFM2_BASE1)
+    struct npu_set_ifm2_base1_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 tile0 offset (top left tile) from IFM_REGION start
-struct npu_set_ifm2_base0_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_BASE0
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 tile0 offset (top left tile) from IFM_REGION start
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE0) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE0);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_base0_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_base0_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_base0_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 tile1 offset (top right tile) from IFM_REGION start
-struct npu_set_ifm2_base1_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_BASE1
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 tile1 offset (top right tile) from IFM_REGION start
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_IFM2_BASE1
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload: IFM2 tile 1 address (per the struct comment)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE1) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE1);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_base1_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_base1_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_base1_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm2_base1_t(uint32_t _addr) : // NPU_SET_IFM2_BASE1 command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_base1_t() : // NPU_SET_IFM2_BASE1 command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE1 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_base1_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_base1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_base1_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 2 address (cmd1 command NPU_SET_IFM2_BASE2)
+    struct npu_set_ifm2_base2_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 tile2 offset (bottom left tile) from IFM_REGION start
-struct npu_set_ifm2_base2_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_BASE2
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 tile2 offset (bottom left tile) from IFM_REGION start
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE2) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE2);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_base2_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_base2_t &set_data(uint32_t value)
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_IFM2_BASE2
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload: IFM2 tile 2 address (per the struct comment)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base2_t(uint32_t _addr) : // NPU_SET_IFM2_BASE2 command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_base2_t() : // NPU_SET_IFM2_BASE2 command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE2 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_base2_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_base2_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_base2_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 3 address (cmd1 command NPU_SET_IFM2_BASE3)
+    struct npu_set_ifm2_base3_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_IFM2_BASE3
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload: IFM2 tile 3 address (per the struct comment)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base3_t(uint32_t _addr) : // NPU_SET_IFM2_BASE3 command carrying _addr as its payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_base3_t() : // NPU_SET_IFM2_BASE3 command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE3 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_base3_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_base3_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_base3_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 byte stride between horizontal values (cmd1 command NPU_SET_IFM2_STRIDE_X)
+    struct npu_set_ifm2_stride_x_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_base2_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; // command opcode; valid() expects NPU_SET_IFM2_STRIDE_X
+        uint32_t reserved0 : 4; // set to 0 by both constructors
+        uint32_t control : 2; // control code; valid() accepts 1 or 2
+        uint32_t reserved1 : 16; // set to 0 by both constructors
+        uint32_t addr : 32; // payload word; presumably the byte stride despite the name -- TODO confirm
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_stride_x_t(uint32_t _addr) : // NPU_SET_IFM2_STRIDE_X command carrying _addr as payload (not CONSTEXPR)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t() : // NPU_SET_IFM2_STRIDE_X command with a zero payload
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_STRIDE_X and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved bits and addr are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw image of the first sizeof(uint64_t) bytes of this object
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // NOTE(review): presumably the bit-fields pack into 64 bits -- confirm
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // mask to the 10-bit field width
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // mask to the 2-bit field width
+            return *this;
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr(); // "0x" followed by the payload in hexadecimal
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 byte stride between vertical values
+    struct npu_set_ifm2_stride_y_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 tile3 offset (bottom right tile) from IFM_REGION start
-struct npu_set_ifm2_base3_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_BASE3
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 tile3 offset (bottom right tile) from IFM_REGION start
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE3) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_BASE3);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_base3_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_base3_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_base3_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 byte stride between horizontal values
-struct npu_set_ifm2_stride_x_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_STRIDE_X
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 byte stride between horizontal values
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_X) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_X);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_stride_x_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_stride_x_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_stride_x_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm2_stride_y_t(uint32_t _addr) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_IFM2_STRIDE_Y
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", hex text) entry
+        {
+            std::stringstream saddr; // formats get_addr() as "0x" followed by hex digits
+            saddr << std::hex << "0x" << get_addr();
+            fields.emplace_back("addr", saddr.str()); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // IFM2 byte stride between channel blocks (of 16 bytes each block)
+    struct npu_set_ifm2_stride_c_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 byte stride between vertical values
-struct npu_set_ifm2_stride_y_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_STRIDE_Y
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 byte stride between vertical values
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_Y) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_Y);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_stride_y_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_stride_y_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_stride_y_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set IFM2 byte stride between channel blocks (of 16 bytes each block)
-struct npu_set_ifm2_stride_c_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_IFM2_STRIDE_C
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // IFM2 byte stride between channel blocks (of 16 bytes each block)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t addr : 32; //  address offset
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_C) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_IFM2_STRIDE_C);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_ifm2_stride_c_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_ifm2_stride_c_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_ifm2_stride_c_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_ifm2_stride_c_t(uint32_t _addr) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(_addr)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), addr(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_IFM2_STRIDE_C
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_addr() const
+        {
+            return static_cast<uint32_t>(addr);
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t &set_addr(uint32_t value)
+        {
+            addr = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", hex text) entry
+        {
+            std::stringstream saddr; // formats get_addr() as "0x" followed by hex digits
+            saddr << std::hex << "0x" << get_addr();
+            fields.emplace_back("addr", saddr.str()); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 0
+    struct npu_set_user_defined0_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Weight stream byte offset in WEIGHT_REGION
-struct npu_set_weight1_base_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_WEIGHT1_BASE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t param : 16;
-    uint32_t data : 32; // Weight stream byte offset in WEIGHT_REGION
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT1_BASE) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT1_BASE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_weight1_base_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_weight1_base_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_weight1_base_t &set_param(uint32_t value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined0_t(uint32_t _user_reg) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined0_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_USER_DEFINED0
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED0); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined0_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined0_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_user_reg() const
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined0_t &set_user_reg(uint32_t value)
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("user_reg", decimal text) entry
+        {
+            fields.emplace_back("user_reg", std::to_string(user_reg)); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 1
+    struct npu_set_user_defined1_t
     {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined1_t(uint32_t _user_reg) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined1_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_USER_DEFINED1
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED1); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined1_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_user_reg() const
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined1_t &set_user_reg(uint32_t value)
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("user_reg", decimal text) entry
+        {
+            fields.emplace_back("user_reg", std::to_string(user_reg)); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 2
+    struct npu_set_user_defined2_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_weight1_base_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined2_t(uint32_t _user_reg) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined2_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_USER_DEFINED2
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED2); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined2_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined2_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_user_reg() const
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined2_t &set_user_reg(uint32_t value)
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("user_reg", decimal text) entry
+        {
+            fields.emplace_back("user_reg", std::to_string(user_reg)); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 3
+    struct npu_set_user_defined3_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Weight stream byte length (unsigned 32 bits)
-struct npu_set_weight1_length_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_WEIGHT1_LENGTH
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Weight stream byte length (unsigned 32 bits)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT1_LENGTH) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_WEIGHT1_LENGTH);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_weight1_length_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_weight1_length_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_weight1_length_t &set_payload_size(uint32_t value)
-    {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Scale and bias stream input byte offset from SCALE_REGION
-struct npu_set_scale1_base_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_SCALE1_BASE
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t param : 16;
-    uint32_t data : 32; // Scale and bias stream input byte offset from SCALE_REGION
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t user_reg : 32; //  User defined register
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_SCALE1_BASE) && must_be_zero == 0 && payload_size >= 1 &&
-               payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_SCALE1_BASE);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_scale1_base_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_scale1_base_t &set_data(uint32_t value)
-    {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_param() const
-    {
-        return static_cast<uint32_t>(param);
-    }
-    CONSTEXPR npu_set_scale1_base_t &set_param(uint32_t value)
-    {
-        param = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
-    {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_scale1_base_t &set_payload_size(uint32_t value)
+      public:
+        npu_set_user_defined3_t(uint32_t _user_reg) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined3_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_USER_DEFINED3
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED3); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined3_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined3_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_user_reg() const
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined3_t &set_user_reg(uint32_t value)
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("user_reg", decimal text) entry
+        {
+            fields.emplace_back("user_reg", std::to_string(user_reg)); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 4
+    struct npu_set_user_defined4_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
-};
-
-// Set Scale and bias stream input byte length (unsigned 20 bits)
-struct npu_set_scale1_length_t
-{
-    uint32_t cmd_code : 10;    // NPU_SET_SCALE1_LENGTH
-    uint32_t must_be_zero : 4; // 0
-    uint32_t payload_size : 2; // Min:1 Max:2
-    uint32_t reserved0 : 16;
-    uint32_t data : 32; // Scale and bias stream input byte length (unsigned 20 bits)
 #ifdef __cplusplus
-    CONSTEXPR bool valid() const
-    {
-        return cmd_code == static_cast<uint32_t>(cmd1::NPU_SET_SCALE1_LENGTH) && must_be_zero == 0 &&
-               payload_size >= 1 && payload_size <= 2;
-    }
-    CONSTEXPR void init()
-    {
-        cmd_code     = static_cast<uint32_t>(cmd1::NPU_SET_SCALE1_LENGTH);
-        must_be_zero = 0;
-        payload_size = 1;
-    }
-    CONSTEXPR ::cmd1 get_cmd_code() const
-    {
-        return static_cast<::cmd1>(cmd_code);
-    }
-    CONSTEXPR npu_set_scale1_length_t &set_cmd_code(::cmd1 value)
-    {
-        cmd_code = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_data() const
-    {
-        return static_cast<uint32_t>(data);
-    }
-    CONSTEXPR npu_set_scale1_length_t &set_data(uint32_t value)
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4;
+        uint32_t control : 2; //  control
+        uint32_t reserved1 : 16;
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined4_t(uint32_t _user_reg) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED4)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined4_t() :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED4)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when control is in range and opcode is NPU_SET_USER_DEFINED4
+        {
+            return control >= 1 && control <= 2 && // control field must be 1 or 2
+                   opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED4); // opcode must match
+        }
+        CONSTEXPR void init()
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED4);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() const // const-qualified: the conversion only reads the object, so const commands convert too
+        {
+            uint64_t word; // fully overwritten by the memcpy below
+            std::memcpy(&word, this, sizeof(word)); // raw byte copy of sizeof(uint64_t) bytes starting at this object
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined4_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // stores value into opcode
+        {
+            opcode = 0x3FFU & static_cast<uint16_t>(value); // mask keeps only the low 10 bits (opcode is a 10-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined4_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores value into control
+        {
+            control = 0x3U & static_cast<uint8_t>(value); // mask keeps only the low 2 bits (control is a 2-bit field)
+            return *this; // reference to this object
+        }
+        CONSTEXPR uint32_t get_user_reg() const
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined4_t &set_user_reg(uint32_t value)
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("user_reg", decimal text) entry
+        {
+            fields.emplace_back("user_reg", std::to_string(user_reg)); // build the pair in place; no explicit make_pair template arguments
+        }
+#endif
+#endif
+    };
+    // User defined register 5: 64-bit command (10+4+2+16+32 bit-fields) tagged NPU_SET_USER_DEFINED5
+    struct npu_set_user_defined5_t
     {
-        data = static_cast<uint32_t>(value);
-        return *this;
-    }
-    CONSTEXPR uint32_t get_payload_size() const
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus (fields are private only in C++ builds)
+        uint32_t opcode : 10; //  opcode, stored as the numeric value of a cmd1_opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control, stored as the numeric value of a cmd_ctrl
+        uint32_t reserved1 : 16; //  zeroed by both constructors
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined5_t(uint32_t _user_reg) : // payload constructor (not CONSTEXPR, implicit from uint32_t)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED5)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined5_t() : // default constructor: same header fields, user_reg = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED5)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_USER_DEFINED5 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED5) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and user_reg are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED5);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as an integer (non-const member)
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage into word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined5_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined5_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_user_reg() const // current 32-bit user_reg payload
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined5_t &set_user_reg(uint32_t value) // stores value unmasked; returns *this
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("user_reg", std::to_string(user_reg)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // User defined register 6: 64-bit command (10+4+2+16+32 bit-fields) tagged NPU_SET_USER_DEFINED6
+    struct npu_set_user_defined6_t
     {
-        return static_cast<uint32_t>(payload_size);
-    }
-    CONSTEXPR npu_set_scale1_length_t &set_payload_size(uint32_t value)
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus (fields are private only in C++ builds)
+        uint32_t opcode : 10; //  opcode, stored as the numeric value of a cmd1_opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control, stored as the numeric value of a cmd_ctrl
+        uint32_t reserved1 : 16; //  zeroed by both constructors
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined6_t(uint32_t _user_reg) : // payload constructor (not CONSTEXPR, implicit from uint32_t)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED6)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined6_t() : // default constructor: same header fields, user_reg = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED6)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_USER_DEFINED6 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED6) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and user_reg are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED6);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as an integer (non-const member)
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage into word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined6_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined6_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_user_reg() const // current 32-bit user_reg payload
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined6_t &set_user_reg(uint32_t value) // stores value unmasked; returns *this
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("user_reg", std::to_string(user_reg)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // User defined register 7: 64-bit command (10+4+2+16+32 bit-fields) tagged NPU_SET_USER_DEFINED7
+    struct npu_set_user_defined7_t
     {
-        payload_size = static_cast<uint32_t>(value);
-        return *this;
-    }
-#endif //__cplusplus
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus (fields are private only in C++ builds)
+        uint32_t opcode : 10; //  opcode, stored as the numeric value of a cmd1_opcode
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control, stored as the numeric value of a cmd_ctrl
+        uint32_t reserved1 : 16; //  zeroed by both constructors
+        uint32_t user_reg : 32; //  User defined register
+#ifdef __cplusplus
+      public:
+        npu_set_user_defined7_t(uint32_t _user_reg) : // payload constructor (not CONSTEXPR, implicit from uint32_t)
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED7)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(_user_reg)
+        {
+        }
+        CONSTEXPR npu_set_user_defined7_t() : // default constructor: same header fields, user_reg = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED7)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), user_reg(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_USER_DEFINED7 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED7) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved fields and user_reg are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_USER_DEFINED7);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as an integer (non-const member)
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage into word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_user_defined7_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_user_defined7_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_user_reg() const // current 32-bit user_reg payload
+        {
+            return static_cast<uint32_t>(user_reg);
+        }
+        CONSTEXPR npu_set_user_defined7_t &set_user_reg(uint32_t value) // stores value unmasked; returns *this
+        {
+            user_reg = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("user_reg", std::to_string(user_reg)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+#ifdef __cplusplus
 };
-
-#define NPU_DATA_STRUCTS                                                                                               \
-    NPU_STRUCT(command_no_payload)                                                                                     \
-    NPU_STRUCT(command_with_payload)                                                                                   \
-    NPU_STRUCT(npu_op_stop)                                                                                            \
-    NPU_STRUCT(npu_op_irq)                                                                                             \
-    NPU_STRUCT(npu_op_conv)                                                                                            \
-    NPU_STRUCT(npu_op_depthwise)                                                                                       \
-    NPU_STRUCT(npu_op_pool)                                                                                            \
-    NPU_STRUCT(npu_op_elementwise)                                                                                     \
-    NPU_STRUCT(npu_op_dma_start)                                                                                       \
-    NPU_STRUCT(npu_op_dma_wait)                                                                                        \
-    NPU_STRUCT(npu_op_kernel_wait)                                                                                     \
-    NPU_STRUCT(npu_op_pmu_mask)                                                                                        \
-    NPU_STRUCT(npu_set_ifm_pad_top)                                                                                    \
-    NPU_STRUCT(npu_set_ifm_pad_left)                                                                                   \
-    NPU_STRUCT(npu_set_ifm_pad_right)                                                                                  \
-    NPU_STRUCT(npu_set_ifm_pad_bottom)                                                                                 \
-    NPU_STRUCT(npu_set_ifm_depth_m1)                                                                                   \
-    NPU_STRUCT(npu_set_ifm_precision)                                                                                  \
-    NPU_STRUCT(npu_set_ifm_upscale)                                                                                    \
-    NPU_STRUCT(npu_set_ifm_zero_point)                                                                                 \
-    NPU_STRUCT(npu_set_ifm_width0_m1)                                                                                  \
-    NPU_STRUCT(npu_set_ifm_height0_m1)                                                                                 \
-    NPU_STRUCT(npu_set_ifm_height1_m1)                                                                                 \
-    NPU_STRUCT(npu_set_ifm_ib_end)                                                                                     \
-    NPU_STRUCT(npu_set_ifm_region)                                                                                     \
-    NPU_STRUCT(npu_set_ofm_width_m1)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_height_m1)                                                                                  \
-    NPU_STRUCT(npu_set_ofm_depth_m1)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_precision)                                                                                  \
-    NPU_STRUCT(npu_set_ofm_blk_width_m1)                                                                               \
-    NPU_STRUCT(npu_set_ofm_blk_height_m1)                                                                              \
-    NPU_STRUCT(npu_set_ofm_blk_depth_m1)                                                                               \
-    NPU_STRUCT(npu_set_ofm_zero_point)                                                                                 \
-    NPU_STRUCT(npu_set_ofm_width0_m1)                                                                                  \
-    NPU_STRUCT(npu_set_ofm_height0_m1)                                                                                 \
-    NPU_STRUCT(npu_set_ofm_height1_m1)                                                                                 \
-    NPU_STRUCT(npu_set_ofm_region)                                                                                     \
-    NPU_STRUCT(npu_set_kernel_width_m1)                                                                                \
-    NPU_STRUCT(npu_set_kernel_height_m1)                                                                               \
-    NPU_STRUCT(npu_set_kernel_stride)                                                                                  \
-    NPU_STRUCT(npu_set_parallel_mode)                                                                                  \
-    NPU_STRUCT(npu_set_acc_format)                                                                                     \
-    NPU_STRUCT(npu_set_activation)                                                                                     \
-    NPU_STRUCT(npu_set_activation_min)                                                                                 \
-    NPU_STRUCT(npu_set_activation_max)                                                                                 \
-    NPU_STRUCT(npu_set_weight_region)                                                                                  \
-    NPU_STRUCT(npu_set_scale_region)                                                                                   \
-    NPU_STRUCT(npu_set_ab_start)                                                                                       \
-    NPU_STRUCT(npu_set_blockdep)                                                                                       \
-    NPU_STRUCT(npu_set_dma0_src_region)                                                                                \
-    NPU_STRUCT(npu_set_dma0_dst_region)                                                                                \
-    NPU_STRUCT(npu_set_dma0_size0)                                                                                     \
-    NPU_STRUCT(npu_set_dma0_size1)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_broadcast)                                                                                 \
-    NPU_STRUCT(npu_set_ifm2_scalar)                                                                                    \
-    NPU_STRUCT(npu_set_ifm2_precision)                                                                                 \
-    NPU_STRUCT(npu_set_ifm2_zero_point)                                                                                \
-    NPU_STRUCT(npu_set_ifm2_width0_m1)                                                                                 \
-    NPU_STRUCT(npu_set_ifm2_height0_m1)                                                                                \
-    NPU_STRUCT(npu_set_ifm2_height1_m1)                                                                                \
-    NPU_STRUCT(npu_set_ifm2_ib_start)                                                                                  \
-    NPU_STRUCT(npu_set_ifm2_region)                                                                                    \
-    NPU_STRUCT(npu_set_ifm_base0)                                                                                      \
-    NPU_STRUCT(npu_set_ifm_base1)                                                                                      \
-    NPU_STRUCT(npu_set_ifm_base2)                                                                                      \
-    NPU_STRUCT(npu_set_ifm_base3)                                                                                      \
-    NPU_STRUCT(npu_set_ifm_stride_x)                                                                                   \
-    NPU_STRUCT(npu_set_ifm_stride_y)                                                                                   \
-    NPU_STRUCT(npu_set_ifm_stride_c)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_base0)                                                                                      \
-    NPU_STRUCT(npu_set_ofm_base1)                                                                                      \
-    NPU_STRUCT(npu_set_ofm_base2)                                                                                      \
-    NPU_STRUCT(npu_set_ofm_base3)                                                                                      \
-    NPU_STRUCT(npu_set_ofm_stride_x)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_stride_y)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_stride_c)                                                                                   \
-    NPU_STRUCT(npu_set_weight_base)                                                                                    \
-    NPU_STRUCT(npu_set_weight_length)                                                                                  \
-    NPU_STRUCT(npu_set_scale_base)                                                                                     \
-    NPU_STRUCT(npu_set_scale_length)                                                                                   \
-    NPU_STRUCT(npu_set_ofm_scale)                                                                                      \
-    NPU_STRUCT(npu_set_opa_scale)                                                                                      \
-    NPU_STRUCT(npu_set_opb_scale)                                                                                      \
-    NPU_STRUCT(npu_set_dma0_src)                                                                                       \
-    NPU_STRUCT(npu_set_dma0_dst)                                                                                       \
-    NPU_STRUCT(npu_set_dma0_len)                                                                                       \
-    NPU_STRUCT(npu_set_dma0_skip0)                                                                                     \
-    NPU_STRUCT(npu_set_dma0_skip1)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_base0)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_base1)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_base2)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_base3)                                                                                     \
-    NPU_STRUCT(npu_set_ifm2_stride_x)                                                                                  \
-    NPU_STRUCT(npu_set_ifm2_stride_y)                                                                                  \
-    NPU_STRUCT(npu_set_ifm2_stride_c)                                                                                  \
-    NPU_STRUCT(npu_set_weight1_base)                                                                                   \
-    NPU_STRUCT(npu_set_weight1_length)                                                                                 \
-    NPU_STRUCT(npu_set_scale1_base)                                                                                    \
-    NPU_STRUCT(npu_set_scale1_length)
+#endif
 #define NPU_OP_STRUCTS                                                                                                 \
     NPU_OP_(stop)                                                                                                      \
     NPU_OP_(irq)                                                                                                       \
@@ -13801,6 +25834,7 @@ struct npu_set_scale1_length_t
     NPU_OP_(dma_wait)                                                                                                  \
     NPU_OP_(kernel_wait)                                                                                               \
     NPU_OP_(pmu_mask)
+
 #define NPU_SET_STRUCTS                                                                                                \
     NPU_SET_(ifm_pad_top)                                                                                              \
     NPU_SET_(ifm_pad_left)                                                                                             \
@@ -13830,7 +25864,6 @@ struct npu_set_scale1_length_t
     NPU_SET_(kernel_width_m1)                                                                                          \
     NPU_SET_(kernel_height_m1)                                                                                         \
     NPU_SET_(kernel_stride)                                                                                            \
-    NPU_SET_(parallel_mode)                                                                                            \
     NPU_SET_(acc_format)                                                                                               \
     NPU_SET_(activation)                                                                                               \
     NPU_SET_(activation_min)                                                                                           \
@@ -13876,8 +25909,6 @@ struct npu_set_scale1_length_t
     NPU_SET_(dma0_src)                                                                                                 \
     NPU_SET_(dma0_dst)                                                                                                 \
     NPU_SET_(dma0_len)                                                                                                 \
-    NPU_SET_(dma0_skip0)                                                                                               \
-    NPU_SET_(dma0_skip1)                                                                                               \
     NPU_SET_(ifm2_base0)                                                                                               \
     NPU_SET_(ifm2_base1)                                                                                               \
     NPU_SET_(ifm2_base2)                                                                                               \
@@ -13885,105 +25916,147 @@ struct npu_set_scale1_length_t
     NPU_SET_(ifm2_stride_x)                                                                                            \
     NPU_SET_(ifm2_stride_y)                                                                                            \
     NPU_SET_(ifm2_stride_c)                                                                                            \
-    NPU_SET_(weight1_base)                                                                                             \
-    NPU_SET_(weight1_length)                                                                                           \
-    NPU_SET_(scale1_base)                                                                                              \
-    NPU_SET_(scale1_length)
-#define COMMAND_STRUCTS                                                                                                \
-    COMMAND_(no_payload)                                                                                               \
-    COMMAND_(with_payload)
-
-#define EXPAND_ACC_FORMAT(FUNC, SEP)                                                                                   \
-    FUNC(acc_format, INT_32BIT) SEP FUNC(acc_format, INT_40BIT) SEP FUNC(acc_format, FP_S5_10)
-
-#define EXPAND_ACTIVATION(FUNC, SEP)                                                                                   \
-    FUNC(activation, NONE)                                                                                             \
-    SEP FUNC(activation, TANH) SEP FUNC(activation, SIGMOID) SEP FUNC(activation, LUT_START)                           \
-        SEP FUNC(activation, LUT_END)
-
-#define EXPAND_AXI_MEM_ENCODING_TYPE(FUNC, SEP)                                                                        \
-    FUNC(axi_mem_encoding_type, DEVICE_NON_BUFFERABLE)                                                                 \
-    SEP FUNC(axi_mem_encoding_type, DEVICE_BUFFERABLE)                                                                 \
-        SEP FUNC(axi_mem_encoding_type, NORMAL_NON_CACHEABLE_NON_BUFFERABLE)                                           \
-            SEP FUNC(axi_mem_encoding_type, NORMAL_NON_CACHEABLE_BUFFERABLE)                                           \
-                SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_NO_ALLOCATE)                                             \
-                    SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_READ_ALLOCATE)                                       \
-                        SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_WRITE_ALLOCATE)                                  \
-                            SEP FUNC(axi_mem_encoding_type, WRITE_THROUGH_READ_AND_WRITE_ALLOCATE)                     \
-                                SEP FUNC(axi_mem_encoding_type, WRITE_BACK_NO_ALLOCATE)                                \
-                                    SEP FUNC(axi_mem_encoding_type, WRITE_BACK_READ_ALLOCATE)                          \
-                                        SEP FUNC(axi_mem_encoding_type, WRITE_BACK_WRITE_ALLOCATE)                     \
-                                            SEP FUNC(axi_mem_encoding_type, WRITE_BACK_READ_AND_WRITE_ALLOCATE)        \
-                                                SEP FUNC(axi_mem_encoding_type, RESERVED_12)                           \
-                                                    SEP FUNC(axi_mem_encoding_type, RESERVED_13)                       \
-                                                        SEP FUNC(axi_mem_encoding_type, RESERVED_14)                   \
-                                                            SEP FUNC(axi_mem_encoding_type, RESERVED_15)
-
-#define EXPAND_CLIP_RANGE(FUNC, SEP)                                                                                   \
-    FUNC(clip_range, OFM_PRECISION)                                                                                    \
-    SEP FUNC(clip_range, FORCE_UINT8) SEP FUNC(clip_range, FORCE_INT8) SEP FUNC(clip_range, FORCE_INT16)
-
-#define EXPAND_CMD0(FUNC, SEP)                                                                                         \
-    FUNC(cmd0, NPU_OP_STOP)                                                                                            \
-    SEP FUNC(cmd0, NPU_OP_IRQ) SEP FUNC(cmd0, NPU_OP_CONV) SEP FUNC(cmd0, NPU_OP_DEPTHWISE) SEP FUNC(                  \
-        cmd0, NPU_OP_POOL) SEP FUNC(cmd0, NPU_OP_ELEMENTWISE) SEP FUNC(cmd0, NPU_OP_DMA_START)                         \
-        SEP FUNC(cmd0, NPU_OP_DMA_WAIT) SEP FUNC(cmd0, NPU_OP_KERNEL_WAIT) SEP FUNC(cmd0, NPU_OP_PMU_MASK) SEP FUNC(   \
-            cmd0, NPU_SET_IFM_PAD_TOP) SEP FUNC(cmd0, NPU_SET_IFM_PAD_LEFT) SEP FUNC(cmd0, NPU_SET_IFM_PAD_RIGHT)      \
-            SEP FUNC(cmd0, NPU_SET_IFM_PAD_BOTTOM) SEP FUNC(cmd0, NPU_SET_IFM_DEPTH_M1) SEP FUNC(                      \
-                cmd0, NPU_SET_IFM_PRECISION) SEP FUNC(cmd0, NPU_SET_IFM_UPSCALE)                                       \
-                SEP FUNC(cmd0, NPU_SET_IFM_ZERO_POINT) SEP FUNC(cmd0, NPU_SET_IFM_WIDTH0_M1) SEP FUNC(                 \
-                    cmd0, NPU_SET_IFM_HEIGHT0_M1) SEP FUNC(cmd0, NPU_SET_IFM_HEIGHT1_M1) SEP FUNC(cmd0,                \
-                                                                                                  NPU_SET_IFM_IB_END)  \
-                    SEP FUNC(cmd0, NPU_SET_IFM_REGION) SEP FUNC(cmd0, NPU_SET_OFM_WIDTH_M1) SEP FUNC(                  \
-                        cmd0, NPU_SET_OFM_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_OFM_DEPTH_M1)                              \
-                        SEP FUNC(cmd0, NPU_SET_OFM_PRECISION) SEP FUNC(cmd0, NPU_SET_OFM_BLK_WIDTH_M1) SEP FUNC(       \
-                            cmd0, NPU_SET_OFM_BLK_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_OFM_BLK_DEPTH_M1)                  \
-                            SEP FUNC(cmd0, NPU_SET_OFM_ZERO_POINT) SEP FUNC(cmd0, NPU_SET_OFM_WIDTH0_M1) SEP FUNC(     \
-                                cmd0, NPU_SET_OFM_HEIGHT0_M1) SEP FUNC(cmd0, NPU_SET_OFM_HEIGHT1_M1)                   \
-                                SEP FUNC(cmd0, NPU_SET_OFM_REGION) SEP FUNC(cmd0, NPU_SET_KERNEL_WIDTH_M1) SEP FUNC(   \
-                                    cmd0, NPU_SET_KERNEL_HEIGHT_M1) SEP FUNC(cmd0, NPU_SET_KERNEL_STRIDE)              \
-                                    SEP FUNC(cmd0, NPU_SET_PARALLEL_MODE) SEP FUNC(cmd0, NPU_SET_ACC_FORMAT) SEP FUNC( \
-                                        cmd0, NPU_SET_ACTIVATION) SEP FUNC(cmd0, NPU_SET_ACTIVATION_MIN)               \
-                                        SEP FUNC(cmd0, NPU_SET_ACTIVATION_MAX) SEP FUNC(cmd0, NPU_SET_WEIGHT_REGION)   \
-                                            SEP FUNC(cmd0, NPU_SET_SCALE_REGION) SEP FUNC(cmd0, NPU_SET_AB_START)      \
-                                                SEP FUNC(cmd0,                                                         \
-                                                         NPU_SET_BLOCKDEP) SEP FUNC(cmd0, NPU_SET_DMA0_SRC_REGION)     \
-                                                    SEP FUNC(cmd0, NPU_SET_DMA0_DST_REGION) SEP FUNC(                  \
-                                                        cmd0, NPU_SET_DMA0_SIZE0) SEP FUNC(cmd0, NPU_SET_DMA0_SIZE1)   \
-                                                        SEP FUNC(cmd0, NPU_SET_IFM2_BROADCAST)                         \
-                                                            SEP FUNC(cmd0, NPU_SET_IFM2_SCALAR)                        \
-                                                                SEP FUNC(cmd0, NPU_SET_IFM2_PRECISION) SEP FUNC(       \
-                                                                    cmd0, NPU_SET_IFM2_ZERO_POINT)                     \
-                                                                    SEP FUNC(cmd0, NPU_SET_IFM2_WIDTH0_M1) SEP FUNC(   \
-                                                                        cmd0, NPU_SET_IFM2_HEIGHT0_M1)                 \
-                                                                        SEP FUNC(cmd0, NPU_SET_IFM2_HEIGHT1_M1)        \
-                                                                            SEP FUNC(cmd0, NPU_SET_IFM2_IB_START)      \
-                                                                                SEP FUNC(cmd0, NPU_SET_IFM2_REGION)
-
-#define EXPAND_CMD1(FUNC, SEP)                                                                                         \
-    FUNC(cmd1, NPU_SET_IFM_BASE0)                                                                                      \
-    SEP FUNC(cmd1, NPU_SET_IFM_BASE1) SEP FUNC(cmd1, NPU_SET_IFM_BASE2) SEP FUNC(cmd1, NPU_SET_IFM_BASE3)              \
-        SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_X) SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_Y) SEP FUNC(cmd1, NPU_SET_IFM_STRIDE_C) \
-            SEP FUNC(cmd1, NPU_SET_OFM_BASE0) SEP FUNC(cmd1, NPU_SET_OFM_BASE1) SEP FUNC(cmd1, NPU_SET_OFM_BASE2)      \
-                SEP FUNC(cmd1, NPU_SET_OFM_BASE3) SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_X)                                 \
-                    SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_Y) SEP FUNC(cmd1, NPU_SET_OFM_STRIDE_C)                          \
-                        SEP FUNC(cmd1, NPU_SET_WEIGHT_BASE) SEP FUNC(cmd1, NPU_SET_WEIGHT_LENGTH)                      \
-                            SEP FUNC(cmd1, NPU_SET_SCALE_BASE) SEP FUNC(cmd1, NPU_SET_SCALE_LENGTH)                    \
-                                SEP FUNC(cmd1, NPU_SET_OFM_SCALE) SEP FUNC(cmd1, NPU_SET_OPA_SCALE)                    \
-                                    SEP FUNC(cmd1, NPU_SET_OPB_SCALE) SEP FUNC(cmd1, NPU_SET_DMA0_SRC)                 \
-                                        SEP FUNC(cmd1, NPU_SET_DMA0_DST) SEP FUNC(cmd1, NPU_SET_DMA0_LEN) SEP FUNC(    \
-                                            cmd1, NPU_SET_DMA0_SKIP0) SEP FUNC(cmd1, NPU_SET_DMA0_SKIP1)               \
-                                            SEP FUNC(cmd1, NPU_SET_IFM2_BASE0) SEP FUNC(cmd1, NPU_SET_IFM2_BASE1)      \
-                                                SEP FUNC(cmd1, NPU_SET_IFM2_BASE2) SEP FUNC(cmd1, NPU_SET_IFM2_BASE3)  \
-                                                    SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_X)                              \
-                                                        SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_Y)                          \
-                                                            SEP FUNC(cmd1, NPU_SET_IFM2_STRIDE_C)                      \
-                                                                SEP FUNC(cmd1, NPU_SET_WEIGHT1_BASE)                   \
-                                                                    SEP FUNC(cmd1, NPU_SET_WEIGHT1_LENGTH)             \
-                                                                        SEP FUNC(cmd1, NPU_SET_SCALE1_BASE)            \
-                                                                            SEP FUNC(cmd1, NPU_SET_SCALE1_LENGTH)
-
-#define EXPAND_DATA_FORMAT(FUNC, SEP) FUNC(data_format, NHWC) SEP FUNC(data_format, NHCWB16)
+    NPU_SET_(user_defined0)                                                                                            \
+    NPU_SET_(user_defined1)                                                                                            \
+    NPU_SET_(user_defined2)                                                                                            \
+    NPU_SET_(user_defined3)                                                                                            \
+    NPU_SET_(user_defined4)                                                                                            \
+    NPU_SET_(user_defined5)                                                                                            \
+    NPU_SET_(user_defined6)                                                                                            \
+    NPU_SET_(user_defined7)
+
+#define EXPAND_ACC_FORMAT(FUNC, SEP) FUNC(acc_format, I32) SEP FUNC(acc_format, I40) SEP FUNC(acc_format, F16)
+
+#define EXPAND_ACTIVATION_CLIP_RANGE(FUNC, SEP)                                                                        \
+    FUNC(activation_clip_range, OFM_PRECISION)                                                                         \
+    SEP FUNC(activation_clip_range, FORCE_UINT8) SEP FUNC(activation_clip_range, FORCE_INT8)                           \
+        SEP FUNC(activation_clip_range, FORCE_INT16)
+
+#define EXPAND_ACTIVATION_FORMAT(FUNC, SEP) FUNC(activation_format, NHWC) SEP FUNC(activation_format, NHCWB16)
+
+#define EXPAND_ACTIVATION_FUNCTION(FUNC, SEP)                                                                          \
+    FUNC(activation_function, RELU)                                                                                    \
+    SEP FUNC(activation_function, TANH) SEP FUNC(activation_function, SIGMOID) SEP FUNC(activation_function, TABLE_0)  \
+        SEP FUNC(activation_function, TABLE_1) SEP FUNC(activation_function, TABLE_2)                                  \
+            SEP FUNC(activation_function, TABLE_3) SEP FUNC(activation_function, TABLE_4)                              \
+                SEP FUNC(activation_function, TABLE_5) SEP FUNC(activation_function, TABLE_6)                          \
+                    SEP FUNC(activation_function, TABLE_7)
+
+#define EXPAND_ACTIVATION_PRECISION(FUNC, SEP)                                                                         \
+    FUNC(activation_precision, B8)                                                                                     \
+    SEP FUNC(activation_precision, B16) SEP FUNC(activation_precision, B32) SEP FUNC(activation_precision, B64)
+
+#define EXPAND_ACTIVATION_TYPE(FUNC, SEP) FUNC(activation_type, UNSIGNED) SEP FUNC(activation_type, SIGNED)
+
+#define EXPAND_AXI_MEM_ENCODING(FUNC, SEP)                                                                             \
+    FUNC(axi_mem_encoding, DEVICE_NON_BUFFERABLE)                                                                      \
+    SEP FUNC(axi_mem_encoding, DEVICE_BUFFERABLE) SEP FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_NON_BUFFERABLE)      \
+        SEP FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_BUFFERABLE)                                                    \
+            SEP FUNC(axi_mem_encoding, WRITE_THROUGH_NO_ALLOCATE)                                                      \
+                SEP FUNC(axi_mem_encoding, WRITE_THROUGH_READ_ALLOCATE)                                                \
+                    SEP FUNC(axi_mem_encoding, WRITE_THROUGH_WRITE_ALLOCATE)                                           \
+                        SEP FUNC(axi_mem_encoding, WRITE_THROUGH_READ_AND_WRITE_ALLOCATE)                              \
+                            SEP FUNC(axi_mem_encoding, WRITE_BACK_NO_ALLOCATE)                                         \
+                                SEP FUNC(axi_mem_encoding, WRITE_BACK_READ_ALLOCATE)                                   \
+                                    SEP FUNC(axi_mem_encoding, WRITE_BACK_WRITE_ALLOCATE)                              \
+                                        SEP FUNC(axi_mem_encoding, WRITE_BACK_READ_AND_WRITE_ALLOCATE)
+
+#define EXPAND_BROADCAST_MODE(FUNC, SEP) FUNC(broadcast_mode, DISABLE) SEP FUNC(broadcast_mode, ENABLE)
+
+#define EXPAND_CMD0_OPCODE(FUNC, SEP)                                                                                  \
+    FUNC(cmd0_opcode, NPU_OP_STOP)                                                                                     \
+    SEP FUNC(cmd0_opcode, NPU_OP_IRQ) SEP FUNC(cmd0_opcode, NPU_OP_CONV) SEP FUNC(                                     \
+        cmd0_opcode, NPU_OP_DEPTHWISE) SEP FUNC(cmd0_opcode, NPU_OP_POOL) SEP FUNC(cmd0_opcode, NPU_OP_ELEMENTWISE)    \
+        SEP FUNC(cmd0_opcode, NPU_OP_DMA_START) SEP FUNC(cmd0_opcode, NPU_OP_DMA_WAIT) SEP FUNC(                       \
+            cmd0_opcode, NPU_OP_KERNEL_WAIT) SEP FUNC(cmd0_opcode, NPU_OP_PMU_MASK) SEP FUNC(cmd0_opcode,              \
+                                                                                             NPU_SET_IFM_PAD_TOP)      \
+            SEP FUNC(cmd0_opcode, NPU_SET_IFM_PAD_LEFT) SEP FUNC(cmd0_opcode, NPU_SET_IFM_PAD_RIGHT) SEP FUNC(         \
+                cmd0_opcode, NPU_SET_IFM_PAD_BOTTOM) SEP FUNC(cmd0_opcode,                                             \
+                                                              NPU_SET_IFM_DEPTH_M1) SEP FUNC(cmd0_opcode,              \
+                                                                                             NPU_SET_IFM_PRECISION)    \
+                SEP FUNC(cmd0_opcode, NPU_SET_IFM_UPSCALE) SEP FUNC(cmd0_opcode, NPU_SET_IFM_ZERO_POINT) SEP FUNC(     \
+                    cmd0_opcode, NPU_SET_IFM_WIDTH0_M1) SEP FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT0_M1)                  \
+                    SEP FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT1_M1) SEP FUNC(cmd0_opcode, NPU_SET_IFM_IB_END) SEP FUNC(  \
+                        cmd0_opcode, NPU_SET_IFM_REGION) SEP FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH_M1)                   \
+                        SEP FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT_M1) SEP FUNC(cmd0_opcode, NPU_SET_OFM_DEPTH_M1)       \
+                            SEP FUNC(cmd0_opcode, NPU_SET_OFM_PRECISION) SEP FUNC(                                     \
+                                cmd0_opcode, NPU_SET_OFM_BLK_WIDTH_M1) SEP FUNC(cmd0_opcode,                           \
+                                                                                NPU_SET_OFM_BLK_HEIGHT_M1)             \
+                                SEP FUNC(cmd0_opcode, NPU_SET_OFM_BLK_DEPTH_M1) SEP FUNC(                              \
+                                    cmd0_opcode, NPU_SET_OFM_ZERO_POINT) SEP FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH0_M1)  \
+                                    SEP FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT0_M1) SEP FUNC(                            \
+                                        cmd0_opcode,                                                                   \
+                                        NPU_SET_OFM_HEIGHT1_M1) SEP FUNC(cmd0_opcode, NPU_SET_OFM_REGION)              \
+                                        SEP FUNC(cmd0_opcode, NPU_SET_KERNEL_WIDTH_M1) SEP FUNC(                       \
+                                            cmd0_opcode,                                                               \
+                                            NPU_SET_KERNEL_HEIGHT_M1) SEP FUNC(cmd0_opcode, NPU_SET_KERNEL_STRIDE)     \
+                                            SEP FUNC(cmd0_opcode, NPU_SET_ACC_FORMAT) SEP FUNC(                        \
+                                                cmd0_opcode,                                                           \
+                                                NPU_SET_ACTIVATION) SEP FUNC(cmd0_opcode, NPU_SET_ACTIVATION_MIN)      \
+                                                SEP FUNC(cmd0_opcode, NPU_SET_ACTIVATION_MAX) SEP FUNC(                \
+                                                    cmd0_opcode,                                                       \
+                                                    NPU_SET_WEIGHT_REGION) SEP FUNC(cmd0_opcode, NPU_SET_SCALE_REGION) \
+                                                    SEP FUNC(cmd0_opcode,                                              \
+                                                             NPU_SET_AB_START) SEP FUNC(cmd0_opcode, NPU_SET_BLOCKDEP) \
+                                                        SEP FUNC(cmd0_opcode, NPU_SET_DMA0_SRC_REGION) SEP FUNC(       \
+                                                            cmd0_opcode,                                               \
+                                                            NPU_SET_DMA0_DST_REGION) SEP FUNC(cmd0_opcode,             \
+                                                                                              NPU_SET_DMA0_SIZE0)      \
+                                                            SEP FUNC(cmd0_opcode, NPU_SET_DMA0_SIZE1) SEP FUNC(        \
+                                                                cmd0_opcode,                                           \
+                                                                NPU_SET_IFM2_BROADCAST)                                \
+                                                                SEP FUNC(cmd0_opcode, NPU_SET_IFM2_SCALAR) SEP FUNC(   \
+                                                                    cmd0_opcode,                                       \
+                                                                    NPU_SET_IFM2_PRECISION)                            \
+                                                                    SEP FUNC(cmd0_opcode, NPU_SET_IFM2_ZERO_POINT)     \
+                                                                        SEP FUNC(cmd0_opcode, NPU_SET_IFM2_WIDTH0_M1)  \
+                                                                            SEP FUNC(cmd0_opcode,                      \
+                                                                                     NPU_SET_IFM2_HEIGHT0_M1)          \
+                                                                                SEP FUNC(cmd0_opcode,                  \
+                                                                                         NPU_SET_IFM2_HEIGHT1_M1)      \
+                                                                                    SEP FUNC(cmd0_opcode,              \
+                                                                                             NPU_SET_IFM2_IB_START)    \
+                                                                                        SEP FUNC(cmd0_opcode,          \
+                                                                                                 NPU_SET_IFM2_REGION)
+
+#define EXPAND_CMD1_OPCODE(FUNC, SEP)                                                                                  \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE0)                                                                               \
+    SEP FUNC(cmd1_opcode, NPU_SET_IFM_BASE1) SEP FUNC(cmd1_opcode, NPU_SET_IFM_BASE2) SEP FUNC(                        \
+        cmd1_opcode, NPU_SET_IFM_BASE3) SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_X)                                    \
+        SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_Y) SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_C) SEP FUNC(              \
+            cmd1_opcode, NPU_SET_OFM_BASE0) SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE1)                                   \
+            SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE2) SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE3) SEP FUNC(                \
+                cmd1_opcode, NPU_SET_OFM_STRIDE_X) SEP FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_Y)                         \
+                SEP FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_C) SEP FUNC(cmd1_opcode, NPU_SET_WEIGHT_BASE) SEP FUNC(       \
+                    cmd1_opcode, NPU_SET_WEIGHT_LENGTH) SEP FUNC(cmd1_opcode, NPU_SET_SCALE_BASE)                      \
+                    SEP FUNC(cmd1_opcode, NPU_SET_SCALE_LENGTH) SEP FUNC(cmd1_opcode, NPU_SET_OFM_SCALE) SEP FUNC(     \
+                        cmd1_opcode, NPU_SET_OPA_SCALE) SEP FUNC(cmd1_opcode, NPU_SET_OPB_SCALE)                       \
+                        SEP FUNC(cmd1_opcode, NPU_SET_DMA0_SRC) SEP FUNC(cmd1_opcode, NPU_SET_DMA0_DST) SEP FUNC(      \
+                            cmd1_opcode, NPU_SET_DMA0_LEN) SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE0)                   \
+                            SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE1) SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE2)        \
+                                SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE3) SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_X) \
+                                    SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_Y)                                       \
+                                        SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_C)                                   \
+                                            SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED0)                               \
+                                                SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED1)                           \
+                                                    SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED2)                       \
+                                                        SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED3)                   \
+                                                            SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED4)               \
+                                                                SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED5)           \
+                                                                    SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED6)       \
+                                                                        SEP FUNC(cmd1_opcode, NPU_SET_USER_DEFINED7)
+
+#define EXPAND_CMD_CTRL(FUNC, SEP) FUNC(cmd_ctrl, CMD0_CTRL) SEP FUNC(cmd_ctrl, CMD1_CTRL)
+
+#define EXPAND_CUSTOM_DMA_CS(FUNC, SEP) FUNC(custom_dma_cs, DISABLE) SEP FUNC(custom_dma_cs, ENABLE)
+
+#define EXPAND_CUSTOM_DMA(FUNC, SEP) FUNC(custom_dma, NOT_IMPLEMENTED) SEP FUNC(custom_dma, IMPLEMENTED)
+
+#define EXPAND_DMA_FAULT_SRC(FUNC, SEP) FUNC(dma_fault_src, AXI_M0) SEP FUNC(dma_fault_src, AXI_M1)
+
+#define EXPAND_DMA_REGION_MODE(FUNC, SEP) FUNC(dma_region_mode, EXTERNAL) SEP FUNC(dma_region_mode, INTERNAL)
+
+#define EXPAND_DMA_STRIDE_MODE(FUNC, SEP) FUNC(dma_stride_mode, D1)
 
 #define EXPAND_ELEMENTWISE_MODE(FUNC, SEP)                                                                             \
     FUNC(elementwise_mode, MUL)                                                                                        \
@@ -13991,116 +26064,112 @@ struct npu_set_scale1_length_t
         SEP FUNC(elementwise_mode, MAX) SEP FUNC(elementwise_mode, LRELU) SEP FUNC(elementwise_mode, ABS)              \
             SEP FUNC(elementwise_mode, CLZ) SEP FUNC(elementwise_mode, SHR) SEP FUNC(elementwise_mode, SHL)
 
-#define EXPAND_IFM_PRECISION(FUNC, SEP)                                                                                \
-    FUNC(ifm_precision, U8)                                                                                            \
-    SEP FUNC(ifm_precision, S8) SEP FUNC(ifm_precision, U16) SEP FUNC(ifm_precision, S16) SEP FUNC(ifm_precision, S32)
+#define EXPAND_FUNCTIONAL_SAFETY(FUNC, SEP)                                                                            \
+    FUNC(functional_safety, NOT_IMPLEMENTED) SEP FUNC(functional_safety, IMPLEMENTED)
+
+#define EXPAND_IFM2_OPERAND_ORDER(FUNC, SEP) FUNC(ifm2_operand_order, ORDER_B) SEP FUNC(ifm2_operand_order, ORDER_A)
 
 #define EXPAND_IFM_SCALE_MODE(FUNC, SEP)                                                                               \
-    FUNC(ifm_scale_mode, SCALE_16BIT)                                                                                  \
-    SEP FUNC(ifm_scale_mode, SCALE_OPA_32BIT) SEP FUNC(ifm_scale_mode, SCALE_OPB_32BIT)
-
-#define EXPAND_MACS_PER_CC(FUNC, SEP)                                                                                  \
-    FUNC(macs_per_cc, MACS_PER_CC_IS_5)                                                                                \
-    SEP FUNC(macs_per_cc, MACS_PER_CC_IS_6) SEP FUNC(macs_per_cc, MACS_PER_CC_IS_7)                                    \
-        SEP FUNC(macs_per_cc, MACS_PER_CC_IS_8)
-
-#define EXPAND_MEMORY_TYPE(FUNC, SEP)                                                                                  \
-    FUNC(memory_type, AXI0_OUTSTANDING_COUNTER0)                                                                       \
-    SEP FUNC(memory_type, AXI0_OUTSTANDING_COUNTER1) SEP FUNC(memory_type, AXI1_OUTSTANDING_COUNTER2)                  \
-        SEP FUNC(memory_type, AXI1_OUTSTANDING_COUNTER3)
-
-#define EXPAND_OFM_PRECISION(FUNC, SEP)                                                                                \
-    FUNC(ofm_precision, U8)                                                                                            \
-    SEP FUNC(ofm_precision, S8) SEP FUNC(ofm_precision, U16) SEP FUNC(ofm_precision, S16) SEP FUNC(ofm_precision, S32)
-
-#define EXPAND_PMU_EVENT_TYPE(FUNC, SEP)                                                                                           \
-    FUNC(pmu_event_type, NO_EVENT)                                                                                                 \
-    SEP FUNC(pmu_event_type, CYCLE) SEP FUNC(pmu_event_type, NPU_IDLE) SEP FUNC(                                                   \
-        pmu_event_type, CC_STALLED_ON_BLOCKDEP) SEP FUNC(pmu_event_type,                                                           \
-                                                         CC_STALLED_ON_SHRAM_RECONFIG) SEP FUNC(pmu_event_type,                    \
-                                                                                                NPU_ACTIVE)                        \
-        SEP FUNC(pmu_event_type, MAC_ACTIVE) SEP FUNC(pmu_event_type, MAC_ACTIVE_8BIT) SEP FUNC(                                   \
-            pmu_event_type, MAC_ACTIVE_16BIT) SEP FUNC(pmu_event_type, MAC_DPU_ACTIVE) SEP FUNC(pmu_event_type,                    \
-                                                                                                MAC_STALLED_BY_WD_ACC)             \
-            SEP FUNC(pmu_event_type, MAC_STALLED_BY_WD) SEP FUNC(pmu_event_type, MAC_STALLED_BY_ACC) SEP FUNC(                     \
-                pmu_event_type, MAC_STALLED_BY_IB) SEP FUNC(pmu_event_type,                                                        \
-                                                            MAC_ACTIVE_32BIT) SEP FUNC(pmu_event_type,                             \
-                                                                                       MAC_STALLED_BY_INT_W)                       \
-                SEP FUNC(pmu_event_type, MAC_STALLED_BY_INT_ACC) SEP FUNC(pmu_event_type, AO_ACTIVE) SEP FUNC(                     \
-                    pmu_event_type, AO_ACTIVE_8BIT) SEP FUNC(pmu_event_type,                                                       \
-                                                             AO_ACTIVE_16BIT) SEP FUNC(pmu_event_type,                             \
-                                                                                       AO_STALLED_BY_OFMP_OB)                      \
-                    SEP FUNC(pmu_event_type, AO_STALLED_BY_OFMP) SEP FUNC(pmu_event_type, AO_STALLED_BY_OB) SEP FUNC(              \
-                        pmu_event_type,                                                                                            \
-                        AO_STALLED_BY_ACC_IB) SEP FUNC(pmu_event_type,                                                             \
-                                                       AO_STALLED_BY_ACC) SEP FUNC(pmu_event_type,                                 \
-                                                                                   AO_STALLED_BY_IB) SEP FUNC(pmu_event_type,      \
-                                                                                                              WD_ACTIVE) SEP       \
-                        FUNC(pmu_event_type, WD_STALLED) SEP FUNC(pmu_event_type, WD_STALLED_BY_WS) SEP FUNC(                      \
-                            pmu_event_type,                                                                                        \
-                            WD_STALLED_BY_WD_BUF) SEP                                                                              \
-                            FUNC(pmu_event_type, WD_PARSE_ACTIVE) SEP FUNC(pmu_event_type, WD_PARSE_STALLED) SEP FUNC(             \
-                                pmu_event_type,                                                                                    \
-                                WD_PARSE_STALLED_IN) SEP FUNC(pmu_event_type,                                                      \
-                                                              WD_PARSE_STALLED_OUT) SEP                                            \
-                                FUNC(pmu_event_type, WD_TRANS_WS) SEP FUNC(pmu_event_type, WD_TRANS_WB) SEP FUNC(                  \
-                                    pmu_event_type,                                                                                \
-                                    WD_TRANS_DW0) SEP FUNC(pmu_event_type,                                                         \
-                                                           WD_TRANS_DW1) SEP FUNC(pmu_event_type,                                  \
-                                                                                  AXI0_RD_TRANS_ACCEPTED) SEP                      \
-                                    FUNC(pmu_event_type, AXI0_RD_TRANS_COMPLETED) SEP FUNC(                                        \
-                                        pmu_event_type,                                                                            \
-                                        AXI0_RD_DATA_BEAT_RECEIVED) SEP FUNC(pmu_event_type, AXI0_RD_TRAN_REQ_STALLED)             \
-                                        SEP FUNC(pmu_event_type,                                                                   \
-                                                 AXI0_WR_TRANS_ACCEPTED) SEP FUNC(pmu_event_type,                                  \
-                                                                                  AXI0_WR_TRANS_COMPLETED_M)                       \
-                                            SEP FUNC(pmu_event_type, AXI0_WR_TRANS_COMPLETED_S) SEP FUNC(                          \
-                                                pmu_event_type,                                                                    \
-                                                AXI0_WR_DATA_BEAT_WRITTEN)                                                         \
-                                                SEP FUNC(pmu_event_type, AXI0_WR_TRAN_REQ_STALLED) SEP FUNC(                       \
-                                                    pmu_event_type,                                                                \
-                                                    AXI0_WR_DATA_BEAT_STALLED) SEP                                                 \
-                                                    FUNC(pmu_event_type, AXI0_ENABLED_CYCLES) SEP FUNC(                            \
-                                                        pmu_event_type,                                                            \
-                                                        AXI0_RD_STALL_LIMIT) SEP FUNC(pmu_event_type,                              \
-                                                                                      AXI0_WR_STALL_LIMIT) SEP                     \
-                                                        FUNC(pmu_event_type, AXI1_RD_TRANS_ACCEPTED) SEP FUNC(                     \
-                                                            pmu_event_type,                                                        \
-                                                            AXI1_RD_TRANS_COMPLETED) SEP FUNC(pmu_event_type,                      \
-                                                                                              AXI1_RD_DATA_BEAT_RECEIVED) SEP      \
-                                                            FUNC(pmu_event_type, AXI1_RD_TRAN_REQ_STALLED) SEP FUNC(               \
-                                                                pmu_event_type,                                                    \
-                                                                AXI1_WR_TRANS_ACCEPTED) SEP                                        \
-                                                                FUNC(pmu_event_type, AXI1_WR_TRANS_COMPLETED_M) SEP FUNC(          \
-                                                                    pmu_event_type,                                                \
-                                                                    AXI1_WR_TRANS_COMPLETED_S) SEP                                 \
-                                                                    FUNC(pmu_event_type, AXI1_WR_DATA_BEAT_WRITTEN) SEP FUNC(      \
-                                                                        pmu_event_type,                                            \
-                                                                        AXI1_WR_TRAN_REQ_STALLED) SEP                              \
-                                                                        FUNC(pmu_event_type, AXI1_WR_DATA_BEAT_STALLED) SEP FUNC(  \
-                                                                            pmu_event_type,                                        \
-                                                                            AXI1_ENABLED_CYCLES) SEP FUNC(pmu_event_type,          \
-                                                                                                          AXI1_RD_STALL_LIMIT) SEP \
-                                                                            FUNC(pmu_event_type, AXI1_WR_STALL_LIMIT) SEP FUNC(    \
-                                                                                pmu_event_type,                                    \
-                                                                                AXI_LATENCY_ANY) SEP FUNC(pmu_event_type,          \
-                                                                                                          AXI_LATENCY_32) SEP      \
-                                                                                FUNC(pmu_event_type, AXI_LATENCY_64) SEP FUNC(     \
-                                                                                    pmu_event_type,                                \
-                                                                                    AXI_LATENCY_128) SEP                           \
-                                                                                    FUNC(pmu_event_type,                           \
-                                                                                         AXI_LATENCY_256) SEP                      \
-                                                                                        FUNC(pmu_event_type,                       \
-                                                                                             AXI_LATENCY_512) SEP                  \
-                                                                                            FUNC(pmu_event_type,                   \
-                                                                                                 AXI_LATENCY_1024) SEP             \
-                                                                                                FUNC(pmu_event_type,               \
-                                                                                                     ECC_DMA) SEP                  \
-                                                                                                    FUNC(                          \
-                                                                                                        pmu_event_type,            \
-                                                                                                        ECC_SB0) SEP               \
-                                                                                                        FUNC(                      \
-                                                                                                            pmu_event_type,        \
+    FUNC(ifm_scale_mode, OPA_OPB_16) SEP FUNC(ifm_scale_mode, OPA_32) SEP FUNC(ifm_scale_mode, OPB_32)
+
+#define EXPAND_IFM_UPSCALE_MODE(FUNC, SEP)                                                                             \
+    FUNC(ifm_upscale_mode, NONE) SEP FUNC(ifm_upscale_mode, NEAREST) SEP FUNC(ifm_upscale_mode, ZEROS)
+
+#define EXPAND_KERNEL_DECOMPOSITION(FUNC, SEP) FUNC(kernel_decomposition, D8X8) SEP FUNC(kernel_decomposition, D4X4)
+
+#define EXPAND_KERNEL_DILATION(FUNC, SEP) FUNC(kernel_dilation, NONE) SEP FUNC(kernel_dilation, X2)
+
+#define EXPAND_MAX_BEATS(FUNC, SEP) FUNC(max_beats, B64) SEP FUNC(max_beats, B128) SEP FUNC(max_beats, B256)
+
+#define EXPAND_MEM_ATTR(FUNC, SEP)                                                                                     \
+    FUNC(mem_attr, AXI0_OUTSTANDING_COUNTER0)                                                                          \
+    SEP FUNC(mem_attr, AXI0_OUTSTANDING_COUNTER1) SEP FUNC(mem_attr, AXI1_OUTSTANDING_COUNTER2)                        \
+        SEP FUNC(mem_attr, AXI1_OUTSTANDING_COUNTER3)
+
+#define EXPAND_OFM_SCALE_MODE(FUNC, SEP) FUNC(ofm_scale_mode, PER_CHANNEL) SEP FUNC(ofm_scale_mode, GLOBAL)
+
+#define EXPAND_PMU_AXI_CHANNEL(FUNC, SEP)                                                                              \
+    FUNC(pmu_axi_channel, RD_CMD)                                                                                      \
+    SEP FUNC(pmu_axi_channel, RD_IFM) SEP FUNC(pmu_axi_channel, RD_WEIGHTS) SEP FUNC(pmu_axi_channel, RD_SCALE_BIAS)   \
+        SEP FUNC(pmu_axi_channel, RD_MEM2MEM) SEP FUNC(pmu_axi_channel, WR_OFM) SEP FUNC(pmu_axi_channel, WR_MEM2MEM)
+
+#define EXPAND_PMU_EVENT(FUNC, SEP)                                                                                                    \
+    FUNC(pmu_event, NO_EVENT)                                                                                                          \
+    SEP FUNC(pmu_event, CYCLE) SEP FUNC(pmu_event, NPU_IDLE) SEP FUNC(pmu_event, CC_STALLED_ON_BLOCKDEP) SEP FUNC(                     \
+        pmu_event, CC_STALLED_ON_SHRAM_RECONFIG) SEP FUNC(pmu_event, NPU_ACTIVE) SEP FUNC(pmu_event, MAC_ACTIVE)                       \
+        SEP FUNC(pmu_event, MAC_ACTIVE_8BIT) SEP FUNC(pmu_event, MAC_ACTIVE_16BIT) SEP FUNC(                                           \
+            pmu_event, MAC_DPU_ACTIVE) SEP FUNC(pmu_event, MAC_STALLED_BY_WD_ACC) SEP FUNC(pmu_event,                                  \
+                                                                                           MAC_STALLED_BY_WD)                          \
+            SEP FUNC(pmu_event, MAC_STALLED_BY_ACC) SEP FUNC(pmu_event, MAC_STALLED_BY_IB) SEP FUNC(                                   \
+                pmu_event,                                                                                                             \
+                MAC_ACTIVE_32BIT) SEP FUNC(pmu_event,                                                                                  \
+                                           MAC_STALLED_BY_INT_W) SEP FUNC(pmu_event,                                                   \
+                                                                          MAC_STALLED_BY_INT_ACC) SEP FUNC(pmu_event,                  \
+                                                                                                           AO_ACTIVE)                  \
+                SEP FUNC(pmu_event, AO_ACTIVE_8BIT) SEP FUNC(pmu_event, AO_ACTIVE_16BIT) SEP FUNC(                                     \
+                    pmu_event, AO_STALLED_BY_OFMP_OB) SEP FUNC(pmu_event, AO_STALLED_BY_OFMP) SEP                                      \
+                    FUNC(pmu_event, AO_STALLED_BY_OB) SEP FUNC(pmu_event, AO_STALLED_BY_ACC_IB) SEP FUNC(                              \
+                        pmu_event, AO_STALLED_BY_ACC) SEP FUNC(pmu_event, AO_STALLED_BY_IB) SEP                                        \
+                        FUNC(pmu_event, WD_ACTIVE) SEP FUNC(pmu_event, WD_STALLED) SEP FUNC(pmu_event, WD_STALLED_BY_WS) SEP FUNC(     \
+                            pmu_event, WD_STALLED_BY_WD_BUF) SEP FUNC(pmu_event,                                                       \
+                                                                      WD_PARSE_ACTIVE) SEP                                             \
+                            FUNC(pmu_event, WD_PARSE_STALLED) SEP FUNC(pmu_event, WD_PARSE_STALLED_IN) SEP FUNC(                       \
+                                pmu_event, WD_PARSE_STALLED_OUT) SEP FUNC(pmu_event,                                                   \
+                                                                          WD_TRANS_WS) SEP                                             \
+                                FUNC(pmu_event, WD_TRANS_WB) SEP FUNC(pmu_event, WD_TRANS_DW0) SEP FUNC(                               \
+                                    pmu_event, WD_TRANS_DW1) SEP FUNC(pmu_event,                                                       \
+                                                                      AXI0_RD_TRANS_ACCEPTED) SEP                                      \
+                                    FUNC(pmu_event, AXI0_RD_TRANS_COMPLETED) SEP FUNC(pmu_event, AXI0_RD_DATA_BEAT_RECEIVED) SEP FUNC( \
+                                        pmu_event, AXI0_RD_TRAN_REQ_STALLED) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI0_WR_TRANS_ACCEPTED) SEP                      \
+                                        FUNC(pmu_event, AXI0_WR_TRANS_COMPLETED_M) SEP FUNC(                                           \
+                                            pmu_event, AXI0_WR_TRANS_COMPLETED_S) SEP                                                  \
+                                            FUNC(pmu_event, AXI0_WR_DATA_BEAT_WRITTEN) SEP FUNC(                                       \
+                                                pmu_event, AXI0_WR_TRAN_REQ_STALLED) SEP                                               \
+                                                FUNC(pmu_event, AXI0_WR_DATA_BEAT_STALLED) SEP FUNC(                                   \
+                                                    pmu_event,                                                                         \
+                                                    AXI0_ENABLED_CYCLES) SEP FUNC(pmu_event,                                           \
+                                                                                  AXI0_RD_STALL_LIMIT) SEP                             \
+                                                    FUNC(pmu_event, AXI0_WR_STALL_LIMIT) SEP FUNC(                                     \
+                                                        pmu_event,                                                                     \
+                                                        AXI_LATENCY_ANY) SEP FUNC(pmu_event,                                           \
+                                                                                  AXI_LATENCY_32) SEP                                  \
+                                                        FUNC(pmu_event,                                                                \
+                                                             AXI_LATENCY_64) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI_LATENCY_128) SEP                             \
+                                                            FUNC(pmu_event, AXI_LATENCY_256) SEP FUNC(                                 \
+                                                                pmu_event,                                                             \
+                                                                AXI_LATENCY_512) SEP FUNC(pmu_event,                                   \
+                                                                                          AXI_LATENCY_1024) SEP                        \
+                                                                FUNC(pmu_event, ECC_DMA) SEP FUNC(                                     \
+                                                                    pmu_event,                                                         \
+                                                                    ECC_SB0) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI1_RD_TRANS_ACCEPTED) SEP                      \
+                                                                    FUNC(pmu_event, AXI1_RD_TRANS_COMPLETED) SEP FUNC(                 \
+                                                                        pmu_event, AXI1_RD_DATA_BEAT_RECEIVED) SEP                     \
+                                                                        FUNC(pmu_event, AXI1_RD_TRAN_REQ_STALLED) SEP FUNC(            \
+                                                                            pmu_event, AXI1_WR_TRANS_ACCEPTED) SEP                     \
+                                                                            FUNC(pmu_event, AXI1_WR_TRANS_COMPLETED_M) SEP FUNC(       \
+                                                                                pmu_event,                                             \
+                                                                                AXI1_WR_TRANS_COMPLETED_S) SEP                         \
+                                                                                FUNC(pmu_event,                                        \
+                                                                                     AXI1_WR_DATA_BEAT_WRITTEN) SEP                    \
+                                                                                    FUNC(pmu_event,                                    \
+                                                                                         AXI1_WR_TRAN_REQ_STALLED) SEP                 \
+                                                                                        FUNC(                                          \
+                                                                                            pmu_event,                                 \
+                                                                                            AXI1_WR_DATA_BEAT_STALLED) SEP             \
+                                                                                            FUNC(                                      \
+                                                                                                pmu_event,                             \
+                                                                                                AXI1_ENABLED_CYCLES) SEP               \
+                                                                                                FUNC(                                  \
+                                                                                                    pmu_event,                         \
+                                                                                                    AXI1_RD_STALL_LIMIT) SEP           \
+                                                                                                    FUNC(                              \
+                                                                                                        pmu_event,                     \
+                                                                                                        AXI1_WR_STALL_LIMIT)           \
+                                                                                                        SEP FUNC(                      \
+                                                                                                            pmu_event,                 \
                                                                                                             ECC_SB1)
 
 #define EXPAND_POOLING_MODE(FUNC, SEP)                                                                                 \
@@ -14108,19 +26177,22 @@ struct npu_set_scale1_length_t
 
 #define EXPAND_PRIVILEGE_LEVEL(FUNC, SEP) FUNC(privilege_level, USER) SEP FUNC(privilege_level, PRIVILEGED)
 
-#define EXPAND_RESAMPLING_MODE(FUNC, SEP)                                                                              \
-    FUNC(resampling_mode, NONE) SEP FUNC(resampling_mode, NEAREST) SEP FUNC(resampling_mode, TRANSPOSE)
-
-#define EXPAND_ROUNDING(FUNC, SEP) FUNC(rounding, TFL) SEP FUNC(rounding, TRUNCATE) SEP FUNC(rounding, NATURAL)
+#define EXPAND_ROUND_MODE(FUNC, SEP) FUNC(round_mode, DBL) SEP FUNC(round_mode, TRUNCATE) SEP FUNC(round_mode, NATURAL)
 
 #define EXPAND_SECURITY_LEVEL(FUNC, SEP) FUNC(security_level, SECURE) SEP FUNC(security_level, NON_SECURE)
 
-#define EXPAND_SHRAM_SIZE(FUNC, SEP)                                                                                   \
-    FUNC(shram_size, SHRAM_96KB)                                                                                       \
-    SEP FUNC(shram_size, SHRAM_48KB) SEP FUNC(shram_size, SHRAM_24KB) SEP FUNC(shram_size, SHRAM_16KB)
-
 #define EXPAND_STATE(FUNC, SEP) FUNC(state, STOPPED) SEP FUNC(state, RUNNING)
 
-#define EXPAND_STRIDE_MODE(FUNC, SEP)                                                                                  \
-    FUNC(stride_mode, STRIDE_MODE_1D) SEP FUNC(stride_mode, STRIDE_MODE_2D) SEP FUNC(stride_mode, STRIDE_MODE_3D)
-#endif /* ETHOSU55_INTERFACE_H */
+#define EXPAND_WD_CORE_SLICE_STATE(FUNC, SEP)                                                                          \
+    FUNC(wd_core_slice_state, HEADER) SEP FUNC(wd_core_slice_state, PALETTE) SEP FUNC(wd_core_slice_state, WEIGHTS)
+
+#define EXPAND_WD_CTRL_STATE(FUNC, SEP)                                                                                \
+    FUNC(wd_ctrl_state, IDLE)                                                                                          \
+    SEP FUNC(wd_ctrl_state, DRAIN) SEP FUNC(wd_ctrl_state, OFD_INIT) SEP FUNC(wd_ctrl_state, OFD_RUN)
+
+#define EXPAND_WEIGHT_ORDER(FUNC, SEP) FUNC(weight_order, DEPTH_FIRST) SEP FUNC(weight_order, PART_KERNEL_FIRST)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu65_interface.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu65_interface.h
new file mode 100644
index 0000000..c09cbf8
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu65_interface.h
@@ -0,0 +1,26061 @@
+
+/*
+ * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU65_INTERFACE_H
+#define ETHOSU65_INTERFACE_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#if !defined(__cplusplus) || __cplusplus < 201402L
+#define CONSTEXPR
+#else
+#define CONSTEXPR constexpr
+#endif
+
+#ifndef __cplusplus
+#define STRUCT struct
+#else
+#define STRUCT
+#endif
+
+#if defined(__cplusplus) && defined(NPU_DISASSEMBLE)
+#include <iomanip>
+#include <sstream>
+#include <vector>
+#endif
+
+#if defined(__cplusplus) && !defined(NPU_NAMESPACE)
+#define NPU_NAMESPACE npu
+#endif
+
+#ifdef __cplusplus
+#include <cstring>
+#include <limits>
+#endif
+
+#ifdef __cplusplus
+namespace NPU_NAMESPACE
+{
+#endif
+#define NNX_ARCH_VERSION_MAJOR 1
+#define NNX_ARCH_VERSION_MINOR 0
+#define NNX_ARCH_VERSION_PATCH 6
+
+// Register offsets
+//
+// Register subpage BASE
+//
+#define NPU_REG_ID 0x0000
+#define NPU_REG_STATUS 0x0004
+#define NPU_REG_CMD 0x0008
+#define NPU_REG_RESET 0x000C
+#define NPU_REG_QBASE 0x0010
+#define NPU_REG_QBASE_HI 0x0014
+#define NPU_REG_QREAD 0x0018
+#define NPU_REG_QCONFIG 0x001C
+#define NPU_REG_QSIZE 0x0020
+#define NPU_REG_PROT 0x0024
+#define NPU_REG_CONFIG 0x0028
+#define NPU_REG_LOCK 0x002C
+#define NPU_REG_REGIONCFG 0x003C
+#define NPU_REG_AXI_LIMIT0 0x0040
+#define NPU_REG_AXI_LIMIT1 0x0044
+#define NPU_REG_AXI_LIMIT2 0x0048
+#define NPU_REG_AXI_LIMIT3 0x004C
+#define BASE_REGISTERS_SIZE 0x0080
+
+//
+// Register subpage BASE_POINTERS
+//
+#define NPU_REG_BASEP_BASE 0x0080
+#define NPU_REG_BASEP_ARRLEN 0x0008
+#define BASE_POINTERS_REGISTERS_SIZE 0x0100
+
+//
+// Register subpage DEBUG
+//
+#define NPU_REG_WD_STATUS 0x0100
+#define NPU_REG_MAC_STATUS 0x0104
+#define NPU_REG_AO_STATUS 0x0108
+#define NPU_REG_DMA_STATUS0 0x0110
+#define NPU_REG_DMA_STATUS1 0x0114
+#define NPU_REG_CLKFORCE 0x0140
+#define NPU_REG_DEBUG_ADDRESS 0x0144
+#define NPU_REG_DEBUG_MISC 0x0148
+#define NPU_REG_DEBUGCORE 0x014C
+#define NPU_REG_DEBUG_BLOCK 0x0150
+#define DEBUG_REGISTERS_SIZE 0x0180
+
+//
+// Register subpage PMU
+//
+#define NPU_REG_PMCR 0x0180
+#define NPU_REG_PMCNTENSET 0x0184
+#define NPU_REG_PMCNTENCLR 0x0188
+#define NPU_REG_PMOVSSET 0x018C
+#define NPU_REG_PMOVSCLR 0x0190
+#define NPU_REG_PMINTSET 0x0194
+#define NPU_REG_PMINTCLR 0x0198
+#define NPU_REG_PMCCNTR 0x01A0
+#define NPU_REG_PMCCNTR_HI 0x01A4
+#define NPU_REG_PMCCNTR_CFG 0x01A8
+#define NPU_REG_PMCAXI_CHAN 0x01AC
+#define PMU_REGISTERS_SIZE 0x0200
+
+//
+// Register subpage TSU_DEBUG
+//
+#define NPU_REG_KERNEL_X 0x0200
+#define NPU_REG_KERNEL_Y 0x0204
+#define NPU_REG_KERNEL_W_M1 0x0208
+#define NPU_REG_KERNEL_H_M1 0x020C
+#define NPU_REG_OFM_CBLK_WIDTH_M1 0x0210
+#define NPU_REG_OFM_CBLK_HEIGHT_M1 0x0214
+#define NPU_REG_OFM_CBLK_DEPTH_M1 0x0218
+#define NPU_REG_IFM_CBLK_DEPTH_M1 0x021C
+#define NPU_REG_OFM_X 0x0220
+#define NPU_REG_OFM_Y 0x0224
+#define NPU_REG_OFM_Z 0x0228
+#define NPU_REG_IFM_Z 0x022C
+#define NPU_REG_PAD_TOP 0x0230
+#define NPU_REG_PAD_LEFT 0x0234
+#define NPU_REG_IFM_CBLK_WIDTH 0x0238
+#define NPU_REG_IFM_CBLK_HEIGHT 0x023C
+#define NPU_REG_DMA_IFM_SRC 0x0240
+#define NPU_REG_DMA_IFM_SRC_HI 0x0244
+#define NPU_REG_DMA_IFM_DST 0x0248
+#define NPU_REG_DMA_OFM_SRC 0x024C
+#define NPU_REG_DMA_OFM_DST 0x0250
+#define NPU_REG_DMA_OFM_DST_HI 0x0254
+#define NPU_REG_DMA_WEIGHT_SRC 0x0258
+#define NPU_REG_DMA_WEIGHT_SRC_HI 0x025C
+#define NPU_REG_DMA_CMD_SRC 0x0260
+#define NPU_REG_DMA_CMD_SRC_HI 0x0264
+#define NPU_REG_DMA_CMD_SIZE 0x0268
+#define NPU_REG_DMA_M2M_SRC 0x026C
+#define NPU_REG_DMA_M2M_SRC_HI 0x0270
+#define NPU_REG_DMA_M2M_DST 0x0274
+#define NPU_REG_DMA_M2M_DST_HI 0x0278
+#define NPU_REG_CURRENT_QREAD 0x027C
+#define NPU_REG_DMA_SCALE_SRC 0x0280
+#define NPU_REG_DMA_SCALE_SRC_HI 0x0284
+#define NPU_REG_CURRENT_BLOCK 0x02B4
+#define NPU_REG_CURRENT_OP 0x02B8
+#define NPU_REG_CURRENT_CMD 0x02BC
+#define TSU_DEBUG_REGISTERS_SIZE 0x02C0
+
+//
+// Register subpage PMU_COUNTERS
+//
+#define NPU_REG_PMEVCNTR_BASE 0x0300
+#define NPU_REG_PMEVCNTR_ARRLEN 0x0004
+#define NPU_REG_PMEVTYPER_BASE 0x0380
+#define NPU_REG_PMEVTYPER_ARRLEN 0x0004
+#define PMU_COUNTERS_REGISTERS_SIZE 0x0400
+
+//
+// Register subpage SHARED_BUFFER
+//
+#define NPU_REG_SHARED_BUFFER_BASE 0x0400
+#define NPU_REG_SHARED_BUFFER_ARRLEN 0x0100
+#define SHARED_BUFFER_REGISTERS_SIZE 0x0800
+
+//
+// Register subpage TSU_IFM
+//
+#define NPU_REG_IFM_PAD_TOP 0x0800
+#define NPU_REG_IFM_PAD_LEFT 0x0804
+#define NPU_REG_IFM_PAD_RIGHT 0x0808
+#define NPU_REG_IFM_PAD_BOTTOM 0x080C
+#define NPU_REG_IFM_DEPTH_M1 0x0810
+#define NPU_REG_IFM_PRECISION 0x0814
+#define NPU_REG_IFM_UPSCALE 0x081C
+#define NPU_REG_IFM_ZERO_POINT 0x0824
+#define NPU_REG_IFM_WIDTH0_M1 0x0828
+#define NPU_REG_IFM_HEIGHT0_M1 0x082C
+#define NPU_REG_IFM_HEIGHT1_M1 0x0830
+#define NPU_REG_IFM_IB_END 0x0834
+#define NPU_REG_IFM_REGION 0x083C
+#define TSU_IFM_REGISTERS_SIZE 0x0840
+
+//
+// Register subpage TSU_OFM
+//
+#define NPU_REG_OFM_WIDTH_M1 0x0844
+#define NPU_REG_OFM_HEIGHT_M1 0x0848
+#define NPU_REG_OFM_DEPTH_M1 0x084C
+#define NPU_REG_OFM_PRECISION 0x0850
+#define NPU_REG_OFM_BLK_WIDTH_M1 0x0854
+#define NPU_REG_OFM_BLK_HEIGHT_M1 0x0858
+#define NPU_REG_OFM_BLK_DEPTH_M1 0x085C
+#define NPU_REG_OFM_ZERO_POINT 0x0860
+#define NPU_REG_OFM_WIDTH0_M1 0x0868
+#define NPU_REG_OFM_HEIGHT0_M1 0x086C
+#define NPU_REG_OFM_HEIGHT1_M1 0x0870
+#define NPU_REG_OFM_REGION 0x087C
+#define TSU_OFM_REGISTERS_SIZE 0x0880
+
+//
+// Register subpage TSU_KERNEL
+//
+#define NPU_REG_KERNEL_WIDTH_M1 0x0880
+#define NPU_REG_KERNEL_HEIGHT_M1 0x0884
+#define NPU_REG_KERNEL_STRIDE 0x0888
+#define NPU_REG_PARALLEL_MODE 0x088C
+#define NPU_REG_ACC_FORMAT 0x0890
+#define NPU_REG_ACTIVATION 0x0894
+#define NPU_REG_ACTIVATION_MIN 0x0898
+#define NPU_REG_ACTIVATION_MAX 0x089C
+#define NPU_REG_WEIGHT_REGION 0x08A0
+#define NPU_REG_SCALE_REGION 0x08A4
+#define NPU_REG_AB_START 0x08B4
+#define NPU_REG_BLOCKDEP 0x08BC
+#define TSU_KERNEL_REGISTERS_SIZE 0x08C0
+
+//
+// Register subpage TSU_DMA
+//
+#define NPU_REG_DMA0_SRC_REGION 0x08C0
+#define NPU_REG_DMA0_DST_REGION 0x08C4
+#define NPU_REG_DMA0_SIZE0 0x08C8
+#define NPU_REG_DMA0_SIZE1 0x08CC
+#define TSU_DMA_REGISTERS_SIZE 0x0900
+
+//
+// Register subpage TSU_IFM2
+//
+#define NPU_REG_IFM2_BROADCAST 0x0900
+#define NPU_REG_IFM2_SCALAR 0x0904
+#define NPU_REG_IFM2_PRECISION 0x0914
+#define NPU_REG_IFM2_ZERO_POINT 0x0924
+#define NPU_REG_IFM2_WIDTH0_M1 0x0928
+#define NPU_REG_IFM2_HEIGHT0_M1 0x092C
+#define NPU_REG_IFM2_HEIGHT1_M1 0x0930
+#define NPU_REG_IFM2_IB_START 0x0934
+#define NPU_REG_IFM2_REGION 0x093C
+#define TSU_IFM2_REGISTERS_SIZE 0x0940
+
+//
+// Register subpage TSU_IFM_BASE
+//
+#define NPU_REG_IFM_BASE0 0x0A00
+#define NPU_REG_IFM_BASE0_HI 0x0A04
+#define NPU_REG_IFM_BASE1 0x0A08
+#define NPU_REG_IFM_BASE1_HI 0x0A0C
+#define NPU_REG_IFM_BASE2 0x0A10
+#define NPU_REG_IFM_BASE2_HI 0x0A14
+#define NPU_REG_IFM_BASE3 0x0A18
+#define NPU_REG_IFM_BASE3_HI 0x0A1C
+#define NPU_REG_IFM_STRIDE_X 0x0A20
+#define NPU_REG_IFM_STRIDE_X_HI 0x0A24
+#define NPU_REG_IFM_STRIDE_Y 0x0A28
+#define NPU_REG_IFM_STRIDE_Y_HI 0x0A2C
+#define NPU_REG_IFM_STRIDE_C 0x0A30
+#define NPU_REG_IFM_STRIDE_C_HI 0x0A34
+#define TSU_IFM_BASE_REGISTERS_SIZE 0x0A40
+
+//
+// Register subpage TSU_OFM_BASE
+//
+#define NPU_REG_OFM_BASE0 0x0A40
+#define NPU_REG_OFM_BASE0_HI 0x0A44
+#define NPU_REG_OFM_BASE1 0x0A48
+#define NPU_REG_OFM_BASE1_HI 0x0A4C
+#define NPU_REG_OFM_BASE2 0x0A50
+#define NPU_REG_OFM_BASE2_HI 0x0A54
+#define NPU_REG_OFM_BASE3 0x0A58
+#define NPU_REG_OFM_BASE3_HI 0x0A5C
+#define NPU_REG_OFM_STRIDE_X 0x0A60
+#define NPU_REG_OFM_STRIDE_X_HI 0x0A64
+#define NPU_REG_OFM_STRIDE_Y 0x0A68
+#define NPU_REG_OFM_STRIDE_Y_HI 0x0A6C
+#define NPU_REG_OFM_STRIDE_C 0x0A70
+#define NPU_REG_OFM_STRIDE_C_HI 0x0A74
+#define TSU_OFM_BASE_REGISTERS_SIZE 0x0A80
+
+//
+// Register subpage TSU_WS_BASE
+//
+#define NPU_REG_WEIGHT_BASE 0x0A80
+#define NPU_REG_WEIGHT_BASE_HI 0x0A84
+#define NPU_REG_WEIGHT_LENGTH 0x0A88
+#define NPU_REG_WEIGHT_LENGTH_HI 0x0A8C
+#define NPU_REG_SCALE_BASE 0x0A90
+#define NPU_REG_SCALE_BASE_HI 0x0A94
+#define NPU_REG_SCALE_LENGTH 0x0A98
+#define NPU_REG_SCALE_LENGTH_HI 0x0A9C
+#define NPU_REG_OFM_SCALE 0x0AA0
+#define NPU_REG_OFM_SCALE_SHIFT 0x0AA4
+#define NPU_REG_OPA_SCALE 0x0AA8
+#define NPU_REG_OPA_SCALE_SHIFT 0x0AAC
+#define NPU_REG_OPB_SCALE 0x0AB0
+#define TSU_WS_BASE_REGISTERS_SIZE 0x0AC0
+
+//
+// Register subpage TSU_DMA_BASE
+//
+#define NPU_REG_DMA0_SRC 0x0AC0
+#define NPU_REG_DMA0_SRC_HI 0x0AC4
+#define NPU_REG_DMA0_DST 0x0AC8
+#define NPU_REG_DMA0_DST_HI 0x0ACC
+#define NPU_REG_DMA0_LEN 0x0AD0
+#define NPU_REG_DMA0_LEN_HI 0x0AD4
+#define NPU_REG_DMA0_SKIP0 0x0AD8
+#define NPU_REG_DMA0_SKIP0_HI 0x0ADC
+#define NPU_REG_DMA0_SKIP1 0x0AE0
+#define NPU_REG_DMA0_SKIP1_HI 0x0AE4
+#define TSU_DMA_BASE_REGISTERS_SIZE 0x0B00
+
+//
+// Register subpage TSU_IFM2_BASE
+//
+#define NPU_REG_IFM2_BASE0 0x0B00
+#define NPU_REG_IFM2_BASE0_HI 0x0B04
+#define NPU_REG_IFM2_BASE1 0x0B08
+#define NPU_REG_IFM2_BASE1_HI 0x0B0C
+#define NPU_REG_IFM2_BASE2 0x0B10
+#define NPU_REG_IFM2_BASE2_HI 0x0B14
+#define NPU_REG_IFM2_BASE3 0x0B18
+#define NPU_REG_IFM2_BASE3_HI 0x0B1C
+#define NPU_REG_IFM2_STRIDE_X 0x0B20
+#define NPU_REG_IFM2_STRIDE_X_HI 0x0B24
+#define NPU_REG_IFM2_STRIDE_Y 0x0B28
+#define NPU_REG_IFM2_STRIDE_Y_HI 0x0B2C
+#define NPU_REG_IFM2_STRIDE_C 0x0B30
+#define NPU_REG_IFM2_STRIDE_C_HI 0x0B34
+#define TSU_IFM2_BASE_REGISTERS_SIZE 0x0B40
+
+//
+// Register subpage TSU_WS1_BASE
+//
+#define NPU_REG_WEIGHT1_BASE 0x0B40
+#define NPU_REG_WEIGHT1_BASE_HI 0x0B44
+#define NPU_REG_WEIGHT1_LENGTH 0x0B48
+#define NPU_REG_WEIGHT1_LENGTH_HI 0x0B4C
+#define NPU_REG_SCALE1_BASE 0x0B50
+#define NPU_REG_SCALE1_BASE_HI 0x0B54
+#define NPU_REG_SCALE1_LENGTH 0x0B58
+#define NPU_REG_SCALE1_LENGTH_HI 0x0B5C
+#define TSU_WS1_BASE_REGISTERS_SIZE 0x0B80
+
+//
+// Register subpage TSU_USER_BASE
+//
+#define TSU_USER_BASE_REGISTERS_SIZE 0x0BC0
+
+//
+// Register subpage TSU_DMA_EBASE
+//
+#define TSU_DMA_EBASE_REGISTERS_SIZE 0x0C00
+
+//
+// Register subpage ID
+//
+#define NPU_REG_REVISION 0x0FC0
+#define NPU_REG_PID4 0x0FD0
+#define NPU_REG_PID5 0x0FD4
+#define NPU_REG_PID6 0x0FD8
+#define NPU_REG_PID7 0x0FDC
+#define NPU_REG_PID0 0x0FE0
+#define NPU_REG_PID1 0x0FE4
+#define NPU_REG_PID2 0x0FE8
+#define NPU_REG_PID3 0x0FEC
+#define NPU_REG_CID0 0x0FF0
+#define NPU_REG_CID1 0x0FF4
+#define NPU_REG_CID2 0x0FF8
+#define NPU_REG_CID3 0x0FFC
+#define ID_REGISTERS_SIZE 0x1000
+
+#ifdef __cplusplus
+// Enum types
+enum class acc_format : uint8_t
+{
+    I32 = 0,
+    I40 = 1,
+    F16 = 2,
+};
+
+enum class activation_clip_range : uint8_t
+{
+    OFM_PRECISION = 0,
+    FORCE_UINT8   = 2,
+    FORCE_INT8    = 3,
+    FORCE_INT16   = 5,
+};
+
+enum class activation_format : uint8_t
+{
+    NHWC    = 0,
+    NHCWB16 = 1,
+};
+
+enum class activation_function : uint8_t
+{
+    RELU    = 0,
+    TANH    = 3,
+    SIGMOID = 4,
+    TABLE_0 = 16,
+    TABLE_1 = 17,
+    TABLE_2 = 18,
+    TABLE_3 = 19,
+    TABLE_4 = 20,
+    TABLE_5 = 21,
+    TABLE_6 = 22,
+    TABLE_7 = 23,
+};
+
+enum class activation_precision : uint8_t
+{
+    B8  = 0,
+    B16 = 1,
+    B32 = 2,
+    B64 = 3,
+};
+
+enum class activation_type : uint8_t
+{
+    UNSIGNED = 0,
+    SIGNED   = 1,
+};
+
+enum class axi_mem_encoding : uint8_t
+{
+    DEVICE_NON_BUFFERABLE                 = 0,
+    DEVICE_BUFFERABLE                     = 1,
+    NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 2,
+    NORMAL_NON_CACHEABLE_BUFFERABLE       = 3,
+    WRITE_THROUGH_NO_ALLOCATE             = 4,
+    WRITE_THROUGH_READ_ALLOCATE           = 5,
+    WRITE_THROUGH_WRITE_ALLOCATE          = 6,
+    WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    WRITE_BACK_NO_ALLOCATE                = 8,
+    WRITE_BACK_READ_ALLOCATE              = 9,
+    WRITE_BACK_WRITE_ALLOCATE             = 10,
+    WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 11,
+};
+
+enum class broadcast_mode : uint8_t
+{
+    DISABLE = 0,
+    ENABLE  = 1,
+};
+
+enum class cmd0_opcode : uint16_t
+{
+    NPU_OP_STOP               = 0,
+    NPU_OP_IRQ                = 1,
+    NPU_OP_CONV               = 2,
+    NPU_OP_DEPTHWISE          = 3,
+    NPU_OP_POOL               = 5,
+    NPU_OP_ELEMENTWISE        = 6,
+    NPU_OP_DMA_START          = 16,
+    NPU_OP_DMA_WAIT           = 17,
+    NPU_OP_KERNEL_WAIT        = 18,
+    NPU_OP_PMU_MASK           = 19,
+    NPU_SET_IFM_PAD_TOP       = 256,
+    NPU_SET_IFM_PAD_LEFT      = 257,
+    NPU_SET_IFM_PAD_RIGHT     = 258,
+    NPU_SET_IFM_PAD_BOTTOM    = 259,
+    NPU_SET_IFM_DEPTH_M1      = 260,
+    NPU_SET_IFM_PRECISION     = 261,
+    NPU_SET_IFM_UPSCALE       = 263,
+    NPU_SET_IFM_ZERO_POINT    = 265,
+    NPU_SET_IFM_WIDTH0_M1     = 266,
+    NPU_SET_IFM_HEIGHT0_M1    = 267,
+    NPU_SET_IFM_HEIGHT1_M1    = 268,
+    NPU_SET_IFM_IB_END        = 269,
+    NPU_SET_IFM_REGION        = 271,
+    NPU_SET_OFM_WIDTH_M1      = 273,
+    NPU_SET_OFM_HEIGHT_M1     = 274,
+    NPU_SET_OFM_DEPTH_M1      = 275,
+    NPU_SET_OFM_PRECISION     = 276,
+    NPU_SET_OFM_BLK_WIDTH_M1  = 277,
+    NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    NPU_SET_OFM_BLK_DEPTH_M1  = 279,
+    NPU_SET_OFM_ZERO_POINT    = 280,
+    NPU_SET_OFM_WIDTH0_M1     = 282,
+    NPU_SET_OFM_HEIGHT0_M1    = 283,
+    NPU_SET_OFM_HEIGHT1_M1    = 284,
+    NPU_SET_OFM_REGION        = 287,
+    NPU_SET_KERNEL_WIDTH_M1   = 288,
+    NPU_SET_KERNEL_HEIGHT_M1  = 289,
+    NPU_SET_KERNEL_STRIDE     = 290,
+    NPU_SET_PARALLEL_MODE     = 291,
+    NPU_SET_ACC_FORMAT        = 292,
+    NPU_SET_ACTIVATION        = 293,
+    NPU_SET_ACTIVATION_MIN    = 294,
+    NPU_SET_ACTIVATION_MAX    = 295,
+    NPU_SET_WEIGHT_REGION     = 296,
+    NPU_SET_SCALE_REGION      = 297,
+    NPU_SET_AB_START          = 301,
+    NPU_SET_BLOCKDEP          = 303,
+    NPU_SET_DMA0_SRC_REGION   = 304,
+    NPU_SET_DMA0_DST_REGION   = 305,
+    NPU_SET_DMA0_SIZE0        = 306,
+    NPU_SET_DMA0_SIZE1        = 307,
+    NPU_SET_IFM2_BROADCAST    = 384,
+    NPU_SET_IFM2_SCALAR       = 385,
+    NPU_SET_IFM2_PRECISION    = 389,
+    NPU_SET_IFM2_ZERO_POINT   = 393,
+    NPU_SET_IFM2_WIDTH0_M1    = 394,
+    NPU_SET_IFM2_HEIGHT0_M1   = 395,
+    NPU_SET_IFM2_HEIGHT1_M1   = 396,
+    NPU_SET_IFM2_IB_START     = 397,
+    NPU_SET_IFM2_REGION       = 399,
+};
+
+enum class cmd1_opcode : uint16_t
+{
+    NPU_SET_IFM_BASE0      = 0,
+    NPU_SET_IFM_BASE1      = 1,
+    NPU_SET_IFM_BASE2      = 2,
+    NPU_SET_IFM_BASE3      = 3,
+    NPU_SET_IFM_STRIDE_X   = 4,
+    NPU_SET_IFM_STRIDE_Y   = 5,
+    NPU_SET_IFM_STRIDE_C   = 6,
+    NPU_SET_OFM_BASE0      = 16,
+    NPU_SET_OFM_BASE1      = 17,
+    NPU_SET_OFM_BASE2      = 18,
+    NPU_SET_OFM_BASE3      = 19,
+    NPU_SET_OFM_STRIDE_X   = 20,
+    NPU_SET_OFM_STRIDE_Y   = 21,
+    NPU_SET_OFM_STRIDE_C   = 22,
+    NPU_SET_WEIGHT_BASE    = 32,
+    NPU_SET_WEIGHT_LENGTH  = 33,
+    NPU_SET_SCALE_BASE     = 34,
+    NPU_SET_SCALE_LENGTH   = 35,
+    NPU_SET_OFM_SCALE      = 36,
+    NPU_SET_OPA_SCALE      = 37,
+    NPU_SET_OPB_SCALE      = 38,
+    NPU_SET_DMA0_SRC       = 48,
+    NPU_SET_DMA0_DST       = 49,
+    NPU_SET_DMA0_LEN       = 50,
+    NPU_SET_DMA0_SKIP0     = 51,
+    NPU_SET_DMA0_SKIP1     = 52,
+    NPU_SET_IFM2_BASE0     = 128,
+    NPU_SET_IFM2_BASE1     = 129,
+    NPU_SET_IFM2_BASE2     = 130,
+    NPU_SET_IFM2_BASE3     = 131,
+    NPU_SET_IFM2_STRIDE_X  = 132,
+    NPU_SET_IFM2_STRIDE_Y  = 133,
+    NPU_SET_IFM2_STRIDE_C  = 134,
+    NPU_SET_WEIGHT1_BASE   = 144,
+    NPU_SET_WEIGHT1_LENGTH = 145,
+    NPU_SET_SCALE1_BASE    = 146,
+    NPU_SET_SCALE1_LENGTH  = 147,
+};
+
+enum class cmd_ctrl : uint8_t
+{
+    CMD0_CTRL = 0,
+    CMD1_CTRL = 1,
+};
+
+enum class custom_dma : uint8_t
+{
+    NOT_IMPLEMENTED = 0,
+    IMPLEMENTED     = 1,
+};
+
+enum class dma_fault_src : uint8_t
+{
+    AXI_M0 = 0,
+    AXI_M1 = 1,
+};
+
+enum class dma_region_mode : uint8_t
+{
+    EXTERNAL = 0,
+    INTERNAL = 1,
+};
+
+enum class dma_stride_mode : uint8_t
+{
+    D1 = 0,
+    D2 = 1,
+    D3 = 2,
+};
+
+enum class elementwise_mode : uint8_t
+{
+    MUL   = 0,
+    ADD   = 1,
+    SUB   = 2,
+    MIN   = 3,
+    MAX   = 4,
+    LRELU = 5,
+    ABS   = 6,
+    CLZ   = 7,
+    SHR   = 8,
+    SHL   = 9,
+};
+
+enum class functional_safety : uint8_t
+{
+    NOT_IMPLEMENTED = 0,
+    IMPLEMENTED     = 1,
+};
+
+enum class ifm2_operand_order : uint8_t
+{
+    ORDER_B = 0,
+    ORDER_A = 1,
+};
+
+enum class ifm_scale_mode : uint8_t
+{
+    OPA_OPB_16 = 0,
+    OPA_32     = 1,
+    OPB_32     = 2,
+};
+
+enum class ifm_upscale_mode : uint8_t
+{
+    NONE    = 0,
+    NEAREST = 1,
+    ZEROS   = 2,
+};
+
+enum class kernel_decomposition : uint8_t
+{
+    D8X8 = 0,
+    D4X4 = 1,
+};
+
+enum class kernel_dilation : uint8_t
+{
+    NONE = 0,
+    X2   = 1,
+};
+
+enum class max_beats : uint8_t
+{
+    B64  = 0,
+    B128 = 1,
+    B256 = 2,
+};
+
+enum class mem_attr : uint8_t
+{
+    AXI0_OUTSTANDING_COUNTER0 = 0,
+    AXI0_OUTSTANDING_COUNTER1 = 1,
+    AXI1_OUTSTANDING_COUNTER2 = 2,
+    AXI1_OUTSTANDING_COUNTER3 = 3,
+};
+
+enum class ofm_scale_mode : uint8_t
+{
+    PER_CHANNEL = 0,
+    GLOBAL      = 1,
+};
+
+enum class parallel_mode : uint8_t
+{
+    SINGLE_CORE     = 0,
+    DUAL_CORE_DEPTH = 1,
+};
+
+enum class pmu_axi_channel : uint8_t
+{
+    RD_CMD        = 0,
+    RD_IFM        = 1,
+    RD_WEIGHTS    = 2,
+    RD_SCALE_BIAS = 3,
+    RD_MEM2MEM    = 4,
+    WR_OFM        = 8,
+    WR_MEM2MEM    = 9,
+};
+
+enum class pmu_event : uint16_t
+{
+    NO_EVENT                     = 0,
+    CYCLE                        = 17,
+    NPU_IDLE                     = 32,
+    CC_STALLED_ON_BLOCKDEP       = 33,
+    CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    NPU_ACTIVE                   = 35,
+    MAC_ACTIVE                   = 48,
+    MAC_ACTIVE_8BIT              = 49,
+    MAC_ACTIVE_16BIT             = 50,
+    MAC_DPU_ACTIVE               = 51,
+    MAC_STALLED_BY_WD_ACC        = 52,
+    MAC_STALLED_BY_WD            = 53,
+    MAC_STALLED_BY_ACC           = 54,
+    MAC_STALLED_BY_IB            = 55,
+    MAC_ACTIVE_32BIT             = 56,
+    MAC_STALLED_BY_INT_W         = 57,
+    MAC_STALLED_BY_INT_ACC       = 58,
+    AO_ACTIVE                    = 64,
+    AO_ACTIVE_8BIT               = 65,
+    AO_ACTIVE_16BIT              = 66,
+    AO_STALLED_BY_OFMP_OB        = 67,
+    AO_STALLED_BY_OFMP           = 68,
+    AO_STALLED_BY_OB             = 69,
+    AO_STALLED_BY_ACC_IB         = 70,
+    AO_STALLED_BY_ACC            = 71,
+    AO_STALLED_BY_IB             = 72,
+    WD_ACTIVE                    = 80,
+    WD_STALLED                   = 81,
+    WD_STALLED_BY_WS             = 82,
+    WD_STALLED_BY_WD_BUF         = 83,
+    WD_PARSE_ACTIVE              = 84,
+    WD_PARSE_STALLED             = 85,
+    WD_PARSE_STALLED_IN          = 86,
+    WD_PARSE_STALLED_OUT         = 87,
+    WD_TRANS_WS                  = 88,
+    WD_TRANS_WB                  = 89,
+    WD_TRANS_DW0                 = 90,
+    WD_TRANS_DW1                 = 91,
+    AXI0_RD_TRANS_ACCEPTED       = 128,
+    AXI0_RD_TRANS_COMPLETED      = 129,
+    AXI0_RD_DATA_BEAT_RECEIVED   = 130,
+    AXI0_RD_TRAN_REQ_STALLED     = 131,
+    AXI0_WR_TRANS_ACCEPTED       = 132,
+    AXI0_WR_TRANS_COMPLETED_M    = 133,
+    AXI0_WR_TRANS_COMPLETED_S    = 134,
+    AXI0_WR_DATA_BEAT_WRITTEN    = 135,
+    AXI0_WR_TRAN_REQ_STALLED     = 136,
+    AXI0_WR_DATA_BEAT_STALLED    = 137,
+    AXI0_ENABLED_CYCLES          = 140,
+    AXI0_RD_STALL_LIMIT          = 142,
+    AXI0_WR_STALL_LIMIT          = 143,
+    AXI_LATENCY_ANY              = 160,
+    AXI_LATENCY_32               = 161,
+    AXI_LATENCY_64               = 162,
+    AXI_LATENCY_128              = 163,
+    AXI_LATENCY_256              = 164,
+    AXI_LATENCY_512              = 165,
+    AXI_LATENCY_1024             = 166,
+    ECC_DMA                      = 176,
+    ECC_SB0                      = 177,
+    AXI1_RD_TRANS_ACCEPTED       = 384,
+    AXI1_RD_TRANS_COMPLETED      = 385,
+    AXI1_RD_DATA_BEAT_RECEIVED   = 386,
+    AXI1_RD_TRAN_REQ_STALLED     = 387,
+    AXI1_WR_TRANS_ACCEPTED       = 388,
+    AXI1_WR_TRANS_COMPLETED_M    = 389,
+    AXI1_WR_TRANS_COMPLETED_S    = 390,
+    AXI1_WR_DATA_BEAT_WRITTEN    = 391,
+    AXI1_WR_TRAN_REQ_STALLED     = 392,
+    AXI1_WR_DATA_BEAT_STALLED    = 393,
+    AXI1_ENABLED_CYCLES          = 396,
+    AXI1_RD_STALL_LIMIT          = 398,
+    AXI1_WR_STALL_LIMIT          = 399,
+    ECC_SB1                      = 433,
+};
+
+enum class pooling_mode : uint8_t
+{
+    MAX        = 0,
+    AVERAGE    = 1,
+    REDUCE_SUM = 2,
+};
+
+enum class privilege_level : uint8_t
+{
+    USER       = 0,
+    PRIVILEGED = 1,
+};
+
+enum class round_mode : uint8_t
+{
+    DBL      = 0,
+    TRUNCATE = 1,
+    NATURAL  = 2,
+};
+
+enum class security_level : uint8_t
+{
+    SECURE     = 0,
+    NON_SECURE = 1,
+};
+
+enum class state : uint8_t
+{
+    STOPPED = 0,
+    RUNNING = 1,
+};
+
+enum class wd_core_slice_state : uint8_t
+{
+    HEADER  = 0,
+    PALETTE = 1,
+    WEIGHTS = 2,
+};
+
+enum class wd_ctrl_state : uint8_t
+{
+    IDLE     = 0,
+    DRAIN    = 1,
+    OFD_INIT = 2,
+    OFD_RUN  = 3,
+};
+
+enum class weight_order : uint8_t
+{
+    DEPTH_FIRST       = 0,
+    PART_KERNEL_FIRST = 1,
+};
+
+#else
+
+enum acc_format
+{
+    ACC_FORMAT_I32 = 0,
+    ACC_FORMAT_I40 = 1,
+    ACC_FORMAT_F16 = 2,
+};
+
+enum activation_clip_range
+{
+    ACTIVATION_CLIP_RANGE_OFM_PRECISION = 0,
+    ACTIVATION_CLIP_RANGE_FORCE_UINT8   = 2,
+    ACTIVATION_CLIP_RANGE_FORCE_INT8    = 3,
+    ACTIVATION_CLIP_RANGE_FORCE_INT16   = 5,
+};
+
+enum activation_format
+{
+    ACTIVATION_FORMAT_NHWC    = 0,
+    ACTIVATION_FORMAT_NHCWB16 = 1,
+};
+
+enum activation_function
+{
+    ACTIVATION_FUNCTION_RELU    = 0,
+    ACTIVATION_FUNCTION_TANH    = 3,
+    ACTIVATION_FUNCTION_SIGMOID = 4,
+    ACTIVATION_FUNCTION_TABLE_0 = 16,
+    ACTIVATION_FUNCTION_TABLE_1 = 17,
+    ACTIVATION_FUNCTION_TABLE_2 = 18,
+    ACTIVATION_FUNCTION_TABLE_3 = 19,
+    ACTIVATION_FUNCTION_TABLE_4 = 20,
+    ACTIVATION_FUNCTION_TABLE_5 = 21,
+    ACTIVATION_FUNCTION_TABLE_6 = 22,
+    ACTIVATION_FUNCTION_TABLE_7 = 23,
+};
+
+enum activation_precision
+{
+    ACTIVATION_PRECISION_B8  = 0,
+    ACTIVATION_PRECISION_B16 = 1,
+    ACTIVATION_PRECISION_B32 = 2,
+    ACTIVATION_PRECISION_B64 = 3,
+};
+
+enum activation_type
+{
+    ACTIVATION_TYPE_UNSIGNED = 0,
+    ACTIVATION_TYPE_SIGNED   = 1,
+};
+
+enum axi_mem_encoding
+{
+    AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE                 = 0,
+    AXI_MEM_ENCODING_DEVICE_BUFFERABLE                     = 1,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE   = 2,
+    AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE       = 3,
+    AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE             = 4,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE           = 5,
+    AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE          = 6,
+    AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE = 7,
+    AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE                = 8,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE              = 9,
+    AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE             = 10,
+    AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE    = 11,
+};
+
+enum broadcast_mode
+{
+    BROADCAST_MODE_DISABLE = 0,
+    BROADCAST_MODE_ENABLE  = 1,
+};
+
+enum cmd0_opcode
+{
+    CMD0_OPCODE_NPU_OP_STOP               = 0,
+    CMD0_OPCODE_NPU_OP_IRQ                = 1,
+    CMD0_OPCODE_NPU_OP_CONV               = 2,
+    CMD0_OPCODE_NPU_OP_DEPTHWISE          = 3,
+    CMD0_OPCODE_NPU_OP_POOL               = 5,
+    CMD0_OPCODE_NPU_OP_ELEMENTWISE        = 6,
+    CMD0_OPCODE_NPU_OP_DMA_START          = 16,
+    CMD0_OPCODE_NPU_OP_DMA_WAIT           = 17,
+    CMD0_OPCODE_NPU_OP_KERNEL_WAIT        = 18,
+    CMD0_OPCODE_NPU_OP_PMU_MASK           = 19,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_TOP       = 256,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT      = 257,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT     = 258,
+    CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM    = 259,
+    CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1      = 260,
+    CMD0_OPCODE_NPU_SET_IFM_PRECISION     = 261,
+    CMD0_OPCODE_NPU_SET_IFM_UPSCALE       = 263,
+    CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT    = 265,
+    CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1     = 266,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1    = 267,
+    CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1    = 268,
+    CMD0_OPCODE_NPU_SET_IFM_IB_END        = 269,
+    CMD0_OPCODE_NPU_SET_IFM_REGION        = 271,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1      = 273,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1     = 274,
+    CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1      = 275,
+    CMD0_OPCODE_NPU_SET_OFM_PRECISION     = 276,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1  = 277,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1 = 278,
+    CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1  = 279,
+    CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT    = 280,
+    CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1     = 282,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1    = 283,
+    CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1    = 284,
+    CMD0_OPCODE_NPU_SET_OFM_REGION        = 287,
+    CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1   = 288,
+    CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1  = 289,
+    CMD0_OPCODE_NPU_SET_KERNEL_STRIDE     = 290,
+    CMD0_OPCODE_NPU_SET_PARALLEL_MODE     = 291,
+    CMD0_OPCODE_NPU_SET_ACC_FORMAT        = 292,
+    CMD0_OPCODE_NPU_SET_ACTIVATION        = 293,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MIN    = 294,
+    CMD0_OPCODE_NPU_SET_ACTIVATION_MAX    = 295,
+    CMD0_OPCODE_NPU_SET_WEIGHT_REGION     = 296,
+    CMD0_OPCODE_NPU_SET_SCALE_REGION      = 297,
+    CMD0_OPCODE_NPU_SET_AB_START          = 301,
+    CMD0_OPCODE_NPU_SET_BLOCKDEP          = 303,
+    CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION   = 304,
+    CMD0_OPCODE_NPU_SET_DMA0_DST_REGION   = 305,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE0        = 306,
+    CMD0_OPCODE_NPU_SET_DMA0_SIZE1        = 307,
+    CMD0_OPCODE_NPU_SET_IFM2_BROADCAST    = 384,
+    CMD0_OPCODE_NPU_SET_IFM2_SCALAR       = 385,
+    CMD0_OPCODE_NPU_SET_IFM2_PRECISION    = 389,
+    CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT   = 393,
+    CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1    = 394,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1   = 395,
+    CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1   = 396,
+    CMD0_OPCODE_NPU_SET_IFM2_IB_START     = 397,
+    CMD0_OPCODE_NPU_SET_IFM2_REGION       = 399,
+};
+
+enum cmd1_opcode
+{
+    CMD1_OPCODE_NPU_SET_IFM_BASE0      = 0,
+    CMD1_OPCODE_NPU_SET_IFM_BASE1      = 1,
+    CMD1_OPCODE_NPU_SET_IFM_BASE2      = 2,
+    CMD1_OPCODE_NPU_SET_IFM_BASE3      = 3,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_X   = 4,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y   = 5,
+    CMD1_OPCODE_NPU_SET_IFM_STRIDE_C   = 6,
+    CMD1_OPCODE_NPU_SET_OFM_BASE0      = 16,
+    CMD1_OPCODE_NPU_SET_OFM_BASE1      = 17,
+    CMD1_OPCODE_NPU_SET_OFM_BASE2      = 18,
+    CMD1_OPCODE_NPU_SET_OFM_BASE3      = 19,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_X   = 20,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y   = 21,
+    CMD1_OPCODE_NPU_SET_OFM_STRIDE_C   = 22,
+    CMD1_OPCODE_NPU_SET_WEIGHT_BASE    = 32,
+    CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH  = 33,
+    CMD1_OPCODE_NPU_SET_SCALE_BASE     = 34,
+    CMD1_OPCODE_NPU_SET_SCALE_LENGTH   = 35,
+    CMD1_OPCODE_NPU_SET_OFM_SCALE      = 36,
+    CMD1_OPCODE_NPU_SET_OPA_SCALE      = 37,
+    CMD1_OPCODE_NPU_SET_OPB_SCALE      = 38,
+    CMD1_OPCODE_NPU_SET_DMA0_SRC       = 48,
+    CMD1_OPCODE_NPU_SET_DMA0_DST       = 49,
+    CMD1_OPCODE_NPU_SET_DMA0_LEN       = 50,
+    CMD1_OPCODE_NPU_SET_DMA0_SKIP0     = 51,
+    CMD1_OPCODE_NPU_SET_DMA0_SKIP1     = 52,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE0     = 128,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE1     = 129,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE2     = 130,
+    CMD1_OPCODE_NPU_SET_IFM2_BASE3     = 131,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X  = 132,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y  = 133,
+    CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C  = 134,
+    CMD1_OPCODE_NPU_SET_WEIGHT1_BASE   = 144,
+    CMD1_OPCODE_NPU_SET_WEIGHT1_LENGTH = 145,
+    CMD1_OPCODE_NPU_SET_SCALE1_BASE    = 146,
+    CMD1_OPCODE_NPU_SET_SCALE1_LENGTH  = 147,
+};
+
+enum cmd_ctrl
+{
+    CMD_CTRL_CMD0_CTRL = 0,
+    CMD_CTRL_CMD1_CTRL = 1,
+};
+
+enum custom_dma
+{
+    CUSTOM_DMA_NOT_IMPLEMENTED = 0,
+    CUSTOM_DMA_IMPLEMENTED     = 1,
+};
+
+enum dma_fault_src
+{
+    DMA_FAULT_SRC_AXI_M0 = 0,
+    DMA_FAULT_SRC_AXI_M1 = 1,
+};
+
+enum dma_region_mode
+{
+    DMA_REGION_MODE_EXTERNAL = 0,
+    DMA_REGION_MODE_INTERNAL = 1,
+};
+
+enum dma_stride_mode
+{
+    DMA_STRIDE_MODE_D1 = 0,
+    DMA_STRIDE_MODE_D2 = 1,
+    DMA_STRIDE_MODE_D3 = 2,
+};
+
+enum elementwise_mode
+{
+    ELEMENTWISE_MODE_MUL   = 0,
+    ELEMENTWISE_MODE_ADD   = 1,
+    ELEMENTWISE_MODE_SUB   = 2,
+    ELEMENTWISE_MODE_MIN   = 3,
+    ELEMENTWISE_MODE_MAX   = 4,
+    ELEMENTWISE_MODE_LRELU = 5,
+    ELEMENTWISE_MODE_ABS   = 6,
+    ELEMENTWISE_MODE_CLZ   = 7,
+    ELEMENTWISE_MODE_SHR   = 8,
+    ELEMENTWISE_MODE_SHL   = 9,
+};
+
+enum functional_safety
+{
+    FUNCTIONAL_SAFETY_NOT_IMPLEMENTED = 0,
+    FUNCTIONAL_SAFETY_IMPLEMENTED     = 1,
+};
+
+enum ifm2_operand_order
+{
+    IFM2_OPERAND_ORDER_ORDER_B = 0,
+    IFM2_OPERAND_ORDER_ORDER_A = 1,
+};
+
+enum ifm_scale_mode
+{
+    IFM_SCALE_MODE_OPA_OPB_16 = 0,
+    IFM_SCALE_MODE_OPA_32     = 1,
+    IFM_SCALE_MODE_OPB_32     = 2,
+};
+
+enum ifm_upscale_mode
+{
+    IFM_UPSCALE_MODE_NONE    = 0,
+    IFM_UPSCALE_MODE_NEAREST = 1,
+    IFM_UPSCALE_MODE_ZEROS   = 2,
+};
+
+enum kernel_decomposition
+{
+    KERNEL_DECOMPOSITION_D8X8 = 0,
+    KERNEL_DECOMPOSITION_D4X4 = 1,
+};
+
+enum kernel_dilation
+{
+    KERNEL_DILATION_NONE = 0,
+    KERNEL_DILATION_X2   = 1,
+};
+
+enum max_beats
+{
+    MAX_BEATS_B64  = 0,
+    MAX_BEATS_B128 = 1,
+    MAX_BEATS_B256 = 2,
+};
+
+enum mem_attr
+{
+    MEM_ATTR_AXI0_OUTSTANDING_COUNTER0 = 0,
+    MEM_ATTR_AXI0_OUTSTANDING_COUNTER1 = 1,
+    MEM_ATTR_AXI1_OUTSTANDING_COUNTER2 = 2,
+    MEM_ATTR_AXI1_OUTSTANDING_COUNTER3 = 3,
+};
+
+enum ofm_scale_mode
+{
+    OFM_SCALE_MODE_PER_CHANNEL = 0,
+    OFM_SCALE_MODE_GLOBAL      = 1,
+};
+
+enum parallel_mode
+{
+    PARALLEL_MODE_SINGLE_CORE     = 0,
+    PARALLEL_MODE_DUAL_CORE_DEPTH = 1,
+};
+
+enum pmu_axi_channel
+{
+    PMU_AXI_CHANNEL_RD_CMD        = 0,
+    PMU_AXI_CHANNEL_RD_IFM        = 1,
+    PMU_AXI_CHANNEL_RD_WEIGHTS    = 2,
+    PMU_AXI_CHANNEL_RD_SCALE_BIAS = 3,
+    PMU_AXI_CHANNEL_RD_MEM2MEM    = 4,
+    PMU_AXI_CHANNEL_WR_OFM        = 8,
+    PMU_AXI_CHANNEL_WR_MEM2MEM    = 9,
+};
+
+enum pmu_event // Sparse encoding spanning 0..433; every AXI1_* / ECC_SB1 value equals its AXI0_* / ECC_SB0 counterpart plus 256
+{
+    PMU_EVENT_NO_EVENT                     = 0,
+    PMU_EVENT_CYCLE                        = 17,
+    PMU_EVENT_NPU_IDLE                     = 32,
+    PMU_EVENT_CC_STALLED_ON_BLOCKDEP       = 33,
+    PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG = 34,
+    PMU_EVENT_NPU_ACTIVE                   = 35,
+    PMU_EVENT_MAC_ACTIVE                   = 48,
+    PMU_EVENT_MAC_ACTIVE_8BIT              = 49,
+    PMU_EVENT_MAC_ACTIVE_16BIT             = 50,
+    PMU_EVENT_MAC_DPU_ACTIVE               = 51,
+    PMU_EVENT_MAC_STALLED_BY_WD_ACC        = 52,
+    PMU_EVENT_MAC_STALLED_BY_WD            = 53,
+    PMU_EVENT_MAC_STALLED_BY_ACC           = 54,
+    PMU_EVENT_MAC_STALLED_BY_IB            = 55,
+    PMU_EVENT_MAC_ACTIVE_32BIT             = 56,
+    PMU_EVENT_MAC_STALLED_BY_INT_W         = 57,
+    PMU_EVENT_MAC_STALLED_BY_INT_ACC       = 58,
+    PMU_EVENT_AO_ACTIVE                    = 64,
+    PMU_EVENT_AO_ACTIVE_8BIT               = 65,
+    PMU_EVENT_AO_ACTIVE_16BIT              = 66,
+    PMU_EVENT_AO_STALLED_BY_OFMP_OB        = 67,
+    PMU_EVENT_AO_STALLED_BY_OFMP           = 68,
+    PMU_EVENT_AO_STALLED_BY_OB             = 69,
+    PMU_EVENT_AO_STALLED_BY_ACC_IB         = 70,
+    PMU_EVENT_AO_STALLED_BY_ACC            = 71,
+    PMU_EVENT_AO_STALLED_BY_IB             = 72,
+    PMU_EVENT_WD_ACTIVE                    = 80,
+    PMU_EVENT_WD_STALLED                   = 81,
+    PMU_EVENT_WD_STALLED_BY_WS             = 82,
+    PMU_EVENT_WD_STALLED_BY_WD_BUF         = 83,
+    PMU_EVENT_WD_PARSE_ACTIVE              = 84,
+    PMU_EVENT_WD_PARSE_STALLED             = 85,
+    PMU_EVENT_WD_PARSE_STALLED_IN          = 86,
+    PMU_EVENT_WD_PARSE_STALLED_OUT         = 87,
+    PMU_EVENT_WD_TRANS_WS                  = 88,
+    PMU_EVENT_WD_TRANS_WB                  = 89,
+    PMU_EVENT_WD_TRANS_DW0                 = 90,
+    PMU_EVENT_WD_TRANS_DW1                 = 91,
+    PMU_EVENT_AXI0_RD_TRANS_ACCEPTED       = 128,
+    PMU_EVENT_AXI0_RD_TRANS_COMPLETED      = 129,
+    PMU_EVENT_AXI0_RD_DATA_BEAT_RECEIVED   = 130,
+    PMU_EVENT_AXI0_RD_TRAN_REQ_STALLED     = 131,
+    PMU_EVENT_AXI0_WR_TRANS_ACCEPTED       = 132,
+    PMU_EVENT_AXI0_WR_TRANS_COMPLETED_M    = 133,
+    PMU_EVENT_AXI0_WR_TRANS_COMPLETED_S    = 134,
+    PMU_EVENT_AXI0_WR_DATA_BEAT_WRITTEN    = 135,
+    PMU_EVENT_AXI0_WR_TRAN_REQ_STALLED     = 136,
+    PMU_EVENT_AXI0_WR_DATA_BEAT_STALLED    = 137,
+    PMU_EVENT_AXI0_ENABLED_CYCLES          = 140,
+    PMU_EVENT_AXI0_RD_STALL_LIMIT          = 142,
+    PMU_EVENT_AXI0_WR_STALL_LIMIT          = 143,
+    PMU_EVENT_AXI_LATENCY_ANY              = 160,
+    PMU_EVENT_AXI_LATENCY_32               = 161,
+    PMU_EVENT_AXI_LATENCY_64               = 162,
+    PMU_EVENT_AXI_LATENCY_128              = 163,
+    PMU_EVENT_AXI_LATENCY_256              = 164,
+    PMU_EVENT_AXI_LATENCY_512              = 165,
+    PMU_EVENT_AXI_LATENCY_1024             = 166,
+    PMU_EVENT_ECC_DMA                      = 176,
+    PMU_EVENT_ECC_SB0                      = 177,
+    PMU_EVENT_AXI1_RD_TRANS_ACCEPTED       = 384,
+    PMU_EVENT_AXI1_RD_TRANS_COMPLETED      = 385,
+    PMU_EVENT_AXI1_RD_DATA_BEAT_RECEIVED   = 386,
+    PMU_EVENT_AXI1_RD_TRAN_REQ_STALLED     = 387,
+    PMU_EVENT_AXI1_WR_TRANS_ACCEPTED       = 388,
+    PMU_EVENT_AXI1_WR_TRANS_COMPLETED_M    = 389,
+    PMU_EVENT_AXI1_WR_TRANS_COMPLETED_S    = 390,
+    PMU_EVENT_AXI1_WR_DATA_BEAT_WRITTEN    = 391,
+    PMU_EVENT_AXI1_WR_TRAN_REQ_STALLED     = 392,
+    PMU_EVENT_AXI1_WR_DATA_BEAT_STALLED    = 393,
+    PMU_EVENT_AXI1_ENABLED_CYCLES          = 396,
+    PMU_EVENT_AXI1_RD_STALL_LIMIT          = 398,
+    PMU_EVENT_AXI1_WR_STALL_LIMIT          = 399,
+    PMU_EVENT_ECC_SB1                      = 433,
+};
+
+enum pooling_mode // Encodings 0..2; pooling_mode_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    POOLING_MODE_MAX        = 0,
+    POOLING_MODE_AVERAGE    = 1,
+    POOLING_MODE_REDUCE_SUM = 2,
+};
+
+enum privilege_level // Encodings 0..1; privilege_level_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    PRIVILEGE_LEVEL_USER       = 0,
+    PRIVILEGE_LEVEL_PRIVILEGED = 1,
+};
+
+enum round_mode // Encodings 0..2; round_mode_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    ROUND_MODE_DBL      = 0,
+    ROUND_MODE_TRUNCATE = 1,
+    ROUND_MODE_NATURAL  = 2,
+};
+
+enum security_level // Encodings 0..1 (note SECURE is 0); security_level_str[] (NPU_DISASSEMBLE builds) follows the same order
+{
+    SECURITY_LEVEL_SECURE     = 0,
+    SECURITY_LEVEL_NON_SECURE = 1,
+};
+
+enum state // Encodings 0..1, the same 0 = Stopped / 1 = Running convention described for the status_r.state bit
+{
+    STATE_STOPPED = 0,
+    STATE_RUNNING = 1,
+};
+
+enum wd_core_slice_state // Encodings 0..2; wd_core_slice_state_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    WD_CORE_SLICE_STATE_HEADER  = 0,
+    WD_CORE_SLICE_STATE_PALETTE = 1,
+    WD_CORE_SLICE_STATE_WEIGHTS = 2,
+};
+
+enum wd_ctrl_state // Encodings 0..3; wd_ctrl_state_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    WD_CTRL_STATE_IDLE     = 0,
+    WD_CTRL_STATE_DRAIN    = 1,
+    WD_CTRL_STATE_OFD_INIT = 2,
+    WD_CTRL_STATE_OFD_RUN  = 3,
+};
+
+enum weight_order // Encodings 0..1; weight_order_str[] (NPU_DISASSEMBLE builds) lists the same names in value order
+{
+    WEIGHT_ORDER_DEPTH_FIRST       = 0,
+    WEIGHT_ORDER_PART_KERNEL_FIRST = 1,
+};
+
+#endif
+
+#ifdef NPU_DISASSEMBLE
+
+static const char *acc_format_str[] = { // Three ACC_FORMAT_* names; presumably entry i names acc_format value i (verify against the enum)
+    "ACC_FORMAT_I32",
+    "ACC_FORMAT_I40",
+    "ACC_FORMAT_F16",
+};
+
+static const char *activation_clip_range_str[] = { // Names at indices 0, 2, 3, 5; "****" pads indices 1 and 4 (presumably unassigned enum values - verify)
+    "ACTIVATION_CLIP_RANGE_OFM_PRECISION",
+    "****",
+    "ACTIVATION_CLIP_RANGE_FORCE_UINT8",
+    "ACTIVATION_CLIP_RANGE_FORCE_INT8",
+    "****",
+    "ACTIVATION_CLIP_RANGE_FORCE_INT16",
+};
+
+static const char *activation_format_str[] = { // Two ACTIVATION_FORMAT_* names; presumably entry i names activation_format value i (verify)
+    "ACTIVATION_FORMAT_NHWC",
+    "ACTIVATION_FORMAT_NHCWB16",
+};
+
+static const char *activation_function_str[] = { // Names at indices 0, 3, 4 and 16..23 (TABLE_0..TABLE_7); "****" pads the other slots
+    "ACTIVATION_FUNCTION_RELU",
+    "****",
+    "****",
+    "ACTIVATION_FUNCTION_TANH",
+    "ACTIVATION_FUNCTION_SIGMOID",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "ACTIVATION_FUNCTION_TABLE_0",
+    "ACTIVATION_FUNCTION_TABLE_1",
+    "ACTIVATION_FUNCTION_TABLE_2",
+    "ACTIVATION_FUNCTION_TABLE_3",
+    "ACTIVATION_FUNCTION_TABLE_4",
+    "ACTIVATION_FUNCTION_TABLE_5",
+    "ACTIVATION_FUNCTION_TABLE_6",
+    "ACTIVATION_FUNCTION_TABLE_7",
+};
+
+static const char *activation_precision_str[] = { // Four ACTIVATION_PRECISION_* names; presumably entry i names activation_precision value i (verify)
+    "ACTIVATION_PRECISION_B8",
+    "ACTIVATION_PRECISION_B16",
+    "ACTIVATION_PRECISION_B32",
+    "ACTIVATION_PRECISION_B64",
+};
+
+static const char *activation_type_str[] = { // Two ACTIVATION_TYPE_* names; presumably entry i names activation_type value i (verify)
+    "ACTIVATION_TYPE_UNSIGNED",
+    "ACTIVATION_TYPE_SIGNED",
+};
+
+static const char *axi_mem_encoding_str[] = { // Twelve AXI_MEM_ENCODING_* names, no gaps; presumably entry i names axi_mem_encoding value i (verify)
+    "AXI_MEM_ENCODING_DEVICE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_DEVICE_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_NON_BUFFERABLE",
+    "AXI_MEM_ENCODING_NORMAL_NON_CACHEABLE_BUFFERABLE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_THROUGH_READ_AND_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_NO_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_WRITE_ALLOCATE",
+    "AXI_MEM_ENCODING_WRITE_BACK_READ_AND_WRITE_ALLOCATE",
+};
+
+static const char *broadcast_mode_str[] = { // Two BROADCAST_MODE_* names; presumably entry i names broadcast_mode value i (verify)
+    "BROADCAST_MODE_DISABLE",
+    "BROADCAST_MODE_ENABLE",
+};
+
+static const char *cmd0_opcode_str[] = {
+    "CMD0_OPCODE_NPU_OP_STOP",
+    "CMD0_OPCODE_NPU_OP_IRQ",
+    "CMD0_OPCODE_NPU_OP_CONV",
+    "CMD0_OPCODE_NPU_OP_DEPTHWISE",
+    "****",
+    "CMD0_OPCODE_NPU_OP_POOL",
+    "CMD0_OPCODE_NPU_OP_ELEMENTWISE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_OP_DMA_START",
+    "CMD0_OPCODE_NPU_OP_DMA_WAIT",
+    "CMD0_OPCODE_NPU_OP_KERNEL_WAIT",
+    "CMD0_OPCODE_NPU_OP_PMU_MASK",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_TOP",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_LEFT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_RIGHT",
+    "CMD0_OPCODE_NPU_SET_IFM_PAD_BOTTOM",
+    "CMD0_OPCODE_NPU_SET_IFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_PRECISION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_UPSCALE",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_HEIGHT1_M1",
+    "CMD0_OPCODE_NPU_SET_IFM_IB_END",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM_REGION",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_PRECISION",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_BLK_DEPTH_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_ZERO_POINT",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_OFM_HEIGHT1_M1",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_OFM_REGION",
+    "CMD0_OPCODE_NPU_SET_KERNEL_WIDTH_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_HEIGHT_M1",
+    "CMD0_OPCODE_NPU_SET_KERNEL_STRIDE",
+    "CMD0_OPCODE_NPU_SET_PARALLEL_MODE",
+    "CMD0_OPCODE_NPU_SET_ACC_FORMAT",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MIN",
+    "CMD0_OPCODE_NPU_SET_ACTIVATION_MAX",
+    "CMD0_OPCODE_NPU_SET_WEIGHT_REGION",
+    "CMD0_OPCODE_NPU_SET_SCALE_REGION",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_AB_START",
+    "****",
+    "CMD0_OPCODE_NPU_SET_BLOCKDEP",
+    "CMD0_OPCODE_NPU_SET_DMA0_SRC_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_DST_REGION",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE0",
+    "CMD0_OPCODE_NPU_SET_DMA0_SIZE1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_BROADCAST",
+    "CMD0_OPCODE_NPU_SET_IFM2_SCALAR",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_PRECISION",
+    "****",
+    "****",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_ZERO_POINT",
+    "CMD0_OPCODE_NPU_SET_IFM2_WIDTH0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT0_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_HEIGHT1_M1",
+    "CMD0_OPCODE_NPU_SET_IFM2_IB_START",
+    "****",
+    "CMD0_OPCODE_NPU_SET_IFM2_REGION",
+};
+
+static const char *cmd1_opcode_str[] = {
+    "CMD1_OPCODE_NPU_SET_IFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE0",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE1",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE2",
+    "CMD1_OPCODE_NPU_SET_OFM_BASE3",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_OFM_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT_LENGTH",
+    "CMD1_OPCODE_NPU_SET_SCALE_BASE",
+    "CMD1_OPCODE_NPU_SET_SCALE_LENGTH",
+    "CMD1_OPCODE_NPU_SET_OFM_SCALE",
+    "CMD1_OPCODE_NPU_SET_OPA_SCALE",
+    "CMD1_OPCODE_NPU_SET_OPB_SCALE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_DMA0_SRC",
+    "CMD1_OPCODE_NPU_SET_DMA0_DST",
+    "CMD1_OPCODE_NPU_SET_DMA0_LEN",
+    "CMD1_OPCODE_NPU_SET_DMA0_SKIP0",
+    "CMD1_OPCODE_NPU_SET_DMA0_SKIP1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE0",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE1",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE2",
+    "CMD1_OPCODE_NPU_SET_IFM2_BASE3",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_X",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_Y",
+    "CMD1_OPCODE_NPU_SET_IFM2_STRIDE_C",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "CMD1_OPCODE_NPU_SET_WEIGHT1_BASE",
+    "CMD1_OPCODE_NPU_SET_WEIGHT1_LENGTH",
+    "CMD1_OPCODE_NPU_SET_SCALE1_BASE",
+    "CMD1_OPCODE_NPU_SET_SCALE1_LENGTH",
+};
+
+static const char *cmd_ctrl_str[] = { // Two CMD_CTRL_* names; presumably entry i names cmd_ctrl value i (verify)
+    "CMD_CTRL_CMD0_CTRL",
+    "CMD_CTRL_CMD1_CTRL",
+};
+
+static const char *custom_dma_str[] = { // Two CUSTOM_DMA_* names; presumably entry i names custom_dma value i (verify)
+    "CUSTOM_DMA_NOT_IMPLEMENTED",
+    "CUSTOM_DMA_IMPLEMENTED",
+};
+
+static const char *dma_fault_src_str[] = { // Two DMA_FAULT_SRC_* names; presumably entry i names dma_fault_src value i (verify)
+    "DMA_FAULT_SRC_AXI_M0",
+    "DMA_FAULT_SRC_AXI_M1",
+};
+
+static const char *dma_region_mode_str[] = { // Two DMA_REGION_MODE_* names; presumably entry i names dma_region_mode value i (verify)
+    "DMA_REGION_MODE_EXTERNAL",
+    "DMA_REGION_MODE_INTERNAL",
+};
+
+static const char *dma_stride_mode_str[] = { // Three DMA_STRIDE_MODE_* names; presumably entry i names dma_stride_mode value i (verify)
+    "DMA_STRIDE_MODE_D1",
+    "DMA_STRIDE_MODE_D2",
+    "DMA_STRIDE_MODE_D3",
+};
+
+static const char *elementwise_mode_str[] = { // Ten ELEMENTWISE_MODE_* names, no gaps; presumably entry i names elementwise_mode value i (verify)
+    "ELEMENTWISE_MODE_MUL",
+    "ELEMENTWISE_MODE_ADD",
+    "ELEMENTWISE_MODE_SUB",
+    "ELEMENTWISE_MODE_MIN",
+    "ELEMENTWISE_MODE_MAX",
+    "ELEMENTWISE_MODE_LRELU",
+    "ELEMENTWISE_MODE_ABS",
+    "ELEMENTWISE_MODE_CLZ",
+    "ELEMENTWISE_MODE_SHR",
+    "ELEMENTWISE_MODE_SHL",
+};
+
+static const char *functional_safety_str[] = { // Two FUNCTIONAL_SAFETY_* names; presumably entry i names functional_safety value i (verify)
+    "FUNCTIONAL_SAFETY_NOT_IMPLEMENTED",
+    "FUNCTIONAL_SAFETY_IMPLEMENTED",
+};
+
+static const char *ifm2_operand_order_str[] = { // ORDER_B is listed first (index 0), ORDER_A second; presumably mirrors the ifm2_operand_order values (verify)
+    "IFM2_OPERAND_ORDER_ORDER_B",
+    "IFM2_OPERAND_ORDER_ORDER_A",
+};
+
+static const char *ifm_scale_mode_str[] = { // Three IFM_SCALE_MODE_* names; presumably entry i names ifm_scale_mode value i (verify)
+    "IFM_SCALE_MODE_OPA_OPB_16",
+    "IFM_SCALE_MODE_OPA_32",
+    "IFM_SCALE_MODE_OPB_32",
+};
+
+static const char *ifm_upscale_mode_str[] = { // Entry i names the IFM_UPSCALE_MODE_* enumerator whose value is i (NONE = 0, NEAREST = 1, ZEROS = 2)
+    "IFM_UPSCALE_MODE_NONE",
+    "IFM_UPSCALE_MODE_NEAREST",
+    "IFM_UPSCALE_MODE_ZEROS",
+};
+
+static const char *kernel_decomposition_str[] = { // Entry i names the enum kernel_decomposition enumerator whose value is i
+    "KERNEL_DECOMPOSITION_D8X8",
+    "KERNEL_DECOMPOSITION_D4X4",
+};
+
+static const char *kernel_dilation_str[] = { // Entry i names the enum kernel_dilation enumerator whose value is i
+    "KERNEL_DILATION_NONE",
+    "KERNEL_DILATION_X2",
+};
+
+static const char *max_beats_str[] = { // Entry i names the enum max_beats enumerator whose value is i
+    "MAX_BEATS_B64",
+    "MAX_BEATS_B128",
+    "MAX_BEATS_B256",
+};
+
+static const char *mem_attr_str[] = { // Entry i names the enum mem_attr enumerator whose value is i
+    "MEM_ATTR_AXI0_OUTSTANDING_COUNTER0",
+    "MEM_ATTR_AXI0_OUTSTANDING_COUNTER1",
+    "MEM_ATTR_AXI1_OUTSTANDING_COUNTER2",
+    "MEM_ATTR_AXI1_OUTSTANDING_COUNTER3",
+};
+
+static const char *ofm_scale_mode_str[] = { // Entry i names the enum ofm_scale_mode enumerator whose value is i
+    "OFM_SCALE_MODE_PER_CHANNEL",
+    "OFM_SCALE_MODE_GLOBAL",
+};
+
+static const char *parallel_mode_str[] = { // Entry i names the enum parallel_mode enumerator whose value is i
+    "PARALLEL_MODE_SINGLE_CORE",
+    "PARALLEL_MODE_DUAL_CORE_DEPTH",
+};
+
+static const char *pmu_axi_channel_str[] = { // Entry i names the enum pmu_axi_channel enumerator whose value is i; "****" fills the unassigned values 5..7
+    "PMU_AXI_CHANNEL_RD_CMD",
+    "PMU_AXI_CHANNEL_RD_IFM",
+    "PMU_AXI_CHANNEL_RD_WEIGHTS",
+    "PMU_AXI_CHANNEL_RD_SCALE_BIAS",
+    "PMU_AXI_CHANNEL_RD_MEM2MEM",
+    "****",
+    "****",
+    "****",
+    "PMU_AXI_CHANNEL_WR_OFM",
+    "PMU_AXI_CHANNEL_WR_MEM2MEM",
+};
+
+static const char *pmu_event_str[] = {
+    "PMU_EVENT_NO_EVENT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_CYCLE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_NPU_IDLE",
+    "PMU_EVENT_CC_STALLED_ON_BLOCKDEP",
+    "PMU_EVENT_CC_STALLED_ON_SHRAM_RECONFIG",
+    "PMU_EVENT_NPU_ACTIVE",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_MAC_ACTIVE",
+    "PMU_EVENT_MAC_ACTIVE_8BIT",
+    "PMU_EVENT_MAC_ACTIVE_16BIT",
+    "PMU_EVENT_MAC_DPU_ACTIVE",
+    "PMU_EVENT_MAC_STALLED_BY_WD_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_WD",
+    "PMU_EVENT_MAC_STALLED_BY_ACC",
+    "PMU_EVENT_MAC_STALLED_BY_IB",
+    "PMU_EVENT_MAC_ACTIVE_32BIT",
+    "PMU_EVENT_MAC_STALLED_BY_INT_W",
+    "PMU_EVENT_MAC_STALLED_BY_INT_ACC",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AO_ACTIVE",
+    "PMU_EVENT_AO_ACTIVE_8BIT",
+    "PMU_EVENT_AO_ACTIVE_16BIT",
+    "PMU_EVENT_AO_STALLED_BY_OFMP_OB",
+    "PMU_EVENT_AO_STALLED_BY_OFMP",
+    "PMU_EVENT_AO_STALLED_BY_OB",
+    "PMU_EVENT_AO_STALLED_BY_ACC_IB",
+    "PMU_EVENT_AO_STALLED_BY_ACC",
+    "PMU_EVENT_AO_STALLED_BY_IB",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_WD_ACTIVE",
+    "PMU_EVENT_WD_STALLED",
+    "PMU_EVENT_WD_STALLED_BY_WS",
+    "PMU_EVENT_WD_STALLED_BY_WD_BUF",
+    "PMU_EVENT_WD_PARSE_ACTIVE",
+    "PMU_EVENT_WD_PARSE_STALLED",
+    "PMU_EVENT_WD_PARSE_STALLED_IN",
+    "PMU_EVENT_WD_PARSE_STALLED_OUT",
+    "PMU_EVENT_WD_TRANS_WS",
+    "PMU_EVENT_WD_TRANS_WB",
+    "PMU_EVENT_WD_TRANS_DW0",
+    "PMU_EVENT_WD_TRANS_DW1",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI0_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI0_RD_TRANS_COMPLETED",
+    "PMU_EVENT_AXI0_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_AXI0_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI0_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI0_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_AXI0_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_AXI0_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_AXI0_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI0_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_AXI0_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_AXI0_RD_STALL_LIMIT",
+    "PMU_EVENT_AXI0_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI_LATENCY_ANY",
+    "PMU_EVENT_AXI_LATENCY_32",
+    "PMU_EVENT_AXI_LATENCY_64",
+    "PMU_EVENT_AXI_LATENCY_128",
+    "PMU_EVENT_AXI_LATENCY_256",
+    "PMU_EVENT_AXI_LATENCY_512",
+    "PMU_EVENT_AXI_LATENCY_1024",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_ECC_DMA",
+    "PMU_EVENT_ECC_SB0",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_AXI1_RD_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI1_RD_TRANS_COMPLETED",
+    "PMU_EVENT_AXI1_RD_DATA_BEAT_RECEIVED",
+    "PMU_EVENT_AXI1_RD_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI1_WR_TRANS_ACCEPTED",
+    "PMU_EVENT_AXI1_WR_TRANS_COMPLETED_M",
+    "PMU_EVENT_AXI1_WR_TRANS_COMPLETED_S",
+    "PMU_EVENT_AXI1_WR_DATA_BEAT_WRITTEN",
+    "PMU_EVENT_AXI1_WR_TRAN_REQ_STALLED",
+    "PMU_EVENT_AXI1_WR_DATA_BEAT_STALLED",
+    "****",
+    "****",
+    "PMU_EVENT_AXI1_ENABLED_CYCLES",
+    "****",
+    "PMU_EVENT_AXI1_RD_STALL_LIMIT",
+    "PMU_EVENT_AXI1_WR_STALL_LIMIT",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "****",
+    "PMU_EVENT_ECC_SB1",
+};
+
+static const char *pooling_mode_str[] = { // Entry i names the enum pooling_mode enumerator whose value is i
+    "POOLING_MODE_MAX",
+    "POOLING_MODE_AVERAGE",
+    "POOLING_MODE_REDUCE_SUM",
+};
+
+static const char *privilege_level_str[] = { // Entry i names the enum privilege_level enumerator whose value is i
+    "PRIVILEGE_LEVEL_USER",
+    "PRIVILEGE_LEVEL_PRIVILEGED",
+};
+
+static const char *round_mode_str[] = { // Entry i names the enum round_mode enumerator whose value is i
+    "ROUND_MODE_DBL",
+    "ROUND_MODE_TRUNCATE",
+    "ROUND_MODE_NATURAL",
+};
+
+static const char *security_level_str[] = { // Entry i names the enum security_level enumerator whose value is i
+    "SECURITY_LEVEL_SECURE",
+    "SECURITY_LEVEL_NON_SECURE",
+};
+
+static const char *state_str[] = { // Entry i names the enum state enumerator whose value is i
+    "STATE_STOPPED",
+    "STATE_RUNNING",
+};
+
+static const char *wd_core_slice_state_str[] = { // Entry i names the enum wd_core_slice_state enumerator whose value is i
+    "WD_CORE_SLICE_STATE_HEADER",
+    "WD_CORE_SLICE_STATE_PALETTE",
+    "WD_CORE_SLICE_STATE_WEIGHTS",
+};
+
+static const char *wd_ctrl_state_str[] = { // Entry i names the enum wd_ctrl_state enumerator whose value is i
+    "WD_CTRL_STATE_IDLE",
+    "WD_CTRL_STATE_DRAIN",
+    "WD_CTRL_STATE_OFD_INIT",
+    "WD_CTRL_STATE_OFD_RUN",
+};
+
+static const char *weight_order_str[] = { // Entry i names the enum weight_order enumerator whose value is i
+    "WEIGHT_ORDER_DEPTH_FIRST",
+    "WEIGHT_ORDER_PART_KERNEL_FIRST",
+};
+
+#endif
+
+// Register type structs
+// id_r - ID register. C builds expose bit-fields; C++ builds wrap one 32-bit word behind get_/set_ accessors
+struct id_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t version_status : 4; // This is the version of the product
+            uint32_t version_minor : 4;  // This is the n for the P part of an RnPn release number
+            uint32_t version_major : 4;  // This is the n for the R part of an RnPn release number
+            uint32_t product_major : 4;  // Product major ID number (unique per base product)
+            uint32_t arch_patch_rev : 4; // This is the patch number of the architecture version a.b
+            uint32_t
+                arch_minor_rev : 8; // This is the minor architecture version number, b in the architecture version a.b
+            uint32_t
+                arch_major_rev : 4; // This is the major architecture version number, a in the architecture version a.b
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw register image; field layout matches the C bit-field order (version_status in bits [3:0])
+
+  public:
+    CONSTEXPR id_r() : word0(268853249) {} // Default image 268853249 == 0x10066001
+    CONSTEXPR id_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    id_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_status() const // reads bits [3:0]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
+        return value;
+    }
+    uint32_t get_version_status() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
+        return value;
+    }
+    // Setters: the field mask is shifted into place BEFORE it is inverted, so only the target bits are cleared.
+    CONSTEXPR id_r &set_version_status(uint32_t value) // writes bits [3:0]; value is truncated to 4 bits
+    {
+        word0 = (~(((1U << 4) - 1) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    volatile id_r &set_version_status(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_minor() const // reads bits [7:4]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
+        return value;
+    }
+    uint32_t get_version_minor() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
+        return value;
+    }
+    CONSTEXPR id_r &set_version_minor(uint32_t value) // writes bits [7:4]; bits [3:0] are preserved
+    {
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    volatile id_r &set_version_minor(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_version_major() const // reads bits [11:8]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 8);
+        return value;
+    }
+    uint32_t get_version_major() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 8);
+        return value;
+    }
+    CONSTEXPR id_r &set_version_major(uint32_t value) // writes bits [11:8]; lower fields are preserved
+    {
+        word0 = (~(((1U << 4) - 1) << 8) & word0) | ((((1U << 4) - 1) & value) << 8);
+        return *this;
+    }
+    volatile id_r &set_version_major(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 8) & word0) | ((((1U << 4) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product_major() const // reads bits [15:12]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
+        return value;
+    }
+    uint32_t get_product_major() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
+        return value;
+    }
+    CONSTEXPR id_r &set_product_major(uint32_t value) // writes bits [15:12]; lower fields are preserved
+    {
+        word0 = (~(((1U << 4) - 1) << 12) & word0) | ((((1U << 4) - 1) & value) << 12);
+        return *this;
+    }
+    volatile id_r &set_product_major(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 12) & word0) | ((((1U << 4) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_patch_rev() const // reads bits [19:16]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 16);
+        return value;
+    }
+    uint32_t get_arch_patch_rev() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 16);
+        return value;
+    }
+    CONSTEXPR id_r &set_arch_patch_rev(uint32_t value) // writes bits [19:16]; lower fields are preserved
+    {
+        word0 = (~(((1U << 4) - 1) << 16) & word0) | ((((1U << 4) - 1) & value) << 16);
+        return *this;
+    }
+    volatile id_r &set_arch_patch_rev(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 16) & word0) | ((((1U << 4) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_minor_rev() const // reads bits [27:20] (8-bit field)
+    {
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 20);
+        return value;
+    }
+    uint32_t get_arch_minor_rev() const volatile
+    {
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 20);
+        return value;
+    }
+    CONSTEXPR id_r &set_arch_minor_rev(uint32_t value) // writes bits [27:20]; lower fields are preserved
+    {
+        word0 = (~(((1U << 8) - 1) << 20) & word0) | ((((1U << 8) - 1) & value) << 20);
+        return *this;
+    }
+    volatile id_r &set_arch_minor_rev(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 8) - 1) << 20) & word0) | ((((1U << 8) - 1) & value) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_arch_major_rev() const // reads bits [31:28]
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    uint32_t get_arch_major_rev() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    CONSTEXPR id_r &set_arch_major_rev(uint32_t value) // writes bits [31:28]; all 28 lower bits are preserved
+    {
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+    volatile id_r &set_arch_major_rev(uint32_t value) volatile
+    {
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+#endif
+};
+
+// status_r - Register describes the current operating status of the NPU (C: bit-field view, C++: accessors)
+struct status_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t state : 1;      // NPU state, 0 = Stopped, 1 = Running
+            uint32_t irq_raised : 1; // Raw IRQ status, 0 = IRQ not raised, 1 = IRQ raised. IRQ is cleared using command
+                                     // register bit 1
+            uint32_t
+                bus_status : 1; // 0=OK, 1=Bus abort detected and processing halted (NPU will reach IDLE state and not
+                                // to start process any more commands/AXI transactions). Can only be cleared by a reset
+            uint32_t reset_status : 1; // Reset is ongoing and only this register can be read (other registers read as 0
+                                       // and writes are ignored.) A value of 0 means NPU is not being reset and can be
+                                       // accessed as normal
+            uint32_t
+                cmd_parse_error : 1; // 0=No error 1=Command stream parsing error detected. Can only be cleared by reset
+            uint32_t cmd_end_reached : 1; // 0=Not reached, 1=Reached. Cleared by writing QBASE or QSIZE when NPU is in
+                                          // stopped state
+            uint32_t pmu_irq_raised : 1;  // 0=No PMU IRQ, 1=PMU IRQ raised. Cleared by using command register bit 1
+            uint32_t wd_fault : 1; // Weight decoder state: 0=no fault 1=weight decoder decompression fault. Can only be
+                                   // cleared by reset
+            uint32_t ecc_fault : 1; // ECC state for internal RAMs: 0=no fault 1=ECC fault signalled. Can only be
+                                    // cleared by reset
+            uint32_t reserved0 : 2;
+            uint32_t faulting_interface : 1; // Faulting interface on bus abort
+            uint32_t faulting_channel : 4;  // Faulting channel on a bus abort. Read: 0=Cmd 1=IFM 2=Weights 3=Scale+Bias
+                                            // 4=Mem2Mem; Write: 8=OFM 9=Mem2Mem
+            uint32_t irq_history_mask : 16; // IRQ History mask
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // Every set_*() clears only its own field with ~(field_mask << shift), so all other bits of word0 survive.
+  public:
+    CONSTEXPR status_r() : word0(8) {} // default value 8: only bit 3 (reset_status) is set
+    CONSTEXPR status_r(uint32_t init) : word0(init) {} // wrap a raw 32-bit register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    status_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::state get_state() const
+    {
+        NPU_NAMESPACE::state value = static_cast<NPU_NAMESPACE::state>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    NPU_NAMESPACE::state get_state() const volatile
+    {
+        NPU_NAMESPACE::state value = static_cast<NPU_NAMESPACE::state>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    CONSTEXPR status_r &set_state(NPU_NAMESPACE::state value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile status_r &set_state(NPU_NAMESPACE::state value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_irq_raised() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
+    }
+    uint32_t get_irq_raised() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
+    }
+    CONSTEXPR status_r &set_irq_raised(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile status_r &set_irq_raised(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bus_status() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
+    }
+    uint32_t get_bus_status() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
+    }
+    CONSTEXPR status_r &set_bus_status(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile status_r &set_bus_status(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_reset_status() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
+    }
+    uint32_t get_reset_status() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
+    }
+    CONSTEXPR status_r &set_reset_status(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile status_r &set_reset_status(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_parse_error() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
+        return value;
+    }
+    uint32_t get_cmd_parse_error() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
+        return value;
+    }
+    CONSTEXPR status_r &set_cmd_parse_error(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile status_r &set_cmd_parse_error(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_end_reached() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
+    }
+    uint32_t get_cmd_end_reached() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 5);
+        return value;
+    }
+    CONSTEXPR status_r &set_cmd_end_reached(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    volatile status_r &set_cmd_end_reached(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_pmu_irq_raised() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
+        return value;
+    }
+    uint32_t get_pmu_irq_raised() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 6);
+        return value;
+    }
+    CONSTEXPR status_r &set_pmu_irq_raised(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    volatile status_r &set_pmu_irq_raised(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_fault() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
+        return value;
+    }
+    uint32_t get_wd_fault() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 7);
+        return value;
+    }
+    CONSTEXPR status_r &set_wd_fault(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    volatile status_r &set_wd_fault(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ecc_fault() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
+        return value;
+    }
+    uint32_t get_ecc_fault() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 8);
+        return value;
+    }
+    CONSTEXPR status_r &set_ecc_fault(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 8)) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    volatile status_r &set_ecc_fault(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 8)) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::dma_fault_src get_faulting_interface() const
+    {
+        NPU_NAMESPACE::dma_fault_src value = static_cast<NPU_NAMESPACE::dma_fault_src>(((1U << 1) - 1) & (word0 >> 11));
+        return value;
+    }
+    NPU_NAMESPACE::dma_fault_src get_faulting_interface() const volatile
+    {
+        NPU_NAMESPACE::dma_fault_src value = static_cast<NPU_NAMESPACE::dma_fault_src>(((1U << 1) - 1) & (word0 >> 11));
+        return value;
+    }
+    CONSTEXPR status_r &set_faulting_interface(NPU_NAMESPACE::dma_fault_src value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 11)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 11);
+        return *this;
+    }
+    volatile status_r &set_faulting_interface(NPU_NAMESPACE::dma_fault_src value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 11)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 11);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_faulting_channel() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
+        return value;
+    }
+    uint32_t get_faulting_channel() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 12);
+        return value;
+    }
+    CONSTEXPR status_r &set_faulting_channel(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 4) - 1) << 12)) | ((((1U << 4) - 1) & value) << 12);
+        return *this;
+    }
+    volatile status_r &set_faulting_channel(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 4) - 1) << 12)) | ((((1U << 4) - 1) & value) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_irq_history_mask() const
+    {
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
+        return value;
+    }
+    uint32_t get_irq_history_mask() const volatile
+    {
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
+        return value;
+    }
+    CONSTEXPR status_r &set_irq_history_mask(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 16) - 1) << 16)) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+    volatile status_r &set_irq_history_mask(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 16) - 1) << 16)) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+#endif
+};
+
+// cmd_r - Command register, reads as last written command (C: bit-field view, C++: accessors)
+struct cmd_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t transition_to_running_state : 1; // Write 1 to transition the NPU to running state. Writing 0 has
+                                                      // no effect
+            uint32_t clear_irq : 1; // Write 1 to clear the IRQ status in the STATUS register. Writing 0 has no effect
+            uint32_t clock_q_enable : 1; // Write 1 to this bit to enable clock off using clock q-interface and enable
+                                         // the requester clock gate
+            uint32_t power_q_enable : 1; // Write 1 to this bit to enable power off using power q-interface
+            uint32_t
+                stop_request : 1; // Write 1 to this bit to request STOP after completing any already-started commands
+            uint32_t reserved0 : 11;
+            uint32_t clear_irq_history : 16; // Clears the IRQ history mask
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // Every set_*() clears only its own field with ~(field_mask << shift), so all other bits of word0 survive.
+  public:
+    CONSTEXPR cmd_r() : word0(12) {} // default value 12: bits 2 and 3 (clock_q_enable, power_q_enable) set
+    CONSTEXPR cmd_r(uint32_t init) : word0(init) {} // wrap a raw 32-bit register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    cmd_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_transition_to_running_state() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
+        return value;
+    }
+    uint32_t get_transition_to_running_state() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 0);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_transition_to_running_state(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    volatile cmd_r &set_transition_to_running_state(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clear_irq() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
+    }
+    uint32_t get_clear_irq() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 1);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_clear_irq(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile cmd_r &set_clear_irq(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clock_q_enable() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
+    }
+    uint32_t get_clock_q_enable() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 2);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_clock_q_enable(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile cmd_r &set_clock_q_enable(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_power_q_enable() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
+    }
+    uint32_t get_power_q_enable() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 3);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_power_q_enable(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile cmd_r &set_power_q_enable(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_stop_request() const
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
+        return value;
+    }
+    uint32_t get_stop_request() const volatile
+    {
+        uint32_t value = ((1U << 1) - 1) & (word0 >> 4);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_stop_request(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile cmd_r &set_stop_request(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_clear_irq_history() const
+    {
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
+        return value;
+    }
+    uint32_t get_clear_irq_history() const volatile
+    {
+        uint32_t value = ((1U << 16) - 1) & (word0 >> 16);
+        return value;
+    }
+    CONSTEXPR cmd_r &set_clear_irq_history(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 16) - 1) << 16)) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+    volatile cmd_r &set_clear_irq_history(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 16) - 1) << 16)) | ((((1U << 16) - 1) & value) << 16);
+        return *this;
+    }
+#endif
+};
+
+// reset_r - Request Reset and new security mode (C: bit-field view, C++: accessors)
+struct reset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t pending_CPL : 1; // Current privilege level 0=User 1=Privileged
+            uint32_t pending_CSL : 1; // Current security level 0=Secure 1=Non secure
+            uint32_t reserved0 : 30;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // Every set_*() clears only its own field with ~(field_mask << shift), so all other bits of word0 survive.
+  public:
+    CONSTEXPR reset_r() : word0(0) {} // default value: all bits clear
+    CONSTEXPR reset_r(uint32_t init) : word0(init) {} // wrap a raw 32-bit register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    reset_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_pending_CPL() const
+    {
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    NPU_NAMESPACE::privilege_level get_pending_CPL() const volatile
+    {
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    CONSTEXPR reset_r &set_pending_CPL(NPU_NAMESPACE::privilege_level value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile reset_r &set_pending_CPL(NPU_NAMESPACE::privilege_level value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::security_level get_pending_CSL() const
+    {
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
+        return value;
+    }
+    NPU_NAMESPACE::security_level get_pending_CSL() const volatile
+    {
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
+        return value;
+    }
+    CONSTEXPR reset_r &set_pending_CSL(NPU_NAMESPACE::security_level value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+    volatile reset_r &set_pending_CSL(NPU_NAMESPACE::security_level value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+#endif
+};
+
+// qbase_r - Byte base address of the command stream, held as two 32-bit words (low word first)
+struct qbase_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Low 32 bits of the offset
+            uint32_t offset_HI : 8;  // High 8 bits of the offset
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0] of the 64-bit value
+    uint32_t word1; // bits [63:32] of the 64-bit value
+
+  public:
+    CONSTEXPR qbase_r() : word0(0U), word1(0U) {}
+    CONSTEXPR qbase_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init)),      // truncation keeps the low half
+        word1(static_cast<uint32_t>(init >> 32)) // shift brings the high half down
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value);       // low half
+        word1 = static_cast<uint32_t>(value >> 32); // high half
+    }
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>(value);       // low half
+        word1 = static_cast<uint32_t>(value >> 32); // high half
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return (static_cast<uint64_t>(word1) << 32) | static_cast<uint64_t>(word0);
+    }
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | static_cast<uint64_t>(word0);
+    }
+    qbase_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+#endif
+};
+
+// qread_r - Byte read offset within the command stream. Multiple of 4 in the range 0 to 16 MB
+struct qread_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t QREAD : 32; // Read offset of the command currently being executed
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // QREAD spans the entire 32-bit word, so the accessors below need no masking or shifting.
+  public:
+    CONSTEXPR qread_r() : word0(0U) {}
+    CONSTEXPR qread_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    qread_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_QREAD() const
+    {
+        // The field is the word itself.
+        return word0;
+    }
+    uint32_t get_QREAD() const volatile
+    {
+        // The field is the word itself; a single read of the volatile word.
+        return word0;
+    }
+    CONSTEXPR qread_r &set_QREAD(uint32_t value)
+    {
+        word0 = value; // full-width field: plain store
+        return *this;
+    }
+    volatile qread_r &set_QREAD(uint32_t value) volatile
+    {
+        word0 = value; // full-width field: plain store
+        return *this;
+    }
+#endif
+};
+
+// qconfig_r - AXI configuration for the command stream, range 0-3. Uses the same encoding as REGIONCFG
+struct qconfig_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cmd_region0 : 2; // Configuration of the command region
+            uint32_t reserved0 : 30;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // cmd_region0 is the two lowest bits of word0 (mask 0x3, no shift needed).
+  public:
+    CONSTEXPR qconfig_r() : word0(0U) {}
+    CONSTEXPR qconfig_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    qconfig_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_cmd_region0() const
+    {
+        const uint32_t raw = word0 & 0x3U;
+        return static_cast<NPU_NAMESPACE::mem_attr>(raw);
+    }
+    NPU_NAMESPACE::mem_attr get_cmd_region0() const volatile
+    {
+        const uint32_t raw = word0 & 0x3U;
+        return static_cast<NPU_NAMESPACE::mem_attr>(raw);
+    }
+    CONSTEXPR qconfig_r &set_cmd_region0(NPU_NAMESPACE::mem_attr value)
+    {
+        word0 = (word0 & ~0x3U) | (static_cast<uint32_t>(value) & 0x3U);
+        return *this;
+    }
+    volatile qconfig_r &set_cmd_region0(NPU_NAMESPACE::mem_attr value) volatile
+    {
+        word0 = (word0 & ~0x3U) | (static_cast<uint32_t>(value) & 0x3U);
+        return *this;
+    }
+#endif
+};
+
+// qsize_r - Byte size of the command stream. Multiple of 4 in the range 0 to 16 MB
+struct qsize_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t QSIZE : 32; // Size of the command stream the NPU will execute next
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // QSIZE spans the entire 32-bit word, so the accessors below need no masking or shifting.
+  public:
+    CONSTEXPR qsize_r() : word0(0U) {}
+    CONSTEXPR qsize_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    qsize_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_QSIZE() const
+    {
+        // The field is the word itself.
+        return word0;
+    }
+    uint32_t get_QSIZE() const volatile
+    {
+        // The field is the word itself; a single read of the volatile word.
+        return word0;
+    }
+    CONSTEXPR qsize_r &set_QSIZE(uint32_t value)
+    {
+        word0 = value; // full-width field: plain store
+        return *this;
+    }
+    volatile qsize_r &set_QSIZE(uint32_t value) volatile
+    {
+        word0 = value; // full-width field: plain store
+        return *this;
+    }
+#endif
+};
+
+// prot_r - Protection level configured for the NPU when acting as an AXI requester (C: bit-fields, C++: accessors)
+struct prot_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t active_CPL : 1; // Current privilege level 0=User 1=Privileged
+            uint32_t active_CSL : 1; // Current security level 0=Secure 1=Non secure
+            uint32_t reserved0 : 30;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+    // Every set_*() clears only its own field with ~(field_mask << shift), so all other bits of word0 survive.
+  public:
+    CONSTEXPR prot_r() : word0(0) {} // default value: all bits clear
+    CONSTEXPR prot_r(uint32_t init) : word0(init) {} // wrap a raw 32-bit register value
+    CONSTEXPR void operator=(uint32_t value) // overwrite the whole word
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // overwrite the whole word of a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // raw word of a non-volatile object
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // raw word of a volatile object
+    {
+        return word0;
+    }
+    prot_r copy() volatile // non-volatile copy of the current contents
+    {
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::privilege_level get_active_CPL() const
+    {
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    NPU_NAMESPACE::privilege_level get_active_CPL() const volatile
+    {
+        NPU_NAMESPACE::privilege_level value =
+            static_cast<NPU_NAMESPACE::privilege_level>(((1U << 1) - 1) & (word0 >> 0));
+        return value;
+    }
+    CONSTEXPR prot_r &set_active_CPL(NPU_NAMESPACE::privilege_level value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile prot_r &set_active_CPL(NPU_NAMESPACE::privilege_level value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::security_level get_active_CSL() const
+    {
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
+        return value;
+    }
+    NPU_NAMESPACE::security_level get_active_CSL() const volatile
+    {
+        NPU_NAMESPACE::security_level value =
+            static_cast<NPU_NAMESPACE::security_level>(((1U << 1) - 1) & (word0 >> 1));
+        return value;
+    }
+    CONSTEXPR prot_r &set_active_CSL(NPU_NAMESPACE::security_level value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+    volatile prot_r &set_active_CSL(NPU_NAMESPACE::security_level value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 1);
+        return *this;
+    }
+#endif
+};
+
+// config_r - RTL configuration
+struct config_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t macs_per_cc : 4;        // The log2(macs/clock cycle)
+            uint32_t cmd_stream_version : 4; // command stream version accepted by this NPU
+            uint32_t shram_size : 8;         // Total size in KB of internal SHRAM
+            uint32_t reserved0 : 10;
+            uint32_t functional_safety : 1; // Functional safety configuration
+            uint32_t custom_dma : 1;        // Custom DMA configuration
+            uint32_t product : 4;           // Product configuration
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    CONSTEXPR config_r() : word0(268435456) {}
+    CONSTEXPR config_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    config_r copy() volatile
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_macs_per_cc() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
+        return value;
+    }
+    uint32_t get_macs_per_cc() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 0);
+        return value;
+    }
+    CONSTEXPR config_r &set_macs_per_cc(uint32_t value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    volatile config_r &set_macs_per_cc(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 0) & word0) | ((((1U << 4) - 1) & value) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cmd_stream_version() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
+        return value;
+    }
+    uint32_t get_cmd_stream_version() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 4);
+        return value;
+    }
+    CONSTEXPR config_r &set_cmd_stream_version(uint32_t value)
+    {
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    volatile config_r &set_cmd_stream_version(uint32_t value) volatile
+    {
+        word0 = (((~((1U << 4) - 1)) << 4) & word0) | ((((1U << 4) - 1) & value) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_shram_size() const
+    {
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 8);
+        return value;
+    }
+    uint32_t get_shram_size() const volatile
+    {
+        uint32_t value = ((1U << 8) - 1) & (word0 >> 8);
+        return value;
+    }
+    CONSTEXPR config_r &set_shram_size(uint32_t value)
+    { // Rewrites bits [15:8] only; the field mask is shifted before inversion so bits [7:0] are preserved
+        word0 = (~(((1U << 8) - 1) << 8) & word0) | ((((1U << 8) - 1) & value) << 8);
+        return *this;
+    }
+    volatile config_r &set_shram_size(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [15:8] only
+        word0 = (~(((1U << 8) - 1) << 8) & word0) | ((((1U << 8) - 1) & value) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::functional_safety get_functional_safety() const
+    {
+        NPU_NAMESPACE::functional_safety value =
+            static_cast<NPU_NAMESPACE::functional_safety>(((1U << 1) - 1) & (word0 >> 26));
+        return value;
+    }
+    NPU_NAMESPACE::functional_safety get_functional_safety() const volatile
+    {
+        NPU_NAMESPACE::functional_safety value =
+            static_cast<NPU_NAMESPACE::functional_safety>(((1U << 1) - 1) & (word0 >> 26));
+        return value;
+    }
+    CONSTEXPR config_r &set_functional_safety(NPU_NAMESPACE::functional_safety value)
+    { // Rewrites bit 26 only; the field mask is shifted before inversion so bits [25:0] are preserved
+        word0 = (~(((1U << 1) - 1) << 26) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 26);
+        return *this;
+    }
+    volatile config_r &set_functional_safety(NPU_NAMESPACE::functional_safety value) volatile
+    { // Volatile overload: read-modify-write of bit 26 only
+        word0 = (~(((1U << 1) - 1) << 26) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 26);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::custom_dma get_custom_dma() const
+    {
+        NPU_NAMESPACE::custom_dma value = static_cast<NPU_NAMESPACE::custom_dma>(((1U << 1) - 1) & (word0 >> 27));
+        return value;
+    }
+    NPU_NAMESPACE::custom_dma get_custom_dma() const volatile
+    {
+        NPU_NAMESPACE::custom_dma value = static_cast<NPU_NAMESPACE::custom_dma>(((1U << 1) - 1) & (word0 >> 27));
+        return value;
+    }
+    CONSTEXPR config_r &set_custom_dma(NPU_NAMESPACE::custom_dma value)
+    { // Rewrites bit 27 only; the field mask is shifted before inversion so bits [26:0] are preserved
+        word0 = (~(((1U << 1) - 1) << 27) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 27);
+        return *this;
+    }
+    volatile config_r &set_custom_dma(NPU_NAMESPACE::custom_dma value) volatile
+    { // Volatile overload: read-modify-write of bit 27 only
+        word0 = (~(((1U << 1) - 1) << 27) & word0) | ((((1U << 1) - 1) & static_cast<uint32_t>(value)) << 27);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_product() const
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    uint32_t get_product() const volatile
+    {
+        uint32_t value = ((1U << 4) - 1) & (word0 >> 28);
+        return value;
+    }
+    CONSTEXPR config_r &set_product(uint32_t value)
+    { // Rewrites bits [31:28] only; the field mask is shifted before inversion so bits [27:0] are preserved
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+    volatile config_r &set_product(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [31:28] only
+        word0 = (~(((1U << 4) - 1) << 28) & word0) | ((((1U << 4) - 1) & value) << 28);
+        return *this;
+    }
+#endif
+};
+
+// lock_r - Lock register. It is intended for driver use and does not affect NPU functionality
+struct lock_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t LOCK : 32; // 32 bit value for LOCK configuration
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw register image; LOCK spans all 32 bits
+
+  public:
+    CONSTEXPR lock_r() : word0(0) {}
+    CONSTEXPR lock_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    lock_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR uint32_t get_LOCK() const
+    {
+        // LOCK is the full word, so no masking or shifting is needed
+        return word0;
+    }
+    uint32_t get_LOCK() const volatile
+    {
+        // Volatile overload of the full-word read
+        return word0;
+    }
+    CONSTEXPR lock_r &set_LOCK(uint32_t value)
+    { // Store the full word and return *this so calls can be chained
+        word0 = value;
+        return *this;
+    }
+    volatile lock_r &set_LOCK(uint32_t value) volatile
+    { // Volatile overload of the full-word store
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// regioncfg_r - Region memory type configuration. Bits[2*k+1:2*k] give the memory type for REGION[k], k = 0..7
+struct regioncfg_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t region0 : 2; // Bits for Region0 Configuration
+            uint32_t region1 : 2; // Bits for Region1 Configuration
+            uint32_t region2 : 2; // Bits for Region2 Configuration
+            uint32_t region3 : 2; // Bits for Region3 Configuration
+            uint32_t region4 : 2; // Bits for Region4 Configuration
+            uint32_t region5 : 2; // Bits for Region5 Configuration
+            uint32_t region6 : 2; // Bits for Region6 Configuration
+            uint32_t region7 : 2; // Bits for Region7 Configuration
+            uint32_t reserved0 : 16;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image holding the eight 2-bit region fields
+
+  public:
+    CONSTEXPR regioncfg_r() : word0(0) {}
+    CONSTEXPR regioncfg_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    regioncfg_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region0() const
+    {
+        // region0 occupies bits [1:0]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    NPU_NAMESPACE::mem_attr get_region0() const volatile
+    {
+        // Volatile overload: extracts bits [1:0]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    CONSTEXPR regioncfg_r &set_region0(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [1:0] only; every other region field keeps its value
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile regioncfg_r &set_region0(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [1:0] only
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region1() const
+    {
+        // region1 occupies bits [3:2]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 2));
+    }
+    NPU_NAMESPACE::mem_attr get_region1() const volatile
+    {
+        // Volatile overload: extracts bits [3:2]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 2));
+    }
+    CONSTEXPR regioncfg_r &set_region1(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [3:2] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 2) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 2);
+        return *this;
+    }
+    volatile regioncfg_r &set_region1(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [3:2] only
+        word0 = (~(((1U << 2) - 1) << 2) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 2);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region2() const
+    {
+        // region2 occupies bits [5:4]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 4));
+    }
+    NPU_NAMESPACE::mem_attr get_region2() const volatile
+    {
+        // Volatile overload: extracts bits [5:4]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 4));
+    }
+    CONSTEXPR regioncfg_r &set_region2(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [5:4] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 4) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile regioncfg_r &set_region2(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [5:4] only
+        word0 = (~(((1U << 2) - 1) << 4) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region3() const
+    {
+        // region3 occupies bits [7:6]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 6));
+    }
+    NPU_NAMESPACE::mem_attr get_region3() const volatile
+    {
+        // Volatile overload: extracts bits [7:6]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 6));
+    }
+    CONSTEXPR regioncfg_r &set_region3(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [7:6] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 6) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 6);
+        return *this;
+    }
+    volatile regioncfg_r &set_region3(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [7:6] only
+        word0 = (~(((1U << 2) - 1) << 6) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 6);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region4() const
+    {
+        // region4 occupies bits [9:8]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 8));
+    }
+    NPU_NAMESPACE::mem_attr get_region4() const volatile
+    {
+        // Volatile overload: extracts bits [9:8]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 8));
+    }
+    CONSTEXPR regioncfg_r &set_region4(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [9:8] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 8) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 8);
+        return *this;
+    }
+    volatile regioncfg_r &set_region4(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [9:8] only
+        word0 = (~(((1U << 2) - 1) << 8) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 8);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region5() const
+    {
+        // region5 occupies bits [11:10]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 10));
+    }
+    NPU_NAMESPACE::mem_attr get_region5() const volatile
+    {
+        // Volatile overload: extracts bits [11:10]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 10));
+    }
+    CONSTEXPR regioncfg_r &set_region5(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [11:10] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 10) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 10);
+        return *this;
+    }
+    volatile regioncfg_r &set_region5(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [11:10] only
+        word0 = (~(((1U << 2) - 1) << 10) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 10);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region6() const
+    {
+        // region6 occupies bits [13:12]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 12));
+    }
+    NPU_NAMESPACE::mem_attr get_region6() const volatile
+    {
+        // Volatile overload: extracts bits [13:12]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 12));
+    }
+    CONSTEXPR regioncfg_r &set_region6(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [13:12] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 12) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 12);
+        return *this;
+    }
+    volatile regioncfg_r &set_region6(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [13:12] only
+        word0 = (~(((1U << 2) - 1) << 12) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 12);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::mem_attr get_region7() const
+    {
+        // region7 occupies bits [15:14]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 14));
+    }
+    NPU_NAMESPACE::mem_attr get_region7() const volatile
+    {
+        // Volatile overload: extracts bits [15:14]
+        return static_cast<NPU_NAMESPACE::mem_attr>(((1U << 2) - 1) & (word0 >> 14));
+    }
+    CONSTEXPR regioncfg_r &set_region7(NPU_NAMESPACE::mem_attr value)
+    { // Rewrites bits [15:14] only; the mask is shifted before inversion so lower regions are preserved
+        word0 = (~(((1U << 2) - 1) << 14) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 14);
+        return *this;
+    }
+    volatile regioncfg_r &set_region7(NPU_NAMESPACE::mem_attr value) volatile
+    { // Volatile overload: read-modify-write of bits [15:14] only
+        word0 = (~(((1U << 2) - 1) << 14) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 14);
+        return *this;
+    }
+#endif
+};
+
+// axi_limit0_r - AXI limits for port 0 counter 0. The C++ setters change only their own field and can be chained
+struct axi_limit0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved0 : 2;
+            uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
+            uint32_t reserved1 : 8;
+            uint32_t
+                max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 63
+            uint32_t reserved2 : 2;
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions - 1 in range
+                                                   // 0 to 31
+            uint32_t reserved3 : 3;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    CONSTEXPR axi_limit0_r() : word0(0) {}
+    CONSTEXPR axi_limit0_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    axi_limit0_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
+    {
+        // max_beats occupies bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
+    {
+        // Volatile overload: extracts bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    CONSTEXPR axi_limit0_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    { // Rewrites bits [1:0] only; all other fields keep their value
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit0_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
+    { // Volatile overload: read-modify-write of bits [1:0] only
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        // memtype occupies bits [7:4]; the raw field value is converted to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
+    {
+        // Volatile overload: extracts bits [7:4] and converts them to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    CONSTEXPR axi_limit0_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    { // Rewrites bits [7:4] only; the mask is shifted before inversion so max_beats is preserved
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit0_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
+    { // Volatile overload: read-modify-write of bits [7:4] only
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const
+    {
+        // max_outstanding_read_m1 occupies bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    uint32_t get_max_outstanding_read_m1() const volatile
+    {
+        // Volatile overload: extracts bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    CONSTEXPR axi_limit0_r &set_max_outstanding_read_m1(uint32_t value)
+    { // Rewrites bits [21:16] only (value truncated to 6 bits); lower fields are preserved
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit0_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [21:16] only
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const
+    {
+        // max_outstanding_write_m1 occupies bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    uint32_t get_max_outstanding_write_m1() const volatile
+    {
+        // Volatile overload: extracts bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    CONSTEXPR axi_limit0_r &set_max_outstanding_write_m1(uint32_t value)
+    { // Rewrites bits [28:24] only (value truncated to 5 bits); lower fields are preserved
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit0_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [28:24] only
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+#endif
+};
+
+// axi_limit1_r - AXI limits for port 0 counter 1. The C++ setters change only their own field and can be chained
+struct axi_limit1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved0 : 2;
+            uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
+            uint32_t reserved1 : 8;
+            uint32_t
+                max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 63
+            uint32_t reserved2 : 2;
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions - 1 in range
+                                                   // 0 to 31
+            uint32_t reserved3 : 3;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    CONSTEXPR axi_limit1_r() : word0(0) {}
+    CONSTEXPR axi_limit1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    axi_limit1_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
+    {
+        // max_beats occupies bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
+    {
+        // Volatile overload: extracts bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    CONSTEXPR axi_limit1_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    { // Rewrites bits [1:0] only; all other fields keep their value
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
+    { // Volatile overload: read-modify-write of bits [1:0] only
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        // memtype occupies bits [7:4]; the raw field value is converted to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
+    {
+        // Volatile overload: extracts bits [7:4] and converts them to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    CONSTEXPR axi_limit1_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    { // Rewrites bits [7:4] only; the mask is shifted before inversion so max_beats is preserved
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit1_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
+    { // Volatile overload: read-modify-write of bits [7:4] only
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const
+    {
+        // max_outstanding_read_m1 occupies bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    uint32_t get_max_outstanding_read_m1() const volatile
+    {
+        // Volatile overload: extracts bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    CONSTEXPR axi_limit1_r &set_max_outstanding_read_m1(uint32_t value)
+    { // Rewrites bits [21:16] only (value truncated to 6 bits); lower fields are preserved
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [21:16] only
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const
+    {
+        // max_outstanding_write_m1 occupies bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    uint32_t get_max_outstanding_write_m1() const volatile
+    {
+        // Volatile overload: extracts bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    CONSTEXPR axi_limit1_r &set_max_outstanding_write_m1(uint32_t value)
+    { // Rewrites bits [28:24] only (value truncated to 5 bits); lower fields are preserved
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit1_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [28:24] only
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+#endif
+};
+
+// axi_limit2_r - AXI limits for port 1 counter 2. The C++ setters change only their own field and can be chained
+struct axi_limit2_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved0 : 2;
+            uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
+            uint32_t reserved1 : 8;
+            uint32_t
+                max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 63
+            uint32_t reserved2 : 2;
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions - 1 in range
+                                                   // 0 to 31
+            uint32_t reserved3 : 3;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    CONSTEXPR axi_limit2_r() : word0(0) {}
+    CONSTEXPR axi_limit2_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    axi_limit2_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
+    {
+        // max_beats occupies bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
+    {
+        // Volatile overload: extracts bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    CONSTEXPR axi_limit2_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    { // Rewrites bits [1:0] only; all other fields keep their value
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit2_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
+    { // Volatile overload: read-modify-write of bits [1:0] only
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        // memtype occupies bits [7:4]; the raw field value is converted to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
+    {
+        // Volatile overload: extracts bits [7:4] and converts them to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    CONSTEXPR axi_limit2_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    { // Rewrites bits [7:4] only; the mask is shifted before inversion so max_beats is preserved
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit2_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
+    { // Volatile overload: read-modify-write of bits [7:4] only
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const
+    {
+        // max_outstanding_read_m1 occupies bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    uint32_t get_max_outstanding_read_m1() const volatile
+    {
+        // Volatile overload: extracts bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    CONSTEXPR axi_limit2_r &set_max_outstanding_read_m1(uint32_t value)
+    { // Rewrites bits [21:16] only (value truncated to 6 bits); lower fields are preserved
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit2_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [21:16] only
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const
+    {
+        // max_outstanding_write_m1 occupies bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    uint32_t get_max_outstanding_write_m1() const volatile
+    {
+        // Volatile overload: extracts bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    CONSTEXPR axi_limit2_r &set_max_outstanding_write_m1(uint32_t value)
+    { // Rewrites bits [28:24] only (value truncated to 5 bits); lower fields are preserved
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit2_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [28:24] only
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+#endif
+};
+
+// axi_limit3_r - AXI limits for port 1 counter 3. The C++ setters change only their own field and can be chained
+struct axi_limit3_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t max_beats : 2; // Burst split alignment
+            uint32_t reserved0 : 2;
+            uint32_t memtype : 4; // Memtype to be used to encode AxCACHE signals
+            uint32_t reserved1 : 8;
+            uint32_t
+                max_outstanding_read_m1 : 6; // Maximum number of outstanding AXI read transactions - 1 in range 0 to 63
+            uint32_t reserved2 : 2;
+            uint32_t max_outstanding_write_m1 : 5; // Maximum number of outstanding AXI write transactions - 1 in range
+                                                   // 0 to 31
+            uint32_t reserved3 : 3;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    CONSTEXPR axi_limit3_r() : word0(0) {}
+    CONSTEXPR axi_limit3_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    { // Overwrite the whole register word
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    { // Volatile overload of the whole-word write
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    { // Raw register word
+        return word0;
+    }
+    operator uint32_t() volatile
+    { // Raw register word, read through a volatile object
+        return word0;
+    }
+    axi_limit3_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::max_beats get_max_beats() const
+    {
+        // max_beats occupies bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    NPU_NAMESPACE::max_beats get_max_beats() const volatile
+    {
+        // Volatile overload: extracts bits [1:0]
+        return static_cast<NPU_NAMESPACE::max_beats>(((1U << 2) - 1) & (word0 >> 0));
+    }
+    CONSTEXPR axi_limit3_r &set_max_beats(NPU_NAMESPACE::max_beats value)
+    { // Rewrites bits [1:0] only; all other fields keep their value
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_beats(NPU_NAMESPACE::max_beats value) volatile
+    { // Volatile overload: read-modify-write of bits [1:0] only
+        word0 = (~(((1U << 2) - 1) << 0) & word0) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    CONSTEXPR NPU_NAMESPACE::axi_mem_encoding get_memtype() const
+    {
+        // memtype occupies bits [7:4]; the raw field value is converted to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    NPU_NAMESPACE::axi_mem_encoding get_memtype() const volatile
+    {
+        // Volatile overload: extracts bits [7:4] and converts them to
+        // axi_mem_encoding without any range check
+        return static_cast<NPU_NAMESPACE::axi_mem_encoding>(((1U << 4) - 1) & (word0 >> 4));
+    }
+    CONSTEXPR axi_limit3_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value)
+    { // Rewrites bits [7:4] only; the mask is shifted before inversion so max_beats is preserved
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    volatile axi_limit3_r &set_memtype(NPU_NAMESPACE::axi_mem_encoding value) volatile
+    { // Volatile overload: read-modify-write of bits [7:4] only
+        word0 = (~(((1U << 4) - 1) << 4) & word0) | ((((1U << 4) - 1) & static_cast<uint32_t>(value)) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_read_m1() const
+    {
+        // max_outstanding_read_m1 occupies bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    uint32_t get_max_outstanding_read_m1() const volatile
+    {
+        // Volatile overload: extracts bits [21:16]
+        return ((1U << 6) - 1) & (word0 >> 16);
+    }
+    CONSTEXPR axi_limit3_r &set_max_outstanding_read_m1(uint32_t value)
+    { // Rewrites bits [21:16] only (value truncated to 6 bits); lower fields are preserved
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_outstanding_read_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [21:16] only
+        word0 = (~(((1U << 6) - 1) << 16) & word0) | ((((1U << 6) - 1) & value) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_max_outstanding_write_m1() const
+    {
+        // max_outstanding_write_m1 occupies bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    uint32_t get_max_outstanding_write_m1() const volatile
+    {
+        // Volatile overload: extracts bits [28:24]
+        return ((1U << 5) - 1) & (word0 >> 24);
+    }
+    CONSTEXPR axi_limit3_r &set_max_outstanding_write_m1(uint32_t value)
+    { // Rewrites bits [28:24] only (value truncated to 5 bits); lower fields are preserved
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+    volatile axi_limit3_r &set_max_outstanding_write_m1(uint32_t value) volatile
+    { // Volatile overload: read-modify-write of bits [28:24] only
+        word0 = (~(((1U << 5) - 1) << 24) & word0) | ((((1U << 5) - 1) & value) << 24);
+        return *this;
+    }
+#endif
+};
+
+// basep_r - Base address the driver can use to relocate the command stream on region 0. If the region contains data
+// requiring A-byte alignment then the base must be a multiple of A
+struct basep_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8;  // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // Low 32 bits of the 64-bit register value
+    uint32_t word1; // High 32 bits of the 64-bit register value
+
+  public:
+    CONSTEXPR basep_r() : word0(0), word1(0) {}
+    CONSTEXPR basep_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init)),      // truncating cast keeps the low half
+        word1(static_cast<uint32_t>(init >> 32)) // shift brings the high half down
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    { // Split the 64-bit value across the two words, low half first
+        word0 = static_cast<uint32_t>(value);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile
+    { // Volatile overload: word0 is written before word1
+        word0 = static_cast<uint32_t>(value);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    { // Recombine the two words into one 64-bit value
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile
+    { // Volatile overload of the 64-bit recombination
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    basep_r copy() volatile
+    { // By-value, non-volatile copy of this register
+        return *this;
+    }
+#endif
+};
+
+// wd_status_r - WD_STATUS
+struct wd_status_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t core_slice_state : 2; // WD core slice parser state
+            uint32_t core_idle : 1;        // Core idle
+            uint32_t ctrl_state : 2;       // WD control state
+            uint32_t ctrl_idle : 1;        // All stripe jobs idle (all weights consumed)
+            uint32_t write_buf_index0 : 3; // current write index for next data from core
+            uint32_t write_buf_valid0 : 1; // write buf valid (full)
+            uint32_t write_buf_idle0 : 1;  // write buf idle (empty)
+            uint32_t write_buf_index1 : 3; // current write index for next data from core
+            uint32_t write_buf_valid1 : 1; // write buf valid (full)
+            uint32_t write_buf_idle1 : 1;  // write buf idle (empty)
+            uint32_t events : 12;          // WD events mapped as appendix A
+            uint32_t reserved0 : 4;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image that every accessor below reads or rewrites
+
+  public:
+    CONSTEXPR wd_status_r() : word0(0) {}
+    CONSTEXPR wd_status_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    wd_status_r copy() volatile
+    {
+        return *this;
+    }
+    // Field accessors: get_<field>() extracts one bit range of word0; set_<field>() rewrites only that bit
+    // range, leaves all other fields untouched, and returns *this so that setters can be chained.
+    // core_slice_state, bits [1:0]: WD core slice parser state
+    CONSTEXPR NPU_NAMESPACE::wd_core_slice_state get_core_slice_state() const
+    {
+        return static_cast<NPU_NAMESPACE::wd_core_slice_state>((word0 >> 0) & ((1U << 2) - 1));
+    }
+    NPU_NAMESPACE::wd_core_slice_state get_core_slice_state() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::wd_core_slice_state>((word0 >> 0) & ((1U << 2) - 1));
+    }
+    // Replace bits [1:0] only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_core_slice_state(NPU_NAMESPACE::wd_core_slice_state value)
+    {
+        word0 = (word0 & ~(((1U << 2) - 1) << 0)) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    volatile wd_status_r &set_core_slice_state(NPU_NAMESPACE::wd_core_slice_state value) volatile
+    {
+        word0 = (word0 & ~(((1U << 2) - 1) << 0)) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 0);
+        return *this;
+    }
+    // core_idle, bit 2: Core idle
+    CONSTEXPR uint32_t get_core_idle() const
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    uint32_t get_core_idle() const volatile
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    // Replace bit 2 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_core_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile wd_status_r &set_core_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    // ctrl_state, bits [4:3]: WD control state
+    CONSTEXPR NPU_NAMESPACE::wd_ctrl_state get_ctrl_state() const
+    {
+        return static_cast<NPU_NAMESPACE::wd_ctrl_state>((word0 >> 3) & ((1U << 2) - 1));
+    }
+    NPU_NAMESPACE::wd_ctrl_state get_ctrl_state() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::wd_ctrl_state>((word0 >> 3) & ((1U << 2) - 1));
+    }
+    // Replace bits [4:3] only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_ctrl_state(NPU_NAMESPACE::wd_ctrl_state value)
+    {
+        word0 = (word0 & ~(((1U << 2) - 1) << 3)) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 3);
+        return *this;
+    }
+    volatile wd_status_r &set_ctrl_state(NPU_NAMESPACE::wd_ctrl_state value) volatile
+    {
+        word0 = (word0 & ~(((1U << 2) - 1) << 3)) | ((((1U << 2) - 1) & static_cast<uint32_t>(value)) << 3);
+        return *this;
+    }
+    // ctrl_idle, bit 5: All stripe jobs idle (all weights consumed)
+    CONSTEXPR uint32_t get_ctrl_idle() const
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    uint32_t get_ctrl_idle() const volatile
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    // Replace bit 5 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_ctrl_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    volatile wd_status_r &set_ctrl_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    // write_buf_index0, bits [8:6]: current write index for next data from core
+    CONSTEXPR uint32_t get_write_buf_index0() const
+    {
+        return (word0 >> 6) & ((1U << 3) - 1);
+    }
+    uint32_t get_write_buf_index0() const volatile
+    {
+        return (word0 >> 6) & ((1U << 3) - 1);
+    }
+    // Replace bits [8:6] only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_index0(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 3) - 1) << 6)) | ((((1U << 3) - 1) & value) << 6);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_index0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 3) - 1) << 6)) | ((((1U << 3) - 1) & value) << 6);
+        return *this;
+    }
+    // write_buf_valid0, bit 9: write buf valid (full)
+    CONSTEXPR uint32_t get_write_buf_valid0() const
+    {
+        return (word0 >> 9) & ((1U << 1) - 1);
+    }
+    uint32_t get_write_buf_valid0() const volatile
+    {
+        return (word0 >> 9) & ((1U << 1) - 1);
+    }
+    // Replace bit 9 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_valid0(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 9)) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_valid0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 9)) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    // write_buf_idle0, bit 10: write buf idle (empty)
+    CONSTEXPR uint32_t get_write_buf_idle0() const
+    {
+        return (word0 >> 10) & ((1U << 1) - 1);
+    }
+    uint32_t get_write_buf_idle0() const volatile
+    {
+        return (word0 >> 10) & ((1U << 1) - 1);
+    }
+    // Replace bit 10 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_idle0(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 10)) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_idle0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 10)) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    // write_buf_index1, bits [13:11]: current write index for next data from core
+    CONSTEXPR uint32_t get_write_buf_index1() const
+    {
+        return (word0 >> 11) & ((1U << 3) - 1);
+    }
+    uint32_t get_write_buf_index1() const volatile
+    {
+        return (word0 >> 11) & ((1U << 3) - 1);
+    }
+    // Replace bits [13:11] only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_index1(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 3) - 1) << 11)) | ((((1U << 3) - 1) & value) << 11);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_index1(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 3) - 1) << 11)) | ((((1U << 3) - 1) & value) << 11);
+        return *this;
+    }
+    // write_buf_valid1, bit 14: write buf valid (full)
+    CONSTEXPR uint32_t get_write_buf_valid1() const
+    {
+        return (word0 >> 14) & ((1U << 1) - 1);
+    }
+    uint32_t get_write_buf_valid1() const volatile
+    {
+        return (word0 >> 14) & ((1U << 1) - 1);
+    }
+    // Replace bit 14 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_valid1(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 14)) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_valid1(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 14)) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    // write_buf_idle1, bit 15: write buf idle (empty)
+    CONSTEXPR uint32_t get_write_buf_idle1() const
+    {
+        return (word0 >> 15) & ((1U << 1) - 1);
+    }
+    uint32_t get_write_buf_idle1() const volatile
+    {
+        return (word0 >> 15) & ((1U << 1) - 1);
+    }
+    // Replace bit 15 only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_write_buf_idle1(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 15)) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
+    }
+    volatile wd_status_r &set_write_buf_idle1(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 15)) | ((((1U << 1) - 1) & value) << 15);
+        return *this;
+    }
+    // events, bits [27:16]: WD events mapped as appendix A
+    CONSTEXPR uint32_t get_events() const
+    {
+        return (word0 >> 16) & ((1U << 12) - 1);
+    }
+    uint32_t get_events() const volatile
+    {
+        return (word0 >> 16) & ((1U << 12) - 1);
+    }
+    // Replace bits [27:16] only; every other bit of word0 is preserved.
+    CONSTEXPR wd_status_r &set_events(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 12) - 1) << 16)) | ((((1U << 12) - 1) & value) << 16);
+        return *this;
+    }
+    volatile wd_status_r &set_events(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 12) - 1) << 16)) | ((((1U << 12) - 1) & value) << 16);
+        return *this;
+    }
+#endif
+};
+
+// mac_status_r - MAC_STATUS
+struct mac_status_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t block_cfg_valid : 1;     // MAC has a valid block configuration
+            uint32_t trav_en : 1;             // MAC is doing block traversal
+            uint32_t wait_for_ib : 1;         // MAC is waiting for an Input Buffer to become available
+            uint32_t wait_for_acc_buf : 1;    // MAC is waiting for an Accumulator Buffer to become available
+            uint32_t wait_for_weights : 1;    // MAC is waiting for a Weight Block to become available
+            uint32_t stall_stripe : 1;        // MAC is stalling between two stripes
+            uint32_t dw_sel : 1;              // Currently used weight interface in MAC AI
+            uint32_t wait_for_dw0_ready : 1;  // MAC AI is waiting for MAC DPU to send dw0_ready to WD
+            uint32_t wait_for_dw1_ready : 1;  // MAC AI is waiting for MAC DPU to send dw1_ready to WD
+            uint32_t acc_buf_sel_ai : 1;      // Currently used AccBuf interface in MAC AI
+            uint32_t wait_for_acc0_ready : 1; // MAC AI is waiting for acc0_ready from AO
+            uint32_t wait_for_acc1_ready : 1; // MAC AI is waiting for acc1_ready from AO
+            uint32_t acc_buf_sel_aa : 1;      // Currently used AccBuf interface in MAC ADDER_ARRAY
+            uint32_t acc0_valid : 1;          // MAC outgoing value of acc0_valid
+            uint32_t acc1_valid : 1;          // MAC outgoing value of acc1_valid
+            uint32_t reserved0 : 1;
+            uint32_t events : 11; // Mapped to MAC events described in Appendix A
+            uint32_t reserved1 : 5;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image; getters extract a field, setters rewrite only that field
+
+  public:
+    CONSTEXPR mac_status_r() : word0(0) {}
+    CONSTEXPR mac_status_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    mac_status_r copy() volatile
+    {
+        return *this;
+    }
+    // block_cfg_valid, bit 0: MAC has a valid block configuration
+    CONSTEXPR uint32_t get_block_cfg_valid() const
+    {
+        return (word0 >> 0) & ((1U << 1) - 1);
+    }
+    uint32_t get_block_cfg_valid() const volatile
+    {
+        return (word0 >> 0) & ((1U << 1) - 1);
+    }
+    // Replace bit 0 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_block_cfg_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    volatile mac_status_r &set_block_cfg_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    // trav_en, bit 1: MAC is doing block traversal
+    CONSTEXPR uint32_t get_trav_en() const
+    {
+        return (word0 >> 1) & ((1U << 1) - 1);
+    }
+    uint32_t get_trav_en() const volatile
+    {
+        return (word0 >> 1) & ((1U << 1) - 1);
+    }
+    // Replace bit 1 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_trav_en(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile mac_status_r &set_trav_en(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    // wait_for_ib, bit 2: MAC is waiting for an Input Buffer to become available
+    CONSTEXPR uint32_t get_wait_for_ib() const
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_ib() const volatile
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    // Replace bit 2 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_ib(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_ib(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    // wait_for_acc_buf, bit 3: MAC is waiting for an Accumulator Buffer to become available
+    CONSTEXPR uint32_t get_wait_for_acc_buf() const
+    {
+        return (word0 >> 3) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_acc_buf() const volatile
+    {
+        return (word0 >> 3) & ((1U << 1) - 1);
+    }
+    // Replace bit 3 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_acc_buf(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_acc_buf(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    // wait_for_weights, bit 4: MAC is waiting for a Weight Block to become available
+    CONSTEXPR uint32_t get_wait_for_weights() const
+    {
+        return (word0 >> 4) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_weights() const volatile
+    {
+        return (word0 >> 4) & ((1U << 1) - 1);
+    }
+    // Replace bit 4 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_weights(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_weights(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    // stall_stripe, bit 5: MAC is stalling between two stripes
+    CONSTEXPR uint32_t get_stall_stripe() const
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    uint32_t get_stall_stripe() const volatile
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    // Replace bit 5 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_stall_stripe(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    volatile mac_status_r &set_stall_stripe(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    // dw_sel, bit 6: Currently used weight interface in MAC AI
+    CONSTEXPR uint32_t get_dw_sel() const
+    {
+        return (word0 >> 6) & ((1U << 1) - 1);
+    }
+    uint32_t get_dw_sel() const volatile
+    {
+        return (word0 >> 6) & ((1U << 1) - 1);
+    }
+    // Replace bit 6 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_dw_sel(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    volatile mac_status_r &set_dw_sel(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    // wait_for_dw0_ready, bit 7: MAC AI is waiting for MAC DPU to send dw0_ready to WD
+    CONSTEXPR uint32_t get_wait_for_dw0_ready() const
+    {
+        return (word0 >> 7) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_dw0_ready() const volatile
+    {
+        return (word0 >> 7) & ((1U << 1) - 1);
+    }
+    // Replace bit 7 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_dw0_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_dw0_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    // wait_for_dw1_ready, bit 8: MAC AI is waiting for MAC DPU to send dw1_ready to WD
+    CONSTEXPR uint32_t get_wait_for_dw1_ready() const
+    {
+        return (word0 >> 8) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_dw1_ready() const volatile
+    {
+        return (word0 >> 8) & ((1U << 1) - 1);
+    }
+    // Replace bit 8 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_dw1_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 8)) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_dw1_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 8)) | ((((1U << 1) - 1) & value) << 8);
+        return *this;
+    }
+    // acc_buf_sel_ai, bit 9: Currently used AccBuf interface in MAC AI
+    CONSTEXPR uint32_t get_acc_buf_sel_ai() const
+    {
+        return (word0 >> 9) & ((1U << 1) - 1);
+    }
+    uint32_t get_acc_buf_sel_ai() const volatile
+    {
+        return (word0 >> 9) & ((1U << 1) - 1);
+    }
+    // Replace bit 9 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_acc_buf_sel_ai(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 9)) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    volatile mac_status_r &set_acc_buf_sel_ai(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 9)) | ((((1U << 1) - 1) & value) << 9);
+        return *this;
+    }
+    // wait_for_acc0_ready, bit 10: MAC AI is waiting for acc0_ready from AO
+    CONSTEXPR uint32_t get_wait_for_acc0_ready() const
+    {
+        return (word0 >> 10) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_acc0_ready() const volatile
+    {
+        return (word0 >> 10) & ((1U << 1) - 1);
+    }
+    // Replace bit 10 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_acc0_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 10)) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_acc0_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 10)) | ((((1U << 1) - 1) & value) << 10);
+        return *this;
+    }
+    // wait_for_acc1_ready, bit 11: MAC AI is waiting for acc1_ready from AO
+    CONSTEXPR uint32_t get_wait_for_acc1_ready() const
+    {
+        return (word0 >> 11) & ((1U << 1) - 1);
+    }
+    uint32_t get_wait_for_acc1_ready() const volatile
+    {
+        return (word0 >> 11) & ((1U << 1) - 1);
+    }
+    // Replace bit 11 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_wait_for_acc1_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 11)) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    volatile mac_status_r &set_wait_for_acc1_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 11)) | ((((1U << 1) - 1) & value) << 11);
+        return *this;
+    }
+    // acc_buf_sel_aa, bit 12: Currently used AccBuf interface in MAC ADDER_ARRAY
+    CONSTEXPR uint32_t get_acc_buf_sel_aa() const
+    {
+        return (word0 >> 12) & ((1U << 1) - 1);
+    }
+    uint32_t get_acc_buf_sel_aa() const volatile
+    {
+        return (word0 >> 12) & ((1U << 1) - 1);
+    }
+    // Replace bit 12 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_acc_buf_sel_aa(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 12)) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
+    }
+    volatile mac_status_r &set_acc_buf_sel_aa(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 12)) | ((((1U << 1) - 1) & value) << 12);
+        return *this;
+    }
+    // acc0_valid, bit 13: MAC outgoing value of acc0_valid
+    CONSTEXPR uint32_t get_acc0_valid() const
+    {
+        return (word0 >> 13) & ((1U << 1) - 1);
+    }
+    uint32_t get_acc0_valid() const volatile
+    {
+        return (word0 >> 13) & ((1U << 1) - 1);
+    }
+    // Replace bit 13 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_acc0_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 13)) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
+    }
+    volatile mac_status_r &set_acc0_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 13)) | ((((1U << 1) - 1) & value) << 13);
+        return *this;
+    }
+    // acc1_valid, bit 14: MAC outgoing value of acc1_valid
+    CONSTEXPR uint32_t get_acc1_valid() const
+    {
+        return (word0 >> 14) & ((1U << 1) - 1);
+    }
+    uint32_t get_acc1_valid() const volatile
+    {
+        return (word0 >> 14) & ((1U << 1) - 1);
+    }
+    // Replace bit 14 only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_acc1_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 14)) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    volatile mac_status_r &set_acc1_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 14)) | ((((1U << 1) - 1) & value) << 14);
+        return *this;
+    }
+    // events, bits [26:16]: Mapped to MAC events described in Appendix A
+    CONSTEXPR uint32_t get_events() const
+    {
+        return (word0 >> 16) & ((1U << 11) - 1);
+    }
+    uint32_t get_events() const volatile
+    {
+        return (word0 >> 16) & ((1U << 11) - 1);
+    }
+    // Replace bits [26:16] only; every other bit of word0 is preserved.
+    CONSTEXPR mac_status_r &set_events(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 11) - 1) << 16)) | ((((1U << 11) - 1) & value) << 16);
+        return *this;
+    }
+    volatile mac_status_r &set_events(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 11) - 1) << 16)) | ((((1U << 11) - 1) & value) << 16);
+        return *this;
+    }
+#endif
+};
+
+// ao_status_r - AO_STATUS
+struct ao_status_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cmd_sbw_valid : 1; // Block command to shared buffer write module is valid
+            uint32_t cmd_act_valid : 1; // Block command to activation function module is valid
+            uint32_t cmd_ctl_valid : 1; // Block command to control module is valid
+            uint32_t cmd_scl_valid : 1; // Block command to scale module is valid
+            uint32_t cmd_sbr_valid : 1; // Block command to shared buffer read module is valid
+            uint32_t cmd_ofm_valid : 1; // Block command to ofm parameter module is valid
+            uint32_t blk_cmd_ready : 1; // Ready to accept block command
+            uint32_t blk_cmd_valid : 1; // Block command from CC is valid
+            uint32_t reserved0 : 8;
+            uint32_t events : 8; // Mapped to AO events described in Appendix A
+            uint32_t reserved1 : 8;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image; getters extract a field, setters rewrite only that field
+
+  public:
+    CONSTEXPR ao_status_r() : word0(0) {}
+    CONSTEXPR ao_status_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    ao_status_r copy() volatile
+    {
+        return *this;
+    }
+    // cmd_sbw_valid, bit 0: Block command to shared buffer write module is valid
+    CONSTEXPR uint32_t get_cmd_sbw_valid() const
+    {
+        return (word0 >> 0) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_sbw_valid() const volatile
+    {
+        return (word0 >> 0) & ((1U << 1) - 1);
+    }
+    // Replace bit 0 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_sbw_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_sbw_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 0)) | ((((1U << 1) - 1) & value) << 0);
+        return *this;
+    }
+    // cmd_act_valid, bit 1: Block command to activation function module is valid
+    CONSTEXPR uint32_t get_cmd_act_valid() const
+    {
+        return (word0 >> 1) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_act_valid() const volatile
+    {
+        return (word0 >> 1) & ((1U << 1) - 1);
+    }
+    // Replace bit 1 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_act_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_act_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 1)) | ((((1U << 1) - 1) & value) << 1);
+        return *this;
+    }
+    // cmd_ctl_valid, bit 2: Block command to control module is valid
+    CONSTEXPR uint32_t get_cmd_ctl_valid() const
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_ctl_valid() const volatile
+    {
+        return (word0 >> 2) & ((1U << 1) - 1);
+    }
+    // Replace bit 2 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_ctl_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_ctl_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 2)) | ((((1U << 1) - 1) & value) << 2);
+        return *this;
+    }
+    // cmd_scl_valid, bit 3: Block command to scale module is valid
+    CONSTEXPR uint32_t get_cmd_scl_valid() const
+    {
+        return (word0 >> 3) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_scl_valid() const volatile
+    {
+        return (word0 >> 3) & ((1U << 1) - 1);
+    }
+    // Replace bit 3 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_scl_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_scl_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 3)) | ((((1U << 1) - 1) & value) << 3);
+        return *this;
+    }
+    // cmd_sbr_valid, bit 4: Block command to shared buffer read module is valid
+    CONSTEXPR uint32_t get_cmd_sbr_valid() const
+    {
+        return (word0 >> 4) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_sbr_valid() const volatile
+    {
+        return (word0 >> 4) & ((1U << 1) - 1);
+    }
+    // Replace bit 4 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_sbr_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_sbr_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 4)) | ((((1U << 1) - 1) & value) << 4);
+        return *this;
+    }
+    // cmd_ofm_valid, bit 5: Block command to ofm parameter module is valid
+    CONSTEXPR uint32_t get_cmd_ofm_valid() const
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    uint32_t get_cmd_ofm_valid() const volatile
+    {
+        return (word0 >> 5) & ((1U << 1) - 1);
+    }
+    // Replace bit 5 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_cmd_ofm_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    volatile ao_status_r &set_cmd_ofm_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 5)) | ((((1U << 1) - 1) & value) << 5);
+        return *this;
+    }
+    // blk_cmd_ready, bit 6: Ready to accept block command
+    CONSTEXPR uint32_t get_blk_cmd_ready() const
+    {
+        return (word0 >> 6) & ((1U << 1) - 1);
+    }
+    uint32_t get_blk_cmd_ready() const volatile
+    {
+        return (word0 >> 6) & ((1U << 1) - 1);
+    }
+    // Replace bit 6 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_blk_cmd_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    volatile ao_status_r &set_blk_cmd_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 6)) | ((((1U << 1) - 1) & value) << 6);
+        return *this;
+    }
+    // blk_cmd_valid, bit 7: Block command from CC is valid
+    CONSTEXPR uint32_t get_blk_cmd_valid() const
+    {
+        return (word0 >> 7) & ((1U << 1) - 1);
+    }
+    uint32_t get_blk_cmd_valid() const volatile
+    {
+        return (word0 >> 7) & ((1U << 1) - 1);
+    }
+    // Replace bit 7 only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_blk_cmd_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    volatile ao_status_r &set_blk_cmd_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 1) - 1) << 7)) | ((((1U << 1) - 1) & value) << 7);
+        return *this;
+    }
+    // events, bits [23:16]: Mapped to AO events described in Appendix A
+    CONSTEXPR uint32_t get_events() const
+    {
+        return (word0 >> 16) & ((1U << 8) - 1);
+    }
+    uint32_t get_events() const volatile
+    {
+        return (word0 >> 16) & ((1U << 8) - 1);
+    }
+    // Replace bits [23:16] only; every other bit of word0 is preserved.
+    CONSTEXPR ao_status_r &set_events(uint32_t value)
+    {
+        word0 = (word0 & ~(((1U << 8) - 1) << 16)) | ((((1U << 8) - 1) & value) << 16);
+        return *this;
+    }
+    volatile ao_status_r &set_events(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(((1U << 8) - 1) << 16)) | ((((1U << 8) - 1) & value) << 16);
+        return *this;
+    }
+#endif
+};
+
+// dma_status0_r - DMA_STATUS0
+// One 32-bit word made of 32 single-bit status flags.
+// C builds see a bit-field union; C++ builds see a private raw word (word0) with
+// get_<field>()/set_<field>() accessors, each provided as a plain and a volatile overload.
+// Every setter rewrites only its own bit and returns *this so calls can be chained.
+// (The earlier setters built the clear mask as (~1) << n, which also zeroed bits 0..n-1.)
+struct dma_status0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cmd_idle : 1; // When this bit is high means that the CMD block is not busy in generating addresses
+                                   // for a CMD job
+            uint32_t ifm_idle : 1; // When this bit is high means that there are no ongoing IFM jobs
+            uint32_t wgt_idle_c0 : 1; // When this bit is high means that the WGT block is not busy in generating
+                                      // addresses for a WGT job
+            uint32_t bas_idle_c0 : 1; // When this bit is high means that the BAS block is not busy in generating
+                                      // addresses for a BAS job
+            uint32_t m2m_idle : 1;    // When this bit is high means that there are no ongoing M2M jobs
+            uint32_t ofm_idle : 1;    // When this bit is high means that there are no ongoing OFM jobs
+            uint32_t halt_req : 1;    // CPM has requested to HALT AXI bus before soft reset
+            uint32_t halt_ack : 1;    // DMA is in condition to halt the AXI bus since there are no pending transactions
+            uint32_t pause_req : 1;   // CC has requested to pause the AXI
+            uint32_t pause_ack : 1; // DMA is in condition to pause the AXI bus since there are no pending transactions
+            uint32_t ib0_ai_valid_c0 : 1;       // Data for AI to be read in IFM input buffer 0 - Core 0
+            uint32_t ib0_ai_ready_c0 : 1;       // Data consumed from AI in IFM input buffer 0 - Core 0
+            uint32_t ib1_ai_valid_c0 : 1;       // Data for AI to be read in IFM input buffer 1 - Core 0
+            uint32_t ib1_ai_ready_c0 : 1;       // Data consumed from AI in IFM input buffer 1 - Core 0
+            uint32_t ib0_ao_valid_c0 : 1;       // Data for AO to be read in IFM input buffer 0 - Core 0
+            uint32_t ib0_ao_ready_c0 : 1;       // Data consumed from AO in IFM input buffer 0 - Core 0
+            uint32_t ib1_ao_valid_c0 : 1;       // Data for AO to be read in IFM input buffer 1 - Core 0
+            uint32_t ib1_ao_ready_c0 : 1;       // Data consumed from AO in IFM input buffer 1 - Core 0
+            uint32_t ob0_valid_c0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 0 - Core 0
+            uint32_t ob0_ready_c0 : 1;          // Data consumed from DMA in OFM output buffer 0 - Core 0
+            uint32_t ob1_valid_c0 : 1;          // Data for DMA ready to be consumed in OFM output buffer 1 - Core 0
+            uint32_t ob1_ready_c0 : 1;          // Data consumed from DMA in OFM output buffer 1 - Core 0
+            uint32_t cmd_valid : 1;             // New command word for CC to be consumed
+            uint32_t cmd_ready : 1;             // Command word consumed by CC
+            uint32_t wd_bitstream_valid_c0 : 1; // New weight word for WD to be consumed - Core 0
+            uint32_t wd_bitstream_ready_c0 : 1; // Weight word consumed by WD - Core 0
+            uint32_t bs_bitstream_valid_c0 : 1; // New BaS word for AO to be consumed - Core 0
+            uint32_t bs_bitstream_ready_c0 : 1; // BaS word consumed by AO - Core 0
+            uint32_t axi0_ar_stalled : 1; // Read transfer request stalled on arready low AXI0 (due to memory system)
+            uint32_t axi0_rd_limit_stall : 1; // Read stalled due to one AXI0 limit counter being reached
+            uint32_t axi0_aw_stalled : 1; // Write transfer request stalled on awready low AXI0 (due to memory system)
+            uint32_t axi0_w_stalled : 1;  // Write transfer stalled on wready low AXI0 (due to memory system)
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw register image; bit n holds the n-th field listed above
+
+  public:
+    // Start with every flag cleared.
+    CONSTEXPR dma_status0_r() : word0(0) {}
+    // Wrap a raw 32-bit value. Kept implicit; NOTE(review): copy() appears to rely on this conversion.
+    CONSTEXPR dma_status0_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole word with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole word back as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Take a non-volatile snapshot of a volatile instance.
+    dma_status0_r copy() volatile
+    {
+        return *this;
+    }
+
+    // cmd_idle (bit 0)
+    CONSTEXPR uint32_t get_cmd_idle() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_cmd_idle() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_cmd_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile dma_status0_r &set_cmd_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+
+    // ifm_idle (bit 1)
+    CONSTEXPR uint32_t get_ifm_idle() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_ifm_idle() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ifm_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile dma_status0_r &set_ifm_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+
+    // wgt_idle_c0 (bit 2)
+    CONSTEXPR uint32_t get_wgt_idle_c0() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_wgt_idle_c0() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_wgt_idle_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile dma_status0_r &set_wgt_idle_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+
+    // bas_idle_c0 (bit 3)
+    CONSTEXPR uint32_t get_bas_idle_c0() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_bas_idle_c0() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_bas_idle_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile dma_status0_r &set_bas_idle_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+
+    // m2m_idle (bit 4)
+    CONSTEXPR uint32_t get_m2m_idle() const
+    {
+        return (word0 >> 4) & 1U;
+    }
+    uint32_t get_m2m_idle() const volatile
+    {
+        return (word0 >> 4) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_m2m_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+    volatile dma_status0_r &set_m2m_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+
+    // ofm_idle (bit 5)
+    CONSTEXPR uint32_t get_ofm_idle() const
+    {
+        return (word0 >> 5) & 1U;
+    }
+    uint32_t get_ofm_idle() const volatile
+    {
+        return (word0 >> 5) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ofm_idle(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+    volatile dma_status0_r &set_ofm_idle(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+
+    // halt_req (bit 6)
+    CONSTEXPR uint32_t get_halt_req() const
+    {
+        return (word0 >> 6) & 1U;
+    }
+    uint32_t get_halt_req() const volatile
+    {
+        return (word0 >> 6) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_halt_req(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 6)) | ((value & 1U) << 6);
+        return *this;
+    }
+    volatile dma_status0_r &set_halt_req(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 6)) | ((value & 1U) << 6);
+        return *this;
+    }
+
+    // halt_ack (bit 7)
+    CONSTEXPR uint32_t get_halt_ack() const
+    {
+        return (word0 >> 7) & 1U;
+    }
+    uint32_t get_halt_ack() const volatile
+    {
+        return (word0 >> 7) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_halt_ack(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 7)) | ((value & 1U) << 7);
+        return *this;
+    }
+    volatile dma_status0_r &set_halt_ack(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 7)) | ((value & 1U) << 7);
+        return *this;
+    }
+
+    // pause_req (bit 8)
+    CONSTEXPR uint32_t get_pause_req() const
+    {
+        return (word0 >> 8) & 1U;
+    }
+    uint32_t get_pause_req() const volatile
+    {
+        return (word0 >> 8) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_pause_req(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 8)) | ((value & 1U) << 8);
+        return *this;
+    }
+    volatile dma_status0_r &set_pause_req(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 8)) | ((value & 1U) << 8);
+        return *this;
+    }
+
+    // pause_ack (bit 9)
+    CONSTEXPR uint32_t get_pause_ack() const
+    {
+        return (word0 >> 9) & 1U;
+    }
+    uint32_t get_pause_ack() const volatile
+    {
+        return (word0 >> 9) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_pause_ack(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 9)) | ((value & 1U) << 9);
+        return *this;
+    }
+    volatile dma_status0_r &set_pause_ack(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 9)) | ((value & 1U) << 9);
+        return *this;
+    }
+
+    // ib0_ai_valid_c0 (bit 10)
+    CONSTEXPR uint32_t get_ib0_ai_valid_c0() const
+    {
+        return (word0 >> 10) & 1U;
+    }
+    uint32_t get_ib0_ai_valid_c0() const volatile
+    {
+        return (word0 >> 10) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib0_ai_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib0_ai_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+
+    // ib0_ai_ready_c0 (bit 11)
+    CONSTEXPR uint32_t get_ib0_ai_ready_c0() const
+    {
+        return (word0 >> 11) & 1U;
+    }
+    uint32_t get_ib0_ai_ready_c0() const volatile
+    {
+        return (word0 >> 11) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib0_ai_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 11)) | ((value & 1U) << 11);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib0_ai_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 11)) | ((value & 1U) << 11);
+        return *this;
+    }
+
+    // ib1_ai_valid_c0 (bit 12)
+    CONSTEXPR uint32_t get_ib1_ai_valid_c0() const
+    {
+        return (word0 >> 12) & 1U;
+    }
+    uint32_t get_ib1_ai_valid_c0() const volatile
+    {
+        return (word0 >> 12) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib1_ai_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 12)) | ((value & 1U) << 12);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib1_ai_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 12)) | ((value & 1U) << 12);
+        return *this;
+    }
+
+    // ib1_ai_ready_c0 (bit 13)
+    CONSTEXPR uint32_t get_ib1_ai_ready_c0() const
+    {
+        return (word0 >> 13) & 1U;
+    }
+    uint32_t get_ib1_ai_ready_c0() const volatile
+    {
+        return (word0 >> 13) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib1_ai_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 13)) | ((value & 1U) << 13);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib1_ai_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 13)) | ((value & 1U) << 13);
+        return *this;
+    }
+
+    // ib0_ao_valid_c0 (bit 14)
+    CONSTEXPR uint32_t get_ib0_ao_valid_c0() const
+    {
+        return (word0 >> 14) & 1U;
+    }
+    uint32_t get_ib0_ao_valid_c0() const volatile
+    {
+        return (word0 >> 14) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib0_ao_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 14)) | ((value & 1U) << 14);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib0_ao_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 14)) | ((value & 1U) << 14);
+        return *this;
+    }
+
+    // ib0_ao_ready_c0 (bit 15)
+    CONSTEXPR uint32_t get_ib0_ao_ready_c0() const
+    {
+        return (word0 >> 15) & 1U;
+    }
+    uint32_t get_ib0_ao_ready_c0() const volatile
+    {
+        return (word0 >> 15) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib0_ao_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 15)) | ((value & 1U) << 15);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib0_ao_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 15)) | ((value & 1U) << 15);
+        return *this;
+    }
+
+    // ib1_ao_valid_c0 (bit 16)
+    CONSTEXPR uint32_t get_ib1_ao_valid_c0() const
+    {
+        return (word0 >> 16) & 1U;
+    }
+    uint32_t get_ib1_ao_valid_c0() const volatile
+    {
+        return (word0 >> 16) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib1_ao_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 16)) | ((value & 1U) << 16);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib1_ao_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 16)) | ((value & 1U) << 16);
+        return *this;
+    }
+
+    // ib1_ao_ready_c0 (bit 17)
+    CONSTEXPR uint32_t get_ib1_ao_ready_c0() const
+    {
+        return (word0 >> 17) & 1U;
+    }
+    uint32_t get_ib1_ao_ready_c0() const volatile
+    {
+        return (word0 >> 17) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ib1_ao_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 17)) | ((value & 1U) << 17);
+        return *this;
+    }
+    volatile dma_status0_r &set_ib1_ao_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 17)) | ((value & 1U) << 17);
+        return *this;
+    }
+
+    // ob0_valid_c0 (bit 18)
+    CONSTEXPR uint32_t get_ob0_valid_c0() const
+    {
+        return (word0 >> 18) & 1U;
+    }
+    uint32_t get_ob0_valid_c0() const volatile
+    {
+        return (word0 >> 18) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ob0_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 18)) | ((value & 1U) << 18);
+        return *this;
+    }
+    volatile dma_status0_r &set_ob0_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 18)) | ((value & 1U) << 18);
+        return *this;
+    }
+
+    // ob0_ready_c0 (bit 19)
+    CONSTEXPR uint32_t get_ob0_ready_c0() const
+    {
+        return (word0 >> 19) & 1U;
+    }
+    uint32_t get_ob0_ready_c0() const volatile
+    {
+        return (word0 >> 19) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ob0_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 19)) | ((value & 1U) << 19);
+        return *this;
+    }
+    volatile dma_status0_r &set_ob0_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 19)) | ((value & 1U) << 19);
+        return *this;
+    }
+
+    // ob1_valid_c0 (bit 20)
+    CONSTEXPR uint32_t get_ob1_valid_c0() const
+    {
+        return (word0 >> 20) & 1U;
+    }
+    uint32_t get_ob1_valid_c0() const volatile
+    {
+        return (word0 >> 20) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ob1_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 20)) | ((value & 1U) << 20);
+        return *this;
+    }
+    volatile dma_status0_r &set_ob1_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 20)) | ((value & 1U) << 20);
+        return *this;
+    }
+
+    // ob1_ready_c0 (bit 21)
+    CONSTEXPR uint32_t get_ob1_ready_c0() const
+    {
+        return (word0 >> 21) & 1U;
+    }
+    uint32_t get_ob1_ready_c0() const volatile
+    {
+        return (word0 >> 21) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_ob1_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 21)) | ((value & 1U) << 21);
+        return *this;
+    }
+    volatile dma_status0_r &set_ob1_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 21)) | ((value & 1U) << 21);
+        return *this;
+    }
+
+    // cmd_valid (bit 22)
+    CONSTEXPR uint32_t get_cmd_valid() const
+    {
+        return (word0 >> 22) & 1U;
+    }
+    uint32_t get_cmd_valid() const volatile
+    {
+        return (word0 >> 22) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_cmd_valid(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 22)) | ((value & 1U) << 22);
+        return *this;
+    }
+    volatile dma_status0_r &set_cmd_valid(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 22)) | ((value & 1U) << 22);
+        return *this;
+    }
+
+    // cmd_ready (bit 23)
+    CONSTEXPR uint32_t get_cmd_ready() const
+    {
+        return (word0 >> 23) & 1U;
+    }
+    uint32_t get_cmd_ready() const volatile
+    {
+        return (word0 >> 23) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_cmd_ready(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 23)) | ((value & 1U) << 23);
+        return *this;
+    }
+    volatile dma_status0_r &set_cmd_ready(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 23)) | ((value & 1U) << 23);
+        return *this;
+    }
+
+    // wd_bitstream_valid_c0 (bit 24)
+    CONSTEXPR uint32_t get_wd_bitstream_valid_c0() const
+    {
+        return (word0 >> 24) & 1U;
+    }
+    uint32_t get_wd_bitstream_valid_c0() const volatile
+    {
+        return (word0 >> 24) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_wd_bitstream_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 24)) | ((value & 1U) << 24);
+        return *this;
+    }
+    volatile dma_status0_r &set_wd_bitstream_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 24)) | ((value & 1U) << 24);
+        return *this;
+    }
+
+    // wd_bitstream_ready_c0 (bit 25)
+    CONSTEXPR uint32_t get_wd_bitstream_ready_c0() const
+    {
+        return (word0 >> 25) & 1U;
+    }
+    uint32_t get_wd_bitstream_ready_c0() const volatile
+    {
+        return (word0 >> 25) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_wd_bitstream_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 25)) | ((value & 1U) << 25);
+        return *this;
+    }
+    volatile dma_status0_r &set_wd_bitstream_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 25)) | ((value & 1U) << 25);
+        return *this;
+    }
+
+    // bs_bitstream_valid_c0 (bit 26)
+    CONSTEXPR uint32_t get_bs_bitstream_valid_c0() const
+    {
+        return (word0 >> 26) & 1U;
+    }
+    uint32_t get_bs_bitstream_valid_c0() const volatile
+    {
+        return (word0 >> 26) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_bs_bitstream_valid_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 26)) | ((value & 1U) << 26);
+        return *this;
+    }
+    volatile dma_status0_r &set_bs_bitstream_valid_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 26)) | ((value & 1U) << 26);
+        return *this;
+    }
+
+    // bs_bitstream_ready_c0 (bit 27)
+    CONSTEXPR uint32_t get_bs_bitstream_ready_c0() const
+    {
+        return (word0 >> 27) & 1U;
+    }
+    uint32_t get_bs_bitstream_ready_c0() const volatile
+    {
+        return (word0 >> 27) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_bs_bitstream_ready_c0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 27)) | ((value & 1U) << 27);
+        return *this;
+    }
+    volatile dma_status0_r &set_bs_bitstream_ready_c0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 27)) | ((value & 1U) << 27);
+        return *this;
+    }
+
+    // axi0_ar_stalled (bit 28)
+    CONSTEXPR uint32_t get_axi0_ar_stalled() const
+    {
+        return (word0 >> 28) & 1U;
+    }
+    uint32_t get_axi0_ar_stalled() const volatile
+    {
+        return (word0 >> 28) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_axi0_ar_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 28)) | ((value & 1U) << 28);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_ar_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 28)) | ((value & 1U) << 28);
+        return *this;
+    }
+
+    // axi0_rd_limit_stall (bit 29)
+    CONSTEXPR uint32_t get_axi0_rd_limit_stall() const
+    {
+        return (word0 >> 29) & 1U;
+    }
+    uint32_t get_axi0_rd_limit_stall() const volatile
+    {
+        return (word0 >> 29) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_axi0_rd_limit_stall(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 29)) | ((value & 1U) << 29);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_rd_limit_stall(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 29)) | ((value & 1U) << 29);
+        return *this;
+    }
+
+    // axi0_aw_stalled (bit 30)
+    CONSTEXPR uint32_t get_axi0_aw_stalled() const
+    {
+        return (word0 >> 30) & 1U;
+    }
+    uint32_t get_axi0_aw_stalled() const volatile
+    {
+        return (word0 >> 30) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_axi0_aw_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 30)) | ((value & 1U) << 30);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_aw_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 30)) | ((value & 1U) << 30);
+        return *this;
+    }
+
+    // axi0_w_stalled (bit 31)
+    CONSTEXPR uint32_t get_axi0_w_stalled() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_axi0_w_stalled() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR dma_status0_r &set_axi0_w_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile dma_status0_r &set_axi0_w_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// dma_status1_r - DMA_STATUS1
+struct dma_status1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t axi0_wr_limit_stall : 1; // Write stalled due to one AXI0 limit counter being reached
+            uint32_t axi1_ar_stalled : 1; // Read transfer request stalled on arready low AXI1 (due to memory system)
+            uint32_t axi1_rd_limit_stall : 1; // Read stalled due to one AXI1 limit counter being reached
+            uint32_t axi1_wr_stalled : 1; // Write transfer request stalled on awready low AXI1 (due to memory system)
+            uint32_t axi1_w_stalled : 1;  // Write transfer stalled on wready low AXI1 (due to memory system)
+            uint32_t axi1_wr_limit_stall : 1; // Write stalled due to one AXI1 limit counter being reached
+            uint32_t wgt_idle_c1 : 1;     // When this bit is high means that the WGT block is not busy in generating
+                                          // addresses for a WGT job
+            uint32_t bas_idle_c1 : 1;     // When this bit is high means that the BAS block is not busy in generating
+                                          // addresses for a BAS job
+            uint32_t ib0_ai_valid_c1 : 1; // Data for AI to be read in IFM input buffer 0 - Core 1
+            uint32_t ib0_ai_ready_c1 : 1; // Data consumed from AI in IFM input buffer 0 - Core 1
+            uint32_t ib1_ai_valid_c1 : 1; // Data for AI to be read in IFM input buffer 1 - Core 1
+            uint32_t ib1_ai_ready_c1 : 1; // Data consumed from AI in IFM input buffer 1 - Core 1
+            uint32_t ib0_ao_valid_c1 : 1; // Data for AO to be read in IFM input buffer 0 - Core 1
+            uint32_t ib0_ao_ready_c1 : 1; // Data consumed from AO in IFM input buffer 0 - Core 1
+            uint32_t ib1_ao_valid_c1 : 1; // Data for AO to be read in IFM input buffer 1 - Core 1
+            uint32_t ib1_ao_ready_c1 : 1; // Data consumed from AO in IFM input buffer 1 - Core 1
+            uint32_t ob0_valid_c1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 0 - Core 1
+            uint32_t ob0_ready_c1 : 1;    // Data consumed from DMA in OFM output buffer 0 - Core 1
+            uint32_t ob1_valid_c1 : 1;    // Data for DMA ready to be consumed in OFM output buffer 1 - Core 1
+            uint32_t ob1_ready_c1 : 1;    // Data consumed from DMA in OFM output buffer 1 - Core 1
+            uint32_t wd_bitstream_valid_c1 : 1; // New weight word for WD to be consumed - Core 1
+            uint32_t wd_bitstream_ready_c1 : 1; // Weight word consumed by WD - Core 1
+            uint32_t bs_bitstream_valid_c1 : 1; // New BaS word for AO to be consumed - Core 1
+            uint32_t bs_bitstream_ready_c1 : 1; // BaS word consumed by AO - Core 1
+            uint32_t reserved0 : 8;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0;
+
+  public:
+    // Default-construct with the raw word set to 0.
+    CONSTEXPR dma_status1_r() : word0(0) {}
+    // Wrap a raw 32-bit value. Not explicit, so uint32_t converts implicitly.
+    // NOTE(review): copy() appears to rely on this implicit conversion - confirm before making it explicit.
+    CONSTEXPR dma_status1_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole word with a raw value (returns void, so assignments cannot be chained).
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    // Volatile overload of the raw-value assignment.
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole word back as a raw value (non-const overload).
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    // Volatile overload of the raw-value conversion.
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile dma_status1_r holding the current value of a volatile instance.
+    dma_status1_r copy() volatile
+    {
+        return *this;
+    }
+    // Read the 1-bit axi0_wr_limit_stall field (bit 0 of word0).
+    CONSTEXPR uint32_t get_axi0_wr_limit_stall() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    // Volatile overload: read the 1-bit axi0_wr_limit_stall field (bit 0 of word0).
+    uint32_t get_axi0_wr_limit_stall() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    // Write the 1-bit axi0_wr_limit_stall field (bit 0); returns *this for chaining.
+    CONSTEXPR dma_status1_r &set_axi0_wr_limit_stall(uint32_t value)
+    {
+        word0 = (word0 & ~1U) | (value & 1U);
+        return *this;
+    }
+    // Volatile overload: write the 1-bit axi0_wr_limit_stall field (bit 0); returns *this for chaining.
+    volatile dma_status1_r &set_axi0_wr_limit_stall(uint32_t value) volatile
+    {
+        word0 = (word0 & ~1U) | (value & 1U);
+        return *this;
+    }
+    // Read the 1-bit axi1_ar_stalled field (bit 1 of word0).
+    CONSTEXPR uint32_t get_axi1_ar_stalled() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    // Volatile overload: read the 1-bit axi1_ar_stalled field (bit 1 of word0).
+    uint32_t get_axi1_ar_stalled() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    // Write the 1-bit axi1_ar_stalled field (bit 1) and return *this for chaining.
+    // The clear mask is ~(1 << 1): the previous form (~1) << 1 also wiped bit 0.
+    CONSTEXPR dma_status1_r &set_axi1_ar_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // Volatile overload: write the 1-bit axi1_ar_stalled field (bit 1) and return *this.
+    // The clear mask is ~(1 << 1): the previous form (~1) << 1 also wiped bit 0.
+    volatile dma_status1_r &set_axi1_ar_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // Read the 1-bit axi1_rd_limit_stall field (bit 2 of word0).
+    CONSTEXPR uint32_t get_axi1_rd_limit_stall() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    // Volatile overload: read the 1-bit axi1_rd_limit_stall field (bit 2 of word0).
+    uint32_t get_axi1_rd_limit_stall() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    // Write the 1-bit axi1_rd_limit_stall field (bit 2) and return *this for chaining.
+    // The clear mask is ~(1 << 2): the previous form (~1) << 2 also wiped bits 0..1.
+    CONSTEXPR dma_status1_r &set_axi1_rd_limit_stall(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // Volatile overload: write the 1-bit axi1_rd_limit_stall field (bit 2) and return *this.
+    // The clear mask is ~(1 << 2): the previous form (~1) << 2 also wiped bits 0..1.
+    volatile dma_status1_r &set_axi1_rd_limit_stall(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // Read the 1-bit axi1_wr_stalled field (bit 3 of word0).
+    CONSTEXPR uint32_t get_axi1_wr_stalled() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    // Volatile overload: read the 1-bit axi1_wr_stalled field (bit 3 of word0).
+    uint32_t get_axi1_wr_stalled() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    // Write the 1-bit axi1_wr_stalled field (bit 3) and return *this for chaining.
+    // The clear mask is ~(1 << 3): the previous form (~1) << 3 also wiped bits 0..2.
+    CONSTEXPR dma_status1_r &set_axi1_wr_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // Volatile overload: write the 1-bit axi1_wr_stalled field (bit 3) and return *this.
+    // The clear mask is ~(1 << 3): the previous form (~1) << 3 also wiped bits 0..2.
+    volatile dma_status1_r &set_axi1_wr_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // Read the 1-bit axi1_w_stalled field (bit 4 of word0).
+    CONSTEXPR uint32_t get_axi1_w_stalled() const
+    {
+        return (word0 >> 4) & 1U;
+    }
+    // Volatile overload: read the 1-bit axi1_w_stalled field (bit 4 of word0).
+    uint32_t get_axi1_w_stalled() const volatile
+    {
+        return (word0 >> 4) & 1U;
+    }
+    // Write the 1-bit axi1_w_stalled field (bit 4) and return *this for chaining.
+    // The clear mask is ~(1 << 4): the previous form (~1) << 4 also wiped bits 0..3.
+    CONSTEXPR dma_status1_r &set_axi1_w_stalled(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+    // Volatile overload: write the 1-bit axi1_w_stalled field (bit 4) and return *this.
+    // The clear mask is ~(1 << 4): the previous form (~1) << 4 also wiped bits 0..3.
+    volatile dma_status1_r &set_axi1_w_stalled(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+    // Read the 1-bit axi1_wr_limit_stall field (bit 5 of word0).
+    CONSTEXPR uint32_t get_axi1_wr_limit_stall() const
+    {
+        return (word0 >> 5) & 1U;
+    }
+    uint32_t get_axi1_wr_limit_stall() const volatile
+    {
+        // Volatile overload: isolate bit 5 of word0.
+        return (word0 >> 5) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_axi1_wr_limit_stall(uint32_t value)
+    { // Writes bit 0 of value into bit 5 only; the mask is shifted, then inverted, so bits below 5 survive.
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+    volatile dma_status1_r &set_axi1_wr_limit_stall(uint32_t value) volatile
+    { // Volatile overload: updates bit 5 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wgt_idle_c1() const
+    {
+        // wgt_idle_c1 is the single bit at position 6 of word0.
+        return (word0 >> 6) & 1U;
+    }
+    uint32_t get_wgt_idle_c1() const volatile
+    {
+        // Volatile overload: isolate bit 6 of word0.
+        return (word0 >> 6) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_wgt_idle_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 6 only; the mask is shifted, then inverted, so bits below 6 survive.
+        word0 = (word0 & ~(1U << 6)) | ((value & 1U) << 6);
+        return *this;
+    }
+    volatile dma_status1_r &set_wgt_idle_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 6 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 6)) | ((value & 1U) << 6);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bas_idle_c1() const
+    {
+        // bas_idle_c1 is the single bit at position 7 of word0.
+        return (word0 >> 7) & 1U;
+    }
+    uint32_t get_bas_idle_c1() const volatile
+    {
+        // Volatile overload: isolate bit 7 of word0.
+        return (word0 >> 7) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_bas_idle_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 7 only; the mask is shifted, then inverted, so bits below 7 survive.
+        word0 = (word0 & ~(1U << 7)) | ((value & 1U) << 7);
+        return *this;
+    }
+    volatile dma_status1_r &set_bas_idle_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 7 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 7)) | ((value & 1U) << 7);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib0_ai_valid_c1() const
+    {
+        // ib0_ai_valid_c1 is the single bit at position 8 of word0.
+        return (word0 >> 8) & 1U;
+    }
+    uint32_t get_ib0_ai_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 8 of word0.
+        return (word0 >> 8) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib0_ai_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 8 only; the mask is shifted, then inverted, so bits below 8 survive.
+        word0 = (word0 & ~(1U << 8)) | ((value & 1U) << 8);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ai_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 8 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 8)) | ((value & 1U) << 8);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib0_ai_ready_c1() const
+    {
+        // ib0_ai_ready_c1 is the single bit at position 9 of word0.
+        return (word0 >> 9) & 1U;
+    }
+    uint32_t get_ib0_ai_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 9 of word0.
+        return (word0 >> 9) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib0_ai_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 9 only; the mask is shifted, then inverted, so bits below 9 survive.
+        word0 = (word0 & ~(1U << 9)) | ((value & 1U) << 9);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ai_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 9 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 9)) | ((value & 1U) << 9);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ai_valid_c1() const
+    {
+        // ib1_ai_valid_c1 is the single bit at position 10 of word0.
+        return (word0 >> 10) & 1U;
+    }
+    uint32_t get_ib1_ai_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 10 of word0.
+        return (word0 >> 10) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib1_ai_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 10 only; the mask is shifted, then inverted, so bits below 10 survive.
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ai_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 10 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ai_ready_c1() const
+    {
+        // ib1_ai_ready_c1 is the single bit at position 11 of word0.
+        return (word0 >> 11) & 1U;
+    }
+    uint32_t get_ib1_ai_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 11 of word0.
+        return (word0 >> 11) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib1_ai_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 11 only; the mask is shifted, then inverted, so bits below 11 survive.
+        word0 = (word0 & ~(1U << 11)) | ((value & 1U) << 11);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ai_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 11 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 11)) | ((value & 1U) << 11);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib0_ao_valid_c1() const
+    {
+        // ib0_ao_valid_c1 is the single bit at position 12 of word0.
+        return (word0 >> 12) & 1U;
+    }
+    uint32_t get_ib0_ao_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 12 of word0.
+        return (word0 >> 12) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib0_ao_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 12 only; the mask is shifted, then inverted, so bits below 12 survive.
+        word0 = (word0 & ~(1U << 12)) | ((value & 1U) << 12);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ao_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 12 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 12)) | ((value & 1U) << 12);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib0_ao_ready_c1() const
+    {
+        // ib0_ao_ready_c1 is the single bit at position 13 of word0.
+        return (word0 >> 13) & 1U;
+    }
+    uint32_t get_ib0_ao_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 13 of word0.
+        return (word0 >> 13) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib0_ao_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 13 only; the mask is shifted, then inverted, so bits below 13 survive.
+        word0 = (word0 & ~(1U << 13)) | ((value & 1U) << 13);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib0_ao_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 13 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 13)) | ((value & 1U) << 13);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ao_valid_c1() const
+    {
+        // ib1_ao_valid_c1 is the single bit at position 14 of word0.
+        return (word0 >> 14) & 1U;
+    }
+    uint32_t get_ib1_ao_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 14 of word0.
+        return (word0 >> 14) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib1_ao_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 14 only; the mask is shifted, then inverted, so bits below 14 survive.
+        word0 = (word0 & ~(1U << 14)) | ((value & 1U) << 14);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ao_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 14 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 14)) | ((value & 1U) << 14);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ib1_ao_ready_c1() const
+    {
+        // ib1_ao_ready_c1 is the single bit at position 15 of word0.
+        return (word0 >> 15) & 1U;
+    }
+    uint32_t get_ib1_ao_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 15 of word0.
+        return (word0 >> 15) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ib1_ao_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 15 only; the mask is shifted, then inverted, so bits below 15 survive.
+        word0 = (word0 & ~(1U << 15)) | ((value & 1U) << 15);
+        return *this;
+    }
+    volatile dma_status1_r &set_ib1_ao_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 15 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 15)) | ((value & 1U) << 15);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ob0_valid_c1() const
+    {
+        // ob0_valid_c1 is the single bit at position 16 of word0.
+        return (word0 >> 16) & 1U;
+    }
+    uint32_t get_ob0_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 16 of word0.
+        return (word0 >> 16) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ob0_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 16 only; the mask is shifted, then inverted, so bits below 16 survive.
+        word0 = (word0 & ~(1U << 16)) | ((value & 1U) << 16);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob0_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 16 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 16)) | ((value & 1U) << 16);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ob0_ready_c1() const
+    {
+        // ob0_ready_c1 is the single bit at position 17 of word0.
+        return (word0 >> 17) & 1U;
+    }
+    uint32_t get_ob0_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 17 of word0.
+        return (word0 >> 17) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ob0_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 17 only; the mask is shifted, then inverted, so bits below 17 survive.
+        word0 = (word0 & ~(1U << 17)) | ((value & 1U) << 17);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob0_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 17 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 17)) | ((value & 1U) << 17);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ob1_valid_c1() const
+    {
+        // ob1_valid_c1 is the single bit at position 18 of word0.
+        return (word0 >> 18) & 1U;
+    }
+    uint32_t get_ob1_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 18 of word0.
+        return (word0 >> 18) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ob1_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 18 only; the mask is shifted, then inverted, so bits below 18 survive.
+        word0 = (word0 & ~(1U << 18)) | ((value & 1U) << 18);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob1_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 18 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 18)) | ((value & 1U) << 18);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ob1_ready_c1() const
+    {
+        // ob1_ready_c1 is the single bit at position 19 of word0.
+        return (word0 >> 19) & 1U;
+    }
+    uint32_t get_ob1_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 19 of word0.
+        return (word0 >> 19) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_ob1_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 19 only; the mask is shifted, then inverted, so bits below 19 survive.
+        word0 = (word0 & ~(1U << 19)) | ((value & 1U) << 19);
+        return *this;
+    }
+    volatile dma_status1_r &set_ob1_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 19 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 19)) | ((value & 1U) << 19);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_bitstream_valid_c1() const
+    {
+        // wd_bitstream_valid_c1 is the single bit at position 20 of word0.
+        return (word0 >> 20) & 1U;
+    }
+    uint32_t get_wd_bitstream_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 20 of word0.
+        return (word0 >> 20) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_wd_bitstream_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 20 only; the mask is shifted, then inverted, so bits below 20 survive.
+        word0 = (word0 & ~(1U << 20)) | ((value & 1U) << 20);
+        return *this;
+    }
+    volatile dma_status1_r &set_wd_bitstream_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 20 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 20)) | ((value & 1U) << 20);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_bitstream_ready_c1() const
+    {
+        // wd_bitstream_ready_c1 is the single bit at position 21 of word0.
+        return (word0 >> 21) & 1U;
+    }
+    uint32_t get_wd_bitstream_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 21 of word0.
+        return (word0 >> 21) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_wd_bitstream_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 21 only; the mask is shifted, then inverted, so bits below 21 survive.
+        word0 = (word0 & ~(1U << 21)) | ((value & 1U) << 21);
+        return *this;
+    }
+    volatile dma_status1_r &set_wd_bitstream_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 21 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 21)) | ((value & 1U) << 21);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bs_bitstream_valid_c1() const
+    {
+        // bs_bitstream_valid_c1 is the single bit at position 22 of word0.
+        return (word0 >> 22) & 1U;
+    }
+    uint32_t get_bs_bitstream_valid_c1() const volatile
+    {
+        // Volatile overload: isolate bit 22 of word0.
+        return (word0 >> 22) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_bs_bitstream_valid_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 22 only; the mask is shifted, then inverted, so bits below 22 survive.
+        word0 = (word0 & ~(1U << 22)) | ((value & 1U) << 22);
+        return *this;
+    }
+    volatile dma_status1_r &set_bs_bitstream_valid_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 22 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 22)) | ((value & 1U) << 22);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_bs_bitstream_ready_c1() const
+    {
+        // bs_bitstream_ready_c1 is the single bit at position 23 of word0.
+        return (word0 >> 23) & 1U;
+    }
+    uint32_t get_bs_bitstream_ready_c1() const volatile
+    {
+        // Volatile overload: isolate bit 23 of word0.
+        return (word0 >> 23) & 1U;
+    }
+    CONSTEXPR dma_status1_r &set_bs_bitstream_ready_c1(uint32_t value)
+    { // Writes bit 0 of value into bit 23 only; the mask is shifted, then inverted, so bits below 23 survive.
+        word0 = (word0 & ~(1U << 23)) | ((value & 1U) << 23);
+        return *this;
+    }
+    volatile dma_status1_r &set_bs_bitstream_ready_c1(uint32_t value) volatile
+    { // Volatile overload: updates bit 23 only; shifting the mask before inverting keeps the lower bits.
+        word0 = (word0 & ~(1U << 23)) | ((value & 1U) << 23);
+        return *this;
+    }
+#endif
+};
+
+// clkforce_r - Force clocks on for clock gating
+struct clkforce_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t top_level_clk : 1; // set to 1 to force on TOP level clock
+            uint32_t cc_clk : 1;        // set to 1 to force on CC clock
+            uint32_t dma_clk : 1;       // set to 1 to force on DMA clock
+            uint32_t mac_clk : 1;       // set to 1 to force on MAC clock
+            uint32_t ao_clk : 1;        // set to 1 to force on AO clock
+            uint32_t wd_clk : 1;        // set to 1 to force on WD clock
+            uint32_t reserved0 : 26;    // pads the six flags out to 32 bits
+        };
+        uint32_t word; // the same register viewed as one 32-bit value
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register image; the accessors below address single bits of it
+
+  public:
+    CONSTEXPR clkforce_r() : word0(0) {}                 // all force bits clear
+    CONSTEXPR clkforce_r(uint32_t init) : word0(init) {} // start from a caller-supplied image
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // overwrite the whole image, no masking
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw image
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw image, read through a volatile object
+    }
+    clkforce_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_top_level_clk() const
+    {
+        // top_level_clk lives in bit 0 of word0.
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_top_level_clk() const volatile
+    {
+        // Volatile overload of the bit 0 read.
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_top_level_clk(uint32_t value)
+    { // Replace bit 0 with bit 0 of value; every other bit of word0 is preserved.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this; // returned so set_*() calls can be chained
+    }
+    volatile clkforce_r &set_top_level_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 0 update.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cc_clk() const
+    {
+        // cc_clk lives in bit 1 of word0.
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_cc_clk() const volatile
+    {
+        // Volatile overload of the bit 1 read.
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_cc_clk(uint32_t value)
+    { // Replace bit 1 only; the mask is shifted before it is inverted, so bit 0 is no longer cleared.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile clkforce_r &set_cc_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 1 update.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_dma_clk() const
+    {
+        // dma_clk lives in bit 2 of word0.
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_dma_clk() const volatile
+    {
+        // Volatile overload of the bit 2 read.
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_dma_clk(uint32_t value)
+    { // Replace bit 2 only; the mask is shifted before it is inverted, so bits 0-1 are kept.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile clkforce_r &set_dma_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 2 update.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mac_clk() const
+    {
+        // mac_clk lives in bit 3 of word0.
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_mac_clk() const volatile
+    {
+        // Volatile overload of the bit 3 read.
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_mac_clk(uint32_t value)
+    { // Replace bit 3 only; the mask is shifted before it is inverted, so bits 0-2 are kept.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile clkforce_r &set_mac_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 3 update.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_ao_clk() const
+    {
+        // ao_clk lives in bit 4 of word0.
+        return (word0 >> 4) & 1U;
+    }
+    uint32_t get_ao_clk() const volatile
+    {
+        // Volatile overload of the bit 4 read.
+        return (word0 >> 4) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_ao_clk(uint32_t value)
+    { // Replace bit 4 only; the mask is shifted before it is inverted, so bits 0-3 are kept.
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+    volatile clkforce_r &set_ao_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 4 update.
+        word0 = (word0 & ~(1U << 4)) | ((value & 1U) << 4);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_wd_clk() const
+    {
+        // wd_clk lives in bit 5 of word0.
+        return (word0 >> 5) & 1U;
+    }
+    uint32_t get_wd_clk() const volatile
+    {
+        // Volatile overload of the bit 5 read.
+        return (word0 >> 5) & 1U;
+    }
+    CONSTEXPR clkforce_r &set_wd_clk(uint32_t value)
+    { // Replace bit 5 only; the mask is shifted before it is inverted, so bits 0-4 are kept.
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+    volatile clkforce_r &set_wd_clk(uint32_t value) volatile
+    { // Volatile overload of the same bit 5 update.
+        word0 = (word0 & ~(1U << 5)) | ((value & 1U) << 5);
+        return *this;
+    }
+#endif
+};
+
+// debug_address_r - Set debug address for register reads 0x400-0x7FF. The address must be 1KB aligned
+struct debug_address_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t addr : 32; // Register address
+        };
+        uint32_t word; // same 32 bits, addressed as a whole
+    };
+#else
+  private:
+    uint32_t word0; // the entire register; addr spans all 32 bits
+
+  public:
+    CONSTEXPR debug_address_r() : word0(0) {}                 // starts as zero
+    CONSTEXPR debug_address_r(uint32_t init) : word0(init) {} // starts as init
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // plain overwrite, no masking
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw register value
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw register value, read through a volatile object
+    }
+    debug_address_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_addr() const
+    {
+        // addr is the full word, so no shift or mask is applied.
+        return word0;
+    }
+    uint32_t get_addr() const volatile
+    {
+        // Volatile overload: hand back word0 unchanged.
+        return word0;
+    }
+    CONSTEXPR debug_address_r &set_addr(uint32_t value)
+    {
+        word0 = value; // addr covers every bit, so this replaces the register
+        return *this;  // returned so calls can be chained
+    }
+    volatile debug_address_r &set_addr(uint32_t value) volatile
+    {
+        word0 = value; // volatile overload of the same store
+        return *this;
+    }
+#endif
+};
+
+// debug_misc_r - 32-bit read/write register for driver debug use. This does not affect NPU function
+struct debug_misc_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t misc : 32; // Debug misc
+        };
+        uint32_t word; // same 32 bits, addressed as a whole
+    };
+#else
+  private:
+    uint32_t word0; // the entire register; misc spans all 32 bits
+
+  public:
+    CONSTEXPR debug_misc_r() : word0(0) {}                 // starts as zero
+    CONSTEXPR debug_misc_r(uint32_t init) : word0(init) {} // starts as init
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // plain overwrite, no masking
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw register value
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw register value, read through a volatile object
+    }
+    debug_misc_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_misc() const
+    {
+        // misc is the full word, so no shift or mask is applied.
+        return word0;
+    }
+    uint32_t get_misc() const volatile
+    {
+        // Volatile overload: hand back word0 unchanged.
+        return word0;
+    }
+    CONSTEXPR debug_misc_r &set_misc(uint32_t value)
+    {
+        word0 = value; // misc covers every bit, so this replaces the register
+        return *this;  // returned so calls can be chained
+    }
+    volatile debug_misc_r &set_misc(uint32_t value) volatile
+    {
+        word0 = value; // volatile overload of the same store
+        return *this;
+    }
+#endif
+};
+
+// debugcore_r - Select core number for debug registers (0x200-0x2FF) and RAM reads (0x400-0x7FF). Value is 0 or 1
+struct debugcore_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t core : 32; // Debug core
+        };
+        uint32_t word; // same 32 bits, addressed as a whole
+    };
+#else
+  private:
+    uint32_t word0; // the entire register; core spans all 32 bits
+
+  public:
+    CONSTEXPR debugcore_r() : word0(0) {}                 // starts as zero
+    CONSTEXPR debugcore_r(uint32_t init) : word0(init) {} // starts as init
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // plain overwrite, no masking or range check
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw register value
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw register value, read through a volatile object
+    }
+    debugcore_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_core() const
+    {
+        // core is the full word, so no shift or mask is applied.
+        return word0;
+    }
+    uint32_t get_core() const volatile
+    {
+        // Volatile overload: hand back word0 unchanged.
+        return word0;
+    }
+    CONSTEXPR debugcore_r &set_core(uint32_t value)
+    {
+        word0 = value; // stored as given; the value is not restricted here
+        return *this;  // returned so calls can be chained
+    }
+    volatile debugcore_r &set_core(uint32_t value) volatile
+    {
+        word0 = value; // volatile overload of the same store
+        return *this;
+    }
+#endif
+};
+
+// debug_block_r - Set from which of four block banks the TSU registers are read. 0 = read from the current bank;
+// 256+n = force to read from bank n, where n is in the range 0 to 3
+struct debug_block_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t block : 32; // Debug block
+        };
+        uint32_t word; // same 32 bits, addressed as a whole
+    };
+#else
+  private:
+    uint32_t word0; // the entire register; block spans all 32 bits
+
+  public:
+    CONSTEXPR debug_block_r() : word0(0) {}                 // starts as zero
+    CONSTEXPR debug_block_r(uint32_t init) : word0(init) {} // starts as init
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // plain overwrite, no masking or range check
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw register value
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw register value, read through a volatile object
+    }
+    debug_block_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_block() const
+    {
+        // block is the full word, so no shift or mask is applied.
+        return word0;
+    }
+    uint32_t get_block() const volatile
+    {
+        // Volatile overload: hand back word0 unchanged.
+        return word0;
+    }
+    CONSTEXPR debug_block_r &set_block(uint32_t value)
+    {
+        word0 = value; // stored as given; the value is not restricted here
+        return *this;  // returned so calls can be chained
+    }
+    volatile debug_block_r &set_block(uint32_t value) volatile
+    {
+        word0 = value; // volatile overload of the same store
+        return *this;
+    }
+#endif
+};
+
+// pmcr_r - PMU Register control
+struct pmcr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t cnt_en : 1;        // Enable counter
+            uint32_t event_cnt_rst : 1; // Reset event counter
+            uint32_t cycle_cnt_rst : 1; // Reset cycle counter
+            uint32_t mask_en : 1;       // PMU can be enabled/disabled by command stream operation NPU_OP_PMU_MASK
+            uint32_t reserved0 : 7;
+            uint32_t num_event_cnt : 5; // Number of event counters
+            uint32_t reserved1 : 16;
+        };
+        uint32_t word; // the same register viewed as one 32-bit value
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register image; the accessors below address bit fields of it
+
+  public:
+    CONSTEXPR pmcr_r() : word0(8192) {}              // 8192 == 4 << 11, so get_num_event_cnt() reads 4
+    CONSTEXPR pmcr_r(uint32_t init) : word0(init) {} // start from a caller-supplied image
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // overwrite the whole image, no masking
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw image
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw image, read through a volatile object
+    }
+    pmcr_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_cnt_en() const
+    {
+        // cnt_en lives in bit 0 of word0.
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_cnt_en() const volatile
+    {
+        // Volatile overload of the bit 0 read.
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmcr_r &set_cnt_en(uint32_t value)
+    { // Replace bit 0 with bit 0 of value; every other bit of word0 is preserved.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this; // returned so set_*() calls can be chained
+    }
+    volatile pmcr_r &set_cnt_en(uint32_t value) volatile
+    { // Volatile overload of the same bit 0 update.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_event_cnt_rst() const
+    {
+        // event_cnt_rst lives in bit 1 of word0.
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_event_cnt_rst() const volatile
+    {
+        // Volatile overload of the bit 1 read.
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmcr_r &set_event_cnt_rst(uint32_t value)
+    { // Replace bit 1 only; the mask is shifted before it is inverted, so bit 0 is no longer cleared.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmcr_r &set_event_cnt_rst(uint32_t value) volatile
+    { // Volatile overload of the same bit 1 update.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_cycle_cnt_rst() const
+    {
+        // cycle_cnt_rst lives in bit 2 of word0.
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_cycle_cnt_rst() const volatile
+    {
+        // Volatile overload of the bit 2 read.
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmcr_r &set_cycle_cnt_rst(uint32_t value)
+    { // Replace bit 2 only; the mask is shifted before it is inverted, so bits 0-1 are kept.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmcr_r &set_cycle_cnt_rst(uint32_t value) volatile
+    { // Volatile overload of the same bit 2 update.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_mask_en() const
+    {
+        // mask_en lives in bit 3 of word0.
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_mask_en() const volatile
+    {
+        // Volatile overload of the bit 3 read.
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmcr_r &set_mask_en(uint32_t value)
+    { // Replace bit 3 only; the mask is shifted before it is inverted, so bits 0-2 are kept.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmcr_r &set_mask_en(uint32_t value) volatile
+    { // Volatile overload of the same bit 3 update.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_num_event_cnt() const
+    {
+        // num_event_cnt is the 5-bit field in bits 11..15 of word0.
+        return (word0 >> 11) & 0x1FU;
+    }
+    uint32_t get_num_event_cnt() const volatile
+    {
+        // Volatile overload of the bits 11..15 read.
+        return (word0 >> 11) & 0x1FU;
+    }
+    CONSTEXPR pmcr_r &set_num_event_cnt(uint32_t value)
+    { // Replace bits 11..15 with the low 5 bits of value; bits 0..10 are no longer cleared.
+        word0 = (word0 & ~(0x1FU << 11)) | ((value & 0x1FU) << 11);
+        return *this;
+    }
+    volatile pmcr_r &set_num_event_cnt(uint32_t value) volatile
+    { // Volatile overload of the same bits 11..15 update.
+        word0 = (word0 & ~(0x1FU << 11)) | ((value & 0x1FU) << 11);
+        return *this;
+    }
+#endif
+};
+
+// pmcntenset_r - Count enable set register
+struct pmcntenset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // Event counter enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // Event counter enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // Event counter enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // Event counter enable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT : 1; // PMCCNTR enable bit
+        };
+        uint32_t word; // the same register viewed as one 32-bit value
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register image; the accessors below address single bits of it
+
+  public:
+    CONSTEXPR pmcntenset_r() : word0(0) {}                 // all enable bits clear
+    CONSTEXPR pmcntenset_r(uint32_t init) : word0(init) {} // start from a caller-supplied image
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value; // overwrite the whole image, no masking
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value; // same store through a volatile object
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0; // raw image
+    }
+    operator uint32_t() volatile
+    {
+        return word0; // raw image, read through a volatile object
+    }
+    pmcntenset_r copy() volatile
+    {
+        return *this; // by-value, non-volatile result
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+    {
+        // EVENT_CNT_0 lives in bit 0 of word0.
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0() const volatile
+    {
+        // Volatile overload of the bit 0 read.
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_0(uint32_t value)
+    { // Replace bit 0 with bit 0 of value; every other bit of word0 is preserved.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this; // returned so set_*() calls can be chained
+    }
+    volatile pmcntenset_r &set_EVENT_CNT_0(uint32_t value) volatile
+    { // Volatile overload of the same bit 0 update.
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    {
+        // EVENT_CNT_1 lives in bit 1 of word0.
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1() const volatile
+    {
+        // Volatile overload of the bit 1 read.
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_1(uint32_t value)
+    { // Replace bit 1 only; the mask is shifted before it is inverted, so bit 0 is no longer cleared.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmcntenset_r &set_EVENT_CNT_1(uint32_t value) volatile
+    { // Volatile overload of the same bit 1 update.
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    {
+        // EVENT_CNT_2 lives in bit 2 of word0.
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2() const volatile
+    {
+        // Volatile overload of the bit 2 read.
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_2(uint32_t value)
+    { // Replace bit 2 only; the mask is shifted before it is inverted, so bits 0-1 are kept.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmcntenset_r &set_EVENT_CNT_2(uint32_t value) volatile
+    { // Volatile overload of the same bit 2 update.
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    {
+        // EVENT_CNT_3 lives in bit 3 of word0.
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3() const volatile
+    {
+        // Volatile overload of the bit 3 read.
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmcntenset_r &set_EVENT_CNT_3(uint32_t value)
+    { // Replace bit 3 only; the mask is shifted before it is inverted, so bits 0-2 are kept.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmcntenset_r &set_EVENT_CNT_3(uint32_t value) volatile
+    { // Volatile overload of the same bit 3 update.
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    {
+        // CYCLE_CNT lives in bit 31 of word0.
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT() const volatile
+    {
+        // Volatile overload of the bit 31 read.
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmcntenset_r &set_CYCLE_CNT(uint32_t value)
+    { // Replace bit 31 only; the old mask shifted out to 0 and erased bits 0..30 on every call.
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmcntenset_r &set_CYCLE_CNT(uint32_t value) volatile
+    { // Volatile overload of the same bit 31 update.
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmcntenclr_r - Count enable clear register
+//
+// Layout: bits [3:0] are the disable bits for PMEVCNTR0..PMEVCNTR3, bit 31 is
+// the PMCCNTR disable bit, bits [30:4] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact (inverting first and shifting
+// afterwards would also zero all bits below the field).
+struct pmcntenclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0 : 1; // Event counter disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1 : 1; // Event counter disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2 : 1; // Event counter disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3 : 1; // Event counter disable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT : 1; // PMCCNTR disable bit
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmcntenclr_r() : word0(0) {}
+    CONSTEXPR pmcntenclr_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmcntenclr_r copy() volatile
+    {
+        return *this;
+    }
+    // EVENT_CNT_0 (bit 0): event counter disable bit for PMEVCNTR0
+    CONSTEXPR uint32_t get_EVENT_CNT_0() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_0(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_EVENT_CNT_0(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    // EVENT_CNT_1 (bit 1): event counter disable bit for PMEVCNTR1
+    CONSTEXPR uint32_t get_EVENT_CNT_1() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_1(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_EVENT_CNT_1(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // EVENT_CNT_2 (bit 2): event counter disable bit for PMEVCNTR2
+    CONSTEXPR uint32_t get_EVENT_CNT_2() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_2(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_EVENT_CNT_2(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // EVENT_CNT_3 (bit 3): event counter disable bit for PMEVCNTR3
+    CONSTEXPR uint32_t get_EVENT_CNT_3() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmcntenclr_r &set_EVENT_CNT_3(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_EVENT_CNT_3(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // CYCLE_CNT (bit 31): PMCCNTR disable bit
+    CONSTEXPR uint32_t get_CYCLE_CNT() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmcntenclr_r &set_CYCLE_CNT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmcntenclr_r &set_CYCLE_CNT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmovsset_r - Overflow flag status set register
+//
+// Layout: bits [3:0] are the overflow set bits for PMEVCNTR0..PMEVCNTR3, bit 31
+// is the PMCCNTR overflow set bit, bits [30:4] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact (inverting first and shifting
+// afterwards would also zero all bits below the field).
+struct pmovsset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow set bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow set bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow set bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow set bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow set bit
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmovsset_r() : word0(0) {}
+    CONSTEXPR pmovsset_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmovsset_r copy() volatile
+    {
+        return *this;
+    }
+    // EVENT_CNT_0_OVF (bit 0): event counter overflow set bit for PMEVCNTR0
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0_OVF() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile pmovsset_r &set_EVENT_CNT_0_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    // EVENT_CNT_1_OVF (bit 1): event counter overflow set bit for PMEVCNTR1
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1_OVF() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmovsset_r &set_EVENT_CNT_1_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // EVENT_CNT_2_OVF (bit 2): event counter overflow set bit for PMEVCNTR2
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2_OVF() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmovsset_r &set_EVENT_CNT_2_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // EVENT_CNT_3_OVF (bit 3): event counter overflow set bit for PMEVCNTR3
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3_OVF() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmovsset_r &set_EVENT_CNT_3_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // CYCLE_CNT_OVF (bit 31): PMCCNTR overflow set bit
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT_OVF() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmovsset_r &set_CYCLE_CNT_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmovsclr_r - Overflow flag status clear register
+//
+// Layout: bits [3:0] are the overflow clear bits for PMEVCNTR0..PMEVCNTR3,
+// bit 31 is the PMCCNTR overflow clear bit, bits [30:4] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact (inverting first and shifting
+// afterwards would also zero all bits below the field).
+struct pmovsclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_OVF : 1; // Event counter overflow clear bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_OVF : 1; // Event counter overflow clear bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_OVF : 1; // Event counter overflow clear bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_OVF : 1; // Event counter overflow clear bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_OVF : 1; // PMCCNTR overflow clear bit
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmovsclr_r() : word0(0) {}
+    CONSTEXPR pmovsclr_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmovsclr_r copy() volatile
+    {
+        return *this;
+    }
+    // EVENT_CNT_0_OVF (bit 0): event counter overflow clear bit for PMEVCNTR0
+    CONSTEXPR uint32_t get_EVENT_CNT_0_OVF() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0_OVF() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile pmovsclr_r &set_EVENT_CNT_0_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    // EVENT_CNT_1_OVF (bit 1): event counter overflow clear bit for PMEVCNTR1
+    CONSTEXPR uint32_t get_EVENT_CNT_1_OVF() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1_OVF() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmovsclr_r &set_EVENT_CNT_1_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // EVENT_CNT_2_OVF (bit 2): event counter overflow clear bit for PMEVCNTR2
+    CONSTEXPR uint32_t get_EVENT_CNT_2_OVF() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2_OVF() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmovsclr_r &set_EVENT_CNT_2_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // EVENT_CNT_3_OVF (bit 3): event counter overflow clear bit for PMEVCNTR3
+    CONSTEXPR uint32_t get_EVENT_CNT_3_OVF() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3_OVF() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmovsclr_r &set_EVENT_CNT_3_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // CYCLE_CNT_OVF (bit 31): PMCCNTR overflow clear bit
+    CONSTEXPR uint32_t get_CYCLE_CNT_OVF() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT_OVF() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmovsclr_r &set_CYCLE_CNT_OVF(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmintset_r - Interrupt enable set register
+//
+// Layout: bits [3:0] are the overflow interrupt request enable bits for
+// PMEVCNTR0..PMEVCNTR3, bit 31 is the PMCCNTR overflow interrupt request enable
+// bit, bits [30:4] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact (inverting first and shifting
+// afterwards would also zero all bits below the field).
+struct pmintset_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request enable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request enable bit
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmintset_r() : word0(0) {}
+    CONSTEXPR pmintset_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmintset_r copy() volatile
+    {
+        return *this;
+    }
+    // EVENT_CNT_0_INT (bit 0): overflow interrupt request enable bit for PMEVCNTR0
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0_INT() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmintset_r &set_EVENT_CNT_0_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile pmintset_r &set_EVENT_CNT_0_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    // EVENT_CNT_1_INT (bit 1): overflow interrupt request enable bit for PMEVCNTR1
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1_INT() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmintset_r &set_EVENT_CNT_1_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmintset_r &set_EVENT_CNT_1_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // EVENT_CNT_2_INT (bit 2): overflow interrupt request enable bit for PMEVCNTR2
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2_INT() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmintset_r &set_EVENT_CNT_2_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmintset_r &set_EVENT_CNT_2_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // EVENT_CNT_3_INT (bit 3): overflow interrupt request enable bit for PMEVCNTR3
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3_INT() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmintset_r &set_EVENT_CNT_3_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmintset_r &set_EVENT_CNT_3_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // CYCLE_CNT_INT (bit 31): PMCCNTR overflow interrupt request enable bit
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT_INT() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmintset_r &set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmintset_r &set_CYCLE_CNT_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmintclr_r - Interrupt enable clear register
+//
+// Layout: bits [3:0] are the overflow interrupt request disable bits for
+// PMEVCNTR0..PMEVCNTR3, bit 31 is the PMCCNTR overflow interrupt request
+// disable bit, bits [30:4] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact (inverting first and shifting
+// afterwards would also zero all bits below the field).
+struct pmintclr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EVENT_CNT_0_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR0
+            uint32_t EVENT_CNT_1_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR1
+            uint32_t EVENT_CNT_2_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR2
+            uint32_t EVENT_CNT_3_INT : 1; // Event counter overflow interrupt request disable bit for PMEVCNTR3
+            uint32_t reserved0 : 27;
+            uint32_t CYCLE_CNT_INT : 1; // PMCCNTR overflow interrupt request disable bit
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmintclr_r() : word0(0) {}
+    CONSTEXPR pmintclr_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmintclr_r copy() volatile
+    {
+        return *this;
+    }
+    // EVENT_CNT_0_INT (bit 0): overflow interrupt request disable bit for PMEVCNTR0
+    CONSTEXPR uint32_t get_EVENT_CNT_0_INT() const
+    {
+        return (word0 >> 0) & 1U;
+    }
+    uint32_t get_EVENT_CNT_0_INT() const volatile
+    {
+        return (word0 >> 0) & 1U;
+    }
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    volatile pmintclr_r &set_EVENT_CNT_0_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 0)) | ((value & 1U) << 0);
+        return *this;
+    }
+    // EVENT_CNT_1_INT (bit 1): overflow interrupt request disable bit for PMEVCNTR1
+    CONSTEXPR uint32_t get_EVENT_CNT_1_INT() const
+    {
+        return (word0 >> 1) & 1U;
+    }
+    uint32_t get_EVENT_CNT_1_INT() const volatile
+    {
+        return (word0 >> 1) & 1U;
+    }
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    volatile pmintclr_r &set_EVENT_CNT_1_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 1)) | ((value & 1U) << 1);
+        return *this;
+    }
+    // EVENT_CNT_2_INT (bit 2): overflow interrupt request disable bit for PMEVCNTR2
+    CONSTEXPR uint32_t get_EVENT_CNT_2_INT() const
+    {
+        return (word0 >> 2) & 1U;
+    }
+    uint32_t get_EVENT_CNT_2_INT() const volatile
+    {
+        return (word0 >> 2) & 1U;
+    }
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    volatile pmintclr_r &set_EVENT_CNT_2_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 2)) | ((value & 1U) << 2);
+        return *this;
+    }
+    // EVENT_CNT_3_INT (bit 3): overflow interrupt request disable bit for PMEVCNTR3
+    CONSTEXPR uint32_t get_EVENT_CNT_3_INT() const
+    {
+        return (word0 >> 3) & 1U;
+    }
+    uint32_t get_EVENT_CNT_3_INT() const volatile
+    {
+        return (word0 >> 3) & 1U;
+    }
+    CONSTEXPR pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    volatile pmintclr_r &set_EVENT_CNT_3_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 3)) | ((value & 1U) << 3);
+        return *this;
+    }
+    // CYCLE_CNT_INT (bit 31): PMCCNTR overflow interrupt request disable bit
+    CONSTEXPR uint32_t get_CYCLE_CNT_INT() const
+    {
+        return (word0 >> 31) & 1U;
+    }
+    uint32_t get_CYCLE_CNT_INT() const volatile
+    {
+        return (word0 >> 31) & 1U;
+    }
+    CONSTEXPR pmintclr_r &set_CYCLE_CNT_INT(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+    volatile pmintclr_r &set_CYCLE_CNT_INT(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 31)) | ((value & 1U) << 31);
+        return *this;
+    }
+#endif
+};
+
+// pmccntr_r - Performance monitor cycle count register
+//
+// C builds get a bit-field view (32-bit CYCLE_CNT_LO, 16-bit CYCLE_CNT_HI,
+// 16 reserved bits) overlaid on two raw words. C++ builds store the same two
+// words and convert to and from a single uint64_t, with word0 as the low half
+// and word1 as the high half.
+struct pmccntr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t CYCLE_CNT_LO : 32; // Cycle count - LSB
+            uint32_t CYCLE_CNT_HI : 16; // Cycle count - MSB
+            uint32_t reserved0 : 16;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // Bits [31:0] of the 64-bit image
+    uint32_t word1; // Bits [63:32] of the 64-bit image
+
+  public:
+    // Zero-initialise both words.
+    CONSTEXPR pmccntr_r() : word0(0), word1(0) {}
+    // Split a 64-bit value across the two words.
+    CONSTEXPR pmccntr_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & 0xFFFFFFFFULL)), word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    // Replace both words from a 64-bit value.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFULL);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    // Volatile variant: word0 is stored first, then word1.
+    void operator=(uint64_t value) volatile
+    {
+        word0 = static_cast<uint32_t>(value & 0xFFFFFFFFULL);
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    // Recombine the two words into one 64-bit value.
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    // Volatile variant: expression left in its original form so the pair of
+    // volatile reads is unchanged.
+    operator uint64_t() volatile
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmccntr_r copy() volatile
+    {
+        return *this;
+    }
+#endif
+};
+
+// pmccntr_cfg_r - Set start/stop event on the cycle counter
+//
+// Layout: bits [9:0] hold the cycle counter start event, bits [25:16] hold the
+// cycle counter stop event; bits [15:10] and [31:26] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors typed as NPU_NAMESPACE::pmu_event,
+// each with a plain and a volatile overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact, so writing the stop event no
+// longer erases the start event.
+struct pmccntr_cfg_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t CYCLE_CNT_CFG_START : 10; // Cycle counter start event
+            uint32_t reserved0 : 6;
+            uint32_t CYCLE_CNT_CFG_STOP : 10; // Cycle counter stop event
+            uint32_t reserved1 : 6;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmccntr_cfg_r() : word0(0) {}
+    CONSTEXPR pmccntr_cfg_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmccntr_cfg_r copy() volatile
+    {
+        return *this;
+    }
+    // CYCLE_CNT_CFG_START (bits [9:0]): cycle counter start event
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_START() const
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>((word0 >> 0) & 0x3FFU);
+    }
+    NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_START() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>((word0 >> 0) & 0x3FFU);
+    }
+    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(NPU_NAMESPACE::pmu_event value)
+    {
+        word0 = (word0 & ~(0x3FFU << 0)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 0);
+        return *this;
+    }
+    volatile pmccntr_cfg_r &set_CYCLE_CNT_CFG_START(NPU_NAMESPACE::pmu_event value) volatile
+    {
+        word0 = (word0 & ~(0x3FFU << 0)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 0);
+        return *this;
+    }
+    // CYCLE_CNT_CFG_STOP (bits [25:16]): cycle counter stop event
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_STOP() const
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>((word0 >> 16) & 0x3FFU);
+    }
+    NPU_NAMESPACE::pmu_event get_CYCLE_CNT_CFG_STOP() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::pmu_event>((word0 >> 16) & 0x3FFU);
+    }
+    CONSTEXPR pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(NPU_NAMESPACE::pmu_event value)
+    {
+        word0 = (word0 & ~(0x3FFU << 16)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 16);
+        return *this;
+    }
+    volatile pmccntr_cfg_r &set_CYCLE_CNT_CFG_STOP(NPU_NAMESPACE::pmu_event value) volatile
+    {
+        word0 = (word0 & ~(0x3FFU << 16)) | ((static_cast<uint32_t>(value) & 0x3FFU) << 16);
+        return *this;
+    }
+#endif
+};
+
+// pmcaxi_chan_r - Set which AXI channel to monitor for latency measurements in PMU
+//
+// Layout: bits [3:0] CH_SEL, bits [9:8] AXI_CNT_SEL, bit 10 BW_CH_SEL_EN;
+// bits [7:4] and [31:11] are reserved.
+// C builds get a bit-field view overlaid on the raw word. C++ builds get one
+// 32-bit word plus get_*/set_* accessors, each with a plain and a volatile
+// overload.
+// Setters build the clear mask as ~(mask << shift): shifting before inverting
+// keeps every bit outside the field intact, so writing AXI_CNT_SEL or
+// BW_CH_SEL_EN no longer erases the lower fields.
+struct pmcaxi_chan_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t CH_SEL : 4; // Channel select for latency measurements
+            uint32_t reserved0 : 4;
+            uint32_t AXI_CNT_SEL : 2;  // AXI counter to monitor for latency measurements
+            uint32_t BW_CH_SEL_EN : 1; // Bandwidth channel selector
+            uint32_t reserved1 : 21;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct with all bits cleared, or from a raw register value.
+    CONSTEXPR pmcaxi_chan_r() : word0(0) {}
+    CONSTEXPR pmcaxi_chan_r(uint32_t init) : word0(init) {}
+    // Overwrite the whole register image with a raw value.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the whole register image as a raw value.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    pmcaxi_chan_r copy() volatile
+    {
+        return *this;
+    }
+    // CH_SEL (bits [3:0]): channel select for latency measurements
+    CONSTEXPR NPU_NAMESPACE::pmu_axi_channel get_CH_SEL() const
+    {
+        return static_cast<NPU_NAMESPACE::pmu_axi_channel>((word0 >> 0) & 0xFU);
+    }
+    NPU_NAMESPACE::pmu_axi_channel get_CH_SEL() const volatile
+    {
+        return static_cast<NPU_NAMESPACE::pmu_axi_channel>((word0 >> 0) & 0xFU);
+    }
+    CONSTEXPR pmcaxi_chan_r &set_CH_SEL(NPU_NAMESPACE::pmu_axi_channel value)
+    {
+        word0 = (word0 & ~(0xFU << 0)) | ((static_cast<uint32_t>(value) & 0xFU) << 0);
+        return *this;
+    }
+    volatile pmcaxi_chan_r &set_CH_SEL(NPU_NAMESPACE::pmu_axi_channel value) volatile
+    {
+        word0 = (word0 & ~(0xFU << 0)) | ((static_cast<uint32_t>(value) & 0xFU) << 0);
+        return *this;
+    }
+    // AXI_CNT_SEL (bits [9:8]): AXI counter to monitor for latency measurements
+    CONSTEXPR uint32_t get_AXI_CNT_SEL() const
+    {
+        return (word0 >> 8) & 0x3U;
+    }
+    uint32_t get_AXI_CNT_SEL() const volatile
+    {
+        return (word0 >> 8) & 0x3U;
+    }
+    CONSTEXPR pmcaxi_chan_r &set_AXI_CNT_SEL(uint32_t value)
+    {
+        word0 = (word0 & ~(0x3U << 8)) | ((value & 0x3U) << 8);
+        return *this;
+    }
+    volatile pmcaxi_chan_r &set_AXI_CNT_SEL(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(0x3U << 8)) | ((value & 0x3U) << 8);
+        return *this;
+    }
+    // BW_CH_SEL_EN (bit 10): bandwidth channel selector
+    CONSTEXPR uint32_t get_BW_CH_SEL_EN() const
+    {
+        return (word0 >> 10) & 1U;
+    }
+    uint32_t get_BW_CH_SEL_EN() const volatile
+    {
+        return (word0 >> 10) & 1U;
+    }
+    CONSTEXPR pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value)
+    {
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+    volatile pmcaxi_chan_r &set_BW_CH_SEL_EN(uint32_t value) volatile
+    {
+        word0 = (word0 & ~(1U << 10)) | ((value & 1U) << 10);
+        return *this;
+    }
+#endif
+};
+
+// kernel_x_r - Kernel X offset of in kernel decomposition
+//
+// A single 32-bit value with no sub-fields. C builds get a bit-field/word
+// union; C++ builds get a wrapped word with whole-register accessors, each
+// with a plain and a volatile overload.
+struct kernel_x_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // Raw 32-bit register image
+
+  public:
+    // Construct zeroed, or from a raw register value.
+    CONSTEXPR kernel_x_r() : word0(0) {}
+    CONSTEXPR kernel_x_r(uint32_t init) : word0(init) {}
+    // Overwrite the register image.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        word0 = value;
+    }
+    // Read the register image.
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile
+    {
+        return word0;
+    }
+    // Return a non-volatile copy of a volatile instance.
+    kernel_x_r copy() volatile
+    {
+        return *this;
+    }
+    // The value field spans the whole word, so no masking or shifting is needed.
+    CONSTEXPR uint32_t get_value() const
+    {
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        return word0;
+    }
+    CONSTEXPR kernel_x_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_x_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_y_r - Kernel Y offset in kernel decomposition (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct kernel_y_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR kernel_y_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR kernel_y_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    kernel_y_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_y_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_y_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_w_m1_r - Kernel (width-1) of current block (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct kernel_w_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR kernel_w_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR kernel_w_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    kernel_w_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_w_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_w_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_h_m1_r - Kernel (height-1) of current block (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct kernel_h_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR kernel_h_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR kernel_h_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    kernel_h_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_h_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_h_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_cblk_width_m1_r - OFM current block (width-1) (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_cblk_width_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_cblk_width_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_cblk_width_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_cblk_width_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_cblk_width_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_cblk_width_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_cblk_height_m1_r - OFM current block (height-1) (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_cblk_height_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_cblk_height_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_cblk_height_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_cblk_height_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_cblk_height_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_cblk_height_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_cblk_depth_m1_r - OFM current block (depth-1) (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_cblk_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_cblk_depth_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_cblk_depth_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_cblk_depth_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_cblk_depth_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_cblk_depth_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_cblk_depth_m1_r - IFM current block (depth-1) (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ifm_cblk_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ifm_cblk_depth_m1_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ifm_cblk_depth_m1_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm_cblk_depth_m1_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_cblk_depth_m1_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_depth_m1_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_x_r - Block X coordinate in OFM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_x_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_x_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_x_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_x_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_x_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_x_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_y_r - Block Y coordinate in OFM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_y_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_y_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_y_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_y_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_y_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_y_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_z_r - Block Z (channel) coordinate in OFM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ofm_z_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ofm_z_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ofm_z_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ofm_z_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_z_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_z_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_z_r - Block Z (channel) coordinate in IFM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ifm_z_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ifm_z_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ifm_z_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm_z_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_z_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_z_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pad_top_r - Block top pad (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct pad_top_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR pad_top_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR pad_top_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    pad_top_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pad_top_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pad_top_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pad_left_r - Block left pad (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct pad_left_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR pad_left_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR pad_left_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    pad_left_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pad_left_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pad_left_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_cblk_width_r - IFM current block derived width (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ifm_cblk_width_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ifm_cblk_width_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ifm_cblk_width_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm_cblk_width_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_cblk_width_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_width_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_cblk_height_r - IFM current block derived height (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct ifm_cblk_height_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR ifm_cblk_height_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR ifm_cblk_height_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm_cblk_height_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm_cblk_height_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm_cblk_height_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ifm_src_r - DMA IFM channel source position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_ifm_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_ifm_src_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_ifm_src_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_ifm_src_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// dma_ifm_dst_r - DMA IFM channel destination position in SHRAM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct dma_ifm_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR dma_ifm_dst_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR dma_ifm_dst_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    dma_ifm_dst_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_ifm_dst_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_ifm_dst_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ofm_src_r - DMA OFM channel source position in SHRAM (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct dma_ofm_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR dma_ofm_src_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR dma_ofm_src_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    dma_ofm_src_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_ofm_src_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_ofm_src_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_ofm_dst_r - DMA OFM channel destination position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_ofm_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_ofm_dst_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_ofm_dst_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_ofm_dst_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// dma_weight_src_r - DMA weight channel source position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_weight_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_weight_src_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_weight_src_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_weight_src_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// dma_cmd_src_r - DMA command channel source position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_cmd_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_cmd_src_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_cmd_src_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_cmd_src_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// dma_cmd_size_r - DMA command channel number of bytes buffered (one 32-bit word; C: value/word union, C++: private word0 + accessors)
+struct dma_cmd_size_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value; the only field, it spans the whole word
+        };
+        uint32_t word; // raw view of the same 32 bits
+    };
+#else
+  private:
+    uint32_t word0; // raw register word behind every accessor below
+
+  public:
+    CONSTEXPR dma_cmd_size_r() : word0(0) {} // default state: word cleared to 0
+    CONSTEXPR dma_cmd_size_r(uint32_t init) : word0(init) {} // not explicit, so a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the whole word; returns void, so no chained assignment
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile-qualified object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // implicit read-back of the raw word (not const-qualified)
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile-qualified object
+    {
+        return word0;
+    }
+    dma_cmd_size_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint32_t() volatile + the uint32_t ctor - confirm
+    }
+    CONSTEXPR uint32_t get_value() const // returns `value`, i.e. the full word (no shift or mask)
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile overload of the getter
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma_cmd_size_r &set_value(uint32_t value) // replaces the full word; returns *this for chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma_cmd_size_r &set_value(uint32_t value) volatile // volatile overload; returns a volatile reference to *this
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_m2m_src_r - DMA memory to memory source position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_m2m_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_m2m_src_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_m2m_src_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_m2m_src_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// dma_m2m_dst_r - DMA memory to memory destination position on AXI (two 32-bit words; C: offset_LO/offset_HI fields, C++: raw 64-bit value, no per-field accessors)
+struct dma_m2m_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - low 32 bits
+            uint32_t offset_HI : 8;  // Offset - high 8 bits (32 + 8 = 40 offset bits in total)
+            uint32_t reserved0 : 24; // pads offset_HI out to a full 32 bits
+        };
+        uint32_t word[2]; // raw view of the same storage as two 32-bit words
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit value
+    uint32_t word1; // bits 63:32 of the 64-bit value
+
+  public:
+    CONSTEXPR dma_m2m_dst_r() : word0(0), word1(0) {} // default state: both words cleared to 0
+    CONSTEXPR dma_m2m_dst_r(uint64_t init) : // not explicit, so a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask is a no-op; the cast keeps bits 31:0
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // bits 63:32, not limited to the 8-bit offset_HI
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // overwrites both words; returns void, so no chained assignment
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    void operator=(uint64_t value) volatile // volatile overload: two separate 32-bit stores, word0 then word1
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // bits 31:0
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 63:32
+    }
+    CONSTEXPR operator uint64_t() // implicit read-back: word1 in bits 63:32, word0 in bits 31:0 (not const-qualified)
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // volatile overload: word1 and word0 are read as two separate members
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma_m2m_dst_r copy() volatile // returns a non-volatile, by-value snapshot
+    {
+        return *this; // NOTE(review): presumably converts via operator uint64_t() volatile + the uint64_t ctor - confirm
+    }
+#endif
+};
+
+// current_qread_r - QREAD position being issued (rather than completed)
+struct current_qread_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR current_qread_r() : word0(0U) {}
+    CONSTEXPR current_qread_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    current_qread_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR current_qread_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile current_qread_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma_scale_src_r - DMA scale and bias channel source position on AXI
+struct dma_scale_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t offset_LO : 32; // Offset - LSB
+            uint32_t offset_HI : 8;  // Offset - MSB
+            uint32_t reserved0 : 24;
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits 31:0 of the 64-bit register value
+    uint32_t word1; // bits 63:32 of the 64-bit register value
+
+  public:
+    // Zero-initialize, or split a raw 64-bit value across the two words (low
+    // half into word0, high half into word1).
+    CONSTEXPR dma_scale_src_r() : word0(0U), word1(0U) {}
+    CONSTEXPR dma_scale_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init)), word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+
+    // Store the low half into word0 first, then the high half into word1.
+    // The casts alone truncate to 32 bits, so no explicit mask is applied.
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        this->word0 = static_cast<uint32_t>(value);
+        this->word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile
+    {
+        this->word0 = static_cast<uint32_t>(value);
+        this->word1 = static_cast<uint32_t>(value >> 32);
+    }
+
+    // Reassemble the 64-bit value: word1 supplies bits 63:32 and word0 supplies
+    // bits 31:0.
+    CONSTEXPR operator uint64_t() { return (static_cast<uint64_t>(this->word1) << 32) | this->word0; }
+    operator uint64_t() volatile { return (static_cast<uint64_t>(this->word1) << 32) | this->word0; }
+
+    // Return a by-value copy of a volatile-qualified object.
+    dma_scale_src_r copy() volatile { return *this; }
+#endif
+};
+
+// current_block_r - 0-3. Current block bank being executed by the TSU or last one executed if TSU is stopped
+struct current_block_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR current_block_r() : word0(0U) {}
+    CONSTEXPR current_block_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    current_block_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR current_block_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile current_block_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// current_op_r - Current NPU OP command being executed by the TSU
+struct current_op_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR current_op_r() : word0(0U) {}
+    CONSTEXPR current_op_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    current_op_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR current_op_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile current_op_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// current_cmd_r - Current 32-bit command being parsed by the command stream parser
+struct current_cmd_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR current_cmd_r() : word0(0U) {}
+    CONSTEXPR current_cmd_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    current_cmd_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR current_cmd_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile current_cmd_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pmevcntr_r - Performance monitor event 0 count register
+struct pmevcntr_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t count : 32; // Count word
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR pmevcntr_r() : word0(0U) {}
+    CONSTEXPR pmevcntr_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    pmevcntr_r copy() volatile { return *this; }
+
+    // 'count' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_count() const { return this->word0; }
+    uint32_t get_count() const volatile { return this->word0; }
+
+    CONSTEXPR pmevcntr_r &set_count(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile pmevcntr_r &set_count(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pmevtyper_r - Performance monitor event type register 0
+struct pmevtyper_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t EV_TYPE : 10; // Event Type
+            uint32_t reserved0 : 22;
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR pmevtyper_r() : word0(0U) {}
+    CONSTEXPR pmevtyper_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value) { this->word0 = value; }
+    void operator=(uint32_t value) volatile { this->word0 = value; }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t() { return this->word0; }
+    operator uint32_t() volatile { return this->word0; }
+
+    // Return a by-value copy of a volatile-qualified object.
+    pmevtyper_r copy() volatile { return *this; }
+
+    // EV_TYPE lives in bits 9:0 of the word. The getters keep only those ten
+    // bits and convert them to the pmu_event enumeration.
+    CONSTEXPR NPU_NAMESPACE::pmu_event get_EV_TYPE() const
+    {
+        const uint32_t ev_type_mask = (1U << 10) - 1U;
+        return static_cast<NPU_NAMESPACE::pmu_event>(this->word0 & ev_type_mask);
+    }
+    NPU_NAMESPACE::pmu_event get_EV_TYPE() const volatile
+    {
+        const uint32_t ev_type_mask = (1U << 10) - 1U;
+        return static_cast<NPU_NAMESPACE::pmu_event>(this->word0 & ev_type_mask);
+    }
+
+    // The setters replace bits 9:0 with the low ten bits of 'value' and leave
+    // bits 31:10 as they were: one read of word0 followed by one write.
+    CONSTEXPR pmevtyper_r &set_EV_TYPE(NPU_NAMESPACE::pmu_event value)
+    {
+        const uint32_t ev_type_mask = (1U << 10) - 1U;
+        this->word0 = (this->word0 & ~ev_type_mask) | (static_cast<uint32_t>(value) & ev_type_mask);
+        return *this;
+    }
+    volatile pmevtyper_r &set_EV_TYPE(NPU_NAMESPACE::pmu_event value) volatile
+    {
+        const uint32_t ev_type_mask = (1U << 10) - 1U;
+        this->word0 = (this->word0 & ~ev_type_mask) | (static_cast<uint32_t>(value) & ev_type_mask);
+        return *this;
+    }
+#endif
+};
+
+// shared_buffer_r - Shared buffer debug access. Only valid in STOPPED state
+struct shared_buffer_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t mem_word : 32; // Memory word
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR shared_buffer_r() : word0(0U) {}
+    CONSTEXPR shared_buffer_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    shared_buffer_r copy() volatile { return *this; }
+
+    // 'mem_word' occupies all 32 bits of the word, so the accessors below use
+    // word0 directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_mem_word() const { return this->word0; }
+    uint32_t get_mem_word() const volatile { return this->word0; }
+
+    CONSTEXPR shared_buffer_r &set_mem_word(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile shared_buffer_r &set_mem_word(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_pad_top_r - None
+struct ifm_pad_top_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_pad_top_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_top_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_pad_top_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_pad_top_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_top_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_pad_left_r - None
+struct ifm_pad_left_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_pad_left_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_left_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_pad_left_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_pad_left_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_left_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_pad_right_r - None
+struct ifm_pad_right_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_pad_right_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_right_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_pad_right_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_pad_right_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_right_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_pad_bottom_r - None
+struct ifm_pad_bottom_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_pad_bottom_r() : word0(0U) {}
+    CONSTEXPR ifm_pad_bottom_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_pad_bottom_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_pad_bottom_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_pad_bottom_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_depth_m1_r - None
+struct ifm_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_depth_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_depth_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_depth_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_depth_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_depth_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_precision_r - None
+struct ifm_precision_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_precision_r() : word0(0U) {}
+    CONSTEXPR ifm_precision_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_precision_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_precision_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_precision_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_upscale_r - None
+struct ifm_upscale_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_upscale_r() : word0(0U) {}
+    CONSTEXPR ifm_upscale_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_upscale_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_upscale_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_upscale_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_zero_point_r - None
+struct ifm_zero_point_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_zero_point_r() : word0(0U) {}
+    CONSTEXPR ifm_zero_point_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_zero_point_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_zero_point_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_zero_point_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_width0_m1_r - None
+struct ifm_width0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_width0_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_width0_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_width0_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_width0_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_width0_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_height0_m1_r - None
+struct ifm_height0_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_height0_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_height0_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_height0_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_height0_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_height0_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_height1_m1_r - None
+struct ifm_height1_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_height1_m1_r() : word0(0U) {}
+    CONSTEXPR ifm_height1_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_height1_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_height1_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_height1_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_ib_end_r - None
+struct ifm_ib_end_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_ib_end_r() : word0(0U) {}
+    CONSTEXPR ifm_ib_end_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_ib_end_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_ib_end_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_ib_end_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_region_r - None
+struct ifm_region_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ifm_region_r() : word0(0U) {}
+    CONSTEXPR ifm_region_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ifm_region_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ifm_region_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ifm_region_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_width_m1_r - None
+struct ofm_width_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ofm_width_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_width_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ofm_width_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ofm_width_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ofm_width_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_height_m1_r - None
+struct ofm_height_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ofm_height_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_height_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ofm_height_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ofm_height_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ofm_height_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_depth_m1_r - None
+struct ofm_depth_m1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ofm_depth_m1_r() : word0(0U) {}
+    CONSTEXPR ofm_depth_m1_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ofm_depth_m1_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ofm_depth_m1_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ofm_depth_m1_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_precision_r - None
+struct ofm_precision_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit contents of the register
+
+  public:
+    // Zero-initialize, or wrap an existing raw register word.
+    CONSTEXPR ofm_precision_r() : word0(0U) {}
+    CONSTEXPR ofm_precision_r(uint32_t init) : word0(init) {}
+
+    // Overwrite the whole register word.
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        this->word0 = value;
+    }
+    void operator=(uint32_t value) volatile
+    {
+        this->word0 = value;
+    }
+
+    // Read the whole register word back as a plain integer.
+    CONSTEXPR operator uint32_t()
+    {
+        return this->word0;
+    }
+    operator uint32_t() volatile
+    {
+        return this->word0;
+    }
+
+    // Return a by-value copy of a volatile-qualified object.
+    ofm_precision_r copy() volatile { return *this; }
+
+    // 'value' occupies all 32 bits of the word, so the accessors below use word0
+    // directly, with no masking or shifting.
+    CONSTEXPR uint32_t get_value() const { return this->word0; }
+    uint32_t get_value() const volatile { return this->word0; }
+
+    CONSTEXPR ofm_precision_r &set_value(uint32_t value)
+    {
+        this->word0 = value;
+        return *this;
+    }
+    volatile ofm_precision_r &set_value(uint32_t value) volatile
+    {
+        this->word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_blk_width_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_blk_width_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_blk_width_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_blk_width_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_blk_width_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_blk_width_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_width_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_blk_height_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_blk_height_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_blk_height_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_blk_height_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_blk_height_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_blk_height_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_height_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_blk_depth_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_blk_depth_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_blk_depth_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_blk_depth_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_blk_depth_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_blk_depth_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_blk_depth_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_zero_point_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_zero_point_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_zero_point_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_zero_point_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_zero_point_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_zero_point_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_zero_point_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_width0_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_width0_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_width0_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_width0_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_width0_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_width0_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_width0_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_height0_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_height0_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_height0_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_height0_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_height0_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_height0_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_height0_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_height1_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_height1_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_height1_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_height1_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_height1_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_height1_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_height1_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_region_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ofm_region_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ofm_region_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ofm_region_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ofm_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ofm_region_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_region_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_width_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct kernel_width_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR kernel_width_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR kernel_width_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_width_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_width_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_width_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_height_m1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct kernel_height_m1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR kernel_height_m1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR kernel_height_m1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_height_m1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_height_m1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_height_m1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// kernel_stride_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct kernel_stride_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR kernel_stride_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR kernel_stride_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    kernel_stride_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR kernel_stride_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile kernel_stride_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// parallel_mode_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct parallel_mode_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR parallel_mode_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR parallel_mode_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    parallel_mode_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR parallel_mode_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile parallel_mode_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// acc_format_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct acc_format_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR acc_format_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR acc_format_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    acc_format_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR acc_format_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile acc_format_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// activation_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct activation_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR activation_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR activation_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// activation_min_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct activation_min_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR activation_min_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR activation_min_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_min_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_min_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_min_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// activation_max_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct activation_max_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR activation_max_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR activation_max_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    activation_max_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR activation_max_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile activation_max_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// weight_region_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct weight_region_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR weight_region_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR weight_region_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    weight_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR weight_region_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile weight_region_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// scale_region_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct scale_region_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR scale_region_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR scale_region_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    scale_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR scale_region_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile scale_region_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ab_start_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ab_start_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ab_start_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ab_start_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ab_start_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ab_start_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ab_start_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// blockdep_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct blockdep_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR blockdep_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR blockdep_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    blockdep_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR blockdep_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile blockdep_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_src_region_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct dma0_src_region_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR dma0_src_region_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR dma0_src_region_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_src_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_src_region_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_src_region_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_dst_region_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct dma0_dst_region_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR dma0_dst_region_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR dma0_dst_region_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_dst_region_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_dst_region_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_dst_region_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_size0_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct dma0_size0_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR dma0_size0_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR dma0_size0_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_size0_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_size0_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_size0_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_size1_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct dma0_size1_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR dma0_size1_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR dma0_size1_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    dma0_size1_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR dma0_size1_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile dma0_size1_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_broadcast_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ifm2_broadcast_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ifm2_broadcast_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ifm2_broadcast_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_broadcast_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_broadcast_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_broadcast_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_scalar_r - wraps one 32-bit register word, exposed as a single field "value" (source description: "None")
+struct ifm2_scalar_r
+{
+#ifndef __cplusplus
+    union // C layout: the same 32 bits viewed as bit-field `value` or as plain `word`
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // whole register as one 32-bit word
+    };
+#else
+  private:
+    uint32_t word0; // C++ layout: raw 32-bit storage, reachable only through the members below
+
+  public:
+    CONSTEXPR ifm2_scalar_r() : word0(0) {} // default-constructs with the word cleared to 0
+    CONSTEXPR ifm2_scalar_r(uint32_t init) : word0(init) {} // not marked explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // overwrites the word; returns void, so it cannot be chained
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the word back; not const-qualified
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, for volatile-qualified objects
+    {
+        return word0;
+    }
+    ifm2_scalar_r copy() volatile // returns a by-value, non-volatile copy of a volatile object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // getter for `value`, which spans the whole word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // same getter, for const volatile access
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_scalar_r &set_value(uint32_t value) // setter for `value`; returns *this to allow chaining
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_scalar_r &set_value(uint32_t value) volatile // same setter, returning a volatile reference
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_precision_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_precision_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_precision_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_precision_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_precision_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_precision_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_precision_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_zero_point_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_zero_point_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_zero_point_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_zero_point_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_zero_point_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_zero_point_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_zero_point_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_width0_m1_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_width0_m1_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_width0_m1_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_width0_m1_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_width0_m1_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_width0_m1_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_width0_m1_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height0_m1_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_height0_m1_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_height0_m1_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_height0_m1_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_height0_m1_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_height0_m1_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_height0_m1_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_height1_m1_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_height1_m1_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_height1_m1_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_height1_m1_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_height1_m1_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_height1_m1_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_height1_m1_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_ib_start_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_ib_start_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_ib_start_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_ib_start_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_ib_start_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_ib_start_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_ib_start_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm2_region_r - wrapper around a single 32-bit register word (original description: None)
+struct ifm2_region_r
+{
+#ifndef __cplusplus // C view: anonymous union of a bitfield and the raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // the same 32 bits accessed as a whole word
+    };
+#else // C++ view: private storage behind accessors
+  private:
+    uint32_t word0; // raw 32-bit contents
+
+  public:
+    CONSTEXPR ifm2_region_r() : word0(0) {} // starts with every bit cleared
+    CONSTEXPR ifm2_region_r(uint32_t init) : word0(init) {} // not explicit: a uint32_t converts implicitly
+    CONSTEXPR void operator=(uint32_t value) // replaces the whole word; returns void, so no assignment chaining
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, for volatile-qualified objects
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // yields the raw word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // yields the raw word of a volatile-qualified object
+    {
+        return word0;
+    }
+    ifm2_region_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // the value field spans the entire word
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified counterpart of get_value()
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR ifm2_region_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can be chained
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ifm2_region_r &set_value(uint32_t value) volatile // volatile-qualified counterpart of set_value()
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ifm_base0_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_base0_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base0_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_base0_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base0_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base1_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_base1_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base1_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_base1_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base1_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base2_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_base2_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base2_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_base2_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base2_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_base3_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_base3_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_base3_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_base3_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_base3_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_x_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_stride_x_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_x_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_stride_x_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_x_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_y_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_stride_y_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_y_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_stride_y_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_y_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm_stride_c_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ifm_stride_c_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ifm_stride_c_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ifm_stride_c_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm_stride_c_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base0_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_base0_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base0_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_base0_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base0_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base1_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_base1_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base1_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_base1_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base1_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base2_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_base2_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base2_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_base2_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base2_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_base3_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_base3_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_base3_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_base3_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_base3_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_x_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_stride_x_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_x_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_stride_x_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_x_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_y_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_stride_y_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_y_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_stride_y_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_y_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_stride_c_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct ofm_stride_c_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR ofm_stride_c_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR ofm_stride_c_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ofm_stride_c_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight_base_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct weight_base_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR weight_base_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR weight_base_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight_base_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight_length_r - wrapper around a 64-bit register value held as two 32-bit words (original description: None)
+struct weight_length_r
+{
+#ifndef __cplusplus // C view: anonymous union of two bitfields and a two-word array
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // 64-bit register value - LSB (low 32 bits)
+            uint32_t value_HI : 32; // 64-bit register value - MSB (high 32 bits)
+        };
+        uint32_t word[2]; // the same storage viewed as two whole 32-bit words
+    };
+#else // C++ view: private storage behind conversions
+  private:
+    uint32_t word0; // bits 31..0 of the 64-bit value
+    uint32_t word1; // bits 63..32 of the 64-bit value
+
+  public:
+    CONSTEXPR weight_length_r() : word0(0), word1(0) {} // starts with every bit cleared
+    CONSTEXPR weight_length_r(uint64_t init) : // not explicit: a uint64_t converts implicitly
+        word0(static_cast<uint32_t>((init)&std::numeric_limits<uint64_t>::max())), // all-ones mask keeps every bit; the cast truncates to the low half
+        word1(static_cast<uint32_t>((init >> 32) & std::numeric_limits<uint64_t>::max())) // high half, shifted down before the cast
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value) // replaces both words; returns void, so no assignment chaining
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    void operator=(uint64_t value) volatile // same split, written as two 32-bit stores (low word first)
+    {
+        word0 = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+        word1 = static_cast<uint32_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+    }
+    CONSTEXPR operator uint64_t() // reassembles the value as (word1 << 32) | word0
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    operator uint64_t() volatile // same reassembly for a volatile-qualified object
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight_length_r copy() volatile // returns the current contents by value
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale_base_r - 64-bit register value kept as two 32-bit words (low word first)
+struct scale_base_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR scale_base_r() : word0(0), word1(0) {}
+    CONSTEXPR scale_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale_base_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale_length_r - 64-bit register value kept as two 32-bit words (low word first)
+struct scale_length_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR scale_length_r() : word0(0), word1(0) {}
+    CONSTEXPR scale_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale_length_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ofm_scale_r - single 32-bit register word; the whole word is the value field
+struct ofm_scale_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // full-width field covering the 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register value
+
+  public:
+    CONSTEXPR ofm_scale_r() : word0(0) {}
+    CONSTEXPR ofm_scale_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile-qualified twin of the above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile-qualified twin of the above
+    {
+        return word0;
+    }
+    ofm_scale_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    CONSTEXPR ofm_scale_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_scale_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// ofm_scale_shift_r - single 32-bit register word; the whole word is the value field
+struct ofm_scale_shift_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // full-width field covering the 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register value
+
+  public:
+    CONSTEXPR ofm_scale_shift_r() : word0(0) {}
+    CONSTEXPR ofm_scale_shift_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile-qualified twin of the above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile-qualified twin of the above
+    {
+        return word0;
+    }
+    ofm_scale_shift_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    CONSTEXPR ofm_scale_shift_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile ofm_scale_shift_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opa_scale_r - single 32-bit register word; the whole word is the value field
+struct opa_scale_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // full-width field covering the 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register value
+
+  public:
+    CONSTEXPR opa_scale_r() : word0(0) {}
+    CONSTEXPR opa_scale_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile-qualified twin of the above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile-qualified twin of the above
+    {
+        return word0;
+    }
+    opa_scale_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    CONSTEXPR opa_scale_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opa_scale_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opa_scale_shift_r - single 32-bit register word; the whole word is the value field
+struct opa_scale_shift_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // full-width field covering the 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register value
+
+  public:
+    CONSTEXPR opa_scale_shift_r() : word0(0) {}
+    CONSTEXPR opa_scale_shift_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile-qualified twin of the above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile-qualified twin of the above
+    {
+        return word0;
+    }
+    opa_scale_shift_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    CONSTEXPR opa_scale_shift_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opa_scale_shift_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// opb_scale_r - single 32-bit register word; the whole word is the value field
+struct opb_scale_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // full-width field covering the 32-bit register value
+        };
+        uint32_t word;
+    };
+#else
+  private:
+    uint32_t word0; // raw 32-bit register value
+
+  public:
+    CONSTEXPR opb_scale_r() : word0(0) {}
+    CONSTEXPR opb_scale_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value)
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // volatile-qualified twin of the above
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t()
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // volatile-qualified twin of the above
+    {
+        return word0;
+    }
+    opb_scale_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    uint32_t get_value() const volatile
+    {
+        // The field spans the entire word, so no mask or shift is applied.
+        return word0;
+    }
+    CONSTEXPR opb_scale_r &set_value(uint32_t value)
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile opb_scale_r &set_value(uint32_t value) volatile
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// dma0_src_r - 64-bit register value kept as two 32-bit words (low word first)
+struct dma0_src_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR dma0_src_r() : word0(0), word1(0) {}
+    CONSTEXPR dma0_src_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_src_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_dst_r - 64-bit register value kept as two 32-bit words (low word first)
+struct dma0_dst_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR dma0_dst_r() : word0(0), word1(0) {}
+    CONSTEXPR dma0_dst_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_dst_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_len_r - 64-bit register value kept as two 32-bit words (low word first)
+struct dma0_len_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR dma0_len_r() : word0(0), word1(0) {}
+    CONSTEXPR dma0_len_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_len_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_skip0_r - 64-bit register value kept as two 32-bit words (low word first)
+struct dma0_skip0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR dma0_skip0_r() : word0(0), word1(0) {}
+    CONSTEXPR dma0_skip0_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_skip0_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// dma0_skip1_r - 64-bit register value kept as two 32-bit words (low word first)
+struct dma0_skip1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR dma0_skip1_r() : word0(0), word1(0) {}
+    CONSTEXPR dma0_skip1_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    dma0_skip1_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base0_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_base0_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_base0_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_base0_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base0_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base1_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_base1_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_base1_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_base1_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base1_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base2_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_base2_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_base2_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_base2_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base2_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_base3_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_base3_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_base3_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_base3_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_base3_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_x_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_stride_x_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_stride_x_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_stride_x_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_x_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_y_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_stride_y_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_stride_y_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_stride_y_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_y_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// ifm2_stride_c_r - 64-bit register value kept as two 32-bit words (low word first)
+struct ifm2_stride_c_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR ifm2_stride_c_r() : word0(0), word1(0) {}
+    CONSTEXPR ifm2_stride_c_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    ifm2_stride_c_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight1_base_r - 64-bit register value kept as two 32-bit words (low word first)
+struct weight1_base_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR weight1_base_r() : word0(0), word1(0) {}
+    CONSTEXPR weight1_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight1_base_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// weight1_length_r - 64-bit register value kept as two 32-bit words (low word first)
+struct weight1_length_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR weight1_length_r() : word0(0), word1(0) {}
+    CONSTEXPR weight1_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    weight1_length_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale1_base_r - 64-bit register value kept as two 32-bit words (low word first)
+struct scale1_base_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR scale1_base_r() : word0(0), word1(0) {}
+    CONSTEXPR scale1_base_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale1_base_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// scale1_length_r - 64-bit register value kept as two 32-bit words (low word first)
+struct scale1_length_r
+{
+#ifndef __cplusplus
+    union
+    {
+        struct
+        {
+            uint32_t value_LO : 32; // lower 32 bits of the 64-bit register value
+            uint32_t value_HI : 32; // upper 32 bits of the 64-bit register value
+        };
+        uint32_t word[2];
+    };
+#else
+  private:
+    uint32_t word0; // bits [31:0]
+    uint32_t word1; // bits [63:32]
+
+  public:
+    CONSTEXPR scale1_length_r() : word0(0), word1(0) {}
+    CONSTEXPR scale1_length_r(uint64_t init) :
+        word0(static_cast<uint32_t>(init & std::numeric_limits<uint32_t>::max())),
+        word1(static_cast<uint32_t>(init >> 32))
+    {
+    }
+    CONSTEXPR void operator=(uint64_t value)
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    void operator=(uint64_t value) volatile // same split, usable on volatile objects
+    {
+        word0 = static_cast<uint32_t>(value & std::numeric_limits<uint32_t>::max());
+        word1 = static_cast<uint32_t>(value >> 32);
+    }
+    CONSTEXPR operator uint64_t()
+    {
+        return static_cast<uint64_t>(word0) | (static_cast<uint64_t>(word1) << 32);
+    }
+    operator uint64_t() volatile // operand order left untouched for the volatile reads
+    {
+        return (static_cast<uint64_t>(word1) << 32) | word0;
+    }
+    scale1_length_r copy() volatile // by-value, non-volatile snapshot
+    {
+        return *this;
+    }
+#endif
+};
+
+// revision_r - Internal FPGA build revision: first 32-bits of the Ultan Git hash used for the build
+struct revision_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t value : 32; // 32-bit register value
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR revision_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR revision_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    revision_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_value() const // returns the full word, unmodified
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_value() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR revision_r &set_value(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile revision_r &set_value(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid4_r - Peripheral ID byte 4 (Arm=code 4)
+struct pid4_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID4 : 32; // Byte 4 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid4_r() : word0(4) {} // default-constructed value is 4
+    CONSTEXPR pid4_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid4_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID4() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID4() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid4_r &set_PID4(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid4_r &set_PID4(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid5_r - Peripheral ID byte 5 (reserved)
+struct pid5_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID5 : 32; // Byte 5 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid5_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid5_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid5_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID5() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID5() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid5_r &set_PID5(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid5_r &set_PID5(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid6_r - Peripheral ID byte 6 (reserved)
+struct pid6_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID6 : 32; // Byte 6 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid6_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid6_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid6_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID6() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID6() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid6_r &set_PID6(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid6_r &set_PID6(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid7_r - Peripheral ID byte 7 (reserved)
+struct pid7_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID7 : 32; // Byte 7 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid7_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid7_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid7_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID7() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID7() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid7_r &set_PID7(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid7_r &set_PID7(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid0_r - Peripheral ID byte 0. This is bits[7:0] of the part number
+struct pid0_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID0 : 32; // Byte 0 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid0_r() : word0(129) {} // default-constructed value is 129 (0x81)
+    CONSTEXPR pid0_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid0_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID0() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID0() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid0_r &set_PID0(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid0_r &set_PID0(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid1_r - Peripheral ID byte 1. This is bits[11:8] of the part number in bits[3:0], and bits[3:0] of the Arm ID in
+// bits[7:4]
+struct pid1_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID1 : 32; // Byte 1 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid1_r() : word0(181) {} // default-constructed value is 181 (0xB5)
+    CONSTEXPR pid1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid1_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID1() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID1() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid1_r &set_PID1(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid1_r &set_PID1(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid2_r - Peripheral ID byte 2. This is bits[6:4] of the Arm ID in bits[2:0], and bit 3 indicates format B
+struct pid2_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID2 : 32; // Byte 2 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid2_r() : word0(11) {} // default-constructed value is 11 (0x0B)
+    CONSTEXPR pid2_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid2_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID2() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID2() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid2_r &set_PID2(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid2_r &set_PID2(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// pid3_r - Peripheral ID byte 3
+struct pid3_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t PID3 : 32; // Byte 3 of Peripheral ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR pid3_r() : word0(0) {} // default-constructed value is 0
+    CONSTEXPR pid3_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    pid3_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_PID3() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_PID3() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR pid3_r &set_PID3(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile pid3_r &set_PID3(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid0_r - Component ID byte 0
+struct cid0_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID0 : 32; // Byte 0 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR cid0_r() : word0(13) {} // default-constructed value is 13 (0x0D)
+    CONSTEXPR cid0_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    cid0_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID0() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID0() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid0_r &set_CID0(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid0_r &set_CID0(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid1_r - Component ID byte 1
+struct cid1_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID1 : 32; // Byte 1 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR cid1_r() : word0(240) {} // default-constructed value is 240 (0xF0)
+    CONSTEXPR cid1_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    cid1_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID1() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID1() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid1_r &set_CID1(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid1_r &set_CID1(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid2_r - Component ID byte 2
+struct cid2_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID2 : 32; // Byte 2 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR cid2_r() : word0(5) {} // default-constructed value is 5 (0x05)
+    CONSTEXPR cid2_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    cid2_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID2() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID2() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid2_r &set_CID2(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid2_r &set_CID2(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+// cid3_r - Component ID byte 3
+struct cid3_r // wraps one 32-bit register word; C and C++ builds get different views
+{
+#ifndef __cplusplus // C view: a 32-bit bit-field overlaid on a raw word
+    union
+    {
+        struct
+        {
+            uint32_t CID3 : 32; // Byte 3 of Component ID (Lower 8 bits valid)
+        };
+        uint32_t word; // raw access to the same 32 bits
+    };
+#else // C++ view: one private word behind operators and get_/set_ accessors
+  private:
+    uint32_t word0; // entire register contents
+
+  public:
+    CONSTEXPR cid3_r() : word0(177) {} // default-constructed value is 177 (0xB1)
+    CONSTEXPR cid3_r(uint32_t init) : word0(init) {}
+    CONSTEXPR void operator=(uint32_t value) // whole-word store; returns nothing
+    {
+        word0 = value;
+    }
+    void operator=(uint32_t value) volatile // same store, callable on a volatile object
+    {
+        word0 = value;
+    }
+    CONSTEXPR operator uint32_t() // reads the whole word
+    {
+        return word0;
+    }
+    operator uint32_t() volatile // same read, callable on a volatile object
+    {
+        return word0;
+    }
+    cid3_r copy() volatile // returns the contents as a non-volatile by-value object
+    {
+        return *this;
+    }
+    CONSTEXPR uint32_t get_CID3() const // returns all 32 bits; no masking to the low byte is done here
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    uint32_t get_CID3() const volatile // volatile-qualified twin of the getter above
+    {
+        uint32_t value = word0;
+        return value;
+    }
+    CONSTEXPR cid3_r &set_CID3(uint32_t value) // overwrites the word; returns *this so calls can chain
+    {
+        word0 = value;
+        return *this;
+    }
+    volatile cid3_r &set_CID3(uint32_t value) volatile // volatile-qualified twin of the setter above
+    {
+        word0 = value;
+        return *this;
+    }
+#endif
+};
+
+struct NPU_REG
+{
+    STRUCT id_r ID;           // 0x0000
+    STRUCT status_r STATUS;   // 0x0004
+    STRUCT cmd_r CMD;         // 0x0008
+    STRUCT reset_r RESET;     // 0x000C
+    STRUCT qbase_r QBASE;     // 0x0010
+    STRUCT qread_r QREAD;     // 0x0018
+    STRUCT qconfig_r QCONFIG; // 0x001C
+    STRUCT qsize_r QSIZE;     // 0x0020
+    STRUCT prot_r PROT;       // 0x0024
+    STRUCT config_r CONFIG;   // 0x0028
+    STRUCT lock_r LOCK;       // 0x002C
+    uint32_t unused0[3];
+    STRUCT regioncfg_r REGIONCFG;   // 0x003C
+    STRUCT axi_limit0_r AXI_LIMIT0; // 0x0040
+    STRUCT axi_limit1_r AXI_LIMIT1; // 0x0044
+    STRUCT axi_limit2_r AXI_LIMIT2; // 0x0048
+    STRUCT axi_limit3_r AXI_LIMIT3; // 0x004C
+    uint32_t unused1[12];
+    STRUCT basep_r BASEP[8]; // 0x0080
+    uint32_t unused2[16];
+    STRUCT wd_status_r WD_STATUS;   // 0x0100
+    STRUCT mac_status_r MAC_STATUS; // 0x0104
+    STRUCT ao_status_r AO_STATUS;   // 0x0108
+    uint32_t unused3[1];
+    STRUCT dma_status0_r DMA_STATUS0; // 0x0110
+    STRUCT dma_status1_r DMA_STATUS1; // 0x0114
+    uint32_t unused4[10];
+    STRUCT clkforce_r CLKFORCE;           // 0x0140
+    STRUCT debug_address_r DEBUG_ADDRESS; // 0x0144
+    STRUCT debug_misc_r DEBUG_MISC;       // 0x0148
+    STRUCT debugcore_r DEBUGCORE;         // 0x014C
+    STRUCT debug_block_r DEBUG_BLOCK;     // 0x0150
+    uint32_t unused5[11];
+    STRUCT pmcr_r PMCR;             // 0x0180
+    STRUCT pmcntenset_r PMCNTENSET; // 0x0184
+    STRUCT pmcntenclr_r PMCNTENCLR; // 0x0188
+    STRUCT pmovsset_r PMOVSSET;     // 0x018C
+    STRUCT pmovsclr_r PMOVSCLR;     // 0x0190
+    STRUCT pmintset_r PMINTSET;     // 0x0194
+    STRUCT pmintclr_r PMINTCLR;     // 0x0198
+    uint32_t unused6[1];
+    STRUCT pmccntr_r PMCCNTR;         // 0x01A0
+    STRUCT pmccntr_cfg_r PMCCNTR_CFG; // 0x01A8
+    STRUCT pmcaxi_chan_r PMCAXI_CHAN; // 0x01AC
+    uint32_t unused7[20];
+    STRUCT kernel_x_r KERNEL_X;                     // 0x0200
+    STRUCT kernel_y_r KERNEL_Y;                     // 0x0204
+    STRUCT kernel_w_m1_r KERNEL_W_M1;               // 0x0208
+    STRUCT kernel_h_m1_r KERNEL_H_M1;               // 0x020C
+    STRUCT ofm_cblk_width_m1_r OFM_CBLK_WIDTH_M1;   // 0x0210
+    STRUCT ofm_cblk_height_m1_r OFM_CBLK_HEIGHT_M1; // 0x0214
+    STRUCT ofm_cblk_depth_m1_r OFM_CBLK_DEPTH_M1;   // 0x0218
+    STRUCT ifm_cblk_depth_m1_r IFM_CBLK_DEPTH_M1;   // 0x021C
+    STRUCT ofm_x_r OFM_X;                           // 0x0220
+    STRUCT ofm_y_r OFM_Y;                           // 0x0224
+    STRUCT ofm_z_r OFM_Z;                           // 0x0228
+    STRUCT ifm_z_r IFM_Z;                           // 0x022C
+    STRUCT pad_top_r PAD_TOP;                       // 0x0230
+    STRUCT pad_left_r PAD_LEFT;                     // 0x0234
+    STRUCT ifm_cblk_width_r IFM_CBLK_WIDTH;         // 0x0238
+    STRUCT ifm_cblk_height_r IFM_CBLK_HEIGHT;       // 0x023C
+    STRUCT dma_ifm_src_r DMA_IFM_SRC;               // 0x0240
+    STRUCT dma_ifm_dst_r DMA_IFM_DST;               // 0x0248
+    STRUCT dma_ofm_src_r DMA_OFM_SRC;               // 0x024C
+    STRUCT dma_ofm_dst_r DMA_OFM_DST;               // 0x0250
+    STRUCT dma_weight_src_r DMA_WEIGHT_SRC;         // 0x0258
+    STRUCT dma_cmd_src_r DMA_CMD_SRC;               // 0x0260
+    STRUCT dma_cmd_size_r DMA_CMD_SIZE;             // 0x0268
+    STRUCT dma_m2m_src_r DMA_M2M_SRC;               // 0x026C
+    STRUCT dma_m2m_dst_r DMA_M2M_DST;               // 0x0274
+    STRUCT current_qread_r CURRENT_QREAD;           // 0x027C
+    STRUCT dma_scale_src_r DMA_SCALE_SRC;           // 0x0280
+    uint32_t unused8[11];
+    STRUCT current_block_r CURRENT_BLOCK; // 0x02B4
+    STRUCT current_op_r CURRENT_OP;       // 0x02B8
+    STRUCT current_cmd_r CURRENT_CMD;     // 0x02BC
+    uint32_t unused9[16];
+    STRUCT pmevcntr_r PMEVCNTR[4]; // 0x0300
+    uint32_t unused10[28];
+    STRUCT pmevtyper_r PMEVTYPER[4]; // 0x0380
+    uint32_t unused11[28];
+    STRUCT shared_buffer_r SHARED_BUFFER[256]; // 0x0400
+    STRUCT ifm_pad_top_r IFM_PAD_TOP;          // 0x0800
+    STRUCT ifm_pad_left_r IFM_PAD_LEFT;        // 0x0804
+    STRUCT ifm_pad_right_r IFM_PAD_RIGHT;      // 0x0808
+    STRUCT ifm_pad_bottom_r IFM_PAD_BOTTOM;    // 0x080C
+    STRUCT ifm_depth_m1_r IFM_DEPTH_M1;        // 0x0810
+    STRUCT ifm_precision_r IFM_PRECISION;      // 0x0814
+    uint32_t unused12[1];
+    STRUCT ifm_upscale_r IFM_UPSCALE; // 0x081C
+    uint32_t unused13[1];
+    STRUCT ifm_zero_point_r IFM_ZERO_POINT; // 0x0824
+    STRUCT ifm_width0_m1_r IFM_WIDTH0_M1;   // 0x0828
+    STRUCT ifm_height0_m1_r IFM_HEIGHT0_M1; // 0x082C
+    STRUCT ifm_height1_m1_r IFM_HEIGHT1_M1; // 0x0830
+    STRUCT ifm_ib_end_r IFM_IB_END;         // 0x0834
+    uint32_t unused14[1];
+    STRUCT ifm_region_r IFM_REGION; // 0x083C
+    uint32_t unused15[1];
+    STRUCT ofm_width_m1_r OFM_WIDTH_M1;           // 0x0844
+    STRUCT ofm_height_m1_r OFM_HEIGHT_M1;         // 0x0848
+    STRUCT ofm_depth_m1_r OFM_DEPTH_M1;           // 0x084C
+    STRUCT ofm_precision_r OFM_PRECISION;         // 0x0850
+    STRUCT ofm_blk_width_m1_r OFM_BLK_WIDTH_M1;   // 0x0854
+    STRUCT ofm_blk_height_m1_r OFM_BLK_HEIGHT_M1; // 0x0858
+    STRUCT ofm_blk_depth_m1_r OFM_BLK_DEPTH_M1;   // 0x085C
+    STRUCT ofm_zero_point_r OFM_ZERO_POINT;       // 0x0860
+    uint32_t unused16[1];
+    STRUCT ofm_width0_m1_r OFM_WIDTH0_M1;   // 0x0868
+    STRUCT ofm_height0_m1_r OFM_HEIGHT0_M1; // 0x086C
+    STRUCT ofm_height1_m1_r OFM_HEIGHT1_M1; // 0x0870
+    uint32_t unused17[2];
+    STRUCT ofm_region_r OFM_REGION;             // 0x087C
+    STRUCT kernel_width_m1_r KERNEL_WIDTH_M1;   // 0x0880
+    STRUCT kernel_height_m1_r KERNEL_HEIGHT_M1; // 0x0884
+    STRUCT kernel_stride_r KERNEL_STRIDE;       // 0x0888
+    STRUCT parallel_mode_r PARALLEL_MODE;       // 0x088C
+    STRUCT acc_format_r ACC_FORMAT;             // 0x0890
+    STRUCT activation_r ACTIVATION;             // 0x0894
+    STRUCT activation_min_r ACTIVATION_MIN;     // 0x0898
+    STRUCT activation_max_r ACTIVATION_MAX;     // 0x089C
+    STRUCT weight_region_r WEIGHT_REGION;       // 0x08A0
+    STRUCT scale_region_r SCALE_REGION;         // 0x08A4
+    uint32_t unused18[3];
+    STRUCT ab_start_r AB_START; // 0x08B4
+    uint32_t unused19[1];
+    STRUCT blockdep_r BLOCKDEP;               // 0x08BC
+    STRUCT dma0_src_region_r DMA0_SRC_REGION; // 0x08C0
+    STRUCT dma0_dst_region_r DMA0_DST_REGION; // 0x08C4
+    STRUCT dma0_size0_r DMA0_SIZE0;           // 0x08C8
+    STRUCT dma0_size1_r DMA0_SIZE1;           // 0x08CC
+    uint32_t unused20[12];
+    STRUCT ifm2_broadcast_r IFM2_BROADCAST; // 0x0900
+    STRUCT ifm2_scalar_r IFM2_SCALAR;       // 0x0904
+    uint32_t unused21[3];
+    STRUCT ifm2_precision_r IFM2_PRECISION; // 0x0914
+    uint32_t unused22[3];
+    STRUCT ifm2_zero_point_r IFM2_ZERO_POINT; // 0x0924
+    STRUCT ifm2_width0_m1_r IFM2_WIDTH0_M1;   // 0x0928
+    STRUCT ifm2_height0_m1_r IFM2_HEIGHT0_M1; // 0x092C
+    STRUCT ifm2_height1_m1_r IFM2_HEIGHT1_M1; // 0x0930
+    STRUCT ifm2_ib_start_r IFM2_IB_START;     // 0x0934
+    uint32_t unused23[1];
+    STRUCT ifm2_region_r IFM2_REGION; // 0x093C
+    uint32_t unused24[48];
+    STRUCT ifm_base0_r IFM_BASE0;       // 0x0A00
+    STRUCT ifm_base1_r IFM_BASE1;       // 0x0A08
+    STRUCT ifm_base2_r IFM_BASE2;       // 0x0A10
+    STRUCT ifm_base3_r IFM_BASE3;       // 0x0A18
+    STRUCT ifm_stride_x_r IFM_STRIDE_X; // 0x0A20
+    STRUCT ifm_stride_y_r IFM_STRIDE_Y; // 0x0A28
+    STRUCT ifm_stride_c_r IFM_STRIDE_C; // 0x0A30
+    uint32_t unused25[2];
+    STRUCT ofm_base0_r OFM_BASE0;       // 0x0A40
+    STRUCT ofm_base1_r OFM_BASE1;       // 0x0A48
+    STRUCT ofm_base2_r OFM_BASE2;       // 0x0A50
+    STRUCT ofm_base3_r OFM_BASE3;       // 0x0A58
+    STRUCT ofm_stride_x_r OFM_STRIDE_X; // 0x0A60
+    STRUCT ofm_stride_y_r OFM_STRIDE_Y; // 0x0A68
+    STRUCT ofm_stride_c_r OFM_STRIDE_C; // 0x0A70
+    uint32_t unused26[2];
+    STRUCT weight_base_r WEIGHT_BASE;         // 0x0A80
+    STRUCT weight_length_r WEIGHT_LENGTH;     // 0x0A88
+    STRUCT scale_base_r SCALE_BASE;           // 0x0A90
+    STRUCT scale_length_r SCALE_LENGTH;       // 0x0A98
+    STRUCT ofm_scale_r OFM_SCALE;             // 0x0AA0
+    STRUCT ofm_scale_shift_r OFM_SCALE_SHIFT; // 0x0AA4
+    STRUCT opa_scale_r OPA_SCALE;             // 0x0AA8
+    STRUCT opa_scale_shift_r OPA_SCALE_SHIFT; // 0x0AAC
+    STRUCT opb_scale_r OPB_SCALE;             // 0x0AB0
+    uint32_t unused27[3];
+    STRUCT dma0_src_r DMA0_SRC;     // 0x0AC0
+    STRUCT dma0_dst_r DMA0_DST;     // 0x0AC8
+    STRUCT dma0_len_r DMA0_LEN;     // 0x0AD0
+    STRUCT dma0_skip0_r DMA0_SKIP0; // 0x0AD8
+    STRUCT dma0_skip1_r DMA0_SKIP1; // 0x0AE0
+    uint32_t unused28[6];
+    STRUCT ifm2_base0_r IFM2_BASE0;       // 0x0B00
+    STRUCT ifm2_base1_r IFM2_BASE1;       // 0x0B08
+    STRUCT ifm2_base2_r IFM2_BASE2;       // 0x0B10
+    STRUCT ifm2_base3_r IFM2_BASE3;       // 0x0B18
+    STRUCT ifm2_stride_x_r IFM2_STRIDE_X; // 0x0B20
+    STRUCT ifm2_stride_y_r IFM2_STRIDE_Y; // 0x0B28
+    STRUCT ifm2_stride_c_r IFM2_STRIDE_C; // 0x0B30
+    uint32_t unused29[2];
+    STRUCT weight1_base_r WEIGHT1_BASE;     // 0x0B40
+    STRUCT weight1_length_r WEIGHT1_LENGTH; // 0x0B48
+    STRUCT scale1_base_r SCALE1_BASE;       // 0x0B50
+    STRUCT scale1_length_r SCALE1_LENGTH;   // 0x0B58
+    uint32_t unused30[280];
+    STRUCT revision_r REVISION; // 0x0FC0
+    uint32_t unused31[3];
+    STRUCT pid4_r PID4; // 0x0FD0
+    STRUCT pid5_r PID5; // 0x0FD4
+    STRUCT pid6_r PID6; // 0x0FD8
+    STRUCT pid7_r PID7; // 0x0FDC
+    STRUCT pid0_r PID0; // 0x0FE0
+    STRUCT pid1_r PID1; // 0x0FE4
+    STRUCT pid2_r PID2; // 0x0FE8
+    STRUCT pid3_r PID3; // 0x0FEC
+    STRUCT cid0_r CID0; // 0x0FF0
+    STRUCT cid1_r CID1; // 0x0FF4
+    STRUCT cid2_r CID2; // 0x0FF8
+    STRUCT cid3_r CID3; // 0x0FFC
+
+#ifdef __cplusplus
+    enum class access_type_t : uint8_t
+    {
+        RW,
+        RO,
+        WO
+    };
+    NPU_REG()
+    {
+        reset();
+    }
+    void reset()
+    {
+        ID         = 268853249;
+        STATUS     = 8;
+        CMD        = 12;
+        RESET      = 0;
+        QBASE      = 0;
+        QREAD      = 0;
+        QCONFIG    = 0;
+        QSIZE      = 0;
+        PROT       = 0;
+        CONFIG     = 268435456;
+        LOCK       = 0;
+        REGIONCFG  = 0;
+        AXI_LIMIT0 = 0;
+        AXI_LIMIT1 = 0;
+        AXI_LIMIT2 = 0;
+        AXI_LIMIT3 = 0;
+        for (size_t i = 0; i < (sizeof(BASEP) / sizeof(BASEP[0])); ++i)
+            BASEP[i] = 0;
+        WD_STATUS          = 0;
+        MAC_STATUS         = 0;
+        AO_STATUS          = 0;
+        DMA_STATUS0        = 0;
+        DMA_STATUS1        = 0;
+        CLKFORCE           = 0;
+        DEBUG_ADDRESS      = 0;
+        DEBUG_MISC         = 0;
+        DEBUGCORE          = 0;
+        DEBUG_BLOCK        = 0;
+        PMCR               = 8192;
+        PMCNTENSET         = 0;
+        PMCNTENCLR         = 0;
+        PMOVSSET           = 0;
+        PMOVSCLR           = 0;
+        PMINTSET           = 0;
+        PMINTCLR           = 0;
+        PMCCNTR            = 0;
+        PMCCNTR_CFG        = 0;
+        PMCAXI_CHAN        = 0;
+        KERNEL_X           = 0;
+        KERNEL_Y           = 0;
+        KERNEL_W_M1        = 0;
+        KERNEL_H_M1        = 0;
+        OFM_CBLK_WIDTH_M1  = 0;
+        OFM_CBLK_HEIGHT_M1 = 0;
+        OFM_CBLK_DEPTH_M1  = 0;
+        IFM_CBLK_DEPTH_M1  = 0;
+        OFM_X              = 0;
+        OFM_Y              = 0;
+        OFM_Z              = 0;
+        IFM_Z              = 0;
+        PAD_TOP            = 0;
+        PAD_LEFT           = 0;
+        IFM_CBLK_WIDTH     = 0;
+        IFM_CBLK_HEIGHT    = 0;
+        DMA_IFM_SRC        = 0;
+        DMA_IFM_DST        = 0;
+        DMA_OFM_SRC        = 0;
+        DMA_OFM_DST        = 0;
+        DMA_WEIGHT_SRC     = 0;
+        DMA_CMD_SRC        = 0;
+        DMA_CMD_SIZE       = 0;
+        DMA_M2M_SRC        = 0;
+        DMA_M2M_DST        = 0;
+        CURRENT_QREAD      = 0;
+        DMA_SCALE_SRC      = 0;
+        CURRENT_BLOCK      = 0;
+        CURRENT_OP         = 0;
+        CURRENT_CMD        = 0;
+        for (size_t i = 0; i < (sizeof(PMEVCNTR) / sizeof(PMEVCNTR[0])); ++i)
+            PMEVCNTR[i] = 0;
+        for (size_t i = 0; i < (sizeof(PMEVTYPER) / sizeof(PMEVTYPER[0])); ++i)
+            PMEVTYPER[i] = 0;
+        for (size_t i = 0; i < (sizeof(SHARED_BUFFER) / sizeof(SHARED_BUFFER[0])); ++i)
+            SHARED_BUFFER[i] = 0;
+        IFM_PAD_TOP       = 0;
+        IFM_PAD_LEFT      = 0;
+        IFM_PAD_RIGHT     = 0;
+        IFM_PAD_BOTTOM    = 0;
+        IFM_DEPTH_M1      = 0;
+        IFM_PRECISION     = 0;
+        IFM_UPSCALE       = 0;
+        IFM_ZERO_POINT    = 0;
+        IFM_WIDTH0_M1     = 0;
+        IFM_HEIGHT0_M1    = 0;
+        IFM_HEIGHT1_M1    = 0;
+        IFM_IB_END        = 0;
+        IFM_REGION        = 0;
+        OFM_WIDTH_M1      = 0;
+        OFM_HEIGHT_M1     = 0;
+        OFM_DEPTH_M1      = 0;
+        OFM_PRECISION     = 0;
+        OFM_BLK_WIDTH_M1  = 0;
+        OFM_BLK_HEIGHT_M1 = 0;
+        OFM_BLK_DEPTH_M1  = 0;
+        OFM_ZERO_POINT    = 0;
+        OFM_WIDTH0_M1     = 0;
+        OFM_HEIGHT0_M1    = 0;
+        OFM_HEIGHT1_M1    = 0;
+        OFM_REGION        = 0;
+        KERNEL_WIDTH_M1   = 0;
+        KERNEL_HEIGHT_M1  = 0;
+        KERNEL_STRIDE     = 0;
+        PARALLEL_MODE     = 0;
+        ACC_FORMAT        = 0;
+        ACTIVATION        = 0;
+        ACTIVATION_MIN    = 0;
+        ACTIVATION_MAX    = 0;
+        WEIGHT_REGION     = 0;
+        SCALE_REGION      = 0;
+        AB_START          = 0;
+        BLOCKDEP          = 0;
+        DMA0_SRC_REGION   = 0;
+        DMA0_DST_REGION   = 0;
+        DMA0_SIZE0        = 0;
+        DMA0_SIZE1        = 0;
+        IFM2_BROADCAST    = 0;
+        IFM2_SCALAR       = 0;
+        IFM2_PRECISION    = 0;
+        IFM2_ZERO_POINT   = 0;
+        IFM2_WIDTH0_M1    = 0;
+        IFM2_HEIGHT0_M1   = 0;
+        IFM2_HEIGHT1_M1   = 0;
+        IFM2_IB_START     = 0;
+        IFM2_REGION       = 0;
+        IFM_BASE0         = 0;
+        IFM_BASE1         = 0;
+        IFM_BASE2         = 0;
+        IFM_BASE3         = 0;
+        IFM_STRIDE_X      = 0;
+        IFM_STRIDE_Y      = 0;
+        IFM_STRIDE_C      = 0;
+        OFM_BASE0         = 0;
+        OFM_BASE1         = 0;
+        OFM_BASE2         = 0;
+        OFM_BASE3         = 0;
+        OFM_STRIDE_X      = 0;
+        OFM_STRIDE_Y      = 0;
+        OFM_STRIDE_C      = 0;
+        WEIGHT_BASE       = 0;
+        WEIGHT_LENGTH     = 0;
+        SCALE_BASE        = 0;
+        SCALE_LENGTH      = 0;
+        OFM_SCALE         = 0;
+        OFM_SCALE_SHIFT   = 0;
+        OPA_SCALE         = 0;
+        OPA_SCALE_SHIFT   = 0;
+        OPB_SCALE         = 0;
+        DMA0_SRC          = 0;
+        DMA0_DST          = 0;
+        DMA0_LEN          = 0;
+        DMA0_SKIP0        = 0;
+        DMA0_SKIP1        = 0;
+        IFM2_BASE0        = 0;
+        IFM2_BASE1        = 0;
+        IFM2_BASE2        = 0;
+        IFM2_BASE3        = 0;
+        IFM2_STRIDE_X     = 0;
+        IFM2_STRIDE_Y     = 0;
+        IFM2_STRIDE_C     = 0;
+        WEIGHT1_BASE      = 0;
+        WEIGHT1_LENGTH    = 0;
+        SCALE1_BASE       = 0;
+        SCALE1_LENGTH     = 0;
+        REVISION          = 0;
+        PID4              = 4;
+        PID5              = 0;
+        PID6              = 0;
+        PID7              = 0;
+        PID0              = 129;
+        PID1              = 181;
+        PID2              = 11;
+        PID3              = 0;
+        CID0              = 13;
+        CID1              = 240;
+        CID2              = 5;
+        CID3              = 177;
+    }
+    // Returns a mutable reference to the 32-bit word located `addr_offset` bytes
+    // from the start of this object. The object is viewed as a flat array of
+    // uint32_t, so the byte offset is divided by 4 (integer division) to get
+    // the word index. No bounds checking is performed.
+    uint32_t &operator[](const int addr_offset)
+    {
+        uint32_t *const words = reinterpret_cast<uint32_t *>(this);
+        const int word_index  = addr_offset / 4;
+        return words[word_index];
+    }
+    // Maps a byte offset to its access_type_t classification (RW or RO).
+    //
+    // Only the offsets enumerated below are reported as RW; every other value,
+    // whether listed explicitly in the RO group or not listed at all, yields RO.
+    access_type_t get_access_type(uint32_t offset)
+    {
+        // Every 4-byte-aligned offset from 1024 up to and including 2068 is RW.
+        // Unaligned values inside that span are not matched here and end up
+        // in the default (RO) branch of the switch below.
+        const bool in_rw_run = (offset >= 1024) && (offset <= 2068) && (offset % 4 == 0);
+        if (in_rw_run)
+        {
+            return access_type_t::RW;
+        }
+        switch (offset)
+        {
+        // ---- RW offsets outside the contiguous 1024..2068 run ----
+        case 8: case 12: case 16: case 28: case 32: case 44:
+        case 60: case 64: case 68: case 72: case 76:
+        case 128: case 136: case 144: case 152:
+        case 160: case 168: case 176: case 184:
+        case 320: case 324: case 328: case 332: case 336:
+        case 384: case 388: case 392: case 396: case 400:
+        case 404: case 408: case 416: case 424: case 428:
+        case 768: case 772: case 776: case 780:
+        case 896: case 900: case 904: case 908:
+        case 2076: case 2084: case 2088: case 2092: case 2096: case 2100:
+        case 2108: case 2116: case 2120: case 2124: case 2128: case 2132:
+        case 2136: case 2140: case 2144: case 2152: case 2156: case 2160:
+        case 2172: case 2176: case 2180: case 2184: case 2188: case 2192:
+        case 2196: case 2200: case 2204: case 2208: case 2212: case 2228:
+        case 2236: case 2240: case 2244: case 2248: case 2252:
+        case 2304: case 2308: case 2324: case 2340: case 2344:
+        case 2348: case 2352: case 2356: case 2364:
+        case 2560: case 2568: case 2576: case 2584: case 2592: case 2600: case 2608:
+        case 2624: case 2632: case 2640: case 2648: case 2656: case 2664: case 2672:
+        case 2688: case 2696: case 2704: case 2712:
+        case 2720: case 2724: case 2728: case 2732: case 2736:
+        case 2752: case 2760: case 2768: case 2776: case 2784:
+        case 2816: case 2824: case 2832: case 2840: case 2848: case 2856: case 2864:
+        case 2880: case 2888: case 2896: case 2904:
+            return access_type_t::RW;
+        // ---- Offsets explicitly classified as RO (same result as default) ----
+        case 0: case 4: case 24: case 36: case 40:
+        case 256: case 260: case 264: case 272: case 276:
+        case 512: case 516: case 520: case 524: case 528: case 532:
+        case 536: case 540: case 544: case 548: case 552: case 556:
+        case 560: case 564: case 568: case 572: case 576:
+        case 584: case 588: case 592: case 600: case 608:
+        case 616: case 620: case 628: case 636: case 640:
+        case 692: case 696: case 700:
+        case 4032:
+        case 4048: case 4052: case 4056: case 4060: case 4064: case 4068:
+        case 4072: case 4076: case 4080: case 4084: case 4088: case 4092:
+        default:
+            return access_type_t::RO;
+        }
+    }
+#endif
+};
+
+#ifdef __cplusplus
+struct isa
+{
+#ifdef NPU_DISASSEMBLE
+    static int disassemble(const uint32_t *in,
+                           std::string &op,
+                           std::vector<std::pair<std::string, std::string>> &fields)
+    {
+        switch (*in & 0xffff)
+        {
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP):
+        {
+            const npu_op_stop_t &v = *reinterpret_cast<const npu_op_stop_t *>(in);
+            op                     = "NPU_OP_STOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ):
+        {
+            const npu_op_irq_t &v = *reinterpret_cast<const npu_op_irq_t *>(in);
+            op                    = "NPU_OP_IRQ";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV):
+        {
+            const npu_op_conv_t &v = *reinterpret_cast<const npu_op_conv_t *>(in);
+            op                     = "NPU_OP_CONV";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE):
+        {
+            const npu_op_depthwise_t &v = *reinterpret_cast<const npu_op_depthwise_t *>(in);
+            op                          = "NPU_OP_DEPTHWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL):
+        {
+            const npu_op_pool_t &v = *reinterpret_cast<const npu_op_pool_t *>(in);
+            op                     = "NPU_OP_POOL";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE):
+        {
+            const npu_op_elementwise_t &v = *reinterpret_cast<const npu_op_elementwise_t *>(in);
+            op                            = "NPU_OP_ELEMENTWISE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START):
+        {
+            const npu_op_dma_start_t &v = *reinterpret_cast<const npu_op_dma_start_t *>(in);
+            op                          = "NPU_OP_DMA_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT):
+        {
+            const npu_op_dma_wait_t &v = *reinterpret_cast<const npu_op_dma_wait_t *>(in);
+            op                         = "NPU_OP_DMA_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT):
+        {
+            const npu_op_kernel_wait_t &v = *reinterpret_cast<const npu_op_kernel_wait_t *>(in);
+            op                            = "NPU_OP_KERNEL_WAIT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK):
+        {
+            const npu_op_pmu_mask_t &v = *reinterpret_cast<const npu_op_pmu_mask_t *>(in);
+            op                         = "NPU_OP_PMU_MASK";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP):
+        {
+            const npu_set_ifm_pad_top_t &v = *reinterpret_cast<const npu_set_ifm_pad_top_t *>(in);
+            op                             = "NPU_SET_IFM_PAD_TOP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT):
+        {
+            const npu_set_ifm_pad_left_t &v = *reinterpret_cast<const npu_set_ifm_pad_left_t *>(in);
+            op                              = "NPU_SET_IFM_PAD_LEFT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT):
+        {
+            const npu_set_ifm_pad_right_t &v = *reinterpret_cast<const npu_set_ifm_pad_right_t *>(in);
+            op                               = "NPU_SET_IFM_PAD_RIGHT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM):
+        {
+            const npu_set_ifm_pad_bottom_t &v = *reinterpret_cast<const npu_set_ifm_pad_bottom_t *>(in);
+            op                                = "NPU_SET_IFM_PAD_BOTTOM";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1):
+        {
+            const npu_set_ifm_depth_m1_t &v = *reinterpret_cast<const npu_set_ifm_depth_m1_t *>(in);
+            op                              = "NPU_SET_IFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION):
+        {
+            const npu_set_ifm_precision_t &v = *reinterpret_cast<const npu_set_ifm_precision_t *>(in);
+            op                               = "NPU_SET_IFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE):
+        {
+            const npu_set_ifm_upscale_t &v = *reinterpret_cast<const npu_set_ifm_upscale_t *>(in);
+            op                             = "NPU_SET_IFM_UPSCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT):
+        {
+            const npu_set_ifm_zero_point_t &v = *reinterpret_cast<const npu_set_ifm_zero_point_t *>(in);
+            op                                = "NPU_SET_IFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1):
+        {
+            const npu_set_ifm_width0_m1_t &v = *reinterpret_cast<const npu_set_ifm_width0_m1_t *>(in);
+            op                               = "NPU_SET_IFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1):
+        {
+            const npu_set_ifm_height0_m1_t &v = *reinterpret_cast<const npu_set_ifm_height0_m1_t *>(in);
+            op                                = "NPU_SET_IFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1):
+        {
+            const npu_set_ifm_height1_m1_t &v = *reinterpret_cast<const npu_set_ifm_height1_m1_t *>(in);
+            op                                = "NPU_SET_IFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END):
+        {
+            const npu_set_ifm_ib_end_t &v = *reinterpret_cast<const npu_set_ifm_ib_end_t *>(in);
+            op                            = "NPU_SET_IFM_IB_END";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION):
+        {
+            const npu_set_ifm_region_t &v = *reinterpret_cast<const npu_set_ifm_region_t *>(in);
+            op                            = "NPU_SET_IFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1):
+        {
+            const npu_set_ofm_width_m1_t &v = *reinterpret_cast<const npu_set_ofm_width_m1_t *>(in);
+            op                              = "NPU_SET_OFM_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1):
+        {
+            const npu_set_ofm_height_m1_t &v = *reinterpret_cast<const npu_set_ofm_height_m1_t *>(in);
+            op                               = "NPU_SET_OFM_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1):
+        {
+            const npu_set_ofm_depth_m1_t &v = *reinterpret_cast<const npu_set_ofm_depth_m1_t *>(in);
+            op                              = "NPU_SET_OFM_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION):
+        {
+            const npu_set_ofm_precision_t &v = *reinterpret_cast<const npu_set_ofm_precision_t *>(in);
+            op                               = "NPU_SET_OFM_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1):
+        {
+            const npu_set_ofm_blk_width_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_width_m1_t *>(in);
+            op                                  = "NPU_SET_OFM_BLK_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1):
+        {
+            const npu_set_ofm_blk_height_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_height_m1_t *>(in);
+            op                                   = "NPU_SET_OFM_BLK_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1):
+        {
+            const npu_set_ofm_blk_depth_m1_t &v = *reinterpret_cast<const npu_set_ofm_blk_depth_m1_t *>(in);
+            op                                  = "NPU_SET_OFM_BLK_DEPTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT):
+        {
+            const npu_set_ofm_zero_point_t &v = *reinterpret_cast<const npu_set_ofm_zero_point_t *>(in);
+            op                                = "NPU_SET_OFM_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1):
+        {
+            const npu_set_ofm_width0_m1_t &v = *reinterpret_cast<const npu_set_ofm_width0_m1_t *>(in);
+            op                               = "NPU_SET_OFM_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1):
+        {
+            const npu_set_ofm_height0_m1_t &v = *reinterpret_cast<const npu_set_ofm_height0_m1_t *>(in);
+            op                                = "NPU_SET_OFM_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1):
+        {
+            const npu_set_ofm_height1_m1_t &v = *reinterpret_cast<const npu_set_ofm_height1_m1_t *>(in);
+            op                                = "NPU_SET_OFM_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION):
+        {
+            const npu_set_ofm_region_t &v = *reinterpret_cast<const npu_set_ofm_region_t *>(in);
+            op                            = "NPU_SET_OFM_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1):
+        {
+            const npu_set_kernel_width_m1_t &v = *reinterpret_cast<const npu_set_kernel_width_m1_t *>(in);
+            op                                 = "NPU_SET_KERNEL_WIDTH_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1):
+        {
+            const npu_set_kernel_height_m1_t &v = *reinterpret_cast<const npu_set_kernel_height_m1_t *>(in);
+            op                                  = "NPU_SET_KERNEL_HEIGHT_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE):
+        {
+            const npu_set_kernel_stride_t &v = *reinterpret_cast<const npu_set_kernel_stride_t *>(in);
+            op                               = "NPU_SET_KERNEL_STRIDE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_PARALLEL_MODE):
+        {
+            const npu_set_parallel_mode_t &v = *reinterpret_cast<const npu_set_parallel_mode_t *>(in);
+            op                               = "NPU_SET_PARALLEL_MODE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT):
+        {
+            const npu_set_acc_format_t &v = *reinterpret_cast<const npu_set_acc_format_t *>(in);
+            op                            = "NPU_SET_ACC_FORMAT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION):
+        {
+            const npu_set_activation_t &v = *reinterpret_cast<const npu_set_activation_t *>(in);
+            op                            = "NPU_SET_ACTIVATION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN):
+        {
+            const npu_set_activation_min_t &v = *reinterpret_cast<const npu_set_activation_min_t *>(in);
+            op                                = "NPU_SET_ACTIVATION_MIN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX):
+        {
+            const npu_set_activation_max_t &v = *reinterpret_cast<const npu_set_activation_max_t *>(in);
+            op                                = "NPU_SET_ACTIVATION_MAX";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION):
+        {
+            const npu_set_weight_region_t &v = *reinterpret_cast<const npu_set_weight_region_t *>(in);
+            op                               = "NPU_SET_WEIGHT_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION):
+        {
+            const npu_set_scale_region_t &v = *reinterpret_cast<const npu_set_scale_region_t *>(in);
+            op                              = "NPU_SET_SCALE_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START):
+        {
+            const npu_set_ab_start_t &v = *reinterpret_cast<const npu_set_ab_start_t *>(in);
+            op                          = "NPU_SET_AB_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP):
+        {
+            const npu_set_blockdep_t &v = *reinterpret_cast<const npu_set_blockdep_t *>(in);
+            op                          = "NPU_SET_BLOCKDEP";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION):
+        {
+            const npu_set_dma0_src_region_t &v = *reinterpret_cast<const npu_set_dma0_src_region_t *>(in);
+            op                                 = "NPU_SET_DMA0_SRC_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION):
+        {
+            const npu_set_dma0_dst_region_t &v = *reinterpret_cast<const npu_set_dma0_dst_region_t *>(in);
+            op                                 = "NPU_SET_DMA0_DST_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0):
+        {
+            const npu_set_dma0_size0_t &v = *reinterpret_cast<const npu_set_dma0_size0_t *>(in);
+            op                            = "NPU_SET_DMA0_SIZE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1):
+        {
+            const npu_set_dma0_size1_t &v = *reinterpret_cast<const npu_set_dma0_size1_t *>(in);
+            op                            = "NPU_SET_DMA0_SIZE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST):
+        {
+            const npu_set_ifm2_broadcast_t &v = *reinterpret_cast<const npu_set_ifm2_broadcast_t *>(in);
+            op                                = "NPU_SET_IFM2_BROADCAST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR):
+        {
+            const npu_set_ifm2_scalar_t &v = *reinterpret_cast<const npu_set_ifm2_scalar_t *>(in);
+            op                             = "NPU_SET_IFM2_SCALAR";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION):
+        {
+            const npu_set_ifm2_precision_t &v = *reinterpret_cast<const npu_set_ifm2_precision_t *>(in);
+            op                                = "NPU_SET_IFM2_PRECISION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT):
+        {
+            const npu_set_ifm2_zero_point_t &v = *reinterpret_cast<const npu_set_ifm2_zero_point_t *>(in);
+            op                                 = "NPU_SET_IFM2_ZERO_POINT";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1):
+        {
+            const npu_set_ifm2_width0_m1_t &v = *reinterpret_cast<const npu_set_ifm2_width0_m1_t *>(in);
+            op                                = "NPU_SET_IFM2_WIDTH0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1):
+        {
+            const npu_set_ifm2_height0_m1_t &v = *reinterpret_cast<const npu_set_ifm2_height0_m1_t *>(in);
+            op                                 = "NPU_SET_IFM2_HEIGHT0_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1):
+        {
+            const npu_set_ifm2_height1_m1_t &v = *reinterpret_cast<const npu_set_ifm2_height1_m1_t *>(in);
+            op                                 = "NPU_SET_IFM2_HEIGHT1_M1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START):
+        {
+            const npu_set_ifm2_ib_start_t &v = *reinterpret_cast<const npu_set_ifm2_ib_start_t *>(in);
+            op                               = "NPU_SET_IFM2_IB_START";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION):
+        {
+            const npu_set_ifm2_region_t &v = *reinterpret_cast<const npu_set_ifm2_region_t *>(in);
+            op                             = "NPU_SET_IFM2_REGION";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0):
+        {
+            const npu_set_ifm_base0_t &v = *reinterpret_cast<const npu_set_ifm_base0_t *>(in);
+            op                           = "NPU_SET_IFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1):
+        {
+            const npu_set_ifm_base1_t &v = *reinterpret_cast<const npu_set_ifm_base1_t *>(in);
+            op                           = "NPU_SET_IFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2):
+        {
+            const npu_set_ifm_base2_t &v = *reinterpret_cast<const npu_set_ifm_base2_t *>(in);
+            op                           = "NPU_SET_IFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3):
+        {
+            const npu_set_ifm_base3_t &v = *reinterpret_cast<const npu_set_ifm_base3_t *>(in);
+            op                           = "NPU_SET_IFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X):
+        {
+            const npu_set_ifm_stride_x_t &v = *reinterpret_cast<const npu_set_ifm_stride_x_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y):
+        {
+            const npu_set_ifm_stride_y_t &v = *reinterpret_cast<const npu_set_ifm_stride_y_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C):
+        {
+            const npu_set_ifm_stride_c_t &v = *reinterpret_cast<const npu_set_ifm_stride_c_t *>(in);
+            op                              = "NPU_SET_IFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0):
+        {
+            const npu_set_ofm_base0_t &v = *reinterpret_cast<const npu_set_ofm_base0_t *>(in);
+            op                           = "NPU_SET_OFM_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1):
+        {
+            const npu_set_ofm_base1_t &v = *reinterpret_cast<const npu_set_ofm_base1_t *>(in);
+            op                           = "NPU_SET_OFM_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2):
+        {
+            const npu_set_ofm_base2_t &v = *reinterpret_cast<const npu_set_ofm_base2_t *>(in);
+            op                           = "NPU_SET_OFM_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3):
+        {
+            const npu_set_ofm_base3_t &v = *reinterpret_cast<const npu_set_ofm_base3_t *>(in);
+            op                           = "NPU_SET_OFM_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X):
+        {
+            const npu_set_ofm_stride_x_t &v = *reinterpret_cast<const npu_set_ofm_stride_x_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y):
+        {
+            const npu_set_ofm_stride_y_t &v = *reinterpret_cast<const npu_set_ofm_stride_y_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C):
+        {
+            const npu_set_ofm_stride_c_t &v = *reinterpret_cast<const npu_set_ofm_stride_c_t *>(in);
+            op                              = "NPU_SET_OFM_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE):
+        {
+            const npu_set_weight_base_t &v = *reinterpret_cast<const npu_set_weight_base_t *>(in);
+            op                             = "NPU_SET_WEIGHT_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH):
+        {
+            const npu_set_weight_length_t &v = *reinterpret_cast<const npu_set_weight_length_t *>(in);
+            op                               = "NPU_SET_WEIGHT_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE):
+        {
+            const npu_set_scale_base_t &v = *reinterpret_cast<const npu_set_scale_base_t *>(in);
+            op                            = "NPU_SET_SCALE_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH):
+        {
+            const npu_set_scale_length_t &v = *reinterpret_cast<const npu_set_scale_length_t *>(in);
+            op                              = "NPU_SET_SCALE_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE):
+        {
+            const npu_set_ofm_scale_t &v = *reinterpret_cast<const npu_set_ofm_scale_t *>(in);
+            op                           = "NPU_SET_OFM_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE):
+        {
+            const npu_set_opa_scale_t &v = *reinterpret_cast<const npu_set_opa_scale_t *>(in);
+            op                           = "NPU_SET_OPA_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE):
+        {
+            const npu_set_opb_scale_t &v = *reinterpret_cast<const npu_set_opb_scale_t *>(in);
+            op                           = "NPU_SET_OPB_SCALE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC):
+        {
+            const npu_set_dma0_src_t &v = *reinterpret_cast<const npu_set_dma0_src_t *>(in);
+            op                          = "NPU_SET_DMA0_SRC";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST):
+        {
+            const npu_set_dma0_dst_t &v = *reinterpret_cast<const npu_set_dma0_dst_t *>(in);
+            op                          = "NPU_SET_DMA0_DST";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN):
+        {
+            const npu_set_dma0_len_t &v = *reinterpret_cast<const npu_set_dma0_len_t *>(in);
+            op                          = "NPU_SET_DMA0_LEN";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP0):
+        {
+            const npu_set_dma0_skip0_t &v = *reinterpret_cast<const npu_set_dma0_skip0_t *>(in);
+            op                            = "NPU_SET_DMA0_SKIP0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP1):
+        {
+            const npu_set_dma0_skip1_t &v = *reinterpret_cast<const npu_set_dma0_skip1_t *>(in);
+            op                            = "NPU_SET_DMA0_SKIP1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0):
+        {
+            const npu_set_ifm2_base0_t &v = *reinterpret_cast<const npu_set_ifm2_base0_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE0";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1):
+        {
+            const npu_set_ifm2_base1_t &v = *reinterpret_cast<const npu_set_ifm2_base1_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE1";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2):
+        {
+            const npu_set_ifm2_base2_t &v = *reinterpret_cast<const npu_set_ifm2_base2_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE2";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3):
+        {
+            const npu_set_ifm2_base3_t &v = *reinterpret_cast<const npu_set_ifm2_base3_t *>(in);
+            op                            = "NPU_SET_IFM2_BASE3";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X):
+        {
+            const npu_set_ifm2_stride_x_t &v = *reinterpret_cast<const npu_set_ifm2_stride_x_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_X";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y):
+        {
+            const npu_set_ifm2_stride_y_t &v = *reinterpret_cast<const npu_set_ifm2_stride_y_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_Y";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C):
+        {
+            const npu_set_ifm2_stride_c_t &v = *reinterpret_cast<const npu_set_ifm2_stride_c_t *>(in);
+            op                               = "NPU_SET_IFM2_STRIDE_C";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE):
+        {
+            const npu_set_weight1_base_t &v = *reinterpret_cast<const npu_set_weight1_base_t *>(in);
+            op                              = "NPU_SET_WEIGHT1_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH):
+        {
+            const npu_set_weight1_length_t &v = *reinterpret_cast<const npu_set_weight1_length_t *>(in);
+            op                                = "NPU_SET_WEIGHT1_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_BASE):
+        {
+            const npu_set_scale1_base_t &v = *reinterpret_cast<const npu_set_scale1_base_t *>(in);
+            op                             = "NPU_SET_SCALE1_BASE";
+            v.disassemble(fields);
+            break;
+        }
+        case (static_cast<uint32_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL) << 14) |
+            static_cast<uint32_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_LENGTH):
+        {
+            const npu_set_scale1_length_t &v = *reinterpret_cast<const npu_set_scale1_length_t *>(in);
+            op                               = "NPU_SET_SCALE1_LENGTH";
+            v.disassemble(fields);
+            break;
+        }
+        }
+        return (*in & (3 << 14)) != 0 ? 2 : 1;
+    }
+#endif
+#endif
+    // Signal the end of command stream (NPU_OP_STOP command; bit-fields total 10+4+2+16 = 32 bits)
+    struct npu_op_stop_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t mask : 16;   //  Encoding for 16-bit mask value
+#ifdef __cplusplus
+      public:
+        npu_op_stop_t(uint32_t _mask) : // builds an NPU_OP_STOP command; only the low 16 bits of _mask are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(_mask & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_op_stop_t() : // builds an NPU_OP_STOP command with mask = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_STOP and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved0 and mask are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_STOP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_stop_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_stop_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_mask() const // mask field widened to uint32_t
+        {
+            return static_cast<uint32_t>(mask);
+        }
+        CONSTEXPR npu_op_stop_t &set_mask(uint32_t value) // stores the low 16 bits of value
+        {
+            mask = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("mask", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Raises an IRQ to the host (NPU_OP_IRQ command; bit-fields total 10+4+2+16 = 32 bits)
+    struct npu_op_irq_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t mask : 16;   //  Encoding for 16-bit mask value
+#ifdef __cplusplus
+      public:
+        npu_op_irq_t(uint32_t _mask) : // builds an NPU_OP_IRQ command; only the low 16 bits of _mask are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(_mask & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_op_irq_t() : // builds an NPU_OP_IRQ command with mask = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mask(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_IRQ and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved0 and mask are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_IRQ);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_irq_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_irq_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_mask() const // mask field widened to uint32_t
+        {
+            return static_cast<uint32_t>(mask);
+        }
+        CONSTEXPR npu_op_irq_t &set_mask(uint32_t value) // stores the low 16 bits of value
+        {
+            mask = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("mask", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("mask", std::to_string(mask)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // 2D convolution (NPU_OP_CONV command; no payload, bit-fields total 10+4+2+16 = 32 bits)
+    struct npu_op_conv_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by the constructor
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t reserved1 : 16; // reserved (16 bits), written as 0 by the constructor
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_conv_t() : // builds an NPU_OP_CONV command with both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_CONV and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_CONV);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_conv_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_conv_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // no payload fields, nothing is appended
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Depth-wise 2D convolution (NPU_OP_DEPTHWISE command; no payload, bit-fields total 10+4+2+16 = 32 bits)
+    struct npu_op_depthwise_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by the constructor
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t reserved1 : 16; // reserved (16 bits), written as 0 by the constructor
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_depthwise_t() : // builds an NPU_OP_DEPTHWISE command with both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_DEPTHWISE and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DEPTHWISE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_depthwise_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_depthwise_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // no payload fields, nothing is appended
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Pooling (NPU_OP_POOL command with a 3-bit pooling mode; bit-fields total 10+4+2+3+13 = 32 bits)
+    struct npu_op_pool_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2;      //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t pooling_mode : 3; //  Pooling mode (3 bits, holds a pooling_mode enum value)
+        uint32_t reserved1 : 13; // reserved (13 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_pool_t(NPU_NAMESPACE::pooling_mode _pooling_mode) : // builds an NPU_OP_POOL command; keeps the low 3 bits of the mode
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            pooling_mode(static_cast<uint8_t>(_pooling_mode) & ((1U << 3) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_pool_t() : // builds an NPU_OP_POOL command with pooling_mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pooling_mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_POOL and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pooling_mode and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_POOL);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_pool_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_pool_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::pooling_mode get_pooling_mode() const // pooling_mode field as its enum type
+        {
+            return static_cast<NPU_NAMESPACE::pooling_mode>(pooling_mode);
+        }
+        CONSTEXPR npu_op_pool_t &set_pooling_mode(NPU_NAMESPACE::pooling_mode value) // stores the low 3 bits of value
+        {
+            pooling_mode = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("pooling_mode", name)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "pooling_mode",
+                (pooling_mode < (sizeof(pooling_mode_str) / sizeof(pooling_mode_str[0])) ? // bounds check against the name table
+                     pooling_mode_str[pooling_mode] :
+                     "****"))); // placeholder when the value has no entry in pooling_mode_str
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Elementwise operation (NPU_OP_ELEMENTWISE command with a 6-bit mode; bit-fields total 10+4+2+6+10 = 32 bits)
+    struct npu_op_elementwise_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2;          //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t elementwise_mode : 6; //  Elementwise mode (6 bits, holds an elementwise_mode enum value)
+        uint32_t reserved1 : 10; // reserved (10 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_elementwise_t(NPU_NAMESPACE::elementwise_mode _elementwise_mode) : // keeps the low 6 bits of the mode
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            elementwise_mode(static_cast<uint8_t>(_elementwise_mode) & ((1U << 6) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_elementwise_t() : // builds an NPU_OP_ELEMENTWISE command with elementwise_mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), elementwise_mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_ELEMENTWISE and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; elementwise_mode and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_ELEMENTWISE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::elementwise_mode get_elementwise_mode() const // elementwise_mode field as its enum type
+        {
+            return static_cast<NPU_NAMESPACE::elementwise_mode>(elementwise_mode);
+        }
+        CONSTEXPR npu_op_elementwise_t &set_elementwise_mode(NPU_NAMESPACE::elementwise_mode value) // stores the low 6 bits
+        {
+            elementwise_mode = static_cast<uint8_t>(value) & ((1U << 6) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("elementwise_mode", name)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "elementwise_mode",
+                (elementwise_mode < (sizeof(elementwise_mode_str) / sizeof(elementwise_mode_str[0])) ? // bounds check
+                     elementwise_mode_str[elementwise_mode] :
+                     "****"))); // placeholder when the value has no entry in elementwise_mode_str
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Queue new DMA for the given channel (NPU_OP_DMA_START command; no payload, bit-fields total 10+4+2+16 = 32 bits)
+    struct npu_op_dma_start_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by the constructor
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t reserved1 : 16; // reserved (16 bits), written as 0 by the constructor
+#ifdef __cplusplus
+      public:
+        CONSTEXPR npu_op_dma_start_t() : // builds an NPU_OP_DMA_START command with both reserved fields zeroed
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_DMA_START and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_dma_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_dma_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const {} // no payload fields, nothing is appended
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Wait for the DMA channel to have k or fewer active descriptors outstanding (NPU_OP_DMA_WAIT; 10+4+2+4+12 = 32 bits)
+    struct npu_op_dma_wait_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t k : 4;       //  Number of outstanding descriptors (4-bit field)
+        uint32_t reserved1 : 12; // reserved (12 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_dma_wait_t(uint32_t _k) : // builds an NPU_OP_DMA_WAIT command; only the low 4 bits of _k are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), k(_k & ((1U << 4) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_dma_wait_t() : // builds an NPU_OP_DMA_WAIT command with k = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), k(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_DMA_WAIT and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; k and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_DMA_WAIT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_k() const // k field widened to uint32_t
+        {
+            return static_cast<uint32_t>(k);
+        }
+        CONSTEXPR npu_op_dma_wait_t &set_k(uint32_t value) // stores the low 4 bits of value
+        {
+            k = static_cast<uint8_t>(value) & ((1U << 4) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("k", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("k", std::to_string(k)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Wait for n or fewer kernel operations to be remaining (NPU_OP_KERNEL_WAIT; bit-fields total 10+4+2+2+14 = 32 bits)
+    struct npu_op_kernel_wait_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t n : 2;       //  Number of kernel operations in range 0-3
+        uint32_t reserved1 : 14; // reserved (14 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_kernel_wait_t(uint32_t _n) : // builds an NPU_OP_KERNEL_WAIT command; only the low 2 bits of _n are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), n(_n & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_kernel_wait_t() : // builds an NPU_OP_KERNEL_WAIT command with n = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), n(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_KERNEL_WAIT and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; n and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_KERNEL_WAIT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_n() const // n field widened to uint32_t
+        {
+            return static_cast<uint32_t>(n);
+        }
+        CONSTEXPR npu_op_kernel_wait_t &set_n(uint32_t value) // stores the low 2 bits of value
+        {
+            n = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("n", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("n", std::to_string(n)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Enable or disable PMU counting (debug feature only) (NPU_OP_PMU_MASK; bit-fields total 10+4+2+1+15 = 32 bits)
+    struct npu_op_pmu_mask_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t enable : 1;  //  Enable or disable PMU mask (single bit)
+        uint32_t reserved1 : 15; // reserved (15 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_op_pmu_mask_t(uint32_t _enable) : // builds an NPU_OP_PMU_MASK command; only bit 0 of _enable is kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), enable(_enable & ((1U << 1) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_op_pmu_mask_t() : // builds an NPU_OP_PMU_MASK command with enable = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), enable(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_OP_PMU_MASK and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; enable and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_OP_PMU_MASK);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_enable() const // enable bit widened to uint32_t
+        {
+            return static_cast<uint32_t>(enable);
+        }
+        CONSTEXPR npu_op_pmu_mask_t &set_enable(uint32_t value) // stores bit 0 of value
+        {
+            enable = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("enable", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("enable", std::to_string(enable)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM top pad (NPU_SET_IFM_PAD_TOP command with a 7-bit pad value; bit-fields total 10+4+2+7+9 = 32 bits)
+    struct npu_set_ifm_pad_top_t
+    {
+#ifdef __cplusplus
+      private: // in C++ builds the bit-fields are only reachable through the accessors below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits, holds a cmd0_opcode value)
+        uint32_t reserved0 : 4; // reserved (4 bits), written as 0 by both constructors
+        uint32_t control : 2; //  control (2 bits, holds a cmd_ctrl value)
+        uint32_t pad : 7;     //  IFM top pad (7-bit field)
+        uint32_t reserved1 : 9; // reserved (9 bits), written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_top_t(uint32_t _pad) : // builds an NPU_SET_IFM_PAD_TOP command; only the low 7 bits of _pad are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 7) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t() : // builds an NPU_SET_IFM_PAD_TOP command with pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM_PAD_TOP and control is CMD0_CTRL
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pad and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_TOP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word: the first sizeof(uint32_t) bytes of this object
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy instead of a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as a cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // stores the low 10 bits of value
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this; // returning *this lets setters be chained
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as a cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // stores the low 2 bits of value
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // pad field widened to uint32_t
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_top_t &set_pad(uint32_t value) // stores the low 7 bits of value
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 7) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("pad", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM left pad command: opcode, control and a 7-bit pad value packed into bit-fields totalling 32 bits
+    struct npu_set_ifm_pad_left_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_PAD_LEFT here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store CMD0_CTRL here
+        uint32_t pad : 7;     //  IFM left pad
+        uint32_t reserved1 : 9; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_left_t(uint32_t _pad) : // builds the command; only the low 7 bits of _pad are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 7) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t() : // default: same opcode/control, pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pad and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_LEFT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // pad field widened to uint32_t
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_left_t &set_pad(uint32_t value) // returns *this for chaining
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 7) - 1); // keep only the low 7 bits; higher bits are dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad))); // value as decimal text
+        }
+#endif
+#endif
+    };
+    // IFM right pad command: opcode, control and an 8-bit pad value packed into bit-fields totalling 32 bits
+    struct npu_set_ifm_pad_right_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_PAD_RIGHT here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store CMD0_CTRL here
+        uint32_t pad : 8;     //  IFM right pad. Max value is 128 (not checked in this struct: any 8-bit value is stored)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_right_t(uint32_t _pad) : // builds the command; only the low 8 bits of _pad are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 8) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t() : // default: same opcode/control, pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pad and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_RIGHT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // pad field widened to uint32_t
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_right_t &set_pad(uint32_t value) // returns *this for chaining
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 8) - 1); // keep only the low 8 bits; no check against the documented maximum
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad))); // value as decimal text
+        }
+#endif
+#endif
+    };
+    // IFM bottom pad command: opcode, control and an 8-bit pad value packed into bit-fields totalling 32 bits
+    struct npu_set_ifm_pad_bottom_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_PAD_BOTTOM here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store CMD0_CTRL here
+        uint32_t pad : 8;     //  IFM bottom pad. Max value is 128 (not checked in this struct: any 8-bit value is stored)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_pad_bottom_t(uint32_t _pad) : // builds the command; only the low 8 bits of _pad are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(_pad & ((1U << 8) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t() : // default: same opcode/control, pad = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), pad(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; pad and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PAD_BOTTOM);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_pad() const // pad field widened to uint32_t
+        {
+            return static_cast<uint32_t>(pad);
+        }
+        CONSTEXPR npu_set_ifm_pad_bottom_t &set_pad(uint32_t value) // returns *this for chaining
+        {
+            pad = static_cast<uint8_t>(value) & ((1U << 8) - 1); // keep only the low 8 bits; no check against the documented maximum
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("pad", std::to_string(pad))); // value as decimal text
+        }
+#endif
+#endif
+    };
+    // Number of input channels for convolution: opcode, control and a 16-bit depth_m1 value in bit-fields totalling 32 bits
+    struct npu_set_ifm_depth_m1_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_DEPTH_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;   //  control; constructors and init() store CMD0_CTRL here
+        uint32_t depth_m1 : 16; //  Number of input channels for convolution (the _m1 suffix suggests "minus 1" - TODO confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_depth_m1_t(uint32_t _depth_m1) : // builds the command; only the low 16 bits of _depth_m1 are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t() : // default: same opcode/control, depth_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; depth_m1 and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const // depth_m1 field widened to uint32_t
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ifm_depth_m1_t &set_depth_m1(uint32_t value) // returns *this for chaining
+        {
+            depth_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep only the low 16 bits; higher bits are dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1))); // decimal text
+        }
+#endif
+#endif
+    };
+    // IFM Precision command: opcode, control and five IFM settings (type, precision, format, scale mode, round mode) in 32 bits of bit-fields
+    struct npu_set_ifm_precision_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_PRECISION here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;         //  control; constructors and init() store CMD0_CTRL here
+        uint32_t activation_type : 1; //  IFM type
+        uint32_t reserved1 : 1; //  reserved; both constructors write 0
+        uint32_t activation_precision : 2; //  IFM precision
+        uint32_t reserved2 : 2; //  reserved; both constructors write 0
+        uint32_t activation_format : 2; //  IFM format
+        uint32_t scale_mode : 2;        //  IFM scale mode
+        uint32_t reserved3 : 4; //  reserved; both constructors write 0
+        uint32_t round_mode : 2; //  IFM round mode
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_precision_t(NPU_NAMESPACE::activation_type _activation_type, // each argument is masked to its field width
+                                NPU_NAMESPACE::activation_precision _activation_precision,
+                                NPU_NAMESPACE::activation_format _activation_format,
+                                NPU_NAMESPACE::ifm_scale_mode _scale_mode,
+                                NPU_NAMESPACE::round_mode _round_mode) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)), reserved1(0),
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved2(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)),
+            scale_mode(static_cast<uint8_t>(_scale_mode) & ((1U << 2) - 1)), reserved3(0),
+            round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 2) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_precision_t() : // default: same opcode/control, every setting field = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0), reserved1(0),
+            activation_precision(0), reserved2(0), activation_format(0), scale_mode(0), reserved3(0), round_mode(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the setting fields and reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const // field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value) // returns *this
+        {
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep only the lowest bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const // field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value) // returns *this
+        {
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const // field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value) // returns *this
+        {
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm_scale_mode get_scale_mode() const // field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::ifm_scale_mode>(scale_mode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_scale_mode(NPU_NAMESPACE::ifm_scale_mode value) // returns *this
+        {
+            scale_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::round_mode get_round_mode() const // field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::round_mode>(round_mode);
+        }
+        CONSTEXPR npu_set_ifm_precision_t &set_round_mode(NPU_NAMESPACE::round_mode value) // returns *this
+        {
+            round_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends five name/value pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ? // index within the name table?
+                     activation_type_str[activation_type] :
+                     "****"))); // placeholder when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****"))); // placeholder when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****"))); // placeholder when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "scale_mode",
+                (scale_mode < (sizeof(ifm_scale_mode_str) / sizeof(ifm_scale_mode_str[0])) ?
+                     ifm_scale_mode_str[scale_mode] :
+                     "****"))); // placeholder when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "round_mode",
+                (round_mode < (sizeof(round_mode_str) / sizeof(round_mode_str[0])) ? round_mode_str[round_mode] :
+                                                                                     "****"))); // placeholder when out of range
+        }
+#endif
+#endif
+    };
+    // IFM upscale mode command: opcode, control and a 2-bit mode packed into bit-fields totalling 32 bits
+    struct npu_set_ifm_upscale_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_UPSCALE here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store CMD0_CTRL here
+        uint32_t mode : 2;    //  IFM upscale mode
+        uint32_t reserved1 : 14; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_upscale_t(NPU_NAMESPACE::ifm_upscale_mode _mode) : // builds the command; _mode is masked to 2 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            mode(static_cast<uint8_t>(_mode) & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_upscale_t() : // default: same opcode/control, mode = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; mode and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_UPSCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm_upscale_mode get_mode() const // mode field converted to ifm_upscale_mode
+        {
+            return static_cast<NPU_NAMESPACE::ifm_upscale_mode>(mode);
+        }
+        CONSTEXPR npu_set_ifm_upscale_t &set_mode(NPU_NAMESPACE::ifm_upscale_mode value) // returns *this for chaining
+        {
+            mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "mode",
+                (mode < (sizeof(ifm_upscale_mode_str) / sizeof(ifm_upscale_mode_str[0])) ? ifm_upscale_mode_str[mode] :
+                                                                                           "****"))); // placeholder when out of range
+        }
+#endif
+#endif
+    };
+    // IFM zero point command: opcode, control and a 16-bit zero_point value packed into bit-fields totalling 32 bits
+    struct npu_set_ifm_zero_point_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_ZERO_POINT here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;     //  control; constructors and init() store CMD0_CTRL here
+        uint32_t zero_point : 16; //  Zero point offset
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_zero_point_t(uint32_t _zero_point) : // builds the command; only the low 16 bits of _zero_point are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t() : // default: same opcode/control, zero_point = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; zero_point and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const // zero_point field widened to uint32_t
+        {
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ifm_zero_point_t &set_zero_point(uint32_t value) // returns *this for chaining
+        {
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep only the low 16 bits; higher bits are dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point))); // decimal text
+        }
+#endif
+#endif
+    };
+    // IFM Tile 0 and tile 2 width command: opcode, control and a 16-bit width_m1 value in bit-fields totalling 32 bits
+    struct npu_set_ifm_width0_m1_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_WIDTH0_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;   //  control; constructors and init() store CMD0_CTRL here
+        uint32_t width_m1 : 16; //  IFM Tile 0 and tile 2 width (the _m1 suffix suggests "minus 1" - TODO confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_width0_m1_t(uint32_t _width_m1) : // builds the command; only the low 16 bits of _width_m1 are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t() : // default: same opcode/control, width_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; width_m1 and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const // width_m1 field widened to uint32_t
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ifm_width0_m1_t &set_width_m1(uint32_t value) // returns *this for chaining
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep only the low 16 bits; higher bits are dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1))); // decimal text
+        }
+#endif
+#endif
+    };
+    // IFM Tile 0 height command: opcode, control and a 16-bit height_m1 value in bit-fields totalling 32 bits
+    struct npu_set_ifm_height0_m1_t
+    {
+#ifdef __cplusplus
+      private:
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() store NPU_SET_IFM_HEIGHT0_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;    //  control; constructors and init() store CMD0_CTRL here
+        uint32_t height_m1 : 16; //  IFM Tile 0 height (the _m1 suffix suggests "minus 1" - TODO confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ifm_height0_m1_t(uint32_t _height_m1) : // builds the command; only the low 16 bits of _height_m1 are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t() : // default: same opcode/control, height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; height_m1 and the reserved bits are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the packed fields rather than a pointer cast
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field converted to cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep only the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep only the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const // height_m1 field widened to uint32_t
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm_height0_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep only the low 16 bits; higher bits are dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one name/value pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1))); // decimal text
+        }
+#endif
+#endif
+    };
+    // IFM Tile 1 height: CMD0 command packed as 32 bits of bit-fields (opcode, control, 16-bit height_m1)
+    struct npu_set_ifm_height1_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  IFM Tile 1 height
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ifm_height1_m1_t(uint32_t _height_m1) : // value constructor; _height_m1 is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm_height1_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("height_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // End of IB0,IB1 buffers: CMD0 command packed as 32 bits of bit-fields (opcode, control, 6-bit ib_end)
+    struct npu_set_ifm_ib_end_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2; //  control
+        uint32_t ib_end : 6;  //  End of IB0,IB1 buffers in the SHRAM in KB units. Multiple of 2
+        uint32_t reserved1 : 10; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ifm_ib_end_t(uint32_t _ib_end) : // value constructor; _ib_end is masked to its low 6 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_end(_ib_end & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t() : // default constructor; ib_end starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_end(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_IB_END);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ib_end() const
+        {
+            return static_cast<uint32_t>(ib_end);
+        }
+        CONSTEXPR npu_set_ifm_ib_end_t &set_ib_end(uint32_t value) // returns *this for chaining
+        {
+            ib_end = static_cast<uint8_t>(value) & ((1U << 6) - 1); // keep the low 6 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("ib_end", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ib_end", std::to_string(ib_end)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for IFM access: CMD0 command packed as 32 bits of bit-fields (opcode, control, 3-bit region)
+    struct npu_set_ifm_region_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Region number n
+        uint32_t reserved1 : 13; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ifm_region_t(uint32_t _region) : // value constructor; _region is masked to its low 3 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm_region_t() : // default constructor; region starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ifm_region_t &set_region(uint32_t value) // returns *this for chaining
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // keep the low 3 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("region", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Output feature map width: CMD0 command packed as 32 bits of bit-fields (opcode, control, 16-bit width_m1)
+    struct npu_set_ofm_width_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;   //  control
+        uint32_t width_m1 : 16; //  Output feature map width
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_width_m1_t(uint32_t _width_m1) : // value constructor; _width_m1 is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t() : // default constructor; width_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_width_m1_t &set_width_m1(uint32_t value) // returns *this for chaining
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("width_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Output feature map height: CMD0 command packed as 32 bits of bit-fields (opcode, control, 16-bit height_m1)
+    struct npu_set_ofm_height_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;    //  control
+        uint32_t height_m1 : 16; //  Output feature map height
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_height_m1_t(uint32_t _height_m1) : // value constructor; _height_m1 is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("height_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Output feature map depth: CMD0 command packed as 32 bits of bit-fields (opcode, control, 16-bit depth_m1)
+    struct npu_set_ofm_depth_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;   //  control
+        uint32_t depth_m1 : 16; //  Output feature map depth
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_depth_m1_t(uint32_t _depth_m1) : // value constructor; _depth_m1 is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t() : // default constructor; depth_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ofm_depth_m1_t &set_depth_m1(uint32_t value) // returns *this for chaining
+        {
+            depth_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // keep the low 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("depth_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Precision: CMD0 command packed as 32 bits of bit-fields (opcode, control, five enum-valued OFM settings)
+    struct npu_set_ofm_precision_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;              //  control
+        uint32_t activation_type : 1;      //  OFM type
+        uint32_t activation_precision : 2; //  OFM precision
+        uint32_t reserved1 : 3; //  reserved, written as 0 by both constructors
+        uint32_t activation_format : 2; //  OFM format
+        uint32_t scale_mode : 1;        //  OFM scale mode
+        uint32_t reserved2 : 5; //  reserved, written as 0 by both constructors
+        uint32_t round_mode : 2; //  OFM round mode
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_precision_t(NPU_NAMESPACE::activation_type _activation_type, // each enum argument is masked to its field width
+                                NPU_NAMESPACE::activation_precision _activation_precision,
+                                NPU_NAMESPACE::activation_format _activation_format,
+                                NPU_NAMESPACE::ofm_scale_mode _scale_mode,
+                                NPU_NAMESPACE::round_mode _round_mode) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)),
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved1(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)),
+            scale_mode(static_cast<uint8_t>(_scale_mode) & ((1U << 1) - 1)), reserved2(0),
+            round_mode(static_cast<uint8_t>(_round_mode) & ((1U << 2) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ofm_precision_t() : // default constructor; every setting field starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0),
+            activation_precision(0), reserved1(0), activation_format(0), scale_mode(0), reserved2(0), round_mode(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the setting fields
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value) // returns *this for chaining
+        {
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the low bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value) // returns *this for chaining
+        {
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const
+        {
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value) // returns *this for chaining
+        {
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ofm_scale_mode get_scale_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::ofm_scale_mode>(scale_mode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_scale_mode(NPU_NAMESPACE::ofm_scale_mode value) // returns *this for chaining
+        {
+            scale_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1); // keep the low bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::round_mode get_round_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::round_mode>(round_mode);
+        }
+        CONSTEXPR npu_set_ofm_precision_t &set_round_mode(NPU_NAMESPACE::round_mode value) // returns *this for chaining
+        {
+            round_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, text) pair per setting field
+        {
+            fields.push_back(std::make_pair<std::string, std::string>( // text comes from the *_str table, or "****" when the value is past its end
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ?
+                     activation_type_str[activation_type] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "scale_mode",
+                (scale_mode < (sizeof(ofm_scale_mode_str) / sizeof(ofm_scale_mode_str[0])) ?
+                     ofm_scale_mode_str[scale_mode] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "round_mode",
+                (round_mode < (sizeof(round_mode_str) / sizeof(round_mode_str[0])) ? round_mode_str[round_mode] :
+                                                                                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM block width: CMD0 command packed as 32 bits of bit-fields (opcode, control, 6-bit width_m1)
+    struct npu_set_ofm_blk_width_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;  //  control
+        uint32_t width_m1 : 6; //  OFM block width
+        uint32_t reserved1 : 10; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_blk_width_m1_t(uint32_t _width_m1) : // value constructor; _width_m1 is masked to its low 6 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t() : // default constructor; width_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_width_m1_t &set_width_m1(uint32_t value) // returns *this for chaining
+        {
+            width_m1 = static_cast<uint8_t>(value) & ((1U << 6) - 1); // keep the low 6 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("width_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM block height: CMD0 command packed as 32 bits of bit-fields (opcode, control, 5-bit height_m1)
+    struct npu_set_ofm_blk_height_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;   //  control
+        uint32_t height_m1 : 5; //  OFM block height
+        uint32_t reserved1 : 11; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_blk_height_m1_t(uint32_t _height_m1) : // value constructor; _height_m1 is masked to its low 5 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 5) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_height_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint8_t>(value) & ((1U << 5) - 1); // keep the low 5 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("height_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM block depth: CMD0 command packed as 32 bits of bit-fields (opcode, control, 7-bit depth_m1)
+    struct npu_set_ofm_blk_depth_m1_t
+    {
+#ifdef __cplusplus // fields are private only in C++ builds; C sees plain bit-fields
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;  //  control
+        uint32_t depth_m1 : 7; //  OFM block depth
+        uint32_t reserved1 : 9; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus // C++-only constructors and accessors
+      public:
+        npu_set_ofm_blk_depth_m1_t(uint32_t _depth_m1) : // value constructor; _depth_m1 is masked to its low 7 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(_depth_m1 & ((1U << 7) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t() : // default constructor; depth_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), depth_m1(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // checks only opcode and control, not the payload
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control; other fields are not modified
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_BLK_DEPTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // raw command word; not const-qualified, so unavailable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-copies the packed bit-fields into one word
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_depth_m1() const
+        {
+            return static_cast<uint32_t>(depth_m1);
+        }
+        CONSTEXPR npu_set_ofm_blk_depth_m1_t &set_depth_m1(uint32_t value) // returns *this for chaining
+        {
+            depth_m1 = static_cast<uint8_t>(value) & ((1U << 7) - 1); // keep the low 7 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("depth_m1", decimal value)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("depth_m1", std::to_string(depth_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM zero point: one command word whose bit-fields total 32 bits; zero_point is its only payload field
+    struct npu_set_ofm_zero_point_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_OFM_ZERO_POINT here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;     //  control; both constructors store CMD0_CTRL here
+        uint32_t zero_point : 16; //  Zero point offset, held as an unsigned 16-bit field
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_zero_point_t(uint32_t _zero_point) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t() : // default constructor; zero_point starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; zero_point and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const
+        {
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ofm_zero_point_t &set_zero_point(uint32_t value)
+        {
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 0 and tile 2 width: one command word whose bit-fields total 32 bits; width_m1 is its only payload field
+    struct npu_set_ofm_width0_m1_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_OFM_WIDTH0_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;   //  control; both constructors store CMD0_CTRL here
+        uint32_t width_m1 : 16; //  OFM Tile 0 and tile 2 width (presumably "width minus 1" given the _m1 suffix; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_width0_m1_t(uint32_t _width_m1) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t() : // default constructor; width_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; width_m1 and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ofm_width0_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 0 height: one command word whose bit-fields total 32 bits; height_m1 is its only payload field
+    struct npu_set_ofm_height0_m1_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_OFM_HEIGHT0_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;    //  control; both constructors store CMD0_CTRL here
+        uint32_t height_m1 : 16; //  OFM Tile 0 height (presumably "height minus 1" given the _m1 suffix; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_height0_m1_t(uint32_t _height_m1) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; height_m1 and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height0_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 1 height: one command word whose bit-fields total 32 bits; height_m1 is its only payload field
+    struct npu_set_ofm_height1_m1_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_OFM_HEIGHT1_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;    //  control; both constructors store CMD0_CTRL here
+        uint32_t height_m1 : 16; //  OFM Tile 1 height (presumably "height minus 1" given the _m1 suffix; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_height1_m1_t(uint32_t _height_m1) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; height_m1 and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ofm_height1_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for OFM access: one command word whose bit-fields total 32 bits; region is its only payload field
+    struct npu_set_ofm_region_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_OFM_REGION here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2; //  control; both constructors store CMD0_CTRL here
+        uint32_t region : 3;  //  Index n for OFM access; a 3-bit field, so it can hold 0..7
+        uint32_t reserved1 : 13; //  reserved; both constructors store 0
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_region_t(uint32_t _region) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)), // low 3 bits kept
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ofm_region_t() : // default constructor; region starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; region and the reserved bits are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_OFM_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ofm_region_t &set_region(uint32_t value)
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // only the low 3 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Kernel width: one command word whose bit-fields total 32 bits; width_m1 is its only payload field
+    struct npu_set_kernel_width_m1_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_KERNEL_WIDTH_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;   //  control; both constructors store CMD0_CTRL here
+        uint32_t width_m1 : 16; //  Kernel width (presumably "width minus 1" given the _m1 suffix; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_kernel_width_m1_t(uint32_t _width_m1) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t() : // default constructor; width_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; width_m1 and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_WIDTH_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        {
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_kernel_width_m1_t &set_width_m1(uint32_t value)
+        {
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Kernel height: one command word whose bit-fields total 32 bits; height_m1 is its only payload field
+    struct npu_set_kernel_height_m1_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_KERNEL_HEIGHT_M1 here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;    //  control; both constructors store CMD0_CTRL here
+        uint32_t height_m1 : 16; //  Kernel height (presumably "height minus 1" given the _m1 suffix; confirm)
+#ifdef __cplusplus
+      public:
+        npu_set_kernel_height_m1_t(uint32_t _height_m1) : // payload constructor; not explicit, so uint32_t converts implicitly
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1)) // low 16 bits kept
+        {
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t() : // default constructor; height_m1 starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; height_m1 and reserved0 are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_HEIGHT_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_kernel_height_m1_t &set_height_m1(uint32_t value)
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // only the low 16 bits of value are stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, decimal value) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Kernel stride: one command word whose bit-fields total 32 bits; carries eight single-bit payload fields
+    struct npu_set_kernel_stride_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_KERNEL_STRIDE here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;       //  control; both constructors store CMD0_CTRL here
+        uint32_t stride_x_lsb : 1;  //  Stride x LSB. (kernel_x_stride - 1)[0]
+        uint32_t stride_y_lsb : 1;  //  Stride y LSB. (kernel_y_stride - 1)[0]
+        uint32_t weight_order : 1;  //  Weight ordering mode (NPU_NAMESPACE::weight_order stored as 1 bit)
+        uint32_t dilation_x : 1;    //  Kernel x dilation (NPU_NAMESPACE::kernel_dilation stored as 1 bit)
+        uint32_t dilation_y : 1;    //  Kernel y dilation (NPU_NAMESPACE::kernel_dilation stored as 1 bit)
+        uint32_t decomposition : 1; //  Kernel decomposition (NPU_NAMESPACE::kernel_decomposition stored as 1 bit)
+        uint32_t stride_x_msb : 1;  //  Stride x MSB. (kernel_x_stride - 1) >> 1
+        uint32_t reserved1 : 2; //  reserved gap declared between the two MSB fields; both constructors store 0
+        uint32_t stride_y_msb : 1; //  Stride y MSB. (kernel_y_stride - 1) >> 1
+        uint32_t reserved2 : 6; //  reserved; both constructors store 0
+#ifdef __cplusplus
+      public:
+        npu_set_kernel_stride_t(uint32_t _stride_x_lsb, // payload constructor; every argument is reduced to 1 bit
+                                uint32_t _stride_y_lsb,
+                                NPU_NAMESPACE::weight_order _weight_order,
+                                NPU_NAMESPACE::kernel_dilation _dilation_x,
+                                NPU_NAMESPACE::kernel_dilation _dilation_y,
+                                NPU_NAMESPACE::kernel_decomposition _decomposition,
+                                uint32_t _stride_x_msb, // the caller passes the LSB/MSB parts separately; no splitting is done here
+                                uint32_t _stride_y_msb) :
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            stride_x_lsb(_stride_x_lsb & ((1U << 1) - 1)), stride_y_lsb(_stride_y_lsb & ((1U << 1) - 1)), // bit 0 of each argument
+            weight_order(static_cast<uint8_t>(_weight_order) & ((1U << 1) - 1)),
+            dilation_x(static_cast<uint8_t>(_dilation_x) & ((1U << 1) - 1)),
+            dilation_y(static_cast<uint8_t>(_dilation_y) & ((1U << 1) - 1)),
+            decomposition(static_cast<uint8_t>(_decomposition) & ((1U << 1) - 1)),
+            stride_x_msb(_stride_x_msb & ((1U << 1) - 1)), reserved1(0), stride_y_msb(_stride_y_msb & ((1U << 1) - 1)),
+            reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_kernel_stride_t() : // default constructor; every payload bit starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), stride_x_lsb(0), stride_y_lsb(0),
+            weight_order(0), dilation_x(0), dilation_y(0), decomposition(0), stride_x_msb(0), reserved1(0),
+            stride_y_msb(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; payload and reserved bits are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_KERNEL_STRIDE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_x_lsb() const
+        {
+            return static_cast<uint32_t>(stride_x_lsb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_x_lsb(uint32_t value)
+        {
+            stride_x_lsb = static_cast<uint8_t>(value) & ((1U << 1) - 1); // only bit 0 of value is stored
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_y_lsb() const
+        {
+            return static_cast<uint32_t>(stride_y_lsb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_y_lsb(uint32_t value)
+        {
+            stride_y_lsb = static_cast<uint8_t>(value) & ((1U << 1) - 1); // only bit 0 of value is stored
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::weight_order get_weight_order() const
+        {
+            return static_cast<NPU_NAMESPACE::weight_order>(weight_order);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_weight_order(NPU_NAMESPACE::weight_order value)
+        {
+            weight_order = static_cast<uint8_t>(value) & ((1U << 1) - 1); // enum value reduced to its lowest bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_x() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_x);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_dilation_x(NPU_NAMESPACE::kernel_dilation value)
+        {
+            dilation_x = static_cast<uint8_t>(value) & ((1U << 1) - 1); // enum value reduced to its lowest bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_dilation get_dilation_y() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_dilation>(dilation_y);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_dilation_y(NPU_NAMESPACE::kernel_dilation value)
+        {
+            dilation_y = static_cast<uint8_t>(value) & ((1U << 1) - 1); // enum value reduced to its lowest bit
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::kernel_decomposition get_decomposition() const
+        {
+            return static_cast<NPU_NAMESPACE::kernel_decomposition>(decomposition);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_decomposition(NPU_NAMESPACE::kernel_decomposition value)
+        {
+            decomposition = static_cast<uint8_t>(value) & ((1U << 1) - 1); // enum value reduced to its lowest bit
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_x_msb() const
+        {
+            return static_cast<uint32_t>(stride_x_msb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_x_msb(uint32_t value)
+        {
+            stride_x_msb = static_cast<uint8_t>(value) & ((1U << 1) - 1); // only bit 0 of value is stored
+            return *this;
+        }
+        CONSTEXPR uint32_t get_stride_y_msb() const
+        {
+            return static_cast<uint32_t>(stride_y_msb);
+        }
+        CONSTEXPR npu_set_kernel_stride_t &set_stride_y_msb(uint32_t value)
+        {
+            stride_y_msb = static_cast<uint8_t>(value) & ((1U << 1) - 1); // only bit 0 of value is stored
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends eight (name, text) pairs in field order
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("stride_x_lsb", std::to_string(stride_x_lsb)));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_y_lsb", std::to_string(stride_y_lsb)));
+            fields.push_back(std::make_pair<std::string, std::string>( // enum fields print the *_str table entry, or "****" if out of range
+                "weight_order",
+                (weight_order < (sizeof(weight_order_str) / sizeof(weight_order_str[0])) ?
+                     weight_order_str[weight_order] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "dilation_x",
+                (dilation_x < (sizeof(kernel_dilation_str) / sizeof(kernel_dilation_str[0])) ?
+                     kernel_dilation_str[dilation_x] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "dilation_y",
+                (dilation_y < (sizeof(kernel_dilation_str) / sizeof(kernel_dilation_str[0])) ?
+                     kernel_dilation_str[dilation_y] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "decomposition",
+                (decomposition < (sizeof(kernel_decomposition_str) / sizeof(kernel_decomposition_str[0])) ?
+                     kernel_decomposition_str[decomposition] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_x_msb", std::to_string(stride_x_msb)));
+            fields.push_back(std::make_pair<std::string, std::string>("stride_y_msb", std::to_string(stride_y_msb)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Multi-core parallel mode: one command word whose bit-fields total 32 bits; parallel_mode is its only payload field
+    struct npu_set_parallel_mode_t
+    {
+#ifdef __cplusplus
+      private: // fields are hidden behind the accessors below in C++ only; a C build sees plain members
+#endif
+        uint32_t opcode : 10; //  opcode; both constructors store NPU_SET_PARALLEL_MODE here
+        uint32_t reserved0 : 4; //  reserved; both constructors store 0
+        uint32_t control : 2;       //  control; both constructors store CMD0_CTRL here
+        uint32_t parallel_mode : 1; //  Multi-core parallel mode (NPU_NAMESPACE::parallel_mode stored as 1 bit)
+        uint32_t reserved1 : 15; //  reserved; both constructors store 0
+#ifdef __cplusplus
+      public:
+        npu_set_parallel_mode_t(NPU_NAMESPACE::parallel_mode _parallel_mode) : // payload constructor; not explicit
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_PARALLEL_MODE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            parallel_mode(static_cast<uint8_t>(_parallel_mode) & ((1U << 1) - 1)), reserved1(0) // enum value reduced to its lowest bit
+        {
+        }
+        CONSTEXPR npu_set_parallel_mode_t() : // default constructor; parallel_mode starts at 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_PARALLEL_MODE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), parallel_mode(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_PARALLEL_MODE) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; parallel_mode and the reserved bits are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_PARALLEL_MODE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first 4 bytes as a word; non-const, so unusable on const objects
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy; which bits each field lands in depends on the compiler's bit-field layout
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_parallel_mode_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this so setters can be chained
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // value is truncated to the 10-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_parallel_mode_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // value is truncated to the 2-bit field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::parallel_mode get_parallel_mode() const
+        {
+            return static_cast<NPU_NAMESPACE::parallel_mode>(parallel_mode);
+        }
+        CONSTEXPR npu_set_parallel_mode_t &set_parallel_mode(NPU_NAMESPACE::parallel_mode value)
+        {
+            parallel_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1); // enum value reduced to its lowest bit
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, text) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>( // prints the parallel_mode_str entry, or "****" if out of range
+                "parallel_mode",
+                (parallel_mode < (sizeof(parallel_mode_str) / sizeof(parallel_mode_str[0])) ?
+                     parallel_mode_str[parallel_mode] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Accumulator format: command word for cmd0 opcode NPU_SET_ACC_FORMAT (bit-fields below total 32 bits)
+    struct npu_set_acc_format_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;    //  control
+        uint32_t acc_format : 2; //  Accumulator format
+        uint32_t reserved1 : 14; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_acc_format_t(NPU_NAMESPACE::acc_format _acc_format) : // stores _acc_format masked to 2 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            acc_format(static_cast<uint8_t>(_acc_format) & ((1U << 2) - 1)), reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_acc_format_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), acc_format(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACC_FORMAT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::acc_format get_acc_format() const // acc_format field as NPU_NAMESPACE::acc_format
+        {
+            return static_cast<NPU_NAMESPACE::acc_format>(acc_format);
+        }
+        CONSTEXPR npu_set_acc_format_t &set_acc_format(NPU_NAMESPACE::acc_format value) // masks to 2 bits; returns *this
+        {
+            acc_format = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one (name, text) pair
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "acc_format",
+                (acc_format < (sizeof(acc_format_str) / sizeof(acc_format_str[0])) ? acc_format_str[acc_format] :
+                                                                                     "****"))); // placeholder when the value has no table entry
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Activation function and clip range: command word for cmd0 opcode NPU_SET_ACTIVATION (bit-fields below total 32 bits)
+    struct npu_set_activation_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;             //  control
+        uint32_t activation_function : 5; //  Activation function (before table lookup)
+        uint32_t reserved1 : 7; //  reserved, written as 0 by both constructors
+        uint32_t activation_clip_range : 3; //  Activation clip range. This must be set to 0 if table lookup is not used
+        uint32_t reserved2 : 1; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_activation_t(NPU_NAMESPACE::activation_function _activation_function, // stored masked to 5 bits
+                             NPU_NAMESPACE::activation_clip_range _activation_clip_range) : // stored masked to 3 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_function(static_cast<uint8_t>(_activation_function) & ((1U << 5) - 1)), reserved1(0),
+            activation_clip_range(static_cast<uint8_t>(_activation_clip_range) & ((1U << 3) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_activation_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_function(0), reserved1(0),
+            activation_clip_range(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_function get_activation_function() const // field as NPU_NAMESPACE::activation_function
+        {
+            return static_cast<NPU_NAMESPACE::activation_function>(activation_function);
+        }
+        CONSTEXPR npu_set_activation_t &set_activation_function(NPU_NAMESPACE::activation_function value) // masks to 5 bits; returns *this
+        {
+            activation_function = static_cast<uint8_t>(value) & ((1U << 5) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_clip_range get_activation_clip_range() const // field as NPU_NAMESPACE::activation_clip_range
+        {
+            return static_cast<NPU_NAMESPACE::activation_clip_range>(activation_clip_range);
+        }
+        CONSTEXPR npu_set_activation_t &set_activation_clip_range(NPU_NAMESPACE::activation_clip_range value) // masks to 3 bits; returns *this
+        {
+            activation_clip_range = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends two (name, text) pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_function",
+                (activation_function < (sizeof(activation_function_str) / sizeof(activation_function_str[0])) ? // index must be inside the table
+                     activation_function_str[activation_function] :
+                     "****"))); // placeholder text when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_clip_range",
+                (activation_clip_range < (sizeof(activation_clip_range_str) / sizeof(activation_clip_range_str[0])) ? // index must be inside the table
+                     activation_clip_range_str[activation_clip_range] :
+                     "****"))); // placeholder text when the value has no table entry
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Lower bound clip: command word for cmd0 opcode NPU_SET_ACTIVATION_MIN (bit-fields below total 32 bits)
+    struct npu_set_activation_min_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;        //  control
+        uint32_t clip_boundary : 16; //  Clip boundary for OFM activations
+#ifdef __cplusplus
+      public:
+        npu_set_activation_min_t(uint32_t _clip_boundary) : // keeps only the low 16 bits of _clip_boundary
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            clip_boundary(_clip_boundary & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_activation_min_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), clip_boundary(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; clip_boundary and reserved0 are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MIN);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_clip_boundary() const // clip_boundary field widened to uint32_t
+        {
+            return static_cast<uint32_t>(clip_boundary);
+        }
+        CONSTEXPR npu_set_activation_min_t &set_clip_boundary(uint32_t value) // keeps the low 16 bits of value; returns *this
+        {
+            clip_boundary = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("clip_boundary", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Upper bound clip: command word for cmd0 opcode NPU_SET_ACTIVATION_MAX (bit-fields below total 32 bits)
+    struct npu_set_activation_max_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;        //  control
+        uint32_t clip_boundary : 16; //  Clip boundary for OFM activations
+#ifdef __cplusplus
+      public:
+        npu_set_activation_max_t(uint32_t _clip_boundary) : // keeps only the low 16 bits of _clip_boundary
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            clip_boundary(_clip_boundary & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_activation_max_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), clip_boundary(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; clip_boundary and reserved0 are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_ACTIVATION_MAX);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_clip_boundary() const // clip_boundary field widened to uint32_t
+        {
+            return static_cast<uint32_t>(clip_boundary);
+        }
+        CONSTEXPR npu_set_activation_max_t &set_clip_boundary(uint32_t value) // keeps the low 16 bits of value; returns *this
+        {
+            clip_boundary = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("clip_boundary", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("clip_boundary", std::to_string(clip_boundary)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for weight stream access: command word for cmd0 opcode NPU_SET_WEIGHT_REGION (bit-fields below total 32 bits)
+    struct npu_set_weight_region_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Index n for weight stream access
+        uint32_t reserved1 : 13; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_weight_region_t(uint32_t _region) : // keeps only the low 3 bits of _region
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_weight_region_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_WEIGHT_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const // region field widened to uint32_t
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_weight_region_t &set_region(uint32_t value) // keeps the low 3 bits of value; returns *this
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("region", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for scale stream access: command word for cmd0 opcode NPU_SET_SCALE_REGION (bit-fields below total 32 bits)
+    struct npu_set_scale_region_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Index n for scale stream access
+        uint32_t reserved1 : 13; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_scale_region_t(uint32_t _region) : // keeps only the low 3 bits of _region
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_scale_region_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_SCALE_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const // region field widened to uint32_t
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_scale_region_t &set_region(uint32_t value) // keeps the low 3 bits of value; returns *this
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("region", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Start of ACC0,ACC1 buffers: command word for cmd0 opcode NPU_SET_AB_START (bit-fields below total 32 bits)
+    struct npu_set_ab_start_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;  //  control
+        uint32_t ab_start : 6; //  Start of ACC0,ACC1 buffers in the SHRAM in KB units. Multiple of 2
+        uint32_t reserved1 : 10; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_ab_start_t(uint32_t _ab_start) : // keeps only the low 6 bits of _ab_start
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ab_start(_ab_start & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ab_start_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ab_start(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_AB_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ab_start() const // ab_start field widened to uint32_t
+        {
+            return static_cast<uint32_t>(ab_start);
+        }
+        CONSTEXPR npu_set_ab_start_t &set_ab_start(uint32_t value) // keeps the low 6 bits of value; returns *this
+        {
+            ab_start = static_cast<uint8_t>(value) & ((1U << 6) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("ab_start", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ab_start", std::to_string(ab_start)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Block number of blocks dependency: command word for cmd0 opcode NPU_SET_BLOCKDEP (bit-fields below total 32 bits)
+    struct npu_set_blockdep_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2;  //  control
+        uint32_t blockdep : 2; //  Block number of blocks dependency between kernel operations
+        uint32_t reserved1 : 14; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_blockdep_t(uint32_t _blockdep) : // keeps only the low 2 bits of _blockdep
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), blockdep(_blockdep & ((1U << 2) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_blockdep_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), blockdep(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_BLOCKDEP);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_blockdep() const // blockdep field widened to uint32_t
+        {
+            return static_cast<uint32_t>(blockdep);
+        }
+        CONSTEXPR npu_set_blockdep_t &set_blockdep(uint32_t value) // keeps the low 2 bits of value; returns *this
+        {
+            blockdep = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("blockdep", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("blockdep", std::to_string(blockdep)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA0 source region: command word for cmd0 opcode NPU_SET_DMA0_SRC_REGION (bit-fields below total 32 bits)
+    struct npu_set_dma0_src_region_t
+    {
+#ifdef __cplusplus
+      private: // bit-fields are private only in C++ builds; use the get_*/set_* members below
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode
+        uint32_t reserved0 : 4; //  reserved, written as 0 by both constructors
+        uint32_t control : 2; //  control
+        uint32_t region : 3;  //  Region number
+        uint32_t reserved1 : 5; //  reserved, written as 0 by both constructors
+        uint32_t region_mode : 1; //  Region mode
+        uint32_t stride_mode : 2; //  Stride mode
+        uint32_t reserved2 : 5; //  reserved, written as 0 by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_src_region_t(uint32_t _region, // stored masked to 3 bits
+                                  NPU_NAMESPACE::dma_region_mode _region_mode, // stored masked to 1 bit
+                                  NPU_NAMESPACE::dma_stride_mode _stride_mode) : // stored masked to 2 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            region(_region & ((1U << 3) - 1)), reserved1(0),
+            region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1) - 1)),
+            stride_mode(static_cast<uint8_t>(_stride_mode) & ((1U << 2) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_dma0_src_region_t() : // default: opcode and control preset, every other field 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), region_mode(0),
+            stride_mode(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SRC_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the first sizeof(uint32_t) bytes of this object as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte-wise copy of the bit-field storage
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // opcode field as NPU_NAMESPACE::cmd0_opcode
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // masks to 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field as NPU_NAMESPACE::cmd_ctrl
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // masks to 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const // region field widened to uint32_t
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_region(uint32_t value) // keeps the low 3 bits of value; returns *this
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const // region_mode field as NPU_NAMESPACE::dma_region_mode
+        {
+            return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_region_mode(NPU_NAMESPACE::dma_region_mode value) // masks to 1 bit; returns *this
+        {
+            region_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_stride_mode get_stride_mode() const // stride_mode field as NPU_NAMESPACE::dma_stride_mode
+        {
+            return static_cast<NPU_NAMESPACE::dma_stride_mode>(stride_mode);
+        }
+        CONSTEXPR npu_set_dma0_src_region_t &set_stride_mode(NPU_NAMESPACE::dma_stride_mode value) // masks to 2 bits; returns *this
+        {
+            stride_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends three (name, text) pairs
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region))); // decimal text
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "region_mode",
+                (region_mode < (sizeof(dma_region_mode_str) / sizeof(dma_region_mode_str[0])) ? // index must be inside the table
+                     dma_region_mode_str[region_mode] :
+                     "****"))); // placeholder text when the value has no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "stride_mode",
+                (stride_mode < (sizeof(dma_stride_mode_str) / sizeof(dma_stride_mode_str[0])) ? // index must be inside the table
+                     dma_stride_mode_str[stride_mode] :
+                     "****"))); // placeholder text when the value has no table entry
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA0 destination region command: opcode/control header plus region, region_mode and stride_mode (32 field bits)
+    struct npu_set_dma0_dst_region_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_DMA0_DST_REGION
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t region : 3; //  Region number if region_mode is region_mode_external. Else core mask to write to (bit k
+                             //  set for core k=0,1)
+        uint32_t reserved1 : 5; //  reserved; both constructors write 0
+        uint32_t region_mode : 1; //  Region mode (holds a dma_region_mode value)
+        uint32_t stride_mode : 2; //  Stride mode (holds a dma_stride_mode value)
+        uint32_t reserved2 : 5; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_dst_region_t(uint32_t _region,
+                                  NPU_NAMESPACE::dma_region_mode _region_mode,
+                                  NPU_NAMESPACE::dma_stride_mode _stride_mode) : // not CONSTEXPR; arguments are masked to field width
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            region(_region & ((1U << 3) - 1)), reserved1(0),
+            region_mode(static_cast<uint8_t>(_region_mode) & ((1U << 1) - 1)),
+            stride_mode(static_cast<uint8_t>(_stride_mode) & ((1U << 2) - 1)), reserved2(0)
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t() : // opcode and control set for this command, all other fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0), region_mode(0),
+            stride_mode(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; every other field is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_DST_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const
+        { // region field widened to uint32_t
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_region(uint32_t value)
+        { // keeps the low 3 bits of value; returns *this for chaining
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_region_mode get_region_mode() const
+        { // region_mode field converted to the dma_region_mode enum
+            return static_cast<NPU_NAMESPACE::dma_region_mode>(region_mode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_region_mode(NPU_NAMESPACE::dma_region_mode value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            region_mode = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::dma_stride_mode get_stride_mode() const
+        { // stride_mode field converted to the dma_stride_mode enum
+            return static_cast<NPU_NAMESPACE::dma_stride_mode>(stride_mode);
+        }
+        CONSTEXPR npu_set_dma0_dst_region_t &set_stride_mode(NPU_NAMESPACE::dma_stride_mode value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            stride_mode = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one (field name, text) pair per payload field to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "region_mode",
+                (region_mode < (sizeof(dma_region_mode_str) / sizeof(dma_region_mode_str[0])) ?
+                     dma_region_mode_str[region_mode] :
+                     "****"))); // "****" is emitted when the value has no entry in dma_region_mode_str
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "stride_mode",
+                (stride_mode < (sizeof(dma_stride_mode_str) / sizeof(dma_stride_mode_str[0])) ?
+                     dma_stride_mode_str[stride_mode] :
+                     "****"))); // "****" is emitted when the value has no entry in dma_stride_mode_str
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Size of second dimension for 2D/3D transfers (10+4+2 header bits followed by a 16-bit size)
+    struct npu_set_dma0_size0_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_DMA0_SIZE0
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t size : 16;   //  Size of second dimension for 2D/3D transfers
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_size0_t(uint32_t _size) : // not CONSTEXPR or explicit; _size is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(_size & ((1U << 16) - 1))
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_dma0_size0_t() : // opcode and control set for this command, size 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; size is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_size() const
+        { // size field widened to uint32_t
+            return static_cast<uint32_t>(size);
+        }
+        CONSTEXPR npu_set_dma0_size0_t &set_size(uint32_t value)
+        { // the uint16_t cast already drops the upper bits; the mask keeps the same low 16 bits
+            size = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends the pair ("size", std::to_string(size)) to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Size of third dimension for 3D transfers (10+4+2 header bits followed by a 16-bit size)
+    struct npu_set_dma0_size1_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_DMA0_SIZE1
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t size : 16;   //  Size of third dimension for 3D transfers
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_size1_t(uint32_t _size) : // not CONSTEXPR or explicit; _size is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(_size & ((1U << 16) - 1))
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_dma0_size1_t() : // opcode and control set for this command, size 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), size(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; size is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_DMA0_SIZE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_size() const
+        { // size field widened to uint32_t
+            return static_cast<uint32_t>(size);
+        }
+        CONSTEXPR npu_set_dma0_size1_t &set_size(uint32_t value)
+        { // the uint16_t cast already drops the upper bits; the mask keeps the same low 16 bits
+            size = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends the pair ("size", std::to_string(size)) to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("size", std::to_string(size)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 broadcast configuration command: opcode/control header plus five 1-bit flags (32 field bits in total)
+    struct npu_set_ifm2_broadcast_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_IFM2_BROADCAST
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t
+            broadcast_h : 1; //  Broadcast H dimension (if set then any accesses to IFM2 sets y=0 and IFM2 height=1)
+        uint32_t broadcast_w : 1; //  Broadcast W dimension (if set then any accesses to IFM2 sets x=0 and IFM2 width=1)
+        uint32_t broadcast_c : 1; //  Broadcast C dimension (if set then any accesses to IFM2 sets c=0 and IFM2 depth=1)
+        uint32_t reserved1 : 3; //  reserved; both constructors write 0
+        uint32_t operand_order : 1;      //  Operand order (holds an ifm2_operand_order value)
+        uint32_t broadcast_constant : 1; //  Broadcast constant given by NPU_SET_IFM2_SCALAR and so ignore BH, BW and BC
+        uint32_t reserved2 : 8; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_broadcast_t(NPU_NAMESPACE::broadcast_mode _broadcast_h,
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_w,
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_c,
+                                 NPU_NAMESPACE::ifm2_operand_order _operand_order,
+                                 NPU_NAMESPACE::broadcast_mode _broadcast_constant) : // not CONSTEXPR; each argument keeps 1 bit
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            broadcast_h(static_cast<uint8_t>(_broadcast_h) & ((1U << 1) - 1)),
+            broadcast_w(static_cast<uint8_t>(_broadcast_w) & ((1U << 1) - 1)),
+            broadcast_c(static_cast<uint8_t>(_broadcast_c) & ((1U << 1) - 1)), reserved1(0),
+            operand_order(static_cast<uint8_t>(_operand_order) & ((1U << 1) - 1)),
+            broadcast_constant(static_cast<uint8_t>(_broadcast_constant) & ((1U << 1) - 1)), reserved2(0)
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t() : // opcode and control set for this command, all other fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), broadcast_h(0), broadcast_w(0),
+            broadcast_c(0), reserved1(0), operand_order(0), broadcast_constant(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; every other field is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_BROADCAST);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_h() const
+        { // broadcast_h field converted to the broadcast_mode enum
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_h);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_h(NPU_NAMESPACE::broadcast_mode value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            broadcast_h = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_w() const
+        { // broadcast_w field converted to the broadcast_mode enum
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_w);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_w(NPU_NAMESPACE::broadcast_mode value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            broadcast_w = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_c() const
+        { // broadcast_c field converted to the broadcast_mode enum
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_c);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_c(NPU_NAMESPACE::broadcast_mode value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            broadcast_c = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::ifm2_operand_order get_operand_order() const
+        { // operand_order field converted to the ifm2_operand_order enum
+            return static_cast<NPU_NAMESPACE::ifm2_operand_order>(operand_order);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_operand_order(NPU_NAMESPACE::ifm2_operand_order value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            operand_order = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::broadcast_mode get_broadcast_constant() const
+        { // broadcast_constant field converted to the broadcast_mode enum
+            return static_cast<NPU_NAMESPACE::broadcast_mode>(broadcast_constant);
+        }
+        CONSTEXPR npu_set_ifm2_broadcast_t &set_broadcast_constant(NPU_NAMESPACE::broadcast_mode value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            broadcast_constant = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one (field name, text) pair per flag; "****" stands in for a value with no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_h",
+                (broadcast_h < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_h] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_w",
+                (broadcast_w < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_w] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_c",
+                (broadcast_c < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_c] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "operand_order",
+                (operand_order < (sizeof(ifm2_operand_order_str) / sizeof(ifm2_operand_order_str[0])) ?
+                     ifm2_operand_order_str[operand_order] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "broadcast_constant",
+                (broadcast_constant < (sizeof(broadcast_mode_str) / sizeof(broadcast_mode_str[0])) ?
+                     broadcast_mode_str[broadcast_constant] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 scalar value command (10+4+2 header bits followed by a 16-bit scalar)
+    struct npu_set_ifm2_scalar_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_IFM2_SCALAR
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t scalar : 16; //  int16 or uint16 depending on ifm2_precision.type
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_scalar_t(uint32_t _scalar) : // not CONSTEXPR or explicit; _scalar is masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), scalar(_scalar & ((1U << 16) - 1))
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t() : // opcode and control set for this command, scalar 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), scalar(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; scalar is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_SCALAR);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scalar() const
+        { // raw 16-bit field widened to uint32_t; no sign extension is applied here
+            return static_cast<uint32_t>(scalar);
+        }
+        CONSTEXPR npu_set_ifm2_scalar_t &set_scalar(uint32_t value)
+        { // the uint16_t cast already drops the upper bits; the mask keeps the same low 16 bits
+            scalar = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends the pair ("scalar", std::to_string(scalar)) to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("scalar", std::to_string(scalar)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Precision command: opcode/control header plus activation type, precision and format (32 field bits)
+    struct npu_set_ifm2_precision_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_IFM2_PRECISION
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;         //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t activation_type : 1; //  IFM type - MUST MATCH IFM
+        uint32_t reserved1 : 1; //  reserved; both constructors write 0
+        uint32_t activation_precision : 2; //  IFM precision - MUST MATCH IFM
+        uint32_t reserved2 : 2; //  reserved; both constructors write 0
+        uint32_t activation_format : 2; //  IFM format
+        uint32_t reserved3 : 8; //  reserved; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_precision_t(NPU_NAMESPACE::activation_type _activation_type,
+                                 NPU_NAMESPACE::activation_precision _activation_precision,
+                                 NPU_NAMESPACE::activation_format _activation_format) : // not CONSTEXPR; arguments are masked to field width
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)),
+            reserved0(0), control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            activation_type(static_cast<uint8_t>(_activation_type) & ((1U << 1) - 1)), reserved1(0),
+            activation_precision(static_cast<uint8_t>(_activation_precision) & ((1U << 2) - 1)), reserved2(0),
+            activation_format(static_cast<uint8_t>(_activation_format) & ((1U << 2) - 1)), reserved3(0)
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_ifm2_precision_t() : // opcode and control set for this command, all other fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), activation_type(0), reserved1(0),
+            activation_precision(0), reserved2(0), activation_format(0), reserved3(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; every other field is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_PRECISION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_type get_activation_type() const
+        { // activation_type field converted to the activation_type enum
+            return static_cast<NPU_NAMESPACE::activation_type>(activation_type);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_type(NPU_NAMESPACE::activation_type value)
+        { // keeps the lowest bit of value; returns *this for chaining
+            activation_type = static_cast<uint8_t>(value) & ((1U << 1) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_precision get_activation_precision() const
+        { // activation_precision field converted to the activation_precision enum
+            return static_cast<NPU_NAMESPACE::activation_precision>(activation_precision);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_precision(NPU_NAMESPACE::activation_precision value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            activation_precision = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::activation_format get_activation_format() const
+        { // activation_format field converted to the activation_format enum
+            return static_cast<NPU_NAMESPACE::activation_format>(activation_format);
+        }
+        CONSTEXPR npu_set_ifm2_precision_t &set_activation_format(NPU_NAMESPACE::activation_format value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            activation_format = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one (field name, text) pair per payload field; "****" stands in for a value with no table entry
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_type",
+                (activation_type < (sizeof(activation_type_str) / sizeof(activation_type_str[0])) ?
+                     activation_type_str[activation_type] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_precision",
+                (activation_precision < (sizeof(activation_precision_str) / sizeof(activation_precision_str[0])) ?
+                     activation_precision_str[activation_precision] :
+                     "****")));
+            fields.push_back(std::make_pair<std::string, std::string>(
+                "activation_format",
+                (activation_format < (sizeof(activation_format_str) / sizeof(activation_format_str[0])) ?
+                     activation_format_str[activation_format] :
+                     "****")));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 zero point command (10+4+2 header bits followed by a 16-bit zero_point)
+    struct npu_set_ifm2_zero_point_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_IFM2_ZERO_POINT
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;     //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t zero_point : 16; //  Zero point offset
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_zero_point_t(uint32_t _zero_point) : // not CONSTEXPR or explicit; argument masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)),
+            zero_point(_zero_point & ((1U << 16) - 1))
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t() : // opcode and control set for this command, zero_point 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), zero_point(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; zero_point is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_ZERO_POINT);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_zero_point() const
+        { // zero_point field widened to uint32_t
+            return static_cast<uint32_t>(zero_point);
+        }
+        CONSTEXPR npu_set_ifm2_zero_point_t &set_zero_point(uint32_t value)
+        { // the uint16_t cast already drops the upper bits; the mask keeps the same low 16 bits
+            zero_point = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends the pair ("zero_point", std::to_string(zero_point)) to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("zero_point", std::to_string(zero_point)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 and tile 2 width command (10+4+2 header bits followed by a 16-bit width_m1)
+    struct npu_set_ifm2_width0_m1_t
+    {
+#ifdef __cplusplus
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode; constructors and init() store cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;   //  control; constructors and init() store cmd_ctrl::CMD0_CTRL
+        uint32_t width_m1 : 16; //  IFM2 Tile 0 and tile 2 width
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_width0_m1_t(uint32_t _width_m1) : // not CONSTEXPR or explicit; argument masked to its low 16 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(_width_m1 & ((1U << 16) - 1))
+        { // empty body: every field is set in the initializer list
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t() : // opcode and control set for this command, width_m1 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), width_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const
+        { // true only when opcode and control both hold this command's values
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init()
+        { // re-stamps opcode and control; width_m1 is left unchanged
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_WIDTH0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t()
+        { // copies the first sizeof(uint32_t) bytes of this object into a word; not const-qualified
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const
+        { // opcode field converted to the cmd0_opcode enum
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value)
+        { // keeps the low 10 bits of value; returns *this for chaining
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const
+        { // control field converted to the cmd_ctrl enum
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_width_m1() const
+        { // width_m1 field widened to uint32_t
+            return static_cast<uint32_t>(width_m1);
+        }
+        CONSTEXPR npu_set_ifm2_width0_m1_t &set_width_m1(uint32_t value)
+        { // the uint16_t cast already drops the upper bits; the mask keeps the same low 16 bits
+            width_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends the pair ("width_m1", std::to_string(width_m1)) to the caller's vector
+            fields.push_back(std::make_pair<std::string, std::string>("width_m1", std::to_string(width_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 height: cmd0 command word built from bit-fields opcode(10) + reserved0(4) + control(2) + height_m1(16)
+    struct npu_set_ifm2_height0_m1_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;    //  control (2 bits)
+        uint32_t height_m1 : 16; //  IFM2 Tile 0 height (16 bits; name suggests "height minus one" - TODO confirm)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm2_height0_m1_t(uint32_t _height_m1) : // build the command; only the low 16 bits of _height_m1 are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t() : // default command: expected opcode/control, height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved0 and height_m1 are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT0_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const // stored 16-bit payload widened to uint32_t
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm2_height0_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("height_m1", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 1 height: cmd0 command word built from bit-fields opcode(10) + reserved0(4) + control(2) + height_m1(16)
+    struct npu_set_ifm2_height1_m1_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;    //  control (2 bits)
+        uint32_t height_m1 : 16; //  IFM2 Tile 1 height (16 bits; name suggests "height minus one" - TODO confirm)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm2_height1_m1_t(uint32_t _height_m1) : // build the command; only the low 16 bits of _height_m1 are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(_height_m1 & ((1U << 16) - 1))
+        {
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t() : // default command: expected opcode/control, height_m1 = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), height_m1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; reserved0 and height_m1 are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_HEIGHT1_M1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_height_m1() const // stored 16-bit payload widened to uint32_t
+        {
+            return static_cast<uint32_t>(height_m1);
+        }
+        CONSTEXPR npu_set_ifm2_height1_m1_t &set_height_m1(uint32_t value) // returns *this for chaining
+        {
+            height_m1 = static_cast<uint16_t>(value) & ((1U << 16) - 1); // value is truncated to 16 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("height_m1", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("height_m1", std::to_string(height_m1)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Start of IB0,IB1 buffers for IFM2: cmd0 word of opcode(10) + reserved0(4) + control(2) + ib_start(6) + reserved1(10)
+    struct npu_set_ifm2_ib_start_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2;  //  control (2 bits)
+        uint32_t ib_start : 6; //  Start of IB0,IB1 buffers for IFM2 in the SHRAM in KB units. Multiple of 2
+        uint32_t reserved1 : 10; //  reserved; both constructors write 0
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm2_ib_start_t(uint32_t _ib_start) : // build the command; only the low 6 bits of _ib_start are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_start(_ib_start & ((1U << 6) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t() : // default command: expected opcode/control, ib_start = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), ib_start(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; ib_start and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_IB_START);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_ib_start() const // stored 6-bit payload widened to uint32_t
+        {
+            return static_cast<uint32_t>(ib_start);
+        }
+        CONSTEXPR npu_set_ifm2_ib_start_t &set_ib_start(uint32_t value) // returns *this for chaining
+        {
+            ib_start = static_cast<uint8_t>(value) & ((1U << 6) - 1); // value is truncated to its low 6 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("ib_start", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("ib_start", std::to_string(ib_start)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Index n for IFM2 access: cmd0 command word of opcode(10) + reserved0(4) + control(2) + region(3) + reserved1(13)
+    struct npu_set_ifm2_region_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t region : 3;  //  Index n for IFM2 access (3 bits, so values 0..7)
+        uint32_t reserved1 : 13; //  reserved; both constructors write 0
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm2_region_t(uint32_t _region) : // build the command; only the low 3 bits of _region are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(_region & ((1U << 3) - 1)),
+            reserved1(0)
+        {
+        }
+        CONSTEXPR npu_set_ifm2_region_t() : // default command: expected opcode/control, region = 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL)), region(0), reserved1(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode and control both hold this command's expected values
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION) &&
+                   control == static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; region and the reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd0_opcode::NPU_SET_IFM2_REGION);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD0_CTRL);
+        }
+        operator uint32_t() // returns the object's first sizeof(uint32_t) bytes as one word
+        {
+            uint32_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd0_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd0_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_opcode(NPU_NAMESPACE::cmd0_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits
+            return *this;
+        }
+        CONSTEXPR uint32_t get_region() const // stored 3-bit payload widened to uint32_t
+        {
+            return static_cast<uint32_t>(region);
+        }
+        CONSTEXPR npu_set_ifm2_region_t &set_region(uint32_t value) // returns *this for chaining
+        {
+            region = static_cast<uint8_t>(value) & ((1U << 3) - 1); // value is truncated to its low 3 bits
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("region", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("region", std::to_string(region)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Tile 0 address: cmd1 command of opcode(10) + reserved0(4) + control(2) + addr_hi(8) + reserved1(8) + addr_lo(32)
+    struct npu_set_ifm_base0_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_base0_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_base0_t() : // default command: expected opcode/control, address 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; address and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the address as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_base0_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Tile 1 address: cmd1 command of opcode(10) + reserved0(4) + control(2) + addr_hi(8) + reserved1(8) + addr_lo(32)
+    struct npu_set_ifm_base1_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_base1_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_base1_t() : // default command: expected opcode/control, address 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; address and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the address as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_base1_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Tile 2 address: cmd1 command of opcode(10) + reserved0(4) + control(2) + addr_hi(8) + reserved1(8) + addr_lo(32)
+    struct npu_set_ifm_base2_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_base2_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_base2_t() : // default command: expected opcode/control, address 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; address and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the address as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_base2_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM Tile 3 address: cmd1 command of opcode(10) + reserved0(4) + control(2) + addr_hi(8) + reserved1(8) + addr_lo(32)
+    struct npu_set_ifm_base3_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_base3_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_base3_t() : // default command: expected opcode/control, address 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; address and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the address as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_base3_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM byte stride between horizontal values (cmd1). NOTE(review): payload fields are named addr_* - presumably they carry the stride; confirm
+    struct npu_set_ifm_stride_x_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_stride_x_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t() : // default command: expected opcode/control, payload 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; payload and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the payload as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_stride_x_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM byte stride between vertical values (cmd1). NOTE(review): payload fields are named addr_* - presumably they carry the stride; confirm
+    struct npu_set_ifm_stride_y_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_stride_y_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t() : // default command: expected opcode/control, payload 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; payload and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the payload as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_stride_y_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM byte stride between channel blocks (of 16 bytes each block); cmd1. NOTE(review): payload fields are named addr_* - confirm they carry the stride
+    struct npu_set_ifm_stride_c_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ifm_stride_c_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t() : // default command: expected opcode/control, payload 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; payload and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the payload as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm_stride_c_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 0 address: cmd1 command of opcode(10) + reserved0(4) + control(2) + addr_hi(8) + reserved1(8) + addr_lo(32)
+    struct npu_set_ofm_base0_t
+    {
+#ifdef __cplusplus // fields are made private only when compiled as C++
+      private:
+#endif // __cplusplus
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  reserved; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 39..32, see get_addr)
+        uint32_t reserved1 : 8; //  reserved; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 31..0)
+#ifdef __cplusplus // C++-only constructors, accessors and disassembler
+      public:
+        npu_set_ofm_base0_t(uint64_t _addr) : // build the command; _addr bits above 39 are dropped by the uint8_t cast
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // all-ones mask is a no-op; the cast truncates
+        {
+        }
+        CONSTEXPR npu_set_ofm_base0_t() : // default command: expected opcode/control, address 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when the opcode matches and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; address and reserved fields are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's first sizeof(uint64_t) bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // byte copy of the bit-field storage instead of pointer casting
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the address as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_base0_t &set_addr(uint64_t value) // splits value into addr_lo/addr_hi; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // low 32 bits
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // bits 39..32; higher bits dropped
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE // optional textual dump support
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"-prefixed hex text)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // OFM Tile 1 address: command word made of 10+4+2+8+8+32 = 64 bits of bit-fields; the address is stored as addr_hi:addr_lo (40 bits)
+    struct npu_set_ofm_base1_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_base1_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_base1_t() : // default: opcode/control preset, address fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE1 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored address from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_base1_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 2 address: command word made of 10+4+2+8+8+32 = 64 bits of bit-fields; the address is stored as addr_hi:addr_lo (40 bits)
+    struct npu_set_ofm_base2_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_base2_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_base2_t() : // default: opcode/control preset, address fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE2 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored address from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_base2_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM Tile 3 address: command word made of 10+4+2+8+8+32 = 64 bits of bit-fields; the address is stored as addr_hi:addr_lo (40 bits)
+    struct npu_set_ofm_base3_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (address bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (address bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_base3_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_base3_t() : // default: opcode/control preset, address fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_BASE3 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored address from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_base3_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM byte stride between horizontal values: 64 bits of bit-fields; the payload lives in the addr_hi/addr_lo fields and is accessed through get_addr()/set_addr()
+    struct npu_set_ofm_stride_x_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_x_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t() : // default: opcode/control preset, payload fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_X and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored value from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_stride_x_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM byte stride between vertical values: 64 bits of bit-fields; the payload lives in the addr_hi/addr_lo fields and is accessed through get_addr()/set_addr()
+    struct npu_set_ofm_stride_y_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_y_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t() : // default: opcode/control preset, payload fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_Y and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored value from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_stride_y_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // OFM byte stride between channel blocks (of 16 bytes each block): 64 bits of bit-fields; the payload lives in addr_hi/addr_lo and is accessed through get_addr()/set_addr()
+    struct npu_set_ofm_stride_c_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (payload bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (payload bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_stride_c_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t() : // default: opcode/control preset, payload fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_STRIDE_C and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored value from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ofm_stride_c_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // Weight stream byte offset in WEIGHT_REGION: 64 bits of bit-fields; the offset is stored as addr_hi:addr_lo (40 bits)
+    struct npu_set_weight_base_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (offset bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (offset bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_weight_base_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_weight_base_t() : // default: opcode/control preset, address fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_WEIGHT_BASE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored offset from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_weight_base_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // Weight stream byte length: command word made of 10+4+2+16+32 = 64 bits of bit-fields with a full 32-bit length payload
+    struct npu_set_weight_length_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t reserved1 : 16; //  unused; both constructors write 0
+        uint32_t length : 32; //  Weight stream byte length (full 32 bits, stored unmasked)
+#ifdef __cplusplus
+      public:
+        npu_set_weight_length_t(uint32_t _length) : // encodes _length as-is
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(_length)
+        {
+        }
+        CONSTEXPR npu_set_weight_length_t() : // default: opcode/control preset, length 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_WEIGHT_LENGTH and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; length is left as it is
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const // stored length value
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_weight_length_t &set_length(uint32_t value) // no mask needed: the field is 32 bits wide; returns *this
+        {
+            length = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("length", decimal string) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif
+#endif
+    };
+    // Scale and bias stream input byte offset from SCALE_REGION: 64 bits of bit-fields; the offset is stored as addr_hi:addr_lo (40 bits)
+    struct npu_set_scale_base_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t addr_hi : 8; //  address extension (offset bits 32-39)
+        uint32_t reserved1 : 8; //  unused; both constructors write 0
+        uint32_t addr_lo : 32; //  address offset (offset bits 0-31)
+#ifdef __cplusplus
+      public:
+        npu_set_scale_base_t(uint64_t _addr) : // encodes _addr; only its low 40 bits are stored
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0), // all-ones mask is a no-op; the uint8_t cast truncates
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max())) // the uint32_t cast keeps the low 32 bits
+        {
+        }
+        CONSTEXPR npu_set_scale_base_t() : // default: opcode/control preset, address fields 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_SCALE_BASE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; the other fields are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // reassembles the stored offset from addr_hi and addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_scale_base_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this for chaining
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max()); // mask is a no-op; the cast keeps bits 0-31
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max()); // the cast keeps bits 32-39
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("addr", "0x<hex>") pair to fields
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif
+#endif
+    };
+    // Scale and bias stream input byte length: command word made of 10+4+2+16+20+12 = 64 bits of bit-fields with a 20-bit length payload
+    struct npu_set_scale_length_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t reserved1 : 16; //  unused; both constructors write 0
+        uint32_t length : 20; //  Scale and bias stream byte length (20 bits; writers mask to fit)
+        uint32_t reserved2 : 12; //  unused; both constructors write 0
+#ifdef __cplusplus
+      public:
+        npu_set_scale_length_t(uint32_t _length) : // encodes _length; bits above 19 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0),
+            length(_length & ((1U << 20) - 1)), reserved2(0) // keep the low 20 bits to fit the field
+        {
+        }
+        CONSTEXPR npu_set_scale_length_t() : // default: opcode/control preset, length 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_SCALE_LENGTH and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; length is left as it is
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const // stored 20-bit length value
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_scale_length_t &set_length(uint32_t value) // returns *this for chaining
+        {
+            length = value & ((1U << 20) - 1); // keep the low 20 bits to fit the field
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends one ("length", decimal string) pair to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif
+#endif
+    };
+    // OFM scale: command word made of 10+4+2+6+10+32 = 64 bits of bit-fields carrying a 6-bit shift and a 32-bit scale
+    struct npu_set_ofm_scale_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t shift : 6;   //  Shift (6 bits; writers mask to fit)
+        uint32_t reserved1 : 10; //  unused; both constructors write 0
+        uint32_t scale : 32; //  Scale. Not applied for 32-bit operations
+#ifdef __cplusplus
+      public:
+        npu_set_ofm_scale_t(uint32_t _shift, uint32_t _scale) : // encodes both payload values; _shift is reduced to 6 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(_shift & ((1U << 6) - 1)),
+            reserved1(0), scale(_scale)
+        {
+        }
+        CONSTEXPR npu_set_ofm_scale_t() : // default: opcode/control preset, shift and scale 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(0), reserved1(0), scale(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OFM_SCALE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; shift and scale are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OFM_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_shift() const // stored 6-bit shift value
+        {
+            return static_cast<uint32_t>(shift);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_shift(uint32_t value) // returns *this for chaining
+        {
+            shift = static_cast<uint8_t>(value) & ((1U << 6) - 1); // narrow to 8 bits, then keep the low 6
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const // stored 32-bit scale value
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_ofm_scale_t &set_scale(uint32_t value) // no mask needed: the field is 32 bits wide; returns *this
+        {
+            scale = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("shift", decimal) then ("scale", decimal) to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif
+#endif
+    };
+    // Input operand A scale: command word made of 10+4+2+6+10+32 = 64 bits of bit-fields carrying a 6-bit shift and a 32-bit scale
+    struct npu_set_opa_scale_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode (10 bits)
+        uint32_t reserved0 : 4; //  unused; both constructors write 0
+        uint32_t control : 2; //  control (2 bits)
+        uint32_t shift : 6;   //  Shift. Ignored if IFM scale mode is 0
+        uint32_t reserved1 : 10; //  unused; both constructors write 0
+        uint32_t scale : 32; //  Scale. 16-bit if IFM scale mode is 0
+#ifdef __cplusplus
+      public:
+        npu_set_opa_scale_t(uint32_t _shift, uint32_t _scale) : // encodes both payload values; _shift is reduced to 6 bits
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(_shift & ((1U << 6) - 1)),
+            reserved1(0), scale(_scale)
+        {
+        }
+        CONSTEXPR npu_set_opa_scale_t() : // default: opcode/control preset, shift and scale 0
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), shift(0), reserved1(0), scale(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OPA_SCALE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // rewrites opcode and control only; shift and scale are left as they are
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPA_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // returns the object's raw bytes as one 64-bit word
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word)); // copies sizeof(uint64_t) bytes of the object representation
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // returns *this for chaining
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1); // keep the low 10 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control converted back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // returns *this for chaining
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1); // keep the low 2 bits to fit the field
+            return *this;
+        }
+        CONSTEXPR uint32_t get_shift() const // stored 6-bit shift value
+        {
+            return static_cast<uint32_t>(shift);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_shift(uint32_t value) // returns *this for chaining
+        {
+            shift = static_cast<uint8_t>(value) & ((1U << 6) - 1); // narrow to 8 bits, then keep the low 6
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const // stored 32-bit scale value
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_opa_scale_t &set_scale(uint32_t value) // no mask needed: the field is 32 bits wide; returns *this
+        {
+            scale = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("shift", decimal) then ("scale", decimal) to fields
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("shift", std::to_string(shift)));
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif
+#endif
+    };
+    // Input operand B scale (NPU_SET_OPB_SCALE command; the bit-fields below total 64 bits)
+    struct npu_set_opb_scale_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_OPB_SCALE
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t reserved1 : 16; //  reserved; zeroed by both constructors
+        uint32_t scale : 16; //  Scale. Not used if IFM scale mode is 1 or 2
+        uint32_t reserved2 : 16; //  reserved; zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_opb_scale_t(uint32_t _scale) : // builds the command; only the low 16 bits of _scale are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0),
+            scale(_scale & ((1U << 16) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_opb_scale_t() : // default: opcode/control preset, scale zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), scale(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_OPB_SCALE and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; scale and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_OPB_SCALE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // opcode field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value) // keeps the low 10 bits; returns *this
+        {
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // control field converted to its enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_control(NPU_NAMESPACE::cmd_ctrl value) // keeps the low 2 bits; returns *this
+        {
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_scale() const // scale field widened to uint32_t
+        {
+            return static_cast<uint32_t>(scale);
+        }
+        CONSTEXPR npu_set_opb_scale_t &set_scale(uint32_t value) // keeps the low 16 bits of value; returns *this
+        {
+            scale = static_cast<uint16_t>(value) & ((1U << 16) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("scale", decimal text)
+        {
+            fields.push_back(std::make_pair<std::string, std::string>("scale", std::to_string(scale)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA user channel 0 source byte offset from DMA0_SRC_REGION (NPU_SET_DMA0_SRC command; bit-fields total 64 bits)
+    struct npu_set_dma0_src_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_DMA0_SRC
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_src_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_dma0_src_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SRC and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SRC);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_dma0_src_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA user channel 0 destination byte offset from DMA0_DST_REGION (NPU_SET_DMA0_DST command; bit-fields total 64 bits)
+    struct npu_set_dma0_dst_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_DMA0_DST
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_dst_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_dma0_dst_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_DST and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_DST);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_dma0_dst_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // DMA user channel 0 transfer length in bytes for each 1D transfer (NPU_SET_DMA0_LEN command; value held in addr_hi:addr_lo)
+    struct npu_set_dma0_len_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_DMA0_LEN
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_len_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_dma0_len_t() : // default: opcode/control preset, value zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_LEN and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; value and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_LEN);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_dma0_len_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // byte distance to skip after each inner (1D) transfer (2D/3D mode) (any alignment); NPU_SET_DMA0_SKIP0 command, value held in addr_hi:addr_lo
+    struct npu_set_dma0_skip0_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_DMA0_SKIP0
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_skip0_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_dma0_skip0_t() : // default: opcode/control preset, value zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SKIP0 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; value and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_dma0_skip0_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // byte distance to skip after each 2D transfer (3D mode) (any alignment); NPU_SET_DMA0_SKIP1 command, value held in addr_hi:addr_lo
+    struct npu_set_dma0_skip1_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_DMA0_SKIP1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_dma0_skip1_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_dma0_skip1_t() : // default: opcode/control preset, value zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_DMA0_SKIP1 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; value and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_DMA0_SKIP1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_dma0_skip1_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 0 address (NPU_SET_IFM2_BASE0 command; bit-fields total 64 bits)
+    struct npu_set_ifm2_base0_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_BASE0
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base0_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_base0_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE0 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE0);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_base0_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 1 address (NPU_SET_IFM2_BASE1 command; bit-fields total 64 bits)
+    struct npu_set_ifm2_base1_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_BASE1
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base1_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_base1_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE1 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE1);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_base1_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 2 address (NPU_SET_IFM2_BASE2 command; bit-fields total 64 bits)
+    struct npu_set_ifm2_base2_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_BASE2
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base2_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_base2_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE2 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE2);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_base2_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 Tile 3 address (NPU_SET_IFM2_BASE3 command; bit-fields total 64 bits)
+    struct npu_set_ifm2_base3_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_BASE3
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_base3_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_base3_t() : // default: opcode/control preset, address zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_BASE3 and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_BASE3);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_base3_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 byte stride between horizontal values (NPU_SET_IFM2_STRIDE_X command; value held in addr_hi:addr_lo)
+    struct npu_set_ifm2_stride_x_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_STRIDE_X
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_stride_x_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t() : // default: opcode/control preset, value zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_STRIDE_X and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; value and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_X);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_stride_x_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 byte stride between vertical values (NPU_SET_IFM2_STRIDE_Y command; value held in addr_hi:addr_lo)
+    struct npu_set_ifm2_stride_y_t
+    {
+#ifdef __cplusplus
+      private: // fields are private only when compiled as C++
+#endif
+        uint32_t opcode : 10; //  opcode; constructors and init() set it to NPU_SET_IFM2_STRIDE_Y
+        uint32_t reserved0 : 4; //  reserved; zeroed by both constructors
+        uint32_t control : 2; //  control; constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits [39:32] of the value
+        uint32_t reserved1 : 8; //  reserved; zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits [31:0] of the value
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_stride_y_t(uint64_t _addr) : // builds the command from _addr; bits above 39 are discarded
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& ...max()" masks above are no-ops; truncation comes from the casts
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t() : // default: opcode/control preset, value zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode is NPU_SET_IFM2_STRIDE_Y and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; value and reserved bits are left untouched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_Y);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw copy of the first 8 bytes of this object; not const-qualified
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines addr_hi (bits [39:32]) and addr_lo (bits [31:0])
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_stride_y_t &set_addr(uint64_t value) // stores the low 40 bits of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const // appends ("addr", "0x"+hex)
+        {
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // IFM2 byte stride between channel blocks (of 16 bytes each block). The bitfields below total 64 bits.
+    struct npu_set_ifm2_stride_c_t
+    {
+#ifdef __cplusplus
+      private: // C++ only: raw bitfields are hidden, callers use the member functions below
+#endif
+        uint32_t opcode : 10; //  opcode; the constructors and init() set it to NPU_SET_IFM2_STRIDE_C
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control; the constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits 32..39 of the value returned by get_addr()
+        uint32_t reserved1 : 8; //  zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits 0..31 of the value returned by get_addr()
+#ifdef __cplusplus
+      public:
+        npu_set_ifm2_stride_c_t(uint64_t _addr) : // builds the command from a stride value; bits 40+ are dropped
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& numeric_limits<uint64_t>::max()" masks above are no-ops; the static_casts do the truncation
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t() : // default: same opcode/control, address and reserved bits zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode matches this command and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_IFM2_STRIDE_C);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw command word: byte copy of the first sizeof(uint64_t) bytes; non-const member
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines the stride as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_ifm2_stride_c_t &set_addr(uint64_t value) // keeps bits 0..39 of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one ("addr", "0x" + hex of get_addr()) pair to fields
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Weight stream byte offset in WEIGHT_REGION for core 1. The bitfields below total 64 bits.
+    struct npu_set_weight1_base_t
+    {
+#ifdef __cplusplus
+      private: // C++ only: raw bitfields are hidden, callers use the member functions below
+#endif
+        uint32_t opcode : 10; //  opcode; the constructors and init() set it to NPU_SET_WEIGHT1_BASE
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control; the constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits 32..39 of the value returned by get_addr()
+        uint32_t reserved1 : 8; //  zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits 0..31 of the value returned by get_addr()
+#ifdef __cplusplus
+      public:
+        npu_set_weight1_base_t(uint64_t _addr) : // builds the command from a byte offset; bits 40+ are dropped
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& numeric_limits<uint64_t>::max()" masks above are no-ops; the static_casts do the truncation
+        }
+        CONSTEXPR npu_set_weight1_base_t() : // default: same opcode/control, address and reserved bits zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode matches this command and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw command word: byte copy of the first sizeof(uint64_t) bytes; non-const member
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines the offset as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_weight1_base_t &set_addr(uint64_t value) // keeps bits 0..39 of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one ("addr", "0x" + hex of get_addr()) pair to fields
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Weight stream byte length for core 1. The bitfields below total 64 bits.
+    struct npu_set_weight1_length_t
+    {
+#ifdef __cplusplus
+      private: // C++ only: raw bitfields are hidden, callers use the member functions below
+#endif
+        uint32_t opcode : 10; //  opcode; the constructors and init() set it to NPU_SET_WEIGHT1_LENGTH
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control; the constructors and init() set it to CMD1_CTRL
+        uint32_t reserved1 : 16; //  zeroed by both constructors
+        uint32_t length : 32; //  Weight stream byte length
+#ifdef __cplusplus
+      public:
+        npu_set_weight1_length_t(uint32_t _length) : // builds the command with the full 32-bit length
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(_length)
+        {
+        }
+        CONSTEXPR npu_set_weight1_length_t() : // default: same opcode/control, length and reserved bits zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode matches this command and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH) &&
+                   control >= 1 && control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; length and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_WEIGHT1_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw command word: byte copy of the first sizeof(uint64_t) bytes; non-const member
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode, cast back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_weight1_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        { // keeps the low 10 bits; any opcode other than NPU_SET_WEIGHT1_LENGTH makes valid() return false
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control bits, cast back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_weight1_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const // stored byte length
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_weight1_length_t &set_length(uint32_t value) // stores all 32 bits; returns *this
+        {
+            length = value;
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one ("length", decimal string of length) pair to fields
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Scale and bias stream input byte offset from SCALE_REGION for core 1. The bitfields below total 64 bits.
+    struct npu_set_scale1_base_t
+    {
+#ifdef __cplusplus
+      private: // C++ only: raw bitfields are hidden, callers use the member functions below
+#endif
+        uint32_t opcode : 10; //  opcode; the constructors and init() set it to NPU_SET_SCALE1_BASE
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control; the constructors and init() set it to CMD1_CTRL
+        uint32_t addr_hi : 8; //  address extension: bits 32..39 of the value returned by get_addr()
+        uint32_t reserved1 : 8; //  zeroed by both constructors
+        uint32_t addr_lo : 32; //  address offset: bits 0..31 of the value returned by get_addr()
+#ifdef __cplusplus
+      public:
+        npu_set_scale1_base_t(uint64_t _addr) : // builds the command from a byte offset; bits 40+ are dropped
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)),
+            addr_hi(static_cast<uint8_t>((_addr >> 32) & std::numeric_limits<uint64_t>::max())), reserved1(0),
+            addr_lo(static_cast<uint32_t>((_addr)&std::numeric_limits<uint64_t>::max()))
+        { // the "& numeric_limits<uint64_t>::max()" masks above are no-ops; the static_casts do the truncation
+        }
+        CONSTEXPR npu_set_scale1_base_t() : // default: same opcode/control, address and reserved bits zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_BASE)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), addr_hi(0), reserved1(0), addr_lo(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode matches this command and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_BASE) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; address and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_BASE);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw command word: byte copy of the first sizeof(uint64_t) bytes; non-const member
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR uint64_t get_addr() const // recombines the offset as (addr_hi << 32) | addr_lo
+        {
+            return (static_cast<uint64_t>(addr_hi) << 32) | addr_lo;
+        }
+        CONSTEXPR npu_set_scale1_base_t &set_addr(uint64_t value) // keeps bits 0..39 of value; returns *this
+        {
+            addr_lo = static_cast<uint32_t>((value)&std::numeric_limits<uint64_t>::max());
+            addr_hi = static_cast<uint8_t>((value >> 32) & std::numeric_limits<uint64_t>::max());
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one ("addr", "0x" + hex of get_addr()) pair to fields
+            std::stringstream saddr;
+            saddr << std::hex << "0x" << get_addr();
+            fields.push_back(std::make_pair<std::string, std::string>("addr", saddr.str()));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+    // Scale and bias stream input byte length for core 1. The bitfields below total 64 bits.
+    struct npu_set_scale1_length_t
+    {
+#ifdef __cplusplus
+      private: // C++ only: raw bitfields are hidden, callers use the member functions below
+#endif
+        uint32_t opcode : 10; //  opcode; the constructors and init() set it to NPU_SET_SCALE1_LENGTH
+        uint32_t reserved0 : 4; //  zeroed by both constructors
+        uint32_t control : 2; //  control; the constructors and init() set it to CMD1_CTRL
+        uint32_t reserved1 : 16; //  zeroed by both constructors
+        uint32_t length : 20; //  Scale and bias stream byte length
+        uint32_t reserved2 : 12; //  zeroed by both constructors
+#ifdef __cplusplus
+      public:
+        npu_set_scale1_length_t(uint32_t _length) : // builds the command; only the low 20 bits of _length are kept
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0),
+            length(_length & ((1U << 20) - 1)), reserved2(0)
+        {
+        }
+        CONSTEXPR npu_set_scale1_length_t() : // default: same opcode/control, length and reserved bits zero
+            opcode(static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_LENGTH)), reserved0(0),
+            control(static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL)), reserved1(0), length(0), reserved2(0)
+        {
+        }
+        CONSTEXPR bool valid() const // true when opcode matches this command and control is 1 or 2
+        {
+            return opcode == static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_LENGTH) && control >= 1 &&
+                   control <= 2;
+        }
+        CONSTEXPR void init() // re-stamps opcode and control only; length and reserved fields are not touched
+        {
+            opcode  = static_cast<uint16_t>(NPU_NAMESPACE::cmd1_opcode::NPU_SET_SCALE1_LENGTH);
+            control = static_cast<uint8_t>(NPU_NAMESPACE::cmd_ctrl::CMD1_CTRL);
+        }
+        operator uint64_t() // raw command word: byte copy of the first sizeof(uint64_t) bytes; non-const member
+        {
+            uint64_t word;
+            std::memcpy(&word, this, sizeof(word));
+            return word;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd1_opcode get_opcode() const // stored opcode, cast back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd1_opcode>(opcode);
+        }
+        CONSTEXPR npu_set_scale1_length_t &set_opcode(NPU_NAMESPACE::cmd1_opcode value)
+        { // keeps the low 10 bits; any opcode other than NPU_SET_SCALE1_LENGTH makes valid() return false
+            opcode = static_cast<uint16_t>(value) & ((1U << 10) - 1);
+            return *this;
+        }
+        CONSTEXPR NPU_NAMESPACE::cmd_ctrl get_control() const // stored control bits, cast back to the enum type
+        {
+            return static_cast<NPU_NAMESPACE::cmd_ctrl>(control);
+        }
+        CONSTEXPR npu_set_scale1_length_t &set_control(NPU_NAMESPACE::cmd_ctrl value)
+        { // keeps the low 2 bits of value; returns *this for chaining
+            control = static_cast<uint8_t>(value) & ((1U << 2) - 1);
+            return *this;
+        }
+        CONSTEXPR uint32_t get_length() const // stored byte length (at most 20 bits wide)
+        {
+            return static_cast<uint32_t>(length);
+        }
+        CONSTEXPR npu_set_scale1_length_t &set_length(uint32_t value) // bits 20+ of value are silently dropped
+        {
+            length = value & ((1U << 20) - 1);
+            return *this;
+        }
+#ifdef NPU_DISASSEMBLE
+        void disassemble(std::vector<std::pair<std::string, std::string>> &fields) const
+        { // appends one ("length", decimal string of length) pair to fields
+            fields.push_back(std::make_pair<std::string, std::string>("length", std::to_string(length)));
+        }
+#endif // NPU_DISASSEMBLE
+#endif // __cplusplus
+    };
+#ifdef __cplusplus
+};
+#endif
+#define NPU_OP_STRUCTS                                                                                                 \
+    NPU_OP_(stop)                                                                                                      \
+    NPU_OP_(irq)                                                                                                       \
+    NPU_OP_(conv)                                                                                                      \
+    NPU_OP_(depthwise)                                                                                                 \
+    NPU_OP_(pool)                                                                                                      \
+    NPU_OP_(elementwise)                                                                                               \
+    NPU_OP_(dma_start)                                                                                                 \
+    NPU_OP_(dma_wait)                                                                                                  \
+    NPU_OP_(kernel_wait)                                                                                               \
+    NPU_OP_(pmu_mask) // list macro: one NPU_OP_(name) per entry; NPU_OP_ is not defined in view, presumably by the user
+// List macro: one NPU_SET_(name) per entry (names match npu_set_<name>_t, e.g. scale1_length); NPU_SET_ is not defined in view.
+#define NPU_SET_STRUCTS                                                                                                \
+    NPU_SET_(ifm_pad_top)                                                                                              \
+    NPU_SET_(ifm_pad_left)                                                                                             \
+    NPU_SET_(ifm_pad_right)                                                                                            \
+    NPU_SET_(ifm_pad_bottom)                                                                                           \
+    NPU_SET_(ifm_depth_m1)                                                                                             \
+    NPU_SET_(ifm_precision)                                                                                            \
+    NPU_SET_(ifm_upscale)                                                                                              \
+    NPU_SET_(ifm_zero_point)                                                                                           \
+    NPU_SET_(ifm_width0_m1)                                                                                            \
+    NPU_SET_(ifm_height0_m1)                                                                                           \
+    NPU_SET_(ifm_height1_m1)                                                                                           \
+    NPU_SET_(ifm_ib_end)                                                                                               \
+    NPU_SET_(ifm_region)                                                                                               \
+    NPU_SET_(ofm_width_m1)                                                                                             \
+    NPU_SET_(ofm_height_m1)                                                                                            \
+    NPU_SET_(ofm_depth_m1)                                                                                             \
+    NPU_SET_(ofm_precision)                                                                                            \
+    NPU_SET_(ofm_blk_width_m1)                                                                                         \
+    NPU_SET_(ofm_blk_height_m1)                                                                                        \
+    NPU_SET_(ofm_blk_depth_m1)                                                                                         \
+    NPU_SET_(ofm_zero_point)                                                                                           \
+    NPU_SET_(ofm_width0_m1)                                                                                            \
+    NPU_SET_(ofm_height0_m1)                                                                                           \
+    NPU_SET_(ofm_height1_m1)                                                                                           \
+    NPU_SET_(ofm_region)                                                                                               \
+    NPU_SET_(kernel_width_m1)                                                                                          \
+    NPU_SET_(kernel_height_m1)                                                                                         \
+    NPU_SET_(kernel_stride)                                                                                            \
+    NPU_SET_(parallel_mode)                                                                                            \
+    NPU_SET_(acc_format)                                                                                               \
+    NPU_SET_(activation)                                                                                               \
+    NPU_SET_(activation_min)                                                                                           \
+    NPU_SET_(activation_max)                                                                                           \
+    NPU_SET_(weight_region)                                                                                            \
+    NPU_SET_(scale_region)                                                                                             \
+    NPU_SET_(ab_start)                                                                                                 \
+    NPU_SET_(blockdep)                                                                                                 \
+    NPU_SET_(dma0_src_region)                                                                                          \
+    NPU_SET_(dma0_dst_region)                                                                                          \
+    NPU_SET_(dma0_size0)                                                                                               \
+    NPU_SET_(dma0_size1)                                                                                               \
+    NPU_SET_(ifm2_broadcast)                                                                                           \
+    NPU_SET_(ifm2_scalar)                                                                                              \
+    NPU_SET_(ifm2_precision)                                                                                           \
+    NPU_SET_(ifm2_zero_point)                                                                                          \
+    NPU_SET_(ifm2_width0_m1)                                                                                           \
+    NPU_SET_(ifm2_height0_m1)                                                                                          \
+    NPU_SET_(ifm2_height1_m1)                                                                                          \
+    NPU_SET_(ifm2_ib_start)                                                                                            \
+    NPU_SET_(ifm2_region)                                                                                              \
+    NPU_SET_(ifm_base0)                                                                                                \
+    NPU_SET_(ifm_base1)                                                                                                \
+    NPU_SET_(ifm_base2)                                                                                                \
+    NPU_SET_(ifm_base3)                                                                                                \
+    NPU_SET_(ifm_stride_x)                                                                                             \
+    NPU_SET_(ifm_stride_y)                                                                                             \
+    NPU_SET_(ifm_stride_c)                                                                                             \
+    NPU_SET_(ofm_base0)                                                                                                \
+    NPU_SET_(ofm_base1)                                                                                                \
+    NPU_SET_(ofm_base2)                                                                                                \
+    NPU_SET_(ofm_base3)                                                                                                \
+    NPU_SET_(ofm_stride_x)                                                                                             \
+    NPU_SET_(ofm_stride_y)                                                                                             \
+    NPU_SET_(ofm_stride_c)                                                                                             \
+    NPU_SET_(weight_base)                                                                                              \
+    NPU_SET_(weight_length)                                                                                            \
+    NPU_SET_(scale_base)                                                                                               \
+    NPU_SET_(scale_length)                                                                                             \
+    NPU_SET_(ofm_scale)                                                                                                \
+    NPU_SET_(opa_scale)                                                                                                \
+    NPU_SET_(opb_scale)                                                                                                \
+    NPU_SET_(dma0_src)                                                                                                 \
+    NPU_SET_(dma0_dst)                                                                                                 \
+    NPU_SET_(dma0_len)                                                                                                 \
+    NPU_SET_(dma0_skip0)                                                                                               \
+    NPU_SET_(dma0_skip1)                                                                                               \
+    NPU_SET_(ifm2_base0)                                                                                               \
+    NPU_SET_(ifm2_base1)                                                                                               \
+    NPU_SET_(ifm2_base2)                                                                                               \
+    NPU_SET_(ifm2_base3)                                                                                               \
+    NPU_SET_(ifm2_stride_x)                                                                                            \
+    NPU_SET_(ifm2_stride_y)                                                                                            \
+    NPU_SET_(ifm2_stride_c)                                                                                            \
+    NPU_SET_(weight1_base)                                                                                             \
+    NPU_SET_(weight1_length)                                                                                           \
+    NPU_SET_(scale1_base)                                                                                              \
+    NPU_SET_(scale1_length)
+// Expands FUNC(acc_format, V) for V = I32, I40, F16, placing SEP between consecutive expansions.
+#define EXPAND_ACC_FORMAT(FUNC, SEP) FUNC(acc_format, I32) SEP FUNC(acc_format, I40) SEP FUNC(acc_format, F16)
+// Expands FUNC(activation_clip_range, V) for the four listed values, placing SEP between consecutive expansions.
+#define EXPAND_ACTIVATION_CLIP_RANGE(FUNC, SEP)                                                                        \
+    FUNC(activation_clip_range, OFM_PRECISION)                                                                         \
+    SEP FUNC(activation_clip_range, FORCE_UINT8) SEP FUNC(activation_clip_range, FORCE_INT8)                           \
+        SEP FUNC(activation_clip_range, FORCE_INT16)
+// Expands FUNC(activation_format, V) for V = NHWC, NHCWB16, placing SEP between the two expansions.
+#define EXPAND_ACTIVATION_FORMAT(FUNC, SEP) FUNC(activation_format, NHWC) SEP FUNC(activation_format, NHCWB16)
+// Expands FUNC(activation_function, V) for RELU, TANH, SIGMOID and TABLE_0..TABLE_7, with SEP between expansions.
+#define EXPAND_ACTIVATION_FUNCTION(FUNC, SEP)                                                                          \
+    FUNC(activation_function, RELU)                                                                                    \
+    SEP FUNC(activation_function, TANH) SEP FUNC(activation_function, SIGMOID) SEP FUNC(activation_function, TABLE_0)  \
+        SEP FUNC(activation_function, TABLE_1) SEP FUNC(activation_function, TABLE_2)                                  \
+            SEP FUNC(activation_function, TABLE_3) SEP FUNC(activation_function, TABLE_4)                              \
+                SEP FUNC(activation_function, TABLE_5) SEP FUNC(activation_function, TABLE_6)                          \
+                    SEP FUNC(activation_function, TABLE_7)
+// Expands FUNC(activation_precision, V) for V = B8, B16, B32, B64, placing SEP between consecutive expansions.
+#define EXPAND_ACTIVATION_PRECISION(FUNC, SEP)                                                                         \
+    FUNC(activation_precision, B8)                                                                                     \
+    SEP FUNC(activation_precision, B16) SEP FUNC(activation_precision, B32) SEP FUNC(activation_precision, B64)
+// Expands FUNC(activation_type, V) for V = UNSIGNED, SIGNED, placing SEP between the two expansions.
+#define EXPAND_ACTIVATION_TYPE(FUNC, SEP) FUNC(activation_type, UNSIGNED) SEP FUNC(activation_type, SIGNED)
+// Expands FUNC(axi_mem_encoding, V) for the twelve listed values, placing SEP between consecutive expansions.
+#define EXPAND_AXI_MEM_ENCODING(FUNC, SEP)                                                                             \
+    FUNC(axi_mem_encoding, DEVICE_NON_BUFFERABLE)                                                                      \
+    SEP FUNC(axi_mem_encoding, DEVICE_BUFFERABLE) SEP FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_NON_BUFFERABLE)      \
+        SEP FUNC(axi_mem_encoding, NORMAL_NON_CACHEABLE_BUFFERABLE)                                                    \
+            SEP FUNC(axi_mem_encoding, WRITE_THROUGH_NO_ALLOCATE)                                                      \
+                SEP FUNC(axi_mem_encoding, WRITE_THROUGH_READ_ALLOCATE)                                                \
+                    SEP FUNC(axi_mem_encoding, WRITE_THROUGH_WRITE_ALLOCATE)                                           \
+                        SEP FUNC(axi_mem_encoding, WRITE_THROUGH_READ_AND_WRITE_ALLOCATE)                              \
+                            SEP FUNC(axi_mem_encoding, WRITE_BACK_NO_ALLOCATE)                                         \
+                                SEP FUNC(axi_mem_encoding, WRITE_BACK_READ_ALLOCATE)                                   \
+                                    SEP FUNC(axi_mem_encoding, WRITE_BACK_WRITE_ALLOCATE)                              \
+                                        SEP FUNC(axi_mem_encoding, WRITE_BACK_READ_AND_WRITE_ALLOCATE)
+// Expands FUNC(broadcast_mode, V) for V = DISABLE, ENABLE, placing SEP between the two expansions.
+#define EXPAND_BROADCAST_MODE(FUNC, SEP) FUNC(broadcast_mode, DISABLE) SEP FUNC(broadcast_mode, ENABLE)
+
+#define EXPAND_CMD0_OPCODE(FUNC, SEP)                                                                                  \
+    FUNC(cmd0_opcode, NPU_OP_STOP)                                                                                     \
+    SEP FUNC(cmd0_opcode, NPU_OP_IRQ) SEP FUNC(cmd0_opcode, NPU_OP_CONV) SEP FUNC(                                     \
+        cmd0_opcode, NPU_OP_DEPTHWISE) SEP FUNC(cmd0_opcode, NPU_OP_POOL) SEP FUNC(cmd0_opcode, NPU_OP_ELEMENTWISE)    \
+        SEP FUNC(cmd0_opcode, NPU_OP_DMA_START) SEP FUNC(cmd0_opcode, NPU_OP_DMA_WAIT) SEP FUNC(                       \
+            cmd0_opcode, NPU_OP_KERNEL_WAIT) SEP FUNC(cmd0_opcode, NPU_OP_PMU_MASK) SEP FUNC(cmd0_opcode,              \
+                                                                                             NPU_SET_IFM_PAD_TOP)      \
+            SEP FUNC(cmd0_opcode, NPU_SET_IFM_PAD_LEFT) SEP FUNC(cmd0_opcode, NPU_SET_IFM_PAD_RIGHT) SEP FUNC(         \
+                cmd0_opcode, NPU_SET_IFM_PAD_BOTTOM) SEP FUNC(cmd0_opcode,                                             \
+                                                              NPU_SET_IFM_DEPTH_M1) SEP FUNC(cmd0_opcode,              \
+                                                                                             NPU_SET_IFM_PRECISION)    \
+                SEP FUNC(cmd0_opcode, NPU_SET_IFM_UPSCALE) SEP FUNC(cmd0_opcode, NPU_SET_IFM_ZERO_POINT) SEP FUNC(     \
+                    cmd0_opcode, NPU_SET_IFM_WIDTH0_M1) SEP FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT0_M1)                  \
+                    SEP FUNC(cmd0_opcode, NPU_SET_IFM_HEIGHT1_M1) SEP FUNC(cmd0_opcode, NPU_SET_IFM_IB_END) SEP FUNC(  \
+                        cmd0_opcode, NPU_SET_IFM_REGION) SEP FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH_M1)                   \
+                        SEP FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT_M1) SEP FUNC(cmd0_opcode, NPU_SET_OFM_DEPTH_M1)       \
+                            SEP FUNC(cmd0_opcode, NPU_SET_OFM_PRECISION) SEP FUNC(                                     \
+                                cmd0_opcode, NPU_SET_OFM_BLK_WIDTH_M1) SEP FUNC(cmd0_opcode,                           \
+                                                                                NPU_SET_OFM_BLK_HEIGHT_M1)             \
+                                SEP FUNC(cmd0_opcode, NPU_SET_OFM_BLK_DEPTH_M1) SEP FUNC(                              \
+                                    cmd0_opcode, NPU_SET_OFM_ZERO_POINT) SEP FUNC(cmd0_opcode, NPU_SET_OFM_WIDTH0_M1)  \
+                                    SEP FUNC(cmd0_opcode, NPU_SET_OFM_HEIGHT0_M1) SEP FUNC(                            \
+                                        cmd0_opcode,                                                                   \
+                                        NPU_SET_OFM_HEIGHT1_M1) SEP FUNC(cmd0_opcode, NPU_SET_OFM_REGION)              \
+                                        SEP FUNC(cmd0_opcode, NPU_SET_KERNEL_WIDTH_M1) SEP FUNC(                       \
+                                            cmd0_opcode,                                                               \
+                                            NPU_SET_KERNEL_HEIGHT_M1) SEP FUNC(cmd0_opcode, NPU_SET_KERNEL_STRIDE)     \
+                                            SEP FUNC(cmd0_opcode, NPU_SET_PARALLEL_MODE) SEP FUNC(                     \
+                                                cmd0_opcode,                                                           \
+                                                NPU_SET_ACC_FORMAT) SEP FUNC(cmd0_opcode, NPU_SET_ACTIVATION)          \
+                                                SEP FUNC(cmd0_opcode,                                                  \
+                                                         NPU_SET_ACTIVATION_MIN) SEP FUNC(cmd0_opcode,                 \
+                                                                                          NPU_SET_ACTIVATION_MAX)      \
+                                                    SEP FUNC(cmd0_opcode, NPU_SET_WEIGHT_REGION) SEP FUNC(             \
+                                                        cmd0_opcode,                                                   \
+                                                        NPU_SET_SCALE_REGION) SEP FUNC(cmd0_opcode, NPU_SET_AB_START)  \
+                                                        SEP FUNC(cmd0_opcode, NPU_SET_BLOCKDEP)                        \
+                                                            SEP FUNC(cmd0_opcode, NPU_SET_DMA0_SRC_REGION) SEP FUNC(   \
+                                                                cmd0_opcode,                                           \
+                                                                NPU_SET_DMA0_DST_REGION) SEP FUNC(cmd0_opcode,         \
+                                                                                                  NPU_SET_DMA0_SIZE0)  \
+                                                                SEP FUNC(cmd0_opcode, NPU_SET_DMA0_SIZE1) SEP FUNC(    \
+                                                                    cmd0_opcode,                                       \
+                                                                    NPU_SET_IFM2_BROADCAST) SEP                        \
+                                                                    FUNC(cmd0_opcode, NPU_SET_IFM2_SCALAR) SEP FUNC(   \
+                                                                        cmd0_opcode,                                   \
+                                                                        NPU_SET_IFM2_PRECISION) SEP                    \
+                                                                        FUNC(cmd0_opcode, NPU_SET_IFM2_ZERO_POINT) SEP \
+                                                                            FUNC(cmd0_opcode,                          \
+                                                                                 NPU_SET_IFM2_WIDTH0_M1) SEP           \
+                                                                                FUNC(cmd0_opcode,                      \
+                                                                                     NPU_SET_IFM2_HEIGHT0_M1) SEP      \
+                                                                                    FUNC(cmd0_opcode,                  \
+                                                                                         NPU_SET_IFM2_HEIGHT1_M1)      \
+                                                                                        SEP FUNC(                      \
+                                                                                            cmd0_opcode,               \
+                                                                                            NPU_SET_IFM2_IB_START)     \
+                                                                                            SEP FUNC(                  \
+                                                                                                cmd0_opcode,           \
+                                                                                                NPU_SET_IFM2_REGION)
+
+#define EXPAND_CMD1_OPCODE(FUNC, SEP) /* Expands to FUNC(cmd1_opcode, NAME) per listed NAME, SEP between. */           \
+    FUNC(cmd1_opcode, NPU_SET_IFM_BASE0)                                                                               \
+    SEP FUNC(cmd1_opcode, NPU_SET_IFM_BASE1) SEP FUNC(cmd1_opcode, NPU_SET_IFM_BASE2)                                  \
+        SEP FUNC(cmd1_opcode, NPU_SET_IFM_BASE3) SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_X)                           \
+            SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_Y) SEP FUNC(cmd1_opcode, NPU_SET_IFM_STRIDE_C) SEP FUNC(          \
+                cmd1_opcode, NPU_SET_OFM_BASE0) SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE1)                               \
+                SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE2) SEP FUNC(cmd1_opcode, NPU_SET_OFM_BASE3) SEP FUNC(            \
+                    cmd1_opcode, NPU_SET_OFM_STRIDE_X) SEP FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_Y)                     \
+                    SEP FUNC(cmd1_opcode, NPU_SET_OFM_STRIDE_C) SEP FUNC(cmd1_opcode, NPU_SET_WEIGHT_BASE) SEP FUNC(   \
+                        cmd1_opcode, NPU_SET_WEIGHT_LENGTH) SEP FUNC(cmd1_opcode, NPU_SET_SCALE_BASE)                  \
+                        SEP FUNC(cmd1_opcode, NPU_SET_SCALE_LENGTH) SEP FUNC(cmd1_opcode, NPU_SET_OFM_SCALE)           \
+                            SEP FUNC(cmd1_opcode, NPU_SET_OPA_SCALE) SEP FUNC(cmd1_opcode, NPU_SET_OPB_SCALE)          \
+                                SEP FUNC(cmd1_opcode, NPU_SET_DMA0_SRC) SEP FUNC(cmd1_opcode, NPU_SET_DMA0_DST)        \
+                                    SEP FUNC(cmd1_opcode, NPU_SET_DMA0_LEN) SEP FUNC(cmd1_opcode, NPU_SET_DMA0_SKIP0)  \
+                                        SEP FUNC(cmd1_opcode, NPU_SET_DMA0_SKIP1) SEP FUNC(                            \
+                                            cmd1_opcode, NPU_SET_IFM2_BASE0) SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE1) \
+                                            SEP FUNC(cmd1_opcode, NPU_SET_IFM2_BASE2) SEP FUNC(cmd1_opcode,            \
+                                                                                               NPU_SET_IFM2_BASE3)     \
+                                                SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_X)                           \
+                                                    SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_Y)                       \
+                                                        SEP FUNC(cmd1_opcode, NPU_SET_IFM2_STRIDE_C)                   \
+                                                            SEP FUNC(cmd1_opcode, NPU_SET_WEIGHT1_BASE)                \
+                                                                SEP FUNC(cmd1_opcode, NPU_SET_WEIGHT1_LENGTH)          \
+                                                                    SEP FUNC(cmd1_opcode, NPU_SET_SCALE1_BASE)         \
+                                                                        SEP FUNC(cmd1_opcode, NPU_SET_SCALE1_LENGTH)
+
+#define EXPAND_CMD_CTRL(FUNC, SEP) FUNC(cmd_ctrl, CMD0_CTRL) SEP FUNC(cmd_ctrl, CMD1_CTRL) /* Expands to FUNC(cmd_ctrl, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_CUSTOM_DMA(FUNC, SEP) FUNC(custom_dma, NOT_IMPLEMENTED) SEP FUNC(custom_dma, IMPLEMENTED) /* Expands to FUNC(custom_dma, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_DMA_FAULT_SRC(FUNC, SEP) FUNC(dma_fault_src, AXI_M0) SEP FUNC(dma_fault_src, AXI_M1) /* Expands to FUNC(dma_fault_src, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_DMA_REGION_MODE(FUNC, SEP) FUNC(dma_region_mode, EXTERNAL) SEP FUNC(dma_region_mode, INTERNAL) /* Expands to FUNC(dma_region_mode, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_DMA_STRIDE_MODE(FUNC, SEP) /* Expands to FUNC(dma_stride_mode, NAME) per listed NAME, SEP between. */   \
+    FUNC(dma_stride_mode, D1) SEP FUNC(dma_stride_mode, D2) SEP FUNC(dma_stride_mode, D3)
+
+#define EXPAND_ELEMENTWISE_MODE(FUNC, SEP) /* Expands to FUNC(elementwise_mode, NAME) per listed NAME, SEP between. */ \
+    FUNC(elementwise_mode, MUL)                                                                                        \
+    SEP FUNC(elementwise_mode, ADD) SEP FUNC(elementwise_mode, SUB) SEP FUNC(elementwise_mode, MIN)                    \
+        SEP FUNC(elementwise_mode, MAX) SEP FUNC(elementwise_mode, LRELU) SEP FUNC(elementwise_mode, ABS)              \
+            SEP FUNC(elementwise_mode, CLZ) SEP FUNC(elementwise_mode, SHR) SEP FUNC(elementwise_mode, SHL)
+
+#define EXPAND_FUNCTIONAL_SAFETY(FUNC, SEP) /* FUNC(functional_safety, NAME) per listed NAME, joined by SEP. */        \
+    FUNC(functional_safety, NOT_IMPLEMENTED) SEP FUNC(functional_safety, IMPLEMENTED)
+
+#define EXPAND_IFM2_OPERAND_ORDER(FUNC, SEP) FUNC(ifm2_operand_order, ORDER_B) SEP FUNC(ifm2_operand_order, ORDER_A) /* FUNC(ifm2_operand_order, NAME) per listed NAME, joined by SEP. */
+
+#define EXPAND_IFM_SCALE_MODE(FUNC, SEP) /* Expands to FUNC(ifm_scale_mode, NAME) per listed NAME, SEP between. */     \
+    FUNC(ifm_scale_mode, OPA_OPB_16) SEP FUNC(ifm_scale_mode, OPA_32) SEP FUNC(ifm_scale_mode, OPB_32)
+
+#define EXPAND_IFM_UPSCALE_MODE(FUNC, SEP) /* Expands to FUNC(ifm_upscale_mode, NAME) per listed NAME, SEP between. */ \
+    FUNC(ifm_upscale_mode, NONE) SEP FUNC(ifm_upscale_mode, NEAREST) SEP FUNC(ifm_upscale_mode, ZEROS)
+
+#define EXPAND_KERNEL_DECOMPOSITION(FUNC, SEP) FUNC(kernel_decomposition, D8X8) SEP FUNC(kernel_decomposition, D4X4) /* FUNC(kernel_decomposition, NAME) per listed NAME, joined by SEP. */
+
+#define EXPAND_KERNEL_DILATION(FUNC, SEP) FUNC(kernel_dilation, NONE) SEP FUNC(kernel_dilation, X2) /* Expands to FUNC(kernel_dilation, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_MAX_BEATS(FUNC, SEP) FUNC(max_beats, B64) SEP FUNC(max_beats, B128) SEP FUNC(max_beats, B256) /* Expands to FUNC(max_beats, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_MEM_ATTR(FUNC, SEP) /* Expands to FUNC(mem_attr, NAME) per listed NAME, SEP between. */                 \
+    FUNC(mem_attr, AXI0_OUTSTANDING_COUNTER0)                                                                          \
+    SEP FUNC(mem_attr, AXI0_OUTSTANDING_COUNTER1) SEP FUNC(mem_attr, AXI1_OUTSTANDING_COUNTER2)                        \
+        SEP FUNC(mem_attr, AXI1_OUTSTANDING_COUNTER3)
+
+#define EXPAND_OFM_SCALE_MODE(FUNC, SEP) FUNC(ofm_scale_mode, PER_CHANNEL) SEP FUNC(ofm_scale_mode, GLOBAL) /* Expands to FUNC(ofm_scale_mode, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_PARALLEL_MODE(FUNC, SEP) FUNC(parallel_mode, SINGLE_CORE) SEP FUNC(parallel_mode, DUAL_CORE_DEPTH) /* Expands to FUNC(parallel_mode, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_PMU_AXI_CHANNEL(FUNC, SEP) /* Expands to FUNC(pmu_axi_channel, NAME) per listed NAME, SEP between. */   \
+    FUNC(pmu_axi_channel, RD_CMD)                                                                                      \
+    SEP FUNC(pmu_axi_channel, RD_IFM) SEP FUNC(pmu_axi_channel, RD_WEIGHTS) SEP FUNC(pmu_axi_channel, RD_SCALE_BIAS)   \
+        SEP FUNC(pmu_axi_channel, RD_MEM2MEM) SEP FUNC(pmu_axi_channel, WR_OFM) SEP FUNC(pmu_axi_channel, WR_MEM2MEM)
+
+#define EXPAND_PMU_EVENT(FUNC, SEP) /* Expands to FUNC(pmu_event, NAME) per listed NAME, SEP between. */                               \
+    FUNC(pmu_event, NO_EVENT)                                                                                                          \
+    SEP FUNC(pmu_event, CYCLE) SEP FUNC(pmu_event, NPU_IDLE) SEP FUNC(pmu_event, CC_STALLED_ON_BLOCKDEP) SEP FUNC(                     \
+        pmu_event, CC_STALLED_ON_SHRAM_RECONFIG) SEP FUNC(pmu_event, NPU_ACTIVE) SEP FUNC(pmu_event, MAC_ACTIVE)                       \
+        SEP FUNC(pmu_event, MAC_ACTIVE_8BIT) SEP FUNC(pmu_event, MAC_ACTIVE_16BIT) SEP FUNC(                                           \
+            pmu_event, MAC_DPU_ACTIVE) SEP FUNC(pmu_event, MAC_STALLED_BY_WD_ACC) SEP FUNC(pmu_event,                                  \
+                                                                                           MAC_STALLED_BY_WD)                          \
+            SEP FUNC(pmu_event, MAC_STALLED_BY_ACC) SEP FUNC(pmu_event, MAC_STALLED_BY_IB) SEP FUNC(                                   \
+                pmu_event,                                                                                                             \
+                MAC_ACTIVE_32BIT) SEP FUNC(pmu_event,                                                                                  \
+                                           MAC_STALLED_BY_INT_W) SEP FUNC(pmu_event,                                                   \
+                                                                          MAC_STALLED_BY_INT_ACC) SEP FUNC(pmu_event,                  \
+                                                                                                           AO_ACTIVE)                  \
+                SEP FUNC(pmu_event, AO_ACTIVE_8BIT) SEP FUNC(pmu_event, AO_ACTIVE_16BIT) SEP FUNC(                                     \
+                    pmu_event, AO_STALLED_BY_OFMP_OB) SEP FUNC(pmu_event, AO_STALLED_BY_OFMP) SEP                                      \
+                    FUNC(pmu_event, AO_STALLED_BY_OB) SEP FUNC(pmu_event, AO_STALLED_BY_ACC_IB) SEP FUNC(                              \
+                        pmu_event, AO_STALLED_BY_ACC) SEP FUNC(pmu_event, AO_STALLED_BY_IB) SEP                                        \
+                        FUNC(pmu_event, WD_ACTIVE) SEP FUNC(pmu_event, WD_STALLED) SEP FUNC(pmu_event, WD_STALLED_BY_WS) SEP FUNC(     \
+                            pmu_event, WD_STALLED_BY_WD_BUF) SEP FUNC(pmu_event,                                                       \
+                                                                      WD_PARSE_ACTIVE) SEP                                             \
+                            FUNC(pmu_event, WD_PARSE_STALLED) SEP FUNC(pmu_event, WD_PARSE_STALLED_IN) SEP FUNC(                       \
+                                pmu_event, WD_PARSE_STALLED_OUT) SEP FUNC(pmu_event,                                                   \
+                                                                          WD_TRANS_WS) SEP                                             \
+                                FUNC(pmu_event, WD_TRANS_WB) SEP FUNC(pmu_event, WD_TRANS_DW0) SEP FUNC(                               \
+                                    pmu_event, WD_TRANS_DW1) SEP FUNC(pmu_event,                                                       \
+                                                                      AXI0_RD_TRANS_ACCEPTED) SEP                                      \
+                                    FUNC(pmu_event, AXI0_RD_TRANS_COMPLETED) SEP FUNC(pmu_event, AXI0_RD_DATA_BEAT_RECEIVED) SEP FUNC( \
+                                        pmu_event, AXI0_RD_TRAN_REQ_STALLED) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI0_WR_TRANS_ACCEPTED) SEP                      \
+                                        FUNC(pmu_event, AXI0_WR_TRANS_COMPLETED_M) SEP FUNC(                                           \
+                                            pmu_event, AXI0_WR_TRANS_COMPLETED_S) SEP                                                  \
+                                            FUNC(pmu_event, AXI0_WR_DATA_BEAT_WRITTEN) SEP FUNC(                                       \
+                                                pmu_event, AXI0_WR_TRAN_REQ_STALLED) SEP                                               \
+                                                FUNC(pmu_event, AXI0_WR_DATA_BEAT_STALLED) SEP FUNC(                                   \
+                                                    pmu_event,                                                                         \
+                                                    AXI0_ENABLED_CYCLES) SEP FUNC(pmu_event,                                           \
+                                                                                  AXI0_RD_STALL_LIMIT) SEP                             \
+                                                    FUNC(pmu_event, AXI0_WR_STALL_LIMIT) SEP FUNC(                                     \
+                                                        pmu_event,                                                                     \
+                                                        AXI_LATENCY_ANY) SEP FUNC(pmu_event,                                           \
+                                                                                  AXI_LATENCY_32) SEP                                  \
+                                                        FUNC(pmu_event,                                                                \
+                                                             AXI_LATENCY_64) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI_LATENCY_128) SEP                             \
+                                                            FUNC(pmu_event, AXI_LATENCY_256) SEP FUNC(                                 \
+                                                                pmu_event,                                                             \
+                                                                AXI_LATENCY_512) SEP FUNC(pmu_event,                                   \
+                                                                                          AXI_LATENCY_1024) SEP                        \
+                                                                FUNC(pmu_event, ECC_DMA) SEP FUNC(                                     \
+                                                                    pmu_event,                                                         \
+                                                                    ECC_SB0) SEP FUNC(pmu_event,                                       \
+                                                                                      AXI1_RD_TRANS_ACCEPTED) SEP                      \
+                                                                    FUNC(pmu_event, AXI1_RD_TRANS_COMPLETED) SEP FUNC(                 \
+                                                                        pmu_event, AXI1_RD_DATA_BEAT_RECEIVED) SEP                     \
+                                                                        FUNC(pmu_event, AXI1_RD_TRAN_REQ_STALLED) SEP FUNC(            \
+                                                                            pmu_event, AXI1_WR_TRANS_ACCEPTED) SEP                     \
+                                                                            FUNC(pmu_event, AXI1_WR_TRANS_COMPLETED_M) SEP FUNC(       \
+                                                                                pmu_event,                                             \
+                                                                                AXI1_WR_TRANS_COMPLETED_S) SEP                         \
+                                                                                FUNC(pmu_event,                                        \
+                                                                                     AXI1_WR_DATA_BEAT_WRITTEN) SEP                    \
+                                                                                    FUNC(pmu_event,                                    \
+                                                                                         AXI1_WR_TRAN_REQ_STALLED) SEP                 \
+                                                                                        FUNC(                                          \
+                                                                                            pmu_event,                                 \
+                                                                                            AXI1_WR_DATA_BEAT_STALLED) SEP             \
+                                                                                            FUNC(                                      \
+                                                                                                pmu_event,                             \
+                                                                                                AXI1_ENABLED_CYCLES) SEP               \
+                                                                                                FUNC(                                  \
+                                                                                                    pmu_event,                         \
+                                                                                                    AXI1_RD_STALL_LIMIT) SEP           \
+                                                                                                    FUNC(                              \
+                                                                                                        pmu_event,                     \
+                                                                                                        AXI1_WR_STALL_LIMIT)           \
+                                                                                                        SEP FUNC(                      \
+                                                                                                            pmu_event,                 \
+                                                                                                            ECC_SB1)
+
+#define EXPAND_POOLING_MODE(FUNC, SEP) /* Expands to FUNC(pooling_mode, NAME) per listed NAME, SEP between. */         \
+    FUNC(pooling_mode, MAX) SEP FUNC(pooling_mode, AVERAGE) SEP FUNC(pooling_mode, REDUCE_SUM)
+
+#define EXPAND_PRIVILEGE_LEVEL(FUNC, SEP) FUNC(privilege_level, USER) SEP FUNC(privilege_level, PRIVILEGED) /* Expands to FUNC(privilege_level, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_ROUND_MODE(FUNC, SEP) FUNC(round_mode, DBL) SEP FUNC(round_mode, TRUNCATE) SEP FUNC(round_mode, NATURAL) /* Expands to FUNC(round_mode, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_SECURITY_LEVEL(FUNC, SEP) FUNC(security_level, SECURE) SEP FUNC(security_level, NON_SECURE) /* Expands to FUNC(security_level, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_STATE(FUNC, SEP) FUNC(state, STOPPED) SEP FUNC(state, RUNNING) /* Expands to FUNC(state, NAME) per listed NAME, SEP between. */
+
+#define EXPAND_WD_CORE_SLICE_STATE(FUNC, SEP) /* FUNC(wd_core_slice_state, NAME) per listed NAME, joined by SEP. */    \
+    FUNC(wd_core_slice_state, HEADER) SEP FUNC(wd_core_slice_state, PALETTE) SEP FUNC(wd_core_slice_state, WEIGHTS)
+
+#define EXPAND_WD_CTRL_STATE(FUNC, SEP) /* Expands to FUNC(wd_ctrl_state, NAME) per listed NAME, SEP between. */       \
+    FUNC(wd_ctrl_state, IDLE)                                                                                          \
+    SEP FUNC(wd_ctrl_state, DRAIN) SEP FUNC(wd_ctrl_state, OFD_INIT) SEP FUNC(wd_ctrl_state, OFD_RUN)
+
+#define EXPAND_WEIGHT_ORDER(FUNC, SEP) FUNC(weight_order, DEPTH_FIRST) SEP FUNC(weight_order, PART_KERNEL_FIRST) /* Expands to FUNC(weight_order, NAME) per listed NAME, SEP between. */
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h
deleted file mode 100644
index 0402411..0000000
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_common.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ETHOSU_COMMON_H
-#define ETHOSU_COMMON_H
-
-/******************************************************************************
- * Includes
- ******************************************************************************/
-
-#include "ethosu55_interface.h"
-
-#include <stdio.h>
-
-/******************************************************************************
- * Defines
- ******************************************************************************/
-
-// Log severity levels
-#define ETHOSU_LOG_EMERG 0
-#define ETHOSU_LOG_ALERT 1
-#define ETHOSU_LOG_CRIT 2
-#define ETHOSU_LOG_ERR 3
-#define ETHOSU_LOG_WARN 4
-#define ETHOSU_LOG_NOTICE 5
-#define ETHOSU_LOG_INFO 6
-#define ETHOSU_LOG_DEBUG 7
-
-// Define default log severity
-#ifndef ETHOSU_LOG_SEVERITY
-#define ETHOSU_LOG_SEVERITY ETHOSU_LOG_DEBUG
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_EMERG
-#define LOG_EMERG(format, ...)                                                                                         \
-    fprintf(stderr, format, ##__VA_ARGS__);                                                                            \
-    fflush(stderr);                                                                                                    \
-    exit(-1)
-#else
-#define LOG_EMERG(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_ALERT
-#define LOG_ALERT(format, ...)                                                                                         \
-    fprintf(stderr, format, ##__VA_ARGS__);                                                                            \
-    fflush(stderr);                                                                                                    \
-    exit(-1)
-#else
-#define LOG_ALERT(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_CRIT
-#define LOG_CRIT(format, ...)                                                                                          \
-    fprintf(stderr, format, ##__VA_ARGS__);                                                                            \
-    fflush(stderr);                                                                                                    \
-    exit(-1)
-#else
-#define LOG_CRIT(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_ERR
-#define LOG_ERR(format, ...)                                                                                           \
-    fprintf(stderr, format, ##__VA_ARGS__);                                                                            \
-    fflush(stderr)
-#else
-#define LOG_ERR(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_WARN
-#define LOG_WARN(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
-#else
-#define LOG_WARN(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_NOTICE
-#define LOG_NOTICE(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
-#else
-#define LOG_NOTICE(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_INFO
-#define LOG_INFO(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
-#else
-#define LOG_INFO(format, ...)
-#endif
-
-#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_DEBUG
-#define LOG_DEBUG(format, ...) fprintf(stdout, format, ##__VA_ARGS__)
-#else
-#define LOG_DEBUG(format, ...)
-#endif
-
-#define UNUSED(x) ((void)x)
-
-#define VER_STR(X) VNUM_STR(X)
-#define VNUM_STR(X) #X
-
-#define MASK_0_31_BITS (0xFFFFFFFF)
-#define MASK_32_47_BITS (0xFFFF00000000)
-
-/******************************************************************************
- * Inline functions
- ******************************************************************************/
-
-static const __attribute__((section("npu_driver_version"))) char driver_version_str[] = VER_STR(
-    ETHOSU_DRIVER_VERSION_MAJOR) "." VER_STR(ETHOSU_DRIVER_VERSION_MINOR) "." VER_STR(ETHOSU_DRIVER_VERSION_PATCH);
-
-static const __attribute__((section("npu_driver_arch_version"))) char driver_arch_version_str[] =
-    VER_STR(NNX_ARCH_VERSION_MAJOR) "." VER_STR(NNX_ARCH_VERSION_MINOR) "." VER_STR(NNX_ARCH_VERSION_PATCH);
-
-#endif // ETHOSU_COMMON_H
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config_u55.h
similarity index 91%
rename from edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h
rename to edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config_u55.h
index a822e93..9330bb1 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config.h
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_config_u55.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2020,2022 Arm Limited.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -60,12 +60,15 @@
 #ifndef AXI_LIMIT0_MAX_BEATS_BYTES
 #define AXI_LIMIT0_MAX_BEATS_BYTES 0x0
 #endif
+
 #ifndef AXI_LIMIT0_MEM_TYPE
 #define AXI_LIMIT0_MEM_TYPE 0x0
 #endif
+
 #ifndef AXI_LIMIT0_MAX_OUTSTANDING_READS
 #define AXI_LIMIT0_MAX_OUTSTANDING_READS 32
 #endif
+
 #ifndef AXI_LIMIT0_MAX_OUTSTANDING_WRITES
 #define AXI_LIMIT0_MAX_OUTSTANDING_WRITES 16
 #endif
@@ -73,12 +76,15 @@
 #ifndef AXI_LIMIT1_MAX_BEATS_BYTES
 #define AXI_LIMIT1_MAX_BEATS_BYTES 0x0
 #endif
+
 #ifndef AXI_LIMIT1_MEM_TYPE
 #define AXI_LIMIT1_MEM_TYPE 0x0
 #endif
+
 #ifndef AXI_LIMIT1_MAX_OUTSTANDING_READS
 #define AXI_LIMIT1_MAX_OUTSTANDING_READS 32
 #endif
+
 #ifndef AXI_LIMIT1_MAX_OUTSTANDING_WRITES
 #define AXI_LIMIT1_MAX_OUTSTANDING_WRITES 16
 #endif
@@ -86,34 +92,33 @@
 #ifndef AXI_LIMIT2_MAX_BEATS_BYTES
 #define AXI_LIMIT2_MAX_BEATS_BYTES 0x0
 #endif
+
 #ifndef AXI_LIMIT2_MEM_TYPE
 #define AXI_LIMIT2_MEM_TYPE 0x0
 #endif
+
 #ifndef AXI_LIMIT2_MAX_OUTSTANDING_READS
 #define AXI_LIMIT2_MAX_OUTSTANDING_READS 32
 #endif
+
 #ifndef AXI_LIMIT2_MAX_OUTSTANDING_WRITES
 #define AXI_LIMIT2_MAX_OUTSTANDING_WRITES 16
 #endif
+
 #ifndef AXI_LIMIT3_MAX_BEATS_BYTES
 #define AXI_LIMIT3_MAX_BEATS_BYTES 0x0
 #endif
+
 #ifndef AXI_LIMIT3_MEM_TYPE
 #define AXI_LIMIT3_MEM_TYPE 0x0
 #endif
+
 #ifndef AXI_LIMIT3_MAX_OUTSTANDING_READS
 #define AXI_LIMIT3_MAX_OUTSTANDING_READS 32
 #endif
+
 #ifndef AXI_LIMIT3_MAX_OUTSTANDING_WRITES
 #define AXI_LIMIT3_MAX_OUTSTANDING_WRITES 16
 #endif
 
-/*
- * Address offset between the CPU and the NPU. The offset is
- * applied to the QBASE and BASEP registers.
- */
-#ifndef BASE_POINTER_OFFSET
-#define BASE_POINTER_OFFSET 0
-#endif
-
 #endif /* #ifndef ETHOSU_CONFIG_H */
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c
deleted file mode 100644
index 8c17337..0000000
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#if EI_ETHOS
-
-#include "ethosu_device.h"
-#include "ethosu_common.h"
-#include "ethosu_config.h"
-
-#include <assert.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#define BASEP_OFFSET 4
-#define REG_OFFSET 4
-#define BYTES_1KB 1024
-
-#define ADDRESS_BITS 48
-#define ADDRESS_MASK ((1ull << ADDRESS_BITS) - 1)
-
-#if defined(ARM_NPU_STUB)
-static uint32_t stream_length = 0;
-#endif
-
-enum ethosu_error_codes ethosu_dev_init(struct ethosu_device *dev,
-                                        const void *base_address,
-                                        uint32_t secure_enable,
-                                        uint32_t privilege_enable)
-{
-#if !defined(ARM_NPU_STUB)
-    dev->base_address = (volatile uintptr_t)base_address;
-    dev->secure       = secure_enable;
-    dev->privileged   = privilege_enable;
-
-    ethosu_save_pmu_config(dev);
-#else
-    UNUSED(dev);
-    UNUSED(base_address);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_id(struct ethosu_device *dev, struct ethosu_id *id)
-{
-    struct id_r _id;
-
-#if !defined(ARM_NPU_STUB)
-    _id.word = ethosu_read_reg(dev, NPU_REG_ID);
-#else
-    UNUSED(dev);
-
-    _id.word           = 0;
-    _id.arch_patch_rev = NNX_ARCH_VERSION_PATCH;
-    _id.arch_minor_rev = NNX_ARCH_VERSION_MINOR;
-    _id.arch_major_rev = NNX_ARCH_VERSION_MAJOR;
-#endif
-
-    id->version_status = _id.version_status;
-    id->version_minor  = _id.version_minor;
-    id->version_major  = _id.version_major;
-    id->product_major  = _id.product_major;
-    id->arch_patch_rev = _id.arch_patch_rev;
-    id->arch_minor_rev = _id.arch_minor_rev;
-    id->arch_major_rev = _id.arch_major_rev;
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_config(struct ethosu_device *dev, struct ethosu_config *config)
-{
-    struct config_r cfg = {.word = 0};
-
-#if !defined(ARM_NPU_STUB)
-    cfg.word = ethosu_read_reg(dev, NPU_REG_CONFIG);
-#else
-    UNUSED(dev);
-#endif
-
-    config->macs_per_cc        = cfg.macs_per_cc;
-    config->cmd_stream_version = cfg.cmd_stream_version;
-    config->shram_size         = cfg.shram_size;
-    config->custom_dma         = cfg.custom_dma;
-
-    return ETHOSU_SUCCESS;
-}
-
-// Added by Edge Impulse
-// Test for memory in DTCM.  If so, use global address
-uint64_t alias_memory_if_needed(uint64_t addr) {
-#if EI_ALIF_ADDR_TRANSLATION
-    if ((addr & 0xFF000000) == 0x20000000) {
-#if EI_CONFIG_ETHOS_U55_128 // means HE core
-        addr = 0x60800000 | ( addr & 0x007FFFFF );
-#else // assume HP core
-        addr = 0x50800000 | ( addr & 0x007FFFFF );
-#endif
-    }
-#endif
-    return addr;
-}
-
-enum ethosu_error_codes ethosu_run_command_stream(struct ethosu_device *dev,
-                                                  const uint8_t *cmd_stream_ptr,
-                                                  uint32_t cms_length,
-                                                  const uint64_t *base_addr,
-                                                  int num_base_addr)
-{
-    enum ethosu_error_codes ret_code = ETHOSU_SUCCESS;
-
-#if !defined(ARM_NPU_STUB)
-    assert(num_base_addr <= ETHOSU_DRIVER_BASEP_INDEXES);
-
-    uint64_t qbase = (uintptr_t)cmd_stream_ptr + BASE_POINTER_OFFSET;
-
-    // Added by Edge Impulse
-    // Test for memory in DTCM.  If so, use global address
-    qbase = alias_memory_if_needed(qbase);
-
-    assert(qbase <= ADDRESS_MASK);
-    LOG_DEBUG("QBASE=0x%016llx, QSIZE=%u, base_pointer_offset=0x%08x\n", qbase, cms_length, BASE_POINTER_OFFSET);
-    ethosu_write_reg(dev, NPU_REG_QBASE0, qbase & 0xffffffff);
-    ethosu_write_reg(dev, NPU_REG_QBASE1, qbase >> 32);
-    ethosu_write_reg(dev, NPU_REG_QSIZE, cms_length);
-
-    for (int i = 0; i < num_base_addr; i++)
-    {
-        uint64_t addr = base_addr[i] + BASE_POINTER_OFFSET;
-        assert(addr <= ADDRESS_MASK);
-        LOG_DEBUG("BASEP%d=0x%016llx\n", i, addr);
-
-        // Added by Edge Impulse
-        // Test for memory in DTCM.  If so, use global address
-        addr = alias_memory_if_needed(addr);
-
-        ethosu_write_reg(dev, NPU_REG_BASEP0 + (2 * i) * BASEP_OFFSET, addr & 0xffffffff);
-        ethosu_write_reg(dev, NPU_REG_BASEP0 + (2 * i + 1) * BASEP_OFFSET, addr >> 32);
-    }
-
-    ret_code = ethosu_set_command_run(dev);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    stream_length = cms_length;
-    UNUSED(cmd_stream_ptr);
-    UNUSED(base_addr);
-    assert(num_base_addr < ETHOSU_DRIVER_BASEP_INDEXES);
-#if defined(NDEBUG)
-    UNUSED(num_base_addr);
-#endif
-#endif
-
-    return ret_code;
-}
-
-enum ethosu_error_codes ethosu_is_irq_raised(struct ethosu_device *dev, uint8_t *irq_raised)
-{
-#if !defined(ARM_NPU_STUB)
-    struct status_r status;
-    status.word = ethosu_read_reg(dev, NPU_REG_STATUS);
-    if (status.irq_raised == 1)
-    {
-        *irq_raised = 1;
-    }
-    else
-    {
-        *irq_raised = 0;
-    }
-#else
-    UNUSED(dev);
-    *irq_raised = 1;
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_clear_irq_status(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    struct cmd_r oldcmd;
-    oldcmd.word = ethosu_read_reg(dev, NPU_REG_CMD);
-    struct cmd_r cmd;
-
-    cmd.word           = 0;
-    cmd.clear_irq      = 1;
-    cmd.clock_q_enable = oldcmd.clock_q_enable;
-    cmd.power_q_enable = oldcmd.power_q_enable;
-    ethosu_write_reg(dev, NPU_REG_CMD, cmd.word);
-    LOG_DEBUG("CMD=0x%08x\n", cmd.word);
-#else
-    UNUSED(dev);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_soft_reset(struct ethosu_device *dev)
-{
-    enum ethosu_error_codes return_code = ETHOSU_SUCCESS;
-#if !defined(ARM_NPU_STUB)
-    struct reset_r reset;
-    struct prot_r prot;
-
-    reset.word        = 0;
-    reset.pending_CPL = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER;
-    reset.pending_CSL = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE;
-
-    // Reset and set security level
-    LOG_INFO("Soft reset NPU\n");
-    ethosu_write_reg(dev, NPU_REG_RESET, reset.word);
-
-    // Wait for reset to complete
-    return_code = ethosu_wait_for_reset(dev);
-    if (return_code != ETHOSU_SUCCESS)
-    {
-        LOG_ERR("Soft reset timed out\n");
-        return return_code;
-    }
-
-    // Verify that NPU has switched security state and privilege level
-    prot.word = ethosu_read_reg(dev, NPU_REG_PROT);
-    if (prot.active_CPL != reset.pending_CPL || prot.active_CSL != reset.pending_CSL)
-    {
-        LOG_ERR("Failed to switch security state and privilege level\n");
-        // Register access not permitted
-        return ETHOSU_GENERIC_FAILURE;
-    }
-
-    // Save the prot register
-    dev->proto = ethosu_read_reg(dev, NPU_REG_PROT);
-
-    // Soft reset will clear the PMU configuration and counters. The shadow PMU counters
-    // are cleared by saving the PMU counters to ram, which will read back zeros.
-    // The PMU configuration will be restored in the invoke function after power save
-    // has been disabled.
-    ethosu_save_pmu_counters(dev);
-#else
-    UNUSED(dev);
-#endif
-
-    return return_code;
-}
-
-enum ethosu_error_codes ethosu_wait_for_reset(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    struct status_r status;
-
-    // Wait until reset status indicates that reset has been completed
-    for (int i = 0; i < 100000; i++)
-    {
-        status.word = ethosu_read_reg(dev, NPU_REG_STATUS);
-        if (0 == status.reset_status)
-        {
-            break;
-        }
-    }
-
-    if (1 == status.reset_status)
-    {
-        return ETHOSU_GENERIC_FAILURE;
-    }
-#else
-    UNUSED(dev);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_read_apb_reg(struct ethosu_device *dev,
-                                            uint32_t start_address,
-                                            uint16_t num_reg,
-                                            uint32_t *reg)
-{
-#if !defined(ARM_NPU_STUB)
-    uint32_t address = start_address;
-
-    assert((start_address + num_reg) < ID_REGISTERS_SIZE);
-
-    for (int i = 0; i < num_reg; i++)
-    {
-        reg[i] = ethosu_read_reg(dev, address);
-        address += REG_OFFSET;
-    }
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(start_address);
-    UNUSED(num_reg);
-    UNUSED(reg);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_qconfig(struct ethosu_device *dev, enum ethosu_memory_type memory_type)
-{
-    if (memory_type > ETHOSU_AXI1_OUTSTANDING_COUNTER3)
-    {
-        return ETHOSU_INVALID_PARAM;
-    }
-#if !defined(ARM_NPU_STUB)
-    ethosu_write_reg(dev, NPU_REG_QCONFIG, memory_type);
-    LOG_DEBUG("QCONFIG=0x%08x\n", memory_type);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(memory_type);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_regioncfg(struct ethosu_device *dev,
-                                             uint8_t region,
-                                             enum ethosu_memory_type memory_type)
-{
-    if (region > 7)
-    {
-        return ETHOSU_INVALID_PARAM;
-    }
-#if !defined(ARM_NPU_STUB)
-    struct regioncfg_r regioncfg;
-    regioncfg.word = ethosu_read_reg(dev, NPU_REG_REGIONCFG);
-    regioncfg.word &= ~(0x3 << (2 * region));
-    regioncfg.word |= (memory_type & 0x3) << (2 * region);
-    ethosu_write_reg(dev, NPU_REG_REGIONCFG, regioncfg.word);
-    LOG_DEBUG("REGIONCFG%u=0x%08x\n", region, regioncfg.word);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(region);
-    UNUSED(memory_type);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_axi_limit0(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes)
-{
-#if !defined(ARM_NPU_STUB)
-    struct axi_limit0_r axi_limit0;
-    axi_limit0.word                     = 0;
-    axi_limit0.max_beats                = max_beats;
-    axi_limit0.memtype                  = memtype;
-    axi_limit0.max_outstanding_read_m1  = max_reads - 1;
-    axi_limit0.max_outstanding_write_m1 = max_writes - 1;
-
-    ethosu_write_reg(dev, NPU_REG_AXI_LIMIT0, axi_limit0.word);
-    LOG_DEBUG("AXI_LIMIT0=0x%08x\n", axi_limit0.word);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(max_beats);
-    UNUSED(memtype);
-    UNUSED(max_reads);
-    UNUSED(max_writes);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_axi_limit1(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes)
-{
-#if !defined(ARM_NPU_STUB)
-    struct axi_limit1_r axi_limit1;
-    axi_limit1.word                     = 0;
-    axi_limit1.max_beats                = max_beats;
-    axi_limit1.memtype                  = memtype;
-    axi_limit1.max_outstanding_read_m1  = max_reads - 1;
-    axi_limit1.max_outstanding_write_m1 = max_writes - 1;
-
-    ethosu_write_reg(dev, NPU_REG_AXI_LIMIT1, axi_limit1.word);
-    LOG_DEBUG("AXI_LIMIT1=0x%08x\n", axi_limit1.word);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(max_beats);
-    UNUSED(memtype);
-    UNUSED(max_reads);
-    UNUSED(max_writes);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_axi_limit2(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes)
-{
-#if !defined(ARM_NPU_STUB)
-    struct axi_limit2_r axi_limit2;
-    axi_limit2.word                     = 0;
-    axi_limit2.max_beats                = max_beats;
-    axi_limit2.memtype                  = memtype;
-    axi_limit2.max_outstanding_read_m1  = max_reads - 1;
-    axi_limit2.max_outstanding_write_m1 = max_writes - 1;
-
-    ethosu_write_reg(dev, NPU_REG_AXI_LIMIT2, axi_limit2.word);
-    LOG_DEBUG("AXI_LIMIT2=0x%08x\n", axi_limit2.word);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(max_beats);
-    UNUSED(memtype);
-    UNUSED(max_reads);
-    UNUSED(max_writes);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_axi_limit3(struct ethosu_device *dev,
-                                              enum ethosu_axi_limit_beats max_beats,
-                                              enum ethosu_axi_limit_mem_type memtype,
-                                              uint8_t max_reads,
-                                              uint8_t max_writes)
-{
-#if !defined(ARM_NPU_STUB)
-    struct axi_limit3_r axi_limit3;
-    axi_limit3.word                     = 0;
-    axi_limit3.max_beats                = max_beats;
-    axi_limit3.memtype                  = memtype;
-    axi_limit3.max_outstanding_read_m1  = max_reads - 1;
-    axi_limit3.max_outstanding_write_m1 = max_writes - 1;
-
-    ethosu_write_reg(dev, NPU_REG_AXI_LIMIT3, axi_limit3.word);
-    LOG_DEBUG("AXI_LIMIT3=0x%08x\n", axi_limit3.word);
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(max_beats);
-    UNUSED(memtype);
-    UNUSED(max_reads);
-    UNUSED(max_writes);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_revision(struct ethosu_device *dev, uint32_t *revision)
-{
-#if !defined(ARM_NPU_STUB)
-    *revision = ethosu_read_reg(dev, NPU_REG_REVISION);
-#else
-    UNUSED(dev);
-    *revision = 0xDEADC0DE;
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_qread(struct ethosu_device *dev, uint32_t *qread)
-{
-#if !defined(ARM_NPU_STUB)
-    *qread = ethosu_read_reg(dev, NPU_REG_QREAD);
-#else
-    UNUSED(dev);
-    *qread = stream_length;
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_status_mask(struct ethosu_device *dev, uint16_t *status_mask)
-{
-#if !defined(ARM_NPU_STUB)
-    struct status_r status;
-
-    status.word  = ethosu_read_reg(dev, NPU_REG_STATUS);
-    *status_mask = status.word & 0xFFFF;
-#else
-    UNUSED(dev);
-    *status_mask = 0x0000;
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_irq_history_mask(struct ethosu_device *dev, uint16_t *irq_history_mask)
-{
-#if !defined(ARM_NPU_STUB)
-    struct status_r status;
-
-    status.word       = ethosu_read_reg(dev, NPU_REG_STATUS);
-    *irq_history_mask = status.irq_history_mask;
-#else
-    UNUSED(dev);
-    *irq_history_mask = 0xffff;
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_clear_irq_history_mask(struct ethosu_device *dev, uint16_t irq_history_clear_mask)
-{
-#if !defined(ARM_NPU_STUB)
-    struct cmd_r oldcmd;
-    oldcmd.word = ethosu_read_reg(dev, NPU_REG_CMD);
-
-    struct cmd_r cmd;
-    cmd.word              = 0;
-    cmd.clock_q_enable    = oldcmd.clock_q_enable;
-    cmd.power_q_enable    = oldcmd.power_q_enable;
-    cmd.clear_irq_history = irq_history_clear_mask;
-    ethosu_write_reg(dev, NPU_REG_CMD, cmd.word);
-    LOG_DEBUG("CMD=0x%08x\n", cmd.word);
-#else
-    UNUSED(dev);
-    UNUSED(irq_history_clear_mask);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_command_run(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    struct cmd_r oldcmd;
-    oldcmd.word = ethosu_read_reg(dev, NPU_REG_CMD);
-
-    struct cmd_r cmd;
-    cmd.word                        = 0;
-    cmd.transition_to_running_state = 1;
-    cmd.clock_q_enable              = oldcmd.clock_q_enable;
-    cmd.power_q_enable              = oldcmd.power_q_enable;
-    ethosu_write_reg(dev, NPU_REG_CMD, cmd.word);
-    LOG_DEBUG("CMD=0x%08x\n", cmd.word);
-#else
-    UNUSED(dev);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_get_shram_data(struct ethosu_device *dev, int section, uint32_t *shram_p)
-{
-#if !defined(ARM_NPU_STUB)
-    int i            = 0;
-    uint32_t address = NPU_REG_SHARED_BUFFER0;
-    ethosu_write_reg(dev, NPU_REG_DEBUG_ADDRESS, section * BYTES_1KB);
-
-    while (address <= NPU_REG_SHARED_BUFFER255)
-    {
-        shram_p[i] = ethosu_read_reg(dev, address);
-        address += REG_OFFSET;
-        i++;
-    }
-#else
-    // NPU stubbed
-    UNUSED(dev);
-    UNUSED(section);
-    UNUSED(shram_p);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_set_clock_and_power(struct ethosu_device *dev,
-                                                   enum ethosu_clock_q_request clock_q,
-                                                   enum ethosu_power_q_request power_q)
-{
-#if !defined(ARM_NPU_STUB)
-    struct cmd_r cmd;
-    cmd.word           = 0;
-    cmd.clock_q_enable = clock_q;
-    cmd.power_q_enable = power_q;
-    ethosu_write_reg(dev, NPU_REG_CMD, cmd.word);
-    LOG_DEBUG("CMD=0x%08x\n", cmd.word);
-#else
-    UNUSED(dev);
-    UNUSED(clock_q);
-    UNUSED(power_q);
-#endif
-    return ETHOSU_SUCCESS;
-}
-
-uint32_t ethosu_read_reg(struct ethosu_device *dev, uint32_t address)
-{
-#if !defined(ARM_NPU_STUB)
-    assert(dev->base_address != 0);
-    assert(address % 4 == 0);
-
-    volatile uint32_t *reg = (volatile uint32_t *)(dev->base_address + address);
-    return *reg;
-#else
-    UNUSED(dev);
-    UNUSED(address);
-
-    return 0;
-#endif
-}
-
-void ethosu_write_reg(struct ethosu_device *dev, uint32_t address, uint32_t value)
-{
-#if !defined(ARM_NPU_STUB)
-    assert(dev->base_address != 0);
-    assert(address % 4 == 0);
-
-    volatile uint32_t *reg = (volatile uint32_t *)(dev->base_address + address);
-    *reg                   = value;
-#else
-    UNUSED(dev);
-    UNUSED(address);
-    UNUSED(value);
-#endif
-}
-
-void ethosu_write_reg_shadow(struct ethosu_device *dev, uint32_t address, uint32_t value, uint32_t *shadow)
-{
-    ethosu_write_reg(dev, address, value);
-    *shadow = ethosu_read_reg(dev, address);
-}
-
-enum ethosu_error_codes ethosu_save_pmu_config(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    // Save the PMU control register
-    dev->pmcr = ethosu_read_reg(dev, NPU_REG_PMCR);
-
-    // Save IRQ control
-    dev->pmint = ethosu_read_reg(dev, NPU_REG_PMINTSET);
-
-    // Save the enabled events mask
-    dev->pmcnten = ethosu_read_reg(dev, NPU_REG_PMCNTENSET);
-
-    // Save start and stop event
-    dev->pmccntr_cfg = ethosu_read_reg(dev, NPU_REG_PMCCNTR_CFG);
-
-    // Save the event settings and counters
-    for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++)
-    {
-        dev->pmu_evtypr[i] = ethosu_read_reg(dev, NPU_REG_PMEVTYPER0 + i * sizeof(uint32_t));
-    }
-#else
-    UNUSED(dev);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_restore_pmu_config(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    // Restore PMU control register
-    ethosu_write_reg(dev, NPU_REG_PMCR, dev->pmcr);
-
-    // Restore IRQ control
-    ethosu_write_reg(dev, NPU_REG_PMINTSET, dev->pmint);
-
-    // Restore enabled event mask
-    ethosu_write_reg(dev, NPU_REG_PMCNTENSET, dev->pmcnten);
-
-    // Restore start and stop event
-    ethosu_write_reg(dev, NPU_REG_PMCCNTR_CFG, dev->pmccntr_cfg);
-
-    // Save the event settings and counters
-    for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++)
-    {
-        ethosu_write_reg(dev, NPU_REG_PMEVTYPER0 + i * sizeof(uint32_t), dev->pmu_evtypr[i]);
-    }
-#else
-    UNUSED(dev);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-enum ethosu_error_codes ethosu_save_pmu_counters(struct ethosu_device *dev)
-{
-#if !defined(ARM_NPU_STUB)
-    // Save the cycle counter
-    dev->pmccntr[0] = ethosu_read_reg(dev, NPU_REG_PMCCNTR_LO);
-    dev->pmccntr[1] = ethosu_read_reg(dev, NPU_REG_PMCCNTR_HI);
-
-    // Save the event settings and counters
-    for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++)
-    {
-        dev->pmu_evcntr[i] = ethosu_read_reg(dev, NPU_REG_PMEVCNTR0 + i * sizeof(uint32_t));
-    }
-#else
-    UNUSED(dev);
-#endif
-
-    return ETHOSU_SUCCESS;
-}
-
-bool ethosu_status_has_error(struct ethosu_device *dev)
-{
-    bool status_error = false;
-#if !defined(ARM_NPU_STUB)
-    struct status_r status;
-    status.word  = ethosu_read_reg(dev, NPU_REG_STATUS);
-    status_error = ((1 == status.bus_status) || (1 == status.cmd_parse_error) || (1 == status.wd_fault) ||
-                    (1 == status.ecc_fault));
-#else
-    UNUSED(dev);
-#endif
-    return status_error;
-}
-
-#endif //EI ETHOS
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.h
new file mode 100644
index 0000000..02942b1
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device.h
@@ -0,0 +1,142 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_DEVICE_H
+#define ETHOSU_DEVICE_H
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+#include "ethosu_types.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+// NOTE: Deprecated
+#ifndef ETHOSU_PMU_NCOUNTERS
+#define ETHOSU_PMU_NCOUNTERS 4
+#endif
+
+/******************************************************************************
+ * Types
+ ******************************************************************************/
+struct NPU_REG; // Forward declare, to be implemented by each device
+
+struct ethosu_device
+{
+    volatile struct NPU_REG *reg; // Register map
+    uint32_t secure;
+    uint32_t privileged;
+};
+
+/******************************************************************************
+ * Prototypes
+ ******************************************************************************/
+
+/**
+ * Initialize the device.
+ */
+struct ethosu_device *ethosu_dev_init(void *const base_address, uint32_t secure_enable, uint32_t privilege_enable);
+
+/**
+ * Deinitialize the device.
+ */
+void ethosu_dev_deinit(struct ethosu_device *dev);
+
+/**
+ * Initialize AXI settings for device.
+ */
+enum ethosu_error_codes ethosu_dev_axi_init(struct ethosu_device *dev);
+
+/**
+ * Execute a given command stream on NPU.
+ * \param[in] cmd_stream_ptr   Pointer to the command stream
+ * \param[in] cms_length       Command stream length
+ * \param[in] base_addr        Pointer to array of base addresses
+ *                             - 0: weight tensor
+ *                             - 1: scratch tensor
+ *                             - All input tensors
+ *                             - All output tensors
+ * \param[in] num_base_addr    Number of base addresses.
+ */
+void ethosu_dev_run_command_stream(struct ethosu_device *dev,
+                                   const uint8_t *cmd_stream_ptr,
+                                   uint32_t cms_length,
+                                   const uint64_t *base_addr,
+                                   int num_base_addr);
+
+/**
+ * Print information on NPU error status
+ */
+void ethosu_dev_print_err_status(struct ethosu_device *dev);
+
+/**
+ *  Interrupt handler on device layer
+ * \return                     true if NPU status is OK, otherwise false
+ */
+bool ethosu_dev_handle_interrupt(struct ethosu_device *dev);
+
+/**
+ * Get hardware information from NPU
+ * \param[out] hwinfo          Pointer to the hardware info struct to be filled in.
+ */
+void ethosu_dev_get_hw_info(struct ethosu_device *dev, struct ethosu_hw_info *hwinfo);
+
+/**
+ * Verify that requested security state and privilege mode are active
+ * \return                     true if the requested security state and privilege mode are active, otherwise false
+ */
+bool ethosu_dev_verify_access_state(struct ethosu_device *dev);
+
+/**
+ * Performs a NPU soft reset and waits for the NPU to become ready
+ * \return                     \ref ethosu_error_codes
+ */
+enum ethosu_error_codes ethosu_dev_soft_reset(struct ethosu_device *dev);
+
+/**
+ * Enable/disable clock and power using clock/power q interface.
+ * \param[in] clock_q          Clock q ENABLE/DISABLE \ref ethosu_clock_q_request.
+ * \param[in] power_q          Power q ENABLE/DISABLE \ref ethosu_power_q_request.
+ * \return                     \ref ethosu_error_codes
+ */
+enum ethosu_error_codes ethosu_dev_set_clock_and_power(struct ethosu_device *dev,
+                                                       enum ethosu_clock_q_request clock_q,
+                                                       enum ethosu_power_q_request power_q);
+
+/**
+ * Verifies that optimizer parameters from model are compatible with the hardware
+ * \param[in] cfg_in           Config data from optimizer.
+ * \param[in] id_in            Id data from optimizer.
+ * \return                     true if parameters match with hardware, false otherwise.
+ */
+bool ethosu_dev_verify_optimizer_config(struct ethosu_device *dev, uint32_t cfg_in, uint32_t id_in);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // ETHOSU_DEVICE_H
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device_u55_u65.c b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device_u55_u65.c
new file mode 100644
index 0000000..7de0daa
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_device_u55_u65.c
@@ -0,0 +1,392 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+#if EI_ETHOS
+
+#include "ethosu_interface.h"
+
+#include "ethosu_device.h"
+#include "ethosu_log.h"
+
+#ifdef ETHOSU55
+#include "ethosu_config_u55.h"
+#else
+#include "ethosu_config_u65.h"
+#endif
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+#define ETHOSU_PRODUCT_U55 0
+#define ETHOSU_PRODUCT_U65 1
+
+#define BASEP_OFFSET 4
+
+#ifdef ETHOSU65
+#define ADDRESS_BITS 40
+#else
+#define ADDRESS_BITS 32
+#endif
+
+#define ADDRESS_MASK ((1ull << ADDRESS_BITS) - 1)
+
+#define NPU_CMD_PWR_CLK_MASK (0xC)
+
+/******************************************************************************
+ * Functions
+ ******************************************************************************/
+
+uint64_t __attribute__((weak)) ethosu_address_remap(uint64_t address, int index)
+{
+    (void)index;    // the weak default has no use for the index
+    return address; // identity mapping: the address is handed back unchanged
+}
+
+struct ethosu_device *ethosu_dev_init(void *const base_address, uint32_t secure_enable, uint32_t privilege_enable)
+{
+#ifdef ETHOSU55
+    const int expected_product = ETHOSU_PRODUCT_U55; // product id this driver build was compiled for
+#else
+    const int expected_product = ETHOSU_PRODUCT_U65; // product id this driver build was compiled for
+#endif
+
+    struct ethosu_device *device = malloc(sizeof(*device)); // freed below on failure, otherwise owned by the caller
+    if (device == NULL)
+    {
+        LOG_ERR("Failed to allocate memory for Ethos-U device");
+        return NULL;
+    }
+
+    device->reg        = (volatile struct NPU_REG *)base_address;
+    device->secure     = secure_enable;
+    device->privileged = privilege_enable;
+
+    if (device->reg->CONFIG.product != expected_product)
+    {
+        LOG_ERR("Failed to initialize device. Driver has not been compiled for this product");
+        free(device);
+        return NULL;
+    }
+
+    // Make sure the NPU is in a known state
+    if (ethosu_dev_soft_reset(device) != ETHOSU_SUCCESS)
+    {
+        free(device);
+        return NULL;
+    }
+
+    return device;
+}
+
+void ethosu_dev_deinit(struct ethosu_device *dev)
+{
+    free(dev); // only the handle memory is released here; no NPU register is accessed
+}
+
+enum ethosu_error_codes ethosu_dev_axi_init(struct ethosu_device *dev)
+{
+    struct regioncfg_r regions = {0};
+    struct axi_limit0_r limit0 = {0};
+    struct axi_limit1_r limit1 = {0};
+    struct axi_limit2_r limit2 = {0};
+    struct axi_limit3_r limit3 = {0};
+
+    dev->reg->QCONFIG.word = NPU_QCONFIG;
+
+    regions.region0          = NPU_REGIONCFG_0;
+    regions.region1          = NPU_REGIONCFG_1;
+    regions.region2          = NPU_REGIONCFG_2;
+    regions.region3          = NPU_REGIONCFG_3;
+    regions.region4          = NPU_REGIONCFG_4;
+    regions.region5          = NPU_REGIONCFG_5;
+    regions.region6          = NPU_REGIONCFG_6;
+    regions.region7          = NPU_REGIONCFG_7;
+    dev->reg->REGIONCFG.word = regions.word;
+
+    // Each limit image is built in its zeroed local; the *_m1 fields take the configured maximum minus one
+    limit0.max_beats                = AXI_LIMIT0_MAX_BEATS_BYTES;
+    limit0.memtype                  = AXI_LIMIT0_MEM_TYPE;
+    limit0.max_outstanding_read_m1  = AXI_LIMIT0_MAX_OUTSTANDING_READS - 1;
+    limit0.max_outstanding_write_m1 = AXI_LIMIT0_MAX_OUTSTANDING_WRITES - 1;
+    dev->reg->AXI_LIMIT0.word       = limit0.word;
+
+    limit1.max_beats                = AXI_LIMIT1_MAX_BEATS_BYTES;
+    limit1.memtype                  = AXI_LIMIT1_MEM_TYPE;
+    limit1.max_outstanding_read_m1  = AXI_LIMIT1_MAX_OUTSTANDING_READS - 1;
+    limit1.max_outstanding_write_m1 = AXI_LIMIT1_MAX_OUTSTANDING_WRITES - 1;
+    dev->reg->AXI_LIMIT1.word       = limit1.word;
+
+    limit2.max_beats                = AXI_LIMIT2_MAX_BEATS_BYTES;
+    limit2.memtype                  = AXI_LIMIT2_MEM_TYPE;
+    limit2.max_outstanding_read_m1  = AXI_LIMIT2_MAX_OUTSTANDING_READS - 1;
+    limit2.max_outstanding_write_m1 = AXI_LIMIT2_MAX_OUTSTANDING_WRITES - 1;
+    dev->reg->AXI_LIMIT2.word       = limit2.word;
+
+    limit3.max_beats                = AXI_LIMIT3_MAX_BEATS_BYTES;
+    limit3.memtype                  = AXI_LIMIT3_MEM_TYPE;
+    limit3.max_outstanding_read_m1  = AXI_LIMIT3_MAX_OUTSTANDING_READS - 1;
+    limit3.max_outstanding_write_m1 = AXI_LIMIT3_MAX_OUTSTANDING_WRITES - 1;
+    dev->reg->AXI_LIMIT3.word       = limit3.word;
+
+    return ETHOSU_SUCCESS;
+}
+
+void ethosu_dev_run_command_stream(struct ethosu_device *dev,
+                                   const uint8_t *cmd_stream_ptr,
+                                   uint32_t cms_length,
+                                   const uint64_t *base_addr,
+                                   int num_base_addr)
+{
+    assert(num_base_addr <= NPU_REG_BASEP_ARRLEN);
+
+    struct cmd_r cmd;
+    uint64_t qbase = ethosu_address_remap((uintptr_t)cmd_stream_ptr, -1); // remapped with index -1
+    assert(qbase <= ADDRESS_MASK);
+    LOG_DEBUG("QBASE=0x%016" PRIx64 ", QSIZE=%" PRIu32 ", cmd_stream_ptr=%p",
+              qbase, cms_length, (const void *)cmd_stream_ptr); // PRIx64 for uint64_t, void * for %p
+    // Program the command queue: remapped start address (upper word only on ETHOSU65 builds) and length
+    dev->reg->QBASE.word[0] = qbase & 0xffffffff;
+#ifdef ETHOSU65
+    dev->reg->QBASE.word[1] = qbase >> 32;
+#endif
+    dev->reg->QSIZE.word = cms_length;
+    // Program one BASEP register per entry of base_addr, each remapped with its array index
+    for (int i = 0; i < num_base_addr; i++)
+    {
+        uint64_t addr = ethosu_address_remap(base_addr[i], i);
+        assert(addr <= ADDRESS_MASK);
+        LOG_DEBUG("BASEP%d=0x%016" PRIx64, i, addr);
+        dev->reg->BASEP[i].word[0] = addr & 0xffffffff;
+#ifdef ETHOSU65
+        dev->reg->BASEP[i].word[1] = addr >> 32;
+#endif
+    }
+    // Keep the CMD bits selected by NPU_CMD_PWR_CLK_MASK and request the transition to running state
+    cmd.word                        = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    cmd.transition_to_running_state = 1;
+    dev->reg->CMD.word              = cmd.word;
+    LOG_DEBUG("CMD=0x%08" PRIx32, cmd.word);
+}
+
+void ethosu_dev_print_err_status(struct ethosu_device *dev)
+{
+    LOG_ERR("NPU status=0x%08" PRIx32 ", qread=%" PRIu32 ", cmd_end_reached=%u",
+            dev->reg->STATUS.word,             // whole STATUS register
+            dev->reg->QREAD.word,              // whole QREAD register
+            dev->reg->STATUS.cmd_end_reached); // single STATUS field, read separately from the word above
+}
+
+bool ethosu_dev_handle_interrupt(struct ethosu_device *dev)
+{
+    // Clear the interrupt: keep the CMD bits selected by NPU_CMD_PWR_CLK_MASK and set clear_irq
+    struct cmd_r ack;
+    ack.word           = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    ack.clear_irq      = 1;
+    dev->reg->CMD.word = ack.word;
+
+    // If a fault has occurred, the NPU needs to be reset; that case is reported as false
+    if (dev->reg->STATUS.bus_status || dev->reg->STATUS.cmd_parse_error || dev->reg->STATUS.wd_fault ||
+        dev->reg->STATUS.ecc_fault)
+    {
+        return false;
+    }
+
+    // No fault flag is set: the outcome is whether the end of the command stream was reached
+    return dev->reg->STATUS.cmd_end_reached != 0;
+}
+
+bool ethosu_dev_verify_access_state(struct ethosu_device *dev)
+{
+    // Levels the driver was asked for, derived from the flags stored in the device handle
+    const int wanted_csl = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE;
+    const int wanted_cpl = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER;
+
+    // Both active levels reported in PROT must match; active_CPL is only read when active_CSL matched
+    return dev->reg->PROT.active_CSL == wanted_csl && dev->reg->PROT.active_CPL == wanted_cpl;
+}
+
+enum ethosu_error_codes ethosu_dev_soft_reset(struct ethosu_device *dev)
+{
+    // Note that after a soft-reset, the NPU is unconditionally
+    // powered until the next CMD gets written.
+
+    // Upper bound on status polls so that an unresponsive NPU ends in a time-out error instead of a hang
+    const int max_polls = 100000;
+    struct reset_r request;
+
+    // The RESET word carries the privilege and security levels stored in the device handle
+    request.word        = 0;
+    request.pending_CPL = dev->privileged ? PRIVILEGE_LEVEL_PRIVILEGED : PRIVILEGE_LEVEL_USER;
+    request.pending_CSL = dev->secure ? SECURITY_LEVEL_SECURE : SECURITY_LEVEL_NON_SECURE;
+
+    LOG_INFO("Soft reset NPU");
+    dev->reg->RESET.word = request.word;
+
+    // Busy-wait until reset_status clears or the poll budget is used up
+    for (int polls = 0; polls < max_polls && dev->reg->STATUS.reset_status != 0; polls++)
+    {
+    }
+
+    if (dev->reg->STATUS.reset_status != 0)
+    {
+        LOG_ERR("Soft reset timed out");
+        return ETHOSU_GENERIC_FAILURE;
+    }
+
+    // Verify that NPU has switched security state and privilege level
+    if (!ethosu_dev_verify_access_state(dev))
+    {
+        LOG_ERR("Failed to switch security state and privilege level");
+        return ETHOSU_GENERIC_FAILURE;
+    }
+
+    // Reinitialize AXI settings; the status of that step is returned instead of being discarded
+    return ethosu_dev_axi_init(dev);
+}
+
+void ethosu_dev_get_hw_info(struct ethosu_device *dev, struct ethosu_hw_info *hwinfo)
+{
+    // CONFIG and ID are each read once into a local copy; the fields are then taken from the copies
+    struct config_r config_copy;
+    struct id_r id_copy;
+    config_copy.word = dev->reg->CONFIG.word;
+    id_copy.word     = dev->reg->ID.word;
+
+    hwinfo->version.version_status = id_copy.version_status;
+    hwinfo->version.version_minor  = id_copy.version_minor;
+    hwinfo->version.version_major  = id_copy.version_major;
+    hwinfo->version.product_major  = id_copy.product_major;
+    hwinfo->version.arch_patch_rev = id_copy.arch_patch_rev;
+    hwinfo->version.arch_minor_rev = id_copy.arch_minor_rev;
+    hwinfo->version.arch_major_rev = id_copy.arch_major_rev;
+
+    hwinfo->cfg.macs_per_cc        = config_copy.macs_per_cc;
+    hwinfo->cfg.cmd_stream_version = config_copy.cmd_stream_version;
+    hwinfo->cfg.custom_dma         = config_copy.custom_dma;
+}
+
+enum ethosu_error_codes ethosu_dev_set_clock_and_power(struct ethosu_device *dev,
+                                                       enum ethosu_clock_q_request clock_q,
+                                                       enum ethosu_power_q_request power_q)
+{
+    // Seed the new CMD value with the current CMD bits selected by NPU_CMD_PWR_CLK_MASK. A request other
+    // than UNCHANGED then sets its enable field to 1 for ENABLE and to 0 otherwise.
+    struct cmd_r request = {0};
+    request.word         = dev->reg->CMD.word & NPU_CMD_PWR_CLK_MASK;
+    if (clock_q != ETHOSU_CLOCK_Q_UNCHANGED)
+    {
+        request.clock_q_enable = (clock_q == ETHOSU_CLOCK_Q_ENABLE);
+    }
+    if (power_q != ETHOSU_POWER_Q_UNCHANGED)
+    {
+        request.power_q_enable = (power_q == ETHOSU_POWER_Q_ENABLE);
+    }
+
+    dev->reg->CMD.word = request.word;
+    LOG_DEBUG("CMD=0x%08" PRIx32, request.word);
+    return ETHOSU_SUCCESS;
+}
+
+bool ethosu_dev_verify_optimizer_config(struct ethosu_device *dev, uint32_t cfg_in, uint32_t id_in)
+{
+    // The optimizer words are decoded through the same register layouts as the values read from the NPU
+    struct config_r opt_cfg, hw_cfg;
+    struct id_r opt_id, hw_id;
+    bool compatible = true;
+
+    opt_cfg.word = cfg_in;
+    opt_id.word  = id_in;
+    hw_cfg.word  = dev->reg->CONFIG.word;
+    hw_id.word   = dev->reg->ID.word;
+
+    LOG_INFO("Optimizer config. product=%u, cmd_stream_version=%u, macs_per_cc=%u, shram_size=%u, custom_dma=%u",
+             opt_cfg.product,
+             opt_cfg.cmd_stream_version,
+             opt_cfg.macs_per_cc,
+             opt_cfg.shram_size,
+             opt_cfg.custom_dma);
+    LOG_INFO("Optimizer config. arch version: %u.%u.%u",
+             opt_id.arch_major_rev,
+             opt_id.arch_minor_rev,
+             opt_id.arch_patch_rev);
+    LOG_INFO("Ethos-U config. product=%u, cmd_stream_version=%u, macs_per_cc=%u, shram_size=%u, custom_dma=%u",
+             hw_cfg.product,
+             hw_cfg.cmd_stream_version,
+             hw_cfg.macs_per_cc,
+             hw_cfg.shram_size,
+             hw_cfg.custom_dma);
+    LOG_INFO("Ethos-U. arch version=%u.%u.%u", hw_id.arch_major_rev, hw_id.arch_minor_rev, hw_id.arch_patch_rev);
+
+    if (hw_cfg.product != opt_cfg.product)
+    {
+        LOG_ERR("NPU config mismatch. npu.product=%u, optimizer.product=%u", hw_cfg.product, opt_cfg.product);
+        compatible = false;
+    }
+
+    if (hw_cfg.macs_per_cc != opt_cfg.macs_per_cc)
+    {
+        LOG_ERR("NPU config mismatch. npu.macs_per_cc=%u, optimizer.macs_per_cc=%u",
+                hw_cfg.macs_per_cc,
+                opt_cfg.macs_per_cc);
+        compatible = false;
+    }
+
+    if (hw_cfg.cmd_stream_version != opt_cfg.cmd_stream_version)
+    {
+        LOG_ERR("NPU config mismatch. npu.cmd_stream_version=%u, optimizer.cmd_stream_version=%u",
+                hw_cfg.cmd_stream_version,
+                opt_cfg.cmd_stream_version);
+        compatible = false;
+    }
+
+    // Custom DMA only counts as a mismatch when the optimizer config has it set and the NPU config does not
+    if (opt_cfg.custom_dma && !hw_cfg.custom_dma)
+    {
+        LOG_ERR("NPU config mismatch. npu.custom_dma=%u, optimizer.custom_dma=%u",
+                hw_cfg.custom_dma,
+                opt_cfg.custom_dma);
+        compatible = false;
+    }
+
+    // Major revisions must be equal and the NPU minor revision must not be below the optimizer's; patch is only logged
+    if ((hw_id.arch_major_rev != opt_id.arch_major_rev) || (hw_id.arch_minor_rev < opt_id.arch_minor_rev))
+    {
+        LOG_ERR("NPU arch mismatch. npu.arch=%u.%u.%u, optimizer.arch=%u.%u.%u",
+                hw_id.arch_major_rev,
+                hw_id.arch_minor_rev,
+                hw_id.arch_patch_rev,
+                opt_id.arch_major_rev,
+                opt_id.arch_minor_rev,
+                opt_id.arch_patch_rev);
+        compatible = false;
+    }
+
+    return compatible;
+}
+#endif // EI_ETHOS
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c
index f616dfb..ae038e3 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_driver.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -19,13 +19,17 @@
 /******************************************************************************
  * Includes
  ******************************************************************************/
-
 #if EI_ETHOS
 
 #include "ethosu_driver.h"
-#include "ethosu_common.h"
-#include "ethosu_config.h"
 #include "ethosu_device.h"
+#include "ethosu_log.h"
+
+#ifdef ETHOSU55
+#include "ethosu_config_u55.h"
+#else
+#include "ethosu_config_u65.h"
+#endif
 
 #include <assert.h>
 #include <cmsis_compiler.h>
@@ -39,19 +43,14 @@
  * Defines
  ******************************************************************************/
 
-#define MACS_PER_CYCLE_LOG2_MASK 0x000F
-#define SHRAM_SIZE_MASK 0xFF00
-#define SHRAM_SIZE_RIGHT_SHIFT 8
+#define UNUSED(x) ((void)x)
+
 #define BYTES_IN_32_BITS 4
-#define CUSTOM_OPTION_LENGTH_32_BIT_WORD 1
-#define DRIVER_ACTION_LENGTH_32_BIT_WORD 1
+#define MASK_16_BYTE_ALIGN (0xF)
 #define OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD 2
+#define DRIVER_ACTION_LENGTH_32_BIT_WORD 1
 #define ETHOSU_FOURCC ('1' << 24 | 'P' << 16 | 'O' << 8 | 'C') // "Custom Operator Payload 1"
-#define APB_START_ADDR_MASK 0x0FFF
-#define APB_NUM_REG_BIT_SHIFT 12
-#define BYTES_1KB 1024
-#define PRODUCT_MAJOR_ETHOSU55 (4)
-#define MASK_16_BYTE_ALIGN (0xF)
+
 #define FAST_MEMORY_BASE_ADDR_INDEX 2
 
 /******************************************************************************
@@ -64,23 +63,18 @@ enum DRIVER_ACTION_e
     RESERVED         = 0,
     OPTIMIZER_CONFIG = 1,
     COMMAND_STREAM   = 2,
-    READ_APB_REG     = 3,
-    DUMP_SHRAM       = 4,
     NOP              = 5,
 };
 
-// Custom data struct
-struct custom_data_s
+// Custom operator payload data struct
+struct cop_data_s
 {
     union
     {
         // Driver action data
         struct
         {
-            // Driver action command (valid values in DRIVER_ACTION_e)
-            uint8_t driver_action_command;
-
-            // reserved
+            uint8_t driver_action_command; // (valid values in DRIVER_ACTION_e)
             uint8_t reserved;
 
             // Driver action data
@@ -100,13 +94,6 @@ struct custom_data_s
                     uint16_t length;
                 };
 
-                // DA_CMD_READAPB
-                struct
-                {
-                    uint16_t start_address : 12;
-                    uint16_t nbr_reg_minus1 : 4;
-                };
-
                 uint16_t driver_action_data;
             };
         };
@@ -118,61 +105,57 @@ struct custom_data_s
 // optimizer config struct
 struct opt_cfg_s
 {
-    struct custom_data_s da_data;
-    union
-    {
-        struct
-        {
-            uint32_t macs_per_cc : 4;
-            uint32_t cmd_stream_version : 4;
-            uint32_t shram_size : 8;
-            uint32_t reserved0 : 11;
-            uint32_t custom_dma : 1;
-            uint32_t product : 4;
-        };
-        uint32_t npu_cfg;
-    };
-    union
-    {
-        struct
-        {
-            uint32_t version_status : 4;
-            uint32_t version_minor : 4;
-            uint32_t version_major : 4;
-            uint32_t product_major : 4;
-            uint32_t arch_patch_rev : 4;
-            uint32_t arch_minor_rev : 8;
-            uint32_t arch_major_rev : 4;
-        };
-        uint32_t ethosu_id;
-    };
+    struct cop_data_s da_data;
+    uint32_t cfg;
+    uint32_t id;
 };
 
 /******************************************************************************
- * Functions
+ * Variables
  ******************************************************************************/
 
-struct ethosu_driver ethosu_drv = {
-    .dev = {.base_address = NULL, .proto = 0, .pmccntr = {0}, .pmu_evcntr = {0, 0, 0, 0}, .pmu_evtypr = {0, 0, 0, 0}},
-    .abort_inference     = false,
-    .status_error        = false,
-    .dev_power_always_on = false};
-
 // Registered drivers linked list HEAD
 static struct ethosu_driver *registered_drivers = NULL;
 
+/******************************************************************************
+ * Weak functions - Cache
+ *
+ * Default NOP operations. Override if available on the targeted device.
+ ******************************************************************************/
+
+/*
+ * Flush/clean the data cache by address and size. Passing NULL as p argument
+ * expects the whole cache to be flushed.
+ */
+void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
+{
+    UNUSED(p);
+    UNUSED(bytes);
+}
+
 /*
- * Following section handles the minimal sempahore and mutex implementation in case of baremetal applications.
- * Weak symbols will be overwritten by RTOS definitions and implement true thread-safety. (Done in application layer)
+ * Invalidate the data cache by address and size. Passing NULL as p argument
+ * expects the whole cache to be invalidated.
  */
+void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
+{
+    UNUSED(p);
+    UNUSED(bytes);
+}
+
+/******************************************************************************
+ * Weak functions - Semaphore/Mutex for multi NPU
+ *
+ * Following section handles the minimal semaphore and mutex implementation in
+ * case of baremetal applications. Weak symbols will be overridden by RTOS
+ * definitions and implement true thread-safety (in application layer).
+ ******************************************************************************/
 
-// Baremetal sempahore implementation
 struct ethosu_semaphore_t
 {
-    int count;
+    uint8_t count;
 };
 
-// Minimal needed declaration to allow baremetal functionality.
 static void *ethosu_mutex;
 static void *ethosu_semaphore;
 
@@ -181,1112 +164,602 @@ void *__attribute__((weak)) ethosu_mutex_create(void)
     return NULL;
 }
 
-void __attribute__((weak)) ethosu_mutex_lock(void *mutex)
+void __attribute__((weak)) ethosu_mutex_destroy(void *mutex)
 {
     UNUSED(mutex);
 }
 
-void __attribute__((weak)) ethosu_mutex_unlock(void *mutex)
+int __attribute__((weak)) ethosu_mutex_lock(void *mutex)
 {
     UNUSED(mutex);
+    return 0;
+}
+
+int __attribute__((weak)) ethosu_mutex_unlock(void *mutex)
+{
+    UNUSED(mutex);
+    return 0;
 }
 
 // Baremetal implementation of creating a semaphore
 void *__attribute__((weak)) ethosu_semaphore_create(void)
 {
     struct ethosu_semaphore_t *sem = malloc(sizeof(*sem));
-    sem->count                     = 1;
+    if (sem != NULL)
+    {
+        sem->count = 0;
+    }
     return sem;
 }
 
+void __attribute__((weak)) ethosu_semaphore_destroy(void *sem)
+{
+    free((struct ethosu_semaphore_t *)sem);
+}
+
 // Baremetal simulation of waiting/sleeping for and then taking a semaphore using intrisics
-void __attribute__((weak)) ethosu_semaphore_take(void *sem)
+int __attribute__((weak)) ethosu_semaphore_take(void *sem)
 {
     struct ethosu_semaphore_t *s = sem;
-    while (s->count <= 0)
+    while (s->count == 0)
     {
         __WFE();
     }
     s->count--;
+    return 0;
 }
 
 // Baremetal simulation of giving a semaphore and waking up processes using intrinsics
-void __attribute__((weak)) ethosu_semaphore_give(void *sem)
+int __attribute__((weak)) ethosu_semaphore_give(void *sem)
 {
     struct ethosu_semaphore_t *s = sem;
     s->count++;
     __SEV();
+    return 0;
 }
-// <--- End of semaphore and mutex implementations
 
-static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv);
+/******************************************************************************
+ * Weak functions - Inference begin/end callbacks
+ ******************************************************************************/
 
-void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
+void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, void *user_arg)
 {
-    uint8_t irq_raised = 0;
-
-    LOG_DEBUG("Interrupt. status=0x%08x, qread=%d\n",
-              ethosu_read_reg(&drv->dev, NPU_REG_STATUS),
-              ethosu_read_reg(&drv->dev, NPU_REG_QREAD));
-
-    // Verify that interrupt has been raised
-    (void)ethosu_is_irq_raised(&drv->dev, &irq_raised);
-    assert(irq_raised == 1);
-    drv->irq_triggered = true;
-
-    // Clear interrupt
-    (void)ethosu_clear_irq_status(&drv->dev);
-
-    // Verify that interrupt has been successfully cleared
-    (void)ethosu_is_irq_raised(&drv->dev, &irq_raised);
-    assert(irq_raised == 0);
-
-    if (ethosu_status_has_error(&drv->dev))
-    {
-        ethosu_soft_reset_and_restore(drv);
-        drv->status_error = true;
-    }
-
-    ethosu_semaphore_give(drv->semaphore);
+    UNUSED(user_arg);
+    UNUSED(drv);
 }
 
-static inline void wait_for_irq(struct ethosu_driver *drv)
+void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, void *user_arg)
 {
-    while (1)
-    {
-        if (drv->irq_triggered || drv->abort_inference)
-        {
-            drv->irq_triggered = false;
-            break;
-        }
-
-        ethosu_semaphore_take(drv->semaphore);
-    }
+    UNUSED(user_arg);
+    UNUSED(drv);
 }
 
-void __attribute__((weak)) ethosu_inference_begin(struct ethosu_driver *drv, const void *inference_data)
+/******************************************************************************
+ * Static functions
+ ******************************************************************************/
+static void ethosu_register_driver(struct ethosu_driver *drv)
 {
-    (void)inference_data;
-    (void)drv;
-}
+    ethosu_mutex_lock(ethosu_mutex);
+    drv->next          = registered_drivers;
+    registered_drivers = drv;
+    ethosu_mutex_unlock(ethosu_mutex);
 
-void __attribute__((weak)) ethosu_inference_end(struct ethosu_driver *drv, const void *inference_data)
-{
-    (void)inference_data;
-    (void)drv;
-}
+    ethosu_semaphore_give(ethosu_semaphore);
 
-static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p);
-static int handle_command_stream(struct ethosu_driver *drv,
-                                 const uint8_t *cmd_stream,
-                                 const int cms_length,
-                                 const uint64_t *base_addr,
-                                 const size_t *base_addr_size,
-                                 const int num_base_addr);
-static int read_apb_reg(struct ethosu_driver *drv, uint16_t);
-static int dump_shram(struct ethosu_driver *drv);
-static void dump_npu_register(struct ethosu_driver *drv, int npu_reg, int npu_reg_end);
-static void dump_command_stream(const uint32_t *cmd_stream, const int cms_length, int qread);
-static void npu_axi_init(struct ethosu_driver *drv);
-static struct ethosu_driver *ethosu_find_and_reserve_driver(void);
+    LOG_INFO("New NPU driver registered (handle: 0x%p, NPU: 0x%p)", drv, drv->dev->reg);
+}
 
-int ethosu_init(struct ethosu_driver *drv,
-                const void *base_address,
-                const void *fast_memory,
-                const size_t fast_memory_size,
-                uint32_t secure_enable,
-                uint32_t privilege_enable)
+static int ethosu_deregister_driver(struct ethosu_driver *drv)
 {
-    int return_code = 0;
-
-    LOG_INFO("%s. base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32 ", privileged=%" PRIu32 "\n",
-             __FUNCTION__,
-             base_address,
-             fast_memory,
-             fast_memory_size,
-             secure_enable,
-             privilege_enable);
+    struct ethosu_driver *curr;
+    struct ethosu_driver **prev;
 
-    if (!ethosu_mutex)
-    {
-        ethosu_mutex = ethosu_mutex_create();
-    }
-
-    if (!ethosu_semaphore)
-    {
-        ethosu_semaphore = ethosu_semaphore_create();
-    }
-
-    ethosu_register_driver(drv);
-
-    drv->fast_memory      = (uint32_t)fast_memory;
-    drv->fast_memory_size = fast_memory_size;
-    drv->irq_triggered    = false;
-    drv->semaphore        = ethosu_semaphore_create();
+    ethosu_mutex_lock(ethosu_mutex);
+    curr = registered_drivers;
+    prev = &registered_drivers;
 
-    if (ETHOSU_SUCCESS != ethosu_dev_init(&drv->dev, base_address, secure_enable, privilege_enable))
+    while (curr != NULL)
     {
-        LOG_ERR("Failed in ethosu_dev_init");
-        return -1;
-    }
+        if (curr == drv)
+        {
+            *prev = curr->next;
+            LOG_INFO("NPU driver handle %p deregistered.", drv);
+            ethosu_semaphore_take(ethosu_semaphore);
+            break;
+        }
 
-    if (ETHOSU_SUCCESS !=
-        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_DISABLE, ETHOSU_POWER_Q_DISABLE))
-    {
-        LOG_ERR("Failed to disable clock-q & power-q for Ethos-U\n");
-        return -1;
+        prev = &curr->next;
+        curr = curr->next;
     }
 
-    if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev))
-    {
-        return -1;
-    }
+    ethosu_mutex_unlock(ethosu_mutex);
 
-    if (ETHOSU_SUCCESS != ethosu_wait_for_reset(&drv->dev))
+    if (curr == NULL)
     {
-        LOG_ERR("Failed reset of Ethos-U\n");
+        LOG_ERR("No NPU driver handle registered at address %p.", drv);
         return -1;
     }
 
-    drv->status_error = false;
+    return 0;
+}
 
-    return return_code;
+static void ethosu_reset_job(struct ethosu_driver *drv)
+{
+    memset(&drv->job, 0, sizeof(struct ethosu_job));
 }
 
-int ethosu_get_version(struct ethosu_driver *drv, struct ethosu_version *version)
+static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s const *opt_cfg_p)
 {
-    int return_code = 0;
+    LOG_INFO("Optimizer release nbr: %u patch: %u", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);
 
-    if (NULL != version)
-    {
-        struct ethosu_id id;
-        struct ethosu_config cfg;
-        (void)ethosu_get_id(&drv->dev, &id);
-        (void)ethosu_get_config(&drv->dev, &cfg);
-
-        version->id.version_status      = id.version_status;
-        version->id.version_minor       = id.version_minor;
-        version->id.version_major       = id.version_major;
-        version->id.product_major       = id.product_major;
-        version->id.arch_patch_rev      = id.arch_patch_rev;
-        version->id.arch_minor_rev      = id.arch_minor_rev;
-        version->id.arch_major_rev      = id.arch_major_rev;
-        version->id.driver_patch_rev    = ETHOSU_DRIVER_VERSION_PATCH;
-        version->id.driver_minor_rev    = ETHOSU_DRIVER_VERSION_MINOR;
-        version->id.driver_major_rev    = ETHOSU_DRIVER_VERSION_MAJOR;
-        version->cfg.macs_per_cc        = cfg.macs_per_cc;
-        version->cfg.cmd_stream_version = cfg.cmd_stream_version;
-        version->cfg.shram_size         = cfg.shram_size;
-        version->cfg.custom_dma         = cfg.custom_dma;
-    }
-    else
+    if (ethosu_dev_verify_optimizer_config(drv->dev, opt_cfg_p->cfg, opt_cfg_p->id) != true)
     {
-        return_code = -1;
+        return -1;
     }
 
-    return return_code;
+    return 0;
 }
 
-int ethosu_invoke(struct ethosu_driver *drv,
-                  const void *custom_data_ptr,
-                  const int custom_data_size,
-                  const uint64_t *base_addr,
-                  const size_t *base_addr_size,
-                  const int num_base_addr)
+static int handle_command_stream(struct ethosu_driver *drv, const uint8_t *cmd_stream, const int cms_length)
 {
-    const struct custom_data_s *data_ptr = custom_data_ptr;
-    const struct custom_data_s *data_end = custom_data_ptr + custom_data_size;
-    int return_code                      = 0;
-
-    LOG_INFO("%s\n", __FUNCTION__);
+    uint32_t cms_bytes       = cms_length * BYTES_IN_32_BITS;
+    ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream;
 
-    // First word in custom_data_ptr should contain "Custom Operator Payload 1"
-    if (data_ptr->word != ETHOSU_FOURCC)
-    {
-        LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x\n", data_ptr->word, ETHOSU_FOURCC);
-        return -1;
-    }
+    LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d", cmd_stream, cms_length);
 
-    // Custom data length must be a multiple of 32 bits
-    if ((custom_data_size % BYTES_IN_32_BITS) != 0)
+    if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN))
     {
-        LOG_ERR("ethosu_invoke ERROR custom_data_size=0x%x not a multiple of 4\n", custom_data_size);
+        LOG_ERR("Command stream addr %p not aligned to 16 bytes", cmd_stream);
         return -1;
     }
 
-    ++data_ptr;
-
-    // Adjust base address to fast memory area
-    if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX)
+    // Verify 16 byte alignment for base address'
+    for (int i = 0; i < drv->job.num_base_addr; i++)
     {
-        uint64_t *fast_memory = (uint64_t *)&base_addr[FAST_MEMORY_BASE_ADDR_INDEX];
-
-        if (base_addr_size != NULL && base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size)
+        if (0 != (drv->job.base_addr[i] & MASK_16_BYTE_ALIGN))
         {
-            LOG_ERR("Fast memory area too small. fast_memory_size=%u, base_addr_size=%u\n",
-                    drv->fast_memory_size,
-                    base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
+            LOG_ERR("Base addr %d: 0x%llx not aligned to 16 bytes", i, drv->job.base_addr[i]);
             return -1;
         }
-
-        *fast_memory = drv->fast_memory;
     }
 
-    if (!drv->dev_power_always_on)
+    // Flush the cache if available on CPU.
+    // The upcasting to uin32_t* is ok since the pointer never is dereferenced.
+    // The base_addr_size is null if invoking from prior to invoke_V2, in that case
+    // the whole cache is being flushed.
+
+    if (drv->job.base_addr_size != NULL)
     {
-        // Only soft reset if securty state or privilege level needs changing
-        if (drv->dev.proto != ethosu_read_reg(&drv->dev, NPU_REG_PROT))
+        ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes);
+        for (int i = 0; i < drv->job.num_base_addr; i++)
         {
-            if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev))
-            {
-                return -1;
-            }
+            ethosu_flush_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
         }
-
-        drv->status_error = false;
-        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
-        ethosu_restore_pmu_config(&drv->dev);
-        npu_axi_init(drv);
+    }
+    else
+    {
+        ethosu_flush_dcache(NULL, 0);
     }
 
-    drv->status_error = false;
-
-    ethosu_inference_begin(drv, custom_data_ptr);
-    while (data_ptr < data_end)
+    // Request power gating disabled during inference run
+    if (ethosu_request_power(drv))
     {
-        int ret = 0;
-        switch (data_ptr->driver_action_command)
-        {
-        case OPTIMIZER_CONFIG:
-            LOG_INFO("ethosu_invoke OPTIMIZER_CONFIG\n");
-            struct opt_cfg_s *opt_cfg_p = (struct opt_cfg_s *)data_ptr;
+        LOG_ERR("Failed to request power");
+        return -1;
+    }
 
-            ret = handle_optimizer_config(drv, opt_cfg_p);
-            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
-            break;
-        case COMMAND_STREAM:
-            LOG_INFO("ethosu_invoke COMMAND_STREAM\n");
-            void *command_stream = (uint8_t *)(data_ptr) + sizeof(struct custom_data_s);
-            int cms_length       = (data_ptr->reserved << 16) | data_ptr->length;
+    drv->job.state = ETHOSU_JOB_RUNNING;
 
-            drv->abort_inference = false;
-            // It is safe to clear this flag without atomic, because npu is not running.
-            drv->irq_triggered = false;
+    // Inference begin callback
+    ethosu_inference_begin(drv, drv->job.user_arg);
 
-            ret = handle_command_stream(drv, command_stream, cms_length, base_addr, base_addr_size, num_base_addr);
+    // Execute the command stream
+    ethosu_dev_run_command_stream(drv->dev, cmd_stream, cms_bytes, drv->job.base_addr, drv->job.num_base_addr);
 
-            if (return_code == -1 && drv->abort_inference)
-            {
-                uint32_t qread = 0;
-                ethosu_get_qread(&drv->dev, &qread);
-                LOG_ERR("NPU timeout\n");
-                dump_command_stream(command_stream, cms_length, qread);
-                dump_npu_register(drv, 0x200, 0x2BF);
-                dump_npu_register(drv, 0x800, 0xB3F);
-                dump_shram(drv);
-            }
+    return 0;
+}
 
-            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
-            break;
-        case READ_APB_REG:
-            LOG_INFO("ethosu_invoke READ_APB_REG\n");
-            ret = read_apb_reg(drv, data_ptr->driver_action_data);
-            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
-            break;
-        case DUMP_SHRAM:
-            LOG_INFO("ethosu_invoke DUMP_SHRAM\n");
-            ret = dump_shram(drv);
-            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
-            break;
-        case NOP:
-            LOG_INFO("ethosu_invoke NOP\n");
-            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
-            break;
-        default:
-            LOG_ERR("ethosu_invoke UNSUPPORTED driver_action_command %d \n", data_ptr->driver_action_command);
-            ret = -1;
-            break;
-        }
-        if (ret != 0)
-        {
-            return_code = -1;
-            break;
-        }
-    }
-    ethosu_inference_end(drv, custom_data_ptr);
+/******************************************************************************
+ * Weak functions - Interrupt handler
+ ******************************************************************************/
+void __attribute__((weak)) ethosu_irq_handler(struct ethosu_driver *drv)
+{
+    LOG_DEBUG("Got interrupt from Ethos-U");
 
-    if (!drv->status_error && !drv->dev_power_always_on)
+    drv->job.state = ETHOSU_JOB_DONE;
+    if (!ethosu_dev_handle_interrupt(drv->dev))
     {
-        ethosu_save_pmu_counters(&drv->dev);
-        set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
+        drv->status_error = true;
     }
-
-    return return_code;
+    ethosu_semaphore_give(drv->semaphore);
 }
 
-void ethosu_abort(struct ethosu_driver *drv)
-{
-    drv->abort_inference = true;
-}
+/******************************************************************************
+ * Functions API
+ ******************************************************************************/
 
-void ethosu_set_power_mode(struct ethosu_driver *drv, bool always_on)
+int ethosu_init(struct ethosu_driver *drv,
+                void *const base_address,
+                const void *fast_memory,
+                const size_t fast_memory_size,
+                uint32_t secure_enable,
+                uint32_t privilege_enable)
 {
-    drv->dev_power_always_on = always_on;
+    LOG_INFO("Initializing NPU: base_address=%p, fast_memory=%p, fast_memory_size=%zu, secure=%" PRIu32
+             ", privileged=%" PRIu32,
+             base_address,
+             fast_memory,
+             fast_memory_size,
+             secure_enable,
+             privilege_enable);
 
-    if (always_on)
+    if (!ethosu_mutex)
     {
-        npu_axi_init(drv);
+        ethosu_mutex = ethosu_mutex_create();
     }
-}
 
-int ethosu_register_driver(struct ethosu_driver *drv)
-{
-    // Safeguard check for if driver is already registered
-    struct ethosu_driver *cur = registered_drivers;
-    while (cur != NULL)
+    if (!ethosu_semaphore)
     {
-        if (cur == drv)
+        ethosu_semaphore = ethosu_semaphore_create();
+        if (!ethosu_semaphore)
         {
-            LOG_ERR("%s: NPU driver at address %p is already registered.\n", __FUNCTION__, drv);
+            LOG_ERR("Failed to create global driver semaphore");
             return -1;
         }
-        cur = cur->next;
     }
 
-    drv->next = registered_drivers;
-    // Designate new registered driver HEAD
-    registered_drivers = drv;
+    drv->fast_memory           = (uint32_t)fast_memory;
+    drv->fast_memory_size      = fast_memory_size;
+    drv->power_request_counter = 0;
 
-    LOG_INFO("%s: New NPU driver at address %p is registered.\n", __FUNCTION__, drv);
-    return 0;
-}
+    // Initialize the device and set requested security state and privilege mode
+    drv->dev = ethosu_dev_init(base_address, secure_enable, privilege_enable);
 
-int ethosu_deregister_driver(struct ethosu_driver *drv)
-{
-    struct ethosu_driver *cur   = registered_drivers;
-    struct ethosu_driver **prev = &registered_drivers;
-
-    while (cur != NULL)
+    if (drv->dev == NULL)
     {
-        if (cur == drv)
-        {
-            *prev = cur->next;
-            LOG_INFO("%s: NPU driver at address %p is deregistered.\n", __FUNCTION__, drv);
-            return 0;
-        }
-
-        prev = &cur->next;
-        cur  = cur->next;
+        LOG_ERR("Failed to initialize Ethos-U device");
+        return -1;
     }
 
-    LOG_ERR("%s: NPU driver at address %p does not match a registered driver and therefore may not be deregistered.\n",
-            __FUNCTION__,
-            drv);
-
-    return -1;
-}
-
-struct ethosu_driver *ethosu_reserve_driver(void)
-{
-    struct ethosu_driver *drv = NULL;
-
-    do
-    {
-        ethosu_mutex_lock(ethosu_mutex);
-        drv = ethosu_find_and_reserve_driver();
-        ethosu_mutex_unlock(ethosu_mutex);
-
-        if (drv != NULL)
-        {
-            break;
-        }
-
-        LOG_INFO("%s - Waiting for driver \n", __FUNCTION__);
-        ethosu_semaphore_take(ethosu_semaphore);
-
-    } while (1);
-
-    return drv;
-}
-
-static struct ethosu_driver *ethosu_find_and_reserve_driver(void)
-{
-    struct ethosu_driver *drv = registered_drivers;
-
-    while (drv != NULL)
+    drv->semaphore = ethosu_semaphore_create();
+    if (!drv->semaphore)
     {
-        if (!drv->reserved)
-        {
-            drv->reserved = true;
-            LOG_INFO("%s - Driver %p reserved.\n", __FUNCTION__, drv);
-            return drv;
-        }
-        drv = drv->next;
+        LOG_ERR("Failed to create driver semaphore");
+        ethosu_dev_deinit(drv->dev);
+        drv->dev = NULL;
+        return -1;
     }
 
-    LOG_INFO("%s: No available drivers.\n", __FUNCTION__);
+    drv->status_error = false;
+
+    ethosu_reset_job(drv);
+    ethosu_register_driver(drv);
 
-    return NULL;
+    return 0;
 }
 
-void ethosu_release_driver(struct ethosu_driver *drv)
+void ethosu_deinit(struct ethosu_driver *drv)
 {
-    ethosu_mutex_lock(ethosu_mutex);
-    if (drv != NULL && drv->reserved)
-    {
-        drv->reserved = false;
-        LOG_INFO("%s - Driver %p released\n", __FUNCTION__, drv);
-        ethosu_semaphore_give(ethosu_semaphore);
-    }
-    ethosu_mutex_unlock(ethosu_mutex);
+    ethosu_deregister_driver(drv);
+    ethosu_semaphore_destroy(drv->semaphore);
+    ethosu_dev_deinit(drv->dev);
+    drv->dev = NULL;
 }
 
-static int ethosu_soft_reset_and_restore(struct ethosu_driver *drv)
+int ethosu_soft_reset(struct ethosu_driver *drv)
 {
-
-    if (ETHOSU_SUCCESS != ethosu_soft_reset(&drv->dev))
+    // Soft reset the NPU
+    if (ethosu_dev_soft_reset(drv->dev) != ETHOSU_SUCCESS)
     {
+        LOG_ERR("Failed to soft-reset NPU");
         return -1;
     }
 
-    set_clock_and_power_request(drv, ETHOSU_INFERENCE_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_DISABLE);
-
-    npu_axi_init(drv);
-    ethosu_restore_pmu_config(&drv->dev);
+    // Update power and clock gating after the soft reset
+    ethosu_dev_set_clock_and_power(drv->dev,
+                                   drv->power_request_counter > 0 ? ETHOSU_CLOCK_Q_DISABLE : ETHOSU_CLOCK_Q_ENABLE,
+                                   drv->power_request_counter > 0 ? ETHOSU_POWER_Q_DISABLE : ETHOSU_POWER_Q_ENABLE);
 
     return 0;
 }
 
-enum ethosu_error_codes set_clock_and_power_request(struct ethosu_driver *drv,
-                                                    enum ethosu_request_clients client,
-                                                    enum ethosu_clock_q_request clock_request,
-                                                    enum ethosu_power_q_request power_request)
+int ethosu_request_power(struct ethosu_driver *drv)
 {
-    // Set clock request bit for client
-    if (clock_request == ETHOSU_CLOCK_Q_DISABLE)
+    // Check if this is the first power request, increase counter
+    if (drv->power_request_counter++ == 0)
     {
-        drv->clock_request |= (1 << client);
-    }
-    else
-    {
-        drv->clock_request &= ~(1 << client);
+        // Always reset to a known state. Changes to requested
+        // security state/privilege mode if necessary.
+        if (ethosu_soft_reset(drv))
+        {
+            LOG_ERR("Failed to request power for Ethos-U");
+            drv->power_request_counter--;
+            return -1;
+        }
     }
-    // Get current clock request (ENABLE if both PMU and INFERENCE asks for clock request, else DISABLE)
-    clock_request = drv->clock_request == 0 ? ETHOSU_CLOCK_Q_ENABLE : ETHOSU_CLOCK_Q_DISABLE;
+    return 0;
+}
 
-    // Set power request bit for client
-    if (power_request == ETHOSU_POWER_Q_DISABLE)
+void ethosu_release_power(struct ethosu_driver *drv)
+{
+    if (drv->power_request_counter == 0)
     {
-        drv->power_request |= (1 << client);
+        LOG_WARN("No power request left to release, reference counter is 0");
     }
     else
     {
-        drv->power_request &= ~(1 << client);
+        // Decrement ref counter and enable power gating if no requests remain
+        if (--drv->power_request_counter == 0)
+        {
+            ethosu_dev_set_clock_and_power(drv->dev, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
+        }
     }
-    // Get current power request (ENABLE if both PMU and INFERENCE asks for power request, else DISABLE)
-    power_request = drv->power_request == 0 ? ETHOSU_POWER_Q_ENABLE : ETHOSU_POWER_Q_DISABLE;
+}
 
-    // Set clock and power
-    enum ethosu_error_codes ret = ethosu_set_clock_and_power(&drv->dev, clock_request, power_request);
+void ethosu_get_driver_version(struct ethosu_driver_version *ver)
+{
+    assert(ver != NULL);
+    ver->major = ETHOSU_DRIVER_VERSION_MAJOR;
+    ver->minor = ETHOSU_DRIVER_VERSION_MINOR;
+    ver->patch = ETHOSU_DRIVER_VERSION_PATCH;
+}
 
-    return ret;
+void ethosu_get_hw_info(struct ethosu_driver *drv, struct ethosu_hw_info *hw)
+{
+    assert(hw != NULL);
+    ethosu_dev_get_hw_info(drv->dev, hw);
 }
 
-static int handle_optimizer_config(struct ethosu_driver *drv, struct opt_cfg_s *opt_cfg_p)
+int ethosu_wait(struct ethosu_driver *drv, bool block)
 {
-    struct ethosu_config cfg;
-    struct ethosu_id id;
-    int return_code = 0;
-
-    LOG_INFO("handle_optimizer_config:\n");
-    LOG_INFO("Optimizer release nbr: %d patch: %d\n", opt_cfg_p->da_data.rel_nbr, opt_cfg_p->da_data.patch_nbr);
-    LOG_INFO("Optimizer config cmd_stream_version: %d macs_per_cc: %d shram_size: %d custom_dma: %d\n",
-             opt_cfg_p->cmd_stream_version,
-             opt_cfg_p->macs_per_cc,
-             opt_cfg_p->shram_size,
-             opt_cfg_p->custom_dma);
-    LOG_INFO("Optimizer config Ethos-U version: %d.%d.%d\n",
-             opt_cfg_p->arch_major_rev,
-             opt_cfg_p->arch_minor_rev,
-             opt_cfg_p->arch_patch_rev);
-
-    (void)ethosu_get_config(&drv->dev, &cfg);
-    (void)ethosu_get_id(&drv->dev, &id);
-    LOG_INFO("Ethos-U config cmd_stream_version: %" PRIu32 " macs_per_cc: %" PRIu32 " shram_size: %" PRIu32
-             " custom_dma: %" PRIu32 "\n",
-             cfg.cmd_stream_version,
-             cfg.macs_per_cc,
-             cfg.shram_size,
-             cfg.custom_dma);
-    LOG_INFO("Ethos-U version: %" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n",
-             id.arch_major_rev,
-             id.arch_minor_rev,
-             id.arch_patch_rev);
-
-    if ((cfg.macs_per_cc != opt_cfg_p->macs_per_cc) || (cfg.shram_size != opt_cfg_p->shram_size) ||
-        (cfg.cmd_stream_version != opt_cfg_p->cmd_stream_version) || (!cfg.custom_dma && opt_cfg_p->custom_dma))
+    int ret = 0;
+
+    switch (drv->job.state)
     {
-        if (cfg.macs_per_cc != opt_cfg_p->macs_per_cc)
-        {
-            LOG_ERR("NPU config mismatch: npu.macs_per_cc=%" PRIu32 " optimizer.macs_per_cc=%d\n",
-                    cfg.macs_per_cc,
-                    opt_cfg_p->macs_per_cc);
-        }
-        if (cfg.shram_size != opt_cfg_p->shram_size)
-        {
-            LOG_ERR("NPU config mismatch: npu.shram_size=%" PRIu32 " optimizer.shram_size=%d\n",
-                    cfg.shram_size,
-                    opt_cfg_p->shram_size);
-        }
-        if (cfg.cmd_stream_version != opt_cfg_p->cmd_stream_version)
+    case ETHOSU_JOB_IDLE:
+        LOG_ERR("Inference job not running...");
+        ret = -2;
+        break;
+    case ETHOSU_JOB_RUNNING:
+        if (!block)
         {
-            LOG_ERR("NPU config mismatch: npu.cmd_stream_version=%" PRIu32 " optimizer.cmd_stream_version=%d\n",
-                    cfg.cmd_stream_version,
-                    opt_cfg_p->cmd_stream_version);
+            // Inference still running, do not block
+            ret = 1;
+            break;
         }
-        if (!cfg.custom_dma && opt_cfg_p->custom_dma)
+        // fall through
+    case ETHOSU_JOB_DONE:
+        // Wait for interrupt in blocking mode. In non-blocking mode
+        // the interrupt has already triggered
+        ethosu_semaphore_take(drv->semaphore);
+
+        // Inference done callback
+        ethosu_inference_end(drv, drv->job.user_arg);
+
+        // Release the request that kept power gating disabled
+        ethosu_release_power(drv);
+
+        // Check NPU and interrupt status
+        if (drv->status_error)
         {
-            LOG_ERR("NPU config mismatch: npu.custom_dma=%" PRIu32 " optimize.custom_dma=%d\n",
-                    cfg.custom_dma,
-                    opt_cfg_p->custom_dma);
+            LOG_ERR("NPU error(s) occured during inference.");
+            ethosu_dev_print_err_status(drv->dev);
+
+            // Reset the NPU
+            (void)ethosu_soft_reset(drv);
+            // NPU is no longer in error state
+            drv->status_error = false;
+
+            ret = -1;
         }
-        LOG_ERR("Did you choose the correct target core? This model was compiled for a different Ethos configuration\n");
-        return_code = -1;
-    }
 
-    if ((id.arch_major_rev != opt_cfg_p->arch_major_rev) || (id.arch_minor_rev < opt_cfg_p->arch_minor_rev))
-    {
-        LOG_ERR("NPU arch mismatch: npu.arch=%" PRIu32 ".%" PRIu32 ".%" PRIu32 " optimizer.arch=%d.%d.%d\n",
-                id.arch_major_rev,
-                id.arch_minor_rev,
-                id.arch_patch_rev,
-                opt_cfg_p->arch_major_rev,
-                opt_cfg_p->arch_minor_rev,
-                opt_cfg_p->arch_patch_rev);
-        return_code = -1;
-    }
+        if (ret == 0)
+        {
+            // Invalidate cache
+            if (drv->job.base_addr_size != NULL)
+            {
+                for (int i = 0; i < drv->job.num_base_addr; i++)
+                {
+                    ethosu_invalidate_dcache((uint32_t *)(uintptr_t)drv->job.base_addr[i], drv->job.base_addr_size[i]);
+                }
+            }
+            else
+            {
+                ethosu_invalidate_dcache(NULL, 0);
+            }
 
-#if !defined(LOG_ENABLED)
-    UNUSED(opt_cfg_p);
-#endif
-    return return_code;
-}
+            LOG_DEBUG("Inference finished successfully...");
+        }
 
-static void npu_axi_init(struct ethosu_driver *drv)
-{
-    ethosu_set_qconfig(&drv->dev, NPU_QCONFIG);
-
-    ethosu_set_regioncfg(&drv->dev, 0, NPU_REGIONCFG_0);
-    ethosu_set_regioncfg(&drv->dev, 1, NPU_REGIONCFG_1);
-    ethosu_set_regioncfg(&drv->dev, 2, NPU_REGIONCFG_2);
-    ethosu_set_regioncfg(&drv->dev, 3, NPU_REGIONCFG_3);
-    ethosu_set_regioncfg(&drv->dev, 4, NPU_REGIONCFG_4);
-    ethosu_set_regioncfg(&drv->dev, 5, NPU_REGIONCFG_5);
-    ethosu_set_regioncfg(&drv->dev, 6, NPU_REGIONCFG_6);
-    ethosu_set_regioncfg(&drv->dev, 7, NPU_REGIONCFG_7);
-
-    (void)ethosu_set_axi_limit0(&drv->dev,
-                                AXI_LIMIT0_MAX_BEATS_BYTES,
-                                AXI_LIMIT0_MEM_TYPE,
-                                AXI_LIMIT0_MAX_OUTSTANDING_READS,
-                                AXI_LIMIT0_MAX_OUTSTANDING_WRITES);
-    (void)ethosu_set_axi_limit1(&drv->dev,
-                                AXI_LIMIT1_MAX_BEATS_BYTES,
-                                AXI_LIMIT1_MEM_TYPE,
-                                AXI_LIMIT1_MAX_OUTSTANDING_READS,
-                                AXI_LIMIT1_MAX_OUTSTANDING_WRITES);
-    (void)ethosu_set_axi_limit2(&drv->dev,
-                                AXI_LIMIT2_MAX_BEATS_BYTES,
-                                AXI_LIMIT2_MEM_TYPE,
-                                AXI_LIMIT2_MAX_OUTSTANDING_READS,
-                                AXI_LIMIT2_MAX_OUTSTANDING_WRITES);
-    (void)ethosu_set_axi_limit3(&drv->dev,
-                                AXI_LIMIT3_MAX_BEATS_BYTES,
-                                AXI_LIMIT3_MEM_TYPE,
-                                AXI_LIMIT3_MAX_OUTSTANDING_READS,
-                                AXI_LIMIT3_MAX_OUTSTANDING_WRITES);
-}
+        // Reset internal job (state resets to IDLE)
+        ethosu_reset_job(drv);
+        break;
 
-/* Default implementation to flush the data cache. Override if available on the targeted device.
- * Passing NULL as p argument expects the whole cache to be flushed.
- */
-void __attribute__((weak)) ethosu_flush_dcache(uint32_t *p, size_t bytes)
-{
-    (void)p;
-    (void)bytes;
-}
+    default:
+        LOG_ERR("Unexpected job state");
+        ethosu_reset_job(drv);
+        ret = -1;
+        break;
+    }
 
-/* Default implementation to invalidate the data cache. Override if available on the targeted device.
- * Passing NULL as p argument expects the whole cache to be flushed.
- */
-void __attribute__((weak)) ethosu_invalidate_dcache(uint32_t *p, size_t bytes)
-{
-    (void)p;
-    (void)bytes;
+    // Return inference job status
+    return ret;
 }
 
-static int handle_command_stream(struct ethosu_driver *drv,
-                                 const uint8_t *cmd_stream,
-                                 const int cms_length,
-                                 const uint64_t *base_addr,
-                                 const size_t *base_addr_size,
-                                 const int num_base_addr)
+int ethosu_invoke_async(struct ethosu_driver *drv,
+                        const void *custom_data_ptr,
+                        const int custom_data_size,
+                        uint64_t *const base_addr,
+                        const size_t *base_addr_size,
+                        const int num_base_addr,
+                        void *user_arg)
 {
-    uint32_t qread           = 0;
-    uint32_t cms_bytes       = cms_length * BYTES_IN_32_BITS;
-    ptrdiff_t cmd_stream_ptr = (ptrdiff_t)cmd_stream;
 
-    LOG_INFO("handle_command_stream: cmd_stream=%p, cms_length %d\n", cmd_stream, cms_length);
+    const struct cop_data_s *data_ptr = custom_data_ptr;
+    const struct cop_data_s *data_end = (struct cop_data_s *)((ptrdiff_t)custom_data_ptr + custom_data_size);
 
-    if (0 != ((ptrdiff_t)cmd_stream & MASK_16_BYTE_ALIGN))
+    // Make sure an inference is not already running
+    if (drv->job.state != ETHOSU_JOB_IDLE)
     {
-        LOG_ERR("Error: Command stream addr %p not aligned to 16 bytes\n", cmd_stream);
+        LOG_ERR("Inference already running, or waiting to be cleared...");
         return -1;
     }
 
-    bool base_addr_invalid = false;
-    for (int i = 0; i < num_base_addr; i++)
+    drv->job.state            = ETHOSU_JOB_IDLE;
+    drv->job.custom_data_ptr  = custom_data_ptr;
+    drv->job.custom_data_size = custom_data_size;
+    drv->job.base_addr        = base_addr;
+    drv->job.base_addr_size   = base_addr_size;
+    drv->job.num_base_addr    = num_base_addr;
+    drv->job.user_arg         = user_arg;
+
+    // First word in custom_data_ptr should contain "Custom Operator Payload 1"
+    if (data_ptr->word != ETHOSU_FOURCC)
     {
-        if (0 != (base_addr[i] & MASK_16_BYTE_ALIGN))
-        {
-            LOG_ERR("Error: Base addr %d: 0x%llx not aligned to 16 bytes\n", i, base_addr[i]);
-            base_addr_invalid = true;
-        }
+        LOG_ERR("Custom Operator Payload: %" PRIu32 " is not correct, expected %x", data_ptr->word, ETHOSU_FOURCC);
+        goto err;
     }
 
-    if (base_addr_invalid)
+    // Custom data length must be a multiple of 32 bits
+    if ((custom_data_size % BYTES_IN_32_BITS) != 0)
     {
-        return -1;
+        LOG_ERR("custom_data_size=0x%x not a multiple of 4", (unsigned)custom_data_size);
+        goto err;
     }
 
-    /* Flush the cache if available on our CPU.
-     * The upcasting to uin32_t* is ok since the pointer never is dereferenced.
-     * The base_addr_size is null if invoking from prior to invoke_V2, in that case
-     * the whole cache is being flushed.
-     */
+    data_ptr++;
 
-    if (base_addr_size != NULL)
+    // Adjust base address to fast memory area
+    if (drv->fast_memory != 0 && num_base_addr >= FAST_MEMORY_BASE_ADDR_INDEX)
     {
-        ethosu_flush_dcache((uint32_t *)cmd_stream_ptr, cms_bytes);
-        for (int i = 0; i < num_base_addr; i++)
+
+        if (base_addr_size != NULL && base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX] > drv->fast_memory_size)
         {
-            ethosu_flush_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]);
+            LOG_ERR("Fast memory area too small. fast_memory_size=%u, base_addr_size=%u",
+                    drv->fast_memory_size,
+                    base_addr_size[FAST_MEMORY_BASE_ADDR_INDEX]);
+            goto err;
         }
-    }
-    else
-    {
-        ethosu_flush_dcache(NULL, 0);
-    }
 
-    if (ETHOSU_SUCCESS != ethosu_run_command_stream(&drv->dev, cmd_stream, cms_bytes, base_addr, num_base_addr))
-    {
-        return -1;
+        base_addr[FAST_MEMORY_BASE_ADDR_INDEX] = drv->fast_memory;
     }
 
-    wait_for_irq(drv);
-
-    if (drv->status_error)
-    {
-        return -1;
-    }
+    drv->status_error = false;
 
-    if (base_addr_size != NULL)
+    // Parse Custom Operator Payload data
+    while (data_ptr < data_end)
     {
-        for (int i = 0; i < num_base_addr; i++)
+        switch (data_ptr->driver_action_command)
         {
-            ethosu_invalidate_dcache((uint32_t *)(uintptr_t)base_addr[i], base_addr_size[i]);
-        }
-    }
-    else
-    {
-        ethosu_invalidate_dcache(NULL, 0);
-    }
+        case OPTIMIZER_CONFIG:
+            LOG_DEBUG("OPTIMIZER_CONFIG");
+            struct opt_cfg_s const *opt_cfg_p = (const struct opt_cfg_s *)data_ptr;
 
-    (void)ethosu_get_qread(&drv->dev, &qread);
-    if (qread != cms_bytes)
-    {
-        LOG_WARN(
-            "Failure: IRQ received but qread (%" PRIu32 ") not at end of stream (%" PRIu32 ").\n", qread, cms_bytes);
-        return -1;
+            if (handle_optimizer_config(drv, opt_cfg_p) < 0)
+            {
+                goto err;
+            }
+            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + OPTIMIZER_CONFIG_LENGTH_32_BIT_WORD;
+            break;
+        case COMMAND_STREAM:
+            // Vela only supports putting one COMMAND_STREAM per op
+            LOG_DEBUG("COMMAND_STREAM");
+            const uint8_t *command_stream = (const uint8_t *)(data_ptr + 1);
+            int cms_length                = (data_ptr->reserved << 16) | data_ptr->length;
+
+            if (handle_command_stream(drv, command_stream, cms_length) < 0)
+            {
+                goto err;
+            }
+            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD + cms_length;
+            break;
+        case NOP:
+            LOG_DEBUG("NOP");
+            data_ptr += DRIVER_ACTION_LENGTH_32_BIT_WORD;
+            break;
+        default:
+            LOG_ERR("UNSUPPORTED driver_action_command: %u", data_ptr->driver_action_command);
+            goto err;
+            break;
+        }
     }
 
     return 0;
+err:
+    LOG_ERR("Failed to invoke inference.");
+    ethosu_reset_job(drv);
+    return -1;
 }
 
-static int read_apb_reg(struct ethosu_driver *drv, uint16_t da_data)
+int ethosu_invoke_v3(struct ethosu_driver *drv,
+                     const void *custom_data_ptr,
+                     const int custom_data_size,
+                     uint64_t *const base_addr,
+                     const size_t *base_addr_size,
+                     const int num_base_addr,
+                     void *user_arg)
 {
-    uint32_t *reg_p;
-    uint32_t start_address = (uint32_t)(da_data & APB_START_ADDR_MASK);
-    uint16_t num_reg       = (da_data >> APB_NUM_REG_BIT_SHIFT) + 1;
-
-    reg_p = (uint32_t *)malloc(num_reg * sizeof(uint32_t));
-    if (reg_p == NULL)
+    if (ethosu_invoke_async(
+            drv, custom_data_ptr, custom_data_size, base_addr, base_addr_size, num_base_addr, user_arg) < 0)
     {
-        LOG_INFO("read_apb_reg, Error! memory not allocated.");
         return -1;
     }
 
-    if (ETHOSU_SUCCESS == ethosu_read_apb_reg(&drv->dev, start_address, num_reg, reg_p))
-    {
-        for (int i = 0; i < num_reg; i++)
-        {
-            LOG_INFO(
-                "NPU_REG ADDR 0x%04" PRIu32 " = 0x%08" PRIu32 "\n", (start_address + (i * BYTES_IN_32_BITS)), reg_p[i]);
-        }
-    }
-    else
-    {
-        free(reg_p);
-        return -1;
-    }
-
-    free(reg_p);
-    return 0;
+    return ethosu_wait(drv, true);
 }
 
-static int dump_shram(struct ethosu_driver *drv)
+struct ethosu_driver *ethosu_reserve_driver(void)
 {
-    struct ethosu_config cfg;
-    uint32_t *shram_p;
-    (void)ethosu_get_config(&drv->dev, &cfg);
+    struct ethosu_driver *drv = NULL;
 
-    LOG_INFO("dump_shram size = %" PRIu32 " KB\n", cfg.shram_size);
+    LOG_INFO("Acquiring NPU driver handle");
+    ethosu_semaphore_take(ethosu_semaphore); // This is meant to block until available
 
-    shram_p = (uint32_t *)malloc(BYTES_1KB);
-    if (shram_p == NULL)
-    {
-        LOG_ERR("read_shram, Error! memory not allocated.");
-        return -1;
-    }
+    ethosu_mutex_lock(ethosu_mutex);
+    drv = registered_drivers;
 
-    for (uint32_t i = 0; i < cfg.shram_size; i++)
+    while (drv != NULL)
     {
-        ethosu_get_shram_data(&drv->dev, i, (uint32_t *)shram_p);
-        // Output 1KB of SHRAM
-        LOG_INFO("***SHRAM SECTION %" PRIu32 "***\n", i);
-        for (int j = 0; j < (BYTES_1KB / BYTES_IN_32_BITS); j++)
+        if (!drv->reserved)
         {
-            LOG_INFO("[0x%04" PRIx32 "] %" PRIx32 "\n", (i * 1024 + j * 4), shram_p[j]);
+            drv->reserved = true;
+            LOG_DEBUG("NPU driver handle %p reserved", drv);
+            break;
         }
+        drv = drv->next;
     }
-    free(shram_p);
-
-    return 0;
-}
-
-typedef struct
-{
-    int number;
-    const char *name;
-} name_lookup_t;
-
-static const name_lookup_t npu_reg_name_tbl[] = {
-    {0x200, "KERNEL_X"},
-    {0x204, "KERNEL_Y"},
-    {0x208, "KERNEL_W_M1"},
-    {0x20C, "KERNEL_H_M1"},
-    {0x210, "OFM_CBLK_WIDTH_M1"},
-    {0x214, "OFM_CBLK_HEIGHT_M1"},
-    {0x218, "OFM_CBLK_DEPTH_M1"},
-    {0x21c, "IFM_CBLK_DEPTH_M1"},
-    {0x220, "OFM_X"},
-    {0x224, "OFM_Y"},
-    {0x228, "OFM_Z"},
-    {0x22C, "IFM_Z"},
-    {0x230, "PAD_TOP"},
-    {0x234, "PAD_LEFT"},
-    {0x238, "IFM_CBLK_WIDTH"},
-    {0x23C, "IFM_CBLK_HEIGHT"},
-    {0x240, "DMA_IFM_SRC"},
-    {0x244, "DMA_IFM_SRC_HI"},
-    {0x248, "DMA_IFM_DST"},
-    {0x24c, "DMA_OFM_SRC"},
-    {0x250, "DMA_OFM_DST"},
-    {0x254, "DMA_OFM_DST_HI"},
-    {0x258, "DMA_WEIGHT_SRC"},
-    {0x25c, "DMA_WEIGHT_SRC_HI"},
-    {0x260, "DMA_CMD_SRC"},
-    {0x264, "DMA_CMD_SRC_HI"},
-    {0x268, "DMA_CMD_SIZE"},
-    {0x26c, "DMA_M2M_SRC"},
-    {0x270, "DMA_M2M_SRC_HI"},
-    {0x274, "DMA_M2M_DST"},
-    {0x278, "DMA_M2M_DST_HI"},
-    {0x27c, "CURRENT_QREAD"},
-    {0x280, "DMA_SCALE_SRC"},
-    {0x284, "DMA_SCALE_SRC_HI"},
-    {0x2BC, "CURRENT_CMD"},
-    {0x800, "IFM_PAD_TOP"},
-    {0x804, "IFM_PAD_LEFT"},
-    {0x808, "IFM_PAD_RIGHT"},
-    {0x80C, "IFM_PAD_BOTTOM"},
-    {0x810, "IFM_DEPTH_M1"},
-    {0x814, "IFM_PRECISION"},
-    {0x81C, "IFM_UPSCALE"},
-    {0x824, "IFM_ZERO_POINT"},
-    {0x828, "IFM_WIDTH0_M1"},
-    {0x82C, "IFM_HEIGHT0_M1"},
-    {0x830, "IFM_HEIGHT1_M1"},
-    {0x834, "IFM_IB_END"},
-    {0x83C, "IFM_REGION"},
-    {0x844, "OFM_WIDTH_M1"},
-    {0x848, "OFM_HEIGHT_M1"},
-    {0x84C, "OFM_DEPTH_M1"},
-    {0x850, "OFM_PRECISION"},
-    {0x854, "OFM_BLK_WIDTH_M1"},
-    {0x858, "OFM_BLK_HEIGHT_M1"},
-    {0x85C, "OFM_BLK_DEPTH_M1"},
-    {0x860, "OFM_ZERO_POINT"},
-    {0x868, "OFM_WIDTH0_M1"},
-    {0x86C, "OFM_HEIGHT0_M1"},
-    {0x870, "OFM_HEIGHT1_M1"},
-    {0x87C, "OFM_REGION"},
-    {0x880, "KERNEL_WIDTH_M1"},
-    {0x884, "KERNEL_HEIGHT_M1"},
-    {0x888, "KERNEL_STRIDE"},
-    {0x88C, "PARALLEL_MODE"},
-    {0x890, "ACC_FORMAT"},
-    {0x894, "ACTIVATION"},
-    {0x898, "ACTIVATION_MIN"},
-    {0x89C, "ACTIVATION_MAX"},
-    {0x8A0, "WEIGHT_REGION"},
-    {0x8A4, "SCALE_REGION"},
-    {0x8B4, "AB_START"},
-    {0x8BC, "BLOCKDEP"},
-    {0x8C0, "DMA0_SRC_REGION"},
-    {0x8C4, "DMA0_DST_REGION"},
-    {0x8C8, "DMA0_SIZE0"},
-    {0x8CC, "DMA0_SIZE1"},
-    {0x900, "IFM2_BROADCAST"},
-    {0x904, "IFM2_SCALAR"},
-    {0x924, "IFM2_ZERO_POINT"},
-    {0x928, "IFM2_WIDTH0_M1"},
-    {0x92C, "IFM2_HEIGHT0_M1"},
-    {0x930, "IFM2_HEIGHT1_M1"},
-    {0x934, "IFM2_IB_START"},
-    {0x93C, "IFM2_REGION"},
-    {0xA00, "IFM_BASE0"},
-    {0xA04, "IFM_BASE0_HI"},
-    {0xA08, "IFM_BASE1"},
-    {0xA0C, "IFM_BASE1_HI"},
-    {0xA10, "IFM_BASE2"},
-    {0xA14, "IFM_BASE2_HI"},
-    {0xA18, "IFM_BASE3"},
-    {0xA1C, "IFM_BASE3_HI"},
-    {0xA20, "IFM_STRIDE_X"},
-    {0xA24, "IFM_STRIDE_X_HI"},
-    {0xA28, "IFM_STRIDE_Y"},
-    {0xA2C, "IFM_STRIDE_Y_HI"},
-    {0xA30, "IFM_STRIDE_C"},
-    {0xA34, "IFM_STRIDE_C_HI"},
-    {0xA40, "OFM_BASE0"},
-    {0xA44, "OFM_BASE0_HI"},
-    {0xA48, "OFM_BASE1"},
-    {0xA4C, "OFM_BASE1_HI"},
-    {0xA50, "OFM_BASE2"},
-    {0xA54, "OFM_BASE2_HI"},
-    {0xA58, "OFM_BASE3"},
-    {0xA5C, "OFM_BASE3_HI"},
-    {0xA60, "OFM_STRIDE_X"},
-    {0xA64, "OFM_STRIDE_X_HI"},
-    {0xA68, "OFM_STRIDE_Y"},
-    {0xA6C, "OFM_STRIDE_Y_HI"},
-    {0xA70, "OFM_STRIDE_C"},
-    {0xA74, "OFM_STRIDE_C_HI"},
-    {0xA80, "WEIGHT_BASE"},
-    {0xA84, "WEIGHT_BASE_HI"},
-    {0xA88, "WEIGHT_LENGTH"},
-    {0xA8C, "WEIGHT_LENGTH_HI"},
-    {0xA90, "SCALE_BASE"},
-    {0xA94, "SCALE_BASE_HI"},
-    {0xA98, "SCALE_LENGTH"},
-    {0xAA0, "OFM_SCALE"},
-    {0xAA4, "OFM_SCALE_SHIFT"},
-    {0xAA8, "OPA_SCALE "},
-    {0xAB0, "OPB_SCALE"},
-    {0xAC0, "DMA0_SRC"},
-    {0xAC4, "DMA0_SRC_HI"},
-    {0xAC8, "DMA0_DST"},
-    {0xACC, "DMA0_DST_HI"},
-    {0xAD0, "DMA0_LEN"},
-    {0xAD4, "DMA0_LEN_HI"},
-    {0xAD8, "DMA0_SKIP0"},
-    {0xADC, "DMA0_SKIP0_HI"},
-    {0xAE0, "DMA0_SKIP1"},
-    {0xAE4, "DMA0_SKIP1_HI"},
-    {0xB00, "IFM2_BASE0"},
-    {0xB04, "IFM2_BASE0_HI"},
-    {0xB08, "IFM2_BASE1"},
-    {0xB0C, "IFM2_BASE1_HI"},
-    {0xB10, "IFM2_BASE2"},
-    {0xB14, "IFM2_BASE2_HI"},
-    {0xB18, "IFM2_BASE3"},
-    {0xB1C, "IFM2_BASE3_HI"},
-    {0xB20, "IFM2_STRIDE_X"},
-    {0xB24, "IFM2_STRIDE_X_HI"},
-    {0xB28, "IFM2_STRIDE_Y"},
-    {0xB2C, "IFM2_STRIDE_Y_HI"},
-    {0xB30, "IFM2_STRIDE_C"},
-    {0xB34, "IFM2_STRIDE_C_HI"},
-    {0xB40, "WEIGHT1_BASE"},
-    {0xB44, "WEIGHT1_BASE_HI"},
-    {0xB48, "WEIGHT1_LENGTH"},
-    {0xB4C, "WEIGHT1_LENGTH_HI"},
-    {0xB50, "SCALE1_BASE"},
-    {0xB54, "SCALE1_BASE_HI"},
-    {0xB58, "SCALE1_LENGTH"},
-};
+    ethosu_mutex_unlock(ethosu_mutex);
 
-static const char *lookup_name(const name_lookup_t *lookup_table, int lookup_table_count, int find)
-{
-    int n;
-    for (n = 0; n < lookup_table_count; n++)
+    if (!drv)
     {
-        if (lookup_table[n].number == find)
-        {
-            return lookup_table[n].name;
-        }
+        LOG_ERR("No NPU driver handle available, but semaphore taken");
     }
-    // Not found
-    return 0;
-}
 
-static void dump_npu_register(struct ethosu_driver *drv, int npu_reg, int npu_reg_end)
-{
-    unsigned int reg_val;
-    const char *reg_name;
-    int npu_reg_name_tbl_count = sizeof(npu_reg_name_tbl) / sizeof(npu_reg_name_tbl[0]);
-
-    LOG_INFO("dump_register %X - %X\n", npu_reg, npu_reg_end);
-    for (; npu_reg <= npu_reg_end; npu_reg += sizeof(int))
-    {
-        reg_val  = ethosu_read_reg(&drv->dev, npu_reg);
-        reg_name = lookup_name(npu_reg_name_tbl, npu_reg_name_tbl_count, npu_reg);
-        LOG_INFO("[0x%.4X] 0x%.8X\t%s\n", npu_reg, reg_val, (reg_name) ? reg_name : "");
-    }
+    return drv;
 }
 
-static const name_lookup_t cmd0_name_tbl[] = {
-    {0x000, "NPU_OP_STOP"},
-    {0x001, "NPU_OP_IRQ"},
-    {0x002, "NPU_OP_CONV"},
-    {0x003, "NPU_OP_DEPTHWISE"},
-    {0x004, "NPU_OP_VECTOR_PROD"},
-    {0x005, "NPU_OP_POOL"},
-    {0x006, "NPU_OP_ELEMENTWISE"},
-    {0x010, "NPU_OP_DMA_START"},
-    {0x011, "NPU_OP_DMA_WAIT"},
-    {0x012, "NPU_OP_KERNEL_WAIT"},
-    {0x100, "NPU_SET_IFM_PAD_TOP"},
-    {0x101, "NPU_SET_IFM_PAD_LEFT"},
-    {0x102, "NPU_SET_IFM_PAD_RIGHT"},
-    {0x103, "NPU_SET_IFM_PAD_BOTTOM"},
-    {0x104, "NPU_SET_IFM_DEPTH_M1"},
-    {0x105, "NPU_SET_IFM_PRECISION"},
-    {0x107, "NPU_SET_IFM_UPSCALE"},
-    {0x109, "NPU_SET_IFM_ZERO_POINT"},
-    {0x10A, "NPU_SET_IFM_WIDTH0_M1"},
-    {0x10B, "NPU_SET_IFM_HEIGHT0_M1"},
-    {0x10C, "NPU_SET_IFM_HEIGHT1_M1"},
-    {0x10D, "NPU_SET_IFM_IB_END"},
-    {0x10F, "NPU_SET_IFM_REGION"},
-    {0x110, "NPU_SET_OFM_BATCH_SIZE_M1"},
-    {0x111, "NPU_SET_OFM_WIDTH_M1"},
-    {0x112, "NPU_SET_OFM_HEIGHT_M1"},
-    {0x113, "NPU_SET_OFM_DEPTH_M1"},
-    {0x114, "NPU_SET_OFM_PRECISION"},
-    {0x115, "NPU_SET_OFM_BLK_WIDTH_M1"},
-    {0x116, "NPU_SET_OFM_BLK_HEIGHT_M1"},
-    {0x117, "NPU_SET_OFM_BLK_DEPTH_M1"},
-    {0x118, "NPU_SET_OFM_ZERO_POINT"},
-    {0x11A, "NPU_SET_OFM_WIDTH0_M1"},
-    {0x11B, "NPU_SET_OFM_HEIGHT0_M1"},
-    {0x11C, "NPU_SET_OFM_HEIGHT1_M1"},
-    {0x11F, "NPU_SET_OFM_REGION"},
-    {0x120, "NPU_SET_KERNEL_WIDTH_M1"},
-    {0x121, "NPU_SET_KERNEL_HEIGHT_M1"},
-    {0x122, "NPU_SET_KERNEL_STRIDE"},
-    {0x124, "NPU_SET_ACC_FORMAT"},
-    {0x125, "NPU_SET_ACTIVATION"},
-    {0x126, "NPU_SET_ACTIVATION_MIN"},
-    {0x127, "NPU_SET_ACTIVATION_MAX"},
-    {0x128, "NPU_SET_WEIGHT_REGION"},
-    {0x129, "NPU_SET_SCALE_REGION"},
-    {0x12D, "NPU_SET_AB_START"},
-    {0x12F, "NPU_SET_BLOCKDEP"},
-    {0x130, "NPU_SET_DMA0_SRC_REGION"},
-    {0x131, "NPU_SET_DMA0_DST_REGION"},
-    {0x180, "NPU_SET_IFM2_BROADCAST"},
-    {0x181, "NPU_SET_IFM2_SCALAR"},
-    {0x185, "NPU_SET_IFM2_PRECISION"},
-    {0x189, "NPU_SET_IFM2_ZERO_POINT"},
-    {0x18A, "NPU_SET_IFM2_WIDTH0_M1"},
-    {0x18B, "NPU_SET_IFM2_HEIGHT0_M1"},
-    {0x18C, "NPU_SET_IFM2_HEIGHT1_M1"},
-    {0x18D, "NPU_SET_IFM2_IB_START"},
-    {0x18F, "NPU_SET_IFM2_REGION"},
-};
-
-static const name_lookup_t cmd1_name_tbl[] = {
-    {0x000, "NPU_SET_IFM_BASE0"},     {0x001, "NPU_SET_IFM_BASE1"},     {0x002, "NPU_SET_IFM_BASE2"},
-    {0x003, "NPU_SET_IFM_BASE3"},     {0x004, "NPU_SET_IFM_STRIDE_X"},  {0x005, "NPU_SET_IFM_STRIDE_Y"},
-    {0x006, "NPU_SET_IFM_STRIDE_C"},  {0x007, "NPU_SET_IFM_STRIDE_N"},  {0x010, "NPU_SET_OFM_BASE0"},
-    {0x011, "NPU_SET_OFM_BASE1"},     {0x012, "NPU_SET_OFM_BASE2"},     {0x013, "NPU_SET_OFM_BASE3"},
-    {0x014, "NPU_SET_OFM_STRIDE_X"},  {0x015, "NPU_SET_OFM_STRIDE_Y"},  {0x016, "NPU_SET_OFM_STRIDE_C"},
-    {0x017, "NPU_SET_OFM_STRIDE_N"},  {0x020, "NPU_SET_WEIGHT_BASE"},   {0x021, "NPU_SET_WEIGHT_LENGTH"},
-    {0x022, "NPU_SET_SCALE_BASE"},    {0x023, "NPU_SET_SCALE_LENGTH"},  {0x024, "NPU_SET_OFM_SCALE"},
-    {0x025, "NPU_SET_OPA_SCALE"},     {0x026, "NPU_SET_OPB_SCALE"},     {0x030, "NPU_SET_DMA0_SRC"},
-    {0x031, "NPU_SET_DMA0_DST"},      {0x032, "NPU_SET_DMA0_LEN"},      {0x080, "NPU_SET_IFM2_BASE0"},
-    {0x081, "NPU_SET_IFM2_BASE1"},    {0x082, "NPU_SET_IFM2_BASE2"},    {0x083, "NPU_SET_IFM2_BASE3"},
-    {0x084, "NPU_SET_IFM2_STRIDE_X"}, {0x085, "NPU_SET_IFM2_STRIDE_Y"}, {0x086, "NPU_SET_IFM2_STRIDE_C"},
-};
-
-static void dump_command_stream(const uint32_t *cmd_stream, const int cms_length, int qread)
+void ethosu_release_driver(struct ethosu_driver *drv)
 {
-    int n;
-    int offset;
-    uint32_t cmd_val;
-    const uint8_t *cmd_ptr;
-    const char *cmd_name;
-    int cmd0_name_tbl_count = sizeof(cmd0_name_tbl) / sizeof(cmd0_name_tbl[0]);
-    int cmd1_name_tbl_count = sizeof(cmd1_name_tbl) / sizeof(cmd1_name_tbl[0]);
-
-    LOG_INFO("dump_command_stream cmd_stream = 0x%8p cms_length = %d\n", cmd_stream, cms_length);
-    for (n = 0; n < cms_length; n++)
+    ethosu_mutex_lock(ethosu_mutex);
+    if (drv != NULL && drv->reserved)
     {
-        // Offset
-        offset = n * sizeof(int);
-        LOG_INFO("[%.4d] ", offset);
-        // Command
-        cmd_ptr = (const uint8_t *)&cmd_stream[n];
-        LOG_INFO("0x%.2X 0x%.2X 0x%.2X 0x%.2X ", cmd_ptr[0], cmd_ptr[1], cmd_ptr[2], cmd_ptr[3]);
-        // Command name and payload
-        if (cmd_stream[n] & 0x4000)
-        {
-            cmd_name = lookup_name(cmd1_name_tbl, cmd1_name_tbl_count, cmd_stream[n] & 0x3FF);
-            n++;
-            cmd_val = cmd_stream[n];
-            cmd_ptr = (const uint8_t *)&cmd_stream[n];
-            LOG_INFO("0x%.2X 0x%.2X 0x%.2X 0x%.2X ", cmd_ptr[0], cmd_ptr[1], cmd_ptr[2], cmd_ptr[3]);
-        }
-        else
+        if (drv->job.state == ETHOSU_JOB_RUNNING || drv->job.state == ETHOSU_JOB_DONE)
         {
-            cmd_val  = cmd_stream[n] >> 16;
-            cmd_name = lookup_name(cmd0_name_tbl, cmd0_name_tbl_count, cmd_stream[n] & 0x3FF);
-        }
-        if (cmd_name)
-        {
-            LOG_INFO("\t%s 0x%.8" PRIX32, cmd_name, cmd_val);
-        }
-        if (offset == qread)
-        {
-            LOG_INFO(" <<== QREAD\n");
-        }
-        else
-        {
-            LOG_INFO("\n");
+            // Give the inference one shot to complete or force kill the job
+            if (ethosu_wait(drv, false) == 1)
+            {
+                // Still running, soft reset the NPU and reset driver
+                drv->power_request_counter = 0;
+                ethosu_soft_reset(drv);
+                ethosu_reset_job(drv);
+                drv->status_error = false;
+            }
         }
+
+        drv->reserved = false;
+        LOG_DEBUG("NPU driver handle %p released", drv);
+        ethosu_semaphore_give(ethosu_semaphore);
     }
+    ethosu_mutex_unlock(ethosu_mutex);
 }
-
-#endif //EI ETHOS
+#endif // EI_ETHOS
\ No newline at end of file
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_interface.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_interface.h
new file mode 100644
index 0000000..2409cb4
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_interface.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020-2021 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// clang-format off
+#ifndef ETHOSU_INTERFACE_WRAPPER_
+#define ETHOSU_INTERFACE_WRAPPER_
+
+#define xstr(a) str(a)
+#define str(a)  #a
+
+#define catm(a, b)  catm_(a, b)
+#define catm_(a, b) a##b
+
+#define ETHOSU_INTERFACE_FILE xstr(catm(ethos, ETHOSU_ARCH)_interface.h)
+
+#include ETHOSU_INTERFACE_FILE
+
+#endif // ETHOSU_INTERFACE_WRAPPER_
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_log.h b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_log.h
new file mode 100644
index 0000000..582b91d
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_log.h
@@ -0,0 +1,72 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2021-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ETHOSU_LOG_H
+#define ETHOSU_LOG_H
+
+/******************************************************************************
+ * Includes
+ ******************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+/******************************************************************************
+ * Defines
+ ******************************************************************************/
+
+// Log severity levels
+#define ETHOSU_LOG_ERR 0
+#define ETHOSU_LOG_WARN 1
+#define ETHOSU_LOG_INFO 2
+#define ETHOSU_LOG_DEBUG 3
+
+// Define default log severity
+#ifndef ETHOSU_LOG_SEVERITY
+#define ETHOSU_LOG_SEVERITY ETHOSU_LOG_WARN
+#endif
+
+// Log formatting
+#define LOG(f, ...) (void)fprintf(stdout, f, ##__VA_ARGS__)
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_ERR
+#define LOG_ERR(f, ...)                                                                                                \
+    (void)fprintf(stderr, "E: " f " (%s:%d)\n", ##__VA_ARGS__, strrchr("/" __FILE__, '/') + 1, __LINE__)
+#else
+#define LOG_ERR(f, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_WARN
+#define LOG_WARN(f, ...) (void)fprintf(stdout, "W: " f "\n", ##__VA_ARGS__)
+#else
+#define LOG_WARN(f, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_INFO
+#define LOG_INFO(f, ...) (void)fprintf(stdout, "I: " f "\n", ##__VA_ARGS__)
+#else
+#define LOG_INFO(f, ...)
+#endif
+
+#if ETHOSU_LOG_SEVERITY >= ETHOSU_LOG_DEBUG
+#define LOG_DEBUG(f, ...) (void)fprintf(stdout, "D: %s(): " f "\n", __FUNCTION__, ##__VA_ARGS__)
+#else
+#define LOG_DEBUG(f, ...)
+#endif
+
+#endif
\ No newline at end of file
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c
index 818b20d..6832005 100644
--- a/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c
+++ b/edge-impulse-sdk/porting/ethos-core-driver/src/ethosu_pmu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021 Arm Limited. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,9 +21,10 @@
  *****************************************************************************/
 #if EI_ETHOS
 
-#include "ethosu55_interface.h"
-#include "ethosu_common.h"
+#include "ethosu_device.h"
 #include "ethosu_driver.h"
+#include "ethosu_interface.h"
+#include "ethosu_log.h"
 #include "pmu_ethosu.h"
 
 #include <assert.h>
@@ -34,23 +35,23 @@
  * Defines
  *****************************************************************************/
 
+#define MASK_0_31_BITS (0xFFFFFFFF)
+#define MASK_32_47_BITS (0xFFFF00000000)
+
 #define COMMA ,
 #define SEMICOLON ;
 
 #define EVTYPE(A, name)                                                                                                \
-    case PMU_EVENT_TYPE_##name:                                                                                        \
+    case PMU_EVENT_##name:                                                                                             \
         return ETHOSU_PMU_##name
 
-#define EVID(A, name) (PMU_EVENT_TYPE_##name)
-
-#define NPU_REG_PMEVCNTR(x) (NPU_REG_PMEVCNTR0 + ((x) * sizeof(uint32_t)))
-#define NPU_REG_PMEVTYPER(x) (NPU_REG_PMEVTYPER0 + ((x) * sizeof(uint32_t)))
+#define EVID(A, name) (PMU_EVENT_##name)
 
 /*****************************************************************************
  * Variables
  *****************************************************************************/
 
-static const enum pmu_event_type eventbyid[] = {EXPAND_PMU_EVENT_TYPE(EVID, COMMA)};
+static const enum pmu_event eventbyid[] = {EXPAND_PMU_EVENT(EVID, COMMA)};
 
 /*****************************************************************************
  * Static functions
@@ -60,9 +61,9 @@ static enum ethosu_pmu_event_type pmu_event_type(uint32_t id)
 {
     switch (id)
     {
-        EXPAND_PMU_EVENT_TYPE(EVTYPE, SEMICOLON);
+        EXPAND_PMU_EVENT(EVTYPE, SEMICOLON);
     default:
-        LOG_ERR("Unknown PMU event id: 0x%" PRIx32 "\n", id);
+        LOG_ERR("Unknown PMU event id: 0x%" PRIx32, id);
     }
 
     return ETHOSU_PMU_SENTINEL;
@@ -87,103 +88,86 @@ static uint32_t pmu_event_value(enum ethosu_pmu_event_type event)
 
 void ETHOSU_PMU_Enable(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
-    struct pmcr_r pmcr;
-    pmcr.word   = drv->dev.pmcr;
-    pmcr.cnt_en = 1;
-    set_clock_and_power_request(drv, ETHOSU_PMU_REQUEST, ETHOSU_CLOCK_Q_DISABLE, ETHOSU_POWER_Q_DISABLE);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr);
+    LOG_DEBUG("Enable PMU");
+    struct pmcr_r pmcr = {0};
+    pmcr.cnt_en        = 1;
+    ethosu_request_power(drv);
+    drv->dev->reg->PMCR.word = pmcr.word;
 }
 
 void ETHOSU_PMU_Disable(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
-    struct pmcr_r pmcr;
-    pmcr.word   = drv->dev.pmcr;
-    pmcr.cnt_en = 0;
-    set_clock_and_power_request(drv, ETHOSU_PMU_REQUEST, ETHOSU_CLOCK_Q_ENABLE, ETHOSU_POWER_Q_ENABLE);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr);
+    LOG_DEBUG("Disable PMU");
+    drv->dev->reg->PMCR.word = 0;
+    ethosu_release_power(drv);
+}
+
+uint32_t ETHOSU_PMU_Get_NumEventCounters(void)
+{
+    return NPU_REG_PMEVCNTR_ARRLEN;
 }
 
 void ETHOSU_PMU_Set_EVTYPER(struct ethosu_driver *drv, uint32_t num, enum ethosu_pmu_event_type type)
 {
     assert(num < ETHOSU_PMU_NCOUNTERS);
     uint32_t val = pmu_event_value(type);
-    LOG_DEBUG("%s: num=%u, type=%d, val=%u\n", __FUNCTION__, num, type, val);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMEVTYPER(num), val, &drv->dev.pmu_evtypr[num]);
+    LOG_DEBUG("num=%" PRIu32 ", type=%d, val=%" PRIu32, num, type, val);
+    drv->dev->reg->PMEVTYPER[num].word = val;
 }
 
 enum ethosu_pmu_event_type ETHOSU_PMU_Get_EVTYPER(struct ethosu_driver *drv, uint32_t num)
 {
     assert(num < ETHOSU_PMU_NCOUNTERS);
-    uint32_t val                    = drv->dev.pmu_evtypr[num];
+    uint32_t val                    = drv->dev->reg->PMEVTYPER[num].word;
     enum ethosu_pmu_event_type type = pmu_event_type(val);
-    LOG_DEBUG("%s: num=%u, type=%d, val=%u\n", __FUNCTION__, num, type, val);
+    LOG_DEBUG("num=%" PRIu32 ", type=%d, val=%" PRIu32, num, type, val);
     return type;
 }
 
 void ETHOSU_PMU_CYCCNT_Reset(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
+    LOG_DEBUG("Reset PMU cycle counter");
     struct pmcr_r pmcr;
-    pmcr.word          = drv->dev.pmcr;
-    pmcr.cycle_cnt_rst = 1;
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr);
-    drv->dev.pmccntr[0] = 0;
-    drv->dev.pmccntr[1] = 0;
+    pmcr.word                = drv->dev->reg->PMCR.word;
+    pmcr.cycle_cnt_rst       = 1;
+    drv->dev->reg->PMCR.word = pmcr.word;
 }
 
 void ETHOSU_PMU_EVCNTR_ALL_Reset(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
+    LOG_DEBUG("Reset all events");
     struct pmcr_r pmcr;
-    pmcr.word          = drv->dev.pmcr;
-    pmcr.event_cnt_rst = 1;
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCR, pmcr.word, &drv->dev.pmcr);
-
-    for (uint32_t i = 0; i < ETHOSU_PMU_NCOUNTERS; i++)
-    {
-        drv->dev.pmu_evcntr[i] = 0;
-    }
+    pmcr.word                = drv->dev->reg->PMCR.word;
+    pmcr.event_cnt_rst       = 1;
+    drv->dev->reg->PMCR.word = pmcr.word;
 }
 
 void ETHOSU_PMU_CNTR_Enable(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCNTENSET, mask, &drv->dev.pmcnten);
+    LOG_DEBUG("mask=0x%08" PRIx32, mask);
+    drv->dev->reg->PMCNTENSET.word = mask;
 }
 
 void ETHOSU_PMU_CNTR_Disable(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCNTENCLR, mask, &drv->dev.pmcnten);
+    LOG_DEBUG("mask=0x%08" PRIx32, mask);
+    drv->dev->reg->PMCNTENCLR.word = mask;
 }
 
 uint32_t ETHOSU_PMU_CNTR_Status(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, drv->dev.pmcnten);
-    return drv->dev.pmcnten;
+    uint32_t pmcntenset = drv->dev->reg->PMCNTENSET.word;
+    LOG_DEBUG("mask=0x%08" PRIx32, pmcntenset);
+    return pmcntenset;
 }
 
 uint64_t ETHOSU_PMU_Get_CCNTR(struct ethosu_driver *drv)
 {
-    uint32_t val_lo = ethosu_read_reg(&drv->dev, NPU_REG_PMCCNTR_LO);
-    uint32_t val_hi = ethosu_read_reg(&drv->dev, NPU_REG_PMCCNTR_HI);
+    uint32_t val_lo = drv->dev->reg->PMCCNTR.CYCLE_CNT_LO;
+    uint32_t val_hi = drv->dev->reg->PMCCNTR.CYCLE_CNT_HI;
     uint64_t val    = ((uint64_t)val_hi << 32) | val_lo;
-    uint64_t shadow = ((uint64_t)drv->dev.pmccntr[1] << 32) | drv->dev.pmccntr[0];
-
-    LOG_DEBUG("%s: val=%" PRIu64 ", shadow=%" PRIu64 "\n", __FUNCTION__, val, shadow);
-
-    // Return the shadow variable in case the NPU was powered off and lost the cycle count
-    if (shadow > val)
-    {
-        return shadow;
-    }
-
-    // Update the shadow variable
-    drv->dev.pmccntr[0] = val_lo;
-    drv->dev.pmccntr[1] = val_hi;
 
+    LOG_DEBUG("val=%" PRIu64, val);
     return val;
 }
 
@@ -191,15 +175,15 @@ void ETHOSU_PMU_Set_CCNTR(struct ethosu_driver *drv, uint64_t val)
 {
     uint32_t active = ETHOSU_PMU_CNTR_Status(drv) & ETHOSU_PMU_CCNT_Msk;
 
-    LOG_DEBUG("%s: val=%llu\n", __FUNCTION__, val);
+    LOG_DEBUG("val=%" PRIu64, val); // val is uint64_t; PRIu64 is the matching specifier on every ABI
 
     if (active)
     {
         ETHOSU_PMU_CNTR_Disable(drv, ETHOSU_PMU_CCNT_Msk);
     }
 
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_LO, val & MASK_0_31_BITS, &drv->dev.pmccntr[0]);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_HI, (val & MASK_32_47_BITS) >> 32, &drv->dev.pmccntr[1]);
+    drv->dev->reg->PMCCNTR.CYCLE_CNT_LO = val & MASK_0_31_BITS;
+    drv->dev->reg->PMCCNTR.CYCLE_CNT_HI = (val & MASK_32_47_BITS) >> 32;
 
     if (active)
     {
@@ -210,17 +194,8 @@ void ETHOSU_PMU_Set_CCNTR(struct ethosu_driver *drv, uint64_t val)
 uint32_t ETHOSU_PMU_Get_EVCNTR(struct ethosu_driver *drv, uint32_t num)
 {
     assert(num < ETHOSU_PMU_NCOUNTERS);
-    uint32_t val = ethosu_read_reg(&drv->dev, NPU_REG_PMEVCNTR(num));
-    LOG_DEBUG("%s: num=%u, val=%u, shadow=%u\n", __FUNCTION__, num, val, drv->dev.pmu_evcntr[num]);
-
-    // Return the shadow variable in case the NPU was powered off and lost the event count
-    if (drv->dev.pmu_evcntr[num] > val)
-    {
-        return drv->dev.pmu_evcntr[num];
-    }
-
-    // Update the shadow variable
-    drv->dev.pmu_evcntr[num] = val;
+    uint32_t val = drv->dev->reg->PMEVCNTR[num].word;
+    LOG_DEBUG("num=%" PRIu32 ", val=%" PRIu32, num, val);
 
     return val;
 }
@@ -228,43 +203,44 @@ uint32_t ETHOSU_PMU_Get_EVCNTR(struct ethosu_driver *drv, uint32_t num)
 void ETHOSU_PMU_Set_EVCNTR(struct ethosu_driver *drv, uint32_t num, uint32_t val)
 {
     assert(num < ETHOSU_PMU_NCOUNTERS);
-    LOG_DEBUG("%s: num=%u, val=%u\n", __FUNCTION__, num, val);
-    ethosu_write_reg(&drv->dev, NPU_REG_PMEVCNTR(num), val);
+    LOG_DEBUG("num=%" PRIu32 ", val=%" PRIu32, num, val);
+    drv->dev->reg->PMEVCNTR[num].word = val;
 }
 
 uint32_t ETHOSU_PMU_Get_CNTR_OVS(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
-    return ethosu_read_reg(&drv->dev, NPU_REG_PMOVSSET);
+    LOG_DEBUG("");
+    return drv->dev->reg->PMOVSSET.word;
 }
 
 void ETHOSU_PMU_Set_CNTR_OVS(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
-    ethosu_write_reg(&drv->dev, NPU_REG_PMOVSCLR, mask);
+    LOG_DEBUG("");
+    drv->dev->reg->PMOVSCLR.word = mask;
 }
 
 void ETHOSU_PMU_Set_CNTR_IRQ_Enable(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMINTSET, mask, &drv->dev.pmint);
+    LOG_DEBUG("mask=0x%08" PRIx32, mask);
+    drv->dev->reg->PMINTSET.word = mask;
 }
 
 void ETHOSU_PMU_Set_CNTR_IRQ_Disable(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, mask);
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMINTCLR, mask, &drv->dev.pmint);
+    LOG_DEBUG("mask=0x%08" PRIx32, mask);
+    drv->dev->reg->PMINTCLR.word = mask;
 }
 
 uint32_t ETHOSU_PMU_Get_IRQ_Enable(struct ethosu_driver *drv)
 {
-    LOG_DEBUG("%s: mask=0x%08x\n", __FUNCTION__, drv->dev.pmint);
-    return drv->dev.pmint;
+    uint32_t pmint = drv->dev->reg->PMINTSET.word;
+    LOG_DEBUG("mask=0x%08" PRIx32, pmint);
+    return pmint;
 }
 
 void ETHOSU_PMU_CNTR_Increment(struct ethosu_driver *drv, uint32_t mask)
 {
-    LOG_DEBUG("%s:\n", __FUNCTION__);
+    LOG_DEBUG("");
     uint32_t cntrs_active = ETHOSU_PMU_CNTR_Status(drv);
 
     // Disable counters
@@ -273,17 +249,17 @@ void ETHOSU_PMU_CNTR_Increment(struct ethosu_driver *drv, uint32_t mask)
     // Increment cycle counter
     if (mask & ETHOSU_PMU_CCNT_Msk)
     {
-        uint64_t val = ETHOSU_PMU_Get_CCNTR(drv) + 1;
-        ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_LO, val & MASK_0_31_BITS, &drv->dev.pmccntr[0]);
-        ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_HI, (val & MASK_32_47_BITS) >> 32, &drv->dev.pmccntr[1]);
+        uint64_t val                        = ETHOSU_PMU_Get_CCNTR(drv) + 1;
+        drv->dev->reg->PMCCNTR.CYCLE_CNT_LO = val & MASK_0_31_BITS;
+        drv->dev->reg->PMCCNTR.CYCLE_CNT_HI = (val & MASK_32_47_BITS) >> 32;
     }
 
     for (int i = 0; i < ETHOSU_PMU_NCOUNTERS; i++)
     {
-        if (mask & (1 << i))
+        if (mask & (1u << i))
         {
-            uint32_t val = ETHOSU_PMU_Get_EVCNTR(drv, i);
-            ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMEVCNTR(i), val + 1, &drv->dev.pmu_evcntr[i]);
+            uint32_t val                    = ETHOSU_PMU_Get_EVCNTR(drv, i);
+            drv->dev->reg->PMEVCNTR[i].word = val + 1;
         }
     }
 
@@ -293,22 +269,36 @@ void ETHOSU_PMU_CNTR_Increment(struct ethosu_driver *drv, uint32_t mask)
 
 void ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type start_event)
 {
-    LOG_DEBUG("%s: start_event=%u\n", __FUNCTION__, start_event);
+    LOG_DEBUG("start_event=%u", start_event);
     uint32_t val = pmu_event_value(start_event);
     struct pmccntr_cfg_r cfg;
-    cfg.word                = drv->dev.pmccntr_cfg;
-    cfg.CYCLE_CNT_CFG_START = val;
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_CFG, cfg.word, &drv->dev.pmccntr_cfg);
+    cfg.word                        = drv->dev->reg->PMCCNTR_CFG.word;
+    cfg.CYCLE_CNT_CFG_START         = val;
+    drv->dev->reg->PMCCNTR_CFG.word = cfg.word;
 }
 
 void ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(struct ethosu_driver *drv, enum ethosu_pmu_event_type stop_event)
 {
-    LOG_DEBUG("%s: stop_event=%u\n", __FUNCTION__, stop_event);
+    LOG_DEBUG("stop_event=%u", stop_event);
     uint32_t val = pmu_event_value(stop_event);
     struct pmccntr_cfg_r cfg;
-    cfg.word               = drv->dev.pmccntr_cfg;
-    cfg.CYCLE_CNT_CFG_STOP = val;
-    ethosu_write_reg_shadow(&drv->dev, NPU_REG_PMCCNTR_CFG, cfg.word, &drv->dev.pmccntr_cfg);
+    cfg.word                        = drv->dev->reg->PMCCNTR_CFG.word;
+    cfg.CYCLE_CNT_CFG_STOP          = val;
+    drv->dev->reg->PMCCNTR_CFG.word = cfg.word;
+}
+
+uint32_t ETHOSU_PMU_Get_QREAD(struct ethosu_driver *drv)
+{
+    uint32_t val = drv->dev->reg->QREAD.word;
+    LOG_DEBUG("qread=%" PRIu32, val);
+    return val;
+}
+
+uint32_t ETHOSU_PMU_Get_STATUS(struct ethosu_driver *drv)
+{
+    uint32_t val = drv->dev->reg->STATUS.word;
+    LOG_DEBUG("status=0x%" PRIx32, val);
+    return val;
 }
 
-#endif //EI ETHOS
+#endif // EI_ETHOS
\ No newline at end of file
diff --git a/edge-impulse-sdk/porting/ethos-core-driver/version.txt b/edge-impulse-sdk/porting/ethos-core-driver/version.txt
new file mode 100644
index 0000000..5656be6
--- /dev/null
+++ b/edge-impulse-sdk/porting/ethos-core-driver/version.txt
@@ -0,0 +1 @@
+v1.23.2
\ No newline at end of file
diff --git a/edge-impulse-sdk/porting/iar/debug_log.cpp b/edge-impulse-sdk/porting/iar/debug_log.cpp
new file mode 100644
index 0000000..ae378e4
--- /dev/null
+++ b/edge-impulse-sdk/porting/iar/debug_log.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 EdgeImpulse Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS
+ * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "../ei_classifier_porting.h"
+#if EI_PORTING_IAR == 1
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
+#include <stdio.h>
+#include <stdarg.h>
+
+// Redirect TFLite DebugLog to ei_printf
+#if defined(__cplusplus) && EI_C_LINKAGE == 1
+extern "C"
+#endif // defined(__cplusplus) && EI_C_LINKAGE == 1
+void DebugLog(const char* s) {
+    ei_printf("%s", s);
+}
+
+#endif // EI_PORTING_IAR == 1
diff --git a/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp b/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp
new file mode 100644
index 0000000..3d450f0
--- /dev/null
+++ b/edge-impulse-sdk/porting/iar/ei_classifier_porting.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 EdgeImpulse Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS
+ * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "../ei_classifier_porting.h"
+#if EI_PORTING_IAR == 1
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <cstdio>
+
+#include "main.h"
+#include "stm32f4xx_hal.h"
+
+
+__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() {
+    return EI_IMPULSE_OK;
+}
+
+/**
+ * Cancelable sleep, can be triggered with signal from other thread
+ */
+__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) {
+    // HAL_Delay() takes an unsigned count: clamp negatives so they do not turn into a weeks-long delay.
+    HAL_Delay(time_ms > 0 ? static_cast<uint32_t>(time_ms) : 0U);
+
+    return EI_IMPULSE_OK;
+}
+
+uint64_t ei_read_timer_ms() {
+    // Returns the HAL tick count widened to 64 bits (presumably milliseconds -- confirm the tick rate).
+    return HAL_GetTick();
+}
+
+uint64_t ei_read_timer_us() {
+    // Widen the 32-bit HAL tick before scaling; a 32-bit multiply by 1000 would wrap after ~71 minutes.
+    return static_cast<uint64_t>(HAL_GetTick()) * 1000U;
+}
+
+__attribute__((weak)) void ei_printf(const char *format, ...) {
+
+    va_list myargs;
+    va_start(myargs, format);
+    vprintf(format, myargs);
+    va_end(myargs);
+}
+
+__attribute__((weak)) void ei_printf_float(float f) {
+    ei_printf("%f", f);
+}
+
+__attribute__((weak)) void ei_putchar(char data)
+{
+    putchar(data);
+}
+
+__attribute__((weak)) void *ei_malloc(size_t size) {
+    return malloc(size);
+}
+
+__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
+    return calloc(nitems, size);
+}
+
+__attribute__((weak)) void ei_free(void *ptr) {
+    free(ptr);
+}
+
+#if defined(__cplusplus) && EI_C_LINKAGE == 1
+extern "C"
+#endif
+__attribute__((weak)) void DebugLog(const char* s) {
+    ei_printf("%s", s);
+}
+
+#endif // EI_PORTING_IAR == 1
diff --git a/edge-impulse-sdk/porting/posix/debug_log.cpp b/edge-impulse-sdk/porting/posix/debug_log.cpp
index 9745992..6f7164a 100644
--- a/edge-impulse-sdk/porting/posix/debug_log.cpp
+++ b/edge-impulse-sdk/porting/posix/debug_log.cpp
@@ -18,7 +18,6 @@
 #include "../ei_classifier_porting.h"
 #if EI_PORTING_POSIX == 1
 
-#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
 #include <stdio.h>
 #include <stdarg.h>
 
diff --git a/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp b/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp
index e5f8e25..fe7a60d 100644
--- a/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp
+++ b/edge-impulse-sdk/porting/posix/ei_classifier_porting.cpp
@@ -15,7 +15,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#include "../ei_classifier_porting.h"
+#include "edge-impulse-sdk/porting/ei_classifier_porting.h"
 #if EI_PORTING_POSIX == 1
 
 #include <inttypes.h>
@@ -70,6 +70,16 @@ __attribute__((weak)) void ei_printf_float(float f) {
     ei_printf("%f", f);
 }
 
+__attribute__((weak)) void ei_putchar(char data)
+{
+    putchar(data);
+}
+
+__attribute__((weak)) char ei_getchar(void)
+{
+    return getchar();
+}
+
 __attribute__((weak)) void *ei_malloc(size_t size) {
     return malloc(size);
 }
diff --git a/edge-impulse-sdk/porting/renesas-ra6m5/debug_log.cpp b/edge-impulse-sdk/porting/renesas-ra/debug_log.cpp
similarity index 93%
rename from edge-impulse-sdk/porting/renesas-ra6m5/debug_log.cpp
rename to edge-impulse-sdk/porting/renesas-ra/debug_log.cpp
index 34801a7..47c6847 100644
--- a/edge-impulse-sdk/porting/renesas-ra6m5/debug_log.cpp
+++ b/edge-impulse-sdk/porting/renesas-ra/debug_log.cpp
@@ -16,7 +16,7 @@
  */
 
 #include "../ei_classifier_porting.h"
-#if EI_PORTING_RENESASRA65 == 1
+#if ((EI_PORTING_RENESASRA65 == 1) || (EI_PORTING_RENESASRA8D1 == 1))
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
 #include <stdio.h>
diff --git a/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp b/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp
new file mode 100644
index 0000000..0ef561f
--- /dev/null
+++ b/edge-impulse-sdk/porting/renesas-ra/ei_classifier_porting.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2022 EdgeImpulse Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS
+ * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language
+ * governing permissions and limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/* Includes */
+#include "../ei_classifier_porting.h"
+
+#if ((EI_PORTING_RENESASRA65 == 1) || (EI_PORTING_RENESASRA8D1 == 1))
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <cstdio>
+#include "unistd.h"
+#include "peripheral/uart_ep.h"
+#include <math.h>
+
+extern "C" uint32_t timer_get_ms(void);
+extern "C" uint32_t timer_get_us(void);
+
+__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { // weak hook, can be overridden at link time
+    return EI_IMPULSE_OK; // this default always returns EI_IMPULSE_OK
+}
+
+/**
+ * Blocking busy-wait on the millisecond timer; not cancelable. Non-positive durations return at once.
+ */
+__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) {
+    if (time_ms <= 0) { return EI_IMPULSE_OK; }
+    const uint32_t start_time = (uint32_t)ei_read_timer_ms();
+    // Measure elapsed time modulo 2^32 so a wrap of the 32-bit tick source cannot stall the loop.
+    while ((uint32_t)((uint32_t)ei_read_timer_ms() - start_time) < (uint32_t)time_ms) {};
+
+    return EI_IMPULSE_OK;
+}
+
+uint64_t ei_read_timer_ms() {
+    // Returns timer_get_ms() widened to 64 bits; the source is declared uint32_t, so the range stays 32-bit.
+    return timer_get_ms();
+}
+
+uint64_t ei_read_timer_us() {
+    // Returns timer_get_us() widened to 64 bits. NOTE(review): a 32-bit microsecond count would wrap after ~71.6 min - confirm callers cope.
+    return timer_get_us();
+}
+
+__attribute__((weak)) char ei_getchar()
+{
+    // Placeholder reader: nothing is read from any device, the result is
+    // always 0. Declared weak so a real implementation can replace it.
+    return 0;
+}
+
+__attribute__((weak)) void ei_printf(const char *format, ...) {
+    // printf-style output: formats into a 1 KiB stack buffer and hands the bytes to uart_print_user_msg().
+    char buffer[1024] = {0};
+    va_list myargs;
+    va_start(myargs, format);
+    int length = vsnprintf(buffer, sizeof(buffer), format, myargs);
+    va_end(myargs);
+    // vsnprintf reports the untruncated length; never send more than the buffer actually holds.
+    if (length >= (int)sizeof(buffer)) { length = (int)sizeof(buffer) - 1; }
+    if (length > 0) {
+        uart_print_user_msg((uint8_t *)buffer, length);
+    }
+}
+
+__attribute__((weak)) void ei_printf_float(float f) {
+    float n = f;
+    // Prints f as plain decimal text through ei_printf(), without relying on a "%f" conversion.
+    static double PRECISION = 0.00001;
+    static int MAX_NUMBER_STRING_SIZE = 32;
+    // Output is cut off once s is nearly full, so very large magnitudes cannot overrun it.
+    char s[MAX_NUMBER_STRING_SIZE];
+
+    if (n == 0.0) {
+        strcpy(s, "0");
+    }
+    else {
+        int digit, m;
+        char *c = s;
+        int neg = (n < 0);
+        if (neg) {
+            n = -n;
+        }
+        // calculate magnitude
+        m = log10(n);
+        if (neg) {
+            *(c++) = '-';
+        }
+        if (m < 1.0) {
+            m = 0;
+        }
+        // convert the number; each pass may append a digit and a '.', and room is kept for the final '\0'
+        while ((n > PRECISION || m >= 0) && ((c - s) + 2 < MAX_NUMBER_STRING_SIZE)) {
+            double weight = pow(10.0, m);
+            if (weight > 0 && !isinf(weight)) {
+                digit = floor(n / weight);
+                n -= (digit * weight);
+                *(c++) = '0' + digit;
+            }
+            if (m == 0 && n > 0) {
+                *(c++) = '.';
+            }
+            m--;
+        }
+        *(c) = '\0';
+    }
+
+    ei_printf("%s", s);
+}
+
+/**
+ * Forwards a single character to uart_putc().
+ * @param c character to send
+ */
+void ei_putchar(char c)
+{
+    uart_putc(c);
+}
+
+__attribute__((weak)) void *ei_malloc(size_t size) { // weak default: forwards to the C library malloc()
+    return malloc(size);
+}
+
+__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) { // weak default: forwards to the C library calloc()
+    return calloc(nitems, size);
+}
+
+__attribute__((weak)) void ei_free(void *ptr) { // weak default: releases ptr with the C library free()
+    free(ptr);
+}
+
+#if defined(__cplusplus) && EI_C_LINKAGE == 1 // C linkage only when the SDK is built with EI_C_LINKAGE
+extern "C"
+#endif
+__attribute__((weak)) void DebugLog(const char* s) { // weak logging hook: prints s verbatim through ei_printf()
+    ei_printf("%s", s);
+}
+
+#elif EI_PORTING_RENESASRA8D1_FREERTOS == 1
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <cstdio>
+#include "unistd.h"
+#include "peripheral/uart.h"
+#include "peripheral/usb/usb.h"
+#include <math.h>
+
+#include "FreeRTOS.h"
+#include "task.h"
+#include "stream_buffer.h"
+#include "common_data.h"
+
+extern "C" uint32_t timer_get_ms(void);
+extern "C" uint32_t timer_get_us(void);
+
+__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() { // weak hook, can be overridden at link time
+    return EI_IMPULSE_OK; // this default always returns EI_IMPULSE_OK
+}
+
+/**
+ * Suspends the calling FreeRTOS task through vTaskDelay() for about time_ms milliseconds.
+ * The duration is divided by portTICK_PERIOD_MS, so it is truncated to whole RTOS ticks.
+ * @param time_ms requested delay in milliseconds; negative values are treated as 0
+ * @return always EI_IMPULSE_OK
+ */
+__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) {
+    // A negative value would turn into a huge tick count once converted to the unsigned tick type.
+    const int32_t delay_ms = (time_ms > 0) ? time_ms : 0;
+    vTaskDelay(delay_ms / portTICK_PERIOD_MS);
+    return EI_IMPULSE_OK;
+}
+
+uint64_t ei_read_timer_ms() {
+    // Returns timer_get_ms() widened to 64 bits; the source is declared uint32_t, so the range stays 32-bit.
+    return timer_get_ms();
+}
+
+uint64_t ei_read_timer_us() {
+    // Returns timer_get_us() widened to 64 bits. NOTE(review): a 32-bit microsecond count would wrap after ~71.6 min - confirm callers cope.
+    return timer_get_us();
+}
+
+__attribute__((weak)) char ei_getchar()
+{
+    // Placeholder reader: nothing is read from any device, the result is
+    // always 0. Declared weak so a real implementation can replace it.
+    return 0;
+}
+
+#include <peripheral/uart.h>
+
+__attribute__((weak)) void ei_printf(const char *format, ...) {
+    // printf-style output: formats into a 1 KiB stack buffer and passes the bytes to comms_send().
+    char buffer[1024] = {0};
+    va_list myargs;
+    va_start(myargs, format);
+    int length = vsnprintf(buffer, sizeof(buffer), format, myargs);
+    va_end(myargs);
+    // vsnprintf reports the length the full text would need, which can exceed the
+    // buffer; clamp it so comms_send() is never told to read past the end.
+    if (length >= (int)sizeof(buffer)) {
+        length = (int)sizeof(buffer) - 1;
+    }
+    if (length > 0) {
+        comms_send((uint8_t *)buffer, length, 1000);
+    }
+}
+
+__attribute__((weak)) void ei_printf_float(float f) {
+    float n = f;
+    // Prints f as plain decimal text through ei_printf(), without relying on a "%f" conversion.
+    static double PRECISION = 0.00001;
+    static int MAX_NUMBER_STRING_SIZE = 32;
+    // Output is cut off once s is nearly full, so very large magnitudes cannot overrun it.
+    char s[MAX_NUMBER_STRING_SIZE];
+
+    if (n == 0.0) {
+        strcpy(s, "0");
+    }
+    else {
+        int digit, m;
+        char *c = s;
+        int neg = (n < 0);
+        if (neg) {
+            n = -n;
+        }
+        // calculate magnitude
+        m = log10(n);
+        if (neg) {
+            *(c++) = '-';
+        }
+        if (m < 1.0) {
+            m = 0;
+        }
+        // convert the number; each pass may append a digit and a '.', and room is kept for the final '\0'
+        while ((n > PRECISION || m >= 0) && ((c - s) + 2 < MAX_NUMBER_STRING_SIZE)) {
+            double weight = pow(10.0, m);
+            if (weight > 0 && !isinf(weight)) {
+                digit = floor(n / weight);
+                n -= (digit * weight);
+                *(c++) = '0' + digit;
+            }
+            if (m == 0 && n > 0) {
+                *(c++) = '.';
+            }
+            m--;
+        }
+        *(c) = '\0';
+    }
+
+    ei_printf("%s", s);
+}
+
+/**
+ * Emits a single character by formatting it through ei_printf("%c").
+ * @param c character to output
+ */
+void ei_putchar(char c)
+{
+    ei_printf("%c", c);
+}
+
+__attribute__((weak)) void *ei_malloc(size_t size) {
+    // Zero-byte requests are answered with NULL without calling pvPortMalloc().
+    if (size == 0) {
+        return NULL;
+    }
+
+    return pvPortMalloc(size);
+}
+
+__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
+    // Return NULL for empty requests and for counts whose byte total would overflow size_t.
+    if ((nitems == 0) || (size == 0) || (nitems > ((size_t)-1) / size)) {
+        return NULL;
+    }
+
+    return pvPortCalloc(nitems, size);
+}
+
+__attribute__((weak)) void ei_free(void *ptr) { // weak default: hands ptr to vPortFree()
+    vPortFree(ptr);
+}
+
+#if defined(__cplusplus) && EI_C_LINKAGE == 1 // C linkage only when the SDK is built with EI_C_LINKAGE
+extern "C"
+#endif
+__attribute__((weak)) void DebugLog(const char* s) { // weak logging hook: prints s verbatim through ei_printf()
+    ei_printf("%s", s);
+}
+
+void * operator new( size_t size ) // global replacement: C++ allocations are served by pvPortMalloc()
+{
+    return pvPortMalloc( size ); // NOTE(review): result is returned unchecked - confirm callers cope if pvPortMalloc() yields NULL
+}
+
+void * operator new[]( size_t size ) // global replacement: array allocations are served by pvPortMalloc()
+{
+    return pvPortMalloc(size); // NOTE(review): result is returned unchecked - confirm callers cope if pvPortMalloc() yields NULL
+}
+
+void operator delete( void * ptr ) // global replacement paired with operator new above: releases through vPortFree()
+{
+    vPortFree ( ptr );
+}
+
+void operator delete[]( void * ptr ) // global replacement for array deletes: releases through vPortFree()
+{
+    vPortFree ( ptr );
+}
+
+#endif // EI_PORTING_RENESASRA8D1_FREERTOS == 1
diff --git a/edge-impulse-sdk/porting/renesas-ra6m5/ei_classifier_porting.cpp b/edge-impulse-sdk/porting/renesas-ra6m5/ei_classifier_porting.cpp
deleted file mode 100644
index 66ec09b..0000000
--- a/edge-impulse-sdk/porting/renesas-ra6m5/ei_classifier_porting.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2022 EdgeImpulse Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an "AS
- * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
- * express or implied. See the License for the specific language
- * governing permissions and limitations under the License.
- *
- * SPDX-License-Identifier: Apache-2.0
- */
-
-/* Includes */
-#include "../ei_classifier_porting.h"
-#if EI_PORTING_RENESASRA65 == 1
-
-#include "common_utils.h"
-#include <stdarg.h>
-#include <stdlib.h>
-#include <cstdio>
-#include "unistd.h"
-#include "peripheral/uart_ep.h"
-#include <math.h>
-
-// extern "C" void Serial_Out(char *string, int length);
-extern "C" uint64_t timer_get_ms(void);
-// extern "C" fsp_err_t uart_print_user_msg(uint8_t *p_msg);
-
-__attribute__((weak)) EI_IMPULSE_ERROR ei_run_impulse_check_canceled() {
-    return EI_IMPULSE_OK;
-}
-
-/**
- * Cancelable sleep, can be triggered with signal from other thread
- */
-__attribute__((weak)) EI_IMPULSE_ERROR ei_sleep(int32_t time_ms) {
-
-    uint64_t start_time = ei_read_timer_ms();
-
-    while(start_time + time_ms > ei_read_timer_ms()){};
-
-    return EI_IMPULSE_OK;
-}
-
-uint64_t ei_read_timer_ms() {
-
-    return timer_get_ms();//Timer_getMs();
-}
-
-uint64_t ei_read_timer_us() {
-
-    return timer_get_ms()*1000;
-}
-
-__attribute__((weak)) char ei_getchar()
-{
-    // dummy implementation
-    char ch = 0;
-    return ch;
-}
-
-__attribute__((weak)) void ei_printf(const char *format, ...) {
-
-    char buffer[256] = {0};
-    int length;
-    va_list myargs;
-    va_start(myargs, format);
-    length = vsnprintf(buffer, sizeof(buffer), format, myargs);
-    va_end(myargs);
-
-    if (length > 0){
-        uart_print_user_msg((uint8_t *)buffer);
-    }
-
-}
-
-__attribute__((weak)) void ei_printf_float(float f) {
-    float n = f;
-
-    static double PRECISION = 0.00001;
-    static int MAX_NUMBER_STRING_SIZE = 32;
-
-    char s[MAX_NUMBER_STRING_SIZE];
-
-    if (n == 0.0) {
-        strcpy(s, "0");
-    }
-    else {
-        int digit, m;
-        char *c = s;
-        int neg = (n < 0);
-        if (neg) {
-            n = -n;
-        }
-        // calculate magnitude
-        m = log10(n);
-        if (neg) {
-            *(c++) = '-';
-        }
-        if (m < 1.0) {
-            m = 0;
-        }
-        // convert the number
-        while (n > PRECISION || m >= 0) {
-            double weight = pow(10.0, m);
-            if (weight > 0 && !isinf(weight)) {
-                digit = floor(n / weight);
-                n -= (digit * weight);
-                *(c++) = '0' + digit;
-            }
-            if (m == 0 && n > 0) {
-                *(c++) = '.';
-            }
-            m--;
-        }
-        *(c) = '\0';
-    }
-
-    ei_printf("%s", s);
-}
-
-/**
- *
- * @param c
- */
-void ei_putchar(char c)
-{
-    ei_printf("%c", c);
-}
-
-__attribute__((weak)) void *ei_malloc(size_t size) {
-    return malloc(size);
-}
-
-__attribute__((weak)) void *ei_calloc(size_t nitems, size_t size) {
-    return calloc(nitems, size);
-}
-
-__attribute__((weak)) void ei_free(void *ptr) {
-    free(ptr);
-}
-
-#if defined(__cplusplus) && EI_C_LINKAGE == 1
-extern "C"
-#endif
-__attribute__((weak)) void DebugLog(const char* s) {
-    ei_printf("%s", s);
-}
-
-#endif // EI_PORTING_RENESASRA65 == 1
diff --git a/edge-impulse-sdk/tensorflow/LICENSE b/edge-impulse-sdk/tensorflow/LICENSE
index fb26962..d645695 100644
--- a/edge-impulse-sdk/tensorflow/LICENSE
+++ b/edge-impulse-sdk/tensorflow/LICENSE
@@ -1,4 +1,3 @@
-Copyright 2019 The TensorFlow Authors.  All rights reserved.
 
                                  Apache License
                            Version 2.0, January 2004
@@ -201,48 +200,3 @@ Copyright 2019 The TensorFlow Authors.  All rights reserved.
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-
-MIT License
-
-Copyright (c) 2017-2021 Arm Limited
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-LICENSE
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-   ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h b/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h
new file mode 100644
index 0000000..b512ba7
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/builtin_op_data.h
@@ -0,0 +1,22 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// Compatibility shim for new location of interface definitions.
+
+#ifndef TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
+#define TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+
+#endif  // TENSORFLOW_LITE_BUILTIN_OP_DATA_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/builtin_ops.h b/edge-impulse-sdk/tensorflow/lite/builtin_ops.h
new file mode 100644
index 0000000..3370730
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/builtin_ops.h
@@ -0,0 +1,194 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_BUILTIN_OPS_H_
+#define TENSORFLOW_LITE_BUILTIN_OPS_H_
+
+// DO NOT EDIT MANUALLY: This file is automatically generated by
+// `schema/builtin_ops_header/generator.cc`.
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// The enum for builtin operators.
+// Note: CUSTOM, DELEGATE, and PLACEHOLDER_FOR_GREATER_OP_CODES are 3 special
+// ops which are not real built-in ops.
+typedef enum {
+  kTfLiteBuiltinAdd = 0,
+  kTfLiteBuiltinAveragePool2d = 1,
+  kTfLiteBuiltinConcatenation = 2,
+  kTfLiteBuiltinConv2d = 3,
+  kTfLiteBuiltinDepthwiseConv2d = 4,
+  kTfLiteBuiltinDepthToSpace = 5,
+  kTfLiteBuiltinDequantize = 6,
+  kTfLiteBuiltinEmbeddingLookup = 7,
+  kTfLiteBuiltinFloor = 8,
+  kTfLiteBuiltinFullyConnected = 9,
+  kTfLiteBuiltinHashtableLookup = 10,
+  kTfLiteBuiltinL2Normalization = 11,
+  kTfLiteBuiltinL2Pool2d = 12,
+  kTfLiteBuiltinLocalResponseNormalization = 13,
+  kTfLiteBuiltinLogistic = 14,
+  kTfLiteBuiltinLshProjection = 15,
+  kTfLiteBuiltinLstm = 16,
+  kTfLiteBuiltinMaxPool2d = 17,
+  kTfLiteBuiltinMul = 18,
+  kTfLiteBuiltinRelu = 19,
+  kTfLiteBuiltinReluN1To1 = 20,
+  kTfLiteBuiltinRelu6 = 21,
+  kTfLiteBuiltinReshape = 22,
+  kTfLiteBuiltinResizeBilinear = 23,
+  kTfLiteBuiltinRnn = 24,
+  kTfLiteBuiltinSoftmax = 25,
+  kTfLiteBuiltinSpaceToDepth = 26,
+  kTfLiteBuiltinSvdf = 27,
+  kTfLiteBuiltinTanh = 28,
+  kTfLiteBuiltinConcatEmbeddings = 29,
+  kTfLiteBuiltinSkipGram = 30,
+  kTfLiteBuiltinCall = 31,
+  kTfLiteBuiltinCustom = 32,
+  kTfLiteBuiltinEmbeddingLookupSparse = 33,
+  kTfLiteBuiltinPad = 34,
+  kTfLiteBuiltinUnidirectionalSequenceRnn = 35,
+  kTfLiteBuiltinGather = 36,
+  kTfLiteBuiltinBatchToSpaceNd = 37,
+  kTfLiteBuiltinSpaceToBatchNd = 38,
+  kTfLiteBuiltinTranspose = 39,
+  kTfLiteBuiltinMean = 40,
+  kTfLiteBuiltinSub = 41,
+  kTfLiteBuiltinDiv = 42,
+  kTfLiteBuiltinSqueeze = 43,
+  kTfLiteBuiltinUnidirectionalSequenceLstm = 44,
+  kTfLiteBuiltinStridedSlice = 45,
+  kTfLiteBuiltinBidirectionalSequenceRnn = 46,
+  kTfLiteBuiltinExp = 47,
+  kTfLiteBuiltinTopkV2 = 48,
+  kTfLiteBuiltinSplit = 49,
+  kTfLiteBuiltinLogSoftmax = 50,
+  kTfLiteBuiltinDelegate = 51,
+  kTfLiteBuiltinBidirectionalSequenceLstm = 52,
+  kTfLiteBuiltinCast = 53,
+  kTfLiteBuiltinPrelu = 54,
+  kTfLiteBuiltinMaximum = 55,
+  kTfLiteBuiltinArgMax = 56,
+  kTfLiteBuiltinMinimum = 57,
+  kTfLiteBuiltinLess = 58,
+  kTfLiteBuiltinNeg = 59,
+  kTfLiteBuiltinPadv2 = 60,
+  kTfLiteBuiltinGreater = 61,
+  kTfLiteBuiltinGreaterEqual = 62,
+  kTfLiteBuiltinLessEqual = 63,
+  kTfLiteBuiltinSelect = 64,
+  kTfLiteBuiltinSlice = 65,
+  kTfLiteBuiltinSin = 66,
+  kTfLiteBuiltinTransposeConv = 67,
+  kTfLiteBuiltinSparseToDense = 68,
+  kTfLiteBuiltinTile = 69,
+  kTfLiteBuiltinExpandDims = 70,
+  kTfLiteBuiltinEqual = 71,
+  kTfLiteBuiltinNotEqual = 72,
+  kTfLiteBuiltinLog = 73,
+  kTfLiteBuiltinSum = 74,
+  kTfLiteBuiltinSqrt = 75,
+  kTfLiteBuiltinRsqrt = 76,
+  kTfLiteBuiltinShape = 77,
+  kTfLiteBuiltinPow = 78,
+  kTfLiteBuiltinArgMin = 79,
+  kTfLiteBuiltinFakeQuant = 80,
+  kTfLiteBuiltinReduceProd = 81,
+  kTfLiteBuiltinReduceMax = 82,
+  kTfLiteBuiltinPack = 83,
+  kTfLiteBuiltinLogicalOr = 84,
+  kTfLiteBuiltinOneHot = 85,
+  kTfLiteBuiltinLogicalAnd = 86,
+  kTfLiteBuiltinLogicalNot = 87,
+  kTfLiteBuiltinUnpack = 88,
+  kTfLiteBuiltinReduceMin = 89,
+  kTfLiteBuiltinFloorDiv = 90,
+  kTfLiteBuiltinReduceAny = 91,
+  kTfLiteBuiltinSquare = 92,
+  kTfLiteBuiltinZerosLike = 93,
+  kTfLiteBuiltinFill = 94,
+  kTfLiteBuiltinFloorMod = 95,
+  kTfLiteBuiltinRange = 96,
+  kTfLiteBuiltinResizeNearestNeighbor = 97,
+  kTfLiteBuiltinLeakyRelu = 98,
+  kTfLiteBuiltinSquaredDifference = 99,
+  kTfLiteBuiltinMirrorPad = 100,
+  kTfLiteBuiltinAbs = 101,
+  kTfLiteBuiltinSplitV = 102,
+  kTfLiteBuiltinUnique = 103,
+  kTfLiteBuiltinCeil = 104,
+  kTfLiteBuiltinReverseV2 = 105,
+  kTfLiteBuiltinAddN = 106,
+  kTfLiteBuiltinGatherNd = 107,
+  kTfLiteBuiltinCos = 108,
+  kTfLiteBuiltinWhere = 109,
+  kTfLiteBuiltinRank = 110,
+  kTfLiteBuiltinElu = 111,
+  kTfLiteBuiltinReverseSequence = 112,
+  kTfLiteBuiltinMatrixDiag = 113,
+  kTfLiteBuiltinQuantize = 114,
+  kTfLiteBuiltinMatrixSetDiag = 115,
+  kTfLiteBuiltinRound = 116,
+  kTfLiteBuiltinHardSwish = 117,
+  kTfLiteBuiltinIf = 118,
+  kTfLiteBuiltinWhile = 119,
+  kTfLiteBuiltinNonMaxSuppressionV4 = 120,
+  kTfLiteBuiltinNonMaxSuppressionV5 = 121,
+  kTfLiteBuiltinScatterNd = 122,
+  kTfLiteBuiltinSelectV2 = 123,
+  kTfLiteBuiltinDensify = 124,
+  kTfLiteBuiltinSegmentSum = 125,
+  kTfLiteBuiltinBatchMatmul = 126,
+  kTfLiteBuiltinPlaceholderForGreaterOpCodes = 127,
+  kTfLiteBuiltinCumsum = 128,
+  kTfLiteBuiltinCallOnce = 129,
+  kTfLiteBuiltinBroadcastTo = 130,
+  kTfLiteBuiltinRfft2d = 131,
+  kTfLiteBuiltinConv3d = 132,
+  kTfLiteBuiltinImag = 133,
+  kTfLiteBuiltinReal = 134,
+  kTfLiteBuiltinComplexAbs = 135,
+  kTfLiteBuiltinHashtable = 136,
+  kTfLiteBuiltinHashtableFind = 137,
+  kTfLiteBuiltinHashtableImport = 138,
+  kTfLiteBuiltinHashtableSize = 139,
+  kTfLiteBuiltinReduceAll = 140,
+  kTfLiteBuiltinConv3dTranspose = 141,
+  kTfLiteBuiltinVarHandle = 142,
+  kTfLiteBuiltinReadVariable = 143,
+  kTfLiteBuiltinAssignVariable = 144,
+  kTfLiteBuiltinBroadcastArgs = 145,
+  kTfLiteBuiltinRandomStandardNormal = 146,
+  kTfLiteBuiltinBucketize = 147,
+  kTfLiteBuiltinRandomUniform = 148,
+  kTfLiteBuiltinMultinomial = 149,
+  kTfLiteBuiltinGelu = 150,
+  kTfLiteBuiltinDynamicUpdateSlice = 151,
+  kTfLiteBuiltinRelu0To1 = 152,
+  kTfLiteBuiltinUnsortedSegmentProd = 153,
+  kTfLiteBuiltinUnsortedSegmentMax = 154,
+  kTfLiteBuiltinUnsortedSegmentSum = 155,
+  kTfLiteBuiltinAtan2 = 156,
+  kTfLiteBuiltinUnsortedSegmentMin = 157,
+  kTfLiteBuiltinSign = 158,
+} TfLiteBuiltinOperator;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // TENSORFLOW_LITE_BUILTIN_OPS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h b/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h
index 29bdd7e..f1e511a 100644
--- a/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h
+++ b/edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h
@@ -15,488 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
 #define TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
 
-#include <stdint.h>
-
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
-// number of dimensions.
-#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
-
-// TODO(aselle): Consider using "if this then that" for testing.
-
-// Useful placeholder to put in otherwise empty structs to avoid size warnings.
-typedef struct {
-  char dummy;
-} EmptyStructPlaceholder;
-
-// IMPORTANT: All new members of structs must be added at the end to ensure
-// backwards compatibility.
-
-// Possible padding types (for convolutions)
-typedef enum {
-  kTfLitePaddingUnknown = 0,
-  kTfLitePaddingSame,
-  kTfLitePaddingValid,
-} TfLitePadding;
-
-typedef enum {
-  kTfLiteMirrorPaddingUnknown = 0,
-  kTfLiteMirrorPaddingReflect,
-  kTfLiteMirrorPaddingSymmetric,
-} TfLiteMirrorPaddingMode;
-
-// TODO(b/130259536): We should move this out of builtin_op_data.
-typedef struct {
-  int width;
-  int height;
-  int width_offset;
-  int height_offset;
-} TfLitePaddingValues;
-
-typedef struct {
-  TfLiteMirrorPaddingMode mode;
-} TfLiteMirrorPaddingParams;
-
-// Possible fused activation functions.
-// TODO(aselle): rename to TfLiteActivation
-typedef enum {
-  kTfLiteActNone = 0,
-  kTfLiteActRelu,
-  kTfLiteActReluN1To1,  // min(max(-1, x), 1)
-  kTfLiteActRelu6,      // min(max(0, x), 6)
-  kTfLiteActTanh,
-  kTfLiteActSignBit,
-  kTfLiteActSigmoid,
-} TfLiteFusedActivation;
-
-typedef struct {
-  // Parameters for CONV_2D version 1.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  TfLiteFusedActivation activation;
-
-  // Parameters for CONV_2D version 2.
-  // Note: Version 2 supports dilation values not equal to 1.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteConvParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int stride_depth;
-  int dilation_width_factor;
-  int dilation_height_factor;
-  int dilation_depth_factor;
-  TfLiteFusedActivation activation;
-} TfLiteConv3DParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  int filter_width;
-  int filter_height;
-  TfLiteFusedActivation activation;
-  struct {
-    TfLitePaddingValues padding;
-  } computed;
-} TfLitePoolParams;
-
-typedef struct {
-  // Parameters for DepthwiseConv version 1 or above.
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-  // `depth_multiplier` is redundant. It's used by CPU kernels in
-  // TensorFlow 2.0 or below, but ignored in versions above.
-  //
-  // The information can be deduced from the shape of input and the shape of
-  // weights. Since the TFLiteConverter toolchain doesn't support partially
-  // specified shapes, relying on `depth_multiplier` stops us from supporting
-  // graphs with dynamic shape tensors.
-  //
-  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
-  // field.
-  int depth_multiplier;
-  TfLiteFusedActivation activation;
-  // Parameters for DepthwiseConv version 2 or above.
-  int dilation_width_factor;
-  int dilation_height_factor;
-} TfLiteDepthwiseConvParams;
-
-typedef struct {
-  int rank;
-  TfLiteFusedActivation activation;
-
-  // Parameter for SVDF version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteSVDFParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-
-  // Parameter for RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-
-  // Parameter for Sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteSequenceRNNParams;
-
-typedef struct {
-  bool time_major;
-  TfLiteFusedActivation activation;
-  bool merge_outputs;
-
-  // Parameter for Bidirectional RNN verison 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceRNNParams;
-
-typedef enum {
-  kTfLiteFullyConnectedWeightsFormatDefault = 0,
-  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
-} TfLiteFullyConnectedWeightsFormat;
-
-typedef struct {
-  // Parameters for FullyConnected version 1 or above.
-  TfLiteFusedActivation activation;
-
-  // Parameters for FullyConnected version 2 or above.
-  TfLiteFullyConnectedWeightsFormat weights_format;
-
-  // Parameters for FullyConnected version 5 or above.
-  // If set to true, then the number of dimensions in the input and the output
-  // tensors are the same. Furthermore, all but the last dimension of the input
-  // and output shapes will be equal.
-  bool keep_num_dims;
-
-  // Parameters for FullyConnected version 7 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteFullyConnectedParams;
-
-typedef enum {
-  kTfLiteLshProjectionUnknown = 0,
-  kTfLiteLshProjectionSparse = 1,
-  kTfLiteLshProjectionDense = 2,
-} TfLiteLSHProjectionType;
-
-typedef struct {
-  TfLiteLSHProjectionType type;
-} TfLiteLSHProjectionParams;
-
-typedef struct {
-  float beta;
-} TfLiteSoftmaxParams;
-
-typedef struct {
-  int axis;
-  TfLiteFusedActivation activation;
-} TfLiteConcatenationParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 4.
-  bool pot_scale_int16;
-} TfLiteAddParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteSpaceToBatchNDParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteBatchToSpaceNDParams;
-
-typedef struct {
-  bool adj_x;
-  bool adj_y;
-  // Parameters for BatchMatMul version 4 or above.
-  // If set to true and the weights are quantized, then non constant inputs
-  // are quantized at evaluation time with asymmetric quantization.
-  bool asymmetric_quantize_inputs;
-} TfLiteBatchMatMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteMulParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-  // Parameter added for the version 5.
-  bool pot_scale_int16;
-} TfLiteSubParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteDivParams;
-
-typedef struct {
-  TfLiteFusedActivation activation;
-} TfLiteL2NormParams;
-
-typedef struct {
-  int radius;
-  float bias;
-  float alpha;
-  float beta;
-} TfLiteLocalResponseNormParams;
-
-typedef enum {
-  kTfLiteLSTMFullKernel = 0,
-  kTfLiteLSTMBasicKernel
-} TfLiteLSTMKernelType;
-
-typedef struct {
-  // Parameters for LSTM version 1.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // Parameters for LSTM version 2.
-  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
-  TfLiteLSTMKernelType kernel_type;
-
-  // Parameters for LSTM version 4.
-  bool asymmetric_quantize_inputs;
-} TfLiteLSTMParams;
-
-typedef struct {
-  // Parameters needed for the underlying LSTM.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameter for unidirectional sequence RNN version 3.
-  bool asymmetric_quantize_inputs;
-} TfLiteUnidirectionalSequenceLSTMParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  // Parameters inherited for the LSTM kernel.
-  TfLiteFusedActivation activation;
-  float cell_clip;
-  float proj_clip;
-
-  // If true, store the outputs of both directions in the first output.
-  bool merge_outputs;
-
-  // Parameters supported by version 2:
-  // If set to true then the first dimension is time, otherwise batch.
-  bool time_major;
-
-  // Parameters supported by version 4:
-  // If set to true, then hybrid ops use asymmetric quantization for inputs.
-  bool asymmetric_quantize_inputs;
-} TfLiteBidirectionalSequenceLSTMParams;
-
-typedef struct {
-  bool align_corners;
-  // half_pixel_centers assumes pixels are of half the actual dimensions, and
-  // yields more accurate resizes. Corresponds to the same argument for the
-  // original TensorFlow op in TF2.0.
-  bool half_pixel_centers;
-} TfLiteResizeBilinearParams;
-
-typedef struct {
-  bool align_corners;
-  bool half_pixel_centers;
-} TfLiteResizeNearestNeighborParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLitePadV2Params;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
-  int num_dimensions;
-} TfLiteReshapeParams;
-
-typedef struct {
-  int ngram_size;
-  int max_skip_size;
-  bool include_all_ngrams;
-} TfLiteSkipGramParams;
-
-typedef struct {
-  int block_size;
-} TfLiteSpaceToDepthParams;
-
-typedef struct {
-  int block_size;
-} TfLiteDepthToSpaceParams;
-
-typedef struct {
-  TfLiteType in_data_type;
-  TfLiteType out_data_type;
-} TfLiteCastParams;
-
-typedef enum {
-  kTfLiteCombinerTypeSum = 0,
-  kTfLiteCombinerTypeMean = 1,
-  kTfLiteCombinerTypeSqrtn = 2,
-} TfLiteCombinerType;
-
-typedef struct {
-  TfLiteCombinerType combiner;
-} TfLiteEmbeddingLookupSparseParams;
-
-typedef struct {
-  int axis;
-  int batch_dims;
-} TfLiteGatherParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteTransposeParams;
-
-typedef struct {
-  bool keep_dims;
-} TfLiteReducerParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitParams;
-
-typedef struct {
-  int num_splits;
-} TfLiteSplitVParams;
-
-typedef struct {
-  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
-  // For now we will fix the maximum possible number of dimensions.
-  int squeeze_dims[8];
-  int num_squeeze_dims;
-} TfLiteSqueezeParams;
-
-typedef struct {
-  int begin_mask;
-  int end_mask;
-  int ellipsis_mask;
-  int new_axis_mask;
-  int shrink_axis_mask;
-} TfLiteStridedSliceParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMaxParams;
-
-typedef struct {
-  TfLiteType output_type;
-} TfLiteArgMinParams;
-
-typedef struct {
-  TfLitePadding padding;
-  int stride_width;
-  int stride_height;
-} TfLiteTransposeConvParams;
-
-typedef struct {
-  bool validate_indices;
-} TfLiteSparseToDenseParams;
-
-typedef struct {
-  TfLiteType out_type;
-} TfLiteShapeParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteRankParams;
-
-typedef struct {
-  // Parameters supported by version 1:
-  float min;
-  float max;
-  int num_bits;
-
-  // Parameters supported by version 2:
-  bool narrow_range;
-} TfLiteFakeQuantParams;
-
-typedef struct {
-  int values_count;
-  int axis;
-} TfLitePackParams;
-
-typedef struct {
-  int axis;
-} TfLiteOneHotParams;
-
-typedef struct {
-  int num;
-  int axis;
-} TfLiteUnpackParams;
-
-typedef struct {
-  float alpha;
-} TfLiteLeakyReluParams;
-
-typedef struct {
-  TfLiteType index_out_type;
-} TfLiteUniqueParams;
-
-typedef struct {
-  int seq_dim;
-  int batch_dim;
-} TfLiteReverseSequenceParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixDiagParams;
-
-typedef struct {
-  EmptyStructPlaceholder placeholder;
-} TfLiteMatrixSetDiagParams;
-
-typedef struct {
-  int then_subgraph_index;
-  int else_subgraph_index;
-} TfLiteIfParams;
-
-typedef struct {
-  int cond_subgraph_index;
-  int body_subgraph_index;
-} TfLiteWhileParams;
-
-typedef struct {
-  bool exclusive;
-  bool reverse;
-} TfLiteCumsumParams;
-
-typedef struct {
-  int init_subgraph_index;
-} TfLiteCallOnceParams;
-
-typedef struct {
-  int table_id;
-  TfLiteType key_dtype;
-  TfLiteType value_dtype;
-} TfLiteHashtableParams;
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
+/// For documentation, see
+/// third_party/tensorflow/lite/core/c/builtin_op_data.h.
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"  // IWYU pragma: export
 
 #endif  // TENSORFLOW_LITE_C_BUILTIN_OP_DATA_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h b/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h
index 0128477..4d3fab2 100644
--- a/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h
+++ b/edge-impulse-sdk/tensorflow/lite/c/c_api_types.h
@@ -19,77 +19,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_C_C_API_TYPES_H_
 #define TENSORFLOW_LITE_C_C_API_TYPES_H_
 
-#include <stdint.h>
+/// For documentation, see
+/// third_party/tensorflow/lite/core/c/c_api_types.h.
+#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h"  // IWYU pragma: export
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
-// library.
-#ifdef SWIG
-#define TFL_CAPI_EXPORT
-#else
-#if defined(_WIN32)
-#ifdef TFL_COMPILE_LIBRARY
-#define TFL_CAPI_EXPORT __declspec(dllexport)
-#else
-#define TFL_CAPI_EXPORT __declspec(dllimport)
-#endif  // TFL_COMPILE_LIBRARY
-#else
-#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
-#endif  // _WIN32
-#endif  // SWIG
-
-typedef enum TfLiteStatus {
-  kTfLiteOk = 0,
-
-  // Generally referring to an error in the runtime (i.e. interpreter)
-  kTfLiteError = 1,
-
-  // Generally referring to an error from a TfLiteDelegate itself.
-  kTfLiteDelegateError = 2,
-
-  // Generally referring to an error in applying a delegate due to
-  // incompatibility between runtime and delegate, e.g., this error is returned
-  // when trying to apply a TfLite delegate onto a model graph that's already
-  // immutable.
-  kTfLiteApplicationError = 3
-} TfLiteStatus;
-
-// Types supported by tensor
-typedef enum {
-  kTfLiteNoType = 0,
-  kTfLiteFloat32 = 1,
-  kTfLiteInt32 = 2,
-  kTfLiteUInt8 = 3,
-  kTfLiteInt64 = 4,
-  kTfLiteString = 5,
-  kTfLiteBool = 6,
-  kTfLiteInt16 = 7,
-  kTfLiteComplex64 = 8,
-  kTfLiteInt8 = 9,
-  kTfLiteFloat16 = 10,
-  kTfLiteFloat64 = 11,
-  kTfLiteComplex128 = 12,
-  kTfLiteUInt64 = 13,
-  kTfLiteResource = 14,
-  kTfLiteVariant = 15,
-  kTfLiteUInt32 = 16,
-} TfLiteType;
-
-// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
-// If per-layer quantization is specified this field will still be populated in
-// addition to TfLiteAffineQuantization.
-// Parameters for asymmetric quantization. Quantized values can be converted
-// back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteQuantizationParams {
-  float scale;
-  int32_t zero_point;
-} TfLiteQuantizationParams;
-
-#ifdef __cplusplus
-}  // extern C
-#endif
 #endif  // TENSORFLOW_LITE_C_C_API_TYPES_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/c/common.c b/edge-impulse-sdk/tensorflow/lite/c/common.c
index e141d66..9efcd3a 100644
--- a/edge-impulse-sdk/tensorflow/lite/c/common.c
+++ b/edge-impulse-sdk/tensorflow/lite/c/common.c
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,233 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+// Dummy file for backwards compatibility.
+// See core/api/common.cc
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
-
-#ifndef TF_LITE_STATIC_MEMORY
-#include <stdlib.h>
-#include <string.h>
-#endif  // TF_LITE_STATIC_MEMORY
-
-int TfLiteIntArrayGetSizeInBytes(int size) {
-  static TfLiteIntArray dummy;
-  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
-}
-
-int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
-  if (a == b) return 1;
-  if (a == NULL || b == NULL) return 0;
-  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
-}
-
-int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
-                              const int b_data[]) {
-  if (a == NULL) return (b_size == 0);
-  if (a->size != b_size) return 0;
-  int i = 0;
-  for (; i < a->size; i++)
-    if (a->data[i] != b_data[i]) return 0;
-  return 1;
-}
-
-#ifndef TF_LITE_STATIC_MEMORY
-
-TfLiteIntArray* TfLiteIntArrayCreate(int size) {
-  int alloc_size = TfLiteIntArrayGetSizeInBytes(size);
-  if (alloc_size <= 0) return NULL;
-  TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
-  if (!ret) return ret;
-  ret->size = size;
-  return ret;
-}
-
-TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
-  if (!src) return NULL;
-  TfLiteIntArray* ret = TfLiteIntArrayCreate(src->size);
-  if (ret) {
-    memcpy(ret->data, src->data, src->size * sizeof(int));
-  }
-  return ret;
-}
-
-void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }
-
-#endif  // TF_LITE_STATIC_MEMORY
-
-int TfLiteFloatArrayGetSizeInBytes(int size) {
-  static TfLiteFloatArray dummy;
-  return sizeof(dummy) + sizeof(dummy.data[0]) * size;
-}
-
-#ifndef TF_LITE_STATIC_MEMORY
-
-TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
-  TfLiteFloatArray* ret =
-      (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
-  ret->size = size;
-  return ret;
-}
-
-void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }
-
-void TfLiteTensorDataFree(TfLiteTensor* t) {
-  if (t->allocation_type == kTfLiteDynamic ||
-      t->allocation_type == kTfLitePersistentRo) {
-    free(t->data.raw);
-  }
-  t->data.raw = NULL;
-}
-
-void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
-  if (quantization->type == kTfLiteAffineQuantization) {
-    TfLiteAffineQuantization* q_params =
-        (TfLiteAffineQuantization*)(quantization->params);
-    if (q_params->scale) {
-      TfLiteFloatArrayFree(q_params->scale);
-      q_params->scale = NULL;
-    }
-    if (q_params->zero_point) {
-      TfLiteIntArrayFree(q_params->zero_point);
-      q_params->zero_point = NULL;
-    }
-    free(q_params);
-  }
-  quantization->params = NULL;
-  quantization->type = kTfLiteNoQuantization;
-}
-
-void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
-  if (sparsity == NULL) {
-    return;
-  }
-
-  if (sparsity->traversal_order) {
-    TfLiteIntArrayFree(sparsity->traversal_order);
-    sparsity->traversal_order = NULL;
-  }
-
-  if (sparsity->block_map) {
-    TfLiteIntArrayFree(sparsity->block_map);
-    sparsity->block_map = NULL;
-  }
-
-  if (sparsity->dim_metadata) {
-    int i = 0;
-    for (; i < sparsity->dim_metadata_size; i++) {
-      TfLiteDimensionMetadata metadata = sparsity->dim_metadata[i];
-      if (metadata.format == kTfLiteDimSparseCSR) {
-        TfLiteIntArrayFree(metadata.array_segments);
-        metadata.array_segments = NULL;
-        TfLiteIntArrayFree(metadata.array_indices);
-        metadata.array_indices = NULL;
-      }
-    }
-    free(sparsity->dim_metadata);
-    sparsity->dim_metadata = NULL;
-  }
-
-  free(sparsity);
-}
-
-void TfLiteTensorFree(TfLiteTensor* t) {
-  TfLiteTensorDataFree(t);
-  if (t->dims) TfLiteIntArrayFree(t->dims);
-  t->dims = NULL;
-
-  if (t->dims_signature) {
-    TfLiteIntArrayFree((TfLiteIntArray *) t->dims_signature);
-  }
-  t->dims_signature = NULL;
-
-  TfLiteQuantizationFree(&t->quantization);
-  TfLiteSparsityFree(t->sparsity);
-  t->sparsity = NULL;
-}
-
-void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
-                       TfLiteQuantizationParams quantization, char* buffer,
-                       size_t size, TfLiteAllocationType allocation_type,
-                       const void* allocation, bool is_variable,
-                       TfLiteTensor* tensor) {
-  TfLiteTensorFree(tensor);
-  tensor->type = type;
-  tensor->name = name;
-  tensor->dims = dims;
-  tensor->params = quantization;
-  tensor->data.raw = buffer;
-  tensor->bytes = size;
-  tensor->allocation_type = allocation_type;
-  tensor->allocation = allocation;
-  tensor->is_variable = is_variable;
-
-  tensor->quantization.type = kTfLiteNoQuantization;
-  tensor->quantization.params = NULL;
-}
-
-void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
-  if (tensor->allocation_type != kTfLiteDynamic &&
-      tensor->allocation_type != kTfLitePersistentRo) {
-    return;
-  }
-  // TODO(b/145340303): Tensor data should be aligned.
-  if (!tensor->data.raw) {
-    tensor->data.raw = malloc(num_bytes);
-  } else if (num_bytes > tensor->bytes) {
-    tensor->data.raw = realloc(tensor->data.raw, num_bytes);
-  }
-  tensor->bytes = num_bytes;
-}
-#endif  // TF_LITE_STATIC_MEMORY
-
-const char* TfLiteTypeGetName(TfLiteType type) {
-  switch (type) {
-    case kTfLiteNoType:
-      return "NOTYPE";
-    case kTfLiteFloat32:
-      return "FLOAT32";
-    case kTfLiteInt16:
-      return "INT16";
-    case kTfLiteInt32:
-      return "INT32";
-    case kTfLiteUInt32:
-      return "UINT32";
-    case kTfLiteUInt8:
-      return "UINT8";
-    case kTfLiteInt8:
-      return "INT8";
-    case kTfLiteInt64:
-      return "INT64";
-    case kTfLiteUInt64:
-      return "UINT64";
-    case kTfLiteBool:
-      return "BOOL";
-    case kTfLiteComplex64:
-      return "COMPLEX64";
-    case kTfLiteComplex128:
-      return "COMPLEX128";
-    case kTfLiteString:
-      return "STRING";
-    case kTfLiteFloat16:
-      return "FLOAT16";
-    case kTfLiteFloat64:
-      return "FLOAT64";
-    case kTfLiteResource:
-      return "RESOURCE";
-    case kTfLiteVariant:
-      return "VARIANT";
-  }
-  return "Unknown type";
-}
-
-TfLiteDelegate TfLiteDelegateCreate() {
-  TfLiteDelegate d = {
-      .data_ = NULL,
-      .Prepare = NULL,
-      .CopyFromBufferHandle = NULL,
-      .CopyToBufferHandle = NULL,
-      .FreeBufferHandle = NULL,
-      .flags = kTfLiteDelegateFlagsNone,
-  };
-  return d;
-}
diff --git a/edge-impulse-sdk/tensorflow/lite/c/common.h b/edge-impulse-sdk/tensorflow/lite/c/common.h
index 19f09af..00c3768 100644
--- a/edge-impulse-sdk/tensorflow/lite/c/common.h
+++ b/edge-impulse-sdk/tensorflow/lite/c/common.h
@@ -36,902 +36,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_C_COMMON_H_
 #define TENSORFLOW_LITE_C_COMMON_H_
 
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
+/// For documentation, see
+/// third_party/tensorflow/lite/core/c/common.h.
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"  // IWYU pragma: export
 
-#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"  // IWYU pragma: export
-
-#ifdef __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// The list of external context types known to TF Lite. This list exists solely
-// to avoid conflicts and to ensure ops can share the external contexts they
-// need. Access to the external contexts is controlled by one of the
-// corresponding support files.
-typedef enum TfLiteExternalContextType {
-  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
-  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
-  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
-  kTfLiteCpuBackendContext = 3,  // include cpu_backend_context.h to use.
-  kTfLiteMaxExternalContexts = 4
-} TfLiteExternalContextType;
-
-// Forward declare so dependent structs and methods can reference these types
-// prior to the struct definitions.
-struct TfLiteContext;
-struct TfLiteDelegate;
-struct TfLiteRegistration;
-
-// An external context is a collection of information unrelated to the TF Lite
-// framework, but useful to a subset of the ops. TF Lite knows very little
-// about the actual contexts, but it keeps a list of them, and is able to
-// refresh them if configurations like the number of recommended threads
-// change.
-typedef struct TfLiteExternalContext {
-  TfLiteExternalContextType type;
-  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
-} TfLiteExternalContext;
-
-#define kTfLiteOptionalTensor (-1)
-
-// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
-// indices
-typedef struct TfLiteIntArray {
-  int size;
-// gcc 6.1+ have a bug where flexible members aren't properly handled
-// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
-#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
-     __GNUC_MINOR__ >= 1) ||                                      \
-    defined(HEXAGON) ||                                           \
-    (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1)
-  int data[0];
-#else
-  int data[];
-#endif
-} TfLiteIntArray;
-
-// Given the size (number of elements) in a TfLiteIntArray, calculate its size
-// in bytes.
-int TfLiteIntArrayGetSizeInBytes(int size);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a array of a given `size` (uninitialized entries).
-// This returns a pointer, that you must free using TfLiteIntArrayFree().
-TfLiteIntArray* TfLiteIntArrayCreate(int size);
-#endif
-
-// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
-int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
-
-// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
-int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
-                              const int b_data[]);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a copy of an array passed as `src`.
-// You are expected to free memory with TfLiteIntArrayFree
-TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
-
-// Free memory of array `a`.
-void TfLiteIntArrayFree(TfLiteIntArray* a);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Fixed size list of floats. Used for per-channel quantization.
-typedef struct TfLiteFloatArray {
-  int size;
-// gcc 6.1+ have a bug where flexible members aren't properly handled
-// https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
-// This also applies to the toolchain used for Qualcomm Hexagon DSPs.
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
-    __GNUC_MINOR__ >= 1
-  float data[0];
-#else
-  float data[];
-#endif
-} TfLiteFloatArray;
-
-// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
-// in bytes.
-int TfLiteFloatArrayGetSizeInBytes(int size);
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Create a array of a given `size` (uninitialized entries).
-// This returns a pointer, that you must free using TfLiteFloatArrayFree().
-TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
-
-// Free memory of array `a`.
-void TfLiteFloatArrayFree(TfLiteFloatArray* a);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Since we must not depend on any libraries, define a minimal subset of
-// error macros while avoiding names that have pre-conceived meanings like
-// assert and check.
-
-// Try to make all reporting calls through TF_LITE_KERNEL_LOG rather than
-// calling the context->ReportError function directly, so that message strings
-// can be stripped out if the binary size needs to be severely optimized.
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-#ifdef TF_LITE_LOG_FILE_NAME
-#define TF_LITE_KERNEL_LOG(context, ...)            \
-  do {                                              \
-    (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \
-  } while (false)
-
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
-  do {                                                \
-    if ((context) != nullptr) {                       \
-      (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \
-    }                                                 \
-  } while (false)
-#else // TF_LITE_LOG_FILE_NAME
-#define TF_LITE_KERNEL_LOG(context, ...)            \
-  do {                                              \
-    (context)->ReportError((context), __VA_ARGS__); \
-  } while (false)
-
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
-  do {                                                \
-    if ((context) != nullptr) {                       \
-      (context)->ReportError((context), __VA_ARGS__); \
-    }                                                 \
-  } while (false)
-#endif // TF_LITE_LOG_FILE_NAME
-#else  // TF_LITE_STRIP_ERROR_STRINGSTF_LITE_KERNEL_LOG
-#define TF_LITE_KERNEL_LOG(context, ...)
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
-#endif  // TF_LITE_STRIP_ERROR_STRINGS
-
-// Check whether value is true, and if not return kTfLiteError from
-// the current function (and report the error string msg).
-#define TF_LITE_ENSURE_MSG(context, value, msg)        \
-  do {                                                 \
-    if (!(value)) {                                    \
-      TF_LITE_KERNEL_LOG((context), msg); \
-      return kTfLiteError;                             \
-    }                                                  \
-  } while (0)
-
-// Check whether the value `a` is true, and if not return kTfLiteError from
-// the current function, while also reporting the location of the error.
-#define TF_LITE_ENSURE(context, a)                                      \
-  do {                                                                  \
-    if (!(a)) {                                                         \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
-                         __LINE__, #a);                                 \
-      return kTfLiteError;                                              \
-    }                                                                   \
-  } while (0)
-
-#define TF_LITE_ENSURE_STATUS(a) \
-  do {                           \
-    const TfLiteStatus s = (a);  \
-    if (s != kTfLiteOk) {        \
-      return s;                  \
-    }                            \
-  } while (0)
-
-// Check whether the value `a == b` is true, and if not return kTfLiteError from
-// the current function, while also reporting the location of the error.
-// `a` and `b` may be evaluated more than once, so no side effects or
-// extremely expensive computations should be done.
-// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
-#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
-  do {                                                                     \
-    if ((a) != (b)) {                                                      \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
-                         __LINE__, #a, #b, (a), (b));                      \
-      return kTfLiteError;                                                 \
-    }                                                                      \
-  } while (0)
-
-#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
-  do {                                                                     \
-    if ((a) != (b)) {                                                      \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
-                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
-                         TfLiteTypeGetName(b));                            \
-      return kTfLiteError;                                                 \
-    }                                                                      \
-  } while (0)
-
-#define TF_LITE_ENSURE_NEAR(context, a, b, epsilon)                          \
-  do {                                                                       \
-    auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a));                    \
-    if (delta > epsilon) {                                                   \
-      TF_LITE_KERNEL_LOG((context), "%s:%d %s not near %s (%f != %f)",       \
-                         __FILE__, __LINE__, #a, #b, static_cast<double>(a), \
-                         static_cast<double>(b));                            \
-      return kTfLiteError;                                                   \
-    }                                                                        \
-  } while (0)
-
-#define TF_LITE_ENSURE_OK(context, status) \
-  do {                                     \
-    const TfLiteStatus s = (status);       \
-    if ((s) != kTfLiteOk) {                \
-      return s;                            \
-    }                                      \
-  } while (0)
-
-// Single-precision complex data type compatible with the C99 definition.
-typedef struct TfLiteComplex64 {
-  float re, im;  // real and imaginary parts, respectively.
-} TfLiteComplex64;
-
-// Double-precision complex data type compatible with the C99 definition.
-typedef struct TfLiteComplex128 {
-  double re, im;  // real and imaginary parts, respectively.
-} TfLiteComplex128;
-
-// Half precision data type compatible with the C99 definition.
-typedef struct TfLiteFloat16 {
-  uint16_t data;
-} TfLiteFloat16;
-
-// Return the name of a given type, for error reporting purposes.
-const char* TfLiteTypeGetName(TfLiteType type);
-
-// SupportedQuantizationTypes.
-typedef enum TfLiteQuantizationType {
-  // No quantization.
-  kTfLiteNoQuantization = 0,
-  // Affine quantization (with support for per-channel quantization).
-  // Corresponds to TfLiteAffineQuantization.
-  kTfLiteAffineQuantization = 1,
-} TfLiteQuantizationType;
-
-// Structure specifying the quantization used by the tensor, if-any.
-typedef struct TfLiteQuantization {
-  // The type of quantization held by params.
-  TfLiteQuantizationType type;
-  // Holds an optional reference to a quantization param structure. The actual
-  // type depends on the value of the `type` field (see the comment there for
-  // the values and corresponding types).
-  void* params;
-} TfLiteQuantization;
-
-// Parameters for asymmetric quantization across a dimension (i.e per output
-// channel quantization).
-// quantized_dimension specifies which dimension the scales and zero_points
-// correspond to.
-// For a particular value in quantized_dimension, quantized values can be
-// converted back to float using:
-//     real_value = scale * (quantized_value - zero_point)
-typedef struct TfLiteAffineQuantization {
-  TfLiteFloatArray* scale;
-  TfLiteIntArray* zero_point;
-  int32_t quantized_dimension;
-} TfLiteAffineQuantization;
-
-/* A union of pointers that points to memory for a given tensor. */
-typedef union TfLitePtrUnion {
-  /* Do not access these members directly, if possible, use
-   * GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
-   * members are deprecated. */
-  int32_t* i32;
-  uint32_t* u32;
-  int64_t* i64;
-  uint64_t* u64;
-  float* f;
-  TfLiteFloat16* f16;
-  double* f64;
-  char* raw;
-  const char* raw_const;
-  uint8_t* uint8;
-  bool* b;
-  int16_t* i16;
-  TfLiteComplex64* c64;
-  TfLiteComplex128* c128;
-  int8_t* int8;
-  /* Only use this member. */
-  void* data;
-} TfLitePtrUnion;
-
-// Memory allocation strategies.
-//  * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
-//  * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
-//        and available during eval.
-//  * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
-//        only available during eval.
-//  * kTfLiteDynamic: Allocated during eval, or for string tensors.
-//  * kTfLitePersistentRo: Allocated and populated during prepare. This is
-//        useful for tensors that can be computed during prepare and treated
-//        as constant inputs for downstream ops (also in prepare).
-//  * kTfLiteCustom: Custom memory allocation provided by the user. See
-//        TfLiteCustomAllocation below.
-typedef enum TfLiteAllocationType {
-  kTfLiteMemNone = 0,
-  kTfLiteMmapRo,
-  kTfLiteArenaRw,
-  kTfLiteArenaRwPersistent,
-  kTfLiteDynamic,
-  kTfLitePersistentRo,
-  kTfLiteCustom,
-} TfLiteAllocationType;
-
-// The delegates should use zero or positive integers to represent handles.
-// -1 is reserved from unallocated status.
-typedef int TfLiteBufferHandle;
-enum {
-  kTfLiteNullBufferHandle = -1,
-};
-
-// Storage format of each dimension in a sparse tensor.
-typedef enum TfLiteDimensionType {
-  kTfLiteDimDense = 0,
-  kTfLiteDimSparseCSR,
-} TfLiteDimensionType;
-
-// Metadata to encode each dimension in a sparse tensor.
-typedef struct TfLiteDimensionMetadata {
-  TfLiteDimensionType format;
-  int dense_size;
-  TfLiteIntArray* array_segments;
-  TfLiteIntArray* array_indices;
-} TfLiteDimensionMetadata;
-
-// Parameters used to encode a sparse tensor. For detailed explanation of each
-// field please refer to lite/schema/schema.fbs.
-typedef struct TfLiteSparsity {
-  TfLiteIntArray* traversal_order;
-  TfLiteIntArray* block_map;
-  TfLiteDimensionMetadata* dim_metadata;
-  int dim_metadata_size;
-} TfLiteSparsity;
-
-// Defines a custom memory allocation not owned by the runtime.
-// `data` should be aligned to kDefaultTensorAlignment defined in
-// lite/util.h. (Currently 64 bytes)
-// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
-typedef struct TfLiteCustomAllocation {
-  void* data;
-  size_t bytes;
-} TfLiteCustomAllocation;
-
-// The flags used in `Interpreter::SetCustomAllocationForTensor`.
-// Note that this is a bitmask, so the values should be 1, 2, 4, 8, ...etc.
-typedef enum TfLiteCustomAllocationFlags {
-  kTfLiteCustomAllocationFlagsNone = 0,
-  // Skips checking whether allocation.data points to an aligned buffer as
-  // expected by the TFLite runtime.
-  // NOTE: Setting this flag can cause crashes when calling Invoke().
-  // Use with caution.
-  kTfLiteCustomAllocationFlagsSkipAlignCheck = 1,
-} TfLiteCustomAllocationFlags;
-
-// A tensor in the interpreter system which is a wrapper around a buffer of
-// data including a dimensionality (or NULL if not currently defined).
-#ifndef TF_LITE_STATIC_MEMORY
-typedef struct TfLiteTensor {
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have. NOTE: the product of elements of `dims`
-  // and the element datatype size should be equal to `bytes` below.
-  TfLiteIntArray* dims;
-  // Quantization information.
-  TfLiteQuantizationParams params;
-  // How memory is mapped
-  //  kTfLiteMmapRo: Memory mapped read only.
-  //  i.e. weights
-  //  kTfLiteArenaRw: Arena allocated read write memory
-  //  (i.e. temporaries, outputs).
-  TfLiteAllocationType allocation_type;
-  // The number of bytes required to store the data of this Tensor. I.e.
-  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
-  // type is kTfLiteFloat32 and dims = {3, 2} then
-  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
-  size_t bytes;
-
-  // An opaque pointer to a tflite::MMapAllocation
-  const void* allocation;
-
-  // Null-terminated name of this tensor.
-  const char* name;
-
-  // The delegate which knows how to handle `buffer_handle`.
-  // WARNING: This is an experimental interface that is subject to change.
-  struct TfLiteDelegate* delegate;
-
-  // An integer buffer handle that can be handled by `delegate`.
-  // The value is valid only when delegate is not null.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteBufferHandle buffer_handle;
-
-  // If the delegate uses its own buffer (e.g. GPU memory), the delegate is
-  // responsible to set data_is_stale to true.
-  // `delegate->CopyFromBufferHandle` can be called to copy the data from
-  // delegate buffer.
-  // WARNING: This is an // experimental interface that is subject to change.
-  bool data_is_stale;
-
-  // True if the tensor is a variable.
-  bool is_variable;
-
-  // Quantization information. Replaces params field above.
-  TfLiteQuantization quantization;
-
-  // Parameters used to encode a sparse tensor.
-  // This is optional. The field is NULL if a tensor is dense.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteSparsity* sparsity;
-
-  // Optional. Encodes shapes with unknown dimensions with -1. This field is
-  // only populated when unknown dimensions exist in a read-write tensor (i.e.
-  // an input or output tensor). (e.g.  `dims` contains [1, 1, 1, 3] and
-  // `dims_signature` contains [1, -1, -1, 3]).
-  const TfLiteIntArray* dims_signature;
-} TfLiteTensor;
-
-// A structure representing an instance of a node.
-// This structure only exhibits the inputs, outputs and user defined data, not
-// other features like the type.
-typedef struct TfLiteNode {
-  // Inputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* inputs;
-
-  // Outputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* outputs;
-
-  // intermediate tensors to this node expressed as indices into the simulator's
-  // tensors.
-  TfLiteIntArray* intermediates;
-
-  // Temporary tensors uses during the computations. This usually contains no
-  // tensors, but ops are allowed to change that if they need scratch space of
-  // any sort.
-  TfLiteIntArray* temporaries;
-
-  // Opaque data provided by the node implementer through `Registration.init`.
-  void* user_data;
-
-  // Opaque data provided to the node if the node is a builtin. This is usually
-  // a structure defined in builtin_op_data.h
-  void* builtin_data;
-
-  // Custom initial data. This is the opaque data provided in the flatbuffer.
-  // WARNING: This is an experimental interface that is subject to change.
-  const void* custom_initial_data;
-  int custom_initial_data_size;
-
-  // The pointer to the delegate. This is non-null only when the node is
-  // created by calling `interpreter.ModifyGraphWithDelegate`.
-  // WARNING: This is an experimental interface that is subject to change.
-  struct TfLiteDelegate* delegate;
-} TfLiteNode;
-#else   // defined(TF_LITE_STATIC_MEMORY)?
-// NOTE: This flag is opt-in only at compile time.
-//
-// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
-// contains only the minimum fields required to initialize and prepare a micro
-// inference graph. The fields in this struct have been ordered from
-// largest-to-smallest for optimal struct sizeof.
-//
-// This struct does not use:
-// - allocation
-// - buffer_handle
-// - data_is_stale
-// - delegate
-// - dims_signature
-// - name
-// - sparsity
-typedef struct TfLiteTensor {
-  // TODO(b/155784997): Consider consolidating these quantization fields:
-  // Quantization information. Replaces params field above.
-  TfLiteQuantization quantization;
-
-  // Quantization information.
-  TfLiteQuantizationParams params;
-
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have. NOTE: the product of elements of `dims`
-  // and the element datatype size should be equal to `bytes` below.
-  TfLiteIntArray* dims;
-
-  // The number of bytes required to store the data of this Tensor. I.e.
-  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
-  // type is kTfLiteFloat32 and dims = {3, 2} then
-  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
-  size_t bytes;
-
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-
-  // How memory is mapped
-  //  kTfLiteMmapRo: Memory mapped read only.
-  //  i.e. weights
-  //  kTfLiteArenaRw: Arena allocated read write memory
-  //  (i.e. temporaries, outputs).
-  TfLiteAllocationType allocation_type;
-
-  // True if the tensor is a variable.
-  bool is_variable;
-} TfLiteTensor;
-
-// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
-// only the minimum fields required to represent a node.
-//
-// This struct does not use:
-// - delegate
-// - intermediates
-// - temporaries
-typedef struct TfLiteNode {
-  // Inputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* inputs;
-
-  // Outputs to this node expressed as indices into the simulator's tensors.
-  TfLiteIntArray* outputs;
-
-  // Opaque data provided by the node implementer through `Registration.init`.
-  void* user_data;
-
-  // Opaque data provided to the node if the node is a builtin. This is usually
-  // a structure defined in builtin_op_data.h
-  void* builtin_data;
-
-  // Custom initial data. This is the opaque data provided in the flatbuffer.
-  // WARNING: This is an experimental interface that is subject to change.
-  const void* custom_initial_data;
-  int custom_initial_data_size;
-} TfLiteNode;
-#endif  // TF_LITE_STATIC_MEMORY
-
-// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
-// of information required for a kernel to run during TfLiteRegistration::Eval.
-// TODO(b/160955687): Move this field into TF_LITE_STATIC_MEMORY when TFLM
-// builds with this flag by default internally.
-typedef struct TfLiteEvalTensor {
-  // A union of data pointers. The appropriate type should be used for a typed
-  // tensor based on `type`.
-  TfLitePtrUnion data;
-
-  // A pointer to a structure representing the dimensionality interpretation
-  // that the buffer should have.
-  TfLiteIntArray* dims;
-
-  // The data type specification for data stored in `data`. This affects
-  // what member of `data` union should be used.
-  TfLiteType type;
-} TfLiteEvalTensor;
-
-#ifndef TF_LITE_STATIC_MEMORY
-// Free data memory of tensor `t`.
-void TfLiteTensorDataFree(TfLiteTensor* t);
-
-// Free quantization data.
-void TfLiteQuantizationFree(TfLiteQuantization* quantization);
-
-// Free sparsity parameters.
-void TfLiteSparsityFree(TfLiteSparsity* sparsity);
-
-// Free memory of tensor `t`.
-void TfLiteTensorFree(TfLiteTensor* t);
-
-// Set all of a tensor's fields (and free any previously allocated data).
-void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
-                       TfLiteQuantizationParams quantization, char* buffer,
-                       size_t size, TfLiteAllocationType allocation_type,
-                       const void* allocation, bool is_variable,
-                       TfLiteTensor* tensor);
-
-// Resize the allocated data of a (dynamic) tensor. Tensors with allocation
-// types other than kTfLiteDynamic will be ignored.
-void TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
-#endif  // TF_LITE_STATIC_MEMORY
-
-// WARNING: This is an experimental interface that is subject to change.
-//
-// Currently, TfLiteDelegateParams has to be allocated in a way that it's
-// trivially destructable. It will be stored as `builtin_data` field in
-// `TfLiteNode` of the delegate node.
-//
-// See also the `CreateDelegateParams` function in `interpreter.cc` details.
-typedef struct TfLiteDelegateParams {
-  struct TfLiteDelegate* delegate;
-  TfLiteIntArray* nodes_to_replace;
-  TfLiteIntArray* input_tensors;
-  TfLiteIntArray* output_tensors;
-} TfLiteDelegateParams;
-
-typedef struct TfLiteContext {
-  // Number of tensors in the context.
-  size_t tensors_size;
-
-  // The execution plan contains a list of the node indices in execution
-  // order. execution_plan->size is the current number of nodes. And,
-  // execution_plan->data[0] is the first node that needs to be run.
-  // TfLiteDelegates can traverse the current execution plan by iterating
-  // through each member of this array and using GetNodeAndRegistration() to
-  // access details about a node. i.e.
-  // TfLiteIntArray* execution_plan;
-  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
-  // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
-  //    int node_index = execution_plan->data[exec_index];
-  //    TfLiteNode* node;
-  //    TfLiteRegistration* reg;
-  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
-  // }
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
-                                   TfLiteIntArray** execution_plan);
-
-  // opaque full context ptr (an opaque c++ data structure)
-  void* impl_;
-
-  // Request memory pointer be resized. Updates dimensions on the tensor.
-  // NOTE: ResizeTensor takes ownership of newSize.
-  TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
-                               TfLiteIntArray* new_size);
-  // Request that an error be reported with format string msg.
-  void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
-
-  // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries.  If
-  // non-null, the value pointed to by `first_new_tensor_index` will be set to
-  // the index of the first new tensor.
-  TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
-                             int* first_new_tensor_index);
-
-  // Get a Tensor node by node_index.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*GetNodeAndRegistration)(
-      struct TfLiteContext*, int node_index, TfLiteNode** node,
-      struct TfLiteRegistration** registration);
-
-  // Replace ops with one or more stub delegate operations. This function
-  // does not take ownership of `nodes_to_replace`.
-  TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
-      struct TfLiteContext*, struct TfLiteRegistration registration,
-      const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
-
-  // Number of threads that are recommended to subsystems like gemmlowp and
-  // eigen.
-  int recommended_num_threads;
-
-  // Access external contexts by type.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
-                                               TfLiteExternalContextType);
-  // Set the value of a external context. Does not take ownership of the
-  // pointer.
-  // WARNING: This is an experimental interface that is subject to change.
-  void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
-                             TfLiteExternalContext*);
-
-  // Flag for allowing float16 precision for FP32 calculation.
-  // default: false.
-  // WARNING: This is an experimental API and subject to change.
-  bool allow_fp32_relax_to_fp16;
-
-  // Pointer to the op-level profiler, if set; nullptr otherwise.
-  void* profiler;
-
-  // Allocate persistent buffer which has the same life time as the interpreter.
-  // Returns nullptr on failure.
-  // The memory is allocated from heap for TFL, and from tail in TFLM.
-  // This method is only available in Init or Prepare stage.
-  // WARNING: This is an experimental interface that is subject to change.
-  void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
-
-  // Allocate a buffer which will be deallocated right after invoke phase.
-  // The memory is allocated from heap in TFL, and from volatile arena in TFLM.
-  // This method is only available in invoke stage.
-  // NOTE: If possible use RequestScratchBufferInArena method to avoid memory
-  // allocation during inference time.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
-                                        void** ptr);
-
-  // Request a scratch buffer in the arena through static memory planning.
-  // This method is only available in Prepare stage and the buffer is allocated
-  // by the interpreter between Prepare and Eval stage. In Eval stage,
-  // GetScratchBuffer API can be used to fetch the address.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
-                                              size_t bytes, int* buffer_idx);
-
-  // Get the scratch buffer pointer.
-  // This method is only available in Eval stage.
-  // WARNING: This is an experimental interface that is subject to change.
-  void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
-
-  // Resize the memory pointer of the `tensor`. This method behaves the same as
-  // `ResizeTensor`, except that it makes a copy of the shape array internally
-  // so the shape array could be deallocated right afterwards.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
-                                       TfLiteTensor* tensor, int dims,
-                                       const int* shape);
-
-  // This method provides a preview of post-delegation partitioning. Each
-  // TfLiteDelegateParams in the referenced array corresponds to one instance of
-  // the delegate kernel.
-  // Example usage:
-  //
-  // TfLiteIntArray* nodes_to_replace = ...;
-  // TfLiteDelegateParams* params_array;
-  // int num_partitions = 0;
-  // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
-  //    context, delegate, nodes_to_replace, &params_array, &num_partitions));
-  // for (int idx = 0; idx < num_partitions; idx++) {
-  //    const auto& partition_params = params_array[idx];
-  //    ...
-  // }
-  //
-  // NOTE: The context owns the memory referenced by partition_params_array. It
-  // will be cleared with another call to PreviewDelegateParitioning, or after
-  // TfLiteDelegateParams::Prepare returns.
-  //
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus (*PreviewDelegatePartitioning)(
-      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
-      TfLiteDelegateParams** partition_params_array, int* num_partitions);
-
-  // Returns a TfLiteTensor struct for a given index.
-  // WARNING: This is an experimental interface that is subject to change.
-  // WARNING: This method may not be available on all platforms.
-  TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
-                             int tensor_idx);
-
-  // Returns a TfLiteEvalTensor struct for a given index.
-  // WARNING: This is an experimental interface that is subject to change.
-  // WARNING: This method may not be available on all platforms.
-  TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
-                                     int tensor_idx);
-} TfLiteContext;
-
-typedef struct TfLiteRegistration {
-  // Initializes the op from serialized data.
-  // If a built-in op:
-  //   `buffer` is the op's params data (TfLiteLSTMParams*).
-  //   `length` is zero.
-  // If custom op:
-  //   `buffer` is the op's `custom_options`.
-  //   `length` is the size of the buffer.
-  //
-  // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
-  // or an instance of a struct).
-  //
-  // The returned pointer will be stored with the node in the `user_data` field,
-  // accessible within prepare and invoke functions below.
-  // NOTE: if the data is already in the desired format, simply implement this
-  // function to return `nullptr` and implement the free function to be a no-op.
-  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
-
-  // The pointer `buffer` is the data previously returned by an init invocation.
-  void (*free)(TfLiteContext* context, void* buffer);
-
-  // prepare is called when the inputs this node depends on have been resized.
-  // context->ResizeTensor() can be called to request output tensors to be
-  // resized.
-  //
-  // Returns kTfLiteOk on success.
-  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
-
-  // Execute the node (should read node->inputs and output to node->outputs).
-  // Returns kTfLiteOk on success.
-  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
-
-  // profiling_string is called during summarization of profiling information
-  // in order to group executions together. Providing a value here will cause a
-  // given op to appear multiple times is the profiling report. This is
-  // particularly useful for custom ops that can perform significantly
-  // different calculations depending on their `user-data`.
-  const char* (*profiling_string)(const TfLiteContext* context,
-                                  const TfLiteNode* node);
-
-  // Builtin codes. If this kernel refers to a builtin this is the code
-  // of the builtin. This is so we can do marshaling to other frameworks like
-  // NN API.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  int32_t builtin_code;
-
-  // Custom op name. If the op is a builtin, this will be null.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  // WARNING: This is an experimental interface that is subject to change.
-  const char* custom_name;
-
-  // The version of the op.
-  // Note: It is the responsibility of the registration binder to set this
-  // properly.
-  int version;
-} TfLiteRegistration;
-
-// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
-// values should be 1, 2, 4, 8, ...etc.
-typedef enum TfLiteDelegateFlags {
-  kTfLiteDelegateFlagsNone = 0,
-  // The flag is set if the delegate can handle dynamic sized tensors.
-  // For example, the output shape of a `Resize` op with non-constant shape
-  // can only be inferred when the op is invoked.
-  // In this case, the Delegate is responsible for calling
-  // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
-  // `ResizeTensor` when invoking the op.
-  //
-  // If the delegate isn't capable to handle dynamic tensors, this flag need
-  // to be set to false.
-  kTfLiteDelegateFlagsAllowDynamicTensors = 1,
-
-  // This flag can be used by delegates (that allow dynamic tensors) to ensure
-  // applicable tensor shapes are automatically propagated in the case of tensor
-  // resizing.
-  // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
-  // of a delegate kernel will have correct shapes before its Prepare() method
-  // is called. The runtime leverages TFLite builtin ops in the original
-  // execution plan to propagate shapes.
-  //
-  // A few points to note:
-  // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
-  // false, this one is redundant since the delegate kernels are re-initialized
-  // every time tensors are resized.
-  // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
-  // work is required to prepare the original execution plan.
-  // 3. This flag requires that the original execution plan only have ops with
-  // valid registrations (and not 'dummy' custom ops like with Flex).
-  // WARNING: This feature is experimental and subject to change.
-  kTfLiteDelegateFlagsRequirePropagatedShapes = 2
-} TfLiteDelegateFlags;
-
-// WARNING: This is an experimental interface that is subject to change.
-typedef struct TfLiteDelegate {
-  // Data that delegate needs to identify itself. This data is owned by the
-  // delegate. The delegate is owned in the user code, so the delegate is
-  // responsible for doing this when it is destroyed.
-  void* data_;
-
-  // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
-  // delegate a view of the current graph through TfLiteContext*. It typically
-  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
-  // to ask the TensorFlow lite runtime to create macro-nodes to represent
-  // delegated subgraphs of the original graph.
-  TfLiteStatus (*Prepare)(TfLiteContext* context,
-                          struct TfLiteDelegate* delegate);
-
-  // Copy the data from delegate buffer handle into raw memory of the given
-  // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
-  // long as it follows the rules for kTfLiteDynamic tensors, in which case this
-  // cannot be null.
-  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
-                                       struct TfLiteDelegate* delegate,
-                                       TfLiteBufferHandle buffer_handle,
-                                       TfLiteTensor* tensor);
-
-  // Copy the data from raw memory of the given 'tensor' to delegate buffer
-  // handle. This can be null if the delegate doesn't use its own buffer.
-  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
-                                     struct TfLiteDelegate* delegate,
-                                     TfLiteBufferHandle buffer_handle,
-                                     TfLiteTensor* tensor);
-
-  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
-  // this doesn't release the underlying resource (e.g. textures). The
-  // resources are either owned by application layer or the delegate.
-  // This can be null if the delegate doesn't use its own buffer.
-  void (*FreeBufferHandle)(TfLiteContext* context,
-                           struct TfLiteDelegate* delegate,
-                           TfLiteBufferHandle* handle);
-
-  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
-  int64_t flags;
-} TfLiteDelegate;
-
-// Build a 'null' delegate, with all the fields properly set to their default
-// values.
-TfLiteDelegate TfLiteDelegateCreate();
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif  // __cplusplus
 #endif  // TENSORFLOW_LITE_C_COMMON_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/context_util.h b/edge-impulse-sdk/tensorflow/lite/context_util.h
new file mode 100644
index 0000000..8c97a8d
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/context_util.h
@@ -0,0 +1,54 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+/// \file
+///
+/// This provides a few C++ helpers that are useful for manipulating C
+/// structures in C++.
+#ifndef TENSORFLOW_LITE_CONTEXT_UTIL_H_
+#define TENSORFLOW_LITE_CONTEXT_UTIL_H_
+
+#include <stddef.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+
+namespace tflite {
+
+/// Provides a range iterable wrapper for TfLiteIntArray* (C lists) that TfLite
+/// C api uses.
+// Can't use the google array_view, since we can't depend on even
+// absl for embedded device reasons.
+class TfLiteIntArrayView {
+ public:
+  /// Wraps `int_array` without copying it or taking ownership of it. The
+  /// pointer is expected to be non-null.
+  explicit TfLiteIntArrayView(const TfLiteIntArray* int_array)
+      : int_array_(int_array) {}
+
+  TfLiteIntArrayView(const TfLiteIntArrayView&) = default;
+  TfLiteIntArrayView& operator=(const TfLiteIntArrayView& rhs) = default;
+
+  using const_iterator = const int*;
+  const_iterator begin() const { return int_array_->data; }
+  const_iterator end() const { return begin() + int_array_->size; }
+  size_t size() const { return static_cast<size_t>(end() - begin()); }
+  int operator[](size_t pos) const { return *(begin() + pos); }
+
+ private:
+  const TfLiteIntArray* int_array_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_CONTEXT_UTIL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/common.cc b/edge-impulse-sdk/tensorflow/lite/core/api/common.cc
new file mode 100644
index 0000000..67b8c6c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/common.cc
@@ -0,0 +1,354 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h"
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+#include "edge-impulse-sdk/tensorflow/lite/tensorflow_profiler_logger.h"
+#endif
+
+#ifndef TF_LITE_STATIC_MEMORY
+#include <stdlib.h>
+#include <string.h>
+#endif  // TF_LITE_STATIC_MEMORY
+
+extern "C" {
+
+size_t TfLiteIntArrayGetSizeInBytes(int size) {
+  static TfLiteIntArray dummy;  // Only used as an operand of sizeof.
+  const size_t element_bytes = sizeof(dummy.data[0]);
+  size_t total_bytes = sizeof(dummy) + element_bytes * size;
+#if defined(_MSC_VER)
+  // Context for why this is needed is in http://b/189926408#comment21
+  total_bytes -= element_bytes;
+#endif
+  return total_bytes;
+}
+
+int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b) {
+  if (a == b) return 1;    // Same array, or both null.
+  if (!a || !b) return 0;  // Exactly one of the two is null.
+  return TfLiteIntArrayEqualsArray(a, b->size, b->data);
+}
+
+int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
+                              const int b_data[]) {
+  if (!a) return b_size == 0;  // A null `a` only matches an empty `b`.
+  if (b_size != a->size) return 0;
+  for (int idx = 0; idx < b_size; ++idx) {
+    if (b_data[idx] != a->data[idx]) return 0;
+  }
+  return 1;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+
+TfLiteIntArray* TfLiteIntArrayCreate(int size) {
+  const size_t num_bytes = TfLiteIntArrayGetSizeInBytes(size);
+  if (num_bytes == 0) return nullptr;
+  // A failed allocation is returned to the caller as a null pointer.
+  auto* array = static_cast<TfLiteIntArray*>(malloc(num_bytes));
+  if (array != nullptr) array->size = size;
+  return array;
+}
+
+TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src) {
+  if (src == nullptr) return nullptr;
+  TfLiteIntArray* copy = TfLiteIntArrayCreate(src->size);
+  // A failed allocation is passed through as null rather than dereferenced.
+  if (copy == nullptr) return nullptr;
+  memcpy(copy->data, src->data, src->size * sizeof(int));
+  return copy;
+}
+
+void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); }  // null is a no-op
+
+#endif  // TF_LITE_STATIC_MEMORY
+
+int TfLiteFloatArrayGetSizeInBytes(int size) {
+  static TfLiteFloatArray dummy;  // Only used as an operand of sizeof.
+  const size_t element_bytes = sizeof(dummy.data[0]);
+  int total_bytes = sizeof(dummy) + element_bytes * size;
+#if defined(_MSC_VER)
+  // Context for why this is needed is in http://b/189926408#comment21
+  total_bytes -= element_bytes;
+#endif
+  return total_bytes;
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+
+TfLiteFloatArray* TfLiteFloatArrayCreate(int size) {
+  // Null on allocation failure (like TfLiteIntArrayCreate), never a null write.
+  auto* ret = (TfLiteFloatArray*)malloc(TfLiteFloatArrayGetSizeInBytes(size));
+  if (ret) ret->size = size;
+  return ret;
+}
+
+void TfLiteFloatArrayFree(TfLiteFloatArray* a) { free(a); }  // null is a no-op
+
+void TfLiteTensorDataFree(TfLiteTensor* t) {
+  const bool freeable = t->allocation_type == kTfLiteDynamic ||
+                        t->allocation_type == kTfLitePersistentRo;
+  if (freeable && t->data.raw) {
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+    tflite::PauseHeapMonitoring(/*pause=*/true);
+    tflite::OnTfLiteTensorDealloc(t);
+#endif
+    free(t->data.raw);
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+    tflite::PauseHeapMonitoring(/*pause=*/false);
+#endif
+  }
+  // The pointer is cleared for every allocation type, freed or not.
+  t->data.raw = nullptr;
+}
+
+void TfLiteQuantizationFree(TfLiteQuantization* quantization) {
+  if (quantization->type == kTfLiteAffineQuantization) {
+    TfLiteAffineQuantization* affine =
+        (TfLiteAffineQuantization*)(quantization->params);
+    if (affine->scale != nullptr) {
+      TfLiteFloatArrayFree(affine->scale);
+      affine->scale = nullptr;
+    }
+    if (affine->zero_point != nullptr) {
+      TfLiteIntArrayFree(affine->zero_point);
+      affine->zero_point = nullptr;
+    }
+    free(affine);  // Finally release the params struct itself.
+  }
+  quantization->type = kTfLiteNoQuantization;
+  quantization->params = nullptr;
+}
+
+void TfLiteSparsityFree(TfLiteSparsity* sparsity) {
+  if (!sparsity) return;
+
+  // Each array field is released and cleared in turn; the struct itself is
+  // freed last.
+  if (sparsity->traversal_order != nullptr) {
+    TfLiteIntArrayFree(sparsity->traversal_order);
+    sparsity->traversal_order = nullptr;
+  }
+
+  if (sparsity->block_map != nullptr) {
+    TfLiteIntArrayFree(sparsity->block_map);
+    sparsity->block_map = nullptr;
+  }
+
+  if (sparsity->dim_metadata != nullptr) {
+    for (int dim = 0; dim < sparsity->dim_metadata_size; ++dim) {
+      // `entry` is a by-value copy, so the nulling below only touches the copy.
+      TfLiteDimensionMetadata entry = sparsity->dim_metadata[dim];
+      if (entry.format == kTfLiteDimSparseCSR) {
+        TfLiteIntArrayFree(entry.array_segments);
+        entry.array_segments = nullptr;
+        TfLiteIntArrayFree(entry.array_indices);
+        entry.array_indices = nullptr;
+      }
+    }
+    free(sparsity->dim_metadata);
+    sparsity->dim_metadata = nullptr;
+  }
+
+  free(sparsity);
+}
+
+void TfLiteTensorFree(TfLiteTensor* t) {
+  TfLiteTensorDataFree(t);
+  if (t->dims != nullptr) TfLiteIntArrayFree(t->dims);
+  t->dims = nullptr;
+
+  // The cast hands `dims_signature` to the free call as a mutable pointer.
+  if (t->dims_signature != nullptr)
+    TfLiteIntArrayFree((TfLiteIntArray*)t->dims_signature);
+  t->dims_signature = nullptr;
+
+  TfLiteQuantizationFree(&t->quantization);
+  TfLiteSparsityFree(t->sparsity);
+  t->sparsity = nullptr;
+}
+
+void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
+                       TfLiteQuantizationParams quantization, char* buffer,
+                       size_t size, TfLiteAllocationType allocation_type,
+                       const void* allocation, bool is_variable,
+                       TfLiteTensor* tensor) {
+  TfLiteTensorFree(tensor);  // Release what the tensor held before.
+  tensor->name = name;
+  tensor->type = type;
+  tensor->dims = dims;
+  tensor->bytes = size;
+  tensor->data.raw = buffer;
+  tensor->allocation = allocation;
+  tensor->allocation_type = allocation_type;
+  tensor->is_variable = is_variable;
+  tensor->params = quantization;
+  // The argument lands in `params`; the `quantization` field is left empty.
+  tensor->quantization.params = nullptr;
+  tensor->quantization.type = kTfLiteNoQuantization;
+}
+
+TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst) {
+  // Missing tensors and self-copies are treated as successful no-ops.
+  if (src == nullptr || dst == nullptr || src == dst) return kTfLiteOk;
+  if (dst->bytes != src->bytes) return kTfLiteError;
+
+  dst->type = src->type;
+  if (dst->dims != nullptr) TfLiteIntArrayFree(dst->dims);
+  dst->dims = TfLiteIntArrayCopy(src->dims);
+  memcpy(dst->data.raw, src->data.raw, src->bytes);
+  dst->delegate = src->delegate;
+  dst->buffer_handle = src->buffer_handle;
+  dst->data_is_stale = src->data_is_stale;
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
+                                         bool preserve_data) {
+  // Tensors of any other allocation type are left untouched and report OK.
+  if (tensor->allocation_type != kTfLiteDynamic &&
+      tensor->allocation_type != kTfLitePersistentRo) {
+    return kTfLiteOk;
+  }
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+  tflite::PauseHeapMonitoring(/*pause=*/true);
+#endif
+  size_t alloc_bytes = num_bytes;
+  // TODO(b/145340303): Tensor data should be aligned.
+#ifdef TFLITE_KERNEL_USE_XNNPACK
+  alloc_bytes += 16;  // XNNPACK_EXTRA_BYTES = 16
+#endif
+  if (!tensor->data.data) {
+    tensor->data.data = (char*)malloc(alloc_bytes);
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+    tflite::OnTfLiteTensorAlloc(tensor, alloc_bytes);
+#endif
+  } else if (num_bytes > tensor->bytes) {
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+    tflite::OnTfLiteTensorDealloc(tensor);
+#endif
+    if (preserve_data) {
+      void* grown = realloc(tensor->data.data, alloc_bytes);
+      if (!grown) free(tensor->data.data);  // Failed realloc keeps old block.
+      tensor->data.data = (char*)grown;
+    } else {
+      free(tensor->data.data);  // Data not needed: skip realloc's copy.
+      tensor->data.data = (char*)malloc(alloc_bytes);
+    }
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+    tflite::OnTfLiteTensorAlloc(tensor, alloc_bytes);
+#endif
+  }
+#ifdef TF_LITE_TENSORFLOW_PROFILER
+  tflite::PauseHeapMonitoring(/*pause=*/false);
+#endif
+  tensor->bytes = num_bytes;
+  if (tensor->data.data == nullptr && num_bytes != 0) {
+    // Allocation failed for a non-empty request, so report the error.
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor) {
+  return TfLiteTensorResizeMaybeCopy(num_bytes, tensor, true);  // keep data
+}
+#endif  // TF_LITE_STATIC_MEMORY
+
+const char* TfLiteTypeGetName(TfLiteType type) {
+  switch (type) {  // No `default:` label; unlisted values reach the end.
+    case kTfLiteNoType:
+      return "NOTYPE";
+    case kTfLiteFloat32:
+      return "FLOAT32";
+    case kTfLiteUInt16:
+      return "UINT16";
+    case kTfLiteInt16:
+      return "INT16";
+    case kTfLiteInt32:
+      return "INT32";
+    case kTfLiteUInt32:
+      return "UINT32";
+    case kTfLiteUInt8:
+      return "UINT8";
+    case kTfLiteInt8:
+      return "INT8";
+    case kTfLiteInt64:
+      return "INT64";
+    case kTfLiteUInt64:
+      return "UINT64";
+    case kTfLiteBool:
+      return "BOOL";
+    case kTfLiteComplex64:
+      return "COMPLEX64";
+    case kTfLiteComplex128:
+      return "COMPLEX128";
+    case kTfLiteString:
+      return "STRING";
+    case kTfLiteFloat16:
+      return "FLOAT16";
+    case kTfLiteFloat64:
+      return "FLOAT64";
+    case kTfLiteResource:
+      return "RESOURCE";
+    case kTfLiteVariant:
+      return "VARIANT";
+    case kTfLiteInt4:
+      return "INT4";
+  }
+  return "Unknown type";  // `type` matched none of the cases above.
+}
+
+TfLiteDelegate TfLiteDelegateCreate() { return TfLiteDelegate{}; }  // `{}`-init
+
+TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate(
+    const TfLiteOpaqueDelegateBuilder* opaque_delegate_builder) {
+  if (opaque_delegate_builder == nullptr) return nullptr;
+  // The new delegate gets its own heap copy of the caller's builder.
+  auto* delegate = new TfLiteDelegate{};
+  auto* builder_copy = new TfLiteOpaqueDelegateBuilder{};
+  *builder_copy = *opaque_delegate_builder;
+  delegate->opaque_delegate_builder = builder_copy;
+  return reinterpret_cast<TfLiteOpaqueDelegate*>(delegate);
+}
+
+void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* opaque_delegate) {
+  if (opaque_delegate == nullptr) return;
+  // Delete the attached builder first, then the delegate that points to it.
+  const auto* delegate =
+      reinterpret_cast<const TfLiteDelegate*>(opaque_delegate);
+  delete delegate->opaque_delegate_builder;
+  delete delegate;
+}
+
+void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate) {
+  if (delegate == nullptr) return nullptr;
+
+  // This cast is valid only because the code belongs to the TF Lite runtime
+  // itself; applications must not assume that 'TfLiteOpaqueDelegate' and
+  // 'TfLiteDelegate' are the same type.
+  const auto* impl = reinterpret_cast<const TfLiteDelegate*>(delegate);
+  const auto* builder = impl->opaque_delegate_builder;
+
+  // An attached builder supplies the data; otherwise the delegate's own does.
+  if (builder != nullptr) return builder->data;
+  return impl->data_;
+}
+
+}  // extern "C"
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h b/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h
index 05839a6..99ab8cf 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h
@@ -34,9 +34,22 @@ namespace tflite {
 /// that drives a GUI error log box.
 class ErrorReporter {
  public:
-  virtual ~ErrorReporter() {}
+  virtual ~ErrorReporter() = default;
+  /// Converts `args` to character equivalents according to `format` string,
+  /// constructs the error string and reports it.
+  /// Returns the number of characters written or zero on success, and a
+  /// negative number on error.
   virtual int Report(const char* format, va_list args) = 0;
+
+  /// Converts arguments to character equivalents according to `format` string,
+  /// constructs the error string and reports it.
+  /// Returns the number of characters written or zero on success, and a
+  /// negative number on error.
   int Report(const char* format, ...);
+
+  /// Equivalent to `Report` above. The additional `void*` parameter is unused.
+  /// This method is for compatibility with macros that take `TfLiteContext`,
+  /// like TF_LITE_ENSURE and related macros.
   int ReportError(void*, const char* format, ...);
 };
 
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cc b/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cc
index 4af3800..31d4af9 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cc
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,12 +19,13 @@ limitations under the License.
 #include <cstdint>
 #include <memory>
 
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h" // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
 
 namespace tflite {
 
@@ -131,6 +132,17 @@ TfLitePadding ConvertPadding(Padding padding) {
   return kTfLitePaddingUnknown;
 }
 
+// Converts the flatbuffer mirror padding enum to what is used at runtime.
+TfLiteMirrorPaddingMode ConvertMirrorPadding(MirrorPadMode padding) {
+  switch (padding) {
+    case MirrorPadMode_REFLECT:
+      return kTfLiteMirrorPaddingReflect;
+    case MirrorPadMode_SYMMETRIC:
+      return kTfLiteMirrorPaddingSymmetric;
+  }
+  return kTfLiteMirrorPaddingUnknown;  // No case matched `padding`.
+}
+
 #ifndef TF_LITE_STATIC_MEMORY
 TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
                                ErrorReporter* error_reporter,
@@ -181,6 +193,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseArgMin(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_ASSIGN_VARIABLE: {
+      return ParseAssignVariable(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_AVERAGE_POOL_2D: {
       return ParsePool(op, error_reporter, allocator, builtin_data);
     }
@@ -193,6 +209,18 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseBatchToSpaceNd(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_BROADCAST_ARGS: {
+      return ParseBroadcastArgs(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_BROADCAST_TO: {
+      return ParseBroadcastTo(op, error_reporter, allocator, builtin_data);
+    }
+
+    case BuiltinOperator_CALL_ONCE: {
+      return ParseCallOnce(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_CEIL: {
       return ParseCeil(op, error_reporter, allocator, builtin_data);
     }
@@ -317,6 +345,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseLogSoftmax(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_LSTM: {
+      return ParseLSTM(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_MAXIMUM: {
       return ParseMaximum(op, error_reporter, allocator, builtin_data);
     }
@@ -325,6 +357,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParsePool(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_MIRROR_PAD: {
+      return ParseMirrorPad(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_MEAN: {
       return ParseReducer(op, error_reporter, allocator, builtin_data);
     }
@@ -369,10 +405,18 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseQuantize(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_READ_VARIABLE: {
+      return ParseReadVariable(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_REDUCE_ANY: {
       return ParseReducer(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_REDUCE_ALL: {
+      return ParseReducer(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_REDUCE_MAX: {
       return ParseReducer(op, error_reporter, allocator, builtin_data);
     }
@@ -414,6 +458,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseRsqrt(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_SELECT_V2: {
+      return ParseSelectV2(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_SHAPE: {
       return ParseShape(op, error_reporter, allocator, builtin_data);
     }
@@ -450,6 +498,11 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseSquare(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_SQUARED_DIFFERENCE: {
+      return ParseSquaredDifference(op, error_reporter, allocator,
+                                    builtin_data);
+    }
+
     case BuiltinOperator_SQUEEZE: {
       return ParseSqueeze(op, error_reporter, allocator, builtin_data);
     }
@@ -482,6 +535,10 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return ParseUnpack(op, error_reporter, allocator, builtin_data);
     }
 
+    case BuiltinOperator_VAR_HANDLE: {
+      return ParseVarHandle(op, error_reporter, allocator, builtin_data);
+    }
+
     case BuiltinOperator_ZEROS_LIKE: {
       return ParseZerosLike(op, error_reporter, allocator, builtin_data);
     }
@@ -570,53 +627,9 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
-    case BuiltinOperator_LSTM: {
-      auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
-        params->activation =
-            ConvertActivation(lstm_params->fused_activation_function());
-        params->cell_clip = lstm_params->cell_clip();
-        params->proj_clip = lstm_params->proj_clip();
-        switch (lstm_params->kernel_type()) {
-          case LSTMKernelType_FULL:
-            params->kernel_type = kTfLiteLSTMFullKernel;
-            break;
-          case LSTMKernelType_BASIC:
-            params->kernel_type = kTfLiteLSTMBasicKernel;
-            break;
-          default:
-            TF_LITE_REPORT_ERROR(error_reporter,
-                                 "Unhandled LSTM kernel type: %d",
-                                 lstm_params->kernel_type());
-            return kTfLiteError;
-        }
-        params->asymmetric_quantize_inputs =
-            lstm_params->asymmetric_quantize_inputs();
-      } else {
-        TF_LITE_REPORT_ERROR(error_reporter,
-                             "No valid LSTM builtin options exist");
-        return kTfLiteError;
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
     case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
-      auto params =
-          safe_allocator.Allocate<TfLiteUnidirectionalSequenceLSTMParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* seq_lstm_params =
-              op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
-        params->activation =
-            ConvertActivation(seq_lstm_params->fused_activation_function());
-        params->cell_clip = seq_lstm_params->cell_clip();
-        params->proj_clip = seq_lstm_params->proj_clip();
-        params->time_major = seq_lstm_params->time_major();
-        params->asymmetric_quantize_inputs =
-            seq_lstm_params->asymmetric_quantize_inputs();
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
+      return ParseUnidirectionalSequenceLSTM(op, error_reporter, allocator,
+                                             builtin_data);
     }
     case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: {
       auto params =
@@ -663,7 +676,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       return kTfLiteOk;
     }
     case BuiltinOperator_DELEGATE: {
-      // TODO(ycling): Revisit when supporting saving delegated models.
       TF_LITE_REPORT_ERROR(error_reporter,
                            "DELEGATE op shouldn't exist in model.");
       return kTfLiteError;
@@ -690,19 +702,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
-    case BuiltinOperator_MIRROR_PAD: {
-      auto params = safe_allocator.Allocate<TfLiteMirrorPaddingParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      const auto* mirror_pad_params = op->builtin_options_as_MirrorPadOptions();
-      if (mirror_pad_params != nullptr) {
-        params->mode =
-            mirror_pad_params->mode() == tflite::MirrorPadMode_REFLECT
-                ? TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect
-                : TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingSymmetric;
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
     case BuiltinOperator_UNIQUE: {
       auto params = safe_allocator.Allocate<TfLiteUniqueParams>();
       TF_LITE_ENSURE(error_reporter, params != nullptr);
@@ -747,17 +746,8 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
-    case BuiltinOperator_CALL_ONCE: {
-      auto params = safe_allocator.Allocate<TfLiteCallOnceParams>();
-      TF_LITE_ENSURE(error_reporter, params != nullptr);
-      if (const auto* call_once_params =
-              op->builtin_options_as_CallOnceOptions()) {
-        params->init_subgraph_index = call_once_params->init_subgraph_index();
-      }
-      *builtin_data = params.release();
-      return kTfLiteOk;
-    }
-    case BuiltinOperator_CONV_3D: {
+    case BuiltinOperator_CONV_3D:
+    case BuiltinOperator_CONV_3D_TRANSPOSE: {
       auto params = safe_allocator.Allocate<TfLiteConv3DParams>();
       TF_LITE_ENSURE(error_reporter, params != nullptr);
       if (const auto* conv3d_params = op->builtin_options_as_Conv3DOptions()) {
@@ -789,42 +779,114 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
       *builtin_data = params.release();
       return kTfLiteOk;
     }
+    case BuiltinOperator_MULTINOMIAL: {
+      auto params = safe_allocator.Allocate<TfLiteRandomParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* multinomial_params =
+              op->builtin_options_as_RandomOptions()) {
+        params->seed = multinomial_params->seed();
+        params->seed2 = multinomial_params->seed2();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_RANDOM_STANDARD_NORMAL: {
+      auto params = safe_allocator.Allocate<TfLiteRandomParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* random_std_normal_params =
+              op->builtin_options_as_RandomOptions()) {
+        params->seed = random_std_normal_params->seed();
+        params->seed2 = random_std_normal_params->seed2();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_BUCKETIZE: {
+      auto params = safe_allocator.Allocate<TfLiteBucketizeParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* bucketize_params =
+              op->builtin_options_as_BucketizeOptions()) {
+        const flatbuffers::Vector<float>* boundaries =
+            bucketize_params->boundaries();
+        if (boundaries == nullptr) {
+          TF_LITE_REPORT_ERROR(
+              error_reporter,
+              "boundaries array not provided for operation 'bucketize'.\n");
+          return kTfLiteError;
+        }
+        params->num_boundaries = boundaries->size();
+        if (boundaries->data() == nullptr) {
+          TF_LITE_REPORT_ERROR(error_reporter,
+                               "boundaries.data() returned nullptr for "
+                               "operation 'bucketize'.\n");
+          return kTfLiteError;
+        }
+        params->boundaries = boundaries->data();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_RANDOM_UNIFORM: {
+      auto params = safe_allocator.Allocate<TfLiteRandomParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* random_uniform_params =
+              op->builtin_options_as_RandomOptions()) {
+        params->seed = random_uniform_params->seed();
+        params->seed2 = random_uniform_params->seed2();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
+    case BuiltinOperator_GELU: {
+      auto params = safe_allocator.Allocate<TfLiteGeluParams>();
+      TF_LITE_ENSURE(error_reporter, params != nullptr);
+      if (const auto* gelu_params = op->builtin_options_as_GeluOptions()) {
+        params->approximate = gelu_params->approximate();
+      }
+      *builtin_data = params.release();
+      return kTfLiteOk;
+    }
     // Below are the ops with no builtin_data structure.
     // TODO(aselle): Implement call in BuiltinOptions, but nullptrs are
     // ok for now, since there is no call implementation either.
     case BuiltinOperator_CALL:
+    case BuiltinOperator_COMPLEX_ABS:
     case BuiltinOperator_CONCAT_EMBEDDINGS:
     case BuiltinOperator_COS:
     case BuiltinOperator_CUSTOM:
+    case BuiltinOperator_DENSIFY:
+    case BuiltinOperator_DYNAMIC_UPDATE_SLICE:
     case BuiltinOperator_EMBEDDING_LOOKUP:
     case BuiltinOperator_EQUAL:
+    case BuiltinOperator_HASHTABLE_FIND:
+    case BuiltinOperator_HASHTABLE_IMPORT:
+    case BuiltinOperator_HASHTABLE_SIZE:
+    case BuiltinOperator_IMAG:
     case BuiltinOperator_MATRIX_DIAG:
     case BuiltinOperator_MATRIX_SET_DIAG:
+    case BuiltinOperator_NON_MAX_SUPPRESSION_V4:
+    case BuiltinOperator_NON_MAX_SUPPRESSION_V5:
     case BuiltinOperator_RELU_N1_TO_1:
+    case BuiltinOperator_RELU_0_TO_1:
+    case BuiltinOperator_SCATTER_ND:
     case BuiltinOperator_SELECT:
-    case BuiltinOperator_SELECT_V2:
     case BuiltinOperator_SLICE:
     case BuiltinOperator_TILE:
     case BuiltinOperator_TOPK_V2:
     case BuiltinOperator_TRANSPOSE:
     case BuiltinOperator_RANGE:
-    case BuiltinOperator_SQUARED_DIFFERENCE:
-    case BuiltinOperator_REVERSE_V2:
-    case BuiltinOperator_WHERE:
     case BuiltinOperator_RANK:
-    case BuiltinOperator_NON_MAX_SUPPRESSION_V4:
-    case BuiltinOperator_NON_MAX_SUPPRESSION_V5:
-    case BuiltinOperator_SCATTER_ND:
-    case BuiltinOperator_DENSIFY:
-    case BuiltinOperator_SEGMENT_SUM:
-    case BuiltinOperator_BROADCAST_TO:
-    case BuiltinOperator_RFFT2D:
-    case BuiltinOperator_IMAG:
     case BuiltinOperator_REAL:
-    case BuiltinOperator_COMPLEX_ABS:
-    case BuiltinOperator_HASHTABLE_FIND:
-    case BuiltinOperator_HASHTABLE_IMPORT:
-    case BuiltinOperator_HASHTABLE_SIZE:
+    case BuiltinOperator_RFFT2D:
+    case BuiltinOperator_SEGMENT_SUM:
+    case BuiltinOperator_REVERSE_V2:
+    case BuiltinOperator_UNSORTED_SEGMENT_MAX:
+    case BuiltinOperator_UNSORTED_SEGMENT_MIN:
+    case BuiltinOperator_UNSORTED_SEGMENT_PROD:
+    case BuiltinOperator_UNSORTED_SEGMENT_SUM:
+    case BuiltinOperator_ATAN2:
+    case BuiltinOperator_SIGN:
+    case BuiltinOperator_WHERE:
       return kTfLiteOk;
     case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
       return kTfLiteError;
@@ -849,6 +911,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
     case TensorType_INT16:
       *type = kTfLiteInt16;
       return kTfLiteOk;
+    case TensorType_UINT16:
+      *type = kTfLiteUInt16;
+      return kTfLiteOk;
     case TensorType_INT32:
       *type = kTfLiteInt32;
       return kTfLiteOk;
@@ -885,6 +950,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
     case TensorType_VARIANT:
       *type = kTfLiteVariant;
       return kTfLiteOk;
+    case TensorType_INT4:
+      *type = kTfLiteInt4;
+      return kTfLiteOk;
     default:
       *type = kTfLiteNoType;
       TF_LITE_REPORT_ERROR(error_reporter,
@@ -981,6 +1049,14 @@ TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
   return kTfLiteOk;
 }
 
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseAssignVariable(const Operator*, ErrorReporter*,
+                                 BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
@@ -1010,6 +1086,49 @@ TfLiteStatus ParseBatchToSpaceNd(const Operator*, ErrorReporter*,
   return kTfLiteOk;
 }
 
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseBroadcastArgs(const Operator*, ErrorReporter*,
+                                BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseBroadcastTo(const Operator*, ErrorReporter*,
+                              BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteCallOnceParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteCallOnceParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const CallOnceOptions* schema_params =
+      op->builtin_options_as_CallOnceOptions();
+
+  if (schema_params != nullptr) {
+    params->init_subgraph_index = schema_params->init_subgraph_index();
+
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();  // Hand ownership to the caller.
+  return kTfLiteOk;
+}
+
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
@@ -1388,6 +1507,30 @@ TfLiteStatus ParseImag(const Operator*, ErrorReporter*,
   return kTfLiteOk;
 }
 
+TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
+                     BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteIfParams, SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteIfParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const IfOptions* schema_params = op->builtin_options_as_IfOptions();
+
+  if (schema_params != nullptr) {
+    params->then_subgraph_index = schema_params->then_subgraph_index();
+    params->else_subgraph_index = schema_params->else_subgraph_index();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();  // Hand ownership to the caller.
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseL2Normalization(const Operator* op,
                                   ErrorReporter* error_reporter,
                                   BuiltinDataAllocator* allocator,
@@ -1495,6 +1638,40 @@ TfLiteStatus ParseLogSoftmax(const Operator*, ErrorReporter*,
   return kTfLiteOk;
 }
 
+TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  auto params = safe_allocator.Allocate<TfLiteLSTMParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+  if (const auto* lstm_params = op->builtin_options_as_LSTMOptions()) {
+    params->activation =
+        ConvertActivation(lstm_params->fused_activation_function());
+    params->cell_clip = lstm_params->cell_clip();
+    params->proj_clip = lstm_params->proj_clip();
+    switch (lstm_params->kernel_type()) {  // Schema enum -> runtime enum.
+      case LSTMKernelType_FULL:
+        params->kernel_type = kTfLiteLSTMFullKernel;
+        break;
+      case LSTMKernelType_BASIC:
+        params->kernel_type = kTfLiteLSTMBasicKernel;
+        break;
+      default:  // Any other kernel type is reported and rejected.
+        TF_LITE_REPORT_ERROR(error_reporter, "Unhandled LSTM kernel type: %d",
+                             lstm_params->kernel_type());
+        return kTfLiteError;
+    }
+    params->asymmetric_quantize_inputs =
+        lstm_params->asymmetric_quantize_inputs();
+  } else {  // Missing LSTMOptions is an error here, not a silent default.
+    TF_LITE_REPORT_ERROR(error_reporter, "No valid LSTM builtin options exist");
+    return kTfLiteError;
+  }
+  *builtin_data = params.release();  // Hand ownership to the caller.
+  return kTfLiteOk;
+}
+
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
@@ -1511,6 +1688,32 @@ TfLiteStatus ParseMinimum(const Operator*, ErrorReporter*,
   return kTfLiteOk;
 }
 
+TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteMirrorPaddingParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteMirrorPaddingParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const MirrorPadOptions* schema_params =
+      op->builtin_options_as_MirrorPadOptions();
+
+  if (schema_params != nullptr) {
+    params->mode = ConvertMirrorPadding(schema_params->mode());
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();  // Hand ownership to the caller.
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
                       BuiltinDataAllocator* allocator, void** builtin_data) {
   CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
@@ -1654,6 +1857,14 @@ TfLiteStatus ParseReal(const Operator*, ErrorReporter*,
   return kTfLiteOk;
 }
 
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseReadVariable(const Operator*, ErrorReporter*,
+                               BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator,
                           void** builtin_data) {
@@ -1811,6 +2022,22 @@ TfLiteStatus ParseRsqrt(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
   return kTfLiteOk;
 }
 
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseSelect(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                         void**) {
+  return kTfLiteOk;
+}
+
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseSelectV2(const Operator*, ErrorReporter*,
+                           BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
                         BuiltinDataAllocator* allocator, void** builtin_data) {
   SafeBuiltinDataAllocator safe_allocator(allocator);
@@ -1837,19 +2064,14 @@ TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
-TfLiteStatus ParseSelect(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
-                         void**) {
+TfLiteStatus ParseSin(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
+                      void**) {
   return kTfLiteOk;
 }
 
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
-TfLiteStatus ParseSin(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
-                      void**) {
-  return kTfLiteOk;
-}
-
 TfLiteStatus ParseSlice(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
                         void**) {
   return kTfLiteOk;
@@ -1961,11 +2183,28 @@ TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
   return kTfLiteOk;
 }
 
-// We have this parse function instead of directly returning kTfLiteOk from the
-// switch-case in ParseOpData because this function is used as part of the
-// selective registration for the OpResolver implementation in micro.
-TfLiteStatus ParseSquaredDifference(const Operator*, ErrorReporter*,
-                                    BuiltinDataAllocator*, void**) {
+TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op,
+                                             ErrorReporter* error_reporter,
+                                             BuiltinDataAllocator* allocator,
+                                             void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  auto params =
+      safe_allocator.Allocate<TfLiteUnidirectionalSequenceLSTMParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+  if (const auto* seq_lstm_params =
+          op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) {
+    params->activation =
+        ConvertActivation(seq_lstm_params->fused_activation_function());
+    params->cell_clip = seq_lstm_params->cell_clip();
+    params->proj_clip = seq_lstm_params->proj_clip();
+    params->time_major = seq_lstm_params->time_major();
+    params->asymmetric_quantize_inputs =
+        seq_lstm_params->asymmetric_quantize_inputs();
+    params->diagonal_recurrent_tensors =
+        seq_lstm_params->diagonal_recurrent_tensors();
+  }  // Absent options leave params as allocated; no error is raised.
+  *builtin_data = params.release();  // Hand ownership to the caller.
   return kTfLiteOk;
 }
 
@@ -2018,6 +2257,14 @@ TfLiteStatus ParseSquare(const Operator*, ErrorReporter*, BuiltinDataAllocator*,
   return kTfLiteOk;
 }
 
+// No-op parser: ignores every argument and returns kTfLiteOk. It is a named
+// function rather than an inline return in the ParseOpDataTfLite switch
+// because it is used by the selective registration of micro's OpResolver.
+TfLiteStatus ParseSquaredDifference(const Operator*, ErrorReporter*,
+                                    BuiltinDataAllocator*, void**) {
+  return kTfLiteOk;
+}
+
 TfLiteStatus ParseStridedSlice(const Operator* op,
                                ErrorReporter* error_reporter,
                                BuiltinDataAllocator* allocator,
@@ -2134,6 +2381,9 @@ TfLiteStatus ParseTransposeConv(const Operator* op,
     params->padding = ConvertPadding(transpose_conv_params->padding());
     params->stride_width = transpose_conv_params->stride_w();
     params->stride_height = transpose_conv_params->stride_h();
+
+    params->activation =
+        ConvertActivation(transpose_conv_params->fused_activation_function());
   } else {
     // TODO(b/157480169): We should either return kTfLiteError or fill in some
     // reasonable defaults in the params struct. We are not doing so until we
@@ -2168,6 +2418,62 @@ TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
   return kTfLiteOk;
 }
 
+TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteVarHandleParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteVarHandleParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const VarHandleOptions* schema_params =
+      op->builtin_options_as_VarHandleOptions();
+
+  if (schema_params != nullptr) {
+    if (schema_params->container()) {
+      params->container = schema_params->container()->c_str();
+    }
+    if (schema_params->shared_name()) {
+      params->shared_name = schema_params->shared_name()->c_str();
+    }
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
+TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data) {
+  CheckParsePointerParams(op, error_reporter, allocator, builtin_data);
+
+  SafeBuiltinDataAllocator safe_allocator(allocator);
+  std::unique_ptr<TfLiteWhileParams,
+                  SafeBuiltinDataAllocator::BuiltinDataDeleter>
+      params = safe_allocator.Allocate<TfLiteWhileParams>();
+  TF_LITE_ENSURE(error_reporter, params != nullptr);
+
+  const WhileOptions* schema_params = op->builtin_options_as_WhileOptions();
+
+  if (schema_params != nullptr) {
+    params->cond_subgraph_index = schema_params->cond_subgraph_index();
+    params->body_subgraph_index = schema_params->body_subgraph_index();
+  } else {
+    // TODO(b/157480169): We should either return kTfLiteError or fill in some
+    // reasonable defaults in the params struct. We are not doing so until we
+    // better understand the ramifications of changing the legacy behavior.
+  }
+
+  *builtin_data = params.release();
+  return kTfLiteOk;
+}
+
 // We have this parse function instead of directly returning kTfLiteOk from the
 // switch-case in ParseOpData because this function is used as part of the
 // selective registration for the OpResolver implementation in micro.
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h b/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h
index 8163dd6..b8e6019 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,8 +23,8 @@ limitations under the License.
 #include <new>
 #include <type_traits>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -84,6 +84,11 @@ TfLiteStatus ParseArgMax(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseArgMin(const Operator* op, ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseAssignVariable(const Operator* op,
+                                 ErrorReporter* error_reporter,
+                                 BuiltinDataAllocator* allocator,
+                                 void** builtin_data);
+
 TfLiteStatus ParseBatchMatMul(const Operator* op, ErrorReporter* error_reporter,
                               BuiltinDataAllocator* allocator,
                               void** builtin_data);
@@ -93,6 +98,19 @@ TfLiteStatus ParseBatchToSpaceNd(const Operator* op,
                                  BuiltinDataAllocator* allocator,
                                  void** builtin_data);
 
+TfLiteStatus ParseBroadcastArgs(const Operator* op,
+                                ErrorReporter* error_reporter,
+                                BuiltinDataAllocator* allocator,
+                                void** builtin_data);
+
+TfLiteStatus ParseBroadcastTo(const Operator* op, ErrorReporter* error_reporter,
+                              BuiltinDataAllocator* allocator,
+                              void** builtin_data);
+
+TfLiteStatus ParseCallOnce(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
 TfLiteStatus ParseCeil(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data);
 
@@ -187,6 +205,9 @@ TfLiteStatus ParseHardSwish(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseImag(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseIf(const Operator* op, ErrorReporter* error_reporter,
+                     BuiltinDataAllocator* allocator, void** builtin_data);
+
 TfLiteStatus ParseL2Normalization(const Operator* op,
                                   ErrorReporter* error_reporter,
                                   BuiltinDataAllocator* allocator,
@@ -226,12 +247,19 @@ TfLiteStatus ParseLogSoftmax(const Operator* op, ErrorReporter* error_reporter,
                              BuiltinDataAllocator* allocator,
                              void** builtin_data);
 
+TfLiteStatus ParseLSTM(const Operator* op, ErrorReporter* error_reporter,
+                       BuiltinDataAllocator* allocator, void** builtin_data);
+
 TfLiteStatus ParseMaximum(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator, void** builtin_data);
 
 TfLiteStatus ParseMinimum(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseMirrorPad(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data);
+
 TfLiteStatus ParseMul(const Operator* op, ErrorReporter* error_reporter,
                       BuiltinDataAllocator* allocator, void** builtin_data);
 
@@ -267,6 +295,11 @@ TfLiteStatus ParseQuantize(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseReal(const Operator* op, ErrorReporter* error_reporter,
                        BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseReadVariable(const Operator* op,
+                               ErrorReporter* error_reporter,
+                               BuiltinDataAllocator* allocator,
+                               void** builtin_data);
+
 TfLiteStatus ParseReducer(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator, void** builtin_data);
 
@@ -301,6 +334,10 @@ TfLiteStatus ParseRsqrt(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseSelect(const Operator* op, ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseSelectV2(const Operator* op, ErrorReporter* error_reporter,
+                           BuiltinDataAllocator* allocator,
+                           void** builtin_data);
+
 TfLiteStatus ParseShape(const Operator* op, ErrorReporter* error_reporter,
                         BuiltinDataAllocator* allocator, void** builtin_data);
 
@@ -329,9 +366,6 @@ TfLiteStatus ParseSplit(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseSplitV(const Operator* op, ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data);
 
-TfLiteStatus ParseSquaredDifference(const Operator* op, ErrorReporter* error_reporter,
-                                    BuiltinDataAllocator* allocator, void** builtin_data);
-
 TfLiteStatus ParseSqueeze(const Operator* op, ErrorReporter* error_reporter,
                           BuiltinDataAllocator* allocator, void** builtin_data);
 
@@ -341,6 +375,11 @@ TfLiteStatus ParseSqrt(const Operator* op, ErrorReporter* error_reporter,
 TfLiteStatus ParseSquare(const Operator* op, ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseSquaredDifference(const Operator* op,
+                                    ErrorReporter* error_reporter,
+                                    BuiltinDataAllocator* allocator,
+                                    void** builtin_data);
+
 TfLiteStatus ParseStridedSlice(const Operator* op,
                                ErrorReporter* error_reporter,
                                BuiltinDataAllocator* allocator,
@@ -367,6 +406,18 @@ TfLiteStatus ParseTransposeConv(const Operator* op,
 TfLiteStatus ParseUnpack(const Operator* op, ErrorReporter* error_reporter,
                          BuiltinDataAllocator* allocator, void** builtin_data);
 
+TfLiteStatus ParseUnidirectionalSequenceLSTM(const Operator* op,
+                                             ErrorReporter* error_reporter,
+                                             BuiltinDataAllocator* allocator,
+                                             void** builtin_data);
+
+TfLiteStatus ParseVarHandle(const Operator* op, ErrorReporter* error_reporter,
+                            BuiltinDataAllocator* allocator,
+                            void** builtin_data);
+
+TfLiteStatus ParseWhile(const Operator* op, ErrorReporter* error_reporter,
+                        BuiltinDataAllocator* allocator, void** builtin_data);
+
 TfLiteStatus ParseZerosLike(const Operator* op, ErrorReporter* error_reporter,
                             BuiltinDataAllocator* allocator,
                             void** builtin_data);
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cc b/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cc
index 66ddcc5..bb2e080 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cc
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
 
 namespace tflite {
@@ -30,8 +30,7 @@ TfLiteStatus GetRegistrationFromOpCode(
   auto builtin_code = GetBuiltinCode(opcode);
   int version = opcode->version();
 
-  if (builtin_code > BuiltinOperator_MAX ||
-      builtin_code < BuiltinOperator_MIN) {
+  if (builtin_code > BuiltinOperator_MAX) {
     TF_LITE_REPORT_ERROR(
         error_reporter,
         "Op builtin_code out of range: %d. Are you using old TFLite binary "
@@ -44,7 +43,10 @@ TfLiteStatus GetRegistrationFromOpCode(
       TF_LITE_REPORT_ERROR(
           error_reporter,
+          // Adjacent literals concatenate into ONE format string; a comma
+          // between them would make the second literal the first vararg.
           "Didn't find op for builtin opcode '%s' version '%d'. "
-          "This model is not supported by EON Compiler of TensorFlow Lite Micro, but is in full TFLite (e.g. on Linux).\n",
+          "This model is not supported by EON Compiler of TensorFlow Lite Micro, "
+          "but is in full TFLite (e.g. on Linux).\n",
           EnumNameBuiltinOperator(builtin_code), version);
       status = kTfLiteError;
     }
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h b/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h
index b87548d..75fc5d0 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h
@@ -15,11 +15,12 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
 #define TENSORFLOW_LITE_CORE_API_OP_RESOLVER_H_
 
+#include <functional>
 #include <memory>
 #include <vector>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -36,16 +37,83 @@ class OpResolver {
   virtual const TfLiteRegistration* FindOp(const char* op,
                                            int version) const = 0;
 
+  // Represents a sequence of delegates.
+  using TfLiteDelegatePtrVector =
+      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
+
   // Returns optional delegates for resolving and handling ops in the flatbuffer
   // model. This may be used in addition to the standard TfLiteRegistration
   // lookup for graph resolution.
-  using TfLiteDelegatePtrVector =
-      std::vector<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>>;
+  // WARNING: This API is deprecated, GetDelegateCreators is preferred.
   virtual TfLiteDelegatePtrVector GetDelegates(int num_threads) const {
-    return TfLiteDelegatePtrVector();
+    return {};
+  }
+
+  // Represents a function that creates a TfLite delegate instance.
+  using TfLiteDelegateCreator =
+      std::function<std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>(
+          TfLiteContext* /*context*/)>;
+
+  // Represents a sequence of delegate creator functions.
+  using TfLiteDelegateCreators = std::vector<TfLiteDelegateCreator>;
+
+  // Returns a vector of delegate creators to create optional delegates for
+  // resolving and handling ops in the flatbuffer model. This may be used in
+  // addition to the standard TfLiteRegistration lookup for graph resolution.
+  //
+  // Note that this method is not used (will not be called) if you are using
+  // TF Lite in Google Play Services; the GetOpaqueDelegateCreators method
+  // (see below) is used for that case.
+  virtual TfLiteDelegateCreators GetDelegateCreators() const { return {}; }
+
+  // TODO(b/202712825): it would be nice if we could avoid the need for separate
+  // "opaque" types & methods for use only with TF Lite in Google Play Services.
+
+  // Represents an opaque delegate instance.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegatePtr =
+      std::unique_ptr<TfLiteOpaqueDelegate, void (*)(TfLiteOpaqueDelegate*)>;
+
+  // Represents a function that creates an opaque delegate instance.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegateCreator =
+      std::function<TfLiteOpaqueDelegatePtr(int /*num_threads*/)>;
+
+  // Represents a sequence of opaque delegate creator functions.
+  // WARNING: Experimental interface, subject to change.
+  using TfLiteOpaqueDelegateCreators = std::vector<TfLiteOpaqueDelegateCreator>;
+
+  // Returns a vector of opaque delegate creators to create optional opaque
+  // delegates for resolving and handling ops in the flatbuffer model. This may
+  // be used in addition to the standard TfLiteRegistration lookup for graph
+  // resolution.
+  //
+  // Note that this method will be called only if you are using TF Lite in
+  // Google Play Services; if you are using regular TF Lite, GetDelegateCreators
+  // (see above) is used instead.
+  //
+  // WARNING: Experimental interface, subject to change.
+  virtual TfLiteOpaqueDelegateCreators GetOpaqueDelegateCreators() const {
+    return {};
   }
 
   virtual ~OpResolver() {}
+
+ private:
+  /// Returns true if this OpResolver may contain any "user defined" ops.
+  /// By "user defined" ops, we mean any op definitions other than those
+  /// contained in tflite::ops::builtin::BuiltinOpResolver.
+  ///
+  /// If this method returns true, it doesn't necessarily mean that the
+  /// OpResolver contains a user-defined op, just that the absence of
+  /// user-defined ops can't be guaranteed.
+  ///
+  /// Note that "user-defined" ops are not the same as "custom" ops;
+  /// BuiltinOpResolver may support certain "custom" ops, in addition to
+  /// "builtin" ops, and may not support all of the "builtin" op enum values.
+  virtual bool MayContainUserDefinedOps() const { return true; }
+
+  friend class OpResolverInternal;
 };
 
 // Handles the logic for converting between an OperatorCode structure extracted
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cc b/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cc
index 4288daf..b62d50c 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cc
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.cc
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <string.h>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 
 namespace tflite {
 
diff --git a/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h b/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h
index 76d7545..608128a 100644
--- a/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h
+++ b/edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h
@@ -16,7 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
 #define TENSORFLOW_LITE_CORE_API_TENSOR_UTILS_H_
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 
 namespace tflite {
 
diff --git a/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h b/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h
new file mode 100644
index 0000000..3a1ee0e
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h
@@ -0,0 +1,537 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+/// WARNING: Users of TensorFlow Lite should not include this file directly,
+/// but should instead include
+/// "third_party/tensorflow/lite/c/builtin_op_data.h".
+/// Only the TensorFlow Lite implementation itself should include this
+/// file directly.
+#ifndef TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_
+#define TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// TfLiteReshapeParams can't have dynamic data so we fix the maximum possible
+// number of dimensions.
+#define TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT 8
+
+// TODO(aselle): Consider using "if this then that" for testing.
+
+// Useful placeholder to put in otherwise empty structs to avoid size warnings.
+typedef struct {
+  char dummy;
+} EmptyStructPlaceholder;
+
+// IMPORTANT: All new members of structs must be added at the end to ensure
+// backwards compatibility.
+
+// Possible padding types (for convolutions)
+typedef enum {
+  kTfLitePaddingUnknown = 0,
+  kTfLitePaddingSame,
+  kTfLitePaddingValid,
+} TfLitePadding;
+
+typedef enum {
+  kTfLiteMirrorPaddingUnknown = 0,
+  kTfLiteMirrorPaddingReflect,
+  kTfLiteMirrorPaddingSymmetric,
+} TfLiteMirrorPaddingMode;
+
+// TODO(b/130259536): We should move this out of builtin_op_data.
+typedef struct {
+  int width;
+  int height;
+  int width_offset;
+  int height_offset;
+} TfLitePaddingValues;
+
+typedef struct {
+  TfLiteMirrorPaddingMode mode;
+} TfLiteMirrorPaddingParams;
+
+// Possible fused activation functions.
+typedef enum {
+  kTfLiteActNone = 0,
+  kTfLiteActRelu,
+  kTfLiteActReluN1To1,  // min(max(-1, x), 1)
+  kTfLiteActRelu6,      // min(max(0, x), 6)
+  kTfLiteActTanh,
+  kTfLiteActSignBit,
+  kTfLiteActSigmoid,
+} TfLiteFusedActivation;
+
+typedef struct {
+  // Parameters for CONV_2D version 1.
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  TfLiteFusedActivation activation;
+
+  // Parameters for CONV_2D version 2.
+  // Note: Version 2 supports dilation values not equal to 1.
+  int dilation_width_factor;
+  int dilation_height_factor;
+} TfLiteConvParams;
+
+typedef struct {
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  int stride_depth;
+  int dilation_width_factor;
+  int dilation_height_factor;
+  int dilation_depth_factor;
+  TfLiteFusedActivation activation;
+} TfLiteConv3DParams;
+
+typedef TfLiteConv3DParams TfLiteConv3DTransposeParams;
+
+typedef struct {
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  int filter_width;
+  int filter_height;
+  TfLiteFusedActivation activation;
+  struct {
+    TfLitePaddingValues padding;
+  } computed;
+} TfLitePoolParams;
+
+typedef struct {
+  // Parameters for DepthwiseConv version 1 or above.
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  //
+  // The information can be deduced from the shape of input and the shape of
+  // weights. Since the TFLiteConverter toolchain doesn't support partially
+  // specified shapes, relying on `depth_multiplier` stops us from supporting
+  // graphs with dynamic shape tensors.
+  //
+  // Note: Some of the delegates (e.g. NNAPI, GPU) are still relying on this
+  // field.
+  int depth_multiplier;
+  TfLiteFusedActivation activation;
+  // Parameters for DepthwiseConv version 2 or above.
+  int dilation_width_factor;
+  int dilation_height_factor;
+} TfLiteDepthwiseConvParams;
+
+typedef struct {
+  int rank;
+  TfLiteFusedActivation activation;
+
+  // Parameter for SVDF version 4.
+  bool asymmetric_quantize_inputs;
+} TfLiteSVDFParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+
+  // Parameter for RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteRNNParams;
+
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+
+  // Parameter for Sequence RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteSequenceRNNParams;
+
+typedef struct {
+  bool time_major;
+  TfLiteFusedActivation activation;
+  bool merge_outputs;
+
+  // Parameter for Bidirectional RNN version 3.
+  bool asymmetric_quantize_inputs;
+} TfLiteBidirectionalSequenceRNNParams;
+
+typedef enum {
+  kTfLiteFullyConnectedWeightsFormatDefault = 0,
+  kTfLiteFullyConnectedWeightsFormatShuffled4x16Int8 = 1,
+} TfLiteFullyConnectedWeightsFormat;
+
+typedef struct {
+  // Parameters for FullyConnected version 1 or above.
+  TfLiteFusedActivation activation;
+
+  // Parameters for FullyConnected version 2 or above.
+  TfLiteFullyConnectedWeightsFormat weights_format;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimensions in the input and the output
+  // tensors are the same. Furthermore, all but the last dimension of the input
+  // and output shapes will be equal.
+  bool keep_num_dims;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true and the weights are quantized, then non constant inputs
+  // are quantized at evaluation time with asymmetric quantization.
+  bool asymmetric_quantize_inputs;
+} TfLiteFullyConnectedParams;
+
+typedef enum {
+  kTfLiteLshProjectionUnknown = 0,
+  kTfLiteLshProjectionSparse = 1,
+  kTfLiteLshProjectionDense = 2,
+} TfLiteLSHProjectionType;
+
+typedef struct {
+  TfLiteLSHProjectionType type;
+} TfLiteLSHProjectionParams;
+
+typedef struct {
+  float beta;
+} TfLiteSoftmaxParams;
+
+typedef struct {
+  int axis;
+  TfLiteFusedActivation activation;
+} TfLiteConcatenationParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+  // Parameter added for the version 4.
+  bool pot_scale_int16;
+} TfLiteAddParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteSpaceToBatchNDParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteBatchToSpaceNDParams;
+
+typedef struct {
+  bool adj_x;
+  bool adj_y;
+  // Parameters for BatchMatMul version 4 or above.
+  // If set to true and the weights are quantized, then non constant inputs
+  // are quantized at evaluation time with asymmetric quantization.
+  bool asymmetric_quantize_inputs;
+} TfLiteBatchMatMulParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteMulParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+  // Parameter added for the version 5.
+  bool pot_scale_int16;
+} TfLiteSubParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteDivParams;
+
+typedef struct {
+  TfLiteFusedActivation activation;
+} TfLiteL2NormParams;
+
+typedef struct {
+  int radius;
+  float bias;
+  float alpha;
+  float beta;
+} TfLiteLocalResponseNormParams;
+
+typedef enum {
+  kTfLiteLSTMFullKernel = 0,
+  kTfLiteLSTMBasicKernel
+} TfLiteLSTMKernelType;
+
+typedef struct {
+  // Parameters for LSTM version 1.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // Parameters for LSTM version 2.
+  // kTfLiteLSTMBasicKernel is only supported in version 2 or above.
+  TfLiteLSTMKernelType kernel_type;
+
+  // Parameters for LSTM version 4.
+  bool asymmetric_quantize_inputs;
+} TfLiteLSTMParams;
+
+typedef struct {
+  // Parameters needed for the underlying LSTM.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
+
+  // Parameter for unidirectional sequence LSTM version 3.
+  bool asymmetric_quantize_inputs;
+
+  // Parameter for unidirectional sequence LSTM version 4.
+  bool diagonal_recurrent_tensors;
+} TfLiteUnidirectionalSequenceLSTMParams;
+
+typedef struct {
+  // Parameters supported by version 1:
+  // Parameters inherited for the LSTM kernel.
+  TfLiteFusedActivation activation;
+  float cell_clip;
+  float proj_clip;
+
+  // If true, store the outputs of both directions in the first output.
+  bool merge_outputs;
+
+  // Parameters supported by version 2:
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
+
+  // Parameters supported by version 3:
+  // If set to true, then hybrid ops use asymmetric quantization for inputs.
+  bool asymmetric_quantize_inputs;
+} TfLiteBidirectionalSequenceLSTMParams;
+
+typedef struct {
+  bool align_corners;
+  // half_pixel_centers assumes pixels are of half the actual dimensions, and
+  // yields more accurate resizes. Corresponds to the same argument for the
+  // original TensorFlow op in TF2.0.
+  bool half_pixel_centers;
+} TfLiteResizeBilinearParams;
+
+typedef struct {
+  bool align_corners;
+  bool half_pixel_centers;
+} TfLiteResizeNearestNeighborParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLitePadParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLitePadV2Params;
+
+typedef struct {
+  // These fields are only used in old models for backward compatibility.
+  // In the current implementation, we use the 2nd input of the op as the shape,
+  // and these fields are unused.
+  int shape[TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT];
+  int num_dimensions;
+} TfLiteReshapeParams;
+
+typedef struct {
+  int ngram_size;
+  int max_skip_size;
+  bool include_all_ngrams;
+} TfLiteSkipGramParams;
+
+typedef struct {
+  int block_size;
+} TfLiteSpaceToDepthParams;
+
+typedef struct {
+  int block_size;
+} TfLiteDepthToSpaceParams;
+
+typedef struct {
+  TfLiteType in_data_type;
+  TfLiteType out_data_type;
+} TfLiteCastParams;
+
+typedef enum {
+  kTfLiteCombinerTypeSum = 0,
+  kTfLiteCombinerTypeMean = 1,
+  kTfLiteCombinerTypeSqrtn = 2,
+} TfLiteCombinerType;
+
+typedef struct {
+  TfLiteCombinerType combiner;
+} TfLiteEmbeddingLookupSparseParams;
+
+typedef struct {
+  int axis;
+  int batch_dims;
+} TfLiteGatherParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteTransposeParams;
+
+typedef struct {
+  bool keep_dims;
+} TfLiteReducerParams;
+
+typedef struct {
+  int num_splits;
+} TfLiteSplitParams;
+
+typedef struct {
+  int num_splits;
+} TfLiteSplitVParams;
+
+typedef struct {
+  // TODO(ahentz): We can't have dynamic data in this struct, at least not yet.
+  // For now we will fix the maximum possible number of dimensions.
+  int squeeze_dims[8];
+  int num_squeeze_dims;
+} TfLiteSqueezeParams;
+
+typedef struct {
+  int begin_mask;
+  int end_mask;
+  int ellipsis_mask;
+  int new_axis_mask;
+  int shrink_axis_mask;
+} TfLiteStridedSliceParams;
+
+typedef struct {
+  TfLiteType output_type;
+} TfLiteArgMaxParams;
+
+typedef struct {
+  TfLiteType output_type;
+} TfLiteArgMinParams;
+
+typedef struct {
+  // Parameters supported by version 1:
+  TfLitePadding padding;
+  int stride_width;
+  int stride_height;
+
+  // Parameters supported by version 4:
+  TfLiteFusedActivation activation;
+} TfLiteTransposeConvParams;
+
+typedef struct {
+  bool validate_indices;
+} TfLiteSparseToDenseParams;
+
+typedef struct {
+  TfLiteType out_type;
+} TfLiteShapeParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteRankParams;
+
+typedef struct {
+  // Parameters supported by version 1:
+  float min;
+  float max;
+  int num_bits;
+
+  // Parameters supported by version 2:
+  bool narrow_range;
+} TfLiteFakeQuantParams;
+
+typedef struct {
+  int values_count;
+  int axis;
+} TfLitePackParams;
+
+typedef struct {
+  int axis;
+} TfLiteOneHotParams;
+
+typedef struct {
+  int num;
+  int axis;
+} TfLiteUnpackParams;
+
+typedef struct {
+  float alpha;
+} TfLiteLeakyReluParams;
+
+typedef struct {
+  TfLiteType index_out_type;
+} TfLiteUniqueParams;
+
+typedef struct {
+  int seq_dim;
+  int batch_dim;
+} TfLiteReverseSequenceParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteMatrixDiagParams;
+
+typedef struct {
+  EmptyStructPlaceholder placeholder;
+} TfLiteMatrixSetDiagParams;
+
+typedef struct {
+  int then_subgraph_index;
+  int else_subgraph_index;
+} TfLiteIfParams;
+
+typedef struct {
+  int cond_subgraph_index;
+  int body_subgraph_index;
+} TfLiteWhileParams;
+
+typedef struct {
+  bool exclusive;
+  bool reverse;
+} TfLiteCumsumParams;
+
+typedef struct {
+  int init_subgraph_index;
+} TfLiteCallOnceParams;
+
+typedef struct {
+  int table_id;
+  TfLiteType key_dtype;
+  TfLiteType value_dtype;
+} TfLiteHashtableParams;
+
+typedef struct {
+  const char* container;
+  const char* shared_name;
+} TfLiteVarHandleParams;
+
+typedef struct {
+  int seed;
+  int seed2;
+} TfLiteRandomParams;
+
+typedef struct {
+  int num_boundaries;
+  // This points to the memory stored in the model (flatbuffer),
+  // and is not owned.
+  const float* boundaries;
+} TfLiteBucketizeParams;
+
+typedef struct {
+  bool approximate;
+} TfLiteGeluParams;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // TENSORFLOW_LITE_CORE_C_BUILTIN_OP_DATA_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h b/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h
new file mode 100644
index 0000000..3aab43f
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h
@@ -0,0 +1,168 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file declares types used by the pure C inference API defined in c_api.h,
+// some of which are also used in the C++ and C kernel and interpreter APIs.
+
+/// WARNING: Users of TensorFlow Lite should not include this file directly,
+/// but should instead include
+/// "third_party/tensorflow/lite/c/c_api_types.h".
+/// Only the TensorFlow Lite implementation itself should include this
+/// file directly.
+
+#ifndef TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_
+#define TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Define TFL_CAPI_EXPORT macro to export a function properly with a shared
+// library.
+#ifdef SWIG
+#define TFL_CAPI_EXPORT
+#elif defined(TFL_STATIC_LIBRARY_BUILD)
+#define TFL_CAPI_EXPORT
+#else  // not defined TFL_STATIC_LIBRARY_BUILD
+#if defined(_WIN32)
+#ifdef TFL_COMPILE_LIBRARY
+#define TFL_CAPI_EXPORT __declspec(dllexport)
+#else
+#define TFL_CAPI_EXPORT __declspec(dllimport)
+#endif  // TFL_COMPILE_LIBRARY
+#else
+#define TFL_CAPI_EXPORT __attribute__((visibility("default")))
+#endif  // _WIN32
+#endif  // SWIG
+
+// Note that new error status values may be added in future in order to
+// indicate more fine-grained internal states, therefore, applications should
+// not rely on status values being members of the enum.
+typedef enum TfLiteStatus {
+  kTfLiteOk = 0,
+
+  // Generally referring to an error in the runtime (i.e. interpreter)
+  kTfLiteError = 1,
+
+  // Generally referring to an error from a TfLiteDelegate itself.
+  kTfLiteDelegateError = 2,
+
+  // Generally referring to an error in applying a delegate due to
+  // incompatibility between runtime and delegate, e.g., this error is returned
+  // when trying to apply a TF Lite delegate onto a model graph that's already
+  // immutable.
+  kTfLiteApplicationError = 3,
+
+  // Generally referring to serialized delegate data not being found.
+  // See tflite::delegates::Serialization.
+  kTfLiteDelegateDataNotFound = 4,
+
+  // Generally referring to data-writing issues in delegate serialization.
+  // See tflite::delegates::Serialization.
+  kTfLiteDelegateDataWriteError = 5,
+
+  // Generally referring to data-reading issues in delegate serialization.
+  // See tflite::delegates::Serialization.
+  kTfLiteDelegateDataReadError = 6,
+
+  // Generally referring to issues when the TF Lite model has ops that cannot be
+  // resolved at runtime. This could happen when the specific op is not
+  // registered or built with the TF Lite framework.
+  kTfLiteUnresolvedOps = 7,
+
+  // Generally referring to invocation cancelled by the user.
+  // See `interpreter::Cancel`.
+  // TODO(b/194915839): Implement `interpreter::Cancel`.
+  // TODO(b/250636993): Cancellation triggered by `SetCancellationFunction`
+  // should also return this status code.
+  kTfLiteCancelled = 8,
+} TfLiteStatus;
+
+// Types supported by tensor
+typedef enum {
+  kTfLiteNoType = 0,
+  kTfLiteFloat32 = 1,
+  kTfLiteInt32 = 2,
+  kTfLiteUInt8 = 3,
+  kTfLiteInt64 = 4,
+  kTfLiteString = 5,
+  kTfLiteBool = 6,
+  kTfLiteInt16 = 7,
+  kTfLiteComplex64 = 8,
+  kTfLiteInt8 = 9,
+  kTfLiteFloat16 = 10,
+  kTfLiteFloat64 = 11,
+  kTfLiteComplex128 = 12,
+  kTfLiteUInt64 = 13,
+  kTfLiteResource = 14,
+  kTfLiteVariant = 15,
+  kTfLiteUInt32 = 16,
+  kTfLiteUInt16 = 17,
+  kTfLiteInt4 = 18,
+} TfLiteType;
+
+// Legacy. Will be deprecated in favor of TfLiteAffineQuantization.
+// If per-layer quantization is specified this field will still be populated in
+// addition to TfLiteAffineQuantization.
+// Parameters for asymmetric quantization. Quantized values can be converted
+// back to float using:
+//     real_value = scale * (quantized_value - zero_point)
+typedef struct TfLiteQuantizationParams {
+  float scale;
+  int32_t zero_point;
+} TfLiteQuantizationParams;
+
+// --------------------------------------------------------------------------
+// Opaque types used by c_api.h, c_api_opaque.h and common.h.
+
+// TfLiteOpaqueContext is an opaque version of TfLiteContext;
+typedef struct TfLiteOpaqueContext TfLiteOpaqueContext;
+
+// TfLiteOpaqueNode is an opaque version of TfLiteNode;
+typedef struct TfLiteOpaqueNode TfLiteOpaqueNode;
+
+// TfLiteOpaqueTensor is an opaque version of TfLiteTensor;
+typedef struct TfLiteOpaqueTensor TfLiteOpaqueTensor;
+
+// TfLiteDelegate: allows delegation of nodes to alternative backends.
+// Forward declaration of concrete type declared in common.h.
+typedef struct TfLiteDelegate TfLiteDelegate;
+
+// TfLiteOpaqueDelegateStruct: unconditionally opaque version of
+// TfLiteDelegate; allows delegation of nodes to alternative backends.
+//
+// This is an abstract type that is intended to have the same
+// role as TfLiteDelegate, but without exposing the implementation
+// details of how delegates are implemented.
+// WARNING: This is an experimental type and subject to change.
+typedef struct TfLiteOpaqueDelegateStruct TfLiteOpaqueDelegateStruct;
+
+// TfLiteOpaqueDelegate: conditionally opaque version of
+// TfLiteDelegate; allows delegation of nodes to alternative backends.
+// For TF Lite in Play Services, this is an opaque type,
+// but for regular TF Lite, this is just a typedef for TfLiteDelegate.
+// WARNING: This is an experimental type and subject to change.
+#if TFLITE_WITH_STABLE_ABI || TFLITE_USE_OPAQUE_DELEGATE
+typedef TfLiteOpaqueDelegateStruct TfLiteOpaqueDelegate;
+#else
+typedef TfLiteDelegate TfLiteOpaqueDelegate;
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif  // TENSORFLOW_LITE_CORE_C_C_API_TYPES_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/core/c/common.h b/edge-impulse-sdk/tensorflow/lite/core/c/common.h
new file mode 100644
index 0000000..83b4a31
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/core/c/common.h
@@ -0,0 +1,1170 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines common C types and APIs for implementing operations,
+// delegates and other constructs in TensorFlow Lite. The actual operations and
+// delegates can be defined using C++, but the interface between the interpreter
+// and the operations are C.
+//
+// Summary of abstractions
+// TF_LITE_ENSURE - Self-sufficient error checking
+// TfLiteStatus - Status reporting
+// TfLiteIntArray - stores tensor shapes (dims),
+// TfLiteContext - allows an op to access the tensors
+// TfLiteTensor - tensor (a multidimensional array)
+// TfLiteNode - a single node or operation
+// TfLiteRegistration - the implementation of a conceptual operation.
+// TfLiteDelegate - allows delegation of nodes to alternative backends.
+//
+// Some abstractions in this file are created and managed by Interpreter.
+//
+// NOTE: The order of values in these structs is "semi-ABI stable". New values
+// should be added only to the end of structs and never reordered.
+
+/// WARNING: Users of TensorFlow Lite should not include this file directly,
+/// but should instead include
+/// "third_party/tensorflow/lite/c/common.h".
+/// Only the TensorFlow Lite implementation itself should include this
+/// file directly.
+
+#ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_
+#define TENSORFLOW_LITE_CORE_C_COMMON_H_
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/c_api_types.h"  // IWYU pragma: export
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// The list of external context types known to TF Lite. This list exists solely
+// to avoid conflicts and to ensure ops can share the external contexts they
+// need. Access to the external contexts is controlled by one of the
+// corresponding support files.
+typedef enum TfLiteExternalContextType {
+  kTfLiteEigenContext = 0,       // include eigen_support.h to use.
+  kTfLiteGemmLowpContext = 1,    // include gemm_support.h to use.
+  kTfLiteEdgeTpuContext = 2,     // Placeholder for Edge TPU support.
+  kTfLiteCpuBackendContext = 3,  // include cpu_backend_context.h to use.
+  kTfLiteMaxExternalContexts = 4
+} TfLiteExternalContextType;
+
+// Forward declare so dependent structs and methods can reference these types
+// prior to the struct definitions.
+struct TfLiteContext;
+struct TfLiteDelegate;
+struct TfLiteRegistration;
+struct TfLiteOpaqueDelegateBuilder;
+
+// An external context is a collection of information unrelated to the TF Lite
+// framework, but useful to a subset of the ops. TF Lite knows very little
+// about the actual contexts, but it keeps a list of them, and is able to
+// refresh them if configurations like the number of recommended threads
+// change.
+typedef struct TfLiteExternalContext {
+  TfLiteExternalContextType type;
+  TfLiteStatus (*Refresh)(struct TfLiteContext* context);
+} TfLiteExternalContext;
+
+#define kTfLiteOptionalTensor (-1)
+
+// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
+// indices
+typedef struct TfLiteIntArray {
+  int size;
+
+#if defined(_MSC_VER)
+  // Context for why this is needed is in http://b/189926408#comment21
+  int data[1];
+#elif (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
+       __GNUC_MINOR__ >= 1) ||                                      \
+    defined(HEXAGON) ||                                             \
+    (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1)
+  // gcc 6.1+ have a bug where flexible members aren't properly handled
+  // https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
+  int data[0];
+#else
+  int data[];
+#endif
+} TfLiteIntArray;
+
+// Given the size (number of elements) in a TfLiteIntArray, calculate its size
+// in bytes.
+size_t TfLiteIntArrayGetSizeInBytes(int size);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create an array of a given `size` (uninitialized entries).
+// This returns a pointer, that you must free using TfLiteIntArrayFree().
+TfLiteIntArray* TfLiteIntArrayCreate(int size);
+#endif
+
+// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise.
+int TfLiteIntArrayEqual(const TfLiteIntArray* a, const TfLiteIntArray* b);
+
+// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise.
+int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
+                              const int b_data[]);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create a copy of an array passed as `src`.
+// You are expected to free memory with TfLiteIntArrayFree
+TfLiteIntArray* TfLiteIntArrayCopy(const TfLiteIntArray* src);
+
+// Free memory of array `a`.
+void TfLiteIntArrayFree(TfLiteIntArray* a);
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Fixed size list of floats. Used for per-channel quantization.
+typedef struct TfLiteFloatArray {
+  int size;
+#if defined(_MSC_VER)
+  // Context for why this is needed is in http://b/189926408#comment21
+  float data[1];
+#elif (!defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && \
+       __GNUC_MINOR__ >= 1) ||                                      \
+    defined(HEXAGON) ||                                             \
+    (defined(__clang__) && __clang_major__ == 7 && __clang_minor__ == 1)
+  // gcc 6.1+ have a bug where flexible members aren't properly handled
+  // https://github.com/google/re2/commit/b94b7cd42e9f02673cd748c1ac1d16db4052514c
+  float data[0];
+#else
+  float data[];
+#endif
+} TfLiteFloatArray;
+
+// Given the size (number of elements) in a TfLiteFloatArray, calculate its size
+// in bytes.
+int TfLiteFloatArrayGetSizeInBytes(int size);
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Create an array of a given `size` (uninitialized entries).
+// This returns a pointer, that you must free using TfLiteFloatArrayFree().
+TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
+
+// Free memory of array `a`.
+void TfLiteFloatArrayFree(TfLiteFloatArray* a);
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Since we must not depend on any libraries, define a minimal subset of
+// error macros while avoiding names that have pre-conceived meanings like
+// assert and check.
+
+// Report through TF_LITE_KERNEL_LOG instead of calling context->ReportError
+// directly, so message strings can be stripped when binary size is critical.
+// TF_LITE_MAYBE_KERNEL_LOG skips a NULL context; NULL (not nullptr) works in C.
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+#ifdef TF_LITE_LOG_FILE_NAME
+#define TF_LITE_KERNEL_LOG(context, ...)            \
+  do {                                              \
+    (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \
+  } while (false)
+
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
+  do {                                                \
+    if ((context) != NULL) {                          \
+      (context)->ReportError((context), __FILE__ " " __VA_ARGS__); \
+    }                                                 \
+  } while (false)
+#else // TF_LITE_LOG_FILE_NAME
+#define TF_LITE_KERNEL_LOG(context, ...)            \
+  do {                                              \
+    (context)->ReportError((context), __VA_ARGS__); \
+  } while (false)
+
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)        \
+  do {                                                \
+    if ((context) != NULL) {                          \
+      (context)->ReportError((context), __VA_ARGS__); \
+    }                                                 \
+  } while (false)
+#endif // TF_LITE_LOG_FILE_NAME
+#else  // TF_LITE_STRIP_ERROR_STRINGS
+#define ARGS_UNUSED(...) (void)sizeof(#__VA_ARGS__)
+#define TF_LITE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__)
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) ARGS_UNUSED(__VA_ARGS__)
+#endif  // TF_LITE_STRIP_ERROR_STRINGS
+
+// Check whether value is true, and if not return kTfLiteError from
+// the current function (and report the error string msg).
+#define TF_LITE_ENSURE_MSG(context, value, msg)        \
+  do {                                                 \
+    if (!(value)) {                                    \
+      TF_LITE_KERNEL_LOG((context), __FILE__ " " msg); \
+      return kTfLiteError;                             \
+    }                                                  \
+  } while (0)
+
+// Check whether the value `a` is true, and if not return kTfLiteError from
+// the current function, while also reporting the location of the error.
+#define TF_LITE_ENSURE(context, a)                                      \
+  do {                                                                  \
+    if (!(a)) {                                                         \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s was not true.", __FILE__, \
+                         __LINE__, #a);                                 \
+      return kTfLiteError;                                              \
+    }                                                                   \
+  } while (0)
+
+#define TF_LITE_ENSURE_STATUS(a) \
+  do {                           \
+    const TfLiteStatus s = (a);  \
+    if (s != kTfLiteOk) {        \
+      return s;                  \
+    }                            \
+  } while (0)
+
+// Check whether the value `a == b` is true, and if not return kTfLiteError from
+// the current function, while also reporting the location of the error.
+// `a` and `b` may be evaluated more than once, so no side effects or
+// extremely expensive computations should be done.
+// NOTE: Use TF_LITE_ENSURE_TYPES_EQ if comparing TfLiteTypes.
+#define TF_LITE_ENSURE_EQ(context, a, b)                                   \
+  do {                                                                     \
+    if ((a) != (b)) {                                                      \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%d != %d)", __FILE__, \
+                         __LINE__, #a, #b, (a), (b));                      \
+      return kTfLiteError;                                                 \
+    }                                                                      \
+  } while (0)
+
+#define TF_LITE_ENSURE_TYPES_EQ(context, a, b)                             \
+  do {                                                                     \
+    if ((a) != (b)) {                                                      \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s != %s (%s != %s)", __FILE__, \
+                         __LINE__, #a, #b, TfLiteTypeGetName(a),           \
+                         TfLiteTypeGetName(b));                            \
+      return kTfLiteError;                                                 \
+    }                                                                      \
+  } while (0)
+
+#define TF_LITE_ENSURE_NEAR(context, a, b, epsilon)                          \
+  do {                                                                       \
+    auto delta = ((a) > (b)) ? ((a) - (b)) : ((b) - (a));                    \
+    if (delta > (epsilon)) { /* parens: epsilon may be any expression */     \
+      TF_LITE_KERNEL_LOG((context), "%s:%d %s not near %s (%f != %f)",       \
+                         __FILE__, __LINE__, #a, #b, static_cast<double>(a), \
+                         static_cast<double>(b));                            \
+      return kTfLiteError;                                                   \
+    }                                                                        \
+  } while (0)
+
+#define TF_LITE_ENSURE_OK(context, status) \
+  do {                                     \
+    const TfLiteStatus s = (status);       \
+    if ((s) != kTfLiteOk) {                \
+      return s;                            \
+    }                                      \
+  } while (0)
+
+// Single-precision complex data type compatible with the C99 definition.
+typedef struct TfLiteComplex64 {
+  float re, im;  // real and imaginary parts, respectively.
+} TfLiteComplex64;
+
+// Double-precision complex data type compatible with the C99 definition.
+typedef struct TfLiteComplex128 {
+  double re, im;  // real and imaginary parts, respectively.
+} TfLiteComplex128;
+
+// Half precision data type compatible with the C99 definition.
+typedef struct TfLiteFloat16 {
+  uint16_t data;
+} TfLiteFloat16;
+
+// Return the name of a given type, for error reporting purposes.
+const char* TfLiteTypeGetName(TfLiteType type);
+
+// SupportedQuantizationTypes.
+typedef enum TfLiteQuantizationType {
+  // No quantization.
+  kTfLiteNoQuantization = 0,
+  // Affine quantization (with support for per-channel quantization).
+  // Corresponds to TfLiteAffineQuantization.
+  kTfLiteAffineQuantization = 1,
+} TfLiteQuantizationType;
+
+// Structure specifying the quantization used by the tensor, if-any.
+typedef struct TfLiteQuantization {
+  // The type of quantization held by params.
+  TfLiteQuantizationType type;
+  // Holds an optional reference to a quantization param structure. The actual
+  // type depends on the value of the `type` field (see the comment there for
+  // the values and corresponding types).
+  void* params;
+} TfLiteQuantization;
+
+// Parameters for asymmetric quantization across a dimension (i.e per output
+// channel quantization).
+// quantized_dimension specifies which dimension the scales and zero_points
+// correspond to.
+// For a particular value in quantized_dimension, quantized values can be
+// converted back to float using:
+//     real_value = scale * (quantized_value - zero_point)
+typedef struct TfLiteAffineQuantization {
+  TfLiteFloatArray* scale;
+  TfLiteIntArray* zero_point;
+  int32_t quantized_dimension;
+} TfLiteAffineQuantization;
+
+/* A union of pointers that points to memory for a given tensor. */
+typedef union TfLitePtrUnion {
+  /* Do not access these members directly, if possible, use
+   * GetTensorData<TYPE>(tensor) instead, otherwise only access .data, as other
+   * members are deprecated. */
+  int32_t* i32;
+  uint32_t* u32;
+  int64_t* i64;
+  uint64_t* u64;
+  float* f;
+  TfLiteFloat16* f16;
+  double* f64;
+  char* raw;
+  const char* raw_const;
+  uint8_t* uint8;
+  bool* b;
+  int16_t* i16;
+  uint16_t* ui16;
+  TfLiteComplex64* c64;
+  TfLiteComplex128* c128;
+  int8_t* int8;
+  /* Only use this member. */
+  void* data;
+} TfLitePtrUnion;
+
+// Memory allocation strategies.
+//  * kTfLiteMmapRo: Read-only memory-mapped data, or data externally allocated.
+//  * kTfLiteArenaRw: Arena allocated with no guarantees about persistence,
+//        and available during eval.
+//  * kTfLiteArenaRwPersistent: Arena allocated but persistent across eval, and
+//        only available during eval.
+//  * kTfLiteDynamic: Allocated during eval, or for string tensors.
+//  * kTfLitePersistentRo: Allocated and populated during prepare. This is
+//        useful for tensors that can be computed during prepare and treated
+//        as constant inputs for downstream ops (also in prepare).
+//  * kTfLiteCustom: Custom memory allocation provided by the user. See
+//        TfLiteCustomAllocation below.
+typedef enum TfLiteAllocationType {
+  kTfLiteMemNone = 0,
+  kTfLiteMmapRo,
+  kTfLiteArenaRw,
+  kTfLiteArenaRwPersistent,
+  kTfLiteDynamic,
+  kTfLitePersistentRo,
+  kTfLiteCustom,
+} TfLiteAllocationType;
+
+// The delegates should use zero or positive integers to represent handles.
+// -1 is reserved for unallocated status.
+typedef int TfLiteBufferHandle;
+enum {
+  kTfLiteNullBufferHandle = -1,
+};
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType {
+  kTfLiteDimDense = 0,
+  kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+// Metadata to encode each dimension in a sparse tensor.
+typedef struct TfLiteDimensionMetadata {
+  TfLiteDimensionType format;
+  int dense_size;
+  TfLiteIntArray* array_segments;
+  TfLiteIntArray* array_indices;
+} TfLiteDimensionMetadata;
+
+// Parameters used to encode a sparse tensor. For detailed explanation of each
+// field please refer to lite/schema/schema.fbs.
+typedef struct TfLiteSparsity {
+  TfLiteIntArray* traversal_order;
+  TfLiteIntArray* block_map;
+  TfLiteDimensionMetadata* dim_metadata;
+  int dim_metadata_size;
+} TfLiteSparsity;
+
+// Defines a custom memory allocation not owned by the runtime.
+// `data` should be aligned to kDefaultTensorAlignment defined in
+// lite/util.h. (Currently 64 bytes)
+// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage.
+typedef struct TfLiteCustomAllocation {
+  void* data;
+  size_t bytes;
+} TfLiteCustomAllocation;
+
+// The flags used in `Interpreter::SetCustomAllocationForTensor`.
+// Note that this is a bitmask, so the values should be 1, 2, 4, 8, ...etc.
+typedef enum TfLiteCustomAllocationFlags {
+  kTfLiteCustomAllocationFlagsNone = 0,
+  // Skips checking whether allocation.data points to an aligned buffer as
+  // expected by the TFLite runtime.
+  // NOTE: Setting this flag can cause crashes when calling Invoke().
+  // Use with caution.
+  kTfLiteCustomAllocationFlagsSkipAlignCheck = 1,
+} TfLiteCustomAllocationFlags;
+
+// A tensor in the interpreter system which is a wrapper around a buffer of
+// data including a dimensionality (or NULL if not currently defined).
+#ifndef TF_LITE_STATIC_MEMORY
+typedef struct TfLiteTensor {
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have. NOTE: the product of elements of `dims`
+  // and the element datatype size should be equal to `bytes` below.
+  TfLiteIntArray* dims;
+  // Quantization information.
+  TfLiteQuantizationParams params;
+  // How memory is mapped
+  //  kTfLiteMmapRo: Memory mapped read only.
+  //  i.e. weights
+  //  kTfLiteArenaRw: Arena allocated read write memory
+  //  (i.e. temporaries, outputs).
+  TfLiteAllocationType allocation_type;
+  // The number of bytes required to store the data of this Tensor. I.e.
+  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
+  // type is kTfLiteFloat32 and dims = {3, 2} then
+  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
+  size_t bytes;
+
+  // An opaque pointer to a tflite::MMapAllocation
+  const void* allocation;
+
+  // Null-terminated name of this tensor.
+  const char* name;
+
+  // The delegate which knows how to handle `buffer_handle`.
+  // WARNING: This is an experimental interface that is subject to change.
+  struct TfLiteDelegate* delegate;
+
+  // An integer buffer handle that can be handled by `delegate`.
+  // The value is valid only when delegate is not null.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteBufferHandle buffer_handle;
+
+  // If the delegate uses its own buffer (e.g. GPU memory), the delegate is
+  // responsible to set data_is_stale to true.
+  // `delegate->CopyFromBufferHandle` can be called to copy the data from
+  // delegate buffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  bool data_is_stale;
+
+  // True if the tensor is a variable.
+  bool is_variable;
+
+  // Quantization information. Replaces params field above.
+  TfLiteQuantization quantization;
+
+  // Parameters used to encode a sparse tensor.
+  // This is optional. The field is NULL if a tensor is dense.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteSparsity* sparsity;
+
+  // Optional. Encodes shapes with unknown dimensions with -1. This field is
+  // only populated when unknown dimensions exist in a read-write tensor (i.e.
+  // an input or output tensor). (e.g.  `dims` contains [1, 1, 1, 3] and
+  // `dims_signature` contains [1, -1, -1, 3]).  If no unknown dimensions exist
+  // then `dims_signature` is either null, or set to an empty array.  Note that
+  // this field only exists when TF_LITE_STATIC_MEMORY is not defined.
+  const TfLiteIntArray* dims_signature;
+} TfLiteTensor;
+
+// A structure representing an instance of a node.
+// This structure only exhibits the inputs, outputs, user defined data and some
+// node properties (like statefulness), not other features like the type.
+typedef struct TfLiteNode {
+  // Inputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* inputs;
+
+  // Outputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* outputs;
+
+  // Intermediate tensors to this node expressed as indices into the simulator's
+  // tensors.
+  TfLiteIntArray* intermediates;
+
+  // Temporary tensors used during the computations. This usually contains no
+  // tensors, but ops are allowed to change that if they need scratch space of
+  // any sort.
+  TfLiteIntArray* temporaries;
+
+  // Opaque data provided by the node implementer through `Registration.init`.
+  void* user_data;
+
+  // Opaque data provided to the node if the node is a builtin. This is usually
+  // a structure defined in builtin_op_data.h
+  void* builtin_data;
+
+  // Custom initial data. This is the opaque data provided in the flatbuffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  const void* custom_initial_data;
+  int custom_initial_data_size;
+
+  // The pointer to the delegate. This is non-null only when the node is
+  // created by calling `interpreter.ModifyGraphWithDelegate`.
+  // WARNING: This is an experimental interface that is subject to change.
+  struct TfLiteDelegate* delegate;
+
+  // Whether this op might have side effect (e.g. stateful op).
+  bool might_have_side_effect;
+} TfLiteNode;
+#else   // defined(TF_LITE_STATIC_MEMORY)?
+// NOTE: This flag is opt-in only at compile time.
+//
+// Specific reduced TfLiteTensor struct for TF Micro runtime. This struct
+// contains only the minimum fields required to initialize and prepare a micro
+// inference graph. The fields in this struct have been ordered from
+// largest-to-smallest for optimal struct sizeof.
+//
+// This struct does not use:
+// - allocation
+// - buffer_handle
+// - data_is_stale
+// - delegate
+// - dims_signature
+// - name
+// - sparsity
+typedef struct TfLiteTensor {
+  // TODO(b/155784997): Consider consolidating these quantization fields:
+  // Quantization information. Replaces the params field below.
+  TfLiteQuantization quantization;
+
+  // Quantization information.
+  TfLiteQuantizationParams params;
+
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have. NOTE: the product of elements of `dims`
+  // and the element datatype size should be equal to `bytes` below.
+  TfLiteIntArray* dims;
+
+  // The number of bytes required to store the data of this Tensor. I.e.
+  // (bytes of each element) * dims[0] * ... * dims[n-1].  For example, if
+  // type is kTfLiteFloat32 and dims = {3, 2} then
+  // bytes = sizeof(float) * 3 * 2 = 4 * 3 * 2 = 24.
+  size_t bytes;
+
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+
+  // How memory is mapped
+  //  kTfLiteMmapRo: Memory mapped read only.
+  //  i.e. weights
+  //  kTfLiteArenaRw: Arena allocated read write memory
+  //  (i.e. temporaries, outputs).
+  TfLiteAllocationType allocation_type;
+
+  // True if the tensor is a variable.
+  bool is_variable;
+} TfLiteTensor;
+
+// Specific reduced TfLiteNode struct for TF Micro runtime. This struct contains
+// only the minimum fields required to represent a node.
+//
+// This struct does not use:
+// - delegate
+// - might_have_side_effect
+// - temporaries
+typedef struct TfLiteNode {
+  // Inputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* inputs;
+
+  // Outputs to this node expressed as indices into the simulator's tensors.
+  TfLiteIntArray* outputs;
+
+  // intermediate tensors to this node expressed as indices into the simulator's
+  // tensors.
+  TfLiteIntArray* intermediates;
+
+  // Opaque data provided by the node implementer through `Registration.init`.
+  void* user_data;
+
+  // Opaque data provided to the node if the node is a builtin. This is usually
+  // a structure defined in builtin_op_data.h
+  void* builtin_data;
+
+  // Custom initial data. This is the opaque data provided in the flatbuffer.
+  // WARNING: This is an experimental interface that is subject to change.
+  const void* custom_initial_data;
+  int custom_initial_data_size;
+} TfLiteNode;
+#endif  // TF_LITE_STATIC_MEMORY
+
+// Light-weight tensor struct for TF Micro runtime. Provides the minimal amount
+// of information required for a kernel to run during TfLiteRegistration::Eval.
+// TODO(b/160955687): Move this struct into TF_LITE_STATIC_MEMORY when TFLM
+// builds with this flag by default internally.
+typedef struct TfLiteEvalTensor {
+  // A union of data pointers. The appropriate type should be used for a typed
+  // tensor based on `type`.
+  TfLitePtrUnion data;
+
+  // A pointer to a structure representing the dimensionality interpretation
+  // that the buffer should have.
+  TfLiteIntArray* dims;
+
+  // The data type specification for data stored in `data`. This affects
+  // what member of `data` union should be used.
+  TfLiteType type;
+} TfLiteEvalTensor;
+
+#ifndef TF_LITE_STATIC_MEMORY
+// Free data memory of tensor `t`.
+void TfLiteTensorDataFree(TfLiteTensor* t);
+
+// Free quantization data.
+void TfLiteQuantizationFree(TfLiteQuantization* quantization);
+
+// Free sparsity parameters.
+void TfLiteSparsityFree(TfLiteSparsity* sparsity);
+
+// Free memory of tensor `t`.
+void TfLiteTensorFree(TfLiteTensor* t);
+
+// Set all of a tensor's fields (and free any previously allocated data).
+void TfLiteTensorReset(TfLiteType type, const char* name, TfLiteIntArray* dims,
+                       TfLiteQuantizationParams quantization, char* buffer,
+                       size_t size, TfLiteAllocationType allocation_type,
+                       const void* allocation, bool is_variable,
+                       TfLiteTensor* tensor);
+
+// Copies the contents of 'src' into 'dst'.
+// Does nothing and returns kTfLiteOk if either 'src' or 'dst' is passed as
+// nullptr.
+// Returns kTfLiteError if 'src' and 'dst' don't have matching data sizes.
+// Note: this function copies contents, so it won't create a new data pointer
+// or change the allocation type.
+// All tensor-related properties (quantization, sparsity, ...) will be copied
+// from 'src' to 'dst'.
+TfLiteStatus TfLiteTensorCopy(const TfLiteTensor* src, TfLiteTensor* dst);
+
+// Changes the size of the memory block owned by `tensor` to `num_bytes`.
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and
+// `kTfLiteOk` will be returned.
+// If `num_bytes` is zero, `tensor`'s internal data buffer will be assigned a
+// pointer which can safely be passed to free or realloc.
+// If `preserve_data` is true, tensor data will be unchanged in the range from
+// the start of the region up to the minimum of the old and new sizes.
+// In the case of a NULL tensor, or an error allocating new memory, returns
+// `kTfLiteError`.
+TfLiteStatus TfLiteTensorResizeMaybeCopy(size_t num_bytes, TfLiteTensor* tensor,
+                                         bool preserve_data);
+
+// Changes the size of the memory block owned by `tensor` to `num_bytes`.
+// Tensors with allocation types other than `kTfLiteDynamic` will be ignored and
+// `kTfLiteOk` will be returned.
+// If `num_bytes` is zero, `tensor`'s internal data buffer will be assigned a
+// pointer which can safely be passed to free or realloc.
+// Tensor data will be unchanged in the range from the start of the region up to
+// the minimum of the old and new sizes.
+// Returns `kTfLiteError` for a NULL tensor or on a memory allocation error.
+TfLiteStatus TfLiteTensorRealloc(size_t num_bytes, TfLiteTensor* tensor);
+#endif  // TF_LITE_STATIC_MEMORY
+
+// WARNING: This is an experimental interface that is subject to change.
+//
+// Currently, TfLiteDelegateParams has to be allocated in a way that it's
+// trivially destructible. It will be stored as `builtin_data` field in
+// `TfLiteNode` of the delegate node.
+//
+// See also the `CreateDelegateParams` function in `interpreter.cc` for details.
+typedef struct TfLiteDelegateParams {
+  struct TfLiteDelegate* delegate;
+  TfLiteIntArray* nodes_to_replace;
+  TfLiteIntArray* input_tensors;
+  TfLiteIntArray* output_tensors;
+} TfLiteDelegateParams;
+
+// WARNING: This is an experimental interface that is subject to change.
+//
+// Currently, TfLiteOpaqueDelegateParams has to be allocated in a way that it's
+// trivially destructible. It will be stored as `builtin_data` field in
+// `TfLiteNode` of the delegate node.
+//
+// See also the `CreateOpaqueDelegateParams` function in `subgraph.cc` for
+// details.
+typedef struct TfLiteOpaqueDelegateParams {
+  TfLiteOpaqueDelegate* delegate;
+  void* delegate_data;
+  TfLiteIntArray* nodes_to_replace;
+  TfLiteIntArray* input_tensors;
+  TfLiteIntArray* output_tensors;
+} TfLiteOpaqueDelegateParams;
+
+typedef struct TfLiteContext {
+  // Number of tensors in the context.
+  size_t tensors_size;
+
+  // The execution plan contains a list of the node indices in execution
+  // order. execution_plan->size is the current number of nodes. And,
+  // execution_plan->data[0] is the first node that needs to be run.
+  // TfLiteDelegates can traverse the current execution plan by iterating
+  // through each member of this array and using GetNodeAndRegistration() to
+  // access details about a node. i.e.
+  //
+  // TfLiteIntArray* execution_plan;
+  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
+  // for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) {
+  //    int node_index = execution_plan->data[exec_index];
+  //    TfLiteNode* node;
+  //    TfLiteRegistration* reg;
+  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
+  // }
+  // Note: memory pointed to by `*execution_plan` is OWNED by TfLite runtime.
+  // Future calls to GetExecutionPlan invalidate earlier outputs. The following
+  // code snippet shows the issue of such an invocation pattern. After calling
+  // CheckNode, subsequent access to `plan_1st` is undefined.
+  //
+  // void CheckNode(const TfLiteNode* node) {
+  //   ...
+  //   TfLiteIntArray* plan_2nd;
+  //   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_2nd));
+  //   ...
+  // }
+  //
+  // TfLiteIntArray* plan_1st;
+  // TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan_1st));
+  // for (int exec_index = 0; exec_index < plan_1st->size; exec_index++) {
+  //    int node_index = plan_1st->data[exec_index];
+  //    TfLiteNode* node;
+  //    TfLiteRegistration* reg;
+  //    context->GetNodeAndRegistration(context, node_index, &node, &reg);
+  //    CheckNode(node);
+  // }
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*GetExecutionPlan)(struct TfLiteContext* context,
+                                   TfLiteIntArray** execution_plan);
+
+  // opaque full context ptr (an opaque c++ data structure)
+  void* impl_;
+
+  // Request memory pointer be resized. Updates dimensions on the tensor.
+  // NOTE: ResizeTensor takes ownership of `new_size`.
+  TfLiteStatus (*ResizeTensor)(struct TfLiteContext*, TfLiteTensor* tensor,
+                               TfLiteIntArray* new_size);
+  // Request that an error be reported with format string msg.
+  void (*ReportError)(struct TfLiteContext*, const char* msg, ...);
+
+  // Add `tensors_to_add` tensors, preserving pre-existing Tensor entries.  If
+  // non-null, the value pointed to by `first_new_tensor_index` will be set to
+  // the index of the first new tensor.
+  TfLiteStatus (*AddTensors)(struct TfLiteContext*, int tensors_to_add,
+                             int* first_new_tensor_index);
+
+  // Get a node and its registration by node_index.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*GetNodeAndRegistration)(
+      struct TfLiteContext*, int node_index, TfLiteNode** node,
+      struct TfLiteRegistration** registration);
+
+  // Replace ops with one or more stub delegate operations. This function
+  // does not take ownership of `nodes_to_replace`.
+  TfLiteStatus (*ReplaceNodeSubsetsWithDelegateKernels)(
+      struct TfLiteContext*, struct TfLiteRegistration registration,
+      const TfLiteIntArray* nodes_to_replace, struct TfLiteDelegate* delegate);
+
+  // Number of threads that are recommended to subsystems like gemmlowp and
+  // eigen.
+  int recommended_num_threads;
+
+  // Access external contexts by type.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteExternalContext* (*GetExternalContext)(struct TfLiteContext*,
+                                               TfLiteExternalContextType);
+  // Set the value of an external context. Does not take ownership of the
+  // pointer.
+  // WARNING: This is an experimental interface that is subject to change.
+  void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
+                             TfLiteExternalContext*);
+
+  // Flag for allowing float16 precision for FP32 calculation.
+  // default: false.
+  // WARNING: This is an experimental API and subject to change.
+  bool allow_fp32_relax_to_fp16;
+
+  // Pointer to the op-level profiler, if set; nullptr otherwise.
+  void* profiler;
+
+  // Allocate persistent buffer which has the same lifetime as the interpreter.
+  // Returns nullptr on failure.
+  // The memory is allocated from heap for TFL, and from tail in TFLM.
+  // This method is only available in Init or Prepare stage.
+  // WARNING: This is an experimental interface that is subject to change.
+  void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes);
+
+  // Allocate a buffer which will be deallocated right after invoke phase.
+  // The memory is allocated from heap in TFL, and from volatile arena in TFLM.
+  // This method is only available in invoke stage.
+  // NOTE: If possible use RequestScratchBufferInArena method to avoid memory
+  // allocation during inference time.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*AllocateBufferForEval)(struct TfLiteContext* ctx, size_t bytes,
+                                        void** ptr);
+
+  // Request a scratch buffer in the arena through static memory planning.
+  // This method is only available in Prepare stage and the buffer is allocated
+  // by the interpreter between Prepare and Eval stage. In Eval stage,
+  // GetScratchBuffer API can be used to fetch the address.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*RequestScratchBufferInArena)(struct TfLiteContext* ctx,
+                                              size_t bytes, int* buffer_idx);
+
+  // Get the scratch buffer pointer.
+  // This method is only available in Eval stage.
+  // WARNING: This is an experimental interface that is subject to change.
+  void* (*GetScratchBuffer)(struct TfLiteContext* ctx, int buffer_idx);
+
+  // Resize the memory pointer of the `tensor`. This method behaves the same as
+  // `ResizeTensor`, except that it makes a copy of the shape array internally
+  // so the shape array could be deallocated right afterwards.
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*ResizeTensorExplicit)(struct TfLiteContext* ctx,
+                                       TfLiteTensor* tensor, int dims,
+                                       const int* shape);
+
+  // This method provides a preview of post-delegation partitioning. Each
+  // TfLiteDelegateParams in the referenced array corresponds to one instance of
+  // the delegate kernel.
+  // Example usage:
+  //
+  // TfLiteIntArray* nodes_to_replace = ...;
+  // TfLiteDelegateParams* params_array;
+  // int num_partitions = 0;
+  // TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
+  //    context, nodes_to_replace, &params_array, &num_partitions));
+  // for (int idx = 0; idx < num_partitions; idx++) {
+  //    const auto& partition_params = params_array[idx];
+  //    ...
+  // }
+  //
+  // NOTE: The context owns the memory referenced by partition_params_array. It
+  // will be cleared with another call to PreviewDelegatePartitioning, or after
+  // TfLiteDelegate::Prepare returns.
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*PreviewDelegatePartitioning)(
+      struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
+      TfLiteDelegateParams** partition_params_array, int* num_partitions);
+
+  // Returns a TfLiteTensor struct for a given index.
+  // WARNING: This is an experimental interface that is subject to change.
+  // WARNING: This method may not be available on all platforms.
+  TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context,
+                             int tensor_idx);
+
+  // Returns a TfLiteEvalTensor struct for a given index.
+  // WARNING: This is an experimental interface that is subject to change.
+  // WARNING: This method may not be available on all platforms.
+  TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context,
+                                     int tensor_idx);
+
+  // Retrieves named metadata buffer from the TFLite model.
+  // Returns kTfLiteOk if metadata is successfully obtained from the flatbuffer
+  // Model: that is, there exists a `metadata` entry with given `name` string.
+  // (see TFLite's schema.fbs).
+  // The corresponding `buffer` information is populated in `ptr` & `bytes`.
+  // The data from `ptr` is valid for the lifetime of the Interpreter.
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*GetModelMetadata)(const struct TfLiteContext* context,
+                                   const char* name, const char** ptr,
+                                   size_t* bytes);
+} TfLiteContext;
+
+// `TfLiteRegistrationExternal` is an external version of `TfLiteRegistration`
+// for C API which doesn't use internal types (such as `TfLiteContext`) but only
+// uses stable API types (such as `TfLiteOpaqueContext`). The purpose of each
+// field is exactly the same as with `TfLiteRegistration`.
+typedef struct TfLiteRegistrationExternal TfLiteRegistrationExternal;
+
+typedef struct TfLiteRegistration {
+  // Initializes the op from serialized data.
+  // Called only *once* for the lifetime of the op, so any one-time allocations
+  // should be made here (unless they depend on tensor sizes).
+  //
+  // If a built-in op:
+  //   `buffer` is the op's params data (e.g. TfLiteLSTMParams*).
+  //   `length` is zero.
+  // If custom op:
+  //   `buffer` is the op's `custom_options`.
+  //   `length` is the size of the buffer.
+  //
+  // Returns a type-punned (i.e. void*) opaque data (e.g. a primitive pointer
+  // or an instance of a struct).
+  //
+  // The returned pointer will be stored with the node in the `user_data` field,
+  // accessible within prepare and invoke functions below.
+  // NOTE: if the data is already in the desired format, simply implement this
+  // function to return `nullptr` and implement the free function to be a no-op.
+  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
+
+  // The pointer `buffer` is the data previously returned by an init invocation.
+  void (*free)(TfLiteContext* context, void* buffer);
+
+  // prepare is called when the inputs this node depends on have been resized.
+  // context->ResizeTensor() can be called to request output tensors to be
+  // resized.
+  // Can be called multiple times for the lifetime of the op.
+  //
+  // Returns kTfLiteOk on success.
+  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
+
+  // Execute the node (should read node->inputs and output to node->outputs).
+  // Returns kTfLiteOk on success.
+  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
+
+  // profiling_string is called during summarization of profiling information
+  // in order to group executions together. Providing a value here will cause a
+  // given op to appear multiple times in the profiling report. This is
+  // particularly useful for custom ops that can perform significantly
+  // different calculations depending on their `user_data`.
+  const char* (*profiling_string)(const TfLiteContext* context,
+                                  const TfLiteNode* node);
+
+  // Builtin codes. If this kernel refers to a builtin this is the code
+  // of the builtin. This is so we can do marshaling to other frameworks like
+  // NN API.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  int32_t builtin_code;
+
+  // Custom op name. If the op is a builtin, this will be null.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  // WARNING: This is an experimental interface that is subject to change.
+  const char* custom_name;
+
+  // The version of the op.
+  // Note: It is the responsibility of the registration binder to set this
+  // properly.
+  int version;
+
+  // The external version of `TfLiteRegistration`. Since we can't use internal
+  // types (such as `TfLiteContext`) for C API to maintain ABI stability,
+  // C API user will provide `TfLiteRegistrationExternal` to implement custom
+  // ops. We keep it inside of `TfLiteRegistration` and use it to route
+  // callbacks properly.
+  TfLiteRegistrationExternal* registration_external;
+} TfLiteRegistration;
+
+// Old version of `TfLiteRegistration` (the same leading fields, without the
+// trailing `registration_external`), kept for binary backward compatibility.
+// WARNING: This structure is deprecated / not an official part of the API.
+// It should only be used for binary backward compatibility.
+typedef struct TfLiteRegistration_V1 {
+  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
+  void (*free)(TfLiteContext* context, void* buffer);
+  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
+  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
+  const char* (*profiling_string)(const TfLiteContext* context,
+                                  const TfLiteNode* node);
+  int32_t builtin_code;
+  const char* custom_name;
+  int version;
+} TfLiteRegistration_V1;
+
+// The flags used in `TfLiteDelegate`. Note that this is a bitmask, so the
+// values should be 1, 2, 4, 8, ...etc.
+typedef enum TfLiteDelegateFlags {
+  kTfLiteDelegateFlagsNone = 0,
+  // The flag is set if the delegate can handle dynamic sized tensors.
+  // For example, the output shape of a `Resize` op with non-constant shape
+  // can only be inferred when the op is invoked.
+  // In this case, the Delegate is responsible for calling
+  // `SetTensorToDynamic` to mark the tensor as a dynamic tensor, and calling
+  // `ResizeTensor` when invoking the op.
+  //
+  // If the delegate isn't capable of handling dynamic tensors, this flag
+  // needs to be set to false.
+  kTfLiteDelegateFlagsAllowDynamicTensors = 1,
+
+  // This flag can be used by delegates (that allow dynamic tensors) to ensure
+  // applicable tensor shapes are automatically propagated in the case of tensor
+  // resizing.
+  // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors
+  // of a delegate kernel will have correct shapes before its Prepare() method
+  // is called. The runtime leverages TFLite builtin ops in the original
+  // execution plan to propagate shapes.
+  //
+  // A few points to note:
+  // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is
+  // false, this one is redundant since the delegate kernels are re-initialized
+  // every time tensors are resized.
+  // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra
+  // work is required to prepare the original execution plan.
+  // 3. This flag requires that the original execution plan only have ops with
+  // valid registrations (and not 'dummy' custom ops like with Flex).
+  // WARNING: This feature is experimental and subject to change.
+  kTfLiteDelegateFlagsRequirePropagatedShapes = 2,
+
+  // This flag can be used by delegates to request per-operator profiling. If a
+  // node is a delegate node, this flag will be checked before profiling. If
+  // set, then the node will not be profiled. The delegate will then add per
+  // operator information using Profiler::EventType::OPERATOR_INVOKE_EVENT and
+  // the results will appear in the operator-wise Profiling section and not in
+  // the Delegate internal section.
+  kTfLiteDelegateFlagsPerOperatorProfiling = 4
+} TfLiteDelegateFlags;
+
+// WARNING: This is an experimental interface that is subject to change.
+typedef struct TfLiteDelegate {
+  // Data that delegate needs to identify itself. This data is owned by the
+  // delegate. The delegate is owned in the user code, so the delegate is
+  // responsible for deallocating this when it is destroyed.
+  void* data_;
+
+  // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
+  // delegate a view of the current graph through TfLiteContext*. It typically
+  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
+  // to ask the TensorFlow Lite runtime to create macro-nodes to represent
+  // delegated subgraphs of the original graph.
+  TfLiteStatus (*Prepare)(TfLiteContext* context,
+                          struct TfLiteDelegate* delegate);
+
+  // Copy the data from delegate buffer handle into raw memory of the given
+  // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
+  // long as it follows the rules for kTfLiteDynamic tensors, in which case this
+  // cannot be null.
+  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
+                                       struct TfLiteDelegate* delegate,
+                                       TfLiteBufferHandle buffer_handle,
+                                       TfLiteTensor* tensor);
+
+  // Copy the data from raw memory of the given 'tensor' to delegate buffer
+  // handle. This can be null if the delegate doesn't use its own buffer.
+  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
+                                     struct TfLiteDelegate* delegate,
+                                     TfLiteBufferHandle buffer_handle,
+                                     TfLiteTensor* tensor);
+
+  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
+  // this doesn't release the underlying resource (e.g. textures). The
+  // resources are either owned by application layer or the delegate.
+  // This can be null if the delegate doesn't use its own buffer.
+  void (*FreeBufferHandle)(TfLiteContext* context,
+                           struct TfLiteDelegate* delegate,
+                           TfLiteBufferHandle* handle);
+
+  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
+  int64_t flags;
+
+  // The opaque delegate builder associated with this object.  If set then the
+  // TF Lite runtime will give precedence to this field.  E.g. instead of
+  // invoking 'Prepare' via the function pointer inside the 'TfLiteDelegate'
+  // object, the runtime will first check if the corresponding function
+  // pointer inside 'opaque_delegate_builder' is set and if so invoke that.
+  //
+  // If this field is non-null, then the 'Prepare' field (of the
+  // 'TfLiteDelegate') should be null.
+  struct TfLiteOpaqueDelegateBuilder* opaque_delegate_builder;
+} TfLiteDelegate;
+
+// Build a 'null' delegate, with all the fields properly set to their default
+// values.
+TfLiteDelegate TfLiteDelegateCreate(void);
+
+// `TfLiteOpaqueDelegateBuilder` is used for constructing
+// `TfLiteOpaqueDelegate`, see `TfLiteOpaqueDelegateCreate` below.  Note:
+// This struct is not ABI stable.
+//
+// For forward source compatibility `TfLiteOpaqueDelegateBuilder` objects should
+// be brace-initialized, so that all fields (including any that might be added
+// in the future) get zero-initialized.  The purpose of each field is exactly
+// the same as with `TfLiteDelegate`.
+//
+// WARNING: This is an experimental interface that is subject to change.
+typedef struct TfLiteOpaqueDelegateBuilder {
+  // Data that delegate needs to identify itself. This data is owned by the
+  // delegate. The delegate is owned in the user code, so the delegate is
+  // responsible for deallocating this when it is destroyed.
+  void* data;
+  // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the
+  // delegate a view of the current graph through TfLiteContext*. It typically
+  // will look at the nodes and call ReplaceNodeSubsetsWithDelegateKernels()
+  // to ask the TensorFlow Lite runtime to create macro-nodes to represent
+  // delegated subgraphs of the original graph.
+  TfLiteStatus (*Prepare)(TfLiteOpaqueContext* context,  // NOLINT
+                          TfLiteOpaqueDelegate* delegate, void* data);
+  // Copies the data from delegate buffer handle into raw memory of the given
+  // 'tensor'. Note that the delegate is allowed to allocate the raw bytes as
+  // long as it follows the rules for kTfLiteDynamic tensors, in which case this
+  // cannot be null.
+  TfLiteStatus (*CopyFromBufferHandle)(  // NOLINT
+      TfLiteOpaqueContext* context, TfLiteOpaqueDelegate* delegate, void* data,
+      TfLiteBufferHandle buffer_handle, TfLiteOpaqueTensor* tensor);
+  // Copies the data from raw memory of the given 'tensor' to delegate buffer
+  // handle. This can be null if the delegate doesn't use its own buffer.
+  TfLiteStatus (*CopyToBufferHandle)(  // NOLINT
+      TfLiteOpaqueContext* context, TfLiteOpaqueDelegate* delegate, void* data,
+      TfLiteBufferHandle buffer_handle, TfLiteOpaqueTensor* tensor);
+  // Frees the Delegate Buffer Handle. Note: This only frees the handle, but
+  // this doesn't release the underlying resource (e.g. textures). The
+  // resources are either owned by application layer or the delegate.
+  // This can be null if the delegate doesn't use its own buffer.
+  void (*FreeBufferHandle)(TfLiteOpaqueContext* context,  // NOLINT
+                           TfLiteOpaqueDelegate* delegate, void* data,
+                           TfLiteBufferHandle* handle);
+  // Bitmask flags. See the comments in `TfLiteDelegateFlags`.
+  int64_t flags;
+} TfLiteOpaqueDelegateBuilder;
+
+// Creates an opaque delegate and returns its address.  The opaque delegate will
+// behave according to the provided 'opaque_delegate_builder'.  The lifetime of
+// the objects pointed to by any of the fields within the
+// 'opaque_delegate_builder' must exceed that of the returned
+// 'TfLiteOpaqueDelegate' and any 'TfLiteInterpreter',
+// 'TfLiteInterpreterOptions', 'tflite::Interpreter', or
+// 'tflite::InterpreterBuilder' that the delegate is added to.  The returned
+// address should be passed to 'TfLiteOpaqueDelegateDelete' for deletion.  If
+// 'opaque_delegate_builder' is a null pointer, then a null pointer will be
+// returned.
+TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate(
+    const TfLiteOpaqueDelegateBuilder* opaque_delegate_builder);
+
+// Deletes the provided opaque 'delegate'.  This function has no effect if the
+// 'delegate' is a null pointer.
+void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate);
+
+// Returns a pointer to the data associated with the provided opaque 'delegate'.
+//
+// A null pointer will be returned when:
+// - The 'delegate' is null.
+// - The 'data' field of the 'TfLiteOpaqueDelegateBuilder' used to construct the
+//   'delegate' was null.
+// - Or in case of any other error.
+// - The 'delegate' has been constructed via a 'TfLiteOpaqueDelegateBuilder',
+//   but the 'data' field of the 'TfLiteOpaqueDelegateBuilder' is null.
+//
+//  The data_ field of 'delegate' will be returned if the
+//  'opaque_delegate_builder' field is null.
+void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+#endif  // TENSORFLOW_LITE_CORE_C_COMMON_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cc b/edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.cc
similarity index 96%
rename from edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cc
rename to edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.cc
index e330644..dd733f4 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.cc
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.cc
@@ -171,10 +171,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration* Register_TREE_ENSEMBLE_CLASSIFIER() {
-
   static TfLiteRegistration r = {
-      tree_ensemble_classifier::Init, nullptr,
-      tree_ensemble_classifier::Prepare, tree_ensemble_classifier::Eval};
+          tree_ensemble_classifier::Init,
+          nullptr,
+          tree_ensemble_classifier::Prepare,
+          tree_ensemble_classifier::Eval,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
   return &r;
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h b/edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.h
similarity index 100%
rename from edge-impulse-sdk/tensorflow/lite/kernels/tree_ensemble_classifier.h
rename to edge-impulse-sdk/tensorflow/lite/kernels/custom/tree_ensemble_classifier.h
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h
index df895c8..05af6fd 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h
@@ -15,12 +15,14 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
 
+#include <algorithm>
 #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
 #ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
 #define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
 #endif
 #endif
 
+#include <cmath>
 #include <functional>
 
 #include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
@@ -75,6 +77,7 @@ float ActivationFunction(float x) {
 inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
                          const float* bias_data, int array_size,
                          float* array_data) {
+  if (bias_size == 0) return;
   // Note: see b/132215220: in May 2019 we thought it would be OK to replace
   // this with the Eigen one-liner:
   //   return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max).
@@ -138,6 +141,100 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
 #endif
 }
 
+// Single-rounding MultiplyByQuantizedMultiplier
+#if TFLITE_SINGLE_ROUNDING
+inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
+                                             int32_t quantized_multiplier,
+                                             int shift) {
+  TFLITE_DCHECK(quantized_multiplier >= 0);
+  TFLITE_DCHECK(shift >= -31 && shift <= 30);
+
+  const int64_t total_shift = 31 - shift;
+  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
+  int64_t result = x * static_cast<int64_t>(quantized_multiplier) + round;
+  result = result >> total_shift;
+
+  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
+                result <= std::numeric_limits<int32_t>::max());
+  return static_cast<int32_t>(result);
+}
+
+inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  TFLITE_DCHECK_LE(shift, 0);
+  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
+}
+
+inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  TFLITE_DCHECK_GE(shift, 0);
+  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
+}
+
+inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
+                                             int32_t quantized_multiplier,
+                                             int shift) {
+  // Inputs:
+  // - quantized_multiplier has fixed point at bit 31
+  // - shift is -31 to +7 (negative for right shift)
+  //
+  // Assumptions: The following input ranges are assumed
+  // - quantized_multiplier>=0  (the usual range is (1<<30) to (1<<31)-1)
+  // - scaling is chosen so final scaled result fits in int32_t
+  // - input x is in the range -(1<<47) <= x < (1<<47)
+  TFLITE_DCHECK(quantized_multiplier >= 0);
+  TFLITE_DCHECK(shift >= -31 && shift < 8);
+  TFLITE_DCHECK(x >= -(static_cast<int64_t>(1) << 47) &&
+                x < (static_cast<int64_t>(1) << 47));
+
+  const int32_t reduced_multiplier =
+      (quantized_multiplier < 0x7FFF0000)
+          ? ((quantized_multiplier + (1 << 15)) >> 16)  // round to 16 bits
+          : 0x7FFF;  // clamp: rounding would otherwise reach 0x8000
+  const int64_t total_shift = 15 - shift;  // 31 minus the 16 bits dropped above
+  const int64_t round = static_cast<int64_t>(1) << (total_shift - 1);
+  int64_t result = x * static_cast<int64_t>(reduced_multiplier) + round;
+  result = result >> total_shift;
+
+  TFLITE_DCHECK(result >= std::numeric_limits<int32_t>::min() &&
+                result <= std::numeric_limits<int32_t>::max());
+  return static_cast<int32_t>(result);
+}
+
+#ifdef USE_NEON
+inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
+    int32x4x4_t input_val, int32_t quantized_multiplier, int shift) {
+  TFLITE_DCHECK(quantized_multiplier >= 0);
+
+  const int right_shift = std::min(-1, shift);
+  const int left_shift = shift - right_shift;
+
+  const int32x4_t multiplier_dup = vdupq_n_s32(quantized_multiplier);
+  const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
+  const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
+
+  int32x4x4_t result;
+  result.val[0] = vrshlq_s32(
+      vqdmulhq_s32(vshlq_s32(input_val.val[0], left_shift_dup), multiplier_dup),
+      right_shift_dup);
+
+  result.val[1] = vrshlq_s32(
+      vqdmulhq_s32(vshlq_s32(input_val.val[1], left_shift_dup), multiplier_dup),
+      right_shift_dup);
+
+  result.val[2] = vrshlq_s32(
+      vqdmulhq_s32(vshlq_s32(input_val.val[2], left_shift_dup), multiplier_dup),
+      right_shift_dup);
+
+  result.val[3] = vrshlq_s32(
+      vqdmulhq_s32(vshlq_s32(input_val.val[3], left_shift_dup), multiplier_dup),
+      right_shift_dup);
+
+  return result;
+}
+#endif  // USE_NEON
+// Double-rounding MultiplyByQuantizedMultiplier
+#else
 inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp(
     int32_t x, int32_t quantized_multiplier, int left_shift) {
   using gemmlowp::RoundingDivideByPOT;
@@ -224,7 +321,8 @@ inline int32x4x4_t MultiplyByQuantizedMultiplier4Rows(
 
   return result;
 }
-#endif
+#endif  // USE_NEON
+#endif  // TFLITE_SINGLE_ROUNDING
 
 template <typename T>
 int CountLeadingZeros(T integer_input) {
@@ -279,81 +377,216 @@ inline Integer FloorLog2(Integer n) {
   }
 }
 
-// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
-// softmax
-// func - the function to build the LUT for (e.g exp(x))
-// min,max - table limits
-// table - pointer to buffer
-// num - number of elements in the LUT
-inline void gen_lut(double (*func)(double), double min, double max,
-                    int16_t* table, const int num) {
-  // size of table should equal to num + 1
-  // last element only for slope calculation
-  double step = (max - min) / (num - 1);
-  double half_step = step / 2.0;
-  for (int i = 0; i < num - 1; i++) {
-    double sample_val = TfLiteRound(func(min + i * step) * 32768.0);
-    double midpoint_interp_val =
-        TfLiteRound((func(min + (i + 1) * step) * 32768.0 +
-                     TfLiteRound(func(min + i * step) * 32768.0)) /
-                    2.0);
-    double midpoint_val =
-        TfLiteRound(func(min + i * step + half_step) * 32768.0);
-    double midpoint_err = midpoint_interp_val - midpoint_val;
-    double bias = TfLiteRound(midpoint_err / 2.0);
-    table[i] = std::min<double>(std::max<double>(sample_val - bias, -32768.0),
-                                32767.0);
+namespace detail {
+
+// LUTPopulate takes an optional type-erased transform_params to allow passing
+// extra parameters to the transform function pointer. const void* is used
+// instead of std::function to be compatible with TFLite Micro
+template <typename FloatT, typename Func>
+inline typename std::enable_if<std::is_same<Func, FloatT (*)(FloatT)>::value,
+                               FloatT>::type
+LUTTransform(Func transform, const void* /*transform_params*/, FloatT value) {
+  static_assert(std::is_floating_point<FloatT>::value,
+                "FloatT must be a floating-point type.");
+  return transform(value);
+}
+
+template <typename FloatT, typename Func>
+inline typename std::enable_if<
+    std::is_same<Func, FloatT (*)(FloatT, const void*)>::value, FloatT>::type
+LUTTransform(Func transform, const void* transform_params, FloatT value) {
+  static_assert(std::is_floating_point<FloatT>::value,
+                "FloatT must be a floating-point type.");
+  return transform(value, transform_params);
+}
+
+// Use the same LUT generation code for both uint8_t and int8_t. int8_t indices
+// are cast directly to uint8_t, so the int8 LUT is ordered as [0,
+// 1, ..., 127, -128, ..., -2, -1] instead of [-128, -127, ..., -1, 0, 1, ...,
+// 126, 127].
+template <typename T, typename Func>
+inline void LUTPopulateInt8(float input_scale, int32_t input_zero_point,
+                            float output_scale, int32_t output_zero_point,
+                            Func transform, const void* transform_params,
+                            T* lut) {
+  static_assert(
+      std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
+      "T must be an uint8 or int8 type.");
+  uint8_t* lut_uint8 = reinterpret_cast<uint8_t*>(lut);
+  const float inverse_scale = 1 / output_scale;
+  int32_t maxval = std::numeric_limits<T>::max();
+  int32_t minval = std::numeric_limits<T>::min();
+  for (int32_t val = minval; val <= maxval; ++val) {
+    const float dequantized = input_scale * (val - input_zero_point);
+    const float transformed =
+        LUTTransform(transform, transform_params, dequantized);
+    const float rescaled = TfLiteRound(transformed * inverse_scale);
+    const int32_t quantized =
+        static_cast<int32_t>(rescaled + output_zero_point);
+    lut_uint8[static_cast<uint8_t>(static_cast<T>(val))] = static_cast<uint8_t>(
+        static_cast<T>(std::max(std::min(maxval, quantized), minval)));
   }
-  table[num - 1] = std::min<double>(
-      std::max<double>(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0);
 }
 
-// generate INT16 LUT for function(), e.g., table exp(x) and 1/(1+x) used in
-// softmax
-// func - the function to build the LUT for (e.g exp(x))
-// min,max - table limits
-// table - pointer to buffer
-// num - number of elements in the LUT
-inline void gen_lut(float (*func)(float), float min, float max, int16_t* table,
-                    const int num) {
-  // size of table should equal to num + 1
-  // last element only for slope calculation
-  float step = (max - min) / (num - 1);
-  float half_step = step / 2.0f;
-  for (int i = 0; i < num - 1; i++) {
-    float sample_val = TfLiteRound(func(min + i * step) * 32768.0f);
-    float midpoint_interp_val =
-        TfLiteRound((func(min + (i + 1) * step) * 32768.0f +
-                     TfLiteRound(func(min + i * step) * 32768.0f)) /
-                    2.0f);
-    float midpoint_val =
-        TfLiteRound(func(min + i * step + half_step) * 32768.0f);
-    float midpoint_err = midpoint_interp_val - midpoint_val;
-    float bias = TfLiteRound(midpoint_err / 2.0f);
-    table[i] = std::min<float>(std::max<float>(sample_val - bias, -32768.0f),
-                               32767.0f);
+// Keep floating-point type configurable for backward compatibility. float
+// should be used for FloatT by default.
+template <typename FloatT, typename Func>
+inline void LUTPopulateInt16(FloatT input_scale, int32_t input_zero_point,
+                             FloatT output_scale, int32_t output_zero_point,
+                             Func transform, const void* transform_params,
+                             int16_t* lut) {
+  static_assert(std::is_floating_point<FloatT>::value,
+                "FloatT must be a floating-point type.");
+  const FloatT input_min =
+      input_scale * (std::numeric_limits<int16_t>::min() - input_zero_point);
+  const FloatT input_max =
+      input_scale * (std::numeric_limits<int16_t>::max() - input_zero_point);
+  const FloatT output_min =
+      output_scale * (std::numeric_limits<int16_t>::min() - output_zero_point);
+  const FloatT output_max =
+      output_scale * (std::numeric_limits<int16_t>::max() - output_zero_point);
+
+  const int nb_steps = 512;
+  const FloatT step = (input_max - input_min) / nb_steps;
+  const FloatT half_step = step / 2;
+  const FloatT output_scaling_inv =
+      static_cast<FloatT>(std::numeric_limits<int16_t>::max() -
+                          std::numeric_limits<int16_t>::min() + 1) /
+      (output_max - output_min);
+  const FloatT table_min =
+      static_cast<FloatT>(std::numeric_limits<int16_t>::min());
+  const FloatT table_max =
+      static_cast<FloatT>(std::numeric_limits<int16_t>::max());
+
+  for (int i = 0; i < nb_steps; i++) {
+    const FloatT val =
+        LUTTransform<FloatT>(transform, transform_params, input_min + i * step);
+    const FloatT val_midpoint = LUTTransform<FloatT>(
+        transform, transform_params, input_min + i * step + half_step);
+    const FloatT val_next = LUTTransform<FloatT>(transform, transform_params,
+                                                 input_min + (i + 1) * step);
+
+    const FloatT sample_val = TfLiteRound(val * output_scaling_inv);
+    const FloatT midpoint_interp_val =
+        TfLiteRound((val_next * output_scaling_inv +
+                     TfLiteRound(val * output_scaling_inv)) /
+                    2);
+    const FloatT midpoint_val = TfLiteRound(val_midpoint * output_scaling_inv);
+    const FloatT midpoint_err = midpoint_interp_val - midpoint_val;
+    const FloatT bias = TfLiteRound(midpoint_err / 2);
+
+    lut[i] = static_cast<int16_t>(std::min<FloatT>(
+        std::max<FloatT>(sample_val - bias, table_min), table_max));
   }
-  table[num - 1] = std::min<float>(
-      std::max<float>(TfLiteRound(func(max) * 32768.0f), -32768.0f), 32767.0f);
+
+  lut[nb_steps] = static_cast<int16_t>(std::min<FloatT>(
+      std::max<FloatT>(TfLiteRound(LUTTransform<FloatT>(
+                                       transform, transform_params, input_max) *
+                                   output_scaling_inv),
+                       table_min),
+      table_max));
 }
 
-// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax
-inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) {
-  // 512 base value, lut[513] only for calculate slope
-  uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
+}  // namespace detail
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
+                                   std::is_same<T, int8_t>::value,
+                               void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float), T* lut) {
+  detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale,
+                          output_zero_point, transform, nullptr, lut);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, uint8_t>::value ||
+                                   std::is_same<T, int8_t>::value,
+                               void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float, const void*),
+            const void* transform_params, T* lut) {
+  detail::LUTPopulateInt8(input_scale, input_zero_point, output_scale,
+                          output_zero_point, transform, transform_params, lut);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float), T* lut) {
+  detail::LUTPopulateInt16<float>(input_scale, input_zero_point, output_scale,
+                                  output_zero_point, transform, nullptr, lut);
+}
+
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
+LUTPopulate(float input_scale, int32_t input_zero_point, float output_scale,
+            int32_t output_zero_point, float (*transform)(float, const void*),
+            const void* transform_params, T* lut) {
+  detail::LUTPopulateInt16<float>(input_scale, input_zero_point, output_scale,
+                                  output_zero_point, transform,
+                                  transform_params, lut);
+}
+
+// Deprecated, avoid usage and prefer the float version. Kept for
+// backward compatibility.
+template <typename T>
+inline typename std::enable_if<std::is_same<T, int16_t>::value, void>::type
+LUTPopulate(double input_scale, int32_t input_zero_point, double output_scale,
+            int32_t output_zero_point, double (*transform)(double), T* lut) {
+  detail::LUTPopulateInt16<double>(input_scale, input_zero_point, output_scale,
+                                   output_zero_point, transform, nullptr, lut);
+}
+
+// The size of the LUT depends on the type of input. For uint8 and int8 inputs a
+// simple 256-entry LUT is used. For int16 inputs the high 9 bits are used for
+// indexing and the 7 remaining bits are used for interpolation. We thus use a
+// 513-entry LUT for int16 cases, 512 for the 9-bit indexing and 1 extra entry
+// to interpolate the last value.
+template <typename T>
+constexpr int LUTSize() {
+  static_assert(std::is_same<T, uint8_t>::value ||
+                    std::is_same<T, int8_t>::value ||
+                    std::is_same<T, int16_t>::value,
+                "Only LUTs with uint8, int8 or int16 inputs are supported.");
+  // As per c++11: constexpr methods cannot have more than one return statement.
+  return (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+             ? 256
+             : 513;
+}
+
+// int16_t -> int16_t table lookup with interpolation
+// LUT must have 513 values
+inline int16_t LUTLookup(int16_t value, const int16_t* lut) {
+  // 512 base values, lut[512] (the 513th entry) is only used for the slope
+  const uint16_t index = static_cast<uint16_t>(256 + (value >> 7));
   assert(index < 512 && "LUT index out of range.");
-  int16_t offset = value & 0x7f;
+  const int16_t offset = value & 0x7f;
 
-  // base and slope are Q0.15
-  int16_t base = lut[index];
-  int16_t slope = lut[index + 1] - lut[index];
+  // Base and slope are Q0.x
+  const int16_t base = lut[index];
+  const int16_t slope = lut[index + 1] - lut[index];
 
-  // Q0.15 * Q0.7 = Q0.22
-  // Round and convert from Q0.22 to Q0.15
-  int32_t delta = (static_cast<int32_t>(slope) * offset + 64) >> 7;
+  // Q0.x * Q0.7 = Q0.(x + 7)
+  // Round and convert from Q0.(x + 7) to Q0.x
+  const int delta = (slope * offset + 64) >> 7;
 
   // Q0.15 + Q0.15
-  return base + delta;
+  return static_cast<int16_t>(base + delta);
+}
+
+// int8_t -> int8_t table lookup without interpolation
+// LUT must have 256 values
+// LUTPopulate<int8_t> has ordered the LUT so that indexing it with an
+// int8_t is just done by casting it to an uint8_t.
+inline int8_t LUTLookup(int8_t value, const int8_t* lut) {
+  return lut[static_cast<uint8_t>(value)];
+}
+
+// uint8_t -> uint8_t table lookup without interpolation
+// LUT must have 256 values
+inline uint8_t LUTLookup(uint8_t value, const uint8_t* lut) {
+  return lut[value];
 }
 
 // Table of sigmoid(i/24) at 0.16 format - 256 elements.
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h
index 71922f3..ede9cd6 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h
@@ -86,6 +86,16 @@ using int32 = std::int32_t;
 using uint32 = std::uint32_t;
 #endif  // !defined(TF_LITE_STATIC_MEMORY)
 
+// Allow for cross-compiler usage of function signatures - currently used for
+// specifying named RUY profiler regions in templated methods.
+#if defined(_MSC_VER)
+#define TFLITE_PRETTY_FUNCTION __FUNCSIG__
+#elif defined(__GNUC__)
+#define TFLITE_PRETTY_FUNCTION __PRETTY_FUNCTION__
+#else
+#define TFLITE_PRETTY_FUNCTION __func__
+#endif
+
 // TFLITE_DEPRECATED()
 //
 // Duplicated from absl/base/macros.h to avoid pulling in that library.
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h
index 5a32774..c97cc31 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h
@@ -19,9 +19,8 @@ limitations under the License.
 
 namespace tflite {
 
-#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) ||                           \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
-    defined(__ZEPHYR__)
+#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
+    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
 #define TF_LITE_GLOBAL_STD_PREFIX
 #else
 #define TF_LITE_GLOBAL_STD_PREFIX std
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h
index bbf745c..7df1129 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/optimized/neon_check.h
@@ -15,26 +15,6 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
 
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#define USE_NEON
-#include <arm_neon.h>
-#endif
-
-#if defined __GNUC__ && defined __SSE4_1__ && !defined TF_LITE_DISABLE_X86_NEON
-#define USE_NEON
-#include "NEON_2_SSE.h"
-#endif
-
-// NEON_OR_PORTABLE(SomeFunc, args) calls NeonSomeFunc(args) if USE_NEON is
-// defined, PortableSomeFunc(args) otherwise.
-#ifdef USE_NEON
-// Always use Neon code
-#define NEON_OR_PORTABLE(funcname, ...) Neon##funcname(__VA_ARGS__)
-
-#else
-// No NEON available: Use Portable code
-#define NEON_OR_PORTABLE(funcname, ...) Portable##funcname(__VA_ARGS__)
-
-#endif  // defined(USE_NEON)
+// TFLM does not need to utilize any Neon optimizations.
 
 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_NEON_CHECK_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h
index 18cd633..a03e502 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h
@@ -17,7 +17,7 @@ limitations under the License.
 
 #include <vector>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cc b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cc
new file mode 100644
index 0000000..ec7ad76
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.cc
@@ -0,0 +1,86 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+
+// Not all backends support CpuBackendContext usage, so forward declare to avoid
+// pulling in its implementation. Use of CpuBackendContext in method
+// implementations is purely optional.
+class CpuBackendContext;
+
+namespace tensor_utils {
+
+// Apply Rectified Linear to elements of a vector.
+void ApplyReluToVector(const float* __restrict__ vector, int v_size,
+                       float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(0.0f, vector[v]);
+  }
+}
+
+// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector
+void ApplyRelu1ToVector(const float* __restrict__ vector, int v_size,
+                        float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(-1.0f, std::min(vector[v], 1.0f));
+  }
+}
+
+// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector
+void ApplyRelu6ToVector(const float* __restrict__ vector, int v_size,
+                        float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::max(0.0f, std::min(vector[v], 6.0f));
+  }
+}
+
+// Apply signbit to elements of a vector
+void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
+                          float* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = std::signbit(vector[v]);
+  }
+}
+
+void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
+                             int8_t* dst_buffer) {
+  for (int i = 0; i < num_elements; i += 2) {
+    // Shift left first so that sign is properly extended when shifted right
+    dst_buffer[i] = static_cast<int8_t>(src_buffer[i / 2] << 4) >> 4;
+    // Break early if the tensor has odd length and the higher nibble should be
+    // ignored.
+    if (i + 1 == num_elements) break;
+    dst_buffer[i + 1] = static_cast<int8_t>(src_buffer[i / 2]) >> 4;
+  }
+}
+
+}  // namespace tensor_utils
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils_common.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h
similarity index 71%
rename from edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils_common.h
rename to edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h
index 3fbaafe..5674e2e 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils_common.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h
@@ -12,20 +12,115 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_
+
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
 
 #include <algorithm>
+#include <cmath>
 #include <cstdint>
 
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
+
 #if defined(_MSC_VER)
 #define __restrict__ __restrict
 #endif
 
 namespace tflite {
 
+// Not all backends support CpuBackendContext usage, so forward declare to avoid
+// pulling in its implementation. Use of CpuBackendContext in method
+// implementations is purely optional.
+class CpuBackendContext;
+
 namespace tensor_utils {
 
+// Multiplies a matrix with a scalar and reduce the result on each row to a
+// scalar.
+// Parameters:
+//     - matrix: matrix of size n_row * n_col
+//     - scalar: the scalar that is multiplied to each element in the matrix
+//     - n_row:  the row count of the matrix
+//     - n_col:  the column count of the matrix
+//     - output: the 32bit output
+// Note: We do not need saturation because the int8 * int8 is safe from overflow
+// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero
+// initial output value is not exceptionally large.
+void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
+                                    int32_t n_row, int32_t n_col,
+                                    int32_t* output);
+
+// Add another vector for each batch in the batch vector.
+template <typename T>
+void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
+                          T* batch_vector) {
+  for (int b = 0; b < n_batch; b++) {
+    for (int i = 0; i < v_size; ++i) {
+      batch_vector[i] += vector[i];
+    }
+    batch_vector += v_size;
+  }
+}
+
+// Cwise product of two vectors.
+template <typename T>
+inline void VectorVectorCwiseProduct(const T* vector1, const T* vector2,
+                                     int v_size, T* result) {
+  for (int v = 0; v < v_size; v++) {
+    *result++ = *vector1++ * *vector2++;
+  }
+}
+
+// Cwise product of a vector and a batch-vector.
+template <typename T>
+inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
+                                          const T* batch_vector, int n_batch,
+                                          T* result) {
+  for (int b = 0; b < n_batch; b++) {
+    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
+    // Update the pointers.
+    result += v_size;
+    batch_vector += v_size;
+  }
+}
+
+// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
+// assumption here is that result array is initialized to valid values.
+template <typename T>
+inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
+                                               const T* __restrict__ vector2,
+                                               int v_size,
+                                               T* __restrict__ result) {
+  for (int v = 0; v < v_size; v++) {
+    *result++ += *vector1++ * *vector2++;
+  }
+}
+
+// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
+// operation, the assumption here is that result array is initialized to valid
+// values.
+template <typename T>
+inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
+                                                    const T* batch_vector,
+                                                    int n_batch, T* result) {
+  for (int b = 0; b < n_batch; b++) {
+    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
+    // Update the pointers.
+    result += v_size;
+    batch_vector += v_size;
+  }
+}
+
+// Batch vector initialization with another vector.
+template <typename T>
+void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
+                             T* batch_vector) {
+  for (int b = 0; b < n_batch; b++) {
+    std::copy_n(vector, v_size, batch_vector + b * v_size);
+  }
+}
+
 // Checks if all entries of vector are zero for float.
 bool IsZeroVector(const float* vector, int v_size);
 
@@ -136,6 +231,20 @@ void MatrixBatchVectorMultiplyAccumulate(
     float* __restrict__ result, const float* __restrict__ per_channel_scale,
     const int32_t* __restrict__ input_offset);
 
+// Same as the function above, but the matrix is a sparse tensor with block
+// pattern 1x16.
+// This function assumes that m_cols is a multiple of the block size (16 in this
+// case) so that there's no incomplete block. Also, it assumes all offsets of
+// input, output and filter are zero.
+void SparseMatrixBatchVectorMultiplyAccumulate1x16(
+    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
+    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
+    const int32_t output_shift, const int32_t output_offset,
+    const int32_t output_activation_min, const int32_t output_activation_max,
+    int8_t* __restrict__ result);
+
 // Same as the function above, but the matrix is stored in block compressed
 // sparse row format with block pattern 1x16 which consists of two arrays:
 //   1. A matrix array stores non-zero blocks of the matrix in row major.
@@ -173,21 +282,6 @@ void MatrixBatchVectorMultiply(const int16_t* hidden,
                                int32_t n_hidden, int32_t n_output,
                                int32_t output_zp, int8_t* proj_output);
 
-// Multiplies a matrix with a scalar and reduce the result on each row to a
-// scalar.
-// Parameters:
-//     - matrix: matrix of size n_row * n_col
-//     - scalar: the scalar that is multiplied to each element in the matrix
-//     - n_row:  the row count of the matrix
-//     - n_col:  the column count of the matrix
-//     - output: the 32bit output
-// Note: We do not need saturation because the int8 * int8 is safe from overflow
-// in (2^31-1) / (2^14) = 131072, which is bigger than the n_row. Non-zero
-// initial output value is not exceptionally large.
-void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
-                                    int32_t n_row, int32_t n_col,
-                                    int32_t* output);
-
 // Apply Layer Normalization (https://arxiv.org/abs/1607.06450) to a Quantized
 // vector.
 // Parameters:
@@ -235,7 +329,7 @@ void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
 //     - n_input: the size for input and output.
 //     - output:  the 16 bit output
 // The input is in Qm.15-m format and the output is in Q0.15 format.
-void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
+void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
                int32_t n_input, int16_t* output);
 
 // Apply Tanh to a quantized vector. Tbe internal calculation is in float.
@@ -307,28 +401,6 @@ void CwiseClipping(int16_t* vector, const int v_size,
 void CwiseClipping(int8_t* vector, const int v_size,
                    const int8_t clipping_value);
 
-// Cwise product of two vectors.
-template <typename T>
-inline void VectorVectorCwiseProduct(const T* __restrict__ vector1,
-                                     const T* __restrict__ vector2, int v_size,
-                                     T* __restrict__ result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ = *vector1++ * *vector2++;
-  }
-}
-
-// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
-// assumption here is that result array is initialized to valid values.
-template <typename T>
-inline void VectorVectorCwiseProductAccumulate(const T* __restrict__ vector1,
-                                               const T* __restrict__ vector2,
-                                               int v_size,
-                                               T* __restrict__ result) {
-  for (int v = 0; v < v_size; v++) {
-    *result++ += *vector1++ * *vector2++;
-  }
-}
-
 // Dot product of two vectors.
 float VectorVectorDotProduct(const float* vector1, const float* vector2,
                              int v_size);
@@ -363,61 +435,12 @@ void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
                                       const int16_t* vector2, int v_size,
                                       int n_batch, int32_t* result);
 
-// Cwise product of a vector and a batch-vector.
-template <typename T>
-inline void VectorBatchVectorCwiseProduct(const T* vector, int v_size,
-                                          const T* batch_vector, int n_batch,
-                                          T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
-// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
-// operation, the assumption here is that result array is initialized to valid
-// values.
-template <typename T>
-inline void VectorBatchVectorCwiseProductAccumulate(const T* vector, int v_size,
-                                                    const T* batch_vector,
-                                                    int n_batch, T* result) {
-  for (int b = 0; b < n_batch; b++) {
-    VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
-    // Update the pointers.
-    result += v_size;
-    batch_vector += v_size;
-  }
-}
-
 // Same as above, but inputs are 16bit integer and output is 16bit integer.
 void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
                                              const int16_t* batch_vector,
                                              int n_batch, int32_t multiplier,
                                              int shift, int16_t* result);
 
-// Add another vector for each batch in the batch vector.
-template <typename T>
-void VectorBatchVectorAdd(const T* vector, int v_size, int n_batch,
-                          T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    for (int i = 0; i < v_size; ++i) {
-      batch_vector[i] += vector[i];
-    }
-    batch_vector += v_size;
-  }
-}
-
-// Batch vector initialization with another vector.
-template <typename T>
-void VectorBatchVectorAssign(const T* vector, int v_size, int n_batch,
-                             T* batch_vector) {
-  for (int b = 0; b < n_batch; b++) {
-    std::copy_n(vector, v_size, batch_vector + b * v_size);
-  }
-}
-
 // Compute "1.0f - elements of vector" (used in CIFG).
 void Sub1Vector(const float* vector, int v_size, float* result);
 
@@ -425,10 +448,6 @@ void Sub1Vector(const float* vector, int v_size, float* result);
 // "vector" has range [0, 32767] because it is the output of sigmoid function.
 void Sub1Vector(const int16_t* vector, int v_size, int16_t* result);
 
-// Multiply all elements of vector with a scalar.
-void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
-                          float* result);
-
 // Reduce-sum on a float input vector:
 // input_vector: float pointer to input vector.
 // output_vector: float pointer to vector.
@@ -446,10 +465,13 @@ void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
 void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
                         int output_size, int reduction_size);
 
+// Multiply all elements of vector with a scalar.
+void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
+                          float* result);
+
 // Layer norm for each batch.
-void MeanStddevNormalization(const float* __restrict__ input_vector,
-                             float* __restrict__ output_vector, int v_size,
-                             int n_batch);
+void MeanStddevNormalization(const float* input_vector, float* output_vector,
+                             int v_size, int n_batch);
 
 // Saturate Add with rescale on both inputs.
 void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
@@ -460,7 +482,142 @@ void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
                           int32_t recurrent_effective_scale_b, int32_t n_batch,
                           int32_t n_cell, int16_t* output);
 
+// Same as the function above, but provides a scratch buffer for the
+// int8 x int8 -> int32 results and a CpuBackendContext for the accumulator
+// computation.
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors,
+    const float* __restrict__ scaling_factors, int n_batch,
+    int32_t* __restrict__ scratch, float* __restrict__ result,
+    CpuBackendContext* __restrict__ context);
+
+// Same as the function above, except that it can make use of cached row sums.
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, CpuBackendContext* context);
+
+// Same as the function above, but takes one scaling factor for the matrix and
+// one per batch vector. Their per-batch products are written to the
+// scaling_factor_scratch buffer (n_batch entries) before delegating.
+inline void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float matrix_scaling_factor,
+    const float* vector_scaling_factors, int n_batch,
+    float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, float* scaling_factor_scratch,
+    CpuBackendContext* context) {
+  for (int b = 0; b < n_batch; ++b) {
+    scaling_factor_scratch[b] =
+        vector_scaling_factors[b] * matrix_scaling_factor;
+  }
+  MatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vectors,
+                                      scaling_factor_scratch, n_batch, result,
+                                      per_channel_scale, input_offset, scratch,
+                                      row_sums, compute_row_sums, context);
+}
+
+// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
+// dimension composed by input vectors independent from each other). The result
+// of the multiplication is accumulated to the passed result buffer.
+// More specifically, for a matrix M of shape [n, i] and a batched-vector
+// of shape [i, batch] it will first compute the product of shape [n, batch].
+// This product will be accumulated to the result buffer.
+// Parameters:
+//     - input: batch vector of size n_batch * n_input
+//     - bias:  vector of size b_input
+//     - input_to_gate_weights: matrix of size n_input * n_output
+//     - multiplier: scalar
+//     - shift: scalar
+//     - n_batch: the batch size
+//     - n_input: the input size
+//     - n_output: the output size
+//     - output_zp: the zero point of the output.
+//     - scratch: batch vector of size n_batch * n_output
+//     - output: the 16 bit output
+// Notes:
+//     - this is used for gate matmul: for non-cifg it is for input, forget,
+//       cell, output gates; for cifg, it is for forget, cell, output gates.
+//     - multiplier and shift combined gives the scale.
+//     - assumes input zero point is 0.
+//     - scratch is created for optimization purpose only.
+// TODO(b/152066492): this can be removed if some future optimization
+// work makes it unnecessary.
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int16_t* output, CpuBackendContext* context);
+
+// Multiplies a matrix by a "batched" vector (i.e. a matrix with a batch
+// dimension composed by input vectors independent from each other). The result
+// of the multiplication is accumulated to the passed result buffer.
+// More specifically, for a matrix M of shape [n, i] and a batched-vector
+// of shape [i, batch] it will first compute the product of shape [n, batch].
+// This product will be accumulated to the result buffer.
+// Parameters:
+//     - input: batch vector of size n_batch * n_input
+//     - bias:  vector of size b_input
+//     - input_to_gate_weights: matrix of size n_input * n_output
+//     - multiplier: scalar
+//     - shift: scalar
+//     - n_batch: the batch size
+//     - n_input: the input size
+//     - n_output: the output size
+//     - output_zp: the zero point of the output.
+//     - scratch: batch vector of size n_batch * n_output
+//     - output: the 8 bit output
+// Notes:
+//     - this is used for projection matmul.
+//     - multiplier and shift combined gives the scale.
+//     - assumes input zero point is 0.
+//     - scratch is created for optimization purpose only.
+// TODO(b/152066492): this can be removed if some future optimization
+// work makes it unnecessary.
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int8_t* output, CpuBackendContext* context);
+
+// Apply Rectified Linear to elements of a vector.
+void ApplyReluToVector(const float* __restrict__ vector, int v_size,
+                       float* __restrict__ result);
+
+// Apply Rectified Linear 1 (cap to [-1;1]) to elements of a vector
+void ApplyRelu1ToVector(const float* __restrict__ vector, int v_size,
+                        float* __restrict__ result);
+
+// Apply Rectified Linear 6 (cap to [0;6]) to elements of a vector
+void ApplyRelu6ToVector(const float* __restrict__ vector, int v_size,
+                        float* __restrict__ result);
+
+// Apply signbit to elements of a vector
+void ApplySignbitToVector(const float* __restrict__ vector, int v_size,
+                          float* __restrict__ result);
+
+// Unpack or inflate `src_buffer` by taking each element and splitting it as
+// two elements into `dst_buffer`.
+// Parameters:
+//   src_buffer   : Densely packed buffer containing int4 values
+//   num_elements : Number of elements stored in the buffer. Note that this can
+//                  be smaller than the size of `src_buffer` by 1 if it's odd,
+//                  in which case the last nibble in `src_buffer` is ignored.
+//                  This should be equal to the size of `dst_buffer`.
+//   dst_buffer   : Buffer to unpack into. Should be allocated by the caller.
+//                  Size should be at least `num_elements`.
+// Notes:
+//   For example, given `src_buffer = {0x12, 0x34};`, calling this function
+//   will return `dst_buffer = {0x02, 0x01, 0x04, 0x03}`.
+void UnpackDenseInt4IntoInt8(const int8_t* src_buffer, int num_elements,
+                             int8_t* dst_buffer);
+
 }  // namespace tensor_utils
+
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_UTILS_COMMON_H_
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_PORTABLE_TENSOR_UTILS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cc b/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cc
index 88285f4..efd57db 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cc
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.cc
@@ -52,6 +52,11 @@ constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
 
 void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                         int* shift) {
+#if TFLITE_SINGLE_ROUNDING
+  // Single-rounding MultiplyByQuantizedMultiplier only supports positive
+  // multipliers.
+  // TFLITE_DCHECK(double_multiplier >= 0);
+#endif
   if (double_multiplier == 0.) {
     *quantized_multiplier = 0;
     *shift = 0;
@@ -65,10 +70,10 @@ void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
   int64_t q_fixed = IntegerFrExp(double_multiplier, shift);
 #else   // TFLITE_EMULATE_FLOAT
   const double q = std::frexp(double_multiplier, shift);
-  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1ll << 31)));
+  auto q_fixed = static_cast<int64_t>(TfLiteRound(q * (1LL << 31)));
 #endif  // TFLITE_EMULATE_FLOAT
-  TFLITE_CHECK(q_fixed <= (1ll << 31));
-  if (q_fixed == (1ll << 31)) {
+  TFLITE_CHECK(q_fixed <= (1LL << 31));
+  if (q_fixed == (1LL << 31)) {
     q_fixed /= 2;
     ++*shift;
   }
@@ -87,6 +92,14 @@ void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
     *shift = 0;
     q_fixed = 0;
   }
+#if TFLITE_SINGLE_ROUNDING
+  // Single-rounding MultiplyByQuantizedMultiplier doesn't support a shift > 30,
+  // so saturate it.
+  if (*shift > 30) {
+    *shift = 30;
+    q_fixed = (1LL << 31) - 1;
+  }
+#endif
   *quantized_multiplier = static_cast<int32_t>(q_fixed);
 }
 
@@ -278,6 +291,12 @@ void PreprocessSoftmaxScaling(double beta, double input_scale,
   // result is double equivalent of Q0.31 (actually with more precision). Thus
   // this generates a Q(input_integer_bits).(31-input_integer_bits)
   // representation.
+#if TFLITE_SINGLE_ROUNDING
+  const double max_real_multiplier = (1LL << 30) - 1.0;
+#else
+  const double max_real_multiplier = (1LL << 31) - 1.0;
+#endif
+
 #ifdef TFLITE_EMULATE_FLOAT
   const double input_beta = IntegerDoubleMultiply(beta, input_scale);
   int shift;
@@ -285,12 +304,14 @@ void PreprocessSoftmaxScaling(double beta, double input_scale,
   shift += (31 - input_integer_bits);
   double input_beta_real_multiplier =
       DoubleFromFractionAndShift(fraction, shift);
-  if (IntegerDoubleCompare(input_beta_real_multiplier, (1ll << 31) - 1.0) > 0) {
-    input_beta_real_multiplier = (1ll << 31) - 1.0;
+  if (IntegerDoubleCompare(input_beta_real_multiplier, max_real_multiplier) >
+      0) {
+    input_beta_real_multiplier = max_real_multiplier;
   }
 #else   // TFLITE_EMULATE_FLOAT
-  const double input_beta_real_multiplier = std::min<double>(
-      beta * input_scale * (1 << (31 - input_integer_bits)), (1ll << 31) - 1.0);
+  const double input_beta_real_multiplier =
+      std::min<double>(beta * input_scale * (1 << (31 - input_integer_bits)),
+                       max_real_multiplier);
 #endif  // TFLITE_EMULATE_FLOAT
 
   QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
@@ -324,8 +345,8 @@ int CalculateInputRadius(int input_integer_bits, int input_left_shift,
 #else   // TFLITE_EMULATE_FLOAT
   const double max_input_rescaled =
       1.0 * ((1 << input_integer_bits) - 1) *
-      (1ll << (total_signed_bits - input_integer_bits)) /
-      (1ll << input_left_shift);
+      (1LL << (total_signed_bits - input_integer_bits)) /
+      (1LL << input_left_shift);
   // Tighten bound using floor.  Suppose that we could use the exact value.
   // After scaling the difference, the result would be at the maximum.  Thus we
   // must ensure that our value has lower magnitude.
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h
index ef664be..ada6696 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h
@@ -15,6 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_H_
 
+#include <algorithm>
+#include <type_traits>
+
 #include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
@@ -27,25 +30,14 @@ inline void Add(const ArithmeticParams& params,
                 const RuntimeShape& input1_shape, const T* input1_data,
                 const RuntimeShape& input2_shape, const T* input2_data,
                 const RuntimeShape& output_shape, T* output_data) {
-  const int flat_size =
-      MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    output_data[i] = ActivationFunctionWithMinMax(
-        input1_data[i] + input2_data[i], params.quantized_activation_min,
-        params.quantized_activation_max);
-  }
-}
+  T activation_min, activation_max;
+  GetActivationParams(params, &activation_min, &activation_max);
 
-inline void Add(const ArithmeticParams& params,
-                const RuntimeShape& input1_shape, const float* input1_data,
-                const RuntimeShape& input2_shape, const float* input2_data,
-                const RuntimeShape& output_shape, float* output_data) {
   const int flat_size =
       MatchingElementsSize(input1_shape, input2_shape, output_shape);
-  for (int i = 0; i < flat_size; i++) {
-    auto x = input1_data[i] + input2_data[i];
+  for (int i = 0; i < flat_size; ++i) {
     output_data[i] = ActivationFunctionWithMinMax(
-        x, params.float_activation_min, params.float_activation_max);
+        input1_data[i] + input2_data[i], activation_min, activation_max);
   }
 }
 
@@ -202,13 +194,12 @@ inline void Add(const ArithmeticParams& params,
   }
 }
 
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const float* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const float* input2_data,
-                               const RuntimeShape& output_shape,
-                               float* output_data) {
+template <typename T>
+inline typename std::enable_if<!is_small_integer<T>::value, void>::type
+BroadcastAdd4DSlow(const ArithmeticParams& params,
+                   const RuntimeShape& input1_shape, const T* input1_data,
+                   const RuntimeShape& input2_shape, const T* input2_data,
+                   const RuntimeShape& output_shape, T* output_data) {
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@@ -216,45 +207,8 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
   const RuntimeShape extended_output_shape =
       RuntimeShape::ExtendedShape(4, output_shape);
 
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax(
-                  input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
-                      input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  params.float_activation_min, params.float_activation_max);
-        }
-      }
-    }
-  }
-}
-
-inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
-                               const RuntimeShape& input1_shape,
-                               const int32_t* input1_data,
-                               const RuntimeShape& input2_shape,
-                               const int32_t* input2_data,
-                               const RuntimeShape& output_shape,
-                               int32_t* output_data) {
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
+  T activation_min, activation_max;
+  GetActivationParams(params, &activation_min, &activation_max);
 
   // In Tensorflow, the dimensions are canonically named (batch_number, row,
   // col, channel), with extents (batches, height, width, depth), with the
@@ -272,11 +226,10 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
       for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
         for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
           output_data[Offset(extended_output_shape, b, y, x, c)] =
-              ActivationFunctionWithMinMax(
+              ActivationFunctionWithMinMax<T>(
                   input1_data[SubscriptToIndex(desc1, b, y, x, c)] +
                       input2_data[SubscriptToIndex(desc2, b, y, x, c)],
-                  params.quantized_activation_min,
-                  params.quantized_activation_max);
+                  activation_min, activation_max);
         }
       }
     }
@@ -287,10 +240,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
 // is 32-bit for both cases. The overflow does not happen due to the
 // choice of the shift (20 or 15, accordingly - see add.cc for more comments).
 template <typename T>
-inline void BroadcastAdd4DSlow(
-    const ArithmeticParams& params, const RuntimeShape& input1_shape,
-    const T* input1_data, const RuntimeShape& input2_shape,
-    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+inline typename std::enable_if<is_small_integer<T>::value, void>::type
+BroadcastAdd4DSlow(const ArithmeticParams& params,
+                   const RuntimeShape& input1_shape, const T* input1_data,
+                   const RuntimeShape& input2_shape, const T* input2_data,
+                   const RuntimeShape& output_shape, T* output_data) {
   NdArrayDesc<4> desc1;
   NdArrayDesc<4> desc2;
   NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h
index dde1501..7b5424c 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add_n.h
@@ -15,7 +15,10 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ADD_N_H_
 
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include <algorithm>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
 namespace reference_ops {
@@ -36,6 +39,47 @@ inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
   }
 }
 
+inline void AddN(const ArithmeticParams& params,
+                 const RuntimeShape& input_shape, const size_t num_inputs,
+                 const int8_t* const* input_data, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  // Input offset is the negated input zero point. Activation tensors are
+  // asymmetrically quantized, so they span the full int8 range.
+  // All inputs should have the same zero-point and scale; this is checked
+  // during the Prepare stage.
+  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
+
+  // All inputs and the output should have the same shape; this is checked
+  // during the Prepare stage.
+  const size_t size = input_shape.FlatSize();
+  for (size_t i = 0; i < size; ++i) {
+    // Accumulate in int32 scaled_x; clamp only at the end to avoid overflow.
+    const int32_t x = params.input1_offset;  // i.e. input1_offset + 0
+    const int32_t shifted_x = x * (1 << params.left_shift);
+    int32_t scaled_x = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+        shifted_x, params.input1_multiplier, params.input1_shift);
+
+    for (size_t j = 0; j < num_inputs; ++j) {
+      const int32_t y = params.input1_offset + input_data[j][i];
+      const int32_t shifted_y = y * (1 << params.left_shift);
+      int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          shifted_y, params.input1_multiplier, params.input1_shift);
+      scaled_x += scaled_y;
+    }
+
+    const int32_t raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            scaled_x, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32_t clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<int8_t>(clamped_output);
+  }
+}
+
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h
index 0e08a55..3695bad 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/batch_matmul.h
@@ -20,7 +20,7 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils_common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h
index a747931..66101d9 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h
@@ -43,16 +43,27 @@ inline void BroadcastBinaryFunction4DSlow(
   NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
                                       unextended_input2_shape, &desc1, &desc2);
 
+  const int* dims_data =
+      reinterpret_cast<const int*>(output_shape.DimsDataUpTo5D());
   for (int b = 0; b < output_shape.Dims(0); ++b) {
+    int out_idx_b = b * dims_data[1];
+    int in_idx1_b = desc1.strides[0] * b;
+    int in_idx2_b = desc2.strides[0] * b;
     for (int y = 0; y < output_shape.Dims(1); ++y) {
+      int out_idx_y = (out_idx_b + y) * dims_data[2];
+      int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
+      int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
       for (int x = 0; x < output_shape.Dims(2); ++x) {
+        int out_idx_x = (out_idx_y + x) * dims_data[3];
+        int in1_idx = in_idx1_y + desc1.strides[2] * x;
+        int in2_idx = in_idx2_y + desc2.strides[2] * x;
         for (int c = 0; c < output_shape.Dims(3); ++c) {
-          auto out_idx = Offset(output_shape, b, y, x, c);
-          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
-          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+          auto out_idx = out_idx_x + c;
           auto in1_val = input1_data[in1_idx];
           auto in2_val = input2_data[in2_idx];
           output_data[out_idx] = func(in1_val, in2_val);
+          in1_idx += desc1.strides[3];
+          in2_idx += desc2.strides[3];
         }
       }
     }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h
new file mode 100644
index 0000000..341c418
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h
@@ -0,0 +1,56 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename T>
+void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
+                   const RuntimeShape& input2_shape, const T* input2_data,
+                   const RuntimeShape& output_shape, T* output_data) {
+  // Reads the shape-tensor element `backward_idx` positions from the end.
+  // Returns 1 if that position lies before the first element.
+  auto get_shape_data = [](const RuntimeShape& shape, const T* data,
+                           int backward_idx) -> T {
+    int forward_idx = shape.FlatSize() - 1 - backward_idx;
+    if (forward_idx < 0) return 1;
+    return data[forward_idx];
+  };
+
+  int output_num_elements = output_shape.FlatSize();
+  for (int i = 0; i < output_num_elements; ++i) {
+    int backward_i = output_num_elements - 1 - i;
+    int shape1_i = get_shape_data(input1_shape, input1_data, i);
+    int shape2_i = get_shape_data(input2_shape, input2_data, i);
+    if (shape1_i == 1) {
+      output_data[backward_i] = shape2_i;
+    } else if (shape2_i == 1) {
+      output_data[backward_i] = shape1_i;
+    } else {
+      TFLITE_CHECK_EQ(shape1_i, shape2_i);
+      output_data[backward_i] = shape1_i;
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h
new file mode 100644
index 0000000..79756cb
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h
@@ -0,0 +1,97 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace reference_ops {
+template <int N>  // N: number of dimensions in both descriptors
+void BroadcastImpl(const NdArrayDesc<N>& input_desc, const char* input_data,
+                   const NdArrayDesc<N>& output_desc, char* output_data,
+                   int indexes[N], int dim, const int last_broadcasting_dim,
+                   const int type_size) {
+  // Base case: repeat one input block across this output dimension.
+  if (dim == last_broadcasting_dim) {
+    int copy_size = output_desc.strides[dim] * type_size;  // bytes per slot
+    const char* data_src =
+        input_data + SubscriptToIndex(input_desc, indexes) * type_size;
+    char* data_dst =
+        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
+    for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
+      memcpy(data_dst, data_src, copy_size);  // same source for every slot
+    }
+    return;
+  }
+
+  // Recurse into dim + 1 once for every input index along this dimension.
+  for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim];
+       ++indexes[dim]) {
+    BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes,
+                     dim + 1, last_broadcasting_dim, type_size);
+  }
+
+  // If this dimension's extents differ, replicate the first output slice.
+  indexes[dim] = 0;  // reset so the source offset points at slice 0
+  if (input_desc.extents[dim] != output_desc.extents[dim]) {
+    int copy_size = output_desc.strides[dim] * type_size;  // bytes per slice
+    char* data_src =
+        output_data + SubscriptToIndex(output_desc, indexes) * type_size;
+    char* data_dst = data_src + copy_size;  // slice 1 follows slice 0
+    for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) {
+      memcpy(data_dst, data_src, copy_size);
+    }
+  }
+}
+
+template <int N>  // N: rank both shapes are extended to before broadcasting
+inline void BroadcastTo(const RuntimeShape& unextended_input_shape,
+                        const char* input_data,
+                        const RuntimeShape& unextended_output_shape,
+                        char* output_data, TfLiteType data_type) {
+  NdArrayDesc<N> input_desc;
+  NdArrayDesc<N> output_desc;
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape),
+                 &input_desc);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
+                 &output_desc);
+
+  // Find the last dimension that is broadcast. At this dimension, the data
+  // is copied from the input tensor to the output tensor.
+  int last_broadcast_dim = -1;  // -1 means no extent differs
+  for (int i = N - 1; i >= 0; --i) {
+    if (input_desc.extents[i] != output_desc.extents[i]) {
+      last_broadcast_dim = i;
+      break;
+    }
+  }
+
+  // No dimension is broadcast: copy the whole input with a single memcpy.
+  if (last_broadcast_dim == -1) {
+    memcpy(output_data, input_data,
+           unextended_input_shape.FlatSize() * TfLiteTypeGetSize(data_type));
+    return;
+  }
+
+  // Broadcast recursively with memcpy, starting at dimension 0.
+  int indexes[N] = {0};  // current subscript, shared across recursion levels
+  BroadcastImpl<N>(input_desc, input_data, output_desc, output_data, indexes, 0,
+                   last_broadcast_dim, TfLiteTypeGetSize(data_type));
+}
+}  // namespace reference_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h
index cd2c741..f3d6bcc 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h
@@ -15,7 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_COMPARISONS_H_
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h
index 4f3637e..9d03523 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h
@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONCATENATION_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h
index a9b73d2..a244ec0 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
@@ -43,7 +45,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   (void)im2col_data;   // only used in optimized code.
   (void)im2col_shape;  // only used in optimized code.
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int input_depth = input_shape.Dims(3);
   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -52,14 +54,20 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   const int input_width = input_shape.Dims(2);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
+  const int filter_input_depth = filter_shape.Dims(3);
+  const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
+  const int filters_per_group = output_depth / groups;
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
+
   for (int batch = 0; batch < batches; ++batch) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       const int in_y_origin = (out_y * stride_height) - pad_height;
       for (int out_x = 0; out_x < output_width; ++out_x) {
         const int in_x_origin = (out_x * stride_width) - pad_width;
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          auto group = out_channel / filters_per_group;
           float total = 0.f;
           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
             const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -74,10 +82,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
               if (!is_point_inside_image) {
                 continue;
               }
-
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                float input_value = input_data[Offset(input_shape, batch, in_y,
-                                                      in_x, in_channel)];
+              for (int in_channel = 0; in_channel < filter_input_depth;
+                   ++in_channel) {
+                float input_value =
+                    input_data[Offset(input_shape, batch, in_y, in_x,
+                                      in_channel + group * filter_input_depth)];
                 float filter_value = filter_data[Offset(
                     filter_shape, out_channel, filter_y, filter_x, in_channel)];
                 total += (input_value * filter_value);
@@ -126,7 +135,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int input_depth = input_shape.Dims(3);
   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -135,6 +144,10 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
   const int input_width = input_shape.Dims(2);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
+  const int filter_input_depth = filter_shape.Dims(3);
+  const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
+  const int filters_per_group = output_depth / groups;
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
@@ -143,6 +156,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
       for (int out_x = 0; out_x < output_width; ++out_x) {
         const int in_x_origin = (out_x * stride_width) - pad_width;
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          auto group = out_channel / filters_per_group;
           int32_t acc = 0;
           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
             const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -158,9 +172,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
                 continue;
               }
 
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
-                                                      in_x, in_channel)];
+              for (int in_channel = 0; in_channel < filter_input_depth;
+                   ++in_channel) {
+                int32_t input_val =
+                    input_data[Offset(input_shape, batch, in_y, in_x,
+                                      in_channel + group * filter_input_depth)];
                 int32_t filter_val = filter_data[Offset(
                     filter_shape, out_channel, filter_y, filter_x, in_channel)];
                 acc +=
@@ -206,7 +222,7 @@ inline void HybridConvPerChannel(
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int input_depth = input_shape.Dims(3);
   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -215,18 +231,24 @@ inline void HybridConvPerChannel(
   const int input_width = input_shape.Dims(2);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
+  const int filter_input_depth = filter_shape.Dims(3);
+  const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
+  const int filters_per_group = output_depth / groups;
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
     for (int out_y = 0; out_y < output_height; ++out_y) {
       for (int out_x = 0; out_x < output_width; ++out_x) {
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          auto group = out_channel / filters_per_group;
           const int in_x_origin = (out_x * stride_width) - pad_width;
           const int in_y_origin = (out_y * stride_height) - pad_height;
           int32_t acc = 0;
           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+              for (int in_channel = 0; in_channel < filter_input_depth;
+                   ++in_channel) {
                 const int in_x = in_x_origin + dilation_width_factor * filter_x;
                 const int in_y =
                     in_y_origin + dilation_height_factor * filter_y;
@@ -235,7 +257,8 @@ inline void HybridConvPerChannel(
                 if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
                     (in_y < input_height)) {
                   int32_t input_val = input_data[Offset(
-                      input_shape, batch, in_y, in_x, in_channel)];
+                      input_shape, batch, in_y, in_x,
+                      in_channel + group * filter_input_depth)];
                   int32_t filter_val =
                       filter_data[Offset(filter_shape, out_channel, filter_y,
                                          filter_x, in_channel)];
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h
new file mode 100644
index 0000000..56698a0
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h
@@ -0,0 +1,175 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename T>  // Running sum of input_data along `axis`.
+inline void CumSum(const T* input_data, const RuntimeShape& shape, int32_t axis,
+                   bool exclusive, bool reverse, T* output_data) {
+  const int32_t rank = shape.DimensionsCount();
+  TFLITE_DCHECK_GE(rank, 1);
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, rank);
+
+  size_t inner = 1;  // product of the dimensions before `axis`
+  size_t outer = 1;  // product of the dimensions after `axis`
+  size_t depth = 1;  // extent of `axis` itself
+  for (int32_t i = 0; i < rank; i++) {
+    if (i < axis)
+      inner *= shape.Dims(i);
+    else if (i > axis)
+      outer *= shape.Dims(i);
+    else
+      depth = shape.Dims(i);
+  }
+
+  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
+    size_t outer_index_adj;  // visiting order is flipped when `reverse` is set
+    if (reverse)
+      outer_index_adj = (outer - 1) - outer_index;
+    else
+      outer_index_adj = outer_index;
+    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
+      T accumulator = 0;  // restarted for every line along `axis`
+      size_t inner_index_adj;
+      if (reverse)
+        inner_index_adj = (inner - 1) - inner_index;
+      else
+        inner_index_adj = inner_index;
+      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
+        size_t depth_index_adj;  // walk the axis backwards when reversed
+        if (reverse)
+          depth_index_adj = (depth - 1) - depth_index;
+        else
+          depth_index_adj = depth_index;
+
+        size_t index = outer_index_adj;  // flat offset of the current element
+        index += inner_index_adj * depth * outer;
+        index += depth_index_adj * outer;
+
+        if (exclusive) {  // emit the sum before adding the current element
+          output_data[index] = accumulator;
+          accumulator += input_data[index];
+        } else {  // inclusive: the current element is part of its own sum
+          accumulator += input_data[index];
+          output_data[index] = accumulator;
+        }
+      }
+    }
+  }
+}
+
+// Quantized int8 cumulative sum along `axis`. Inputs are offset and rescaled
+// with the input1 parameters, accumulated in int32, then rescaled with the
+// output parameters and clamped to the quantized activation range.
+inline void CumSum(const ArithmeticParams& params, const int8_t* input_data,
+                   const RuntimeShape& shape, int32_t axis, bool exclusive,
+                   bool reverse, int8_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+  // Input offset is the negated input zero point. Activation tensors are
+  // asymmetrically quantized, so they span the full int8 range.
+  // All inputs should have the same zero point and scale; this is checked
+  // during the Prepare stage.
+  TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits<int8_t>::min());
+  TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits<int8_t>::max());
+
+  const int32_t rank = shape.DimensionsCount();
+  TFLITE_DCHECK_GE(rank, 1);
+  TFLITE_DCHECK_GE(axis, 0);
+  TFLITE_DCHECK_LT(axis, rank);
+
+  size_t inner = 1;  // product of the dimensions before `axis`
+  size_t outer = 1;  // product of the dimensions after `axis`
+  size_t depth = 1;  // extent of `axis` itself
+  for (int32_t i = 0; i < rank; i++) {
+    if (i < axis)
+      inner *= shape.Dims(i);
+    else if (i > axis)
+      outer *= shape.Dims(i);
+    else
+      depth = shape.Dims(i);
+  }
+
+  for (size_t outer_index = 0; outer_index < outer; outer_index++) {
+    size_t outer_index_adj;  // visiting order is flipped when `reverse` is set
+    if (reverse)
+      outer_index_adj = (outer - 1) - outer_index;
+    else
+      outer_index_adj = outer_index;
+    for (size_t inner_index = 0; inner_index < inner; inner_index++) {
+      int32_t accumulator = params.input1_offset;  // accumulator = 0
+      accumulator *= (1 << params.left_shift);
+      accumulator = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          accumulator, params.input1_multiplier, params.input1_shift);
+
+      size_t inner_index_adj;
+      if (reverse)
+        inner_index_adj = (inner - 1) - inner_index;
+      else
+        inner_index_adj = inner_index;
+
+      for (size_t depth_index = 0; depth_index < depth; depth_index++) {
+        size_t depth_index_adj;  // walk the axis backwards when reversed
+        if (reverse)
+          depth_index_adj = (depth - 1) - depth_index;
+        else
+          depth_index_adj = depth_index;
+
+        size_t index = outer_index_adj;  // flat offset of the current element
+        index += inner_index_adj * depth * outer;
+        index += depth_index_adj * outer;
+
+        const int32_t y = params.input1_offset + input_data[index];
+        const int32_t shifted_y = y * (1 << params.left_shift);
+        const int32_t scaled_y = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_y, params.input1_multiplier, params.input1_shift);
+
+        int32_t scaled_output;
+        if (exclusive) {  // emit the sum before adding the current element
+          scaled_output = accumulator;
+          accumulator += scaled_y;
+        } else {  // inclusive: the current element is part of its own sum
+          accumulator += scaled_y;
+          scaled_output = accumulator;
+        }
+
+        const int32_t raw_output =
+            MultiplyByQuantizedMultiplierSmallerThanOneExp(
+                scaled_output, params.output_multiplier, params.output_shift) +
+            params.output_offset;
+        const int32_t clamped_output =
+            std::min(params.quantized_activation_max,
+                     std::max(params.quantized_activation_min, raw_output));
+        output_data[index] = static_cast<int8_t>(clamped_output);
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CUMSUM_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h
new file mode 100644
index 0000000..41b2679
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h
@@ -0,0 +1,79 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename T>  // Moves depth values into block_size x block_size tiles.
+inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params,
+                         const RuntimeShape& unextended_input_shape,
+                         const T* input_data,
+                         const RuntimeShape& unextended_output_shape,
+                         T* output_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =  // both shapes are extended to rank 4
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int input_batch = input_shape.Dims(0);
+
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_batch = output_shape.Dims(0);
+
+  const int32_t block_size = op_params.block_size;
+
+  TFLITE_DCHECK_EQ(input_width * block_size, output_width);
+  TFLITE_DCHECK_EQ(input_height * block_size, output_height);
+  TFLITE_DCHECK_EQ(input_depth, output_depth * block_size * block_size);
+  TFLITE_DCHECK_EQ(input_batch, output_batch);
+
+  for (int out_b = 0; out_b < output_batch; ++out_b) {
+    for (int out_h = 0; out_h < output_height; ++out_h) {
+      for (int out_w = 0; out_w < output_width; ++out_w) {
+        for (int out_d = 0; out_d < output_depth; ++out_d) {
+          const int in_d =
+              out_d + ((out_h % block_size) * block_size + out_w % block_size) *
+                          output_depth;  // position inside the tile picks depth
+
+          const int in_w = out_w / block_size;  // source pixel column
+          const int in_h = out_h / block_size;  // source pixel row
+          const int in_b = out_b;
+
+          const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
+          const int output_index =
+              Offset(output_shape, out_b, out_h, out_w, out_d);
+
+          output_data[output_index] = input_data[input_index];
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_DEPTH_TO_SPACE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
index dd418ce..4dc5245 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h
@@ -68,6 +68,27 @@ inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier,
   return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);
 }
 
+// Single-rounding MultiplyByQuantizedMultiplier
+#if TFLITE_SINGLE_ROUNDING
+template <>  // kAwayFromZero variant used when TFLITE_SINGLE_ROUNDING is set
+inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  using gemmlowp::RoundingDivideByPOT;
+  using gemmlowp::SaturatingRoundingDoublingHighMul;
+  int left_shift = shift > 0 ? shift : 0;  // positive shift scales x up first
+  int right_shift = shift > 0 ? 0 : -shift;  // otherwise divide afterwards
+  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
+                                 x * (1 << left_shift), quantized_multiplier),
+                             right_shift);
+}
+
+template <>  // kUpward variant used when TFLITE_SINGLE_ROUNDING is set
+inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
+    int32_t x, int32_t quantized_multiplier, int shift) {
+  return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift);  // plain forward
+}
+// Double-rounding MultiplyByQuantizedMultiplier
+#else
 template <>
 inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kAwayFromZero>(
     int32_t x, int32_t quantized_multiplier, int shift) {
@@ -86,6 +107,7 @@ inline int32_t DepthwiseConvRound<DepthwiseConvOutputRounding::kUpward>(
           rounding_offset) >>
          right_shift;
 }
+#endif  // TFLITE_SINGLE_ROUNDING
 
 template <DepthwiseConvOutputRounding output_rounding>
 struct DepthwiseConvBasicKernel {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h
index a38a503..71bbeaf 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/div.h
@@ -48,13 +48,17 @@ inline void DivElementwise(int size, const ArithmeticParams& params,
   DivCheckArithmeticParams<T>(params);
 
   for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
+    int32_t input1_val = params.input1_offset + input1_data[i];
+    int32_t input2_val = params.input2_offset + input2_data[i];
     TFLITE_DCHECK_NE(input2_val, 0);
+    if (input2_val < 0) {
+      // Invert signs to avoid a negative input2_val as input2_inv needs to be
+      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
+      input1_val = -input1_val;
+      input2_val = -input2_val;
+    }
     int recip_shift;
-    const int32_t input2_inv =
-        (input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
-                         : -GetReciprocal(-input2_val, 31, &recip_shift);
+    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
     const int headroom = CountLeadingSignBits(input1_val);
     const int32_t unscaled_quotient =
         MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
@@ -116,15 +120,19 @@ inline void BroadcastDivSlowQuantized(
   DivCheckArithmeticParams<T>(params);
 
   auto div_func = [&](int indexes[N]) {
-    const int32_t input1_val =
+    int32_t input1_val =
         params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    const int32_t input2_val =
+    int32_t input2_val =
         params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
     TFLITE_DCHECK_NE(input2_val, 0);
+    if (input2_val < 0) {
+      // Invert signs to avoid a negative input2_val as input2_inv needs to be
+      // positive to be used as multiplier of MultiplyByQuantizedMultiplier.
+      input1_val = -input1_val;
+      input2_val = -input2_val;
+    }
     int recip_shift;
-    const int32_t input2_inv =
-        (input2_val > 0) ? GetReciprocal(input2_val, 31, &recip_shift)
-                         : -GetReciprocal(-input2_val, 31, &recip_shift);
+    const int32_t input2_inv = GetReciprocal(input2_val, 31, &recip_shift);
     const int headroom = CountLeadingSignBits(input1_val);
     const int32_t unscaled_quotient =
         MultiplyByQuantizedMultiplierGreaterThanOne(input1_val, input2_inv,
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h
similarity index 50%
rename from edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h
rename to edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h
index e406ac1..dbda3f8 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,29 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
+
+#include <cmath>
+#include <functional>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
 namespace tflite {
-namespace ops {
-namespace micro {
-
-// Same as gtl::Greater but defined here to reduce dependencies and
-// binary size for micro environment.
-struct Greater {
-  template <typename T>
-  bool operator()(const T& x, const T& y) const {
-    return x > y;
-  }
-};
-
-struct Less {
-  template <typename T>
-  bool operator()(const T& x, const T& y) const {
-    return x < y;
-  }
-};
-
-}  // namespace micro
-}  // namespace ops
+namespace reference_ops {
+
+template <typename T>  // Returns floor(input1 / input2) as a T.
+T FloorDiv(T input1, T input2) {  // the division itself is done in double
+  return std::floor(std::divides<double>()(static_cast<double>(input1),
+                                           static_cast<double>(input2)));
+}  // NOTE(review): the double result is converted back to T implicitly
+
+}  // namespace reference_ops
 }  // namespace tflite
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_UTILS_H_
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_DIV_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h
new file mode 100644
index 0000000..20ce18b
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h
@@ -0,0 +1,44 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
+
+#include <cmath>
+#include <functional>
+
+namespace tflite {
+
+namespace reference_ops {
+
+// Floored modulo: a non-zero remainder is shifted to take the sign of input2.
+template <typename T>
+T FloorMod(T input1, T input2) {
+  struct FloatMod {
+    float operator()(const float lhs, const float rhs) const {
+      return std::fmod(lhs, rhs);
+    }
+  };
+  using ModFunc = typename std::conditional<std::is_integral<T>::value,
+                                            std::modulus<T>, FloatMod>::type;
+  const T trunc_mod = ModFunc()(input1, input2);
+  const bool sign_differs = (input2 < 0) != (trunc_mod < 0);
+  if (trunc_mod != 0 && sign_differs) return trunc_mod + input2;
+  return trunc_mod;
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FLOOR_MOD_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h
index adb4ea8..6cd8f66 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h
@@ -15,6 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_FULLY_CONNECTED_H_
 
+#include <algorithm>
+
+#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h
index 30e18af..c427205 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h
@@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
-#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ACTIVATIONS_H_
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
+
+#include <algorithm>
 
 #include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -23,9 +25,9 @@ namespace tflite {
 namespace reference_ops {
 
 inline int16_t SaturatingLeftShift(int16_t value, int amount) {
-  int32_t result = static_cast<int32_t>(value) * (1 << amount);
-  result = std::min<int32_t>(result, std::numeric_limits<int16_t>::max());
-  result = std::max<int32_t>(result, std::numeric_limits<int16_t>::min());
+  int64_t result = static_cast<int64_t>(value) * (1 << amount);
+  result = std::min<int64_t>(result, std::numeric_limits<int16_t>::max());
+  result = std::max<int64_t>(result, std::numeric_limits<int16_t>::min());
   return result;
 }
 
@@ -163,4 +165,4 @@ inline void HardSwish(const HardSwishParams& params,
 }  // namespace reference_ops
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_CONV_H_
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_HARD_SWISH_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
index c334fe4..12064e3 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_ADD_H_
 
+#include <algorithm>
 #include <limits>
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
index 413e5da..3b9adcb 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_CONV_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
@@ -48,7 +50,7 @@ inline void ConvPerChannel(
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int input_depth = input_shape.Dims(3);
   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -59,6 +61,10 @@ inline void ConvPerChannel(
   const int input_width = input_shape.Dims(2);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
+  const int filter_input_depth = filter_shape.Dims(3);
+  const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
+  const int filters_per_group = output_depth / groups;
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
@@ -67,6 +73,7 @@ inline void ConvPerChannel(
       for (int out_x = 0; out_x < output_width; ++out_x) {
         const int in_x_origin = (out_x * stride_width) - pad_width;
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          auto group = out_channel / filters_per_group;
           int32_t acc = 0;
           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
             const int in_y = in_y_origin + dilation_height_factor * filter_y;
@@ -82,9 +89,11 @@ inline void ConvPerChannel(
                 continue;
               }
 
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
-                                                      in_x, in_channel)];
+              for (int in_channel = 0; in_channel < filter_input_depth;
+                   ++in_channel) {
+                int32_t input_val =
+                    input_data[Offset(input_shape, batch, in_y, in_x,
+                                      in_channel + group * filter_input_depth)];
                 int32_t filter_val = filter_data[Offset(
                     filter_shape, out_channel, filter_y, filter_x, in_channel)];
                 // Accumulate with 32 bits accumulator.
@@ -124,14 +133,16 @@ inline void ConvPerChannel(
   }
 }
 
+
 // Fixed-point per-channel-quantization convolution reference kernel.
 // 16-bit data and 8-bit filter
+template <typename AccumScalar>
 inline void ConvPerChannel(
     const ConvParams& params, const int32_t* output_multiplier,
     const int32_t* output_shift, const RuntimeShape& input_shape,
     const int16_t* input_data, const RuntimeShape& filter_shape,
     const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
+    const AccumScalar* bias_data, const RuntimeShape& output_shape,
     int16_t* output_data) {
   // Get parameters.
   const int stride_width = params.stride_width;
@@ -151,7 +162,7 @@ inline void ConvPerChannel(
   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
-  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int input_depth = input_shape.Dims(3);
   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
@@ -162,6 +173,10 @@ inline void ConvPerChannel(
   const int input_width = input_shape.Dims(2);
   const int filter_height = filter_shape.Dims(1);
   const int filter_width = filter_shape.Dims(2);
+  const int filter_input_depth = filter_shape.Dims(3);
+  const int groups = input_depth / filter_input_depth;
+  TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
+  const int filters_per_group = output_depth / groups;
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
   for (int batch = 0; batch < batches; ++batch) {
@@ -170,7 +185,8 @@ inline void ConvPerChannel(
       for (int out_x = 0; out_x < output_width; ++out_x) {
         const int in_x_origin = (out_x * stride_width) - pad_width;
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          std::int64_t acc = 0;
+          auto group = out_channel / filters_per_group;
+          AccumScalar acc = 0;
           for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
             const int in_y = in_y_origin + dilation_height_factor * filter_y;
             for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
@@ -185,9 +201,11 @@ inline void ConvPerChannel(
                 continue;
               }
 
-              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
-                int32_t input_val = input_data[Offset(input_shape, batch, in_y,
-                                                      in_x, in_channel)];
+              for (int in_channel = 0; in_channel < filter_input_depth;
+                   ++in_channel) {
+                int32_t input_val =
+                    input_data[Offset(input_shape, batch, in_y, in_x,
+                                      in_channel + group * filter_input_depth)];
                 int32_t filter_val = filter_data[Offset(
                     filter_shape, out_channel, filter_y, filter_x, in_channel)];
                 // Accumulate with 64 bits accumulator.
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
index 42d2536..95e7337 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEPTHWISE_CONV_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
index de21e14..4be7987 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h
@@ -15,22 +15,30 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
 namespace reference_integer_ops {
 
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int8_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int32_t* bias_data, const RuntimeShape& output_shape,
-    int8_t* output_data) {
+// For per-channel functions, since it is defined in quantization spec that
+// weights are symmetric
+// (https://www.tensorflow.org/lite/performance/quantization_spec#symmetric_vs_asymmetric),
+// zero_point (params.weights_offset) is always 0.
+// However, for per-tensor functions, params.weights_offset is still applied for
+// backward compatibility.
+template <typename InputType, typename WeightType, typename OutputType,
+          typename BiasType>
+void FullyConnectedPerChannel(
+    const FullyConnectedParams& params, const int32_t* output_multiplier,
+    const int* output_shift, const RuntimeShape& input_shape,
+    const InputType* input_data, const RuntimeShape& filter_shape,
+    const WeightType* filter_data, const RuntimeShape& bias_shape,
+    const BiasType* bias_data, const RuntimeShape& output_shape,
+    OutputType* output_data) {
   const int32_t input_offset = params.input_offset;
-  const int32_t filter_offset = params.weights_offset;
   const int32_t output_offset = params.output_offset;
-  const int32_t output_multiplier = params.output_multiplier;
-  const int output_shift = params.output_shift;
   const int32_t output_activation_min = params.quantized_activation_min;
   const int32_t output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
@@ -44,60 +52,70 @@ inline void FullyConnected(
   const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int32_t acc = 0;
+      BiasType acc = 0;
       for (int d = 0; d < accum_depth; ++d) {
         int32_t input_val = input_data[b * accum_depth + d];
         int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * (input_val + input_offset);
+        acc += filter_val * (input_val + input_offset);
       }
       if (bias_data) {
         acc += bias_data[out_c];
       }
-      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
-      acc += output_offset;
-      acc = std::max(acc, output_activation_min);
-      acc = std::min(acc, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int8_t>(acc);
+      int32_t acc_scaled = MultiplyByQuantizedMultiplier(
+          acc, output_multiplier[out_c], output_shift[out_c]);
+      acc_scaled += output_offset;
+      acc_scaled = std::max(acc_scaled, output_activation_min);
+      acc_scaled = std::min(acc_scaled, output_activation_max);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
     }
   }
 }
 
-inline void FullyConnected(
-    const FullyConnectedParams& params, const RuntimeShape& input_shape,
-    const int16_t* input_data, const RuntimeShape& filter_shape,
-    const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const int64_t* bias_data, const RuntimeShape& output_shape,
-    int16_t* output_data) {
+template <typename InputType, typename WeightType, typename OutputType,
+          typename BiasType>
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape,
+                    const InputType* input_data,
+                    const RuntimeShape& filter_shape,
+                    const WeightType* filter_data,
+                    const RuntimeShape& bias_shape, const BiasType* bias_data,
+                    const RuntimeShape& output_shape, OutputType* output_data) {
+  const int32_t input_offset = params.input_offset;
   const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
   const int32_t output_multiplier = params.output_multiplier;
   const int output_shift = params.output_shift;
   const int32_t output_activation_min = params.quantized_activation_min;
   const int32_t output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
 
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
   const int filter_dim_count = filter_shape.DimensionsCount();
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth = output_shape.Dims(output_dim_count - 1);
   TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2));
   const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
   for (int b = 0; b < batches; ++b) {
     for (int out_c = 0; out_c < output_depth; ++out_c) {
-      int64_t acc = 0;
+      BiasType acc = 0;
       for (int d = 0; d < accum_depth; ++d) {
         int32_t input_val = input_data[b * accum_depth + d];
         int32_t filter_val = filter_data[out_c * accum_depth + d];
-        acc += (filter_val + filter_offset) * input_val;
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
       }
       if (bias_data) {
         acc += bias_data[out_c];
       }
       int32_t acc_scaled =
           MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc_scaled += output_offset;
       acc_scaled = std::max(acc_scaled, output_activation_min);
       acc_scaled = std::min(acc_scaled, output_activation_max);
-      output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled);
+      output_data[out_c + output_depth * b] =
+          static_cast<OutputType>(acc_scaled);
     }
   }
 }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
index c5fb00e..582713b 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_L2NORMALIZATION_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
index b53bfd9..2119103 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h
@@ -15,7 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_LOGISTIC_H_
 
+#include <algorithm>
 #include <limits>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
index fdb584e..0ba0f66 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
@@ -15,18 +15,19 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MEAN_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
 namespace reference_integer_ops {
 
 template <typename integer_type>
-inline void MeanOrSum(const tflite::MeanParams& op_params, int32_t multiplier,
-                      int32_t shift, const RuntimeShape& unextended_input_shape,
-                      const integer_type* input_data, int32_t input_zero_point,
-                      const RuntimeShape& unextended_output_shape,
-                      integer_type* output_data, int32_t output_zero_point,
-                      bool compute_sum) {
+inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
+                 int32_t shift, const RuntimeShape& unextended_input_shape,
+                 const integer_type* input_data, int32_t input_zero_point,
+                 const RuntimeShape& unextended_output_shape,
+                 integer_type* output_data, int32_t output_zero_point) {
   // Current implementation only supports dimension equals 4 and simultaneous
   // reduction over width and height.
   TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
@@ -62,10 +63,8 @@ inline void MeanOrSum(const tflite::MeanParams& op_params, int32_t multiplier,
         }
       }
       acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
-      if (!compute_sum) {
-        acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
-                      : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
-      }
+      acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis
+                    : (acc - num_elements_in_axis / 2) / num_elements_in_axis;
       acc += output_zero_point;
       acc = std::min(std::max(acc, kMinInt), kMaxInt);
       output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
index 3c809db..168e3ae 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_MUL_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
 #include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -22,10 +24,10 @@ limitations under the License.
 namespace tflite {
 namespace reference_integer_ops {
 
-template <typename T>
-inline void MulElementwise(int size, const ArithmeticParams& params,
-                           const T* input1_data, const T* input2_data,
-                           T* output_data) {
+template <typename InputType, typename OutputType>
+void MulElementwise(int size, const ArithmeticParams& params,
+                    const InputType* input1_data, const InputType* input2_data,
+                    OutputType* output_data) {
   for (int i = 0; i < size; ++i) {
     const int32_t input1_val = params.input1_offset + input1_data[i];
     const int32_t input2_val = params.input2_offset + input2_data[i];
@@ -37,7 +39,7 @@ inline void MulElementwise(int size, const ArithmeticParams& params,
     const int32_t clamped_output =
         std::min(params.quantized_activation_max,
                  std::max(params.quantized_activation_min, unclamped_result));
-    output_data[i] = static_cast<T>(clamped_output);
+    output_data[i] = static_cast<OutputType>(clamped_output);
   }
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
index f4eedc6..ee026fd 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h
@@ -15,13 +15,15 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_POOLING_H_
 
+#include <algorithm>
 #include <limits>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
 namespace reference_integer_ops {
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const int8_t* input_data,
                         const RuntimeShape& output_shape, int8_t* output_data) {
@@ -66,6 +68,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           // Round to the closest integer value.
           acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                         : (acc - filter_count / 2) / filter_count;
@@ -77,6 +80,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
@@ -136,7 +140,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
   }
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const int16_t* input_data,
                         const RuntimeShape& output_shape,
@@ -182,6 +186,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           // Round to the closest integer value.
           acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                         : (acc - filter_count / 2) / filter_count;
@@ -193,6 +198,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
index 2dc2ad4..d7feb45 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_
 
+#include <algorithm>
 #include <limits>
 
 #include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
index 57622ba..8ce1cb7 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TRANSPOSE_CONV_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
@@ -53,8 +55,8 @@ inline void TransposeConv(
   const int output_width = output_shape.Dims(2);
   const int32_t input_offset = params.input_offset;
   const int32_t output_offset = params.output_offset;
-  const int32_t output_activation_min = std::numeric_limits<int8_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int8_t>::max();
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 
   const int num_elements = output_shape.FlatSize();
@@ -119,15 +121,16 @@ inline void TransposeConv(
   }
 }
 
-// int16_t input (zero_point=0), int8_t filter, int64 accumulator
+// int16_t input (zero_point=0), int8_t filter, int32 or int64 accumulator
+template <typename Scalar>
 inline void TransposeConv(
     const ConvParams& params, const int32_t* output_multiplier,
     const int32_t* output_shift, const RuntimeShape& input_shape,
     const int16_t* input_data, const RuntimeShape& filter_shape,
     const int8_t* filter_data, const RuntimeShape& bias_shape,
-    const std::int64_t* bias_data, const RuntimeShape& output_shape,
+    const Scalar* bias_data, const RuntimeShape& output_shape,
     int16_t* output_data, const RuntimeShape& im2col_shape, int8_t* im2col_data,
-    std::int64_t* scratch_buffer) {
+    Scalar* scratch_buffer) {
   const int stride_width = params.stride_width;
   const int stride_height = params.stride_height;
   const int pad_width = params.padding_values.width;
@@ -150,14 +153,14 @@ inline void TransposeConv(
   const int filter_width = filter_shape.Dims(2);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
-  const int32_t output_activation_min = std::numeric_limits<int16_t>::min();
-  const int32_t output_activation_max = std::numeric_limits<int16_t>::max();
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
   TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
 
   const int num_elements = output_shape.FlatSize();
   // We need to initialize scratch_buffer to all 0s, as we apply the same
   // 'scatter' based trick as in float version.
-  memset(scratch_buffer, 0, num_elements * sizeof(std::int64_t));
+  memset(scratch_buffer, 0, num_elements * sizeof(Scalar));
 
   // Loop through input elements one at a time.
   for (int batch = 0; batch < batches; ++batch) {
@@ -198,8 +201,8 @@ inline void TransposeConv(
     for (int out_y = 0; out_y < output_height; ++out_y) {
       for (int out_x = 0; out_x < output_width; ++out_x) {
         for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-          std::int64_t acc = scratch_buffer[Offset(output_shape, batch, out_y,
-                                                   out_x, out_channel)];
+          Scalar acc = scratch_buffer[Offset(output_shape, batch, out_y, out_x,
+                                             out_channel)];
           if (bias_data) {
             acc += bias_data[out_channel];
           }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h
index af83de9..cf32ea5 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h
@@ -18,7 +18,7 @@ limitations under the License.
 #include <algorithm>
 #include <cmath>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h
new file mode 100644
index 0000000..de1c485
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/lstm_cell.h
@@ -0,0 +1,422 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void LstmCell(
+    const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
+    const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
+    const float* prev_activ_data, const RuntimeShape& weights_shape,
+    const float* weights_data, const RuntimeShape& unextended_bias_shape,
+    const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
+    const float* prev_state_data,
+    const RuntimeShape& unextended_output_state_shape, float* output_state_data,
+    const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
+    const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
+    const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int batches =
+      MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+                  output_state_shape, 0, output_activ_shape, 0);
+  const int height =
+      MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+                  output_state_shape, 1, output_activ_shape, 1);
+  const int width =
+      MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+                  output_state_shape, 2, output_activ_shape, 2);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  const int intern_activ_depth =
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth =
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+  // Depth-concatenate input and prev_activ data (in that order).
+  float const* concat_input_arrays_data[2] = {input_data, prev_activ_data};
+  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
+                                                       &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes,
+                concat_input_arrays_data, concat_temp_shape, concat_temp_data);
+
+  // Fully connected
+  tflite::FullyConnectedParams fc_params;
+  fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+  fc_params.float_activation_max = std::numeric_limits<float>::max();
+  FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
+                 weights_data, bias_shape, bias_data, activ_temp_shape,
+                 activ_temp_data);
+
+  // Memory state update (the LSTM "guts")
+  for (int b = 0; b < batches; ++b) {
+    for (int w = 0; w < width; ++w) {
+      for (int h = 0; h < height; ++h) {
+        for (int c = 0; c < output_depth; ++c) {
+          const float input_gate =
+              1.f /
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      0 * output_depth + c)]));
+          const float new_input = std::tanh(activ_temp_data[Offset(
+              activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+          const float forget_gate =
+              1.f /
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      2 * output_depth + c)]));
+          const float output_gate =
+              1.f /
+              (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
+                                                      3 * output_depth + c)]));
+          const float new_state =
+              input_gate * new_input +
+              forget_gate *
+                  prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+          output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+          output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+              output_gate * std::tanh(new_state);
+        }
+      }
+    }
+  }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+//  - The input activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that it is the natural interval for output
+//    activations (see next point) and these need to be concatenated together.
+//    We could accommodate different ranges by re-scaling, but we empirically
+//    found that setting the input activations range to be [-1, 127/128] in the
+//    first place, removing the need for re-scaling, greatly improves accuracy.
+//  - The output activations are quantized as uint8 on the interval
+//    [-1, 127/128].
+//    The rationale for that is that the definition of a LSTM cell makes them
+//    intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+//    makes for simpler, more accurate fixed-point arithmetic.
+//  - The output-at-previous-timestep state array is obviously quantized as
+//    the output activations.
+//  - The internal LSTM memory (not the output-at-previous-timestep, the other
+//    internal state array) is int16-quantized and may use any power-of-two,
+//    symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+//    StateIntegerBits below, see the below discussion of that template
+//    parameter ("The StateIntegerBits template parameter").
+//  - The output of the internal fully-connected node is int16-quantized
+//    on the interval [-8, 8 * 32767/32768], the rationale for which is
+//    explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+//   Logistic(4) = 1 - 1.8e-2     Tanh(4) = 1 - 6.7e-4
+//   Logistic(8) = 1 - 3.4e-4     Tanh(8) = 1 - 2.3e-7
+//   Logistic(16) = 1 - 1.1e-7    Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+template <int StateIntegerBits>
+inline void LstmCell(const LstmCellParams& params,
+                     const RuntimeShape& unextended_input_shape,
+                     const uint8_t* input_data_uint8,
+                     const RuntimeShape& unextended_prev_activ_shape,
+                     const uint8_t* prev_activ_data_uint8,
+                     const RuntimeShape& weights_shape,
+                     const uint8_t* weights_data_uint8,
+                     const RuntimeShape& unextended_bias_shape,
+                     const int32_t* bias_data_int32,
+                     const RuntimeShape& unextended_prev_state_shape,
+                     const int16_t* prev_state_data_int16,
+                     const RuntimeShape& unextended_output_state_shape,
+                     int16_t* output_state_data_int16,
+                     const RuntimeShape& unextended_output_activ_shape,
+                     uint8_t* output_activ_data_uint8,
+                     const RuntimeShape& unextended_concat_temp_shape,
+                     uint8_t* concat_temp_data_uint8,
+                     const RuntimeShape& unextended_activ_temp_shape,
+                     int16_t* activ_temp_data_int16, void* gemmlowp_context) {
+  (void)gemmlowp_context;  // only used in optimized code.
+  int32_t weights_zero_point = params.weights_zero_point;
+  int32_t accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape =
+      RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape =
+      RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(
+      input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
+      output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
+                   total_input_depth);
+  const int intern_activ_depth =
+      MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
+                   intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth =
+      MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                  3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+      MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate input and prev_activ data (in that order).
+  uint8_t const* concat_input_arrays_data[2] = {input_data_uint8,
+                                                prev_activ_data_uint8};
+  const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
+                                                       &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes,
+                concat_input_arrays_data, concat_temp_shape,
+                concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b) {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c) {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32_t accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d) {
+        int16_t input_val =
+            concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16_t weights_val =
+            weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum =
+          MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, accum));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b) {
+    for (int c = 0; c < output_depth; ++c) {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input = F3::FromRaw(
+          activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input = F3::FromRaw(
+          activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output =
+          gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input = F3::FromRaw(
+          activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input = F3::FromRaw(
+          activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation =
+          input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state = gemmlowp::SaturatingAdd(
+          gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+          prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number of integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory.
+      int16_t rescaled_output_activ =
+          gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16_t clamped_output_activ = std::max<int16_t>(
+          -128, std::min<int16_t>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] =
+          128 + clamped_output_activ;
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h
index d6aaf8b..63ece01 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h
@@ -15,6 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MUL_H_
 
+#include <algorithm>
+#include <complex>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 
 namespace tflite {
@@ -51,7 +54,7 @@ inline void Mul(const ArithmeticParams& params,
   GetActivationParams(params, &output_activation_min, &output_activation_max);
 
   const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+      MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
   for (int i = 0; i < flat_size; ++i) {
     output_data[i] = ActivationFunctionWithMinMax(
         input1_data[i] * input2_data[i], output_activation_min,
@@ -59,6 +62,20 @@ inline void Mul(const ArithmeticParams& params,
   }
 }
 
+inline void Mul(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape,
+                const std::complex<float>* input1_data,
+                const RuntimeShape& input2_shape,
+                const std::complex<float>* input2_data,
+                const RuntimeShape& output_shape,
+                std::complex<float>* output_data) {
+  const int flat_size =
+      MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
+  for (int i = 0; i < flat_size; ++i) {
+    output_data[i] = input1_data[i] * input2_data[i];
+  }
+}
+
 inline void Mul(const ArithmeticParams& params,
                 const RuntimeShape& input1_shape, const uint8_t* input1_data,
                 const RuntimeShape& input2_shape, const uint8_t* input2_data,
@@ -66,7 +83,7 @@ inline void Mul(const ArithmeticParams& params,
   TFLITE_DCHECK_LE(params.quantized_activation_min,
                    params.quantized_activation_max);
   const int flat_size =
-      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+      MatchingExtendedShapeFlatSize(input1_shape, input2_shape, output_shape);
 
   MulElementwise(flat_size, params, input1_data, input2_data, output_data);
 }
@@ -160,6 +177,37 @@ void BroadcastMul4DSlow(const ArithmeticParams& params,
   }
 }
 
+inline void BroadcastMul4DSlow(const ArithmeticParams& params,
+                               const RuntimeShape& unextended_input1_shape,
+                               const std::complex<float>* input1_data,
+                               const RuntimeShape& unextended_input2_shape,
+                               const std::complex<float>* input2_data,
+                               const RuntimeShape& unextended_output_shape,
+                               std::complex<float>* output_data) {
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+
+  for (int b = 0; b < output_shape.Dims(0); ++b) {
+    for (int y = 0; y < output_shape.Dims(1); ++y) {
+      for (int x = 0; x < output_shape.Dims(2); ++x) {
+        for (int c = 0; c < output_shape.Dims(3); ++c) {
+          output_data[Offset(output_shape, b, y, x, c)] =
+              input1_data[SubscriptToIndex(desc1, b, y, x, c)] *
+              input2_data[SubscriptToIndex(desc2, b, y, x, c)];
+        }
+      }
+    }
+  }
+}
+
 }  // namespace reference_ops
 }  // namespace tflite
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h
index fe1b8f4..b4b2a75 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pad.h
@@ -24,8 +24,8 @@ namespace tflite {
 
 namespace reference_ops {
 
-// TFLite Pad supports activation tensors with up to 4 dimensions.
-constexpr int PadKernelMaxDimensionCount() { return 4; }
+// TFLite Pad supports activation tensors with up to 5 dimensions.
+constexpr int PadKernelMaxDimensionCount() { return 5; }
 
 // There are two versions of pad: Pad and PadV2.  In PadV2 there is a second
 // scalar input that provides the padding value.  Therefore pad_value_ptr can be
@@ -46,8 +46,8 @@ inline void PadImpl(const tflite::PadParams& op_params,
   TFLITE_DCHECK_LE(op_params.left_padding_count, PadKernelMaxDimensionCount());
   TFLITE_DCHECK_LE(op_params.right_padding_count, PadKernelMaxDimensionCount());
 
-  // Runtime calls are currently fixed at 4 dimensions. Copy inputs so we can
-  // pad them to 4 dims (yes, we are "padding the padding").
+  // Runtime calls are currently fixed at 5 dimensions. Copy inputs so we can
+  // pad them to 5 dims (yes, we are "padding the padding").
   int left_padding_copy[PadKernelMaxDimensionCount()];
   for (int i = 0; i < PadKernelMaxDimensionCount(); i++) {
     left_padding_copy[i] = 0;
@@ -67,39 +67,46 @@ inline void PadImpl(const tflite::PadParams& op_params,
   }
 
   const int output_batch = ext_output_shape.Dims(0);
-  const int output_height = ext_output_shape.Dims(1);
-  const int output_width = ext_output_shape.Dims(2);
-  const int output_depth = ext_output_shape.Dims(3);
+  const int output_plane = ext_output_shape.Dims(1);
+  const int output_height = ext_output_shape.Dims(2);
+  const int output_width = ext_output_shape.Dims(3);
+  const int output_depth = ext_output_shape.Dims(4);
 
   const int left_b_padding = left_padding_copy[0];
-  const int left_h_padding = left_padding_copy[1];
-  const int left_w_padding = left_padding_copy[2];
-  const int left_d_padding = left_padding_copy[3];
+  const int left_p_padding = left_padding_copy[1];
+  const int left_h_padding = left_padding_copy[2];
+  const int left_w_padding = left_padding_copy[3];
+  const int left_d_padding = left_padding_copy[4];
 
   const int right_b_padding = right_padding_copy[0];
-  const int right_h_padding = right_padding_copy[1];
-  const int right_w_padding = right_padding_copy[2];
-  const int right_d_padding = right_padding_copy[3];
+  const int right_p_padding = right_padding_copy[1];
+  const int right_h_padding = right_padding_copy[2];
+  const int right_w_padding = right_padding_copy[3];
+  const int right_d_padding = right_padding_copy[4];
 
   const T pad_value = *pad_value_ptr;
 
   const T* in_ptr = input_data;
   T* out_ptr = output_data;
   for (int out_b = 0; out_b < output_batch; ++out_b) {
-    for (int out_h = 0; out_h < output_height; ++out_h) {
-      for (int out_w = 0; out_w < output_width; ++out_w) {
-        for (int out_d = 0; out_d < output_depth; ++out_d) {
-          if (out_b < left_b_padding ||
-              out_b >= output_batch - right_b_padding ||
-              out_h < left_h_padding ||
-              out_h >= output_height - right_h_padding ||
-              out_w < left_w_padding ||
-              out_w >= output_width - right_w_padding ||
-              out_d < left_d_padding ||
-              out_d >= output_depth - right_d_padding) {
-            *out_ptr++ = pad_value;
-          } else {
-            *out_ptr++ = *in_ptr++;
+    for (int out_p = 0; out_p < output_plane; ++out_p) {
+      for (int out_h = 0; out_h < output_height; ++out_h) {
+        for (int out_w = 0; out_w < output_width; ++out_w) {
+          for (int out_d = 0; out_d < output_depth; ++out_d) {
+            if (out_b < left_b_padding ||
+                out_b >= output_batch - right_b_padding ||
+                out_p < left_p_padding ||
+                out_p >= output_plane - right_p_padding ||
+                out_h < left_h_padding ||
+                out_h >= output_height - right_h_padding ||
+                out_w < left_w_padding ||
+                out_w >= output_width - right_w_padding ||
+                out_d < left_d_padding ||
+                out_d >= output_depth - right_d_padding) {
+              *out_ptr++ = pad_value;
+            } else {
+              *out_ptr++ = *in_ptr++;
+            }
           }
         }
       }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h
index 904372a..3657ffd 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_POOLING_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
@@ -23,7 +25,7 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const float* input_data,
                         const RuntimeShape& output_shape, float* output_data) {
@@ -66,6 +68,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           const float average = total / filter_count;
           output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
               ActivationFunctionWithMinMax(average, params.float_activation_min,
@@ -74,9 +77,10 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const uint8_t* input_data,
                         const RuntimeShape& output_shape,
@@ -122,6 +126,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           acc = (acc + filter_count / 2) / filter_count;
           acc = std::max(acc, params.quantized_activation_min);
           acc = std::min(acc, params.quantized_activation_max);
@@ -131,6 +136,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h
index b66af02..6d1dbe0 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PRELU_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
index 01dceec..760f54d 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PROCESS_BROADCAST_SHAPES_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h
index 0c561fd..b791413 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <limits>
+#include <vector>
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
@@ -49,6 +50,39 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,
   }
 }
 
+// Quantizes per-channel.
+template <typename InputT, typename OutputT>
+inline void PerChannelQuantize(
+    const tflite::PerChannelQuantizationParams& op_params,
+    const RuntimeShape& input_shape, const InputT* input_data,
+    const RuntimeShape& output_shape, OutputT* output_data) {
+  // Ensure flat size is same.
+  MatchingFlatSize(input_shape, output_shape);
+
+  const int32_t* zero_point = op_params.zero_point;
+  const float* scale = op_params.scale;
+  const int32_t quantized_dimension = op_params.quantized_dimension;
+  const int32_t num_dims = input_shape.DimensionsCount();
+  const int32_t* dims_data = input_shape.DimsData();
+  std::vector<int> current_dim(num_dims, 0);
+  static constexpr int32_t min_val = std::numeric_limits<OutputT>::min();
+  static constexpr int32_t max_val = std::numeric_limits<OutputT>::max();
+
+  do {
+    size_t offset =
+        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
+                            current_dim.data(), 0, nullptr);
+    const InputT val = input_data[offset];
+    const int channel = current_dim[quantized_dimension];
+    int32_t unclamped = static_cast<int32_t>(TfLiteRound(
+                            val / static_cast<float>(scale[channel]))) +
+                        zero_point[channel];
+    int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
+    output_data[offset] = static_cast<OutputT>(clamped);
+  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
+                     current_dim.data()));
+}
+
 }  // namespace reference_ops
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h
index 38e7029..54f24f4 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REDUCE_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
@@ -23,6 +25,25 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
+// Check if the reduction at index is the first one along the dimensions given
+// in axis.
+inline bool IsFirstReduction(const int* index, const int num_axis,
+                             const int* axis) {
+  if (num_axis == 0) {
+    return true;
+  }
+
+  TFLITE_DCHECK(index != nullptr);
+  TFLITE_DCHECK(axis != nullptr);
+  for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) {
+    if (index[axis[axis_idx]] != 0) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 namespace tflite {
 
 namespace reference_ops {
@@ -35,8 +56,7 @@ inline bool Reduce(const In* input_data, const int* input_dims,
                    const int* output_dims, const int input_num_dims,
                    const int output_num_dims, const int* axis,
                    const int num_axis, int* input_iter,
-                   Out reducer(const Out current, const In in),
-                   Out* output_data) {
+                   Out reducer(Out current, const In in), Out* output_data) {
   // Reset input iterator.
   for (int idx = 0; idx < input_num_dims; ++idx) {
     input_iter[idx] = 0;
@@ -53,6 +73,37 @@ inline bool Reduce(const In* input_data, const int* input_dims,
   return true;
 }
 
+// Similar to above Reduce function but takes two reducer functions.
+// The 'reducer_first' is called with the first value of the reduction,
+// 'reducer_next' is then called for all the others.
+template <typename In, typename Out>
+inline bool Reduce(const In* input_data, const int* input_dims,
+                   const int* output_dims, const int input_num_dims,
+                   const int output_num_dims, const int* axis,
+                   const int num_axis, int* input_iter,
+                   const std::function<Out(In in)>& reducer_first,
+                   const std::function<Out(Out current, In in)>& reducer_next,
+                   Out* output_data) {
+  // Reset input iterator.
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    input_iter[idx] = 0;
+  }
+  // Iterate through input_data.
+  do {
+    size_t input_offset =
+        ReducedOutputOffset(input_num_dims, input_dims, input_iter, 0, nullptr);
+    size_t output_offset = ReducedOutputOffset(input_num_dims, input_dims,
+                                               input_iter, num_axis, axis);
+    if (IsFirstReduction(input_iter, num_axis, axis)) {
+      output_data[output_offset] = reducer_first(input_data[input_offset]);
+    } else {
+      output_data[output_offset] =
+          reducer_next(output_data[output_offset], input_data[input_offset]);
+    }
+  } while (NextIndex(input_num_dims, input_dims, input_iter));
+  return true;
+}
+
 // This method parses the input 'axis' to remove duplicates and handle negative
 // values, and returns a valid 'out_axis'
 inline bool ResolveAxis(const int num_dims, const int* axis,
@@ -111,7 +162,8 @@ inline bool InitTensorDataForReduce(const int* dims, const int num_dims,
   for (int idx = 0; idx < num_dims; ++idx) {
     size_t current = static_cast<size_t>(dims[idx]);
     // Overflow prevention.
-    if (num_elements > std::numeric_limits<size_t>::max() / current) {
+    if (current > 0 &&
+        num_elements > std::numeric_limits<size_t>::max() / current) {
       return false;
     }
     num_elements *= current;
@@ -132,17 +184,20 @@ inline bool ReduceGeneric(const T* input_data, const int* input_dims,
                           bool keep_dims, int* temp_index, int* resolved_axis,
                           T init_value,
                           T reducer(const T current, const T in)) {
-  // Return early when input shape has zero dim.
-  for (int i = 0; i < input_num_dims; ++i) {
-    if (input_dims[i] == 0) return true;
-  }
-
   // Reset output data.
   if (!InitTensorDataForReduce(output_dims, output_num_dims, init_value,
                                output_data)) {
     return false;
   }
 
+  // Return early when the input shape has a zero dim. This is done after
+  // initializing the output tensor because there are cases where the input
+  // tensor is empty but the output tensor is not. In that case, the output
+  // tensor should be filled with init_value.
+  for (int i = 0; i < input_num_dims; ++i) {
+    if (input_dims[i] == 0) return true;
+  }
+
   // Resolve axis.
   int num_resolved_axis = 0;
   if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -159,12 +214,11 @@ inline bool ReduceGeneric(const T* input_data, const int* input_dims,
 // It does so in two stages, first calculates the sum of elements along the axis
 // then divides it by the number of element in axis.
 template <typename T, typename U>
-inline bool MeanOrSum(const T* input_data, const int* input_dims,
-                      const int input_num_dims, T* output_data,
-                      const int* output_dims, const int output_num_dims,
-                      const int* axis, const int num_axis_dimensions, bool keep_dims,
-                      int* temp_index, int* resolved_axis, U* temp_sum,
-                      bool compute_sum) {
+inline bool Mean(const T* input_data, const int* input_dims,
+                 const int input_num_dims, T* output_data,
+                 const int* output_dims, const int output_num_dims,
+                 const int* axis, const int num_axis_dimensions, bool keep_dims,
+                 int* temp_index, int* resolved_axis, U* temp_sum) {
   ruy::profiler::ScopeLabel label("Mean");
   // Reset output data.
   size_t num_outputs = 1;
@@ -207,24 +261,18 @@ inline bool MeanOrSum(const T* input_data, const int* input_dims,
 
   if (num_elements_in_axis > 0) {
     for (size_t idx = 0; idx < num_outputs; ++idx) {
-      if (compute_sum) {
-        output_data[idx] = static_cast<T>(temp_sum[idx] / static_cast<U>(1));
-      }
-      else {
-        output_data[idx] =
-            static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
-      }
+      output_data[idx] =
+          static_cast<T>(temp_sum[idx] / static_cast<U>(num_elements_in_axis));
     }
   }
   return true;
 }
 
 template <typename T>
-inline void MeanOrSum(const tflite::MeanParams& op_params,
-                      const RuntimeShape& unextended_input_shape,
-                      const T* input_data,
-                      const RuntimeShape& unextended_output_shape, T* output_data,
-                      bool compute_sum) {
+inline void Mean(const tflite::MeanParams& op_params,
+                 const RuntimeShape& unextended_input_shape,
+                 const T* input_data,
+                 const RuntimeShape& unextended_output_shape, T* output_data) {
   ruy::profiler::ScopeLabel label("Mean4D");
 
   // Current implementation only supports dimension equals 4 and simultaneous
@@ -258,25 +306,18 @@ inline void MeanOrSum(const tflite::MeanParams& op_params,
           value += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)];
         }
       }
-
-      if (compute_sum) {
-        output_data[Offset(output_shape, out_b, 0, 0, out_d)] = value;
-      }
-      else {
-        output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
-            value / (input_width * input_height);
-      }
+      output_data[Offset(output_shape, out_b, 0, 0, out_d)] =
+          value / (input_width * input_height);
     }
   }
 }
 
-inline void MeanOrSum(const tflite::MeanParams& op_params,
-                      const RuntimeShape& unextended_input_shape,
-                      const uint8_t* input_data, int32_t input_zero_point,
-                      float input_scale, const RuntimeShape& unextended_output_shape,
-                      uint8_t* output_data, int32_t output_zero_point,
-                      float output_scale,
-                      bool compute_sum) {
+inline void Mean(const tflite::MeanParams& op_params,
+                 const RuntimeShape& unextended_input_shape,
+                 const uint8_t* input_data, int32_t input_zero_point,
+                 float input_scale, const RuntimeShape& unextended_output_shape,
+                 uint8_t* output_data, int32_t output_zero_point,
+                 float output_scale) {
   ruy::profiler::ScopeLabel label("Mean4D/Uint8");
 
   // Current implementation only supports dimension equals 4 and simultaneous
@@ -304,11 +345,10 @@ inline void MeanOrSum(const tflite::MeanParams& op_params,
   constexpr int32_t kMinValue = std::numeric_limits<uint8_t>::min();
   constexpr int32_t kMaxValue = std::numeric_limits<uint8_t>::max();
 
-  int32_t bias =
-      output_zero_point -
-      static_cast<int32_t>(input_zero_point * input_scale / output_scale);
-  double real_scale = compute_sum ?
-      static_cast<double>(input_scale / (1 * output_scale)) :
+  float temp = input_zero_point * input_scale / output_scale;
+  temp = temp > 0 ? temp + 0.5f : temp - 0.5f;
+  int32_t bias = output_zero_point - static_cast<int32_t>(temp);
+  double real_scale =
       static_cast<double>(input_scale / (num_elements_in_axis * output_scale));
 
   int32_t multiplier;
@@ -368,6 +408,14 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
     temp_sum[idx] = U();
   }
 
+  // Return early when the input shape has a zero dim. This is done after
+  // resetting the accumulators above because there are cases where the input
+  // tensor is empty but the output tensor is not. Unlike ReduceGeneric, this
+  // function has no init_value parameter, so nothing further is written here.
+  for (int i = 0; i < input_num_dims; ++i) {
+    if (input_dims[i] == 0) return true;
+  }
+
   // Resolve axis.
   int num_resolved_axis = 0;
   if (!ResolveAxis(input_num_dims, axis, num_axis_dimensions, resolved_axis,
@@ -420,6 +468,73 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
   return true;
 }
 
+template <typename T, typename U>
+inline bool QuantizedMeanOrSumExtraArgs(
+    const T* input_data, int32_t input_zero_point, float input_scale,
+    const int* input_dims, const int input_num_dims, T* output_data,
+    float output_scale, int32_t output_multiplier, int output_shift,
+    int32_t output_zero_point, const int* output_dims,
+    const int output_num_dims, const int* axis, const int num_axis_dimensions,
+    bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum,
+    bool compute_sum) {
+  return QuantizedMeanOrSum<T, U>(
+      input_data, input_zero_point, input_scale, input_dims, input_num_dims,
+      output_data, output_zero_point, output_scale, output_dims,
+      output_num_dims, axis, num_axis_dimensions, keep_dims, temp_index,
+      resolved_axis, temp_sum, compute_sum);
+}
+
+template <typename T>
+inline bool QuantizedReduceProd(const T* input_data, int32_t input_zero_point,
+                                const RuntimeShape& input_shape, T* output_data,
+                                int32_t output_zero_point,
+                                const RuntimeShape& output_shape,
+                                const int* axis,
+                                const int64_t num_axis_dimensions,
+                                bool keep_dims, int* temp_index,
+                                int* resolved_axis, int32_t* temp_prod,
+                                int32_t scaling_multiplier, int scaling_shift) {
+  const int32_t kMinValue = std::numeric_limits<T>::min();
+  const int32_t kMaxValue = std::numeric_limits<T>::max();
+
+  // Resolve axis.
+  int num_resolved_axis = 0;
+  if (!ResolveAxis(input_shape.DimensionsCount(), axis, num_axis_dimensions,
+                   resolved_axis, &num_resolved_axis)) {
+    return false;
+  }
+
+  // Calculate the reduced product by rescaling each multiplication step to
+  // avoid an overflow.
+  auto reducer_first = [&](T in) -> int32_t { return in - input_zero_point; };
+
+  auto reducer_next = [&](int32_t current, T in) -> int32_t {
+    const int64_t result =
+        static_cast<int64_t>(current) * (in - input_zero_point);
+    return MultiplyByQuantizedMultiplier(result, scaling_multiplier,
+                                         scaling_shift);
+  };
+
+  if (!Reduce<T, int32_t>(
+          input_data, input_shape.DimsData(), output_shape.DimsData(),
+          input_shape.DimensionsCount(), output_shape.DimensionsCount(),
+          resolved_axis, num_resolved_axis, temp_index, reducer_first,
+          reducer_next, temp_prod)) {
+    return false;
+  }
+
+  for (int i = 0; i < output_shape.FlatSize(); i++) {
+    int32_t result =
+        MultiplyByQuantizedMultiplier(static_cast<int64_t>(temp_prod[i]),
+                                      scaling_multiplier, scaling_shift) +
+        output_zero_point;
+    result = std::min(std::max(result, kMinValue), kMaxValue);
+    output_data[i] = static_cast<T>(result);
+  }
+
+  return true;
+}
+
 }  // namespace reference_ops
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h
index 5d7e3b1..662046f 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_REQUANTIZE_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h
new file mode 100644
index 0000000..ec8ec26
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h
@@ -0,0 +1,228 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+inline void ComputeInterpolationValues(const float value, const float scale,
+                                       const bool half_pixel_centers,
+                                       int32_t input_size, float* scaled_value,
+                                       int32_t* lower_bound,
+                                       int32_t* upper_bound) {
+  if (half_pixel_centers) {
+    *scaled_value = (value + 0.5f) * scale - 0.5f;
+  } else {
+    *scaled_value = value * scale;
+  }
+  float scaled_value_floor = std::floor(*scaled_value);
+  *lower_bound = std::max(static_cast<int32_t>(scaled_value_floor),
+                          static_cast<int32_t>(0));
+  *upper_bound =
+      std::min(static_cast<int32_t>(std::ceil(*scaled_value)), input_size - 1);
+}
+
+template <typename T>
+inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params,
+                           const RuntimeShape& unextended_input_shape,
+                           const T* input_data,
+                           const RuntimeShape& unextended_output_size_shape,
+                           const int32_t* output_size_data,
+                           const RuntimeShape& unextended_output_shape,
+                           T* output_data) {
+  // If half_pixel_centers is True, align_corners must be False.
+  TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_size_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+  int32_t input_height = input_shape.Dims(1);
+  int32_t input_width = input_shape.Dims(2);
+  int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
+  int32_t output_height =
+      output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
+  int32_t output_width =
+      output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
+
+  float height_scale = static_cast<float>(input_height) / output_height;
+  float width_scale = static_cast<float>(input_width) / output_width;
+  if (op_params.align_corners && output_height > 1) {
+    height_scale = static_cast<float>(input_height - 1) / (output_height - 1);
+  }
+  if (op_params.align_corners && output_width > 1) {
+    width_scale = static_cast<float>(input_width - 1) / (output_width - 1);
+  }
+  const float rounding_offset = std::numeric_limits<T>::is_integer ? .5f : .0f;
+
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < output_height; ++y) {
+      float input_y;
+      int32_t y0, y1;
+      ComputeInterpolationValues(y, height_scale, op_params.half_pixel_centers,
+                                 input_height, &input_y, &y0, &y1);
+      for (int x = 0; x < output_width; ++x) {
+        float input_x;
+        int32_t x0, x1;
+        ComputeInterpolationValues(x, width_scale, op_params.half_pixel_centers,
+                                   input_width, &input_x, &x0, &x1);
+        for (int c = 0; c < depth; ++c) {
+          T interpolation =
+              static_cast<T>(input_data[Offset(input_shape, b, y0, x0, c)] *
+                                 (1 - (input_y - y0)) * (1 - (input_x - x0)) +
+                             input_data[Offset(input_shape, b, y1, x0, c)] *
+                                 (input_y - y0) * (1 - (input_x - x0)) +
+                             input_data[Offset(input_shape, b, y0, x1, c)] *
+                                 (1 - (input_y - y0)) * (input_x - x0) +
+                             input_data[Offset(input_shape, b, y1, x1, c)] *
+                                 (input_y - y0) * (input_x - x0) +
+                             rounding_offset);
+          output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+        }
+      }
+    }
+  }
+}
+
+inline void ComputeInterpolationValuesInteger(
+    const int32_t value, const int32_t scale_10, const bool half_pixel_centers,
+    int32_t input_size, int32_t* scaled_value, int32_t* lower_bound,
+    int32_t* upper_bound) {
+  if (half_pixel_centers) {
+    *scaled_value = value * scale_10 + scale_10 / 2 - (1 << 9);
+  } else {
+    *scaled_value = value * scale_10;
+  }
+  constexpr int32_t zero = 0;
+  *lower_bound = std::max(*scaled_value / (1 << 10), zero);
+  *upper_bound =
+      std::min((*scaled_value + (1 << 10) - 1) / (1 << 10), input_size - 1);
+}
+
+// Same as ResizeBilinear but doesn't use any floating-point for the resize.
+template <typename T>
+inline void ResizeBilinearInteger(
+    const tflite::ResizeBilinearParams& op_params,
+    const RuntimeShape& unextended_input_shape, const T* input_data,
+    const RuntimeShape& unextended_output_size_shape,
+    const int32_t* output_size_data,
+    const RuntimeShape& unextended_output_shape, T* output_data) {
+  // If half_pixel_centers is True, align_corners must be False.
+  TFLITE_DCHECK(!op_params.half_pixel_centers || !op_params.align_corners);
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_size_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape output_size_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_size_shape);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t input_height = input_shape.Dims(1);
+  const int32_t input_width = input_shape.Dims(2);
+  const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(0), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(1), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(2), 1);
+  TFLITE_DCHECK_EQ(output_size_shape.Dims(3), 2);
+  const int32_t output_height =
+      output_size_data[Offset(output_size_shape, 0, 0, 0, 0)];
+  const int32_t output_width =
+      output_size_data[Offset(output_size_shape, 0, 0, 0, 1)];
+
+  int32_t height_scale_10 =
+      ((1 << 10) * input_height + output_height / 2) / output_height;
+  int32_t width_scale_10 =
+      ((1 << 10) * input_width + output_width / 2) / output_width;
+  if (op_params.align_corners && output_height > 1) {
+    height_scale_10 =
+        ((1 << 10) * (input_height - 1) + (output_height - 1) / 2) /
+        (output_height - 1);
+  }
+  if (op_params.align_corners && output_width > 1) {
+    width_scale_10 = ((1 << 10) * (input_width - 1) + (output_width - 1) / 2) /
+                     (output_width - 1);
+  }
+
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < output_height; ++y) {
+      int32_t input_y, y0, y1;
+      ComputeInterpolationValuesInteger(y, height_scale_10,
+                                        op_params.half_pixel_centers,
+                                        input_height, &input_y, &y0, &y1);
+      for (int x = 0; x < output_width; ++x) {
+        int32_t input_x, x0, x1;
+        ComputeInterpolationValuesInteger(x, width_scale_10,
+                                          op_params.half_pixel_centers,
+                                          input_width, &input_x, &x0, &x1);
+        for (int c = 0; c < depth; ++c) {
+          const int64_t output_20_ll =
+              static_cast<int64_t>(
+                  input_data[Offset(input_shape, b, y0, x0, c)]) *
+              ((1 << 10) - (input_y - (1 << 10) * y0)) *
+              ((1 << 10) - (input_x - (1 << 10) * x0));
+          const int64_t output_20_lu =
+              static_cast<int64_t>(
+                  input_data[Offset(input_shape, b, y1, x0, c)]) *
+              (input_y - (1 << 10) * y0) *
+              ((1 << 10) - (input_x - (1 << 10) * x0));
+          const int64_t output_20_rl =
+              static_cast<int64_t>(
+                  input_data[Offset(input_shape, b, y0, x1, c)]) *
+              ((1 << 10) - (input_y - (1 << 10) * y0)) *
+              (input_x - (1 << 10) * x0);
+          const int64_t output_20_ru =
+              static_cast<int64_t>(
+                  input_data[Offset(input_shape, b, y1, x1, c)]) *
+              (input_y - (1 << 10) * y0) * (input_x - (1 << 10) * x0);
+          const int64_t output_20 =
+              output_20_ll + output_20_lu + output_20_rl + output_20_ru;
+          const int64_t round = (output_20 > 0) ? (1 << 19) : -(1 << 19);
+          const T interpolation =
+              static_cast<T>((output_20 + round) / (1 << 20));
+          output_data[Offset(output_shape, b, y, x, c)] = interpolation;
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_BILINEAR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
index 0fd7f01..bbed46a 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_RESIZE_NEAREST_NEIGHBOR_H_
 
+#include <algorithm>
 #include <cmath>
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h
new file mode 100644
index 0000000..2230c96
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h
@@ -0,0 +1,151 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
+
+#include <cmath>
+
+#include "edge-impulse-sdk/third_party/ruy/ruy/profiler/instrumentation.h"  // from @ruy
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename D, typename T>  // D: condition element type; T: value element type.
+void Select(const RuntimeShape& input_condition_shape,  // Element-wise select: output[i] = condition[i] ? x[i] : y[i].
+            const D* input_condition_data, const RuntimeShape& input_x_shape,
+            const T* input_x_data, const RuntimeShape& input_y_shape,
+            const T* input_y_data, const RuntimeShape& output_shape,
+            T* output_data) {
+  ruy::profiler::ScopeLabel label("Select");
+  int64_t flatsize;  // Number of elements to process; assigned in exactly one branch below.
+  // When every shape holds exactly one element (scalars and one-element
+  // tensors may be mixed), skip shape matching and process one element.
+  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1) {
+    flatsize = 1;
+  } else {
+    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape,  // Presumably also checks that the four shapes agree -- confirm against its definition.
+                                input_y_shape, output_shape);
+  }
+  for (int64_t i = 0; i < flatsize; ++i) {
+    output_data[i] =
+        input_condition_data[i] ? input_x_data[i] : input_y_data[i];  // Truthy condition picks x, otherwise y.
+  }
+}
+
+template <typename D, typename T>  // D: condition element type; T: value element type.
+void RankOneSelect(const RuntimeShape& input_condition_shape,  // Slice-wise select: condition[i] picks the whole i-th slice from x or from y.
+                   const D* input_condition_data,
+                   const RuntimeShape& input_x_shape, const T* input_x_data,
+                   const RuntimeShape& input_y_shape, const T* input_y_data,
+                   const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("Select/RankOneSelect");
+  const int64_t outer_size = input_condition_shape.FlatSize();  // One condition value per slice.
+  int64_t inner_size;  // Number of elements copied per condition value.
+  if (input_condition_shape.DimensionsCount() == 0) {  // Rank-0 condition: the slice size is the full flat size of x/y/output.
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  } else {
+    TFLITE_DCHECK_EQ(  // Dim 0 of x, y and output is expected to equal the number of condition values.
+        MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
+        outer_size);
+    inner_size =  // Presumably the product of all dims except dim 0 -- confirm against MatchingFlatSizeSkipDim.
+        MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+
+  int64_t offset = 0;  // Element offset of the current slice; shared by source and destination.
+  for (int64_t i = 0; i < outer_size; i++) {
+    const T* input_data = input_condition_data[i] ? input_x_data : input_y_data;  // Source buffer for slice i.
+    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));  // NOTE(review): memcpy needs <cstring>; this header does not include it directly -- confirm it arrives transitively.
+    offset += inner_size;
+  }
+}
+
+template <typename D, typename T>  // D: condition element type; T: value element type.
+void BroadcastSelect5DSlow(const RuntimeShape& input_condition_shape,  // Broadcasting select for shapes of at most 5 dimensions.
+                           const D* input_condition_data,
+                           const RuntimeShape& input_x_shape,
+                           const T* input_x_data,
+                           const RuntimeShape& input_y_shape,
+                           const T* input_y_data,
+                           const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("Select/BroadcastSelectSlow");
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 5);  // Every shape may have at most 5 dimensions.
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 5);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 5);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 5);
+
+  NdArrayDesc<5> desc_condition;
+  NdArrayDesc<5> desc_x;
+  NdArrayDesc<5> desc_y;
+  NdArrayDesc<5> desc_output;
+  const RuntimeShape extended_output_shape =
+      RuntimeShape::ExtendedShape(5, output_shape);  // Output shape extended to 5 dims (presumably front-padded with 1s -- confirm).
+  CopyDimsToDesc(extended_output_shape, &desc_output);
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape,  // Fills the three input descriptors whose strides index the inputs below.
+                                      input_y_shape, &desc_condition, &desc_x,
+                                      &desc_y);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  for (int n = 0; n < desc_output.extents[0]; ++n) {  // Five nested loops, one per output dimension (n, b, y, x, c).
+    int out_idx_n = desc_output.extents[1] * n;  // Output index is accumulated from extents; input indices from descriptor strides.
+    int cond_idx_n = desc_condition.strides[0] * n;
+    int in_idx1_n = desc_x.strides[0] * n;
+    int in_idx2_n = desc_y.strides[0] * n;
+    for (int b = 0; b < desc_output.extents[1]; ++b) {
+      int out_idx_b = (out_idx_n + b) * desc_output.extents[2];
+      int cond_idx_b = cond_idx_n + desc_condition.strides[1] * b;
+      int in_idx1_b = in_idx1_n + desc_x.strides[1] * b;
+      int in_idx2_b = in_idx2_n + desc_y.strides[1] * b;
+      for (int y = 0; y < desc_output.extents[2]; ++y) {
+        int out_idx_y = (out_idx_b + y) * desc_output.extents[3];
+        int cond_idx_y = cond_idx_b + desc_condition.strides[2] * y;
+        int in_idx1_y = in_idx1_b + desc_x.strides[2] * y;
+        int in_idx2_y = in_idx2_b + desc_y.strides[2] * y;
+        for (int x = 0; x < desc_output.extents[3]; ++x) {
+          int out_idx = (out_idx_y + x) * desc_output.extents[4];  // Flat output index of the first element of this innermost run.
+          int cond_idx = cond_idx_y + desc_condition.strides[3] * x;
+          int in_idx1 = in_idx1_y + desc_x.strides[3] * x;
+          int in_idx2 = in_idx2_y + desc_y.strides[3] * x;
+          for (int c = 0; c < desc_output.extents[4]; ++c) {
+            output_data[out_idx] = input_condition_data[cond_idx]  // Truthy condition picks x, otherwise y.
+                                       ? input_x_data[in_idx1]
+                                       : input_y_data[in_idx2];
+            out_idx++;  // Output advances by one element; each input advances by its own innermost stride.
+            cond_idx += desc_condition.strides[4];
+            in_idx1 += desc_x.strides[4];
+            in_idx2 += desc_y.strides[4];
+          }
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SELECT_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h
new file mode 100644
index 0000000..8214269
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h
@@ -0,0 +1,80 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+namespace reference_ops {
+
+template <typename T>  // T: tensor element type.
+inline void Slice(const tflite::SliceParams& op_params,  // Walks the requested box of the input and passes each flat input offset to `writer`.
+                  const RuntimeShape& input_shape,
+                  const RuntimeShape& output_shape,  // Not referenced in this body.
+                  SequentialTensorWriter<T>* writer) {
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);  // Input shape extended to 5 dims.
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
+  int start[5];  // Inclusive start index per axis.
+  int stop[5];  // Exclusive stop index per axis.
+  for (int i = 0; i < 5; ++i) {
+    int padded_i = 5 - i;  // Number of axes from axis i through the last axis.
+    start[i] =  // Axes with no begin entry (the padded leading axes) start at 0.
+        begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+    stop[i] =  // Axes with no size entry, or with size == -1, run to the end of the axis.
+        (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+            ? ext_shape.Dims(i)
+            : start[i] + op_params.size[size_count - padded_i];
+  }
+
+  for (int i0 = start[0]; i0 < stop[0]; ++i0) {  // Visit [start, stop) on every axis; the last axis varies fastest.
+    for (int i1 = start[1]; i1 < stop[1]; ++i1) {
+      for (int i2 = start[2]; i2 < stop[2]; ++i2) {
+        for (int i3 = start[3]; i3 < stop[3]; ++i3) {
+          for (int i4 = start[4]; i4 < stop[4]; ++i4) {
+            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));  // Offset is computed against the extended input shape.
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>  // T: tensor element type.
+inline void Slice(const tflite::SliceParams& op_params,  // Raw-pointer overload: wraps the two buffers in a writer and delegates.
+                  const RuntimeShape& input_shape, const T* input_data,
+                  const RuntimeShape& output_shape, T* output_data) {
+  SequentialTensorWriter<T> writer(input_data, output_data);  // Writer built from the input and output buffers.
+  return Slice(op_params, input_shape, output_shape, &writer);  // Forwards to the writer-based overload.
+}
+
+template <typename T>  // T: tensor element type; not deducible from the arguments, so callers must name it.
+inline void Slice(const tflite::SliceParams& op_params,  // TfLiteTensor overload: wraps the two tensors in a writer and delegates.
+                  const RuntimeShape& input_shape, const TfLiteTensor* input,
+                  const RuntimeShape& output_shape, TfLiteTensor* output) {
+  SequentialTensorWriter<T> writer(input, output);  // Writer built from the input and output tensors.
+  return Slice(op_params, input_shape, output_shape, &writer);  // Forwards to the writer-based overload.
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SLICE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h
index 25a1b45..1c6c0b9 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/softmax.h
@@ -15,6 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SOFTMAX_H_
 
+#include <algorithm>
 #include <limits>
 
 #include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
@@ -159,7 +160,7 @@ inline int16_t SoftMaxCalculateExp(const SoftmaxParams& params,
       std::min(std::max(sym_scaled_diff, static_cast<int32_t>(-32768)),
                static_cast<int32_t>(32767));
   // apply the exp() LUT activation function
-  return generic_int16_table_lookup(sat_sym_scaled_diff, params.exp_lut);
+  return LUTLookup(sat_sym_scaled_diff, params.exp_lut);
 }
 // Quantized softmax with int16_t input and int16_t output.
 inline void SoftmaxInt16(const SoftmaxParams& params,
@@ -207,8 +208,8 @@ inline void SoftmaxInt16(const SoftmaxParams& params,
         std::min(std::max(sym_shifted_sum, static_cast<int32_t>(-32768)),
                  static_cast<int32_t>(32767)));
     // apply 1/(1 + x) LUT activation function
-    int16_t reciprocal_scale_Q015 = generic_int16_table_lookup(
-        sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
+    int16_t reciprocal_scale_Q015 =
+        LUTLookup(sat_sym_shifted_sum, params.one_over_one_plus_x_lut);
 
     // Rescale the exp_result with reciprocal
     // range of output is [0, 32767] correspond to [0.0, 1.0]
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h
new file mode 100644
index 0000000..53260ae
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h
@@ -0,0 +1,80 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_ops {
+
+template <typename T>  // T: tensor element type.
+inline void SpaceToDepth(const tflite::SpaceToDepthParams& op_params,  // Moves each block_size x block_size spatial block of the input into the depth of one output pixel.
+                         const RuntimeShape& unextended_input_shape,
+                         const T* input_data,
+                         const RuntimeShape& unextended_output_shape,
+                         T* output_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);  // Both shapes may have at most 4 dimensions.
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape =
+      RuntimeShape::ExtendedShape(4, unextended_input_shape);  // Extended to 4 dims, read below as (batch, height, width, depth).
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  const int input_depth = input_shape.Dims(3);
+  const int input_width = input_shape.Dims(2);
+  const int input_height = input_shape.Dims(1);
+  const int input_batch = input_shape.Dims(0);
+
+  const int output_depth = output_shape.Dims(3);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_batch = output_shape.Dims(0);
+
+  const int32_t block_size = op_params.block_size;
+
+  TFLITE_DCHECK_EQ(input_width, output_width * block_size);  // Width and height shrink by block_size ...
+  TFLITE_DCHECK_EQ(input_height, output_height * block_size);
+  TFLITE_DCHECK_EQ(input_depth * block_size * block_size, output_depth);  // ... and depth grows by block_size squared.
+  TFLITE_DCHECK_EQ(input_batch, output_batch);
+
+  for (int in_b = 0; in_b < input_batch; ++in_b) {  // Iterate over every input element and scatter it to its output position.
+    for (int in_h = 0; in_h < input_height; ++in_h) {
+      for (int in_w = 0; in_w < input_width; ++in_w) {
+        for (int in_d = 0; in_d < input_depth; ++in_d) {
+          const int out_d =  // Position inside the block (row * block_size + column) selects a group of input_depth channels.
+              in_d + ((in_h % block_size) * block_size + in_w % block_size) *
+                         input_depth;
+          const int out_w = in_w / block_size;  // Index of the block that contains this pixel.
+          const int out_h = in_h / block_size;
+          const int out_b = in_b;
+
+          const int input_index = Offset(input_shape, in_b, in_h, in_w, in_d);
+          const int output_index =
+              Offset(output_shape, out_b, out_h, out_w, out_d);
+
+          output_data[output_index] = input_data[input_index];
+        }
+      }
+    }
+  }
+}
+
+}  // namespace reference_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_SPACE_TO_DEPTH_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h
index 7d111d0..493d8f3 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/strided_slice.h
@@ -31,10 +31,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params,
                          const RuntimeShape& unextended_input_shape,
                          const RuntimeShape& unextended_output_shape,
                          SequentialTensorWriter<T>* writer) {
-  using strided_slice::LoopCondition;
-  using strided_slice::StartForAxis;
-  using strided_slice::StopForAxis;
-
   ruy::profiler::ScopeLabel label("StridedSlice");
 
   // Note that the output_shape is not used herein.
@@ -51,41 +47,71 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params,
   // requires (ie. all shapes must be 5D and are given backwards).
   strided_slice::StridedSlicePadIndices(&params_copy, 5);
 
-  const int start_0 = StartForAxis(params_copy, input_shape, 0);
-  const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0);
-  const int start_1 = StartForAxis(params_copy, input_shape, 1);
-  const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1);
-  const int start_2 = StartForAxis(params_copy, input_shape, 2);
-  const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2);
-  const int start_3 = StartForAxis(params_copy, input_shape, 3);
-  const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3);
-  const int start_4 = StartForAxis(params_copy, input_shape, 4);
-  const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4);
-
-  for (int offset_0 = start_0 * input_shape.Dims(1),
-           end_0 = stop_0 * input_shape.Dims(1),
-           step_0 = params_copy.strides[0] * input_shape.Dims(1);
-       !LoopCondition(offset_0, end_0, params_copy.strides[0]);
-       offset_0 += step_0) {
-    for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2),
-             end_1 = (offset_0 + stop_1) * input_shape.Dims(2),
-             step_1 = params_copy.strides[1] * input_shape.Dims(2);
-         !LoopCondition(offset_1, end_1, params_copy.strides[1]);
-         offset_1 += step_1) {
-      for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3),
-               end_2 = (offset_1 + stop_2) * input_shape.Dims(3),
-               step_2 = params_copy.strides[2] * input_shape.Dims(3);
-           !LoopCondition(offset_2, end_2, params_copy.strides[2]);
-           offset_2 += step_2) {
-        for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4),
-                 end_3 = (offset_2 + stop_3) * input_shape.Dims(4),
-                 step_3 = params_copy.strides[3] * input_shape.Dims(4);
-             !LoopCondition(offset_3, end_3, params_copy.strides[3]);
-             offset_3 += step_3) {
-          for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4;
-               !LoopCondition(offset_4, end_4, params_copy.strides[4]);
-               offset_4 += params_copy.strides[4]) {
-            writer->Write(offset_4);
+  const int start_0 =
+      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 0);
+  const int stop_0 = strided_slice::StridedSliceEndForAxis(
+      params_copy, input_shape, 0, start_0);
+  const int start_1 =
+      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 1);
+  const int stop_1 = strided_slice::StridedSliceEndForAxis(
+      params_copy, input_shape, 1, start_1);
+  const int start_2 =
+      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 2);
+  const int stop_2 = strided_slice::StridedSliceEndForAxis(
+      params_copy, input_shape, 2, start_2);
+  const int start_3 =
+      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 3);
+  const int stop_3 = strided_slice::StridedSliceEndForAxis(
+      params_copy, input_shape, 3, start_3);
+  const int start_4 =
+      strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 4);
+  const int stop_4 = strided_slice::StridedSliceEndForAxis(
+      params_copy, input_shape, 4, start_4);
+
+  auto lc = [&](int end, int stride, int index) {
+    if (stride < 0) {
+      return index > end;
+    } else {
+      return index < end;
+    }
+  };
+  // With a static_cast it is not possible to initialize
+  // a variable of type 'const int *'
+  // with an rvalue of type 'const int32_t *' (aka 'const long *').
+  // reinterpret_cast is required to handle this casting.
+  const int* shape = reinterpret_cast<const int*>(input_shape.DimsData());
+  const int* stride = reinterpret_cast<const int*>(params_copy.strides);
+  const bool inner_stride_is_1 = params_copy.strides[4] == 1;
+
+  for (int offset_0 = start_0; lc(stop_0, stride[0], offset_0);
+       offset_0 += stride[0]) {
+    for (int offset_1 = start_1; lc(stop_1, stride[1], offset_1);
+         offset_1 += stride[1]) {
+      for (int offset_2 = start_2; lc(stop_2, stride[2], offset_2);
+           offset_2 += stride[2]) {
+        for (int offset_3 = start_3; lc(stop_3, stride[3], offset_3);
+             offset_3 += stride[3]) {
+          // When the stride is 1, the inner loop is equivalent to the
+          // optimized slice inner loop. Otherwise, it is identical to the
+          // strided_slice reference implementation inner loop.
+          if (inner_stride_is_1) {
+            const int len = stop_4 - start_4;
+            int index = start_4 + offset_3 * shape[4] +
+                        offset_2 * shape[3] * shape[4] +
+                        offset_1 * shape[2] * shape[3] * shape[4] +
+                        offset_0 * shape[1] * shape[2] * shape[3] * shape[4];
+            if (len > 0) {
+              writer->WriteN(index, len);
+            }
+          } else {
+            for (int offset_4 = start_4; lc(stop_4, stride[4], offset_4);
+                 offset_4 += stride[4]) {
+              int index = offset_4 + offset_3 * shape[4] +
+                          offset_2 * shape[3] * shape[4] +
+                          offset_1 * shape[2] * shape[3] * shape[4] +
+                          offset_0 * shape[1] * shape[2] * shape[3] * shape[4];
+              writer->Write(index);
+            }
           }
         }
       }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h
index 7c66b63..44718a8 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h
@@ -105,63 +105,6 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
   NDOpsHelper<N>(output_desc, sub_func);
 }
 
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const uint8_t* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const uint8_t* input2_data,
-                             const RuntimeShape& output_shape,
-                             uint8_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t");
-  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    const int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    const int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<uint8_t>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
 template <int N = 5>
 inline void BroadcastSubSlow(const ArithmeticParams& params,
                              const RuntimeShape& input1_shape,
@@ -202,60 +145,6 @@ inline void BroadcastSubSlow(const ArithmeticParams& params,
   NDOpsHelper<N>(output_desc, sub_func);
 }
 
-template <int N = 5>
-inline void BroadcastSubSlow(const ArithmeticParams& params,
-                             const RuntimeShape& input1_shape,
-                             const int8_t* input1_data,
-                             const RuntimeShape& input2_shape,
-                             const int8_t* input2_data,
-                             const RuntimeShape& output_shape,
-                             int8_t* output_data) {
-  ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t");
-  NdArrayDesc<N> desc1;
-  NdArrayDesc<N> desc2;
-  NdArrayDesc<N> output_desc;
-  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
-                                      &desc2);
-  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest stride,
-  // typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for the
-  // best cache behavior.
-  auto sub_func = [&](int indexes[N]) {
-    const int32_t input1_val =
-        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
-    const int32_t input2_val =
-        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
-    const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32_t scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32_t scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32_t raw_sub = scaled_input1_val - scaled_input2_val;
-    const int32_t raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sub, params.output_multiplier, params.output_shift) +
-        params.output_offset;
-    const int32_t clamped_output =
-        std::min(params.quantized_activation_max,
-                 std::max(params.quantized_activation_min, raw_output));
-    output_data[SubscriptToIndex(output_desc, indexes)] =
-        static_cast<int8_t>(clamped_output);
-  };
-  NDOpsHelper<N>(output_desc, sub_func);
-}
-
 template <int N = 5>
 void BroadcastSubSlow(const ArithmeticParams& params,
                       const RuntimeShape& input1_shape,
@@ -376,19 +265,40 @@ inline void BroadcastSub16POTSlow(const ArithmeticParams& params,
   NDOpsHelper<N>(output_desc, sub_func);
 }
 
-// Element-wise Sub that can often be used for inner loop of broadcast sub as
-// well as the non-broadcast sub.
-inline void SubElementwise(int size, const ArithmeticParams& params,
-                           const uint8_t* input1_data,
-                           const uint8_t* input2_data, uint8_t* output_data) {
-  TFLITE_DCHECK_GT(params.input1_offset, -256);
-  TFLITE_DCHECK_GT(params.input2_offset, -256);
-  TFLITE_DCHECK_LT(params.input1_offset, 256);
-  TFLITE_DCHECK_LT(params.input2_offset, 256);
+template <typename T, int N = 5>
+void BroadcastQuantSubSlow(const ArithmeticParams& params,
+                           const RuntimeShape& input1_shape,
+                           const T* input1_data,
+                           const RuntimeShape& input2_shape,
+                           const T* input2_data,
+                           const RuntimeShape& output_shape, T* output_data) {
+  ruy::profiler::ScopeLabel label("BroadcastQuantSubSlow/T");
+  TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N);
+  NdArrayDesc<N> desc1;
+  NdArrayDesc<N> desc2;
+  NdArrayDesc<N> output_desc;
+  NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
+                                      &desc2);
+  CopyDimsToDesc(RuntimeShape::ExtendedShape(N, output_shape), &output_desc);
 
-  for (int i = 0; i < size; ++i) {
-    const int32_t input1_val = params.input1_offset + input1_data[i];
-    const int32_t input2_val = params.input2_offset + input2_data[i];
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest stride,
+  // typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for the
+  // best cache behavior.
+  auto sub_func = [&](int indexes[N]) {
+    const int32_t input1_val =
+        params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)];
+    const int32_t input2_val =
+        params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)];
     const int32_t shifted_input1_val = input1_val * (1 << params.left_shift);
     const int32_t shifted_input2_val = input2_val * (1 << params.left_shift);
     const int32_t scaled_input1_val =
@@ -405,21 +315,18 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
     const int32_t clamped_output =
         std::min(params.quantized_activation_max,
                  std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<uint8_t>(clamped_output);
-  }
+    output_data[SubscriptToIndex(output_desc, indexes)] =
+        static_cast<T>(clamped_output);
+  };
+  NDOpsHelper<N>(output_desc, sub_func);
 }
 
 // Element-wise add that can often be used for inner loop of broadcast add as
 // well as the non-broadcast add.
+template <typename T>
 inline void SubElementwise(int size, const ArithmeticParams& params,
-                           const int8_t* input1_data, const int8_t* input2_data,
-                           int8_t* output_data) {
-  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
-
+                           const T* input1_data, const T* input2_data,
+                           T* output_data) {
   for (int i = 0; i < size; ++i) {
     const int32_t input1_val = params.input1_offset + input1_data[i];
     const int32_t input2_val = params.input2_offset + input2_data[i];
@@ -439,7 +346,7 @@ inline void SubElementwise(int size, const ArithmeticParams& params,
     const int32_t clamped_output =
         std::min(params.quantized_activation_max,
                  std::max(params.quantized_activation_min, raw_output));
-    output_data[i] = static_cast<int8_t>(clamped_output);
+    output_data[i] = static_cast<T>(clamped_output);
   }
 }
 
@@ -469,11 +376,27 @@ inline void Sub(const ArithmeticParams& params,
   const int flat_size =
       MatchingElementsSize(input1_shape, input2_shape, output_shape);
 
-  const int32_t int8_max_value = std::numeric_limits<int8_t>::max();
-  TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value);
-  TFLITE_DCHECK_LE(params.input1_offset, int8_max_value);
-  TFLITE_DCHECK_LE(params.input2_offset, int8_max_value);
+  TFLITE_DCHECK_GE(params.input1_offset, -128);
+  TFLITE_DCHECK_GE(params.input2_offset, -128);
+  // offset = -quantization_params.zero_point in PrepareGeneralSubOp(),
+  // so its maximum can be 128, not 127.
+  TFLITE_DCHECK_LE(params.input1_offset, 128);
+  TFLITE_DCHECK_LE(params.input2_offset, 128);
+  SubElementwise(flat_size, params, input1_data, input2_data, output_data);
+}
+// int16 overload: checks its preconditions, then delegates to SubElementwise.
+inline void Sub(const ArithmeticParams& params,
+                const RuntimeShape& input1_shape, const int16_t* input1_data,
+                const RuntimeShape& input2_shape, const int16_t* input2_data,
+                const RuntimeShape& output_shape, int16_t* output_data) {
+  TFLITE_DCHECK_LE(params.quantized_activation_min,
+                   params.quantized_activation_max);
+
+  const int flat_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+  TFLITE_DCHECK_EQ(params.input1_offset, 0);  // int16 path: no input offsets
+  TFLITE_DCHECK_EQ(params.input2_offset, 0);
   SubElementwise(flat_size, params, input1_data, input2_data, output_data);
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h
index ca8a6e9..55fae7d 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose_conv.h
@@ -15,6 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_TRANSPOSE_CONV_H_
 
+#include <algorithm>
+
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
@@ -47,6 +49,8 @@ inline void TransposeConv(
   const int filter_width = filter_shape.Dims(2);
   const int output_height = output_shape.Dims(1);
   const int output_width = output_shape.Dims(2);
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
   if (bias_data) {
     TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
   }
@@ -97,14 +101,18 @@ inline void TransposeConv(
       }
     }
   }
-  if (bias_data) {
-    for (int batch = 0; batch < batches; ++batch) {
-      for (int out_y = 0; out_y < output_height; ++out_y) {
-        for (int out_x = 0; out_x < output_width; ++out_x) {
-          for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-            output_data[Offset(output_shape, batch, out_y, out_x,
-                               out_channel)] += bias_data[out_channel];
-          }
+
+  for (int batch = 0; batch < batches; ++batch) {
+    for (int out_y = 0; out_y < output_height; ++out_y) {
+      for (int out_x = 0; out_x < output_width; ++out_x) {
+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
+          float acc = output_data[Offset(output_shape, batch, out_y, out_x,
+                                         out_channel)];
+          if (bias_data) acc += bias_data[out_channel];
+
+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
+              ActivationFunctionWithMinMax(acc, output_activation_min,
+                                           output_activation_max);
         }
       }
     }
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cc b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cc
new file mode 100644
index 0000000..6ae01b8
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.cc
@@ -0,0 +1,809 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h"
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+namespace tensor_utils {
+// File-local int16 limits, held as int32_t so they can clamp 32-bit values.
+namespace {
+const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
+const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
+}  // namespace
+// Finds the extrema of values, reports them, and quantizes symmetrically to int8.
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float* min_value,
+                                     float* max_value, float* scaling_factor) {
+  // An empty range has no extrema: report 0 instead of dereferencing end().
+  const auto minmax = std::minmax_element(values, values + size);
+  *min_value = size > 0 ? *minmax.first : 0.0f;
+  *max_value = size > 0 ? *minmax.second : 0.0f;
+  PortableSymmetricQuantizeFloats(values, size, quantized_values, *min_value,
+                                  *max_value, scaling_factor);
+}
+// Symmetric int8 quantization to [-127, 127]; scale = max(|min|, |max|) / 127.
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float min_value,
+                                     float max_value, float* scaling_factor) {
+  const int32_t kScale = 127;
+  const float range = std::max(std::abs(min_value), std::abs(max_value));
+  if (range == 0) {  // zero range: emit all zeros with a neutral scale of 1
+    memset(quantized_values, 0, size * sizeof(int8_t));
+    *scaling_factor = 1;
+    return;
+  }
+  *scaling_factor = range / kScale;
+  const float scaling_factor_inv = kScale / range;
+  for (int i = 0; i < size; ++i) {
+    const int32_t quantized_value =
+        static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
+    // Clamp: just in case some odd numeric offset.
+    quantized_values[i] = static_cast<int8_t>(
+        std::min(kScale, std::max(-kScale, quantized_value)));
+  }
+}
+// Asymmetric int8 quantization; outputs the scale and the zero point (offset).
+void PortableAsymmetricQuantizeFloats(const float* values, const int size,
+                                      int8_t* quantized_values,
+                                      float* scaling_factor, int32_t* offset) {
+  const int32_t kMinScale = -128;
+  const int32_t kMaxScale = 127;
+  const double qmin_double = kMinScale;
+  const double qmax_double = kMaxScale;
+  const auto minmax = std::minmax_element(values, values + size);
+  const double rmin = size > 0 ? std::min(0.0f, *minmax.first) : 0.0f;
+  const double rmax = size > 0 ? std::max(0.0f, *minmax.second) : 0.0f;
+  if (rmin == rmax) {  // all zeros, or empty input (end() is never read)
+    memset(quantized_values, 0, size * sizeof(int8_t));
+    *scaling_factor = 1;
+    *offset = 0;
+    return;
+  } else {
+    double scale = (rmax - rmin) / (qmax_double - qmin_double);
+    const double zero_point_from_min = qmin_double - rmin / scale;
+    const double zero_point_from_max = qmax_double - rmax / scale;
+    const double zero_point_from_min_error =
+        std::abs(qmin_double) + std::abs(rmin / scale);
+    const double zero_point_from_max_error =
+        std::abs(qmax_double) + std::abs(rmax / scale);
+    const double zero_point_double =  // keep the candidate with the smaller error
+        zero_point_from_min_error < zero_point_from_max_error
+            ? zero_point_from_min
+            : zero_point_from_max;
+    int8_t nudged_zero_point = 0;  // zero point snapped into [-128, 127]
+    if (zero_point_double <= qmin_double) {
+      nudged_zero_point = kMinScale;
+    } else if (zero_point_double >= qmax_double) {
+      nudged_zero_point = kMaxScale;
+    } else {
+      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
+    }
+    *scaling_factor = scale;
+    *offset = nudged_zero_point;
+  }
+  const float scaling_factor_inv = 1.0f / *scaling_factor;
+  for (int i = 0; i < size; ++i) {
+    const int32_t quantized_value = static_cast<int32_t>(
+        TfLiteRound(*offset + values[i] * scaling_factor_inv));
+    quantized_values[i] =  // clamped to the int8 range before the narrowing store
+        std::min(kMaxScale, std::max(kMinScale, quantized_value));
+  }
+}
+// result[b * m_rows + r] += dot(row r of row-major matrix, vector of batch b).
+void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
+                                                 int m_rows, int m_cols,
+                                                 const float* vector,
+                                                 int n_batch, float* result) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    // Each batch reads one m_cols-long input and updates m_rows outputs.
+    const float* const batch_vector = vector + batch * m_cols;
+    float* const batch_result = result + batch * m_rows;
+    for (int row = 0; row < m_rows; ++row) {
+      const float* const matrix_row = matrix + row * m_cols;
+      float row_dot = 0.0f;
+      for (int col = 0; col < m_cols; ++col) {
+        row_dot += matrix_row[col] * batch_vector[col];
+      }
+      batch_result[row] += row_dot;
+    }
+  }
+}
+// Hybrid kernel: int8 matrix x batched int8 vectors, scaled per batch into result.
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result) {
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    // Get the address of the first row.
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      // Initialize the dot product sum for the row to 0.
+      int32_t dotprod = 0;
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
+        dotprod += (*row_ptr) * (vectors[col]);
+      }  // for col
+      *result += dotprod * batch_scaling_factor;  // accumulates; does not overwrite
+      ++result;  // result advances once per (batch, row) pair
+    }  // for row
+  }    // for batch
+}
+// Hybrid int8 kernel with per-batch input offsets and optional per-row scales.
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, CpuBackendContext* context) {  // scratch, context: unused
+  if (input_offset == nullptr) {  // no offsets: use the plain hybrid overload
+    PortableMatrixBatchVectorMultiplyAccumulate(
+        matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result);
+    return;
+  }
+  if (!compute_row_sums || *compute_row_sums) {  // no flag, or flag set: refresh
+    PortableReductionSumVector(matrix, row_sums, m_rows, m_cols);
+    if (compute_row_sums) {
+      *compute_row_sums = false;  // tell the caller row_sums is now filled in
+    }
+  }
+
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    const int32_t batch_offset = input_offset[batch];
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      int32_t dotprod = 0;
+      float scale = batch_scaling_factor;
+      if (per_channel_scale) {
+        scale *= per_channel_scale[row];
+      }
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      for (int col = 0; col < m_cols; ++col, ++row_ptr) {
+        dotprod += (*row_ptr) * vectors[col];
+      }  // for col
+      dotprod -= row_sums[row] * batch_offset;  // remove the offset * row-sum term
+      *result += dotprod * scale;
+      ++result;
+    }  // for row
+  }    // for batch
+}
+// Sparse float mat-vec, 1x4 blocks: row r owns blocks segments[r]..segments[r+1).
+void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
+    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
+  const int kBlockSize = 4;
+  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; batch++) {
+    const float* matrix_ptr = matrix;  // stored blocks are consumed sequentially
+    for (int row = 0; row < m_rows; row++) {
+      float dot_prod = 0.0f;
+      const float* vector_in_batch = vector + batch * m_cols;
+      for (int i = segments[row]; i < segments[row + 1]; i++) {
+        const int block_start_index = indices[i] * kBlockSize;  // block -> column
+        const float* vector_block_in_batch_ptr =
+            vector_in_batch + block_start_index;
+        for (int c = 0; c < kBlockSize; c++) {
+          dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
+        }
+      }
+      result[batch * m_rows + row] += dot_prod;
+    }
+  }
+}
+// Sparse int8 mat-vec, 1x16 blocks; adds bias, requantizes and clamps to int8.
+void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
+    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
+    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
+    const int32_t output_shift, const int32_t output_offset,
+    const int32_t output_activation_min, const int32_t output_activation_max,
+    int8_t* __restrict__ result) {
+  const int kBlockSize = 16;
+  TFLITE_DCHECK_EQ(m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; ++batch) {
+    const int8_t* matrix_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      int32_t dot_prod = 0;
+      const int8_t* vector_in_batch = vector + batch * m_cols;
+      for (int i = segments[row]; i < segments[row + 1]; ++i) {
+        const int block_start_index = indices[i] * kBlockSize;
+        const int8_t* vector_block_in_batch_ptr =
+            vector_in_batch + block_start_index;
+        for (int c = 0; c < kBlockSize; c++) {
+          dot_prod += *matrix_ptr * *vector_block_in_batch_ptr++;  // weight * input
+          dot_prod += *matrix_ptr++ * input_offset;  // weight * input_offset
+        }
+      }
+      const int32_t bias_value = bias_vector != nullptr ? bias_vector[row] : 0;
+      dot_prod = MultiplyByQuantizedMultiplier(dot_prod + bias_value,
+                                               output_multiplier, output_shift);
+      dot_prod += output_offset;
+      result[batch * m_rows + row] =  // overwrites result (no accumulation here)
+          static_cast<int8_t>(ActivationFunctionWithMinMax(
+              dot_prod, output_activation_min, output_activation_max));
+    }
+  }
+}
+// Sparse float mat-vec; ledger = per row: a block count, then that many block ids.
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
+    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
+    float* __restrict__ result) {
+  const int kBlockSize = 16;
+  TFLITE_DCHECK_EQ(  // NOLINT
+      m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; batch++) {
+    const float* matrix_ptr = matrix;
+    const uint8_t* ledger_ptr = ledger;  // the ledger is re-read for every batch
+    for (int row = 0; row < m_rows; row++) {
+      float dot_prod = 0.0f;
+      int num_nonzero_blocks = *ledger_ptr++;
+      if (num_nonzero_blocks > 0) {
+        const float* vector_in_batch = vector + batch * m_cols;
+        for (int i = 0; i < num_nonzero_blocks; i++) {
+          const int block_start_index = *ledger_ptr++ * kBlockSize;
+          const float* vector_block_in_batch_ptr =
+              vector_in_batch + block_start_index;
+          for (int c = 0; c < kBlockSize; c++) {
+            dot_prod += *matrix_ptr++ * *vector_block_in_batch_ptr++;
+          }
+        }
+      }
+      result[batch * m_rows + row] += dot_prod;
+    }
+  }
+}
+// Hybrid sparse kernel: int8 ledger-encoded matrix, per-batch float scaling.
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
+    const int m_cols, const int8_t* __restrict__ vectors,
+    const float* scaling_factors, int n_batch, float* __restrict__ result) {
+  static const int kBlockSize = 16;
+  TFLITE_DCHECK_EQ(  // NOLINT
+      m_cols % kBlockSize, 0);
+  for (int batch = 0; batch < n_batch; ++batch, vectors += m_cols) {
+    const float batch_scaling_factor = scaling_factors[batch];
+    const uint8_t* ledger_ptr = ledger;
+    // Get the address of the first row.
+    const int8_t* row_ptr = matrix;
+    for (int row = 0; row < m_rows; ++row) {
+      // Initialize the dot product sum for the row to 0.
+      int32_t dotprod = 0;
+#if defined(__GNUC__)
+      // Prefetch the row to cache.
+      __builtin_prefetch(row_ptr, 0 /* prefetch for read */,
+                         3 /* temporal locality */);
+#endif
+      int num_nonzero_blocks = *ledger_ptr++;  // first ledger byte of the row: count
+      for (int i = 0; i < num_nonzero_blocks; i++) {
+        const int block_start_index = *ledger_ptr++ * kBlockSize;
+        const int8_t* vector_block_ptr = vectors + block_start_index;
+        for (int c = 0; c < kBlockSize; c++) {
+          dotprod += (*row_ptr++) * (*vector_block_ptr++);
+        }  // for block
+      }    // for num_nonzero_blocks
+      result[batch * m_rows + row] += dotprod * batch_scaling_factor;
+    }  // for row
+  }    // for batch
+}
+// Shared core: output = saturate_T(requant(bias + W * x) + output_zp + output).
+template <typename T>
+void PortableMatrixBatchVectorMultiplyAccumulateImpl(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    T* output) {
+  const int16_t output_max = std::numeric_limits<T>::max();
+  const int16_t output_min = std::numeric_limits<T>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_output; ++row) {
+      int32_t acc = bias[row];  // bias is read unconditionally (no null check)
+      for (int col = 0; col < n_input; ++col) {
+        int8_t input_val = input[batch * n_input + col];
+        int8_t weights_val = input_to_gate_weights[row * n_input + col];
+        acc += input_val * weights_val;
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift);
+      acc += output_zp;
+      acc += output[batch * n_output + row];  // accumulate onto the existing output
+      if (acc > output_max) {
+        acc = output_max;
+      }
+      if (acc < output_min) {
+        acc = output_min;
+      }
+      output[batch * n_output + row] = static_cast<T>(acc);
+    }
+  }
+}
+// int16-output overload: forwards to the Impl template; scratch/context unused.
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulateImpl(
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, output);
+}
+// int8-output overload: forwards to the Impl template; scratch/context unused.
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulateImpl(
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, output);
+}
+// gate_output = clamp_int8(requant(sum((input - zeropoint) * weight)) + output zp).
+void PortableMatrixBatchVectorMultiply(const int8_t* input,
+                                       int32_t input_zeropoint,
+                                       const int8_t* input_to_gate_weights,
+                                       int32_t input_to_gate_effective_scale_a,
+                                       int32_t input_to_gate_effective_scale_b,
+                                       int32_t n_batch, int32_t n_input,
+                                       int32_t n_cell, int8_t* gate_output,
+                                       int8_t gate_output_zp) {
+  const int32_t int8_max = std::numeric_limits<int8_t>::max();
+  const int32_t int8_min = std::numeric_limits<int8_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_cell; ++row) {
+      int32_t acc = 0;  // no bias term and no accumulation onto gate_output
+      for (int col = 0; col < n_input; ++col) {
+        int32_t input_val = input[batch * n_input + col];
+        int8_t weights_val = input_to_gate_weights[row * n_input + col];
+        acc += (input_val - input_zeropoint) * weights_val;
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, input_to_gate_effective_scale_a,
+                                          input_to_gate_effective_scale_b);
+      acc += gate_output_zp;
+      if (acc > int8_max) {
+        acc = int8_max;
+      }
+      if (acc < int8_min) {
+        acc = int8_min;
+      }
+      gate_output[batch * n_cell + row] = static_cast<int8_t>(acc);
+    }
+  }
+}
+// proj_output = clamp_int8(requant(gate_bias + sum(hidden * weight)) + output_zp).
+void PortableMatrixBatchVectorMultiply(
+    const int16_t* hidden, const int8_t* hidden_to_output_weights,
+    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
+    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
+    int32_t n_output, int32_t output_zp, int8_t* proj_output) {
+  const int16_t int8_max = std::numeric_limits<int8_t>::max();
+  const int16_t int8_min = std::numeric_limits<int8_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int row = 0; row < n_output; ++row) {
+      int64_t acc = gate_bias[row];
+      for (int col = 0; col < n_hidden; ++col) {
+        int16_t input_val = hidden[batch * n_hidden + col];
+        int8_t weights_val = hidden_to_output_weights[row * n_hidden + col];
+        int64_t curr = acc;
+        acc += input_val * weights_val;
+        // NOTE(review): acc is 64-bit; confirm these wrap checks can ever fire.
+        if (input_val * weights_val > 0 && acc < curr) {
+          acc = std::numeric_limits<int32_t>::max();
+        }
+        if (input_val * weights_val < 0 && acc > curr) {
+          acc = std::numeric_limits<int32_t>::min();
+        }
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a,
+                                          proj_effective_scale_b);
+      acc += output_zp;
+      if (acc > int8_max) {
+        acc = int8_max;
+      }
+      if (acc < int8_min) {
+        acc = int8_min;
+      }
+      proj_output[batch * n_output + row] = acc;  // already within int8 range
+    }
+  }
+}
+// Integer layer norm per batch row: normalize, apply weights and bias, store int16.
+void PortableApplyLayerNorm(const int16_t* input,
+                            const int16_t* layer_norm_weights,
+                            const int32_t* bias, int32_t layer_norm_scale_a,
+                            int32_t layer_norm_scale_b, int32_t variance_limit,
+                            int n_batch, int n_input, int16_t* output) {
+  // The square of std::pow(2, 10), which is the extra factor that makes sure
+  // normalized values have enough resolution.
+  static const int kTwoToPower20 = 1 << 20;
+  for (int i = 0; i < n_batch; ++i) {
+    int64_t sum = 0;
+    int64_t sum_sq = 0;
+    for (int j = 0; j < n_input; ++j) {
+      const int32_t index = i * n_input + j;
+      int32_t val = static_cast<int32_t>(input[index]);
+      sum += val;
+      sum_sq += val * val;
+    }
+    // mean carries a 2^10 factor; sum is already 64-bit, so no cast is needed.
+    int32_t mean = static_cast<int32_t>(sum * 1024 / n_input);
+    // TODO(b/173994730): Avoids overflow but only works for POT n_input.
+    int32_t temp = kTwoToPower20 / n_input;
+    int64_t variance =
+        sum_sq * temp - static_cast<int64_t>(mean) * static_cast<int64_t>(mean);
+    int32_t variance2 = static_cast<int32_t>(variance / kTwoToPower20);
+    if (variance2 < 1) {
+      variance2 = variance_limit;
+    }
+    int32_t stddev_inverse_a;
+    int stddev_inverse_b;
+    GetInvSqrtQuantizedMultiplierExp(variance2, /*reverse_shift*/ -1,
+                                     &stddev_inverse_a, &stddev_inverse_b);
+    for (int j = 0; j < n_input; ++j) {
+      const int32_t index = i * n_input + j;
+      int32_t val = static_cast<int32_t>(input[index]);
+      int32_t shifted = 1024 * val - mean;
+      int32_t rescaled = MultiplyByQuantizedMultiplier(
+          shifted, stddev_inverse_a, stddev_inverse_b);
+      // TODO(jianlijianli): Saturate this. Widen first so the product cannot wrap.
+      int64_t val3 =
+          static_cast<int64_t>(rescaled) * layer_norm_weights[j] + bias[j];
+      int32_t val4 =
+          static_cast<int32_t>((val3 > 0 ? val3 + 512 : val3 - 512) / 1024);
+      int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a,
+                                                   layer_norm_scale_b + 12);
+      val5 = std::min(std::max(kInt16Min, val5), kInt16Max);
+      output[index] = static_cast<int16_t>(val5);
+    }
+  }
+}
+// Float reference layer norm: normalize each row, apply weights/bias, store Q3.12.
+void PortableApplyLayerNormFloat(const int16_t* input,
+                                 const int16_t* layer_norm_weights,
+                                 int32_t layer_norm_scale_a,
+                                 int32_t layer_norm_scale_b,
+                                 const int32_t* bias, int n_batch, int n_input,
+                                 int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  const float layer_norm_scale =  // scale_a * 2^(scale_b - 31)
+      layer_norm_scale_a *
+      std::pow(2.0, static_cast<double>(layer_norm_scale_b - 31));
+  const float bias_scale =
+      static_cast<float>(std::pow(2.0, -10)) * layer_norm_scale;
+
+  for (int batch = 0; batch < n_batch; ++batch) {
+    float sum = 0.0f;
+    float sum_sq = 0.0f;
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float value = static_cast<float>(input[index]);
+      sum += value;
+      sum_sq += value * value;
+    }
+    const float mean = sum / n_input;
+    float stddev_inv = 0.0f;
+    const float variance = sum_sq / n_input - mean * mean;
+    if (variance <= 0) {  // rounding can push it below 0; sqrt would give NaN
+      stddev_inv = 1.0f / std::sqrt(1e-8f);
+    } else {
+      stddev_inv = 1.0f / std::sqrt(variance);
+    }
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float normalized_value =
+          (static_cast<float>(input[index]) - mean) * stddev_inv;
+      const float weighted_normalized_value =
+          normalized_value * layer_norm_weights[i] * layer_norm_scale +
+          bias[i] * bias_scale;
+      const int32_t quant_output = static_cast<int32_t>(round(
+          weighted_normalized_value * static_cast<float>(std::pow(2, 12))));
+      output[index] = std::min(int16_max, std::max(int16_min, quant_output));
+    }
+  }
+}
+// output[r] += scalar * (sum of row r of the row-major n_row x n_col matrix).
+void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
+                                            int32_t scalar, int32_t n_row,
+                                            int32_t n_col, int32_t* output) {
+  for (int32_t row = 0; row < n_row; ++row) {
+    int32_t total = 0;
+    for (int32_t col = 0; col < n_col; ++col) {
+      total += matrix[row * n_col + col];
+    }
+    output[row] += total * scalar;
+  }
+}
+// Fixed-point logistic via gemmlowp: input read as F3 raw, output stored as F0 raw.
+void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
+                          int32_t n_input, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int c = 0; c < n_input; c++) {
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;  // 3 integer bits
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;  // 0 integer bits
+      const int index = batch * n_input + c;
+      F3 sigmoid_input = F3::FromRaw(input[index]);
+      F0 sigmoid_output = gemmlowp::logistic(sigmoid_input);
+      output[index] = sigmoid_output.raw();
+    }
+  }
+}
+// Float reference sigmoid: input * 2^-12 in, result * 2^15 out, clamped to int16.
+void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
+                               int32_t n_input, int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float float_input =
+          input[index] * static_cast<float>(std::pow(2, -12));
+      const float float_output = 1.0f / (1.0f + std::exp(-float_input));
+      const int32_t quant_output = static_cast<int32_t>(  // truncates, no rounding
+          float_output * static_cast<float>(std::pow(2, 15)));
+      const int32_t quant_output_clamped =
+          std::min(int16_max, std::max(int16_min, quant_output));
+      output[index] = static_cast<int16_t>(quant_output_clamped);
+    }
+  }
+}
+// Fixed-point tanh via gemmlowp; IntegerBits selects the input format, output is F0.
+template <int IntegerBits>
+void PortableApplyTanhImpl(const int16_t* input, int32_t n_batch,
+                           int32_t n_input, int16_t* output) {
+  using FX = gemmlowp::FixedPoint<std::int16_t, IntegerBits>;
+  using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      FX tanh_input = FX::FromRaw(input[index]);
+      F0 tanh_output = gemmlowp::tanh(tanh_input);
+      output[index] = tanh_output.raw();  // raw F0 bits stored as int16
+    }
+  }
+}
+// Dispatches the runtime integer_bits (0..6) to the matching tanh template.
+void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
+                       int32_t n_batch, int32_t n_input, int16_t* output) {
+  assert(integer_bits <= 6);
+#define DISPATCH_TANH(i)                                       \
+  case i:                                                      \
+    PortableApplyTanhImpl<i>(input, n_batch, n_input, output); \
+    break;
+  switch (integer_bits) {
+    DISPATCH_TANH(0);
+    DISPATCH_TANH(1);
+    DISPATCH_TANH(2);
+    DISPATCH_TANH(3);
+    DISPATCH_TANH(4);
+    DISPATCH_TANH(5);
+    DISPATCH_TANH(6);
+    default:  // any other value: output is left unwritten
+      return;
+  }
+#undef DISPATCH_TANH
+}
+// Float reference tanh: input * 2^integer_bits in, result * 2^15 out, clamped.
+void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
+                            int32_t n_input, int32_t integer_bits,
+                            int16_t* output) {
+  const int32_t int16_max = std::numeric_limits<int16_t>::max();
+  const int32_t int16_min = std::numeric_limits<int16_t>::min();
+  const double two = 2.0;
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const float float_input =  // NOTE(review): exponent is used as given; confirm sign convention with callers
+          input[index] * std::pow(two, static_cast<double>(integer_bits));
+      const float float_output = std::tanh(float_input);
+      const int32_t quant_output = static_cast<int32_t>(  // truncates, no rounding
+          float_output * static_cast<float>(std::pow(2, 15)));
+      const int32_t quant_output_clamped =
+          std::min(int16_max, std::max(int16_min, quant_output));
+      output[index] = static_cast<int16_t>(quant_output_clamped);
+    }
+  }
+}
+// Element-wise int16 product, rounding-divided by 2^shift, then cast to int16.
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int shift, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      const int16_t a = input_1[index];
+      const int16_t b = input_2[index];
+      const int32_t value = static_cast<int32_t>(a) * static_cast<int32_t>(b);
+      output[index] =  // plain cast: no clamp is applied before narrowing
+          static_cast<int16_t>(gemmlowp::RoundingDivideByPOT(value, shift));
+    }
+  }
+}
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int32_t multiplier, int32_t shift, int32_t n_batch,
+                      int32_t n_input, int32_t output_zp, int8_t* output) {
+  // int16 x int16 product -> quantized rescale -> add zero point -> int8.
+  constexpr int32_t kInt8Lo = -128;
+  constexpr int32_t kInt8Hi = 127;
+  for (int row = 0; row < n_batch; ++row) {
+    for (int col = 0; col < n_input; ++col) {
+      const int pos = row * n_input + col;
+      const int32_t raw = static_cast<int32_t>(input_1[pos]) * input_2[pos];
+      const int32_t shifted =
+          MultiplyByQuantizedMultiplier(raw, multiplier, shift) + output_zp;
+      // Saturate to the int8 range before narrowing.
+      output[pos] =
+          static_cast<int8_t>(std::min(std::max(kInt8Lo, shifted), kInt8Hi));
+    }
+  }
+}
+
+void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int16_t* output) {
+  for (int row = 0; row < n_batch; ++row) {  // saturating int16 addition
+    for (int col = 0; col < n_input; ++col) {
+      const int pos = row * n_input + col;
+      const int32_t total = input_1[pos] + input_2[pos];
+      output[pos] =
+          static_cast<int16_t>(std::min(kInt16Max, std::max(kInt16Min, total)));
+    }
+  }
+}
+
+float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
+                                     int v_size) {
+  float acc = 0.0;  // products are accumulated in index order
+  for (int idx = 0; idx < v_size; ++idx) {
+    acc += vector1[idx] * vector2[idx];
+  }
+  return acc;
+}
+
+namespace {
+inline int32_t VectorVectorDotProduct(const int16_t* vector1,
+                                      const int16_t* vector2, int v_size) {
+  int32_t acc = 0;  // int16 x int16 products are summed in 32 bits
+  for (int idx = 0; idx < v_size; ++idx) {
+    acc += vector1[idx] * vector2[idx];
+  }
+  return acc;
+}
+}  // namespace
+
+void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
+                                              const int16_t* vector2,
+                                              int v_size, int n_batch,
+                                              int32_t* result) {
+  // result[b] is the dot product of the b-th v_size-long slice of each input.
+  for (int b = 0; b < n_batch; ++b) {
+    const int base = b * v_size;
+    result[b] = VectorVectorDotProduct(vector1 + base, vector2 + base, v_size);
+  }
+}
+
+void PortableVectorBatchVectorCwiseProductAccumulate(
+    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
+    int32_t multiplier, int shift, int16_t* result) {
+  // result += rescaled (vector * batch_vector), saturated to the int16 range.
+  constexpr int32_t kLo = -32768, kHi = 32767;
+  for (int b = 0; b < n_batch; ++b) {
+    for (int v = 0; v < v_size; ++v, ++batch_vector, ++result) {
+      const int32_t raw = vector[v] * *batch_vector;
+      const int32_t sum =
+          MultiplyByQuantizedMultiplier(raw, multiplier, shift) + *result;
+      *result = std::max(std::min(kHi, sum), kLo);
+    }
+  }
+}
+
+void PortableSub1Vector(const float* vector, int v_size, float* result) {
+  for (int idx = 0; idx < v_size; ++idx) {
+    result[idx] = 1.0f - vector[idx];  // element-wise 1 - x
+  }
+}
+
+void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result) {
+  static const int16_t kOne = 32767;  // the int16 maximum plays the role of 1
+  for (int idx = 0; idx < v_size; ++idx) {
+    result[idx] = kOne - vector[idx];
+  }
+}
+
+void PortableVectorScalarMultiply(const int8_t* vector, const int v_size,
+                                  const float scale, float* result) {
+  for (int idx = 0; idx < v_size; ++idx) {
+    result[idx] = scale * vector[idx];  // int8 entry is converted to float
+  }
+}
+
+void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
+                                     float* __restrict__ output_vector,
+                                     int v_size, int n_batch) {
+  // Each batch row is shifted to zero mean and scaled by 1 / stddev.
+  constexpr float kNormalizationConstant = 1e-8f;  // nonzero divisor at var 0
+  for (int batch = 0; batch < n_batch; ++batch) {
+    const float* src = input_vector + batch * v_size;
+    float* dst = output_vector + batch * v_size;
+    float total = 0.0f;
+    for (int i = 0; i < v_size; ++i) total += src[i];
+    const float mean = total / v_size;
+    float squared_dev = 0.0f;
+    for (int i = 0; i < v_size; ++i) {
+      const float delta = src[i] - mean;
+      squared_dev += delta * delta;
+    }
+    const float variance = squared_dev / v_size;
+    const float stddev_inv =
+        1.0f / std::sqrt(variance + kNormalizationConstant);
+    // Second pass over the row writes the normalized values.
+    for (int i = 0; i < v_size; ++i) {
+      dst[i] = (src[i] - mean) * stddev_inv;
+    }
+  }
+}
+
+void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
+                                  const int8_t* recurrent, int8_t recurrent_zp,
+                                  int32_t input_effective_scale_a,
+                                  int32_t input_effective_scale_b,
+                                  int32_t recurrent_effective_scale_a,
+                                  int32_t recurrent_effective_scale_b,
+                                  int32_t n_batch, int32_t n_cell,
+                                  int16_t* output) {
+  // Sums the rescaled, zero-point-corrected input and recurrent values and
+  // saturates the result to int16.
+  const int32_t lo = std::numeric_limits<int16_t>::min();
+  const int32_t hi = std::numeric_limits<int16_t>::max();
+  const int32_t total = n_batch * n_cell;
+  for (int i = 0; i < total; ++i) {
+    // Remove each operand's zero point in 32-bit arithmetic.
+    const int32_t x = static_cast<int32_t>(input[i]) - input_zp;
+    const int32_t h = static_cast<int32_t>(recurrent[i]) - recurrent_zp;
+    // Rescale each operand with its own pair of effective-scale arguments.
+    const int32_t x_scaled = MultiplyByQuantizedMultiplier(
+        x, input_effective_scale_a, input_effective_scale_b);
+    const int32_t h_scaled = MultiplyByQuantizedMultiplier(
+        h, recurrent_effective_scale_a, recurrent_effective_scale_b);
+    const int32_t sum = h_scaled + x_scaled;
+    // Clamp to the int16 limits before narrowing.
+    const int32_t clamped = std::max(lo, std::min(hi, sum));
+    output[i] = static_cast<int16_t>(clamped);
+  }
+}
+
+}  // namespace tensor_utils
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h
new file mode 100644
index 0000000..06c867c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h
@@ -0,0 +1,333 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h"
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+namespace tensor_utils {
+
+// Returns true when all v_size float entries are zero (PortableIsZeroVector).
+bool IsZeroVector(const float* vector, int v_size) {
+  return PortableIsZeroVector(vector, v_size);
+}
+
+// Returns true when all v_size int8_t entries are zero (PortableIsZeroVector).
+bool IsZeroVector(const int8_t* vector, int v_size) {
+  return PortableIsZeroVector(vector, v_size);
+}
+
+void SymmetricQuantizeFloats(const float* values, const int size,
+                             int8_t* quantized_values, float* min, float* max,
+                             float* scaling_factor) {  // min/max are pointers
+  PortableSymmetricQuantizeFloats(values, size, quantized_values, min, max,
+                                  scaling_factor);  // arguments forwarded as-is
+}
+
+void SymmetricQuantizeFloats(const float* values, const int size,
+                             int8_t* quantized_values, float min_value,
+                             float max_value, float* scaling_factor) {  // by value
+  PortableSymmetricQuantizeFloats(values, size, quantized_values, min_value,
+                                  max_value, scaling_factor);  // forwarded as-is
+}
+
+void AsymmetricQuantizeFloats(const float* values, const int size,
+                              int8_t* quantized_values, float* scaling_factor,
+                              int32_t* offset) {  // thin forwarder
+  PortableAsymmetricQuantizeFloats(values, size, quantized_values,
+                                   scaling_factor, offset);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(const float* matrix, int m_rows,
+                                         int m_cols, const float* vector,
+                                         int n_batch, float* result) {  // float
+  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
+                                              n_batch, result);  // forwarded
+}
+
+void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
+                                         const int m_rows, const int m_cols,
+                                         const int8_t* __restrict__ vector,
+                                         const float* scaling_factors,
+                                         int n_batch,
+                                         float* __restrict__ result) {
+  // int8 operands with float scaling_factors; all arguments are forwarded.
+  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
+                                              scaling_factors, n_batch, result);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulate(  // all 13 arguments forwarded
+      matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result,
+      per_channel_scale, input_offset, scratch, row_sums, compute_row_sums,
+      context);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(const int8_t* __restrict__ matrix,
+                                         const int m_rows, const int m_cols,
+                                         const int8_t* __restrict__ vector,
+                                         const float* scaling_factors,
+                                         int n_batch, int32_t* scratch,
+                                         float* __restrict__ result,
+                                         CpuBackendContext* context) {
+  // NOTE: `scratch` and `context` are accepted but not passed on to this call.
+  PortableMatrixBatchVectorMultiplyAccumulate(matrix, m_rows, m_cols, vector,
+                                              scaling_factors, n_batch, result);
+}
+
+void SparseMatrixBatchVectorMultiplyAccumulate1x4(
+    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const float* __restrict__ vector, int n_batch, float* __restrict__ result) {
+  PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(  // thin forwarder
+      matrix, segments, indices, m_rows, m_cols, vector, n_batch, result);
+}
+
+void SparseMatrixBatchVectorMultiplyAccumulate(
+    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
+    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
+    float* __restrict__ result) {  // float matrix + uint8_t ledger overload
+  PortableSparseMatrixBatchVectorMultiplyAccumulate(
+      matrix, ledger, m_rows, m_cols, vector, n_batch, result);
+}
+
+void SparseMatrixBatchVectorMultiplyAccumulate1x16(
+    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
+    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
+    const int32_t output_shift, const int32_t output_offset,
+    const int32_t output_activation_min, const int32_t output_activation_max,
+
+    int8_t* __restrict__ result) {  // int8 operands, int32 bias, int8 result
+  PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
+      matrix, segments, indices, m_rows, m_cols, vector, bias_vector, n_batch,
+      input_offset, output_multiplier, output_shift, output_offset,
+      output_activation_min, output_activation_max, result);
+}
+
+void SparseMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
+    const int m_cols, const int8_t* __restrict__ vectors,
+    const float* scaling_factors, int n_batch, float* __restrict__ result) {
+  PortableSparseMatrixBatchVectorMultiplyAccumulate(  // int8 + ledger overload
+      matrix, ledger, m_rows, m_cols, vectors, scaling_factors, n_batch,
+      result);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int16_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulate(  // int16_t output overload
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, scratch, output, context);
+}
+
+void MatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int8_t* output, CpuBackendContext* context) {
+  PortableMatrixBatchVectorMultiplyAccumulate(  // int8_t output overload
+      input, bias, input_to_gate_weights, multiplier, shift, n_batch, n_input,
+      n_output, output_zp, scratch, output, context);
+}
+
+void MatrixScalarMultiplyAccumulate(const int8_t* matrix, int32_t scalar,
+                                    int32_t n_row, int32_t n_col,
+                                    int32_t* output) {  // thin forwarder
+  PortableMatrixScalarMultiplyAccumulate(matrix, scalar, n_row, n_col, output);
+}
+
+void MatrixBatchVectorMultiply(const int8_t* input, int32_t input_zeropoint,
+                               const int8_t* input_to_gate_weights,
+                               int32_t input_to_gate_effective_scale_a,
+                               int32_t input_to_gate_effective_scale_b,
+                               int32_t n_batch, int32_t n_input, int32_t n_cell,
+                               int8_t* gate_output, int8_t gate_output_zp) {
+  PortableMatrixBatchVectorMultiply(  // int8 input -> int8 gate_output overload
+      input, input_zeropoint, input_to_gate_weights,
+      input_to_gate_effective_scale_a, input_to_gate_effective_scale_b, n_batch,
+      n_input, n_cell, gate_output, gate_output_zp);
+}
+
+void MatrixBatchVectorMultiply(const int16_t* hidden,
+                               const int8_t* hidden_to_output_weights,
+                               int32_t proj_effective_scale_a,
+                               int32_t proj_effective_scale_b,
+                               const int32_t* gate_bias, int32_t n_batch,
+                               int32_t n_hidden, int32_t n_output,
+                               int32_t output_zp, int8_t* proj_output) {
+  // int16 hidden -> int8 proj_output overload; arguments forwarded in order.
+  PortableMatrixBatchVectorMultiply(hidden, hidden_to_output_weights,
+                                    proj_effective_scale_a,
+                                    proj_effective_scale_b, gate_bias, n_batch,
+                                    n_hidden, n_output, output_zp, proj_output);
+}
+
+void ApplyLayerNorm(const int16_t* input, const int16_t* layer_norm_weights,
+                    const int32_t* bias, int32_t layer_norm_scale_a,
+                    int32_t layer_norm_scale_b, int32_t variance_limit,
+                    int n_batch, int n_input, int16_t* output) {
+  // Thin forwarder; note that `bias` precedes the scale pair in this overload.
+  PortableApplyLayerNorm(input, layer_norm_weights, bias, layer_norm_scale_a,
+                         layer_norm_scale_b, variance_limit, n_batch, n_input,
+                         output);
+}
+
+void ApplyLayerNormFloat(const int16_t* input,
+                         const int16_t* layer_norm_weights,
+                         int32_t layer_norm_scale_a, int32_t layer_norm_scale_b,
+                         const int32_t* bias, int n_batch, int n_input,
+                         int16_t* output) {
+  // Thin forwarder; here `bias` follows the scale pair and there is no limit.
+  PortableApplyLayerNormFloat(input, layer_norm_weights, layer_norm_scale_a,
+                              layer_norm_scale_b, bias, n_batch, n_input,
+                              output);
+}
+
+void ApplySigmoid(const int16_t* input, int32_t n_batch, int32_t n_input,
+                  int16_t* output) {  // forwards to PortableApplySigmoid
+  PortableApplySigmoid(input, n_batch, n_input, output);
+}
+
+void ApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
+                       int16_t* output) {  // forwards to PortableApplySigmoidFloat
+  PortableApplySigmoidFloat(input, n_batch, n_input, output);
+}
+
+void ApplyTanh(int32_t integer_bits, const int16_t* input, int32_t n_batch,
+               int32_t n_input, int16_t* output) {  // integer_bits comes first
+  PortableApplyTanh(integer_bits, input, n_batch, n_input, output);
+}
+
+void ApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input,
+                    int32_t integer_bits, int16_t* output) {  // integer_bits 4th
+  PortableApplyTanhFloat(input, n_batch, n_input, integer_bits, output);
+}
+
+void CwiseMul(const int16_t* input_1, const int16_t* input_2, int n_batch,
+              int n_input, int shift, int16_t* output) {  // int16 output overload
+  PortableCwiseMul(input_1, input_2, n_batch, n_input, shift, output);
+}
+
+void CwiseMul(const int16_t* input_1, const int16_t* input_2,
+              int32_t multiplier, int32_t shift, int32_t n_batch,
+              int32_t n_input, int32_t output_zp, int8_t* output) {  // int8 out
+  PortableCwiseMul(input_1, input_2, multiplier, shift, n_batch, n_input,
+                   output_zp, output);  // product is rescaled and clamped there
+}
+
+void CwiseAdd(const int16_t* input_1, const int16_t* input_2, int n_batch,
+              int n_input, int16_t* output) {  // element-wise sum, clamped
+  PortableCwiseAdd(input_1, input_2, n_batch, n_input, output);
+}
+
+void CwiseClipping(float* vector, const int v_size,
+                   const float clipping_value) {  // in-place clamp, float
+  PortableCwiseClipping(vector, v_size, clipping_value);
+}
+
+void CwiseClipping(int16_t* vector, const int v_size,
+                   const int16_t clipping_value) {  // in-place clamp, int16_t
+  PortableCwiseClipping(vector, v_size, clipping_value);
+}
+
+void CwiseClipping(int8_t* vector, const int v_size,
+                   const int8_t clipping_value) {  // in-place clamp, int8_t
+  PortableCwiseClipping(vector, v_size, clipping_value);
+}
+
+void VectorBatchVectorCwiseProductAccumulate(const int16_t* vector, int v_size,
+                                             const int16_t* batch_vector,
+                                             int n_batch, int32_t multiplier,
+                                             int shift, int16_t* result) {
+  PortableVectorBatchVectorCwiseProductAccumulate(  // accumulates into result
+      vector, v_size, batch_vector, n_batch, multiplier, shift, result);
+}
+
+float VectorVectorDotProduct(const float* vector1, const float* vector2,
+                             int v_size) {  // float dot product over v_size
+  return PortableVectorVectorDotProduct(vector1, vector2, v_size);
+}
+
+void BatchVectorBatchVectorDotProduct(const int16_t* vector1,
+                                      const int16_t* vector2, int v_size,
+                                      int n_batch, int32_t* result) {
+  PortableBatchVectorBatchVectorDotProduct(vector1, vector2, v_size, n_batch,
+                                           result);  // one int32 per batch
+}
+
+void Sub1Vector(const float* vector, int v_size, float* result) {
+  PortableSub1Vector(vector, v_size, result);  // result[i] = 1.0f - vector[i]
+}
+
+void Sub1Vector(const int16_t* vector, int v_size, int16_t* result) {
+  PortableSub1Vector(vector, v_size, result);  // result[i] = 32767 - vector[i]
+}
+
+// Writes scale * vector[i] into result[i] for each of the v_size int8 entries.
+void VectorScalarMultiply(const int8_t* vector, int v_size, float scale,
+                          float* result) {
+  PortableVectorScalarMultiply(vector, v_size, scale, result);
+}
+
+void ReductionSumVector(const float* input_vector, float* output_vector,
+                        int output_size, int reduction_size) {  // float -> float
+  PortableReductionSumVector(input_vector, output_vector, output_size,
+                             reduction_size);  // sums groups of reduction_size
+}
+
+void ReductionSumVector(const int32_t* input_vector, int32_t* output_vector,
+                        int output_size, int reduction_size) {  // int32 -> int32
+  PortableReductionSumVector(input_vector, output_vector, output_size,
+                             reduction_size);  // sums groups of reduction_size
+}
+
+void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
+                        int output_size, int reduction_size) {  // int8 -> int32
+  PortableReductionSumVector(input_vector, output_vector, output_size,
+                             reduction_size);  // sums groups of reduction_size
+}
+
+void MeanStddevNormalization(const float* input_vector, float* output_vector,
+                             int v_size, int n_batch) {  // per-batch normalize
+  PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
+}
+
+void TwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
+                          const int8_t* recurrent, int8_t recurrent_zp,
+                          int32_t input_effective_scale_a,
+                          int32_t input_effective_scale_b,
+                          int32_t recurrent_effective_scale_a,
+                          int32_t recurrent_effective_scale_b, int32_t n_batch,
+                          int32_t n_cell, int16_t* output) {
+  PortableTwoGateSaturatingAdd(  // writes n_batch * n_cell int16 sums
+      input, input_zp, recurrent, recurrent_zp, input_effective_scale_a,
+      input_effective_scale_b, recurrent_effective_scale_a,
+      recurrent_effective_scale_b, n_batch, n_cell, output);
+}
+
+}  // namespace tensor_utils
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h
new file mode 100644
index 0000000..6c404d5
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils_impl.h
@@ -0,0 +1,244 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
+
+#include <algorithm>
+#include <cstdint>
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+
+// Not all backends support CpuBackendContext usage, so forward declare to avoid
+// pulling in its implementation.
+class CpuBackendContext;
+
+namespace tensor_utils {
+
+template <typename T>
+bool PortableIsZeroVector(const T* vector, int v_size) {
+  // Scan until the first non-zero entry; an empty range counts as all-zero.
+  int idx = 0;
+  while (idx < v_size && !(vector[idx] != 0)) {
+    ++idx;
+  }
+  return idx >= v_size;
+}
+
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float* min_value,
+                                     float* max_value, float* scaling_factor);
+
+void PortableSymmetricQuantizeFloats(const float* values, const int size,
+                                     int8_t* quantized_values, float min_value,
+                                     float max_value, float* scaling_factor);
+
+void PortableAsymmetricQuantizeFloats(const float* values, const int size,
+                                      int8_t* quantized_values,
+                                      float* scaling_factor, int32_t* offset);
+
+// Multiply a matrix by a batch vector, and store results in a batch-size
+// vector.
+void PortableMatrixBatchVectorMultiplyAccumulate(const float* matrix,
+                                                 int m_rows, int m_cols,
+                                                 const float* vector,
+                                                 int n_batch, float* result);
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result);
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vectors, const float* scaling_factors,
+    int n_batch, float* __restrict__ result, const float* per_channel_scale,
+    const int32_t* input_offset, int32_t* scratch, int32_t* row_sums,
+    bool* compute_row_sums, CpuBackendContext* context);
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const int m_rows, const int m_cols,
+    const int8_t* __restrict__ vector, const float* scaling_factors,
+    int n_batch, int32_t* scratch, float* __restrict__ result,
+    CpuBackendContext* context);
+
+void PortableSparseMatrixBatchVectorMultiplyAccumulate1x4(
+    const float* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const float* __restrict__ vector, int n_batch, float* __restrict__ result);
+
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const float* __restrict__ matrix, const uint8_t* __restrict__ ledger,
+    int m_rows, int m_cols, const float* __restrict__ vector, int n_batch,
+    float* __restrict__ result);
+
+void PortableSparseMatrixBatchVectorMultiplyAccumulate1x16(
+    const int8_t* __restrict__ matrix, const int32_t* __restrict__ segments,
+    const int32_t* __restrict__ indices, int m_rows, int m_cols,
+    const int8_t* __restrict__ vector, const int32_t* __restrict__ bias_vector,
+    int n_batch, const int32_t input_offset, const int32_t output_multiplier,
+    const int32_t output_shift, const int32_t output_offset,
+    const int32_t output_activation_min, const int32_t output_activation_max,
+    int8_t* __restrict__ result);
+
+void PortableSparseMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* __restrict__ matrix, const uint8_t* ledger, const int m_rows,
+    const int m_cols, const int8_t* __restrict__ vectors,
+    const float* scaling_factors, int n_batch, float* __restrict__ result);
+
+// Returns the dot product of the first v_size floats of vector1 and vector2.
+float PortableVectorVectorDotProduct(const float* vector1, const float* vector2,
+                                     int v_size);
+
+void PortableBatchVectorBatchVectorDotProduct(const int16_t* vector1,
+                                              const int16_t* vector2,
+                                              int v_size, int n_batch,
+                                              int32_t* result);  // n_batch sums
+
+void PortableVectorBatchVectorCwiseProductAccumulate(
+    const int16_t* vector, int v_size, const int16_t* batch_vector, int n_batch,
+    int32_t multiplier, int shift, int16_t* result);  // result updated in place
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int16_t* output, CpuBackendContext* context);
+
+void PortableMatrixBatchVectorMultiplyAccumulate(
+    const int8_t* input, const int32_t* bias,
+    const int8_t* input_to_gate_weights, int32_t multiplier, int32_t shift,
+    int32_t n_batch, int32_t n_input, int32_t n_output, int32_t output_zp,
+    int32_t* scratch, int8_t* output, CpuBackendContext* context);
+
+void PortableMatrixBatchVectorMultiply(const int8_t* input,
+                                       int32_t input_zeropoint,
+                                       const int8_t* input_to_gate_weights,
+                                       int32_t input_to_gate_effective_scale_a,
+                                       int32_t input_to_gate_effective_scale_b,
+                                       int32_t n_batch, int32_t n_input,
+                                       int32_t n_cell, int8_t* gate_output,
+                                       int8_t gate_output_zp);
+
+void PortableMatrixBatchVectorMultiply(
+    const int16_t* hidden, const int8_t* hidden_to_output_weights,
+    int32_t proj_effective_scale_a, int32_t proj_effective_scale_b,
+    const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden,
+    int32_t n_output, int32_t output_zp, int8_t* proj_output);
+
+void PortableMatrixScalarMultiplyAccumulate(const int8_t* matrix,
+                                            int32_t scalar, int32_t n_row,
+                                            int32_t n_col, int32_t* output);
+
+void PortableApplyLayerNorm(const int16_t* input,
+                            const int16_t* layer_norm_weights,
+                            const int32_t* bias, int32_t layer_norm_scale_a,
+                            int32_t layer_norm_scale_b, int32_t variance_limit,
+                            int n_batch, int n_input, int16_t* output);
+
+void PortableApplyLayerNormFloat(const int16_t* input,
+                                 const int16_t* layer_norm_weights,
+                                 int32_t layer_norm_scale_a,
+                                 int32_t layer_norm_scale_b,
+                                 const int32_t* bias, int n_batch, int n_input,
+                                 int16_t* output);
+
+void PortableApplySigmoid(const int16_t* input, int32_t n_batch,
+                          int32_t n_input, int16_t* output);
+
+void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch,
+                               int32_t n_input, int16_t* output);
+
+void PortableApplyTanh(int32_t integer_bits, const int16_t* input,
+                       int32_t n_batch, int32_t n_input, int16_t* output);
+
+void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch,
+                            int32_t n_input, int32_t integer_bits,
+                            int16_t* output);  // std::tanh based, clamped
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int shift, int16_t* output);  // int16 out
+
+void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2,
+                      int32_t multiplier, int32_t shift, int32_t n_batch,
+                      int32_t n_input, int32_t output_zp, int8_t* output);  // clamped to [-128, 127]
+
+void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2,
+                      int n_batch, int n_input, int16_t* output);  // clamped sum
+
+template <typename T>
+void PortableCwiseClipping(T* vector, const int v_size,
+                           const T& clipping_value) {
+  for (int idx = 0; idx < v_size; ++idx) {
+    const T capped = std::min(clipping_value, vector[idx]);  // upper bound
+    vector[idx] = std::max(capped, static_cast<T>(-clipping_value));
+  }
+}
+
+// Batch vector initialization with another vector.
+void PortableVectorBatchVectorAssign(const float* vector, int v_size,
+                                     int n_batch, float* batch_vector);
+
+// Writes 1.0f - vector[i] into result[i] for i in [0, v_size) (used in CIFG).
+void PortableSub1Vector(const float* vector, int v_size, float* result);
+
+void PortableSub1Vector(const int16_t* vector, int v_size, int16_t* result);  // 32767 - x
+
+// Writes scale * vector[i] into result[i] for each of the v_size int8 entries.
+void PortableVectorScalarMultiply(const int8_t* vector, int v_size, float scale,
+                                  float* result);
+
+// Sums consecutive groups of input elements:
+//   input_vector: source; output_size * reduction_size elements are read.
+//   output_vector: receives one sum per group.
+//   output_size: number of groups, i.e. number of sums written.
+//   reduction_size: length of each group of consecutive input elements.
+// Each sum is accumulated in a variable of type OUTPUT.
+template <typename INPUT, typename OUTPUT>
+void PortableReductionSumVector(const INPUT* input_vector,
+                                OUTPUT* output_vector, int output_size,
+                                int reduction_size) {
+  const INPUT* group = input_vector;
+  for (int out = 0; out < output_size; ++out, group += reduction_size) {
+    OUTPUT acc = 0;
+    for (int k = 0; k < reduction_size; ++k) {
+      acc += group[k];
+    }
+    output_vector[out] = acc;
+  }
+}
+
+// Per batch row of v_size floats: subtract mean, divide by sqrt(var + 1e-8).
+void PortableMeanStddevNormalization(const float* __restrict__ input_vector,
+                                     float* __restrict__ output_vector,
+                                     int v_size, int n_batch);
+
+// Saturating add: sums the rescaled input and recurrent values, clamps to int16.
+void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp,
+                                  const int8_t* recurrent, int8_t recurrent_zp,
+                                  int32_t input_effective_scale_a,
+                                  int32_t input_effective_scale_b,
+                                  int32_t recurrent_effective_scale_a,
+                                  int32_t recurrent_effective_scale_b,
+                                  int32_t n_batch, int32_t n_cell,
+                                  int16_t* output);
+
+}  // namespace tensor_utils
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_PORTABLE_TENSOR_UTILS_IMPL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h
new file mode 100644
index 0000000..c2678b5
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h
@@ -0,0 +1,158 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
+
+namespace tflite {
+
+template <int N>
+struct Dims {
+  int sizes[N];    // Extent of each dimension (bounds the matching index).
+  int strides[N];  // Multiplier applied to each index in a flat offset.
+};
+
+// Fixed-capacity tensor shape: up to kMaxSmallSize int32 dims stored inline.
+class RuntimeShape {
+ public:
+  RuntimeShape& operator=(RuntimeShape const&) = delete;
+
+  // RuntimeShape in TFLM supports up to 5 dimensions.
+  // The name kMaxSmallSize comes from the same file of the upstream
+  // tensorflow lite repo and needs to be kept the same for max reuse.
+  static constexpr int kMaxSmallSize = 5;
+
+  RuntimeShape() : size_(0) {}
+
+  explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {
+    TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize);
+  }
+
+  RuntimeShape(int shape_size, int32_t value) : size_(shape_size) {
+    TFLITE_DCHECK_LE(shape_size, kMaxSmallSize);  // Keep SetDim inside dims_.
+    for (int i = 0; i < shape_size; ++i) SetDim(i, value);
+  }
+
+  RuntimeShape(int dimensions_count, const int32_t* dims_data)
+      : size_(dimensions_count) {
+    ReplaceWith(dimensions_count, dims_data);  // Also range-checks the count.
+  }
+
+  bool operator==(const RuntimeShape& comp) const {
+    return size_ == comp.size_ &&
+           std::memcmp(dims_, comp.dims_, size_ * sizeof(int32_t)) == 0;
+  }
+
+  ~RuntimeShape() {}
+
+  int32_t DimensionsCount() const { return size_; }
+  int32_t Dims(int i) const {
+    TFLITE_DCHECK_GE(i, 0);
+    TFLITE_DCHECK_LT(i, size_);
+    return dims_[i];
+  }
+  void SetDim(int i, int32_t val) {
+    TFLITE_DCHECK_GE(i, 0);
+    TFLITE_DCHECK_LT(i, size_);
+    dims_[i] = val;
+  }
+
+  static RuntimeShape ExtendedShape(int new_shape_size,
+                                    const RuntimeShape& shape) {
+    return RuntimeShape(new_shape_size, shape, 1);
+  }
+  int32_t* DimsData() { return dims_; }
+  const int32_t* DimsData() const { return dims_; }
+  const int32_t* DimsDataUpTo5D() const { return dims_; }
+
+  void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
+    TFLITE_DCHECK_LE(dimensions_count, kMaxSmallSize);  // memcpy fits dims_.
+    size_ = dimensions_count;
+    std::memcpy(DimsData(), dims_data, dimensions_count * sizeof(int32_t));
+  }
+
+  // Returns the total count of elements, that is the size when flattened into a
+  // vector.
+  int FlatSize() const {
+    int buffer_size = 1;
+    const int* dims_data = reinterpret_cast<const int*>(DimsData());
+    for (int i = 0; i < size_; i++) {
+      buffer_size *= dims_data[i];
+    }
+    return buffer_size;
+  }
+
+ private:
+  // For use only by ExtendedShape(), written to guarantee (return-value) copy
+  // elision in C++17.
+  // This creates a shape padded to the desired size with the specified value.
+  RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
+      : size_(new_shape_size) {
+    // If the following check fails, it is likely because a 4D-only kernel is
+    // being used with an array of larger dimension count.
+    TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
+    TFLITE_DCHECK_LE(new_shape_size, kMaxSmallSize);
+    const int size_increase = new_shape_size - shape.DimensionsCount();
+    for (int i = 0; i < size_increase; ++i) SetDim(i, pad_value);
+    std::memcpy(DimsData() + size_increase, shape.DimsData(),
+                sizeof(int32_t) * shape.DimensionsCount());
+  }
+
+  int32_t size_;  // Number of leading entries of dims_ that are in use.
+  union {
+    int32_t dims_[kMaxSmallSize];
+  };
+};
+
+// Since tensors with '0' in their shape are valid in TF, these offset functions
+// allow that as long as the corresponding index is also 0. It is up to the
+// calling ops to ensure that they perform verification checks on tensor shapes
+// if they don't support a particular behavior.
+
+inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
+  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);  // 4-D only; i3 is innermost.
+  const int* dims_data = reinterpret_cast<const int*>(shape.DimsData());
+  TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
+                (i0 >= 0 && i0 < dims_data[0]));
+  TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
+                (i1 >= 0 && i1 < dims_data[1]));
+  TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
+                (i2 >= 0 && i2 < dims_data[2]));
+  TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
+                (i3 >= 0 && i3 < dims_data[3]));
+  return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
+}
+
+inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3,
+                  int i4) {
+  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5);  // 5-D only; i4 is innermost.
+  const int* dims_data = reinterpret_cast<const int*>(shape.DimsData());
+  TFLITE_DCHECK((dims_data[0] == 0 && i0 == 0) ||
+                (i0 >= 0 && i0 < dims_data[0]));
+  TFLITE_DCHECK((dims_data[1] == 0 && i1 == 0) ||
+                (i1 >= 0 && i1 < dims_data[1]));
+  TFLITE_DCHECK((dims_data[2] == 0 && i2 == 0) ||
+                (i2 >= 0 && i2 < dims_data[2]));
+  TFLITE_DCHECK((dims_data[3] == 0 && i3 == 0) ||
+                (i3 >= 0 && i3 < dims_data[3]));
+  TFLITE_DCHECK((dims_data[4] == 0 && i4 == 0) ||
+                (i4 >= 0 && i4 < dims_data[4]));
+  return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) *
+             dims_data[4] +
+         i4;
+}
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_RUNTIME_SHAPE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h
index 002f907..18a7940 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/strided_slice_logic.h
@@ -69,6 +69,69 @@ inline void StridedSlicePadIndices(tflite::StridedSliceParams* p,
   p->strides_count = dim_count;
 }
 
+// Returns the start index along `axis`: params.start_indices[axis], wrapped if
+// negative and clamped to [0, axis_size] (stride > 0) or [-1, axis_size - 1]
+// (otherwise); a set begin_mask bit forces 0 or axis_size - 1 instead.
+inline int StridedSliceStartForAxis(const tflite::StridedSliceParams& params,
+                                    const RuntimeShape& input_shape,
+                                    int32_t axis) {
+  const int32_t axis_size = input_shape.Dims(axis);
+  int32_t start = params.start_indices[axis];
+  const int32_t stride = params.strides[axis];
+  const int32_t begin_mask = (params.begin_mask & 1 << axis);
+  if (start < 0) {
+    start += axis_size;  // Negative indices count back from axis_size.
+  }
+  if (stride > 0) {
+    start = Clamp(start, 0, axis_size);
+  } else {
+    start = Clamp(start, -1, axis_size - 1);
+  }
+  if (begin_mask) {  // Masked: ignore start_indices and use the axis bound.
+    if (stride > 0) {
+      start = 0;
+    } else {
+      start = axis_size - 1;
+    }
+  }
+  return start;
+}
+
+inline int StridedSliceEndForAxis(const tflite::StridedSliceParams& params,
+                                  const RuntimeShape& input_shape, int axis,
+                                  int start) {  // End index for `axis`.
+  const auto shrink_axis_mask = params.shrink_axis_mask;
+  const bool shrink_axis = shrink_axis_mask & (1 << axis);
+  const int axis_size = input_shape.Dims(axis);
+  if (shrink_axis) {  // Shrunk axis: the range holds at most one element.
+    if (start >= axis_size) {
+      return start;
+    } else {
+      return start + 1;
+    }
+  }
+  const auto* indices = params.stop_indices;
+  int end = indices[axis];
+  const int32_t stride = params.strides[axis];
+  const int32_t end_mask = (params.end_mask & 1 << axis);
+  if (end < 0) {
+    end += axis_size;  // Negative indices count back from axis_size.
+  }
+  if (stride > 0) {
+    end = Clamp(end, 0, axis_size);
+  } else {
+    end = Clamp(end, -1, axis_size - 1);
+  }
+  if (end_mask) {  // Masked: ignore stop_indices and run to the axis bound.
+    if (stride > 0) {
+      end = axis_size;
+    } else {
+      end = -1;
+    }
+  }
+  return end;
+}
+
 // Return the index for the first element along that axis. This index will be a
 // positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0)
 // that can be used to index directly into the data.
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h
index 831843c..de2d802 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h
@@ -15,7 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_TENSOR_CTYPES_H_
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cc b/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cc
new file mode 100644
index 0000000..7527994
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils.cc
@@ -0,0 +1,25 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================
+*/
+
+// internal/reference_portable_tensor_utils.h has the implementation of the
+// functions declared in internal/portable_tensor_utils.h. This somewhat
+// confusing setup is derived from how the code is organized in TfLite where it
+// is used to select between NEON, SSE and portable implementations. See
+// https://github.com/tensorflow/tensorflow/blob/d76c23975c4a3a0d7987cfe3f45c76566df06180/tensorflow/lite/kernels/internal/tensor_utils.cc
+// for how the code is written in TfLite.
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference_portable_tensor_utils.h"
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h b/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h
index 803d1d3..9e73812 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <initializer_list>
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/runtime_shape.h"
 
 namespace tflite {
 
@@ -139,211 +140,22 @@ inline bool operator==(const QuantizationParams& qp1,
   return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale;
 }
 
-template <int N>
-struct Dims {
-  int sizes[N];
-  int strides[N];
-};
-
-class RuntimeShape {
- public:
-  // Shapes with dimensions up to 5 are stored directly in the structure, while
-  // larger shapes are separately allocated.
-  static constexpr int kMaxSmallSize = 5;
-
-  RuntimeShape& operator=(RuntimeShape const&) = delete;
-
-  RuntimeShape() : size_(0) {}
-
-  explicit RuntimeShape(int dimensions_count) : size_(dimensions_count) {
-    if (dimensions_count > kMaxSmallSize) {
-#ifdef TF_LITE_STATIC_MEMORY
-      TFLITE_CHECK(false && "No shape resizing supported on this platform");
-#else  // TF_LITE_STATIC_MEMORY
-      dims_pointer_ = new int32_t[dimensions_count];
-#endif  // TF_LITE_STATIC_MEMORY
-    }
-  }
-
-  RuntimeShape(int shape_size, int32_t value) : size_(0) {
-    Resize(shape_size);
-    for (int i = 0; i < shape_size; ++i) {
-      SetDim(i, value);
-    }
-  }
-
-  RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) {
-    ReplaceWith(dimensions_count, dims_data);
-  }
-
-  RuntimeShape(const std::initializer_list<int> init_list) : size_(0) {
-    BuildFrom(init_list);
-  }
-
-  // Avoid using this constructor.  We should be able to delete it when C++17
-  // rolls out.
-  RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) {
-    if (size_ > kMaxSmallSize) {
-#ifdef TF_LITE_STATIC_MEMORY
-      TFLITE_CHECK(false && "No shape resizing supported on this platform");
-#else
-      dims_pointer_ = new int32_t[size_];
-#endif
-    }
-    std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_);
-  }
-
-  bool operator==(const RuntimeShape& comp) const {
-    return this->size_ == comp.size_ &&
-           std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) ==
-               0;
-  }
-
-  ~RuntimeShape() {
-    if (size_ > kMaxSmallSize) {
-#ifdef TF_LITE_STATIC_MEMORY
-      TFLITE_CHECK(false && "No shape resizing supported on this platform");
-#else  // TF_LITE_STATIC_MEMORY
-      delete[] dims_pointer_;
-#endif  // TF_LITE_STATIC_MEMORY
-    }
-  }
-
-  inline int32_t DimensionsCount() const { return size_; }
-  inline int32_t Dims(int i) const {
-    TFLITE_DCHECK_GE(i, 0);
-    TFLITE_DCHECK_LT(i, size_);
-    return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i];
-  }
-  inline void SetDim(int i, int32_t val) {
-    TFLITE_DCHECK_GE(i, 0);
-    TFLITE_DCHECK_LT(i, size_);
-    if (size_ > kMaxSmallSize) {
-      dims_pointer_[i] = val;
-    } else {
-      dims_[i] = val;
-    }
-  }
-
-  inline int32_t* DimsData() {
-    return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
-  }
-  inline const int32_t* DimsData() const {
-    return size_ > kMaxSmallSize ? dims_pointer_ : dims_;
-  }
-  // The caller must ensure that the shape is no bigger than 5-D.
-  inline const int32_t* DimsDataUpTo5D() const { return dims_; }
-
-  inline void Resize(int dimensions_count) {
-    if (size_ > kMaxSmallSize) {
-#ifdef TF_LITE_STATIC_MEMORY
-      TFLITE_CHECK(false && "No shape resizing supported on this platform");
-#else  // TF_LITE_STATIC_MEMORY
-      delete[] dims_pointer_;
-#endif  // TF_LITE_STATIC_MEMORY
-    }
-    size_ = dimensions_count;
-    if (dimensions_count > kMaxSmallSize) {
-#ifdef TF_LITE_STATIC_MEMORY
-      TFLITE_CHECK(false && "No shape resizing supported on this platform");
-#else  // TF_LITE_STATIC_MEMORY
-      dims_pointer_ = new int32_t[dimensions_count];
-#endif  // TF_LITE_STATIC_MEMORY
-    }
-  }
-
-  inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) {
-    Resize(dimensions_count);
-    int32_t* dst_dims = DimsData();
-    std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t));
-  }
-
-  template <typename T>
-  inline void BuildFrom(const T& src_iterable) {
-    const int dimensions_count =
-        std::distance(src_iterable.begin(), src_iterable.end());
-    Resize(dimensions_count);
-    int32_t* data = DimsData();
-    for (auto it : src_iterable) {
-      *data = it;
-      ++data;
-    }
-  }
-
-  // This will probably be factored out. Old code made substantial use of 4-D
-  // shapes, and so this function is used to extend smaller shapes. Note that
-  // (a) as Dims<4>-dependent code is eliminated, the reliance on this should be
-  // reduced, and (b) some kernels are stricly 4-D, but then the shapes of their
-  // inputs should already be 4-D, so this function should not be needed.
-  inline static RuntimeShape ExtendedShape(int new_shape_size,
-                                           const RuntimeShape& shape) {
-    return RuntimeShape(new_shape_size, shape, 1);
-  }
-
-  inline void BuildFrom(const std::initializer_list<int> init_list) {
-    BuildFrom<const std::initializer_list<int>>(init_list);
-  }
-
-  // Returns the total count of elements, that is the size when flattened into a
-  // vector.
-  inline int FlatSize() const {
-    int buffer_size = 1;
-    const int* dims_data = reinterpret_cast<const int*>(DimsData());
-    for (int i = 0; i < size_; i++) {
-      buffer_size *= dims_data[i];
-    }
-    return buffer_size;
-  }
-
-  bool operator!=(const RuntimeShape& comp) const { return !((*this) == comp); }
-
- private:
-  // For use only by ExtendedShape(), written to guarantee (return-value) copy
-  // elision in C++17.
-  // This creates a shape padded to the desired size with the specified value.
-  RuntimeShape(int new_shape_size, const RuntimeShape& shape, int pad_value)
-      : size_(0) {
-    // If the following check fails, it is likely because a 4D-only kernel is
-    // being used with an array of larger dimension count.
-    TFLITE_CHECK_GE(new_shape_size, shape.DimensionsCount());
-    Resize(new_shape_size);
-    const int size_increase = new_shape_size - shape.DimensionsCount();
-    for (int i = 0; i < size_increase; ++i) {
-      SetDim(i, pad_value);
-    }
-    std::memcpy(DimsData() + size_increase, shape.DimsData(),
-                sizeof(int32_t) * shape.DimensionsCount());
-  }
-
-  int32_t size_;
-  union {
-    int32_t dims_[kMaxSmallSize];
-    int32_t* dims_pointer_;
-  };
+// Quantization parameters for each channel, determining the mapping of
+// quantized values to real values. See QuantizationParams for a single set of
+// parameters per tensor. This has one parameter set per channel.
+//
+// The correspondence is as follows:
+//
+//   real_value = scale[channel] * (quantized_value - zero_point[channel]);
+//
+struct PerChannelQuantizationParams {
+  // The following members typically point to the corresponding members of a
+  // TfLiteAffineQuantization struct.
+  const float* scale;
+  const int32_t* zero_point;
+  int32_t quantized_dimension;  // Presumably the channel axis; TODO confirm.
 };
 
-// Converts inference-style shape to legacy tflite::Dims<4>.
-inline tflite::Dims<4> ToRuntimeDims(const tflite::RuntimeShape& array_shape) {
-  tflite::Dims<4> result;
-  const int dimensions_count = array_shape.DimensionsCount();
-  TFLITE_CHECK_LE(dimensions_count, 4);
-  int cum_prod = 1;
-  for (int i = 0; i < 4; i++) {
-    const int new_dim =
-        (i < dimensions_count) ? array_shape.Dims(dimensions_count - 1 - i) : 1;
-    result.sizes[i] = new_dim;
-    result.strides[i] = cum_prod;
-    cum_prod *= new_dim;
-  }
-  return result;
-}
-
-// TODO(b/80418076): Move to legacy ops file, update invocations.
-inline RuntimeShape DimsToShape(const tflite::Dims<4>& dims) {
-  return RuntimeShape(
-      {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]});
-}
-
 // Gets next index to iterate through a multidimensional array.
 inline bool NextIndex(const int num_dims, const int* dims, int* current) {
   if (num_dims == 0) {
@@ -400,35 +212,20 @@ inline size_t ReducedOutputOffset(const int num_dims, const int* dims,
   return offset;
 }
 
-inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3) {
-  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 4);
-  const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
-  TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
-  return ((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3;
-}
-
-inline int Offset(const RuntimeShape& shape, int i0, int i1, int i2, int i3,
-                  int i4) {
-  TFLITE_DCHECK_EQ(shape.DimensionsCount(), 5);
-  const int* dims_data = reinterpret_cast<const int*>(shape.DimsDataUpTo5D());
-  TFLITE_DCHECK(i0 >= 0 && i0 < dims_data[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < dims_data[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < dims_data[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < dims_data[3]);
-  TFLITE_DCHECK(i4 >= 0 && i4 < dims_data[4]);
-  return (((i0 * dims_data[1] + i1) * dims_data[2] + i2) * dims_data[3] + i3) *
-             dims_data[4] +
-         i4;
-}
+// Since tensors with '0' in their shape are valid in TF, these offset functions
+// allow that as long as the corresponding index is also 0. It is up to the
+// calling ops to ensure that they perform verification checks on tensor shapes
+// if they don't support a particular behavior.
 
 inline int Offset(const Dims<4>& dims, int i0, int i1, int i2, int i3) {
-  TFLITE_DCHECK(i0 >= 0 && i0 < dims.sizes[0]);
-  TFLITE_DCHECK(i1 >= 0 && i1 < dims.sizes[1]);
-  TFLITE_DCHECK(i2 >= 0 && i2 < dims.sizes[2]);
-  TFLITE_DCHECK(i3 >= 0 && i3 < dims.sizes[3]);
+  TFLITE_DCHECK((i0 == 0 && dims.sizes[0] == 0) ||
+                (i0 >= 0 && i0 < dims.sizes[0]));
+  TFLITE_DCHECK((i1 == 0 && dims.sizes[1] == 0) ||
+                (i1 >= 0 && i1 < dims.sizes[1]));
+  TFLITE_DCHECK((i2 == 0 && dims.sizes[2] == 0) ||
+                (i2 >= 0 && i2 < dims.sizes[2]));
+  TFLITE_DCHECK((i3 == 0 && dims.sizes[3] == 0) ||
+                (i3 >= 0 && i3 < dims.sizes[3]));
   return i0 * dims.strides[0] + i1 * dims.strides[1] + i2 * dims.strides[2] +
          i3 * dims.strides[3];
 }
@@ -437,10 +234,6 @@ inline int Offset(const Dims<4>& dims, int* index) {
   return Offset(dims, index[0], index[1], index[2], index[3]);
 }
 
-inline int Offset(const RuntimeShape& shape, int* index) {
-  return Offset(shape, index[0], index[1], index[2], index[3]);
-}
-
 // Get array size, DCHECKing that the dim index is in range.
 //
 // Note that this will be phased out with Dims<4>, since RuntimeShape::Dims()
@@ -602,6 +395,58 @@ inline int MatchingFlatSize(const Dims<N>& dims, const Dims<N>& check_dims_0,
   return MatchingFlatSize(dims, check_dims_1, check_dims_2, check_dims_3);
 }
 
+// Returns shape.FlatSize(); DCHECKs the shapes agree up to leading 1-dims.
+inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
+                                         const RuntimeShape& check_shape_0) {
+  const int shape_dims = shape.DimensionsCount();
+  const int check_shape_0_dims = check_shape_0.DimensionsCount();
+  const int min_dims = std::min(shape_dims, check_shape_0_dims);
+
+  for (int i = 0; i < min_dims; ++i) {  // Shared trailing dims must be equal.
+    TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i),
+                     check_shape_0.Dims(check_shape_0_dims - 1 - i));
+  }
+  for (int i = min_dims; i < shape_dims; ++i) {  // Extra leading dims: all 1.
+    TFLITE_DCHECK_EQ(shape.Dims(shape_dims - 1 - i), 1);
+  }
+  for (int i = min_dims; i < check_shape_0_dims; ++i) {  // Likewise here.
+    TFLITE_DCHECK_EQ(check_shape_0.Dims(check_shape_0_dims - 1 - i), 1);
+  }
+  return shape.FlatSize();
+}
+
+inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
+                                         const RuntimeShape& check_shape_0,
+                                         const RuntimeShape& check_shape_1) {
+  const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
+  TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1),
+                   flat_size);
+  return flat_size;  // Equals shape.FlatSize(); the calls above do the checks.
+}
+
+inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
+                                         const RuntimeShape& check_shape_0,
+                                         const RuntimeShape& check_shape_1,
+                                         const RuntimeShape& check_shape_2) {
+  const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
+  TFLITE_DCHECK_EQ(
+      MatchingExtendedShapeFlatSize(shape, check_shape_1, check_shape_2),
+      flat_size);
+  return flat_size;  // Equals shape.FlatSize(); the calls above do the checks.
+}
+
+inline int MatchingExtendedShapeFlatSize(const RuntimeShape& shape,
+                                         const RuntimeShape& check_shape_0,
+                                         const RuntimeShape& check_shape_1,
+                                         const RuntimeShape& check_shape_2,
+                                         const RuntimeShape& check_shape_3) {
+  const int flat_size = MatchingExtendedShapeFlatSize(shape, check_shape_0);
+  TFLITE_DCHECK_EQ(MatchingExtendedShapeFlatSize(shape, check_shape_1,
+                                                 check_shape_2, check_shape_3),
+                   flat_size);
+  return flat_size;  // Equals shape.FlatSize(); the calls above do the checks.
+}
+
 // Data is required to be contiguous, and so many operators can use either the
 // full array flat size or the flat size with one dimension skipped (commonly
 // the depth).
@@ -885,6 +730,8 @@ struct Conv3DParams {
   float float_activation_max;
 };
 
+typedef Conv3DParams Conv3DTransposeParams;
+
 struct DepthToSpaceParams {
   int32_t block_size;
 };
@@ -1019,9 +866,9 @@ struct PackParams {
 
 struct PadParams {
   int8_t left_padding_count;
-  int32_t left_padding[4];
+  int32_t left_padding[5];
   int8_t right_padding_count;
-  int32_t right_padding[4];
+  int32_t right_padding[5];
   ResizingCategory resizing_category;
 };
 
@@ -1127,11 +974,11 @@ struct StridedSliceParams {
   int8_t strides_count;
   int32_t strides[5];
 
-  int16_t begin_mask;
-  int16_t ellipsis_mask;
-  int16_t end_mask;
-  int16_t new_axis_mask;
-  int16_t shrink_axis_mask;
+  uint16_t begin_mask;
+  uint16_t ellipsis_mask;
+  uint16_t end_mask;
+  uint16_t new_axis_mask;
+  uint16_t shrink_axis_mask;
 };
 
 struct TanhParams {
@@ -1145,7 +992,7 @@ constexpr int kTransposeMaxDimensions = 6;
 
 struct TransposeParams {
   int8_t perm_count;
-  int32_t perm[5];
+  int32_t perm[kTransposeMaxDimensions];
 };
 
 struct UnpackParams {
@@ -1198,6 +1045,23 @@ inline void GetActivationParams(const P& params, int64_t* min, int64_t* max) {
   *min = params.int64_activation_min;
   *max = params.int64_activation_max;
 }
+
+// Type trait: true for int8_t, uint8_t, int16_t and uint16_t (under 4 bytes).
+template <typename T>
+struct is_small_integer
+    : public std::integral_constant<bool,
+                                    std::is_same<T, int8_t>::value ||
+                                        std::is_same<T, uint8_t>::value ||
+                                        std::is_same<T, int16_t>::value ||
+                                        std::is_same<T, uint16_t>::value> {};
+
+// Type trait to check if the given type is int32_t or int64_t.
+template <typename T>
+struct is_int32_or_int64
+    : public std::integral_constant<bool, std::is_same<T, int32_t>::value ||
+                                              std::is_same<T, int64_t>::value> {
+};
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_TYPES_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h b/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h
index 01a1cd2..5f33173 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h
@@ -18,9 +18,12 @@ limitations under the License.
 #include <stdint.h>
 
 #include <limits>
+#ifndef TF_LITE_STATIC_MEMORY
+#include <string>
+#endif  // TF_LITE_STATIC_MEMORY
 
-#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 
 namespace tflite {
 
@@ -148,13 +151,18 @@ inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; }
 inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
   return t->dims->data[dim];
 }
+
 inline int NumDimensions(const TfLiteEvalTensor* t) { return t->dims->size; }
 inline int SizeOfDimension(const TfLiteEvalTensor* t, int dim) {
   return t->dims->data[dim];
 }
 
-inline int NumInputs(const TfLiteNode* node) { return node->inputs->size; }
-inline int NumOutputs(const TfLiteNode* node) { return node->outputs->size; }
+inline int NumInputs(const TfLiteNode* node) {
+  return node->inputs == nullptr ? 0 : node->inputs->size;  // null => 0 inputs
+}
+inline int NumOutputs(const TfLiteNode* node) {
+  return node->outputs == nullptr ? 0 : node->outputs->size;  // null => 0
+}
 
 #ifndef TF_LITE_STATIC_MEMORY
 inline int NumIntermediates(const TfLiteNode* node) {
@@ -174,6 +182,14 @@ inline int64_t NumElements(const TfLiteTensor* t) {
   return NumElements(t->dims);
 }
 
+inline int64_t NumElements(const int* dims, int num_dims) {
+  int64_t count = 1;  // Empty product: stays 1 when num_dims <= 0.
+  for (int i = 0; i < num_dims; ++i) {
+    count *= dims[i];  // Product of the first num_dims entries of dims.
+  }
+  return count;
+}
+
 // Determines whether tensor is constant.
 // TODO(b/138199592): Introduce new query which checks for constant OR
 // persistent-read-only, which would be useful for most tensor kernels that
@@ -183,6 +199,11 @@ inline bool IsConstantTensor(const TfLiteTensor* tensor) {
   return tensor->allocation_type == kTfLiteMmapRo;
 }
 
+inline bool IsConstantOrPersistentTensor(const TfLiteTensor* tensor) {
+  return IsConstantTensor(tensor) ||  // allocation_type == kTfLiteMmapRo
+         (tensor->allocation_type == kTfLitePersistentRo);
+}
+
 // Determines whether tensor is dynamic. Note that a tensor can be non-const and
 // not dynamic. This function specifically checks for a dynamic tensor.
 inline bool IsDynamicTensor(const TfLiteTensor* tensor) {
@@ -218,14 +239,15 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
     const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
     const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
     int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift);
+    int32_t* per_channel_multiplier, int32_t* per_channel_shift);
 
 TfLiteStatus PopulateConvolutionQuantizationParams(
     TfLiteContext* context, const TfLiteTensor* input,
     const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
     const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
     int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels);
+    int32_t* per_channel_multiplier, int32_t* per_channel_shift,
+    int num_channels);
 
 // Calculates the multiplication factor for a quantized convolution (or
 // quantized depthwise convolution) involving the given tensors. Returns an
@@ -274,6 +296,16 @@ void CalculateActivationRange(TfLiteFusedActivation activation,
 // Return true if the given tensors have the same shape.
 bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2);
 
+#if !defined(TF_LITE_STATIC_MEMORY)
+// Gets the output shape from the input tensor.
+TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context,
+                                     const TfLiteTensor* input,
+                                     TfLiteIntArray** output_shape);
+
+const std::string GetShapeDebugString(const TfLiteIntArray* shape);
+
+#endif  // !defined(TF_LITE_STATIC_MEMORY)
+
 // Calculates the output_shape that is necessary for element-wise operations
 // with broadcasting involving the two input tensors.
 TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
@@ -289,12 +321,15 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                         const TfLiteTensor* input3,
                                         TfLiteIntArray** output_shape);
 
-// Return the size of given type in bytes. Return 0 in in case of string.
+// Return the size of given type in bytes. Return 0 in case of string.
 int TfLiteTypeGetSize(TfLiteType type);
 
 // Whether the current platform is mobile (Android or iOS).
 bool IsMobilePlatform();
 
+// Returns whether there is unspecified dimension in the tensor's dim signature.
+bool HasUnspecifiedDimension(const TfLiteTensor* tensor);
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_KERNELS_KERNEL_UTIL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cc b/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cc
index d83d06f..a786b68 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cc
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/kernel_util_lite.cc
@@ -14,14 +14,6 @@ limitations under the License.
 ==============================================================================*/
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 
-// Patched by Edge Impulse, remove these declarations for Eta Compute
-#ifdef ECM3532
-#undef _GLIBCXX_HAVE_ENOTSUP
-#undef _GLIBCXX_HAVE_ECANCELED
-#undef _GLIBCXX_HAVE_EOWNERDEAD
-#undef _GLIBCXX_HAVE_ENOTRECOVERABLE
-#endif
-
 #include <stdint.h>
 #include <stdlib.h>
 
@@ -33,8 +25,9 @@ limitations under the License.
 #include <string>
 #endif  // TF_LITE_STATIC_MEMORY
 
-#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/context_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 
@@ -123,6 +116,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
 TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
                                int index) {
   TfLiteTensor* tensor = GetMutableInput(context, node, index);
+  if (tensor == nullptr) return nullptr;
   return tensor->is_variable ? tensor : nullptr;
 }
 
@@ -201,7 +195,7 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
     const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
     const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
     int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift) {
+    int32_t* per_channel_multiplier, int32_t* per_channel_shift) {
   const auto* affine_quantization =
       reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
   return PopulateConvolutionQuantizationParams(
@@ -216,7 +210,8 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
     const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
     const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
     int32_t* output_activation_min, int32_t* output_activation_max,
-    int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
+    int32_t* per_channel_multiplier, int32_t* per_channel_shift,
+    int num_channels) {
   TF_LITE_ENSURE_EQ(context, input->quantization.type,
                     kTfLiteAffineQuantization);
   TF_LITE_ENSURE_EQ(context, filter->quantization.type,
@@ -237,7 +232,8 @@ TfLiteStatus PopulateConvolutionQuantizationParams(
     //  Currently only Int8/Int16 is supported for per channel quantization.
     TF_LITE_ENSURE(context,
                    input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
-    TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
+    TF_LITE_ENSURE(context,
+                   filter->type == kTfLiteInt8 || filter->type == kTfLiteInt4);
     TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
     TF_LITE_ENSURE_EQ(
         context, num_channels,
@@ -337,30 +333,49 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
 }
 
 namespace {
-void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
-                                           int32_t qmin, int32_t qmax,
-                                           TfLiteTensor* output,
-                                           int32_t* act_min, int32_t* act_max) {
+
+inline TfLiteStatus Quantize(TfLiteContext* context, float scale,
+                             int32_t zero_point, float f, int32_t& q) {
+  const float tmp = TfLiteRound(f / scale);  // f in quantization steps, rounded
+  const bool no_integer_overflow_from_quantization =
+      (tmp >= static_cast<float>(std::numeric_limits<int32_t>::min()) &&
+       tmp < static_cast<float>(std::numeric_limits<int32_t>::max()));  // strict: float(INT32_MAX) is 2^31
+  TF_LITE_ENSURE(context, no_integer_overflow_from_quantization);  // fail instead of an out-of-range cast
+  q = zero_point + static_cast<int32_t>(tmp);  // result is written through the reference parameter
+  return kTfLiteOk;
+}
+
+TfLiteStatus CalculateActivationRangeQuantizedImpl(
+    TfLiteContext* context, TfLiteFusedActivation activation, int32_t qmin,
+    int32_t qmax, TfLiteTensor* output, int32_t* act_min, int32_t* act_max) {
   const auto scale = output->params.scale;
   const auto zero_point = output->params.zero_point;
 
-  auto quantize = [scale, zero_point](float f) {
-    return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
-  };
-
+  int32_t tmp_q;
   if (activation == kTfLiteActRelu) {
-    *act_min = std::max(qmin, quantize(0.0));
+    TF_LITE_ENSURE_OK(context,
+                      Quantize(context, scale, zero_point, 0.0, tmp_q));
+    *act_min = std::max(qmin, tmp_q);
     *act_max = qmax;
   } else if (activation == kTfLiteActRelu6) {
-    *act_min = std::max(qmin, quantize(0.0));
-    *act_max = std::min(qmax, quantize(6.0));
+    TF_LITE_ENSURE_OK(context,
+                      Quantize(context, scale, zero_point, 0.0, tmp_q));
+    *act_min = std::max(qmin, tmp_q);
+    TF_LITE_ENSURE_OK(context,
+                      Quantize(context, scale, zero_point, 6.0, tmp_q));
+    *act_max = std::min(qmax, tmp_q);
   } else if (activation == kTfLiteActReluN1To1) {
-    *act_min = std::max(qmin, quantize(-1.0));
-    *act_max = std::min(qmax, quantize(1.0));
+    TF_LITE_ENSURE_OK(context,
+                      Quantize(context, scale, zero_point, -1.0, tmp_q));
+    *act_min = std::max(qmin, tmp_q);
+    TF_LITE_ENSURE_OK(context,
+                      Quantize(context, scale, zero_point, 1.0, tmp_q));
+    *act_max = std::min(qmax, tmp_q);
   } else {
     *act_min = qmin;
     *act_max = qmax;
   }
+  return kTfLiteOk;
 }
 }  // namespace
 
@@ -384,9 +399,8 @@ TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
     TF_LITE_ENSURE(context, false);
   }
 
-  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
-                                        act_max);
-  return kTfLiteOk;
+  return CalculateActivationRangeQuantizedImpl(context, activation, qmin, qmax,
+                                               output, act_min, act_max);
 }
 
 bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
@@ -394,6 +408,24 @@ bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
 }
 
 #ifndef TF_LITE_STATIC_MEMORY
+TfLiteStatus GetOutputShapeFromInput(TfLiteContext* context,
+                                     const TfLiteTensor* input,
+                                     TfLiteIntArray** output_shape) {
+  if (NumDimensions(input) != 1) {  // the shape must be supplied as a 1D tensor
+    TF_LITE_KERNEL_LOG(const_cast<TfLiteContext*>(context),
+                       "Invalid %dD input tensor (must be a 1D tensor).",
+                       NumDimensions(input));
+    return kTfLiteError;
+  }
+  const int output_dims = SizeOfDimension(input, 0);  // number of entries in the output shape
+  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
+      TfLiteIntArrayCreate(output_dims), TfLiteIntArrayFree);  // freed automatically unless released below
+  for (int i = 0; i < output_dims; i++) {
+    shape->data[i] = input->data.i32[i];  // NOTE(review): reads int32 data; input->type is not checked here
+  }
+  *output_shape = shape.release();  // ownership of the array passes to the caller
+  return kTfLiteOk;
+}
 
 // TODO(b/172067338): Having this function be part of TF_LITE_STATIC_MEMORY
 // build results in a 6KB size increase, even though the function is unsused for
@@ -401,15 +433,24 @@ bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
 // unsused function, the string library that gets pulled in is not dropped,
 // resulting in the increased binary size.
 // Patched by Edge Impulse, issues with building for TinkerGen
-std::string GetShapeDebugString(const TfLiteIntArray* shape) {
-  std::string str = "GetShapeDebugString";
-  // for (int d = 0; d < shape->size; ++d) {
-  //   if (str.empty())
-  //     str = "[" + std::to_string(shape->data[d]);
-  //   else
-  //     str += ", " + std::to_string(shape->data[d]);
-  // }
-  // str += "]";
+// TODO inspect if we still need this
+const std::string GetShapeDebugString(const TfLiteIntArray* shape) {
+  std::string str = "GetShapeDebugString";;
+  /*
+  for (int d = 0; d < shape->size; ++d) {
+    if (str.empty())
+      str = "[" + std::to_string(shape->data[d]);
+    else
+      // Don't add space after "," to make the output consistent with
+      // tensorflow::shape_inference::InferenceContext::DebugString()
+      str += "," + std::to_string(shape->data[d]);
+  }
+  if (str.empty()) {
+    str = "[]";
+  } else {
+    str += "]";
+  }
+  */
   return str;
 }
 
@@ -417,26 +458,28 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                         const TfLiteTensor* input1,
                                         const TfLiteTensor* input2,
                                         TfLiteIntArray** output_shape) {
-  int dims1 = NumDimensions(input1);
-  int dims2 = NumDimensions(input2);
-  int out_dims = std::max(dims1, dims2);
-  if (NumElements(input1) == 0) {
-    *output_shape = TfLiteIntArrayCopy(input1->dims);
-    return kTfLiteOk;
-  }
+  const int dims1 = NumDimensions(input1);
+  const int dims2 = NumDimensions(input2);
+  const int out_dims = std::max(dims1, dims2);
+
   std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
       TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
   for (int i = 0; i < out_dims; ++i) {
-    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
-    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
+    const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
+    const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
     if (!(d1 == d2 || d1 == 1 || d2 == 1)) {
-      context->ReportError(context,
-                           "Given shapes, %s and %s, are not broadcastable.",
-                           GetShapeDebugString(input1->dims).c_str(),
-                           GetShapeDebugString(input2->dims).c_str());
+      TF_LITE_KERNEL_LOG(context,
+                         "Given shapes, %s and %s, are not broadcastable.",
+                         GetShapeDebugString(input1->dims).c_str(),
+                         GetShapeDebugString(input2->dims).c_str());
       return kTfLiteError;
     }
-    shape->data[out_dims - i - 1] = std::max(d1, d2);
+
+    if (d1 == 0 || d2 == 0) {
+      shape->data[out_dims - i - 1] = 0;
+    } else {
+      shape->data[out_dims - i - 1] = std::max(d1, d2);
+    }
   }
   *output_shape = shape.release();
   return kTfLiteOk;
@@ -447,24 +490,27 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                         const TfLiteTensor* input2,
                                         const TfLiteTensor* input3,
                                         TfLiteIntArray** output_shape) {
-  int dims1 = NumDimensions(input1);
-  int dims2 = NumDimensions(input2);
-  int dims3 = NumDimensions(input3);
-  int out_dims = std::max(std::max(dims1, dims2), dims3);
+  const int dims1 = NumDimensions(input1);
+  const int dims2 = NumDimensions(input2);
+  const int dims3 = NumDimensions(input3);
+  const int out_dims = std::max(std::max(dims1, dims2), dims3);
   std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
       TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
   for (int i = 0; i < out_dims; ++i) {
-    int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
-    int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
-    int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
+    const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
+    const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
+    const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
+    const int min_value = std::min(std::min(d1, d2), d3);
     int max_value = std::max(std::max(d1, d2), d3);
+    // If one dimension is 0, others must be 0 or 1.
+    if (min_value == 0) max_value = 0;
     if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
         !(d3 == 1 || d3 == max_value)) {
-      context->ReportError(
-          context, "Given shapes, %s, %s and %s, are not broadcastable.",
-          GetShapeDebugString(input1->dims).c_str(),
-          GetShapeDebugString(input2->dims).c_str(),
-          GetShapeDebugString(input3->dims).c_str());
+      TF_LITE_KERNEL_LOG(context,
+                         "Given shapes, %s, %s and %s, are not broadcastable.",
+                         GetShapeDebugString(input1->dims).c_str(),
+                         GetShapeDebugString(input2->dims).c_str(),
+                         GetShapeDebugString(input3->dims).c_str());
       return kTfLiteError;
     }
     shape->data[out_dims - i - 1] = max_value;
@@ -478,42 +524,45 @@ TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
 int TfLiteTypeGetSize(TfLiteType type) {
   switch (type) {
     case kTfLiteUInt8:
-      TF_LITE_ASSERT_EQ(sizeof(uint8_t), 1);
+      static_assert(sizeof(uint8_t) == 1, "");
       return 1;
     case kTfLiteInt8:
-      TF_LITE_ASSERT_EQ(sizeof(int8_t), 1);
+      static_assert(sizeof(int8_t) == 1, "");
       return 1;
     case kTfLiteBool:
       return sizeof(bool);
+    case kTfLiteUInt16:
+      static_assert(sizeof(uint16_t) == 2, "");
+      return 2;
     case kTfLiteInt16:
-      TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
+      static_assert(sizeof(int16_t) == 2, "");
       return 2;
     case kTfLiteFloat16:
-      TF_LITE_ASSERT_EQ(sizeof(int16_t), 2);
+      static_assert(sizeof(int16_t) == 2, "");
       return 2;
     case kTfLiteFloat32:
-      TF_LITE_ASSERT_EQ(sizeof(float), 4);
+      static_assert(sizeof(float) == 4, "");
       return 4;
     case kTfLiteInt32:
-      TF_LITE_ASSERT_EQ(sizeof(int32_t), 4);
+      static_assert(sizeof(int32_t) == 4, "");
       return 4;
     case kTfLiteUInt32:
-      TF_LITE_ASSERT_EQ(sizeof(uint32_t), 4);
+      static_assert(sizeof(uint32_t) == 4, "");
       return 4;
     case kTfLiteInt64:
-      TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
+      static_assert(sizeof(int64_t) == 8, "");
       return 8;
     case kTfLiteUInt64:
-      TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8);
+      static_assert(sizeof(uint64_t) == 8, "");
       return 8;
     case kTfLiteFloat64:
-      TF_LITE_ASSERT_EQ(sizeof(double), 8);
+      static_assert(sizeof(double) == 8, "");
       return 8;
     case kTfLiteComplex64:
-      TF_LITE_ASSERT_EQ(sizeof(std::complex<float>), 8);
+      static_assert(sizeof(std::complex<float>) == 8, "");
       return 8;
     case kTfLiteComplex128:
-      TF_LITE_ASSERT_EQ(sizeof(std::complex<double>), 16);
+      static_assert(sizeof(std::complex<double>) == 16, "");
       return 16;
     default:
       return 0;
@@ -531,4 +580,15 @@ bool IsMobilePlatform() {
   return false;
 }
 
+bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
+#ifndef TF_LITE_STATIC_MEMORY
+  if (tensor->dims_signature) {  // nothing to scan when no signature is attached
+    for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
+      if (i == -1) return true;  // -1 is treated as an unspecified dimension
+    }
+  }
+#endif  // TF_LITE_STATIC_MEMORY
+  return false;  // also the unconditional result when TF_LITE_STATIC_MEMORY is defined
+}
+
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h b/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h
index 0f91f5e..d3c50bb 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h
@@ -15,69 +15,24 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
 #define TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
 
-// If we're on a platform without standard IO functions, fall back to a
-// non-portable function.
-#ifdef TF_LITE_MCU_DEBUG_LOG
-
 #include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
 
-#define DEBUG_LOG(x) \
-  do {               \
-    DebugLog(x);     \
-  } while (0)
-
-inline void InfiniteLoop() {
-  DEBUG_LOG("HALTED\n");
+#if !defined(TF_LITE_MCU_DEBUG_LOG)
+#include <cstdlib>
+#define TFLITE_ABORT abort()
+#else
+inline void AbortImpl() {
+  DebugLog("HALTED\n");
   while (1) {
   }
 }
+#define TFLITE_ABORT AbortImpl();
+#endif
 
-#define TFLITE_ABORT InfiniteLoop();
-
-#else  // TF_LITE_MCU_DEBUG_LOG
-
-#include <cstdio>
-#include <cstdlib>
-
-#define DEBUG_LOG(x)            \
-  do {                          \
-    fprintf(stderr, "%s", (x)); \
-  } while (0)
-
-// Report Error for unsupported type by op 'op_name' and returns kTfLiteError.
-#define TF_LITE_UNSUPPORTED_TYPE(context, type, op_name)                    \
-  do {                                                                      \
-    TF_LITE_KERNEL_LOG((context), "%s:%d Type %s is unsupported by op %s.", \
-                       __FILE__, __LINE__, TfLiteTypeGetName(type),         \
-                       (op_name));                                          \
-    return kTfLiteError;                                                    \
-  } while (0)
-
-#define TFLITE_ABORT abort()
-
-#endif  // TF_LITE_MCU_DEBUG_LOG
-
-#if defined(NDEBUG) || defined(ARDUINO)
+#if defined(NDEBUG)
 #define TFLITE_ASSERT_FALSE (static_cast<void>(0))
 #else
 #define TFLITE_ASSERT_FALSE TFLITE_ABORT
 #endif
 
-#define TF_LITE_FATAL(msg)  \
-  do {                      \
-    DEBUG_LOG(msg);         \
-    DEBUG_LOG("\nFATAL\n"); \
-    TFLITE_ABORT;           \
-  } while (0)
-
-#define TF_LITE_ASSERT(x)        \
-  do {                           \
-    if (!(x)) TF_LITE_FATAL(#x); \
-  } while (0)
-
-#define TF_LITE_ASSERT_EQ(x, y)                            \
-  do {                                                     \
-    if ((x) != (y)) TF_LITE_FATAL(#x " didn't equal " #y); \
-  } while (0)
-
 #endif  // TENSORFLOW_LITE_KERNELS_OP_MACROS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/kernels/padding.h b/edge-impulse-sdk/tensorflow/lite/kernels/padding.h
index 62eb7a4..836ca92 100644
--- a/edge-impulse-sdk/tensorflow/lite/kernels/padding.h
+++ b/edge-impulse-sdk/tensorflow/lite/kernels/padding.h
@@ -15,12 +15,11 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_PADDING_H_
 #define TENSORFLOW_LITE_KERNELS_PADDING_H_
 
-#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
 
-// TODO(renjieliu): Migrate others to use ComputePaddingWithLeftover.
 inline int ComputePadding(int stride, int dilation_rate, int in_size,
                           int filter_size, int out_size) {
   int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cc b/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cc
index c5efa7f..e9d2d6f 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,32 +26,45 @@ AllOpsResolver::AllOpsResolver() {
   AddAddN();
   AddArgMax();
   AddArgMin();
+  AddAssignVariable();
   AddAveragePool2D();
   AddBatchMatMul();
   AddBatchToSpaceNd();
+  AddBroadcastArgs();
+  AddBroadcastTo();
+  AddCallOnce();
+  AddCast();
   AddCeil();
   AddComplexAbs();
+  AddCircularBuffer();
   AddConcatenation();
   AddConv2D();
   AddCos();
+  AddCumSum();
+  AddDepthToSpace();
   AddDepthwiseConv2D();
   AddDequantize();
-  // AddDetectionPostprocess();
+  AddDetectionPostprocess();
   AddDiv();
   AddElu();
   AddEqual();
   AddEthosU();
   AddExp();
   AddExpandDims();
+  AddFill();
   AddFloor();
+  AddFloorDiv();
+  AddFloorMod();
   AddFullyConnected();
 #ifndef TF_LITE_STATIC_MEMORY
   AddGather();
 #endif // TF_LITE_STATIC_MEMORY
+  AddGatherNd();
   AddGreater();
   AddGreaterEqual();
   AddHardSwish();
   AddImag();
+  AddIf();
   AddL2Normalization();
   AddL2Pool2D();
   AddLeakyRelu();
@@ -67,6 +80,7 @@ AllOpsResolver::AllOpsResolver() {
   AddMaximum();
   AddMean();
   AddMinimum();
+  AddMirrorPad();
   AddMul();
   AddNeg();
   AddNotEqual();
@@ -76,11 +90,13 @@ AllOpsResolver::AllOpsResolver() {
   AddPrelu();
   AddQuantize();
   AddReal();
+  AddReadVariable();
   AddReduceMax();
   AddReduceMin();
   AddRelu();
   AddRelu6();
   AddReshape();
+  AddResizeBilinear();
   AddResizeNearestNeighbor();
   AddRfft2D();
   AddRound();
@@ -94,6 +110,7 @@ AllOpsResolver::AllOpsResolver() {
   AddSlice();
   AddSoftmax();
   AddSpaceToBatchNd();
+  AddSpaceToDepth();
   AddSplit();
   AddSplitV();
   AddSqrt();
@@ -108,7 +125,11 @@ AllOpsResolver::AllOpsResolver() {
   AddTranspose();
   AddTransposeConv();
   AddTreeEnsembleClassifier();
+  AddUnidirectionalSequenceLstm();
   AddUnpack();
+  AddVarHandle();
+  AddWhile();
+  AddZerosLike();
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h b/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h
index 70df53d..db117ca 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/compatibility.h
@@ -23,7 +23,9 @@ limitations under the License.
 // This macro needs to be included in all subclasses of a virtual base class in
 // the private section.
 
-// Patched by Edge Impulse, actually declaring `void operator delete(void* p) {}` yields compiler errors on some compilers
+// Patched by Edge Impulse,
+// actually declaring `void operator delete(void* p) {}`
+// yields compiler errors on some compilers
 #define TF_LITE_REMOVE_VIRTUAL_DELETE
 
 #endif  // TENSORFLOW_LITE_MICRO_COMPATIBILITY_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cc b/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cc
new file mode 100644
index 0000000..5ca66ab
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.cc
@@ -0,0 +1,110 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+// Patched by Edge Impulse
+constexpr int FakeMicroContext::kNumScratchBuffers_;
+
+namespace {
+// Dummy static variables to allow creation of dummy MicroAllocator.
+// All tests are guaranteed to run serially.
+static constexpr int KDummyTensorArenaSize = 256;
+static uint8_t dummy_tensor_arena[KDummyTensorArenaSize];
+}  // namespace
+
+FakeMicroContext::FakeMicroContext(TfLiteTensor* tensors,
+                                   SingleArenaBufferAllocator* allocator,
+                                   MicroGraph* micro_graph)
+    : MicroContext(
+          MicroAllocator::Create(dummy_tensor_arena, KDummyTensorArenaSize),
+          nullptr, micro_graph),
+      tensors_(tensors),
+      allocator_(allocator) {}
+
+TfLiteTensor* FakeMicroContext::AllocateTempTfLiteTensor(int tensor_index) {
+  allocated_tensor_count_++;
+  return &tensors_[tensor_index];
+}
+
+void FakeMicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
+  allocated_tensor_count_--;
+}
+
+bool FakeMicroContext::IsAllTempTfLiteTensorDeallocated() {
+  return !allocated_tensor_count_;
+}
+
+TfLiteEvalTensor* FakeMicroContext::GetEvalTensor(int tensor_index) {
+  TfLiteEvalTensor* eval_tensor =
+      reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocateTemp(
+          sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
+  TFLITE_DCHECK(eval_tensor != nullptr);
+
+  // In unit tests, the TfLiteTensor pointer contains the source of truth for
+  // buffers and values:
+  eval_tensor->data = tensors_[tensor_index].data;
+  eval_tensor->dims = tensors_[tensor_index].dims;
+  eval_tensor->type = tensors_[tensor_index].type;
+  return eval_tensor;
+}
+
+void* FakeMicroContext::AllocatePersistentBuffer(size_t bytes) {
+  // FakeMicroContext uses SingleArenaBufferAllocator, which does not
+  // automatically apply the buffer alignment like MicroAllocator. The buffer
+  // alignment is potentially wasteful but allows the fake_micro_context to work
+  // correctly with optimized kernels.
+  return allocator_->AllocatePersistentBuffer(bytes,
+                                              MicroArenaBufferAlignment());
+}
+
+TfLiteStatus FakeMicroContext::RequestScratchBufferInArena(size_t bytes,
+                                                           int* buffer_index) {
+  TFLITE_DCHECK(buffer_index != nullptr);
+
+  if (scratch_buffer_count_ == kNumScratchBuffers_) {
+    MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
+                kNumScratchBuffers_);
+    return kTfLiteError;
+  }
+
+  // For tests, we allocate scratch buffers from the tail and keep them around
+  // for the lifetime of model. This means that the arena size in the tests will
+  // be more than what we would have if the scratch buffers could share memory.
+  scratch_buffers_[scratch_buffer_count_] =
+      allocator_->AllocatePersistentBuffer(bytes, MicroArenaBufferAlignment());
+  TFLITE_DCHECK(scratch_buffers_[scratch_buffer_count_] != nullptr);
+
+  *buffer_index = scratch_buffer_count_++;
+  return kTfLiteOk;
+}
+
+void* FakeMicroContext::GetScratchBuffer(int buffer_index) {
+  TFLITE_DCHECK(scratch_buffer_count_ <= kNumScratchBuffers_);
+  if (buffer_index < 0 || buffer_index >= scratch_buffer_count_) {
+    return nullptr;  // index not handed out by RequestScratchBufferInArena
+  }
+  return scratch_buffers_[buffer_index];  // buffer stored when the index was issued
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h b/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h
new file mode 100644
index 0000000..a7af023
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h
@@ -0,0 +1,56 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
+#define TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+
+namespace tflite {
+// A fake of MicroContext for kernel util tests.
+class FakeMicroContext : public MicroContext {
+ public:
+  FakeMicroContext(TfLiteTensor* tensors, SingleArenaBufferAllocator* allocator,
+                   MicroGraph* micro_graph);
+
+  void* AllocatePersistentBuffer(size_t bytes) override;
+  TfLiteStatus RequestScratchBufferInArena(size_t bytes,
+                                           int* buffer_index) override;
+  void* GetScratchBuffer(int buffer_index) override;
+
+  TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) override;
+  void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) override;
+  bool IsAllTempTfLiteTensorDeallocated();
+
+  TfLiteEvalTensor* GetEvalTensor(int tensor_index) override;
+
+ private:
+  static constexpr int kNumScratchBuffers_ = 12;
+
+  int scratch_buffer_count_ = 0;
+  uint8_t* scratch_buffers_[kNumScratchBuffers_];
+
+  TfLiteTensor* tensors_;
+  int allocated_tensor_count_ = 0;
+
+  SingleArenaBufferAllocator* allocator_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_FAKE_MICRO_CONTEXT_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cc b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cc
new file mode 100644
index 0000000..2fe1663
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.cc
@@ -0,0 +1,34 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type) {
+  return ConvertTensorType(tensor_type, type, tflite::GetMicroErrorReporter());
+}
+
+TfLiteStatus CallBuiltinParseFunction(TfLiteBridgeBuiltinParseFunction parser,
+                                      const Operator* op,
+                                      BuiltinDataAllocator* allocator,
+                                      void** builtin_data) {
+  return parser(op, tflite::GetMicroErrorReporter(), allocator, builtin_data);
+}
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h
new file mode 100644
index 0000000..a2a1ad4
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h
@@ -0,0 +1,45 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_
+#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// Forward declaration of the ErrorReporter class to hide it from the TFLM code.
+class ErrorReporter;
+
+using TfLiteBridgeBuiltinDataAllocator = BuiltinDataAllocator;
+
+using TfLiteBridgeBuiltinParseFunction =
+    TfLiteStatus (*)(const Operator* op, ErrorReporter* error_reporter,
+                     BuiltinDataAllocator* allocator, void** builtin_data);
+
+// Converts the tensor data type used in the flatbuffer to the representation
+// used by the runtime.
+TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type);
+
+// CallBuiltinParseFunction wraps a builtin parse function: it invokes
+// parser(op, error_reporter, allocator, builtin_data) with the micro reporter.
+TfLiteStatus CallBuiltinParseFunction(TfLiteBridgeBuiltinParseFunction parser,
+                                      const Operator* op,
+                                      BuiltinDataAllocator* allocator,
+                                      void** builtin_data);
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_FLATBUFFER_CONVERSIONS_BRIDGE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cc b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cc
new file mode 100644
index 0000000..e5d779b
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.cc
@@ -0,0 +1,85 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
+
+namespace tflite {
+
+// Views the root of the serialized flexbuffer as a flexbuffers::Vector.
+FlexbufferWrapper::FlexbufferWrapper(const uint8_t* buffer, size_t size)
+    : flexbuffers::Vector(flexbuffers::GetRoot(buffer, size).AsVector()) {}
+
+// Raw readers: element i starts i * byte_width_ bytes past data_.
+int64_t FlexbufferWrapper::ElementAsInt64(size_t i) const {
+  return ::flexbuffers::ReadInt64(data_ + i * byte_width_, byte_width_);
+}
+
+uint64_t FlexbufferWrapper::ElementAsUInt64(size_t i) const {
+  return ::flexbuffers::ReadUInt64(data_ + i * byte_width_, byte_width_);
+}
+
+double FlexbufferWrapper::ElementAsDouble(size_t i) const {
+  return ::flexbuffers::ReadDouble(data_ + i * byte_width_, byte_width_);
+}
+
+// Narrowing readers: convert the wide value returned by a raw reader.
+int32_t FlexbufferWrapper::ElementAsInt32(size_t i) const {
+  return static_cast<int32_t>(ElementAsInt64(i));
+}
+
+bool FlexbufferWrapper::ElementAsBool(size_t i) const {
+  return static_cast<bool>(ElementAsUInt64(i));
+}
+
+float FlexbufferWrapper::ElementAsFloat(size_t i) const {
+  return static_cast<float>(FlexbufferWrapper::ElementAsDouble(i));
+}
+
+// TODO(b/192589496): Ops must always be there. Remove this function when fixed
+uint32_t NumSubgraphOperators(const SubGraph* subgraph) {
+  // operators() may return nullptr; such a subgraph is reported as having
+  // zero operators instead of being dereferenced.
+  const auto* ops = subgraph->operators();
+  const bool has_ops = (ops != nullptr);
+  return has_ops ? ops->size() : 0;
+}
+// TODO(b/192589496): Ops must always be there. Remove this function when fixed
+uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx) {
+  // Look up the subgraph by index, then defer to the SubGraph overload.
+  return NumSubgraphOperators(model->subgraphs()->Get(subgraph_idx));
+}
+
+TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<int32_t>* flatbuffer_array) {
+  // No copy or allocation is made: the flatbuffer vector is reinterpreted in
+  // place, relying on flatbuffers::Vector<int32_t> and TfLiteIntArray sharing
+  // a memory layout on little-endian machines.
+  const auto* raw = reinterpret_cast<const TfLiteIntArray*>(flatbuffer_array);
+  // TODO(b/188459715): audit this usage of const_cast.
+  return const_cast<TfLiteIntArray*>(raw);
+}
+
+TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<float>* flatbuffer_array) {
+  // No copy or allocation is made: the flatbuffer vector is reinterpreted in
+  // place, relying on flatbuffers::Vector<float> and TfLiteFloatArray sharing
+  // a memory layout on little-endian machines.
+  const auto* raw = reinterpret_cast<const TfLiteFloatArray*>(flatbuffer_array);
+  // TODO(b/188459715): audit this usage of const_cast.
+  return const_cast<TfLiteFloatArray*>(raw);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h
new file mode 100644
index 0000000..a5a7f9e
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h
@@ -0,0 +1,65 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
+#define THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+// Kernels use flexbuffers::Map to pack their init parameters in a tflite file,
+// with the parameter names as map keys and the parameter values as the
+// corresponding map values.
+// Accessing the map values using the flexbuffers::Map class is inline heavy,
+// which can cause the code size to bloat beyond what's reasonable for a micro
+// application. Use this class instead, when possible.
+// FlexbufferWrapper takes advantage of the following properties of
+// flexbuffers::Map:
+// 1. It can be viewed as a flexbuffers::Vector of the values.
+// 2. The values in the vector are ordered alphabetically by their keys.
+// 3. All integer and Boolean values are stored as 64-bit numbers.
+// 4. All floating point values are stored as double precision numbers.
+// The properties are mentioned in the flexbuffers docs, but we rely on
+// a unit test to catch design changes.
+class FlexbufferWrapper : public flexbuffers::Vector {
+ public:
+  // Construct with a serialized flexbuffer 'buffer' of 'size' bytes
+  explicit FlexbufferWrapper(const uint8_t* buffer, size_t size);
+  int64_t ElementAsInt64(size_t i) const;    // element i read as signed
+  uint64_t ElementAsUInt64(size_t i) const;  // element i read as unsigned
+  int32_t ElementAsInt32(size_t i) const;    // ElementAsInt64(i), narrowed
+  bool ElementAsBool(size_t i) const;        // ElementAsUInt64(i) != 0
+  double ElementAsDouble(size_t i) const;    // element i read as double
+  float ElementAsFloat(size_t i) const;      // ElementAsDouble(i), narrowed
+};
+
+// Returns the number of operators in a tflite subgraph (0 when it has none).
+uint32_t NumSubgraphOperators(const SubGraph* subgraph);
+uint32_t NumSubgraphOperators(const Model* model, int subgraph_idx);
+
+// Converts a flatbuffer array to a TfLiteArray.
+// TODO(b/188459715): These functions convert a const input to a non-const via
+// a const_cast. It is unclear exactly why this is required.
+TfLiteIntArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<int32_t>* flatbuffer_array);
+TfLiteFloatArray* FlatBufferVectorToTfLiteTypeArray(
+    const flatbuffers::Vector<float>* flatbuffer_array);
+
+}  // namespace tflite
+
+#endif  // THIRD_PARTY_TFLITE_MICRO_TENSORFLOW_LITE_MICRO_FLATBUFFER_UTILS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h
new file mode 100644
index 0000000..287eea3
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h
@@ -0,0 +1,100 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+
+namespace tflite {
+// Interface classes that the TFLM framework relies on to get buffers it needs.
+// There are two types of buffers that the TFLM framework requires: persistent
+// and non-persistent. Persistent buffers, once allocated, are never freed by
+// the TFLM framework. Non-persistent buffers can be allocated and deallocated
+// by the TFLM framework. This file defines the two interface classes that the
+// TFLM framework relies on to manage these buffers.
+
+// Interface class for managing persistent buffers (allocate-only; no free).
+class IPersistentBufferAllocator {
+ public:
+  IPersistentBufferAllocator() {}
+  virtual ~IPersistentBufferAllocator() {}
+
+  // Allocates a persistent buffer of `size` with `alignment`; never freed.
+  virtual uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) = 0;
+
+  // Returns the size of all persistent allocations in bytes.
+  virtual size_t GetPersistentUsedBytes() const = 0;
+};
+
+// Interface class for managing non-persistent buffers.
+// The default non-persistent buffers are temp buffers that are not resizable.
+// Support of at least one resizable buffer is required.
+class INonPersistentBufferAllocator {
+ public:
+  INonPersistentBufferAllocator() {}
+  virtual ~INonPersistentBufferAllocator() {}
+
+  // Allocates a temporary buffer. This buffer is not resizable.
+  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) = 0;
+
+  // Signals that a temporary buffer is no longer needed.
+  virtual void DeallocateTemp(uint8_t* buf) = 0;
+
+  // Returns true if all temporary buffers are already deallocated.
+  virtual bool IsAllTempDeallocated() = 0;
+
+  // Signals that all temporary allocations can be reclaimed. TFLM calls this
+  // API when it knows that all temporary buffers that it requested have been
+  // deallocated. The goal of this API is to let implementations of
+  // INonPersistentBufferAllocator reuse buffers with reasonable
+  // complexity.
+  virtual TfLiteStatus ResetTempAllocations() = 0;
+
+  // Returns a buffer that is resizable via ResizeBuffer().
+  virtual uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) = 0;
+
+  // Resizes a buffer that was previously returned by
+  // AllocateResizableBuffer().
+  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                                    size_t alignment) = 0;
+
+  // Frees up the memory occupied by the resizable buffer.
+  virtual TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) = 0;
+
+  // Returns a pointer pointing to the start of the overlay memory, which is
+  // used for activation tensors and scratch buffers by kernels at Invoke stage.
+  virtual uint8_t* GetOverlayMemoryAddress() const = 0;
+
+  // Reserves the size of the overlay memory. This overlay is reserved for the
+  // kernels at Invoke stage. This is referred to as the overlay because before
+  // Invoke stage, the same memory can be used for temp buffers. The layout of
+  // the memory is planned by the memory planner separately at Invoke stage.
+  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
+                                                         size_t alignment) = 0;
+
+  // Returns the size of non-persistent buffer in use.
+  virtual size_t GetNonPersistentUsedBytes() const = 0;
+
+  // Returns the number of bytes available with a given alignment. This number
+  // takes into account any temporary allocations.
+  virtual size_t GetAvailableMemory(size_t alignment) const = 0;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_IBUFFER_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cc
index 7e2a032..4f4cf81 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -22,144 +24,25 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace activations {
 namespace {
 
-struct ReluOpData {
-  ReluParams params;
-};
-
-struct Relu6OpData {
-  int8_t six_int8;
-  int8_t zero_int8;
-  uint8_t six_uint8;
-  uint8_t zero_uint8;
-};
-
-}  // namespace
-
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-template <typename T>
-inline void ReluQuantized(const ReluOpData& data,
-                          const RuntimeShape& input_shape,
-                          const RuntimeShape& output_shape, const T* input_data,
-                          T* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const int32_t val = static_cast<int32_t>(input_data[i]);
-    int32_t clamped =
-        data.params.output_offset +
-        MultiplyByQuantizedMultiplier(val - data.params.input_offset,
-                                      data.params.output_multiplier,
-                                      data.params.output_shift);
-    clamped = std::max(data.params.quantized_activation_min, clamped);
-    clamped = std::min(data.params.quantized_activation_max, clamped);
-    output_data[i] = static_cast<T>(clamped);
-  }
-}
-
-template <typename T>
-inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
-                                ReluOpData* data) {
-  float act_min = 0.0;
-  float act_max = std::numeric_limits<float>::infinity();
-  double real_multiplier =
-      static_cast<double>(input->params.scale / output->params.scale);
-
-  const RuntimeShape input_shape = GetTensorShape(input);
-  const RuntimeShape output_shape = GetTensorShape(output);
-
-  QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
-                     &data->params.output_shift);
-
-  data->params.quantized_activation_min = std::max(
-      static_cast<int32_t>(std::numeric_limits<T>::min()),
-      output->params.zero_point +
-          static_cast<int32_t>(roundf(act_min / output->params.scale)));
-  data->params.quantized_activation_max =
-      act_max == std::numeric_limits<float>::infinity()
-          ? static_cast<int32_t>(std::numeric_limits<T>::max())
-          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
-                     output->params.zero_point +
-                         static_cast<int32_t>(
-                             roundf(act_max / output->params.scale)));
-  data->params.input_offset = input->params.zero_point;
-  data->params.output_offset = output->params.zero_point;
-}
-
-inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
-                      const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    const float lower = 0.0f;
-    const float clamped = val < lower ? lower : val;
-    output_data[i] = clamped;
-  }
-}
-
-inline void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
-                       const RuntimeShape& output_shape, float* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const float val = input_data[i];
-    const float upper = 6.0f;
-    const float lower = 0.0f;
-    const float clamped = val > upper ? upper : val < lower ? lower : val;
-    output_data[i] = clamped;
-  }
-}
-
-template <typename Q>
-inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape,
-                           const Q* input_data,
-                           const RuntimeShape& output_shape, Q* output_data) {
-  const int flat_size = MatchingFlatSize(input_shape, output_shape);
-  for (int i = 0; i < flat_size; ++i) {
-    const Q val = input_data[i];
-    const Q clamped = val > upper ? upper : val < lower ? lower : val;
-    output_data[i] = clamped;
-  }
-}
-
 void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(ReluOpData));
 }
 
-TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  if (input->type == kTfLiteInt8) {
-    CalculateReluOpData<int8_t>(input, output, data);
-  } else if (input->type == kTfLiteUInt8) {
-    CalculateReluOpData<uint8_t>(input, output, data);
-  }
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const ReluOpData& data = *(static_cast<const ReluOpData*>(node->user_data));
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
 
   switch (input->type) {
     case kTfLiteFloat32: {
@@ -171,22 +54,15 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
-      ReluQuantized<int8_t>(data, tflite::micro::GetTensorShape(input),
+      tflite::ReluQuantized(data, tflite::micro::GetTensorShape(input),
                             tflite::micro::GetTensorShape(output),
                             tflite::micro::GetTensorData<int8_t>(input),
                             tflite::micro::GetTensorData<int8_t>(output));
       return kTfLiteOk;
     }
-    case kTfLiteUInt8: {
-      ReluQuantized<uint8_t>(data, tflite::micro::GetTensorShape(input),
-                             tflite::micro::GetTensorShape(output),
-                             tflite::micro::GetTensorData<uint8_t>(input),
-                             tflite::micro::GetTensorData<uint8_t>(output));
-      return kTfLiteOk;
-    }
     default: {
-      TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Only float32 is supported currently, got %s",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
     }
   }
@@ -197,34 +73,14 @@ void* Relu6Init(TfLiteContext* context, const char* buffer, size_t length) {
   return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData));
 }
 
-TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-
-  if (input->type == kTfLiteInt8) {
-    data->six_int8 = FloatToQuantizedType<int8_t>(6.0f, input->params.scale,
-                                                  input->params.zero_point);
-    data->zero_int8 = input->params.zero_point;
-  } else if (input->type == kTfLiteUInt8) {
-    data->six_uint8 = FloatToQuantizedType<uint8_t>(6.0f, input->params.scale,
-                                                    input->params.zero_point);
-    data->zero_uint8 = input->params.zero_point;
-  }
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const Relu6OpData& data = *(static_cast<const Relu6OpData*>(node->user_data));
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kActivationsInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kActivationsOutputTensor);
 
   switch (input->type) {
     case kTfLiteFloat32: {
@@ -236,53 +92,29 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
-      Relu6Quantized<int8_t>(data.zero_int8, data.six_int8,
-                             tflite::micro::GetTensorShape(input),
-                             tflite::micro::GetTensorData<int8_t>(input),
-                             tflite::micro::GetTensorShape(output),
-                             tflite::micro::GetTensorData<int8_t>(output));
-      return kTfLiteOk;
-    }
-    case kTfLiteUInt8: {
-      Relu6Quantized<uint8_t>(data.zero_uint8, data.six_uint8,
-                              tflite::micro::GetTensorShape(input),
-                              tflite::micro::GetTensorData<uint8_t>(input),
-                              tflite::micro::GetTensorShape(output),
-                              tflite::micro::GetTensorData<uint8_t>(output));
+      Relu6Quantized(data.zero_int8, data.six_int8,
+                     tflite::micro::GetTensorShape(input),
+                     tflite::micro::GetTensorData<int8_t>(input),
+                     tflite::micro::GetTensorShape(output),
+                     tflite::micro::GetTensorData<int8_t>(output));
       return kTfLiteOk;
     }
     default: {
-      TF_LITE_KERNEL_LOG(context, "Only float32 is supported currently, got %s",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Only float32 is supported currently, got %s",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
     }
   }
 }
 
-}  // namespace activations
+}  // namespace
 
 TfLiteRegistration Register_RELU() {
-  return {/*init=*/activations::ReluInit,
-          /*free=*/nullptr,
-          /*prepare=*/activations::ReluPrepare,
-          /*invoke=*/activations::ReluEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(ReluInit, ReluPrepare, ReluEval);
 }
 
 TfLiteRegistration Register_RELU6() {
-  return {/*init=*/activations::Relu6Init,
-          /*free=*/nullptr,
-          /*prepare=*/activations::Relu6Prepare,
-          /*invoke=*/activations::Relu6Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Relu6Init, Relu6Prepare, Relu6Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h
new file mode 100644
index 0000000..c6dddcd
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h
@@ -0,0 +1,63 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+extern const int kActivationsInputTensor;   // defined in activations_common.cc
+extern const int kActivationsOutputTensor;  // defined in activations_common.cc
+
+struct ReluOpData {  // stored in node->user_data for RELU
+  ReluParams params;  // quantization parameters read by ReluQuantized
+};
+
+struct Relu6OpData {  // stored in node->user_data for RELU6
+  int8_t six_int8;   // upper clamp bound Relu6Eval passes to Relu6Quantized
+  int8_t zero_int8;  // lower clamp bound Relu6Eval passes to Relu6Quantized
+};
+
+void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
+                   const RuntimeShape& output_shape, const int8_t* input_data,
+                   int8_t* output_data);
+
+template <typename T>
+void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
+                         ReluOpData* data);
+
+void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
+               const RuntimeShape& output_shape, float* output_data);
+
+void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
+                const RuntimeShape& output_shape, float* output_data);
+
+void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
+                    const int8_t* input_data, const RuntimeShape& output_shape,
+                    int8_t* output_data);
+
+TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node);
+
+TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ACTIVATIONS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cc
new file mode 100644
index 0000000..d270813
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/activations_common.cc
@@ -0,0 +1,158 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <algorithm>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activations.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+const int kActivationsInputTensor = 0;   // Passed to AllocateTempInputTensor.
+const int kActivationsOutputTensor = 0;  // Passed to AllocateTempOutputTensor.
+
+// Int8 ReLU: requantizes every element and clamps it to the activation range.
+void ReluQuantized(const ReluOpData& data, const RuntimeShape& input_shape,
+                   const RuntimeShape& output_shape, const int8_t* input_data,
+                   int8_t* output_data) {
+  const auto& p = data.params;
+  const int count = MatchingFlatSize(input_shape, output_shape);
+  for (int idx = 0; idx < count; ++idx) {
+    const int32_t centered = input_data[idx] - p.input_offset;
+    const int32_t rescaled =
+        p.output_offset + MultiplyByQuantizedMultiplier(
+                              centered, p.output_multiplier, p.output_shift);
+    const int32_t floored = std::max(p.quantized_activation_min, rescaled);
+    const int32_t bounded = std::min(p.quantized_activation_max, floored);
+    output_data[idx] = static_cast<int8_t>(bounded);
+  }
+}
+
+// Fills data->params for quantized ReLU on element type T: the input-to-output
+// rescale multiplier/shift, both zero-point offsets, and the clamp range.
+// The lower clamp is the quantized value of 0.0, floored at min(T).
+template <typename T>
+void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
+                         ReluOpData* data) {
+  const float act_min = 0.0f;
+  const float act_max = std::numeric_limits<float>::infinity();
+  const double real_multiplier = static_cast<double>(input->params.scale) /
+                                 static_cast<double>(output->params.scale);
+
+  QuantizeMultiplier(real_multiplier, &data->params.output_multiplier,
+                     &data->params.output_shift);
+
+  data->params.quantized_activation_min = std::max(
+      static_cast<int32_t>(std::numeric_limits<T>::min()),
+      output->params.zero_point +
+          static_cast<int32_t>(roundf(act_min / output->params.scale)));
+  data->params.quantized_activation_max =
+      act_max == std::numeric_limits<float>::infinity()
+          ? static_cast<int32_t>(std::numeric_limits<T>::max())
+          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
+                     output->params.zero_point +
+                         static_cast<int32_t>(
+                             roundf(act_max / output->params.scale)));
+  data->params.input_offset = input->params.zero_point;
+  data->params.output_offset = output->params.zero_point;
+}
+
+// Float ReLU: copies the input, replacing values below zero with zero.
+void ReluFloat(const RuntimeShape& input_shape, const float* input_data,
+               const RuntimeShape& output_shape, float* output_data) {
+  constexpr float kFloor = 0.0f;
+  const int count = MatchingFlatSize(input_shape, output_shape);
+  for (int idx = 0; idx < count; ++idx) {
+    const float x = input_data[idx];
+    output_data[idx] = (x < kFloor) ? kFloor : x;
+  }
+}
+
+// Float ReLU6: values above 6 become 6, values below 0 become 0.
+void Relu6Float(const RuntimeShape& input_shape, const float* input_data,
+                const RuntimeShape& output_shape, float* output_data) {
+  constexpr float kFloor = 0.0f;
+  constexpr float kCeiling = 6.0f;
+  const int count = MatchingFlatSize(input_shape, output_shape);
+  for (int idx = 0; idx < count; ++idx) {
+    const float x = input_data[idx];
+    output_data[idx] = (x > kCeiling) ? kCeiling : (x < kFloor) ? kFloor : x;
+  }
+}
+
+// Int8 ReLU6: limits each element to [lower, upper]; upper is tested first.
+void Relu6Quantized(int8_t lower, int8_t upper, const RuntimeShape& input_shape,
+                    const int8_t* input_data, const RuntimeShape& output_shape,
+                    int8_t* output_data) {
+  const int count = MatchingFlatSize(input_shape, output_shape);
+  for (int idx = 0; idx < count; ++idx) {
+    const int8_t x = input_data[idx];
+    output_data[idx] = (x > upper) ? upper : (x < lower) ? lower : x;
+  }
+}
+
+// Prepare step for RELU. For int8 inputs the requantization parameters are
+// computed once here and stored in node->user_data; other types need nothing.
+TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  auto* const op_data = static_cast<ReluOpData*>(node->user_data);
+  MicroContext* const ctx = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      ctx->AllocateTempInputTensor(node, kActivationsInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      ctx->AllocateTempOutputTensor(node, kActivationsOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  if (input->type == kTfLiteInt8) {
+    CalculateReluOpData<int8_t>(input, output, op_data);
+  }
+  ctx->DeallocateTempTfLiteTensor(input);
+  ctx->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+// Prepare step for RELU6. For int8 inputs, caches the quantized value of 6.0
+// and the input zero point in node->user_data; other types need nothing.
+TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  auto* const op_data = static_cast<Relu6OpData*>(node->user_data);
+  MicroContext* const ctx = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      ctx->AllocateTempInputTensor(node, kActivationsInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  if (input->type == kTfLiteInt8) {
+    const auto& quant = input->params;
+    op_data->six_int8 =
+        FloatToQuantizedType<int8_t>(6.0f, quant.scale, quant.zero_point);
+    op_data->zero_int8 = quant.zero_point;
+  }
+  ctx->DeallocateTempTfLiteTensor(input);
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cc
index 048fb96..2140d1f 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -30,12 +30,11 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace add {
 
+namespace {
 constexpr int kInputTensor1 = 0;
 constexpr int kInputTensor2 = 1;
 constexpr int kOutputTensor = 0;
@@ -54,8 +53,10 @@ struct OpData {
   int32_t input1_multiplier;
   int32_t input2_multiplier;
   int32_t output_multiplier;
+
   int output_shift;
   int left_shift;
+
   int32_t input1_offset;
   int32_t input2_offset;
   int32_t output_offset;
@@ -71,12 +72,12 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
                              OpData* data) {
   data->requires_broadcast = !HaveSameShapes(input1, input2);
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     // 8bit -> 8bit general quantized path, with general rescalings
     data->input1_offset = -input1->params.zero_point;
     data->input2_offset = -input2->params.zero_point;
     data->output_offset = output->params.zero_point;
-    data->left_shift = 20;
+    data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
     const double twice_max_input_scale =
         2 * static_cast<double>(
                 std::max(input1->params.scale, input2->params.scale));
@@ -109,25 +110,122 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
   return kTfLiteOk;
 }
 
-void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
-             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
+void UpdateOpParams(tflite::ArithmeticParams* const op_params,
+                    const OpData* data) {  // Copies prepared state over.
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      op_params);  // output clamp range
+  op_params->left_shift = data->left_shift;
+  op_params->output_offset = data->output_offset;
+  op_params->output_multiplier = data->output_multiplier;
+  op_params->output_shift = data->output_shift;
+  op_params->input1_offset = data->input1_offset;
+  op_params->input2_offset = data->input2_offset;
+  op_params->input1_multiplier = data->input1_multiplier;
+  op_params->input2_multiplier = data->input2_multiplier;
+  op_params->input1_shift = data->input1_shift;
+  op_params->input2_shift = data->input2_shift;
+}
+
+// Int8 ADD for the CMSIS-NN build. Inputs that need broadcasting are handled
+// by the reference integer kernel; equally shaped inputs are handed to
+// arm_elementwise_add_s8. Always returns kTfLiteOk.
+TfLiteStatus EvalAddQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
+                                  TfLiteAddParams* params, const OpData* data,
+                                  const TfLiteEvalTensor* input1,
+                                  const TfLiteEvalTensor* input2,
+                                  TfLiteEvalTensor* output) {
+  tflite::ArithmeticParams op_params;
+  UpdateOpParams(&op_params, data);
+
+  const RuntimeShape shape1 = tflite::micro::GetTensorShape(input1);
+  const RuntimeShape shape2 = tflite::micro::GetTensorShape(input2);
+  const RuntimeShape shape_out = tflite::micro::GetTensorShape(output);
+  const int8_t* const lhs = tflite::micro::GetTensorData<int8_t>(input1);
+  const int8_t* const rhs = tflite::micro::GetTensorData<int8_t>(input2);
+  int8_t* const dst = tflite::micro::GetTensorData<int8_t>(output);
+
+  // ProcessBroadcastShapes receives op_params by pointer, so it has to run
+  // before either kernel consumes op_params.
+  if (reference_ops::ProcessBroadcastShapes(shape1, shape2, &op_params)) {
+    reference_integer_ops::BroadcastAdd4DSlow(op_params, shape1, lhs, shape2,
+                                              rhs, shape_out, dst);
+    return kTfLiteOk;
+  }
+
+  // No broadcast: the kernel receives raw pointers plus the element count.
+  arm_elementwise_add_s8(
+      lhs, rhs, op_params.input1_offset, op_params.input1_multiplier,
+      op_params.input1_shift, op_params.input2_offset,
+      op_params.input2_multiplier, op_params.input2_shift,
+      op_params.left_shift, dst, op_params.output_offset,
+      op_params.output_multiplier, op_params.output_shift,
+      op_params.quantized_activation_min, op_params.quantized_activation_max,
+      MatchingElementsSize(shape1, shape2, shape_out));
+
+  return kTfLiteOk;
+}
+
+// Int16 ADD for the CMSIS-NN build. Inputs that need broadcasting are handled
+// by the reference kernel; equally shaped inputs are handed to
+// arm_elementwise_add_s16. Always returns kTfLiteOk.
+TfLiteStatus EvalAddQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
+                                   TfLiteAddParams* params, const OpData* data,
+                                   const TfLiteEvalTensor* input1,
+                                   const TfLiteEvalTensor* input2,
+                                   TfLiteEvalTensor* output) {
+  tflite::ArithmeticParams op_params;
+  UpdateOpParams(&op_params, data);
+
+  const RuntimeShape shape1 = tflite::micro::GetTensorShape(input1);
+  const RuntimeShape shape2 = tflite::micro::GetTensorShape(input2);
+  const RuntimeShape shape_out = tflite::micro::GetTensorShape(output);
+  const int16_t* const lhs = tflite::micro::GetTensorData<int16_t>(input1);
+  const int16_t* const rhs = tflite::micro::GetTensorData<int16_t>(input2);
+  int16_t* const dst = tflite::micro::GetTensorData<int16_t>(output);
+
+  // ProcessBroadcastShapes receives op_params by pointer, so it has to run
+  // before either kernel consumes op_params.
+  if (reference_ops::ProcessBroadcastShapes(shape1, shape2, &op_params)) {
+    reference_ops::BroadcastAdd4DSlow(op_params, shape1, lhs, shape2, rhs,
+                                      shape_out, dst);
+    return kTfLiteOk;
+  }
+
+  // No broadcast: the kernel receives raw pointers plus the element count.
+  arm_elementwise_add_s16(
+      lhs, rhs, op_params.input1_offset, op_params.input1_multiplier,
+      op_params.input1_shift, op_params.input2_offset,
+      op_params.input2_multiplier, op_params.input2_shift,
+      op_params.left_shift, dst, op_params.output_offset,
+      op_params.output_multiplier, op_params.output_shift,
+      op_params.quantized_activation_min, op_params.quantized_activation_max,
+      MatchingElementsSize(shape1, shape2, shape_out));
+  return kTfLiteOk;
+}
+
+void EvalAddFloat(TfLiteContext* context, TfLiteNode* node,
+                  TfLiteAddParams* params, const OpData* data,
+                  const TfLiteEvalTensor* input1,
+                  const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
   tflite::ArithmeticParams op_params;
   SetActivationParams(data->output_activation_min_f32,
                       data->output_activation_max_f32, &op_params);
-#define TF_LITE_ADD(opname)                                               \
-  reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
-                        tflite::micro::GetTensorData<float>(input1),      \
-                        tflite::micro::GetTensorShape(input2),            \
-                        tflite::micro::GetTensorData<float>(input2),      \
-                        tflite::micro::GetTensorShape(output),            \
-                        tflite::micro::GetTensorData<float>(output))
   if (data->requires_broadcast) {
-    TF_LITE_ADD(BroadcastAdd4DSlow);
+    reference_ops::BroadcastAdd4DSlow(
+        op_params, tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<float>(input1),
+        tflite::micro::GetTensorShape(input2),
+        tflite::micro::GetTensorData<float>(input2),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<float>(output));
   } else {
-    TF_LITE_ADD(Add);
+    reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+                       tflite::micro::GetTensorData<float>(input1),
+                       tflite::micro::GetTensorShape(input2),
+                       tflite::micro::GetTensorData<float>(input2),
+                       tflite::micro::GetTensorShape(output),
+                       tflite::micro::GetTensorData<float>(output));
   }
-#undef TF_LITE_ADD
 }
 
 TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
@@ -135,87 +233,68 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    tflite::ArithmeticParams op_params;
-    op_params.left_shift = data->left_shift;
-    op_params.input1_offset = data->input1_offset;
-    op_params.input1_multiplier = data->input1_multiplier;
-    op_params.input1_shift = data->input1_shift;
-    op_params.input2_offset = data->input2_offset;
-    op_params.input2_multiplier = data->input2_multiplier;
-    op_params.input2_shift = data->input2_shift;
-    op_params.output_offset = data->output_offset;
-    op_params.output_multiplier = data->output_multiplier;
-    op_params.output_shift = data->output_shift;
-    SetActivationParams(data->output_activation_min,
-                        data->output_activation_max, &op_params);
-    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorShape(input2), &op_params);
-#define TF_LITE_ADD(type, opname, dtype)                         \
-  type::opname(op_params, tflite::micro::GetTensorShape(input1), \
-               tflite::micro::GetTensorData<dtype>(input1),      \
-               tflite::micro::GetTensorShape(input2),            \
-               tflite::micro::GetTensorData<dtype>(input2),      \
-               tflite::micro::GetTensorShape(output),            \
-               tflite::micro::GetTensorData<dtype>(output));
-    if (output->type == kTfLiteInt8) {
-      if (need_broadcast) {
-        TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
-      } else {
-        arm_elementwise_add_s8(
-            tflite::micro::GetTensorData<int8_t>(input1),
-            tflite::micro::GetTensorData<int8_t>(input2),
-            op_params.input1_offset, op_params.input1_multiplier,
-            op_params.input1_shift, op_params.input2_offset,
-            op_params.input2_multiplier, op_params.input2_shift,
-            op_params.left_shift, tflite::micro::GetTensorData<int8_t>(output),
-            op_params.output_offset, op_params.output_multiplier,
-            op_params.output_shift, op_params.quantized_activation_min,
-            op_params.quantized_activation_max,
-            MatchingElementsSize(tflite::micro::GetTensorShape(input1),
-                                 tflite::micro::GetTensorShape(input2),
-                                 tflite::micro::GetTensorShape(output)));
-      }
-    } else {
-      if (need_broadcast) {
-        TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t);
-      } else {
-        TF_LITE_ADD(reference_ops, Add, uint8_t);
-      }
+  switch (output->type) {
+    case kTfLiteInt8: {
+      EvalAddQuantizedInt8(context, node, params, data, input1, input2, output);
+      break;
     }
-#undef TF_LITE_ADD
+    case kTfLiteInt16: {
+      EvalAddQuantizedInt16(context, node, params, data, input1, input2,
+                            output);
+      break;
+    }
+    default:
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+}  // namespace
+
+void* InitAdd(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
 }
 
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus PrepareAdd(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   TFLITE_DCHECK(node->builtin_data != nullptr);
 
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
   TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
   TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
+  if (input1->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input1->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   OpData* data = static_cast<OpData*>(node->user_data);
   auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
 
   TF_LITE_ENSURE_STATUS(
       CalculateOpData(context, params, input1, input2, output, data));
 
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
 
   const TfLiteEvalTensor* input1 =
@@ -229,38 +308,75 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const OpData* data = static_cast<const OpData*>(node->user_data);
 
   if (output->type == kTfLiteFloat32) {
-    EvalAdd(context, node, params, data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+    EvalAddFloat(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                 input1, input2, output));
   } else {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
+                output->type);
     return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace add
+// Invoke function used by Register_ADD_INT8: runs the int8 path directly.
+TfLiteStatus EvalAddInt8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* lhs =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* rhs =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* result =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(result->type == kTfLiteInt8);
+
+  auto* add_params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+  const auto* op_data = static_cast<const OpData*>(node->user_data);
+  const TfLiteStatus status = EvalAddQuantizedInt8(
+      context, node, add_params, op_data, lhs, rhs, result);
+  TF_LITE_ENSURE_OK(context, status);
+  return kTfLiteOk;
+}
+
+// Invoke function used by Register_ADD_INT16: runs the int16 path directly.
+TfLiteStatus EvalAddInt16(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* lhs =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* rhs =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* result =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(result->type == kTfLiteInt16);
+
+  auto* add_params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+  const auto* op_data = static_cast<const OpData*>(node->user_data);
+  const TfLiteStatus status = EvalAddQuantizedInt16(
+      context, node, add_params, op_data, lhs, rhs, result);
+  TF_LITE_ENSURE_OK(context, status);
+  return kTfLiteOk;
+}
 
 TfLiteRegistration Register_ADD() {
-  return {/*init=*/add::Init,
-          /*free=*/nullptr,
-          /*prepare=*/add::Prepare,
-          /*invoke=*/add::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAdd);
+}
+
+TfLiteRegistration Register_ADD_INT8() {  // ADD variant invoking EvalAddInt8.
+  return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAddInt8);
+}
+
+TfLiteRegistration Register_ADD_INT16() {  // ADD variant invoking EvalAddInt16.
+  return tflite::micro::RegisterOp(InitAdd, PrepareAdd, EvalAddInt16);
+}
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
-#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -277,6 +393,10 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
 
+#include <algorithm>
+#include <limits>
+
+#include "mli_api.h"  // NOLINT
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
@@ -285,12 +405,16 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "sl_mvp_ml_add.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace sl {
-namespace add {
 
 constexpr int kInputTensor1 = 0;
 constexpr int kInputTensor2 = 1;
@@ -299,19 +423,34 @@ constexpr int kOutputTensor = 0;
 struct OpData {
   bool requires_broadcast;
 
+  // These fields are used in both the general 8-bit -> 8bit quantized path,
+  // and the special 16-bit -> 16bit quantized path
   int input1_shift;
   int input2_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+
+  // These fields are used only in the general 8-bit -> 8bit quantized path
   int32_t input1_multiplier;
   int32_t input2_multiplier;
   int32_t output_multiplier;
   int output_shift;
   int left_shift;
-
-  sli_mvp_ml_add_s8_params_t params;
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t output_offset;
 
   // Used only for float evals:
   float output_activation_min_f32;
   float output_activation_max_f32;
+
+  // The result of checking if MLI optimized version of tensors can be used.
+  bool is_mli_applicable;
+
+  // Tensors in MLI format.
+  mutable ops::micro::MliTensorInterface mli_input1;
+  mutable ops::micro::MliTensorInterface mli_input2;
+  mutable ops::micro::MliTensorInterface mli_out;
 };
 
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
@@ -320,26 +459,61 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
                              OpData* data) {
   data->requires_broadcast = !HaveSameShapes(input1, input2);
 
-  if (output->type == kTfLiteInt8) {
-    data->params.input1_offset = -input1->params.zero_point;
-    data->params.input2_offset = -input2->params.zero_point;
-    data->params.output_offset = output->params.zero_point;
-    data->params.input1_multiplier = input1->params.scale;
-    data->params.input2_multiplier = input2->params.scale;
-    data->params.output_multiplier = 1.0 / output->params.scale;
-    data->params.length = GetTensorShape(input1).FlatSize();
-
-    int32_t activation_min;
-    int32_t activation_max;
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &activation_min,
-        &activation_max));
-    data->params.activation_min = static_cast<int8_t>(activation_min);
-    data->params.activation_max = static_cast<int8_t>(activation_max);
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
 
-    // These multipliers and parameters are not used by the MVP codepath,
-    // however are needed in cases where broadcast is used.
-    data->left_shift = 20;
+    // MLI 2.0 optimized version only supports int8_t datatype and min/max
+    // within container range. Broadcasting isn't supported on the primitive
+    // level (but might be implemented as part of slicing in future)
+#ifdef MLI_2_0  //
+    data->is_mli_applicable =
+        (input1->type == kTfLiteInt8) && (input2->type == kTfLiteInt8) &&
+        (output->type == kTfLiteInt8) && !data->requires_broadcast &&
+        data->output_activation_min == std::numeric_limits<int8_t>::min() &&
+        data->output_activation_max == std::numeric_limits<int8_t>::max();
+#else
+    data->is_mli_applicable = false;
+#endif
+
+    if (data->is_mli_applicable) {
+      data->mli_input1 =
+          ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+              context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+      data->mli_input2 =
+          ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+              context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+      data->mli_out = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+          context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+
+      ops::micro::ConvertToMliTensor(input1, &data->mli_input1);
+      ops::micro::ConvertToMliTensor(input2, &data->mli_input2);
+      ops::micro::ConvertToMliTensor(output, &data->mli_out);
+      /* Flatten tensors to simplify the process (as we don't support
+       * broadcasting). */
+      data->mli_input1.Shape()[0] =
+          mli_hlp_count_elem_num(data->mli_input1.MliTensor(), 0);
+      data->mli_input2.Shape()[0] =
+          mli_hlp_count_elem_num(data->mli_input2.MliTensor(), 0);
+      data->mli_out.Shape()[0] =
+          mli_hlp_count_elem_num(data->mli_out.MliTensor(), 0);
+      data->mli_input1.MemStride()[0] = data->mli_input2.MemStride()[0] = 1;
+      data->mli_out.MemStride()[0] = 1;
+      *data->mli_input1.Rank() = *data->mli_input2.Rank() = 1;
+      *data->mli_out.Rank() = 1;
+    }
+  } else {
+    data->is_mli_applicable = false;
+  }
+
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    // 8bit -> 8bit general quantized path, with general rescalings
+    data->input1_offset = -input1->params.zero_point;
+    data->input2_offset = -input2->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
     const double twice_max_input_scale =
         2 * static_cast<double>(
                 std::max(input1->params.scale, input2->params.scale));
@@ -360,31 +534,48 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
     QuantizeMultiplierSmallerThanOneExp(
         real_output_multiplier, &data->output_multiplier, &data->output_shift);
 
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
   } else if (output->type == kTfLiteFloat32) {
     CalculateActivationRange(params->activation,
                              &data->output_activation_min_f32,
                              &data->output_activation_max_f32);
+#endif  // !defined(TF_LITE_STRIP_REFERENCE_IMPL)
   }
 
   return kTfLiteOk;
 }
 
-void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
-             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
+TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node,
+                     TfLiteAddParams* params, const OpData* data,
+                     const TfLiteEvalTensor* input1,
+                     const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
   tflite::ArithmeticParams op_params;
   SetActivationParams(data->output_activation_min_f32,
                       data->output_activation_max_f32, &op_params);
   if (data->requires_broadcast) {
-    reference_ops::BroadcastAdd4DSlow(op_params, tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<float>(input1),
-                                      tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<float>(input2),
-                                      tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<float>(output));
+    reference_ops::BroadcastAdd4DSlow(
+        op_params, tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<float>(input1),
+        tflite::micro::GetTensorShape(input2),
+        tflite::micro::GetTensorData<float>(input2),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<float>(output));
   } else {
-    reference_ops::Add(op_params,
-                       tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<float>(input1),
-                       tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<float>(input2),
-                       tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<float>(output));
+    reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+                       tflite::micro::GetTensorData<float>(input1),
+                       tflite::micro::GetTensorShape(input2),
+                       tflite::micro::GetTensorData<float>(input2),
+                       tflite::micro::GetTensorShape(output),
+                       tflite::micro::GetTensorData<float>(output));
   }
+  return kTfLiteOk;
+#else
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
+  return kTfLiteError;
+#endif
 }
 
 TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
@@ -392,46 +583,370 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  TfLiteStatus status = kTfLiteOk;
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
   tflite::ArithmeticParams op_params;
   op_params.left_shift = data->left_shift;
-  op_params.input1_offset = data->params.input1_offset;
+  op_params.input1_offset = data->input1_offset;
   op_params.input1_multiplier = data->input1_multiplier;
   op_params.input1_shift = data->input1_shift;
-  op_params.input2_offset = data->params.input2_offset;
+  op_params.input2_offset = data->input2_offset;
   op_params.input2_multiplier = data->input2_multiplier;
   op_params.input2_shift = data->input2_shift;
-  op_params.output_offset = data->params.output_offset;
+  op_params.output_offset = data->output_offset;
   op_params.output_multiplier = data->output_multiplier;
   op_params.output_shift = data->output_shift;
-  op_params.quantized_activation_min = data->params.activation_min;
-  op_params.quantized_activation_max = data->params.activation_max;
-
-  // TODO: Do we need to support the broadcast scenario?
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorShape(input2), &op_params);
-
-  if (need_broadcast) {
-    reference_integer_ops::BroadcastAdd4DSlow(op_params,
-                                              tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<int8_t>(input1),
-                                              tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<int8_t>(input2),
-                                              tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<int8_t>(output));
-  } else {
-    sli_mvp_ml_add_s8_params_t params = data->params;
-    params.input1 = tflite::micro::GetTensorData<int8_t>(input1);
-    params.input2 = tflite::micro::GetTensorData<int8_t>(input2);
-    params.output = tflite::micro::GetTensorData<int8_t>(output);
-    sl_status_t ret = sli_mvp_ml_add_s8(&params);
-    if (ret != SL_STATUS_OK) {
-        status = kTfLiteError;
-    }
-  }
-
-  return status;
-}
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      &op_params);
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
 
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+  switch (output->type) {
+    case kTfLiteInt8: {
+      if (need_broadcast) {
+        reference_integer_ops::BroadcastAdd4DSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
+      } else {
+        reference_integer_ops::Add(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
+      }
+      break;
+    }
+    case kTfLiteInt16: {
+      if (need_broadcast) {
+        reference_ops::BroadcastAdd4DSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int16_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int16_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int16_t>(output));
+      } else {
+        reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+                           tflite::micro::GetTensorData<int16_t>(input1),
+                           tflite::micro::GetTensorShape(input2),
+                           tflite::micro::GetTensorData<int16_t>(input2),
+                           tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<int16_t>(output),
+                           false);
+      }
+      break;
+    }
+    default:
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+#else
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
+  return kTfLiteError;
+#endif
+}
+
+TfLiteStatus EvalMLIAddInt8(TfLiteContext* context, TfLiteNode* node,
+                            TfLiteAddParams* params, const OpData* data,
+                            const TfLiteEvalTensor* input1,
+                            const TfLiteEvalTensor* input2,
+                            TfLiteEvalTensor* output) {  // int8 ADD through mli_krn_eltwise_add_sa8, one slice at a time.
+#ifdef MLI_2_0  // The kernel path below is compiled only when MLI_2_0 is defined.
+  TF_LITE_ENSURE(context, data->is_mli_applicable == true);
+  TF_LITE_ENSURE(context, input1->type == kTfLiteInt8);
+  TF_LITE_ENSURE(context, input2->type == kTfLiteInt8);
+  TF_LITE_ENSURE(context, output->type == kTfLiteInt8);
+
+  ops::micro::MliTensorAttachBuffer<int8_t>(input1, &data->mli_input1);  // Bind the eval tensors' int8 data to the MLI tensors held in OpData.
+  ops::micro::MliTensorAttachBuffer<int8_t>(input2, &data->mli_input2);
+  ops::micro::MliTensorAttachBuffer<int8_t>(output, &data->mli_out);
+
+  // mli_mov copy config, plus by-value copies of the MLI tensor structs that will describe data in fast (local) memory.
+  mli_mov_cfg_t copy_config;
+  mli_mov_cfg_for_copy(&copy_config);
+  mli_tensor input1_local_tsr = *data->mli_input1.MliTensor();
+  mli_tensor input2_local_tsr = *data->mli_input2.MliTensor();
+  mli_tensor out_local_tsr = *data->mli_out.MliTensor();
+  ops::micro::MliTensorInterface input1_local(&input1_local_tsr);
+  ops::micro::MliTensorInterface input2_local(&input2_local_tsr);
+  ops::micro::MliTensorInterface out_local(&out_local_tsr);
+
+  /* Allocate the local buffers, then derive the slice size from the smallest capacity of the three. */
+  TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_eltwise_tensors(
+      context, &input1_local, &input2_local, &out_local));
+  TF_LITE_ENSURE(context, *input1_local.Rank() == 1 &&
+                              *input2_local.Rank() == 1 &&
+                              *out_local.Rank() == 1);
+  uint32_t min_capacity = *input1_local.DataCapacity();
+  min_capacity = std::min(min_capacity, *input2_local.DataCapacity());
+  min_capacity = std::min(min_capacity, *out_local.DataCapacity());
+  const int slice_dim = 0;  // Rank was checked to be 1 above, so dimension 0 is the only one to slice.
+  const int slice_size =
+      min_capacity / mli_hlp_tensor_element_size(out_local.MliTensor());  // Capacity divided by element size; NOTE(review): presumably capacity is in bytes - confirm.
+
+  /* A tensor counts as "local" when its local descriptor still points at the
+     same data as the original tensor. In that case the original tensor's
+     slice is used directly instead of the separate local tensor. */
+  const bool input1_is_local =
+      input1_local.Data<int8_t>() == data->mli_input1.Data<int8_t>();
+  const bool input2_is_local =
+      input2_local.Data<int8_t>() == data->mli_input2.Data<int8_t>();
+  const bool out_is_local =
+      out_local.Data<int8_t>() == data->mli_out.Data<int8_t>();
+
+  ops::micro::TensorSlicer input1_slice(data->mli_input1.MliTensor(), slice_dim,
+                                        slice_size);
+  ops::micro::TensorSlicer input2_slice(data->mli_input2.MliTensor(), slice_dim,
+                                        slice_size);
+  ops::micro::TensorSlicer out_slice(data->mli_out.MliTensor(), slice_dim,
+                                     slice_size);
+
+  mli_tensor* input1_tsr =
+      input1_is_local ? input1_slice.Sub() : input1_local.MliTensor();  // Kernel operand: the slice itself when already local, else the local tensor.
+  mli_tensor* input2_tsr =
+      input2_is_local ? input2_slice.Sub() : input2_local.MliTensor();
+  mli_tensor* out_tsr = out_is_local ? out_slice.Sub() : out_local.MliTensor();
+
+  while (!out_slice.Done()) {  // The three slicers are advanced together; the output slicer decides termination.
+    mli_mov_tensor_sync(input1_slice.Sub(), &copy_config, input1_tsr);  // For a local tensor both pointer arguments are the same object (see input1_tsr above).
+    mli_mov_tensor_sync(input2_slice.Sub(), &copy_config, input2_tsr);
+
+    mli_krn_eltwise_add_sa8(input1_tsr, input2_tsr, out_tsr);  // Return value of the kernel call is not checked.
+
+    mli_mov_tensor_sync(out_tsr, &copy_config, out_slice.Sub());  // NOTE(review): presumably moves the result back into the output slice - confirm argument order.
+    input1_slice.Next();
+    input2_slice.Next();
+    out_slice.Next();
+  }
+  return kTfLiteOk;
+#else  // MLI_2_0 is not defined: no kernel path exists, so fail without logging.
+  return kTfLiteError;
+#endif  // MLI_2_0
+}
+
+void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {  // buffer and length are unused here.
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));  // One OpData-sized persistent buffer; the result is returned without a null check.
+}
+
+TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {  // Dispatches ADD to the MLI, float, or quantized evaluator.
+  TfLiteStatus ret_val = kTfLiteOk;
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);  // user_data is interpreted as this kernel's OpData.
+
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  if (data->is_mli_applicable) {  // The MLI flag is tested first, before any output-type check.
+    ret_val =
+        EvalMLIAddInt8(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteFloat32) {
+    ret_val = EvalAdd(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    ret_val =
+        EvalAddQuantized(context, node, params, data, input1, input2, output);
+  } else {  // Any other output type is logged and reported as an error.
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
+                output->type);
+    ret_val = kTfLiteError;
+  }
+
+  return ret_val;  // Status of whichever branch ran.
+}
+
+TfLiteRegistration Register_ADD() {  // Builds the ADD registration for this kernel variant.
+  return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);  // AddPrepare is defined outside the lines shown here.
+}
+
+}  // namespace tflite
+
+#elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "sl_mvp_ml_add.h"
+
+namespace tflite {
+namespace sl {
+namespace add {
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpData {  // Per-node state filled in by CalculateOpData and read by the Eval functions.
+  bool requires_broadcast;  // True when the two inputs do not have the same shape.
+
+  int input1_shift;  // Shift/multiplier pairs below are written by QuantizeMultiplierSmallerThanOneExp.
+  int input2_shift;
+  int32_t input1_multiplier;
+  int32_t input2_multiplier;
+  int32_t output_multiplier;
+  int output_shift;
+  int left_shift;  // Set to 20 on the int8 path.
+
+  sli_mvp_ml_add_s8_params_t params;  // Parameter block handed to sli_mvp_ml_add_s8; also holds the offsets and activation range.
+
+  // Used only for float evals:
+  float output_activation_min_f32;
+  float output_activation_max_f32;
+};
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
+                             const TfLiteTensor* input1,
+                             const TfLiteTensor* input2, TfLiteTensor* output,
+                             OpData* data) {  // Fills *data from the tensors' quantization params and the fused activation.
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  if (output->type == kTfLiteInt8) {
+    data->params.input1_offset = -input1->params.zero_point;  // Input offsets are the negated zero points.
+    data->params.input2_offset = -input2->params.zero_point;
+    data->params.output_offset = output->params.zero_point;  // The output offset is not negated.
+    data->params.input1_multiplier = input1->params.scale;  // Raw scales are stored here, unlike the fixed-point multipliers computed further down.
+    data->params.input2_multiplier = input2->params.scale;
+    data->params.output_multiplier = 1.0 / output->params.scale;  // Reciprocal of the output scale.
+    data->params.length = GetTensorShape(input1).FlatSize();  // Element count is taken from input1 only.
+
+    int32_t activation_min;
+    int32_t activation_max;
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &activation_min,
+        &activation_max));
+    data->params.activation_min = static_cast<int8_t>(activation_min);  // Narrowed from int32_t to int8_t.
+    data->params.activation_max = static_cast<int8_t>(activation_max);
+
+    // The fixed-point multipliers and shifts computed below are not used by the
+    // MVP codepath; they are needed only in cases where broadcast is used.
+    data->left_shift = 20;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(
+                std::max(input1->params.scale, input2->params.scale));
+    const double real_input1_multiplier =
+        static_cast<double>(input1->params.scale) / twice_max_input_scale;  // Each input scale relative to twice the larger input scale.
+    const double real_input2_multiplier =
+        static_cast<double>(input2->params.scale) / twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));  // Divides by 2^left_shift times the output scale.
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+  } else if (output->type == kTfLiteFloat32) {  // Float path only needs the activation clamp range.
+    CalculateActivationRange(params->activation,
+                             &data->output_activation_min_f32,
+                             &data->output_activation_max_f32);
+  }
+
+  return kTfLiteOk;  // Output types other than int8/float32 fall through with only requires_broadcast set.
+}
+
+void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
+             const OpData* data, const TfLiteEvalTensor* input1,
+             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {  // Float ADD using the reference_ops implementations.
+  tflite::ArithmeticParams op_params;
+  SetActivationParams(data->output_activation_min_f32,
+                      data->output_activation_max_f32, &op_params);  // Only the float activation range is set on op_params.
+  if (data->requires_broadcast) {  // Input shapes differ: use the broadcasting variant.
+    reference_ops::BroadcastAdd4DSlow(op_params, tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<float>(input1),
+                                      tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<float>(input2),
+                                      tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<float>(output));
+  } else {  // Same shapes: plain reference add.
+    reference_ops::Add(op_params,
+                       tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<float>(input1),
+                       tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<float>(input2),
+                       tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<float>(output));
+  }
+}
+
+TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
+                              TfLiteAddParams* params, const OpData* data,
+                              const TfLiteEvalTensor* input1,
+                              const TfLiteEvalTensor* input2,
+                              TfLiteEvalTensor* output) {  // int8 ADD: reference broadcast path, otherwise sli_mvp_ml_add_s8.
+  TfLiteStatus status = kTfLiteOk;
+  tflite::ArithmeticParams op_params;  // Filled for the reference path; offsets and activation range come from data->params.
+  op_params.left_shift = data->left_shift;
+  op_params.input1_offset = data->params.input1_offset;
+  op_params.input1_multiplier = data->input1_multiplier;
+  op_params.input1_shift = data->input1_shift;
+  op_params.input2_offset = data->params.input2_offset;
+  op_params.input2_multiplier = data->input2_multiplier;
+  op_params.input2_shift = data->input2_shift;
+  op_params.output_offset = data->params.output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = data->output_shift;
+  op_params.quantized_activation_min = data->params.activation_min;
+  op_params.quantized_activation_max = data->params.activation_max;
+
+  // TODO: Do we need to support the broadcast scenario?
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorShape(input2), &op_params);
+
+  if (need_broadcast) {  // Broadcasting is handled by the reference integer implementation.
+    reference_integer_ops::BroadcastAdd4DSlow(op_params,
+                                              tflite::micro::GetTensorShape(input1), tflite::micro::GetTensorData<int8_t>(input1),
+                                              tflite::micro::GetTensorShape(input2), tflite::micro::GetTensorData<int8_t>(input2),
+                                              tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<int8_t>(output));
+  } else {
+    sli_mvp_ml_add_s8_params_t params = data->params;  // Local copy of the prepared parameters; this local shadows the `params` function argument.
+    params.input1 = tflite::micro::GetTensorData<int8_t>(input1);  // Data pointers are filled in at eval time.
+    params.input2 = tflite::micro::GetTensorData<int8_t>(input2);
+    params.output = tflite::micro::GetTensorData<int8_t>(output);
+    sl_status_t ret = sli_mvp_ml_add_s8(&params);
+    if (ret != SL_STATUS_OK) {  // Any non-OK status from the MVP call is mapped to kTfLiteError.
+        status = kTfLiteError;
+    }
+  }
+
+  return status;
+}
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {  // buffer and length are unused here.
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));  // One OpData-sized persistent buffer; the result is returned without a null check.
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
@@ -481,8 +996,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace add
 }  // namespace sl
 
-namespace ops {
-namespace micro {
 TfLiteRegistration Register_ADD() {
   return {/*init=*/sl::add::Init,
           /*free=*/nullptr,
@@ -494,8 +1007,6 @@ TfLiteRegistration Register_ADD() {
           /*version=*/0};
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1
@@ -514,59 +1025,33 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
-
-#include <esp_timer.h>
-#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
-
-long long add_total_time = 0;
-
-namespace tflite {
-namespace ops {
-namespace micro {
-namespace add {
-
-constexpr int kInputTensor1 = 0;
-constexpr int kInputTensor2 = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  bool requires_broadcast;
-
-  // These fields are used in both the general 8-bit -> 8bit quantized path,
-  // and the special 16-bit -> 16bit quantized path
-  int input1_shift;
-  int input2_shift;
-  int32_t output_activation_min;
-  int32_t output_activation_max;
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
-  // These fields are used only in the general 8-bit -> 8bit quantized path
-  int32_t input1_multiplier;
-  int32_t input2_multiplier;
-  int32_t output_multiplier;
-  int output_shift;
-  int left_shift;
-  int32_t input1_offset;
-  int32_t input2_offset;
-  int32_t output_offset;
+#include <esp_timer.h>
 
-  // Used only for float evals:
-  float output_activation_min_f32;
-  float output_activation_max_f32;
-};
+#if ESP_NN
+#include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
+
+long long add_total_time = 0;
+
+namespace tflite {
 
 void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
+             const OpDataAdd* data, const TfLiteEvalTensor* input1,
              const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
   tflite::ArithmeticParams op_params;
   SetActivationParams(data->output_activation_min_f32,
@@ -590,7 +1075,7 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
 }
 
 TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
-                              TfLiteAddParams* params, const OpData* data,
+                              TfLiteAddParams* params, const OpDataAdd* data,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
@@ -622,6 +1107,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int8_t>(output));
       } else {
+#if ESP_NN
         const int8_t *input1_data = tflite::micro::GetTensorData<int8_t>(input1);
         const int8_t *input2_data = tflite::micro::GetTensorData<int8_t>(input2);
         int8_t *out_data = tflite::micro::GetTensorData<int8_t>(output);
@@ -645,6 +1131,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
                                                        tflite::micro::GetTensorShape(input2),
                                                        tflite::micro::GetTensorShape(output))
                                   );
+#else
+        reference_integer_ops::Add(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int8_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int8_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
+#endif
       }
       break;
     }
@@ -669,95 +1164,31 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
       return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,
-                                const TfLiteTensor* input1,
-                                const TfLiteTensor* input2,
-                                TfLiteTensor* output, OpData* data) {
-  data->requires_broadcast = !HaveSameShapes(input1, input2);
-
-  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
-    // 8bit -> 8bit general quantized path, with general rescalings
-    data->input1_offset = -input1->params.zero_point;
-    data->input2_offset = -input2->params.zero_point;
-    data->output_offset = output->params.zero_point;
-    data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
-    const double twice_max_input_scale =
-        2 * static_cast<double>(
-                std::max(input1->params.scale, input2->params.scale));
-    const double real_input1_multiplier =
-        static_cast<double>(input1->params.scale) / twice_max_input_scale;
-    const double real_input2_multiplier =
-        static_cast<double>(input2->params.scale) / twice_max_input_scale;
-    const double real_output_multiplier =
-        twice_max_input_scale /
-        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_output_multiplier, &data->output_multiplier, &data->output_shift);
-
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-  } else if (output->type == kTfLiteFloat32) {
-    CalculateActivationRange(params->activation,
-                             &data->output_activation_min_f32,
-                             &data->output_activation_max_f32);
-  }
-
-  return kTfLiteOk;
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  OpData* data = static_cast<OpData*>(node->user_data);
-  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
-
-  TF_LITE_ENSURE_STATUS(
-      CalculateOpDataAdd(context, params, input1, input2, output, data));
-
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
 
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+      tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+      tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
 
   long long start_time = esp_timer_get_time();
 
@@ -767,8 +1198,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                 input1, input2, output));
   } else {
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
+                output->type);
     return kTfLiteError;
   }
   add_total_time += esp_timer_get_time() - start_time;
@@ -776,25 +1207,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-}  // namespace add
-
 TfLiteRegistration Register_ADD() {
-  return {/*init=*/add::Init,
-          /*free=*/nullptr,
-          /*prepare=*/add::Prepare,
-          /*invoke=*/add::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #else
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -819,89 +1239,15 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace add {
-
-constexpr int kInputTensor1 = 0;
-constexpr int kInputTensor2 = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  bool requires_broadcast;
-
-  // These fields are used in both the general 8-bit -> 8bit quantized path,
-  // and the special 16-bit -> 16bit quantized path
-  int input1_shift;
-  int input2_shift;
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-
-  // These fields are used only in the general 8-bit -> 8bit quantized path
-  int32_t input1_multiplier;
-  int32_t input2_multiplier;
-  int32_t output_multiplier;
-  int output_shift;
-  int left_shift;
-  int32_t input1_offset;
-  int32_t input2_offset;
-  int32_t output_offset;
-
-  // Used only for float evals:
-  float output_activation_min_f32;
-  float output_activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
-                             const TfLiteTensor* input1,
-                             const TfLiteTensor* input2, TfLiteTensor* output,
-                             OpData* data) {
-  data->requires_broadcast = !HaveSameShapes(input1, input2);
-
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    // 8bit -> 8bit general quantized path, with general rescalings
-    data->input1_offset = -input1->params.zero_point;
-    data->input2_offset = -input2->params.zero_point;
-    data->output_offset = output->params.zero_point;
-    data->left_shift = 20;
-    const double twice_max_input_scale =
-        2 * static_cast<double>(
-                std::max(input1->params.scale, input2->params.scale));
-    const double real_input1_multiplier =
-        static_cast<double>(input1->params.scale) / twice_max_input_scale;
-    const double real_input2_multiplier =
-        static_cast<double>(input2->params.scale) / twice_max_input_scale;
-    const double real_output_multiplier =
-        twice_max_input_scale /
-        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_output_multiplier, &data->output_multiplier, &data->output_shift);
-
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-  } else if (output->type == kTfLiteFloat32) {
-    CalculateActivationRange(params->activation,
-                             &data->output_activation_min_f32,
-                             &data->output_activation_max_f32);
-  }
-
-  return kTfLiteOk;
-}
 
 void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
+             const OpDataAdd* data, const TfLiteEvalTensor* input1,
              const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
   tflite::ArithmeticParams op_params;
   SetActivationParams(data->output_activation_min_f32,
@@ -925,28 +1271,29 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
 }
 
 TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
-                              TfLiteAddParams* params, const OpData* data,
+                              TfLiteAddParams* params, const OpDataAdd* data,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    tflite::ArithmeticParams op_params;
-    op_params.left_shift = data->left_shift;
-    op_params.input1_offset = data->input1_offset;
-    op_params.input1_multiplier = data->input1_multiplier;
-    op_params.input1_shift = data->input1_shift;
-    op_params.input2_offset = data->input2_offset;
-    op_params.input2_multiplier = data->input2_multiplier;
-    op_params.input2_shift = data->input2_shift;
-    op_params.output_offset = data->output_offset;
-    op_params.output_multiplier = data->output_multiplier;
-    op_params.output_shift = data->output_shift;
-    SetActivationParams(data->output_activation_min,
-                        data->output_activation_max, &op_params);
-    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorShape(input2), &op_params);
-    if (output->type == kTfLiteInt8) {
+  tflite::ArithmeticParams op_params;
+  op_params.left_shift = data->left_shift;
+  op_params.input1_offset = data->input1_offset;
+  op_params.input1_multiplier = data->input1_multiplier;
+  op_params.input1_shift = data->input1_shift;
+  op_params.input2_offset = data->input2_offset;
+  op_params.input2_multiplier = data->input2_multiplier;
+  op_params.input2_shift = data->input2_shift;
+  op_params.output_offset = data->output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = data->output_shift;
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      &op_params);
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
+
+  switch (output->type) {
+    case kTfLiteInt8: {
       if (need_broadcast) {
         reference_integer_ops::BroadcastAdd4DSlow(
             op_params, tflite::micro::GetTensorShape(input1),
@@ -964,96 +1311,73 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int8_t>(output));
       }
-    } else {
+      break;
+    }
+    case kTfLiteInt16: {
       if (need_broadcast) {
         reference_ops::BroadcastAdd4DSlow(
             op_params, tflite::micro::GetTensorShape(input1),
-            tflite::micro::GetTensorData<uint8_t>(input1),
+            tflite::micro::GetTensorData<int16_t>(input1),
             tflite::micro::GetTensorShape(input2),
-            tflite::micro::GetTensorData<uint8_t>(input2),
+            tflite::micro::GetTensorData<int16_t>(input2),
             tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<uint8_t>(output));
+            tflite::micro::GetTensorData<int16_t>(output));
       } else {
         reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
-                           tflite::micro::GetTensorData<uint8_t>(input1),
+                           tflite::micro::GetTensorData<int16_t>(input1),
                            tflite::micro::GetTensorShape(input2),
-                           tflite::micro::GetTensorData<uint8_t>(input2),
+                           tflite::micro::GetTensorData<int16_t>(input2),
                            tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<uint8_t>(output));
+                           tflite::micro::GetTensorData<int16_t>(output),
+                           false);
       }
+      break;
     }
+    default:
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+void* AddInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  OpData* data = static_cast<OpData*>(node->user_data);
-  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
-
-  TF_LITE_ENSURE_STATUS(
-      CalculateOpData(context, params, input1, input2, output, data));
-
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus AddEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
 
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+      tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+      tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
 
   if (output->type == kTfLiteFloat32) {
     EvalAdd(context, node, params, data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data,
                                                 input1, input2, output));
   } else {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
+                output->type);
     return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace add
-
 TfLiteRegistration Register_ADD() {
-  return {/*init=*/add::Init,
-          /*free=*/nullptr,
-          /*prepare=*/add::Prepare,
-          /*invoke=*/add::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(AddInit, AddPrepare, AddEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #endif
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h
new file mode 100644
index 0000000..e91ffb3
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h
@@ -0,0 +1,77 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+extern const int kAddInputTensor1;
+extern const int kAddInputTensor2;
+extern const int kAddOutputTensor;
+
+struct OpDataAdd {
+  bool requires_broadcast;
+
+  // These fields are used in both the general 8-bit -> 8bit quantized path,
+  // and the special 16-bit -> 16bit quantized path
+  int input1_shift;
+  int input2_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+
+  // These fields are set and forwarded for both int8 and int16 quantized paths
+  int32_t input1_multiplier;
+  int32_t input2_multiplier;
+  int32_t output_multiplier;
+  int output_shift;
+  int left_shift;
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t output_offset;
+
+  // Used only for float evals:
+  float output_activation_min_f32;
+  float output_activation_max_f32;
+};
+
+TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,
+                                const TfLiteTensor* input1,
+                                const TfLiteTensor* input2,
+                                TfLiteTensor* output, OpDataAdd* data);
+
+TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node);
+
+// The generic implementation must define this registration function.
+TfLiteRegistration Register_ADD();
+
+#if defined(CMSIS_NN)
+TfLiteRegistration Register_ADD_INT8();
+
+TfLiteRegistration Register_ADD_INT16();
+#else
+// Fallback registration
+inline TfLiteRegistration Register_ADD_INT8() { return Register_ADD(); }
+
+inline TfLiteRegistration Register_ADD_INT16() { return Register_ADD(); }
+#endif
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ADD_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cc
new file mode 100644
index 0000000..d9622a2
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_common.cc
@@ -0,0 +1,106 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+
+namespace tflite {
+
+const int kAddInputTensor1 = 0;
+const int kAddInputTensor2 = 1;
+const int kAddOutputTensor = 0;
+
+TfLiteStatus CalculateOpDataAdd(TfLiteContext* context, TfLiteAddParams* params,
+                                const TfLiteTensor* input1,
+                                const TfLiteTensor* input2,
+                                TfLiteTensor* output, OpDataAdd* data) {
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    // Quantized path (int8 and int16), with general rescalings
+    data->input1_offset = -input1->params.zero_point;
+    data->input2_offset = -input2->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = (output->type == kTfLiteInt16) ? 15 : 20;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(
+                std::max(input1->params.scale, input2->params.scale));
+    const double real_input1_multiplier =
+        static_cast<double>(input1->params.scale) / twice_max_input_scale;
+    const double real_input2_multiplier =
+        static_cast<double>(input2->params.scale) / twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+  } else if (output->type == kTfLiteFloat32) {
+    CalculateActivationRange(params->activation,
+                             &data->output_activation_min_f32,
+                             &data->output_activation_max_f32);
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus AddPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kAddInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kAddInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kAddOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  OpDataAdd* data = static_cast<OpDataAdd*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpDataAdd(context, params, input1, input2, output, data));
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cc
index a36a986..0ec3276 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/add_n.cc
@@ -18,9 +18,11 @@ limitations under the License.
 #include <cstdint>
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -28,42 +30,106 @@ namespace {
 constexpr int kInputTensor0 = 0;
 constexpr int kOutputTensor = 0;
 
+constexpr int kAddNIntegerShift = 20;
+
+// only used with INT8 tensors
+struct OpData {
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t input_offset;
+  int32_t output_offset;
+  int32_t input_multiplier;
+  int32_t output_multiplier;
+  int input_shift;
+  int output_shift;
+  int left_shift;
+  int scratch_index;
+};
+
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
   int num_inputs = NumInputs(node);
   TF_LITE_ENSURE(context, num_inputs >= 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input_tensor_first;
-  TF_LITE_ENSURE_OK(
-      context, GetInputSafe(context, node, kInputTensor0, &input_tensor_first));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input_tensor_first =
+      micro_context->AllocateTempInputTensor(node, kInputTensor0);
+  TF_LITE_ENSURE(context, input_tensor_first != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   // Check that all tensors have the same shape and type.
   TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_tensor_first->type);
   for (int i = kInputTensor0 + 1; i < num_inputs; ++i) {
-    const TfLiteTensor* input;
-    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, i, &input));
+    TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
+    TF_LITE_ENSURE(context, input != nullptr);
     TF_LITE_ENSURE(context, HaveSameShapes(input_tensor_first, input));
     TF_LITE_ENSURE_TYPES_EQ(context, input_tensor_first->type, input->type);
+
+    // Check that all INT8 input tensors have the same zero-point and scale.
+    if (input_tensor_first->type == kTfLiteInt8) {
+      TF_LITE_ENSURE(context, input_tensor_first->params.zero_point ==
+                                  input->params.zero_point);
+      TF_LITE_ENSURE(context,
+                     input_tensor_first->params.scale == input->params.scale);
+    }
+
+    micro_context->DeallocateTempTfLiteTensor(input);
   }
 
-  // Allocate scratch buffer space for pointer to each tensor's data
-  // and store the scratch buffer index in the node's user_data
   if (output->type == kTfLiteFloat32) {
+    // Allocate scratch buffer space for pointer to each tensor's data
+    // and store the scratch buffer index in the node's user_data
     int scratch_index;
     size_t scratch_size = sizeof(float*) * num_inputs;
     TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
                                    context, scratch_size, &scratch_index));
     node->user_data =
         reinterpret_cast<decltype(node->user_data)>(scratch_index);
+  } else if (output->type == kTfLiteInt8) {
+    node->user_data =
+        context->AllocatePersistentBuffer(context, sizeof(OpData));
+    OpData* data = static_cast<OpData*>(node->user_data);
+
+    // Allocate scratch buffer space for pointer to each tensor's data
+    // and store the scratch buffer index in OpData
+    size_t scratch_size = sizeof(int8_t*) * num_inputs;
+    TF_LITE_ENSURE_OK(
+        context, context->RequestScratchBufferInArena(context, scratch_size,
+                                                      &data->scratch_index));
+
+    // 8bit -> 8bit general quantized path, with general rescalings
+    data->input_offset = -input_tensor_first->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = kAddNIntegerShift;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(input_tensor_first->params.scale);
+    const double real_input_multiplier =
+        static_cast<double>(input_tensor_first->params.scale) /
+        twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input_multiplier, &data->input_multiplier, &data->input_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, kTfLiteActNone, output, &data->output_activation_min,
+        &data->output_activation_max));
   } else {
-    TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("ADD_N only supports FLOAT32 and INT8, got %s.",
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input_tensor_first);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
@@ -72,12 +138,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <typename T>
-void EvalAddN(TfLiteContext* context, TfLiteNode* node,
-              TfLiteEvalTensor* output) {
+inline const T** CopyInputsToScratchBuffer(TfLiteContext* context,
+                                           TfLiteNode* node,
+                                           const int scratch_index) {
   int num_inputs = NumInputs(node);
-
-  int scratch_index =
-      static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
   void* scratch_buffer = context->GetScratchBuffer(context, scratch_index);
   const T** all_inputs = static_cast<decltype(all_inputs)>(scratch_buffer);
   for (int i = 0; i < num_inputs; i++) {
@@ -86,18 +150,57 @@ void EvalAddN(TfLiteContext* context, TfLiteNode* node,
     all_inputs[i] = tflite::micro::GetTensorData<T>(next_input);
   }
 
+  return all_inputs;
+}
+
+template <typename T>
+void EvalAddN(TfLiteContext* context, TfLiteNode* node,
+              TfLiteEvalTensor* output) {
+  int num_inputs = NumInputs(node);
+
+  int scratch_index =
+      static_cast<int>(reinterpret_cast<intptr_t>(node->user_data));
+  const T** all_inputs =
+      CopyInputsToScratchBuffer<T>(context, node, scratch_index);
+
   reference_ops::AddN<T>(tflite::micro::GetTensorShape(output), num_inputs,
                          all_inputs, tflite::micro::GetTensorData<T>(output));
 }
 
+template <typename T>
+void EvalAddNQuantized(TfLiteContext* context, TfLiteNode* node,
+                       TfLiteEvalTensor* output) {
+  int num_inputs = NumInputs(node);
+
+  OpData* data = static_cast<OpData*>(node->user_data);
+  const T** all_inputs =
+      CopyInputsToScratchBuffer<T>(context, node, data->scratch_index);
+
+  ArithmeticParams params;
+  params.left_shift = data->left_shift;
+  params.input1_offset = data->input_offset;
+  params.input1_multiplier = data->input_multiplier;
+  params.input1_shift = data->input_shift;
+  params.output_offset = data->output_offset;
+  params.output_multiplier = data->output_multiplier;
+  params.output_shift = data->output_shift;
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      &params);
+
+  reference_ops::AddN(params, tflite::micro::GetTensorShape(output), num_inputs,
+                      all_inputs, tflite::micro::GetTensorData<T>(output));
+}
+
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteEvalTensor* output =
       tflite::micro::GetEvalOutput(context, node, kOutputTensor);
   if (output->type == kTfLiteFloat32) {
     EvalAddN<float>(context, node, output);
+  } else if (output->type == kTfLiteInt8) {
+    EvalAddNQuantized<int8_t>(context, node, output);
   } else {
-    TF_LITE_KERNEL_LOG(context, "ADD_N only supports FLOAT32, got %s.",
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("ADD_N only supports FLOAT32 and INT8, got %s.",
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
   return kTfLiteOk;
@@ -106,14 +209,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_ADD_N() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cc
index 5fa261a..f781ab5 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/arg_min_max.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,15 +17,15 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/comparisons.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace arg_min_max {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kAxis = 1;
@@ -36,12 +36,17 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape,
                             const T1* input1_data, const T3* input2_data,
                             const RuntimeShape& output_shape, T2* output_data,
                             bool is_arg_max) {
+  // Use Greater/Less from comparisons.h (formerly from kernels/micro_utils.h
+  // which was deprecated). Same as gtl::Greater but used here to reduce
+  // dependencies and binary size for micro environment.
   if (is_arg_max) {
     reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
-                             output_shape, output_data, micro::Greater());
+                             output_shape, output_data,
+                             reference_ops::GreaterFn<T1>);
   } else {
     reference_ops::ArgMinMax(input1_shape, input1_data, input2_data,
-                             output_shape, output_data, micro::Less());
+                             output_shape, output_data,
+                             reference_ops::LessFn<T1>);
   }
 }
 
@@ -66,28 +71,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
         case kTfLiteFloat32:
           TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
           break;
-        case kTfLiteUInt8:
-          TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
-          break;
         case kTfLiteInt8:
           TF_LITE_ARG_MIN_MAX(int8_t, int32_t, int32_t);
           break;
         default:
-          TF_LITE_KERNEL_LOG(context,
-                             "Only float32, uint8_t and int8_t are "
-                             "supported currently, got %s.",
-                             TfLiteTypeGetName(input->type));
+          MicroPrintf(
+              "Only float32, uint8_t and int8_t are "
+              "supported currently, got %s.",
+              TfLiteTypeGetName(input->type));
           return kTfLiteError;
       }
     } else {
-      TF_LITE_KERNEL_LOG(context,
-                         "Only int32_t are supported currently, got %s.",
-                         TfLiteTypeGetName(output->type));
+      MicroPrintf("Only int32_t are supported currently, got %s.",
+                  TfLiteTypeGetName(output->type));
       return kTfLiteError;
     }
   } else {
-    TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.",
-                       TfLiteTypeGetName(axis->type));
+    MicroPrintf("Only int32_t are supported currently, got %s.",
+                TfLiteTypeGetName(axis->type));
     return kTfLiteError;
   }
 
@@ -104,30 +105,14 @@ TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
   return Eval(context, node, true);
 }
 
-}  // namespace arg_min_max
+}  // namespace
 
 TfLiteRegistration Register_ARG_MAX() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/arg_min_max::ArgMaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, ArgMaxEval);
 }
 
 TfLiteRegistration Register_ARG_MIN() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/arg_min_max::ArgMinEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, ArgMinEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cc
new file mode 100644
index 0000000..e650294
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/assign_variable.cc
@@ -0,0 +1,101 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+constexpr int kInputVariableId = 0;
+constexpr int kInputValue = 1;
+
+// Validates the node, then calls Allocate() for the variable id in input 0.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 0);
+
+  // This must be a TfLiteEvalTensor despite this being in Prepare, because
+  // CreateTensor allocates a temp tensor from the flatbuffer, which does not
+  // contain the correct ID generated within the VAR_HANDLE op. EvalTensors are
+  // all allocated during StartModelAllocation which happens before
+  // init/prepare, and VAR_HANDLE Prepare() references its own op_data in the
+  // TfLiteEvalTensor, so reading the ID here is valid.
+  const TfLiteEvalTensor* input_resource_id_tensor =
+      tflite::micro::GetEvalInput(context, node, kInputVariableId);
+  TFLITE_DCHECK(input_resource_id_tensor != nullptr);
+  TF_LITE_ENSURE(context, (input_resource_id_tensor->type == kTfLiteResource ||
+                           input_resource_id_tensor->type == kTfLiteInt32));
+  TF_LITE_ENSURE_EQ(context, NumElements(input_resource_id_tensor->dims), 1);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  // Mirror Eval(): fail cleanly instead of dereferencing a null store.
+  MicroResourceVariables* resources =
+      micro_context->graph().GetResourceVariables();
+  TF_LITE_ENSURE(context, resources != nullptr);
+  TfLiteTensor* input_value =
+      micro_context->AllocateTempInputTensor(node, kInputValue);
+  TFLITE_DCHECK(input_value != nullptr);
+  TF_LITE_ENSURE_OK(context,
+                    resources->Allocate(input_resource_id_tensor->data.i32[0],
+                                        context, input_value));
+  micro_context->DeallocateTempTfLiteTensor(input_value);
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input_id =
+      tflite::micro::GetEvalInput(context, node, kInputVariableId);
+  TFLITE_DCHECK(input_id != nullptr);
+
+  const TfLiteEvalTensor* input_value =
+      tflite::micro::GetEvalInput(context, node, kInputValue);
+  TFLITE_DCHECK(input_value != nullptr);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph& graph_info = micro_context->graph();
+
+  MicroResourceVariables* resources = graph_info.GetResourceVariables();
+  if (resources == nullptr) {
+    MicroPrintf(
+        "ASSIGN_VARIABLE requires resource variables. Please create "
+        "ResourceVariables and pass it to the interpreter.");
+    return kTfLiteError;
+  }
+  TF_LITE_ENSURE_OK(context,
+                    resources->Assign(input_id->data.i32[0], input_value));
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+TfLiteRegistration Register_ASSIGN_VARIABLE() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cc
index 38639ee..3858f73 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_matmul.cc
@@ -83,7 +83,7 @@ struct OpData {
 
 struct OpContext {
   OpContext(TfLiteContext* context, TfLiteNode* node) {
-    params = static_cast<TfLiteBatchMatMulParams*>(node->builtin_data);
+    params = reinterpret_cast<TfLiteBatchMatMulParams*>(node->builtin_data);
     opdata = static_cast<OpData*>(node->user_data);
   }
 
@@ -94,13 +94,13 @@ struct OpContext {
 struct PrepareOpContext : OpContext {
   PrepareOpContext(TfLiteContext* context, TfLiteNode* node)
       : OpContext(context, node) {
-    lhs = GetInput(context, node, kInputLHSTensor);
-    rhs = GetInput(context, node, kInputRHSTensor);
-    output = GetOutput(context, node, kOutputTensor);
+    MicroContext* micro_context = GetMicroContext(context);
+    lhs = micro_context->AllocateTempInputTensor(node, kInputLHSTensor);
+    rhs = micro_context->AllocateTempInputTensor(node, kInputRHSTensor);
+    output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   }
-
-  const TfLiteTensor* lhs;
-  const TfLiteTensor* rhs;
+  TfLiteTensor* lhs;
+  TfLiteTensor* rhs;
   TfLiteTensor* output;
 };
 
@@ -341,6 +341,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
+  MicroContext* micro_context = GetMicroContext(context);
+
   PrepareOpContext op_context(context, node);
   const TfLiteTensor* lhs_data = op_context.lhs;
   TF_LITE_ENSURE(context, lhs_data != nullptr);
@@ -427,6 +429,11 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
   TfLiteStatus status =
       ResizeOutputTensor(context, node, extended_lhs_shape, extended_rhs_shape,
                          adj_x, adj_y, output_rank, output);
+
+  micro_context->DeallocateTempTfLiteTensor(op_context.lhs);
+  micro_context->DeallocateTempTfLiteTensor(op_context.rhs);
+  micro_context->DeallocateTempTfLiteTensor(op_context.output);
+
   return status;
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cc
index 7ba4df9..9959e47 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/batch_to_space_nd.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -41,8 +42,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
 
   TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -51,6 +56,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
@@ -88,8 +96,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorData<int8_t>(output));
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -98,14 +106,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace.
 
 TfLiteRegistration Register_BATCH_TO_SPACE_ND() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cc
new file mode 100644
index 0000000..002a192
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_args.cc
@@ -0,0 +1,91 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_args.h"
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+
+namespace tflite {
+namespace {
+constexpr int kShape1Tensor = 0;
+constexpr int kShape2Tensor = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus BroadcastArgsPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* shape1 =
+      micro_context->AllocateTempInputTensor(node, kShape1Tensor);
+  TfLiteTensor* shape2 =
+      micro_context->AllocateTempInputTensor(node, kShape2Tensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+
+  TF_LITE_ENSURE(context,
+                 shape1->type == kTfLiteInt32 || shape1->type == kTfLiteInt64);
+  TF_LITE_ENSURE_EQ(context, shape1->type, shape2->type);
+  TF_LITE_ENSURE_EQ(context, shape1->type, output->type);
+
+  // Ensures the shapes are 1D tensor.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(shape1), 1);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(shape2), 1);
+
+  // Ensure the shape of the output tensor is compatible
+  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 1);
+
+  micro_context->DeallocateTempTfLiteTensor(shape1);
+  micro_context->DeallocateTempTfLiteTensor(shape2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus BroadcastArgsEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* shape1 =
+      micro::GetEvalInput(context, node, kShape1Tensor);
+  const TfLiteEvalTensor* shape2 =
+      micro::GetEvalInput(context, node, kShape2Tensor);
+  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteInt32) {
+    reference_ops::BroadcastArgs(
+        micro::GetTensorShape(shape1), micro::GetTensorData<int32_t>(shape1),
+        micro::GetTensorShape(shape2), micro::GetTensorData<int32_t>(shape2),
+        micro::GetTensorShape(output), micro::GetTensorData<int32_t>(output));
+  } else {
+    reference_ops::BroadcastArgs(
+        micro::GetTensorShape(shape1), micro::GetTensorData<int64_t>(shape1),
+        micro::GetTensorShape(shape2), micro::GetTensorData<int64_t>(shape2),
+        micro::GetTensorShape(output), micro::GetTensorData<int64_t>(output));
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_BROADCAST_ARGS() {
+  return tflite::micro::RegisterOp(nullptr, BroadcastArgsPrepare,
+                                   BroadcastArgsEval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cc
new file mode 100644
index 0000000..51b19e0
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/broadcast_to.cc
@@ -0,0 +1,123 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/broadcast_to.h"
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+
+namespace tflite {
+
+namespace {
+constexpr int kInputTensor = 0;
+constexpr int kShapeTensor = 1;
+constexpr int kOutputTensor = 0;
+// Support a maximum of 5 dimensions in TFLM.
+constexpr int kMaxDims = 5;
+
+// Checks that `shape` is 1-D, that `output` has exactly the dims it lists,
+// and that `input` can be broadcast to them (at most kMaxDims dimensions).
+TfLiteStatus ValidateOutputTensor(TfLiteContext* context, TfLiteTensor* input,
+                                  TfLiteTensor* shape, TfLiteTensor* output) {
+  // Ensures the shape is 1D tensor.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(shape), 1);
+
+  // Ensure output dims is not less than input dims.
+  int input_num_dims = NumDimensions(input);
+  int output_num_dims = NumDimensions(output);
+  int shape_num_dims = SizeOfDimension(shape, 0);
+  TF_LITE_ENSURE_MSG(context, output_num_dims == shape_num_dims,
+                     "Output must match with the expected shape dimension.");
+  TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims,
+                     "Output shape must be broadcastable from input shape.");
+  TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims,
+                     "BroadcastTo only supports 1-5D tensor.");
+
+  // Check if output shape is broadcastable from input shape.
+  // Read entries as int64_t so that kTfLiteInt64 shapes are never truncated.
+  auto get_shape_data = [shape](int i) -> int64_t {
+    return shape->type == kTfLiteInt32 ? GetTensorData<int32_t>(shape)[i]
+                                       : GetTensorData<int64_t>(shape)[i];
+  };
+
+  int extending_dims = output_num_dims - input_num_dims;
+  for (int idx = 0; idx < input_num_dims; ++idx) {
+    TF_LITE_ENSURE_MSG(
+        context,
+        (SizeOfDimension(input, idx) == 1 ||
+         SizeOfDimension(input, idx) == get_shape_data(extending_dims + idx)),
+        "Output shape must be broadcastable from input shape.");
+  }
+
+  // Validating the shape of the output tensor.
+  tflite::RuntimeShape output_shape = tflite::GetTensorShape(output);
+  for (int idx = 0; idx < output_num_dims; ++idx) {
+    TF_LITE_ENSURE(context, output_shape.Dims(idx) == get_shape_data(idx));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus BroadcastToPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, NumInputs(node) == 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* shape =
+      micro_context->AllocateTempInputTensor(node, kShapeTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+
+  TF_LITE_ENSURE_MSG(context, (NumDimensions(input) <= kMaxDims),
+                     "BroadcastTo only supports 1-5D tensor.");
+
+  TF_LITE_ENSURE(context,
+                 shape->type == kTfLiteInt32 || shape->type == kTfLiteInt64);
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+
+  // Does not support String type due to its variable size. This limitation is
+  // the same as TFLite.
+  TF_LITE_ENSURE(context, input->type != kTfLiteString);
+
+  TF_LITE_ENSURE_STATUS(ValidateOutputTensor(context, input, shape, output));
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(shape);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus BroadcastToEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
+
+  // BroadcastTo supports up to kMaxDims (5) dims here, versus 8 in TFLite.
+  reference_ops::BroadcastTo<kMaxDims>(
+      micro::GetTensorShape(input), input->data.raw,
+      micro::GetTensorShape(output), output->data.raw, input->type);
+  return kTfLiteOk;
+}
+}  // namespace
+
+TfLiteRegistration Register_BROADCAST_TO() {
+  return tflite::micro::RegisterOp(nullptr, BroadcastToPrepare,
+                                   BroadcastToEval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cc
new file mode 100644
index 0000000..21643c8
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/call_once.cc
@@ -0,0 +1,88 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+struct OpData {
+  int init_subgraph_index;
+  bool has_run;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  const auto* params =
+      reinterpret_cast<const TfLiteCallOnceParams*>(node->builtin_data);
+  op_data->init_subgraph_index = params->init_subgraph_index;
+  op_data->has_run = false;
+
+  TF_LITE_ENSURE(context, NumInputs(node) == 0);
+  TF_LITE_ENSURE(context, NumOutputs(node) == 0);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph& graph_info = micro_context->graph();
+
+  TF_LITE_ENSURE(context,
+                 op_data->init_subgraph_index < graph_info.NumSubgraphs());
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+
+  // Call once only runs one time then is a no-op for every subsequent call.
+  if (op_data->has_run) {
+    return kTfLiteOk;
+  }
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph& graph_info = micro_context->graph();
+
+  TF_LITE_ENSURE_OK(context,
+                    graph_info.InvokeSubgraph(op_data->init_subgraph_index));
+
+  op_data->has_run = true;
+
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+TfLiteRegistration Register_CALL_ONCE() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cc
index 7253245..19e545f 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/cast.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -27,11 +28,19 @@ constexpr int kOutputTensor = 0;
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
@@ -48,13 +57,19 @@ TfLiteStatus copyToTensor(TfLiteContext* context, const FromT* in,
     case kTfLiteInt8:
       copyCast(in, out->data.int8, num_elements);
       break;
+    case kTfLiteInt16:
+      copyCast(in, out->data.i16, num_elements);
+      break;
+    case kTfLiteInt32:
+      copyCast(in, out->data.i32, num_elements);
+      break;
     case kTfLiteFloat32:
       copyCast(in, tflite::micro::GetTensorData<float>(out), num_elements);
       break;
     default:
       // Unsupported type.
-      TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
-                         TfLiteTypeGetName(out->type), out->type);
+      MicroPrintf("Output type %s (%d) not supported.",
+                  TfLiteTypeGetName(out->type), out->type);
   }
   return kTfLiteOk;
 }
@@ -70,27 +85,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {
     case kTfLiteInt8:
       return copyToTensor(context, input->data.int8, output, num_elements);
+    case kTfLiteInt16:
+      return copyToTensor(context, tflite::micro::GetTensorData<int16_t>(input),
+                          output, num_elements);
+    case kTfLiteInt32:
+      return copyToTensor(context, tflite::micro::GetTensorData<int32_t>(input),
+                          output, num_elements);
+    case kTfLiteUInt32:
+      return copyToTensor(context,
+                          tflite::micro::GetTensorData<uint32_t>(input), output,
+                          num_elements);
     case kTfLiteFloat32:
       return copyToTensor(context, tflite::micro::GetTensorData<float>(input),
                           output, num_elements);
     default:
       // Unsupported type.
-      TF_LITE_KERNEL_LOG(context, "Input type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(input->type), input->type);
   }
   return kTfLiteOk;
 }
 }  // namespace
 
 TfLiteRegistration Register_CAST() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cc
index 0b78d48..0f09137 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/ceil.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,17 +21,20 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace ceil {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +45,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   for (int i = 0; i < output->dims->size; ++i) {
     TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
   }
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -58,19 +63,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   return kTfLiteOk;
 }
-}  // namespace ceil
+
+}  // namespace
 
 TfLiteRegistration Register_CEIL() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/ceil::Prepare,
-          /*invoke=*/ceil::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cc
index 007f103..bf69599 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#define FLATBUFFERS_LOCALE_INDEPENDENT 0
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
@@ -22,7 +22,9 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 /*
  * The circular buffer custom operator is used to implement strided streaming
@@ -46,38 +48,17 @@ limitations under the License.
  * - Input and output quantization params must be identical.
  */
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace circular_buffer {
-
-namespace {
-
-// The CircularBuffer op has one input and one output tensor.
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-// TODO(b/149795762): Add this to TfLiteStatus enum.
-constexpr TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
 
-// These fields control the stride period of a strided streaming model. This op
-// returns kTfLiteAbort until cycles_until_run-- is zero.  At this time,
-// cycles_until_run is reset to cycles_max.
-struct OpData {
-  int cycles_until_run;
-  int cycles_max;
-};
-
-}  // namespace
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+void* CircularBufferInit(TfLiteContext* context, const char* buffer,
+                         size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  OpData* op_data = static_cast<OpData*>(
-      context->AllocatePersistentBuffer(context, sizeof(OpData)));
+  OpDataCircularBuffer* op_data = static_cast<OpDataCircularBuffer*>(
+      context->AllocatePersistentBuffer(context, sizeof(OpDataCircularBuffer)));
 
   if (buffer != nullptr && length > 0) {
     const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
-    const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap();
-    op_data->cycles_max = m["cycles_max"].AsInt32();
+    tflite::FlexbufferWrapper wrapper(buffer_t, length);
+    op_data->cycles_max = wrapper.ElementAsInt32(kCircularBufferCyclesMaxIndex);
   } else {
     op_data->cycles_max = 0;
   }
@@ -85,53 +66,6 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   return op_data;
 }
 
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* op_data = static_cast<OpData*>(node->user_data);
-
-  TF_LITE_ENSURE(context, input != nullptr);
-  TF_LITE_ENSURE(context, output != nullptr);
-  TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]);
-  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
-  TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]);
-  TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
-
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-
-  // The circular buffer custom operator currently only supports int8.
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
-
-  if (op_data->cycles_max <= 0) {
-    // The last circular buffer layer simply accumulates outputs, and does not
-    // run periodically.
-    // TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
-    static int cb_prepare_count = 0;
-    cb_prepare_count++;
-    // These checks specifically work for the only two streaming models
-    // supported on TFLM. They use the shape of the output tensor along with the
-    // layer number to determine if the circular buffer period should be 1 or 2.
-
-    // These models are outlined int the following documents:
-    // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
-    // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
-    if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
-        (cb_prepare_count == 5 && output->dims->data[2] == 2 &&
-         output->dims->data[3] == 96)) {
-      op_data->cycles_max = 1;
-      cb_prepare_count = 0;
-    } else {
-      op_data->cycles_max = 2;
-    }
-  }
-  op_data->cycles_until_run = op_data->cycles_max;
-  node->user_data = op_data;
-
-  return kTfLiteOk;
-}
-
 // Shifts buffer over by the output depth, and write new input to end of buffer.
 // num_slots is the number of samples stored in the output buffer.
 // depth is the size of each sample.
@@ -140,14 +74,15 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) {
   memcpy(&output[(num_slots - 1) * depth], input, depth);
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus CircularBufferEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kCircularBufferInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kCircularBufferOutputTensor);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+  OpDataCircularBuffer* data =
+      reinterpret_cast<OpDataCircularBuffer*>(node->user_data);
 
   int num_slots = output->dims->data[1];
   int depth = output->dims->data[2] * output->dims->data[3];
@@ -156,7 +91,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     EvalInt8(tflite::micro::GetTensorData<int8_t>(input), num_slots, depth,
              tflite::micro::GetTensorData<int8_t>(output));
   } else {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+    MicroPrintf("Type %s (%d) not supported.",
                        TfLiteTypeGetName(input->type), input->type);
     return kTfLiteError;
   }
@@ -173,20 +108,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-}  // namespace circular_buffer
-
 TfLiteRegistration* Register_CIRCULAR_BUFFER() {
-  static TfLiteRegistration r = {/*init=*/circular_buffer::Init,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/circular_buffer::Prepare,
-                                 /*invoke=*/circular_buffer::Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
+  static TfLiteRegistration r = tflite::micro::RegisterOp(
+      CircularBufferInit, CircularBufferPrepare, CircularBufferEval);
   return &r;
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h
new file mode 100644
index 0000000..c52a1ec
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h
@@ -0,0 +1,48 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+// The CircularBuffer op has one input and one output tensor.
+extern const int kCircularBufferInputTensor;
+extern const int kCircularBufferOutputTensor;
+
+// Indices into the init flexbuffer's vector.
+// The parameter's name is in the comment that follows.
+// Elements in the vectors are ordered alphabetically by parameter name.
+extern const int kCircularBufferCyclesMaxIndex;  // 'cycles_max'
+
+// TODO(b/149795762): Add this to TfLiteStatus enum.
+extern const TfLiteStatus kTfLiteAbort;
+
+// These fields control the stride period of a strided streaming model. This op
+// returns kTfLiteAbort until cycles_until_run-- is zero.  At this time,
+// cycles_until_run is reset to cycles_max.
+struct OpDataCircularBuffer {
+  int cycles_until_run;  // Set to cycles_max by CircularBufferPrepare.
+  int cycles_max;  // From the init flexbuffer, or chosen in Prepare when <= 0.
+};
+
+TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CIRCULAR_BUFFER_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cc
new file mode 100644
index 0000000..b6d1f0d
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer_common.cc
@@ -0,0 +1,97 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/circular_buffer.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+
+// The CircularBuffer op has one input and one output tensor.
+const int kCircularBufferInputTensor = 0;
+const int kCircularBufferOutputTensor = 0;
+
+// Indices into the init flexbuffer's vector.
+// The parameter's name is in the comment that follows.
+// Elements in the vectors are ordered alphabetically by parameter name.
+const int kCircularBufferCyclesMaxIndex = 0;  // 'cycles_max'
+
+// TODO(b/149795762): Add this to TfLiteStatus enum.
+const TfLiteStatus kTfLiteAbort = static_cast<TfLiteStatus>(-9);
+
+TfLiteStatus CircularBufferPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+  // Temp tensors serve only the checks below; released at the end of Prepare.
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kCircularBufferInputTensor);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
+      node, kCircularBufferOutputTensor);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataCircularBuffer* op_data =
+      static_cast<OpDataCircularBuffer*>(node->user_data);
+  // NOTE(review): presumably a failed check below skips the Deallocate calls.
+  TF_LITE_ENSURE(context, input != nullptr);
+  TF_LITE_ENSURE(context, output != nullptr);
+  TF_LITE_ENSURE_EQ(context, input->dims->data[0], output->dims->data[0]);
+  TF_LITE_ENSURE_EQ(context, 1, input->dims->data[1]);
+  TF_LITE_ENSURE_EQ(context, input->dims->data[2], output->dims->data[2]);
+  TF_LITE_ENSURE_EQ(context, output->dims->data[3], input->dims->data[3]);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+
+  // The circular buffer custom operator currently only supports int8.
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
+
+  if (op_data->cycles_max <= 0) {
+    // The last circular buffer layer simply accumulates outputs, and does not
+    // run periodically.
+    // TODO(b/150001379): Move this special case logic to the tflite flatbuffer.
+    static int cb_prepare_count = 0;  // Persists across all calls to Prepare.
+    cb_prepare_count++;
+    // These checks specifically work for the only two streaming models
+    // supported on TFLM. They use the shape of the output tensor along with the
+    // layer number to determine if the circular buffer period should be 1 or 2.
+
+    // These models are outlined in the following documents:
+    // https://docs.google.com/document/d/1lc_G2ZFhjiKFo02UHjBaljye1xsL0EkfybkaVELEE3Q/edit?usp=sharing
+    // https://docs.google.com/document/d/1pGc42PuWyrk-Jy1-9qeqtggvsmHr1ifz8Lmqfpr2rKA/edit?usp=sharing
+    if (output->dims->data[1] == 5 || output->dims->data[1] == 13 ||
+        output->dims->data[1] == 25 ||
+        (cb_prepare_count == 5 && output->dims->data[2] == 2 &&
+         output->dims->data[3] == 96)) {
+      op_data->cycles_max = 1;
+      cb_prepare_count = 0;
+    } else {
+      op_data->cycles_max = 2;
+    }
+  }
+  op_data->cycles_until_run = op_data->cycles_max;
+  node->user_data = op_data;  // Redundant: op_data was read from user_data.
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cc
index 4990b77..1a8fbb0 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/comparisons.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,11 +19,10 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace comparisons {
+
 namespace {
 
 struct OpData {
@@ -104,19 +103,6 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::EqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowEqualWithScaling(
@@ -131,8 +117,8 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -209,19 +195,6 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::NotEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowNotEqualWithScaling(
@@ -236,8 +209,8 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -300,19 +273,6 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowGreaterWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::GreaterWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowGreaterWithScaling(
@@ -327,8 +287,8 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -391,19 +351,6 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::GreaterEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
@@ -418,8 +365,8 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -482,19 +429,6 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowLessWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::LessWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowLessWithScaling(
@@ -509,8 +443,8 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -573,19 +507,6 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 tflite::micro::GetTensorData<int64_t>(input2), output_shape,
                 output_data);
       break;
-    case kTfLiteUInt8:
-      requires_broadcast
-          ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data)
-          : reference_ops::LessEqualWithScaling(
-                data->params, input1_shape,
-                tflite::micro::GetTensorData<uint8_t>(input1), input2_shape,
-                tflite::micro::GetTensorData<uint8_t>(input2), output_shape,
-                output_data);
-      break;
     case kTfLiteInt8:
       requires_broadcast
           ? reference_ops::Broadcast4DSlowLessEqualWithScaling(
@@ -600,15 +521,13 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) {
                 output_data);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -618,12 +537,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
   TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
   TF_LITE_ENSURE(context, input2 != nullptr);
 
-  if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
+  if (input1->type == kTfLiteInt8) {
     auto input1_offset = -input1->params.zero_point;
     auto input2_offset = -input2->params.zero_point;
     const int kLeftShift = 8;
@@ -648,77 +571,36 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     data->params.input2_shift = input2_shift;
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+
   return kTfLiteOk;
 }
 
-}  // namespace comparisons
+}  // namespace
 
 TfLiteRegistration Register_EQUAL() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::EqualEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, EqualEval);
 }
 
 TfLiteRegistration Register_NOT_EQUAL() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::NotEqualEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, NotEqualEval);
 }
 
 TfLiteRegistration Register_GREATER() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::GreaterEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, GreaterEval);
 }
 
 TfLiteRegistration Register_GREATER_EQUAL() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::GreaterEqualEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, GreaterEqualEval);
 }
 
 TfLiteRegistration Register_LESS() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::LessEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, LessEval);
 }
 
 TfLiteRegistration Register_LESS_EQUAL() {
-  return {/*init=*/comparisons::Init,
-          /*free=*/nullptr,
-          /*prepare=*/comparisons::Prepare,
-          /*invoke=*/comparisons::LessEqualEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, LessEqualEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cc
index db14e20..94a6107 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/complex_abs.cc
@@ -26,7 +26,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
 namespace ops {
@@ -42,10 +42,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
+  // Check type and shape of the input tensor
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   if (input->type != kTfLiteComplex64 || output->type != kTfLiteFloat32) {
       TF_LITE_KERNEL_LOG(context, "Types input %s (%d), output %s (%d) not supported.",
@@ -54,13 +58,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteError;
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   size_t total_input_els = 1;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cc
index e912d54..13a5d63 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/concatenation.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,11 +23,14 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace concatenation {
+
+// Patched by Edge Impulse
+constexpr int RuntimeShape::kMaxSmallSize;
+
+namespace {
 
 constexpr int kMaxInputNum = 10;  // Maximum number of input tensors
 constexpr int kOutputTensor = 0;
@@ -104,51 +107,37 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) {
                                tflite::micro::GetTensorData<data_type>(output));
 }
 
-void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) {
-  // Collect the shapes and data pointer of input tensors
-  RuntimeShape inputs_shape[kMaxInputNum];
-  const RuntimeShape* inputs_shape_ptr[kMaxInputNum];
-  const uint8_t* inputs_data[kMaxInputNum];
-  GetAllInputTensorShapes(context, node, inputs_shape);
-  GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr);
-  GetAllInputTensorData(context, node, inputs_data);
-
-  TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
-
-  reference_ops::ConcatenationWithScaling(
-      data->params, inputs_shape_ptr, inputs_data,
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<uint8_t>(output));
-}
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
 }
 
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // This function only checks the types. Additional shape validations are
   // performed in the reference implementation called during Eval().
   const TfLiteConcatenationParams* params =
       reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
 
-  const TfLiteTensor* input_tensor = GetInput(context, node, 0);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input_tensor != nullptr);
   TfLiteType input_type = input_tensor->type;
-  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output_tensor =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output_tensor != nullptr);
   TfLiteType output_type = output_tensor->type;
 
+  micro_context->DeallocateTempTfLiteTensor(input_tensor);
+  micro_context->DeallocateTempTfLiteTensor(output_tensor);
+
   // Check activation and input type
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
   TF_LITE_ENSURE(context,
-                 input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 ||
-                     input_type == kTfLiteInt8 || input_type == kTfLiteInt32 ||
-                     input_type == kTfLiteInt64);
+                 input_type == kTfLiteFloat32 || input_type == kTfLiteInt8 ||
+                     input_type == kTfLiteInt16 || input_type == kTfLiteInt32 ||
+                     input_type == kTfLiteInt64 || input_type == kTfLiteBool);
 
   // Output type must match input type
   TF_LITE_ENSURE_EQ(context, output_type, input_type);
@@ -159,36 +148,38 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // Shapes with dimensions >4 are not yet supported with static allocation.
   for (int i = 0; i < num_inputs; ++i) {
-    const TfLiteTensor* input = GetInput(context, node, i);
+    TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, i);
     TF_LITE_ENSURE(context, input != nullptr);
     int num_dimensions = NumDimensions(input);
 
-    if (num_dimensions > 4) {
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Op Concatenation does not currently support num dimensions >4 "
+    if (num_dimensions > RuntimeShape::kMaxSmallSize) {
+      MicroPrintf(
+          "Op Concatenation does not currently support num dimensions > %d "
           "Tensor has %d dimensions.",
-          num_dimensions);
+          RuntimeShape::kMaxSmallSize, num_dimensions);
       return kTfLiteError;
     }
+    micro_context->DeallocateTempTfLiteTensor(input);
   }
 
   // Calculate OpData.
   TFLITE_DCHECK(node->user_data != nullptr);
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   switch (output_type) {  // Already know in/outtypes are same.
+    case kTfLiteBool:
     case kTfLiteFloat32:
+    case kTfLiteInt16:
     case kTfLiteInt32:
     case kTfLiteInt64: {
       data->params.axis = CalculatePositiveAxis(params->axis, output);
       data->params.inputs_count = node->inputs->size;
       break;
     }
-    case kTfLiteUInt8:
     case kTfLiteInt8: {
       data->params.axis = CalculatePositiveAxis(params->axis, output);
       data->params.inputs_count = node->inputs->size;
@@ -204,10 +195,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       // Allocate persistent scale and zeropoint buffers.
       // Store input scale and zero point values in OpParams:
       for (int i = 0; i < node->inputs->size; ++i) {
-        const TfLiteTensor* t = GetInput(context, node, i);
+        TfLiteTensor* t = micro_context->AllocateTempInputTensor(node, i);
         TF_LITE_ENSURE(context, t != nullptr);
         input_scales[i] = t->params.scale;
         input_zero_points[i] = t->params.zero_point;
+        micro_context->DeallocateTempTfLiteTensor(t);
       }
 
       data->params.input_scale = input_scales;
@@ -217,17 +209,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       break;
     }
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "Op Concatenation does not currently support Type '%s'.",
-          TfLiteTypeGetName(output_type));
+      MicroPrintf("Op Concatenation does not currently support Type '%s'.",
+                  TfLiteTypeGetName(output_type));
       return kTfLiteError;
   }
 
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
+  const TfLiteEvalTensor* output_tensor =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
   TF_LITE_ENSURE(context, output_tensor != nullptr);
   TfLiteType output_type = output_tensor->type;
 
@@ -238,39 +232,32 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteInt32:
       EvalUnquantized<int32_t>(context, node);
       break;
-    case kTfLiteUInt8:
-      EvalQuantizedUInt8(context, node);
-      break;
     case kTfLiteInt8:
       EvalUnquantized<int8_t>(context, node);
       break;
     case kTfLiteInt64:
       EvalUnquantized<int64_t>(context, node);
       break;
+    case kTfLiteInt16:
+      EvalUnquantized<int16_t>(context, node);
+      break;
+    case kTfLiteBool:
+      EvalUnquantized<bool>(context, node);
+      break;
 
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "Op Concatenation does not currently support Type '%s'.",
-          TfLiteTypeGetName(output_type));
+      MicroPrintf("Op Concatenation does not currently support Type '%s'.",
+                  TfLiteTypeGetName(output_type));
       return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace concatenation
+}  // namespace
 
 TfLiteRegistration Register_CONCATENATION() {
-  return {/*init=*/concatenation::Init,
-          /*free=*/nullptr,
-          /*prepare=*/concatenation::Prepare,
-          /*invoke=*/concatenation::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cc
index b5482fc..32177b3 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -32,6 +32,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -57,11 +58,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       *(static_cast<const TfLiteConvParams*>(node->builtin_data));
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   RuntimeShape input_shape = GetTensorShape(input);
@@ -88,25 +94,31 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   output_dims.w = output->dims->data[2];
   output_dims.c = output_shape.Dims(3);
 
-  // Dynamically allocate per-channel quantization parameters.
-  // TODO(#42883): This allocation is done even for non-int8 cases to get around
-  // a bug in kernel_util.cc which incorrectly uses per_channel_output_shift in
-  // non-int8 cases. Protect this section with a if (input->type == kTfLiteInt8)
-  // when the issue is fixed.
-  const int num_channels = filter->dims->data[kConvQuantizedDimension];
-  data->reference_op_data.per_channel_output_multiplier =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
-  data->reference_op_data.per_channel_output_shift =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(
+        context, filter_size, &data->reference_op_data.filter_buffer_index);
+  }
+
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
+    const int num_channels = filter->dims->data[kConvQuantizedDimension];
+    data->reference_op_data.per_channel_output_multiplier =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+    data->reference_op_data.per_channel_output_shift =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+  }
 
   TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
       context, node, params, input_dims.w, input_dims.h, filter_dims.w,
       filter_dims.h, output_dims.w, output_dims.h, input->type,
       &data->reference_op_data));
 
-  if (input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     // Initialize cmsis_nn convolution parameters
     cmsis_nn_conv_params conv_params;
     conv_params.input_offset = -input->params.zero_point;
@@ -120,133 +132,273 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     conv_params.activation.min = data->reference_op_data.output_activation_min;
     conv_params.activation.max = data->reference_op_data.output_activation_max;
 
-    buf_size = arm_convolve_wrapper_s8_get_buffer_size(
-        &conv_params, &input_dims, &filter_dims, &output_dims);
+    if (input->type == kTfLiteInt8) {
+      buf_size = arm_convolve_wrapper_s8_get_buffer_size(
+          &conv_params, &input_dims, &filter_dims, &output_dims);
+    } else if (input->type == kTfLiteInt16) {
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+      buf_size = arm_convolve_wrapper_s16_get_buffer_size(
+          &conv_params, &input_dims, &filter_dims, &output_dims);
+    }
+
+    if (buf_size > 0) {
+      TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
+          context, buf_size, &data->buffer_idx));
+    } else {
+      data->buffer_idx = -1;
+    }
   }
 
-  if (buf_size > 0) {
-    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
-        context, buf_size, &data->buffer_idx));
-  } else {
-    data->buffer_idx = -1;
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
+                                     const TfLiteConvParams& params,
+                                     const OpData& data,
+                                     const TfLiteEvalTensor* input,
+                                     const TfLiteEvalTensor* filter,
+                                     const TfLiteEvalTensor* bias,
+                                     TfLiteEvalTensor* output) {
+  cmsis_nn_conv_params conv_params;
+  conv_params.dilation.h = params.dilation_height_factor;
+  conv_params.dilation.w = params.dilation_width_factor;
+
+  // Initialize cmsis_nn convolution parameters
+  conv_params.input_offset = -data.reference_op_data.input_zero_point;
+  conv_params.output_offset = data.reference_op_data.output_zero_point;
+  conv_params.stride.h = params.stride_height;
+  conv_params.stride.w = params.stride_width;
+  conv_params.padding.h = data.reference_op_data.padding.height;
+  conv_params.padding.w = data.reference_op_data.padding.width;
+  conv_params.activation.min = data.reference_op_data.output_activation_min;
+  conv_params.activation.max = data.reference_op_data.output_activation_max;
+
+  // Initialize cmsis_nn per channel quantization parameters
+  cmsis_nn_per_channel_quant_params quant_params;
+  quant_params.multiplier = const_cast<int32_t*>(
+      data.reference_op_data.per_channel_output_multiplier);
+  quant_params.shift =
+      const_cast<int32_t*>(data.reference_op_data.per_channel_output_shift);
+
+  RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
+  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+  RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+
+  // Consistency check.
+  TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (tflite::micro::GetOptionalTensorData<int8_t>(bias)) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
   }
+
+  // Initialize cmsis_nn dimensions
+  // Input
+  cmsis_nn_dims input_dims;
+  input_dims.n = batch_size;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = input_depth;
+
+  // Filter
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = output_depth;
+  filter_dims.h = filter_shape.Dims(1);
+  filter_dims.w = filter_shape.Dims(2);
+  filter_dims.c = input_depth;
+
+  // Bias
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = 1;
+  bias_dims.h = 1;
+  bias_dims.w = 1;
+  bias_dims.c = output_depth;
+
+  // Output
+  cmsis_nn_dims output_dims;
+  output_dims.n = batch_size;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = output_depth;
+
+  // Initialize cmsis_nn context
+  cmsis_nn_context ctx;
+  ctx.buf = nullptr;
+  ctx.size = 0;
+
+  if (data.buffer_idx > -1) {
+    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
+    // Note: ctx.size is currently not used in cmsis_nn.
+    // The buffer should be allocated in the Prepare function through
+    // arm_convolve_wrapper_s8_get_buffer_size
+  }
+
+  // arm_convolve_wrapper_s8 dispatches to the optimized kernel that matches
+  // the parameters passed.
+  TFLITE_DCHECK_EQ(
+      arm_convolve_wrapper_s8(
+          &ctx, &conv_params, &quant_params, &input_dims,
+          tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+          tflite::micro::GetOptionalTensorData<int32_t>(bias), &output_dims,
+          tflite::micro::GetTensorData<int8_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
+
   return kTfLiteOk;
 }
 
-TfLiteStatus EvalQuantizedPerChannel(
+TfLiteStatus EvalQuantizedPerChannel16x8(
     TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params,
     const OpData& data, const TfLiteEvalTensor* input,
     const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
-    TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) {
+    TfLiteEvalTensor* output) {
   cmsis_nn_conv_params conv_params;
   conv_params.dilation.h = params.dilation_height_factor;
   conv_params.dilation.w = params.dilation_width_factor;
-  // TODO(#43557) Remove checks for dilation and call to reference
-  // implementation when dilation is supported in the optimized implementation
-  // by CMSIS-NN.
-  if (conv_params.dilation.h == 1 && conv_params.dilation.w == 1) {
-    // Initialize cmsis_nn convolution parameters
-    conv_params.input_offset = -data.reference_op_data.input_zero_point;
-    conv_params.output_offset = data.reference_op_data.output_zero_point;
-    conv_params.stride.h = params.stride_height;
-    conv_params.stride.w = params.stride_width;
-    conv_params.padding.h = data.reference_op_data.padding.height;
-    conv_params.padding.w = data.reference_op_data.padding.width;
-    conv_params.activation.min = data.reference_op_data.output_activation_min;
-    conv_params.activation.max = data.reference_op_data.output_activation_max;
-
-    // Initialize cmsis_nn per channel quantization parameters
-    cmsis_nn_per_channel_quant_params quant_params;
-    quant_params.multiplier = const_cast<int32_t*>(
-        data.reference_op_data.per_channel_output_multiplier);
-    quant_params.shift =
-        const_cast<int32_t*>(data.reference_op_data.per_channel_output_shift);
 
-    RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
-    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
-    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-    RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+  // Initialize cmsis_nn convolution parameters
+  conv_params.input_offset = -data.reference_op_data.input_zero_point;
+  conv_params.output_offset = data.reference_op_data.output_zero_point;
+  conv_params.stride.h = params.stride_height;
+  conv_params.stride.w = params.stride_width;
+  conv_params.padding.h = data.reference_op_data.padding.height;
+  conv_params.padding.w = data.reference_op_data.padding.width;
+  conv_params.activation.min = data.reference_op_data.output_activation_min;
+  conv_params.activation.max = data.reference_op_data.output_activation_max;
+
+  // Initialize cmsis_nn per channel quantization parameters
+  cmsis_nn_per_channel_quant_params quant_params;
+  quant_params.multiplier = const_cast<int32_t*>(
+      data.reference_op_data.per_channel_output_multiplier);
+  quant_params.shift =
+      const_cast<int32_t*>(data.reference_op_data.per_channel_output_shift);
+
+  RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
+  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+  RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+
+  // Consistency check.
+  TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (tflite::micro::GetOptionalTensorData<int8_t>(bias)) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
 
-    // Consistency check.
-    TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max);
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-    const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
-    const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
-    const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
-    if (tflite::micro::GetTensorData<int8_t>(bias)) {
-      TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-    }
+  // Initialize cmsis_nn dimensions
+  // Input
+  cmsis_nn_dims input_dims;
+  input_dims.n = batch_size;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = input_depth;
 
-    // Initialize cmsis_nn dimensions
-    // Input
-    cmsis_nn_dims input_dims;
-    input_dims.n = batch_size;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_depth;
-
-    // Filter
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = output_depth;
-    filter_dims.h = filter_shape.Dims(1);
-    filter_dims.w = filter_shape.Dims(2);
-    filter_dims.c = input_depth;
-
-    // Bias
-    cmsis_nn_dims bias_dims;
-    bias_dims.n = 1;
-    bias_dims.h = 1;
-    bias_dims.w = 1;
-    bias_dims.c = output_depth;
-
-    // Output
-    cmsis_nn_dims output_dims;
-    output_dims.n = batch_size;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = output_depth;
-
-    // Initialize cmsis_nn context
-    cmsis_nn_context ctx;
-    ctx.buf = nullptr;
-    ctx.size = 0;
+  // Filter
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = output_depth;
+  filter_dims.h = filter_shape.Dims(1);
+  filter_dims.w = filter_shape.Dims(2);
+  filter_dims.c = input_depth;
 
-    if (data.buffer_idx > -1) {
-      ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
-      // Note: ctx.size is currently not used in cmsis_nn.
-      // The buffer should be allocated in the Prepare function through
-      // arm_convolve_wrapper_s8_get_buffer_size
-    }
+  // Bias
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = 1;
+  bias_dims.h = 1;
+  bias_dims.w = 1;
+  bias_dims.c = output_depth;
 
-    // arm_convolve_wrapper_s8 dispatches the optimized kernel accordingly with
-    // the parameters passed
-    TFLITE_DCHECK_EQ(
-        arm_convolve_wrapper_s8(
-            &ctx, &conv_params, &quant_params, &input_dims,
-            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
-            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
-            tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
-            tflite::micro::GetTensorData<int8_t>(output)),
-        ARM_MATH_SUCCESS);
-  } else {
-    reference_integer_ops::ConvPerChannel(
-        ConvParamsQuantized(params, data.reference_op_data),
-        data.reference_op_data.per_channel_output_multiplier,
-        data.reference_op_data.per_channel_output_shift,
-        tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(filter),
-        tflite::micro::GetTensorData<int8_t>(filter),
-        tflite::micro::GetTensorShape(bias),
-        tflite::micro::GetTensorData<int32_t>(bias),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
+  // Output
+  cmsis_nn_dims output_dims;
+  output_dims.n = batch_size;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = output_depth;
+
+  // Initialize cmsis_nn context
+  cmsis_nn_context ctx;
+  ctx.buf = nullptr;
+  ctx.size = 0;
+
+  if (data.buffer_idx > -1) {
+    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
+    // Note: ctx.size is currently not used in cmsis_nn.
+    // The buffer should be allocated in the Prepare function through
+    // arm_convolve_wrapper_s16_get_buffer_size
   }
+
+  TFLITE_DCHECK_EQ(
+      arm_convolve_wrapper_s16(
+          &ctx, &conv_params, &quant_params, &input_dims,
+          tflite::micro::GetTensorData<int16_t>(input), &filter_dims,
+          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+          tflite::micro::GetOptionalTensorData<int64_t>(bias), &output_dims,
+          tflite::micro::GetTensorData<int16_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
+
   return kTfLiteOk;
 }
 
+TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
+
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  const auto& params =
+      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
+      context, data.reference_op_data.filter_buffer_index, filter);
+
+  return EvalQuantizedPerChannel(context, node, params, data, input,
+                                 &filter_int8, bias, output);
+}
+
+TfLiteStatus EvalInt16x8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
+
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  const auto& params =
+      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  return EvalQuantizedPerChannel16x8(context, node, params, data, input, filter,
+                                     bias, output);
+}
+
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
       tflite::micro::GetEvalInput(context, node, kConvInputTensor);
@@ -266,17 +418,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
-                     "Hybrid models are not supported on TFLite Micro.");
+  TF_LITE_ENSURE_MSG(
+      context,
+      input->type == filter->type ||
+          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) ||
+          (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4),
+      "Hybrid models are not supported on TFLite Micro.");
+
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
+      context, data.reference_op_data.filter_buffer_index, filter);
 
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::Conv(
           ConvParamsFloat(params, data.reference_op_data),
           tflite::micro::GetTensorShape(input),
@@ -284,25 +442,39 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr);
       break;
     }
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
+#endif
+      switch (filter_int8.type) {
+        case kTfLiteInt8: {
+          return EvalQuantizedPerChannel(context, node, params, data, input,
+                                         &filter_int8, bias, output);
+        }
+
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+        }
+      }
 
-      return EvalQuantizedPerChannel(context, node, params, data, input, filter,
-                                     bias, output, nullptr);
+      break;
+    case kTfLiteInt16:
+      return EvalQuantizedPerChannel16x8(context, node, params, data, input,
+                                         filter, bias, output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -311,20 +483,21 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+TfLiteRegistration Register_CONV_2D_INT8() {
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt8);
+}
+
+TfLiteRegistration Register_CONV_2D_INT16() {
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt16x8);
 }
 
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -350,11 +523,13 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -366,7 +541,11 @@ constexpr int kOutputTensor = 0;
 
 // Conv is quantized along dimension 0:
 // https://www.tensorflow.org/lite/performance/quantization_spec
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+constexpr int kConvQuantizedDimension = 3;
+#else
 constexpr int kConvQuantizedDimension = 0;
+#endif
 
 // This file has 2 implementation of Conv.
 
@@ -386,6 +565,9 @@ struct OpData {
   // Per channel output multiplier and shift.
   int32_t* per_channel_output_multiplier;
   int32_t* per_channel_output_shift;
+#ifdef MLI_2_0
+  int8_t* per_channel_scale_frac_bits;
+#endif
 
   // The range of the fused activation layer. For example for kNone and
   // uint8_t these would be 0 and 255.
@@ -396,11 +578,14 @@ struct OpData {
   bool is_mli_applicable;
 
   // Tensors in MLI format.
-  mli_tensor* mli_in;
-  mli_tensor* mli_weights;
-  mli_tensor* mli_bias;
-  mli_tensor* mli_out;
+  mutable ops::micro::MliTensorInterface mli_in;
+  mutable ops::micro::MliTensorInterface mli_weights;
+  mutable ops::micro::MliTensorInterface mli_bias;
+  mutable ops::micro::MliTensorInterface mli_out;
   mli_conv2d_cfg* cfg;
+
+  // Pointer to the mli convolution function.
+  conv_func_ptr p_mli_krn_conv2d_sa8_sa8_sa32;
 };
 
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
@@ -452,10 +637,15 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   // Note that quantized inference requires that all tensors have their
   // parameters set. This is usually done during quantized training.
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kFilterTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kBiasTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   if (data_type != kTfLiteFloat32 && !data->is_mli_applicable) {
     int output_channels = filter->dims->data[kConvQuantizedDimension];
@@ -465,9 +655,14 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
         &data->output_multiplier, &data->output_shift,
         &data->output_activation_min, &data->output_activation_max,
         data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        reinterpret_cast<int32_t*>(data->per_channel_output_shift),
         output_channels));
   }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
 #endif
   return kTfLiteOk;
 }
@@ -483,15 +678,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
   const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
 
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kFilterTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kBiasTensor);
 
   int input_width = input->dims->data[2];
   int input_height = input->dims->data[1];
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+  int filter_width = filter->dims->data[1];
+  int filter_height = filter->dims->data[0];
+#else
   int filter_width = filter->dims->data[2];
   int filter_height = filter->dims->data[1];
+#endif
   int output_width = output->dims->data[2];
   int output_height = output->dims->data[1];
 
@@ -535,34 +741,87 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   data->output_zero_point = output->params.zero_point;
 
   if (data->is_mli_applicable) {
-    data->mli_in = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_weights = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_bias = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_out = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
+    data->mli_in = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_weights = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_bias = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_out = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
     data->cfg = static_cast<mli_conv2d_cfg*>(
         context->AllocatePersistentBuffer(context, sizeof(mli_conv2d_cfg)));
 
-    // reuse space allocated for OpData parameters
-    data->mli_weights->el_params.asym.scale.pi32 =
+#ifdef MLI_2_0
+    data->per_channel_scale_frac_bits =
+        static_cast<int8_t*>(context->AllocatePersistentBuffer(
+            context, 2 * num_channels * sizeof(int16_t)));
+#endif
+
+    // Reuse space allocated for OpData parameters.
+#ifdef MLI_2_0
+    *data->mli_weights.Scale<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_multiplier);
+    *data->mli_bias.Scale<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_multiplier) +
+        num_channels;
+#else
+    *data->mli_weights.Scale<int32_t**>() =
         static_cast<int32_t*>(data->per_channel_output_multiplier);
-    data->mli_bias->el_params.asym.scale.pi32 =
+    *data->mli_bias.Scale<int32_t**>() =
         static_cast<int32_t*>(data->per_channel_output_shift);
+#endif
 
-    data->mli_weights->el_params.asym.zero_point.pi16 =
+#ifdef MLI_2_0
+    *data->mli_weights.ZeroPoint<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_shift);
+    *data->mli_bias.ZeroPoint<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_shift) +
+        num_channels;
+#else
+    *data->mli_weights.ZeroPoint<int16_t**>() =
         reinterpret_cast<int16_t*>(&data->filter_zero_point);
-    data->mli_bias->el_params.asym.zero_point.pi16 =
+    *data->mli_bias.ZeroPoint<int16_t**>() =
         reinterpret_cast<int16_t*>(&data->filter_zero_point) + sizeof(int16_t);
+#endif
 
-    ops::micro::ConvertToMliTensor(input, data->mli_in);
-    ops::micro::ConvertToMliTensorPerChannel(filter, data->mli_weights);
-    ops::micro::ConvertToMliTensorPerChannel(bias, data->mli_bias);
-    ops::micro::ConvertToMliTensor(output, data->mli_out);
+#ifdef MLI_2_0
+    *data->mli_weights.ScaleFracBits<int8_t**>() =
+        reinterpret_cast<int8_t*>(data->per_channel_scale_frac_bits);
+    *data->mli_bias.ScaleFracBits<int8_t**>() =
+        reinterpret_cast<int8_t*>(data->per_channel_scale_frac_bits) +
+        num_channels;
+#endif
+
+    ops::micro::ConvertToMliTensor(input, &data->mli_in);
+    ops::micro::ConvertToMliTensorPerChannel(filter, &data->mli_weights,
+                                             /* is_bias_tensor = */ false);
+    ops::micro::ConvertToMliTensorPerChannel(bias, &data->mli_bias,
+                                             /* is_bias_tensor = */ true);
+#ifdef MLI_2_0
+    ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in,
+                                 &data->mli_weights);
+#endif
+    ops::micro::ConvertToMliTensor(output, &data->mli_out);
+
+#ifdef MLI_2_0
+    // Choose convolution mli specialized function.
+    data->p_mli_krn_conv2d_sa8_sa8_sa32 =
+        mli_krn_conv2d_hwcn(data->mli_weights.MliTensor());
+#else
+    data->p_mli_krn_conv2d_sa8_sa8_sa32 =
+        mli_krn_conv2d_hwcn(data->mli_weights.MliTensor(), data->cfg);
+#endif
 
-    if (params->activation == kTfLiteActRelu) {
+#ifdef MLI_2_0
+    data->cfg->dilation_width = 1;
+    data->cfg->dilation_height = 1;
+#endif
+
+    if (data->output_activation_min == -128 &&
+        data->output_activation_max == 127) {
+      data->cfg->relu.type = MLI_RELU_NONE;
+    } else if (params->activation == kTfLiteActRelu) {
       data->cfg->relu.type = MLI_RELU_GEN;
     } else if (params->activation == kTfLiteActRelu6) {
       data->cfg->relu.type = MLI_RELU_6;
@@ -587,50 +846,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
           data->padding.height + data->padding.height_offset;
     }
   }
-  return kTfLiteOk;
-}
 
-void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                   TfLiteConvParams* params, const OpData& data,
-                   const TfLiteEvalTensor* input,
-                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
-                   TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights,
-                   TfLiteEvalTensor* output) {
-#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  const int32_t input_offset = -data.input_zero_point;
-  const int32_t filter_offset = -data.filter_zero_point;
-  const int32_t output_offset = data.output_zero_point;
-
-  ConvParams op_params;
-  op_params.padding_type = RuntimePaddingType(params->padding);
-  op_params.padding_values.width = data.padding.width;
-  op_params.padding_values.height = data.padding.height;
-  op_params.stride_width = params->stride_width;
-  op_params.stride_height = params->stride_height;
-  op_params.dilation_width_factor = params->dilation_width_factor;
-  op_params.dilation_height_factor = params->dilation_height_factor;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = data.output_multiplier;
-  op_params.output_shift = -data.output_shift;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
-  reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input),
-                      tflite::micro::GetTensorData<uint8_t>(input),
-                      tflite::micro::GetTensorShape(filter),
-                      tflite::micro::GetTensorData<uint8_t>(filter),
-                      tflite::micro::GetTensorShape(bias),
-                      tflite::micro::GetTensorData<int32_t>(bias),
-                      tflite::micro::GetTensorShape(output),
-                      tflite::micro::GetTensorData<uint8_t>(output),
-                      tflite::micro::GetTensorShape(im2col),
-                      tflite::micro::GetTensorData<uint8_t>(im2col), nullptr);
-#else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
-#endif
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  return kTfLiteOk;
 }
 
 TfLiteStatus EvalMliQuantizedPerChannel(
@@ -644,70 +865,104 @@ TfLiteStatus EvalMliQuantizedPerChannel(
     // Copy configuration data from external to local memory
     mli_conv2d_cfg cfg_local = *data.cfg;
 
-    ops::micro::MliTensorAttachBuffer<int8_t>(input, data.mli_in);
-    ops::micro::MliTensorAttachBuffer<int8_t>(filter, data.mli_weights);
-    ops::micro::MliTensorAttachBuffer<int32_t>(bias, data.mli_bias);
-    ops::micro::MliTensorAttachBuffer<int8_t>(output, data.mli_out);
+    ops::micro::MliTensorAttachBuffer<int8_t>(input, &data.mli_in);
+    ops::micro::MliTensorAttachBuffer<int8_t>(filter, &data.mli_weights);
+    ops::micro::MliTensorAttachBuffer<int32_t>(bias, &data.mli_bias);
+    ops::micro::MliTensorAttachBuffer<int8_t>(output, &data.mli_out);
 
     // for height slicing
     const int height_dimension = 1;
     int in_slice_height = 0;
     int out_slice_height = 0;
     const int kernel_height =
-        static_cast<int>(data.mli_weights->shape[KRNL_H_DIM_HWC]);
+        static_cast<int>(data.mli_weights.Shape()[KRNL_H_DIM_HWC]);
     const int overlap = kernel_height - cfg_local.stride_height;
 
-    // for weight slicing (on output channels)
+// for weight slicing (on output channels)
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+    // HWCN layout for weights, output channel dimension is the last dimension.
+    const int weight_out_ch_dimension = 3;
+#else
     // NHWC layout for weights, output channel dimension is the first dimension.
     const int weight_out_ch_dimension = 0;
+#endif
+    // bias has only 1 dimension
+    const int bias_out_ch_dimension = 0;
     int slice_channels =
-        static_cast<int>(data.mli_weights->shape[weight_out_ch_dimension]);
+        static_cast<int>(data.mli_weights.Shape()[weight_out_ch_dimension]);
     // Batch-Height-Width-Channel layout means last dimension is output
     // channels.
     const int out_tensor_ch_dimension = 3;
 
     // Tensors for data in fast (local) memory and config to copy data from
     // external to local memory
-    mli_tensor weights_local = *data.mli_weights;
-    mli_tensor bias_local = *data.mli_bias;
-    mli_tensor in_local = *data.mli_in;
-    mli_tensor out_local = *data.mli_out;
+    mli_tensor weights_local = *data.mli_weights.MliTensor();
+    mli_tensor bias_local = *data.mli_bias.MliTensor();
+    mli_tensor in_local = *data.mli_in.MliTensor();
+    mli_tensor out_local = *data.mli_out.MliTensor();
+
+    ops::micro::MliTensorInterface weights_local_interface(&weights_local);
+    ops::micro::MliTensorInterface bias_local_interface(&bias_local);
+    ops::micro::MliTensorInterface in_local_interface(&in_local);
+    ops::micro::MliTensorInterface out_local_interface(&out_local);
+
     mli_mov_cfg_t copy_config;
     mli_mov_cfg_for_copy(&copy_config);
+
     TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors(
-        context, &in_local, &weights_local, &bias_local, &out_local));
+        context, &in_local_interface, &weights_local_interface,
+        &bias_local_interface, &out_local_interface));
     TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io(
-        &in_local, &out_local, kernel_height, cfg_local.stride_height,
-        cfg_local.padding_top, cfg_local.padding_bottom, &in_slice_height,
-        &out_slice_height));
+        &in_local_interface, &out_local_interface, kernel_height,
+        cfg_local.stride_height, cfg_local.padding_top,
+        cfg_local.padding_bottom, &in_slice_height, &out_slice_height));
     TF_LITE_ENSURE_STATUS(
         ops::micro::arc_scratch_buffer_calc_slice_size_weights(
-            &weights_local, &bias_local, weight_out_ch_dimension,
-            &slice_channels));
+            &weights_local_interface, &bias_local_interface,
+            weight_out_ch_dimension, &slice_channels));
 
     /* is_local indicates that the tensor is already in local memory,
        so in that case the original tensor can be used,
        and there is no need to copy it to the local tensor*/
-    const bool in_is_local = in_local.data == data.mli_in->data;
-    const bool out_is_local = out_local.data == data.mli_out->data;
-    const bool w_is_local = weights_local.data == data.mli_weights->data;
-    const bool b_is_local = bias_local.data == data.mli_bias->data;
-
-    ops::micro::TensorSlicer w_slice(data.mli_weights, weight_out_ch_dimension,
-                                     slice_channels);
-    ops::micro::TensorSlicer b_slice(data.mli_bias, weight_out_ch_dimension,
-                                     slice_channels);
-    ops::micro::TensorSlicer out_ch_slice(data.mli_out, out_tensor_ch_dimension,
+    const bool in_is_local =
+        in_local_interface.Data<int8_t>() == data.mli_in.Data<int8_t>();
+    const bool out_is_local =
+        out_local_interface.Data<int8_t>() == data.mli_out.Data<int8_t>();
+    const bool b_is_local =
+        bias_local_interface.Data<int32_t>() == data.mli_bias.Data<int32_t>();
+#ifndef MLI_2_0_KRNL_TEST
+    const bool w_is_local = weights_local_interface.Data<int8_t>() ==
+                            data.mli_weights.Data<int8_t>();
+#endif
+
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+    ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(),
+                                     weight_out_ch_dimension, slice_channels, 0,
+                                     0, 0, true);
+#else
+    ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(),
+                                     weight_out_ch_dimension, slice_channels);
+#endif
+    ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(),
+                                     bias_out_ch_dimension, slice_channels);
+    ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(),
+                                          out_tensor_ch_dimension,
                                           slice_channels, 0, 0, 0, true);
 
+#ifdef MLI_2_0_KRNL_TEST
+    mli_tensor* w_ptr = &weights_local;
+#else
     mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
+#endif
     mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local;
 
     void* input_buffer_ptr = NULL;
     uint32_t input_buffer_size = 0;
 
     while (!w_slice.Done()) {
+#ifndef MLI_2_0_KRNL_TEST
       mli_mov_tensor_sync(w_slice.Sub(), &copy_config, w_ptr);
+#endif
       mli_mov_tensor_sync(b_slice.Sub(), &copy_config, b_ptr);
 
       /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional
@@ -717,9 +972,9 @@ TfLiteStatus EvalMliQuantizedPerChannel(
       dimension. for that the sliceHeight has been calculated. The tensor slicer
       is configured that it will completely slice the nBatch dimension (0) and
       slice the height dimension (1) in chunks of 'sliceHeight' */
-      ops::micro::TensorSlicer in_slice(data.mli_in, height_dimension,
-                                        in_slice_height, cfg_local.padding_top,
-                                        cfg_local.padding_bottom, overlap);
+      ops::micro::TensorSlicer in_slice(
+          data.mli_in.MliTensor(), height_dimension, in_slice_height,
+          cfg_local.padding_top, cfg_local.padding_bottom, overlap);
 
       /* output tensor is already sliced in the output channel dimension.
       out_ch_slice.Sub() is the tensor for the amount of output channels of this
@@ -733,20 +988,52 @@ TfLiteStatus EvalMliQuantizedPerChannel(
       mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local;
       mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local;
 
+#ifdef MLI_2_0_KRNL_TEST
+      /* Permute weights tensor to the HWCN layout */
+      // Check conditions here to prevent use of non-contiguous buffer memory.
+      if (data.mli_out.Shape()[out_tensor_ch_dimension] !=
+              out_slice.Sub()->shape[FMAP_C_DIM_HWC] ||
+          data.mli_out.Shape()[height_dimension] !=
+              out_slice.Sub()->shape[FMAP_H_DIM_HWC]) {
+        MicroPrintf("Slicing is not supported with real-time permutation.");
+        return kTfLiteError;
+      }
+      mli_permute_cfg permute_cfg = {{1, 2, 3, 0}};
+      ops::micro::permute_weights(data.mli_weights.MliTensor(), &permute_cfg,
+                                  w_ptr, &out_ptr->data);
+#endif
+
       while (!out_slice.Done()) {
+        if (!out_is_local) {
+          ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local);
+          ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local);
+        }
+
         TF_LITE_ENSURE(context, !in_slice.Done());
         cfg_local.padding_top = in_slice.GetPaddingPre();
         cfg_local.padding_bottom = in_slice.GetPaddingPost();
 
         // if same input copy as previous iteration, skip the copy of input
+#ifdef MLI_2_0
+        if ((in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) ||
+            (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) {
+          mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
+          input_buffer_ptr = in_slice.Sub()->data.mem.pi8;
+          input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0);
+        }
+
+        data.p_mli_krn_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg_local,
+                                           out_ptr);
+#else
         if ((in_slice.Sub()->data != input_buffer_ptr) ||
             (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) {
           mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
           input_buffer_ptr = in_slice.Sub()->data;
           input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0);
         }
-        mli_krn_conv2d_nhwc_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg_local,
-                                         out_ptr);
+        data.p_mli_krn_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg_local,
+                                           out_ptr);
+#endif
         mli_mov_tensor_sync(out_ptr, &copy_config, out_slice.Sub());
 
         in_slice.Next();
@@ -792,8 +1079,41 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(output),
       tflite::micro::GetTensorData<int8_t>(output));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Node configuration is not supported by ARC MLI Library.");
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
+#endif
+}
+
+void EvalQuantizedPerChannelInt16(TfLiteContext* context, TfLiteNode* node,
+                                  TfLiteConvParams* params, const OpData& data,
+                                  const TfLiteEvalTensor* input,
+                                  const TfLiteEvalTensor* filter,
+                                  const TfLiteEvalTensor* bias,
+                                  TfLiteEvalTensor* output) {
+#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
+  ConvParams op_params;
+  op_params.input_offset = -data.input_zero_point;
+  op_params.output_offset = data.output_zero_point;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.padding_values.height = data.padding.height;
+  op_params.padding_values.width = data.padding.width;
+  op_params.quantized_activation_min = data.output_activation_min;
+  op_params.quantized_activation_max = data.output_activation_max;
+
+  reference_integer_ops::ConvPerChannel(
+      op_params, data.per_channel_output_multiplier,
+      data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int16_t>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<int8_t>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<std::int64_t>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int16_t>(output));
+#else
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
 #endif
 }
 
@@ -828,9 +1148,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       tflite::micro::GetTensorShape(im2col),
                       tflite::micro::GetTensorData<float>(im2col));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.",
+              TfLiteTypeGetName(input->type), input->type);
 #endif
 }
 
@@ -849,24 +1168,29 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  TF_LITE_ENSURE_MSG(
+      context,
+      input->type == filter->type ||
+          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
+      "Hybrid models are not supported on TFLite Micro.");
+
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       EvalFloat(context, node, params, data, input, filter, bias, nullptr,
                 nullptr, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       if (data.is_mli_applicable) {
         EvalMliQuantizedPerChannel(context, node, params, data, input, filter,
                                    bias, output);
@@ -875,19 +1199,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                 bias, output, nullptr);
       }
       break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      EvalQuantized(context, node, params, data, input, filter, bias, nullptr,
-                    nullptr, output);
+    case kTfLiteInt16:
+      EvalQuantizedPerChannelInt16(context, node, params, data, input, filter,
+                                   bias, output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -896,14 +1214,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
@@ -1094,7 +1405,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node)
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_min),
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_max),
         data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        reinterpret_cast<int32_t*>(data->per_channel_output_shift),
         num_channels));
 
       if (data->op_params.dilation_height == 1 && data->op_params.dilation_width == 1) {
@@ -1374,6 +1685,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -1383,12 +1696,15 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
-#include "freertos/FreeRTOS.h"
 #include <esp_timer.h>
+
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
+
 
 long long conv_total_time = 0;
 
@@ -1397,7 +1713,9 @@ namespace {
 
 struct NodeData {
   OpDataConv op_data;
+#if ESP_NN
   int buffer_idx;
+#endif
 };
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -1413,12 +1731,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto& params =
       *(static_cast<const TfLiteConvParams*>(node->builtin_data));
 
-  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   const int input_width = input->dims->data[2];
   const int input_height = input->dims->data[1];
@@ -1459,8 +1782,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       context, node, params, input_width, input_height, filter_width,
       filter_height, output_width, output_height, input->type, &data->op_data));
 
+#if ESP_NN
   if (input->type == kTfLiteInt8) {
-
     data_dims_t input_dims =  {
                                 .width = input_width, .height = input_height,
                                 .channels = input->dims->data[3], 1
@@ -1486,14 +1809,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       data->buffer_idx = -1;
     }
   }
+#endif
 
-  //micro_context->DeallocateTempTfLiteTensor(output);
-  //micro_context->DeallocateTempTfLiteTensor(input);
-  //micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
 
   return kTfLiteOk;
 }
 
+#if ESP_NN
 // Fixed-point per-channel-quantization convolution Int8 function wrapper.
 inline void EvalQuantizedPerChannel(
     TfLiteContext* context, TfLiteNode* node, const TfLiteConvParams& params,
@@ -1596,6 +1921,7 @@ inline void EvalQuantizedPerChannel(
         tflite::micro::GetTensorData<int8_t>(output));
   }
 }
+#endif
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
@@ -1622,11 +1948,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   long long start_time = esp_timer_get_time();
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_F32
+#if EI_TFLITE_DISABLE_CONV_2D_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       tflite::reference_ops::Conv(
           ConvParamsFloat(params, data.op_data),
           tflite::micro::GetTensorShape(input),
@@ -1641,21 +1967,36 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       break;
     }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
+#if ESP_NN
       EvalQuantizedPerChannel(context, node, params, data, input, filter,
                               bias, output);
+#else
+      reference_integer_ops::ConvPerChannel(
+          ConvParamsQuantized(params, data.op_data),
+          data.op_data.per_channel_output_multiplier,
+          data.op_data.per_channel_output_shift,
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<int8_t>(filter),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetTensorData<int32_t>(bias),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+#endif
       break;
     }
     case kTfLiteUInt8: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_U8
+#if EI_TFLITE_DISABLE_CONV_2D_IN_U8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       //EvalQuantized
       reference_ops::Conv(ConvParamsQuantized(params, data.op_data),
                           tflite::micro::GetTensorShape(input),
@@ -1670,34 +2011,25 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                           nullptr);
       break;
     }
-
     default:
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                          TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
   }
-  
   long long time_this_instance = esp_timer_get_time() - start_time;
   conv_total_time += time_this_instance;
   //printf("time this instance: %llu\n", time_this_instance / 1000);
-
   return kTfLiteOk;
 }
 
 }  // namespace
 
 TfLiteRegistration Register_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
+
 #else
 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 
@@ -1718,14 +2050,12 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -1754,51 +2084,119 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
 
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
-                     "Hybrid models are not supported on TFLite Micro.");
+  TF_LITE_ENSURE_MSG(
+      context,
+      input->type == filter->type ||
+          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) ||
+          (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4),
+      "Hybrid models are not supported on TFLite Micro.");
 
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::Conv(
           ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr);
       break;
     }
+    case kTfLiteInt16: {
+      switch (bias->type) {
+        case kTfLiteInt32: {
+          reference_integer_ops::ConvPerChannel(
+              ConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int16_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int16_t>(output));
+          break;
+        }
+        case kTfLiteInt64: {
+          reference_integer_ops::ConvPerChannel(
+              ConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int16_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int16_t>(output));
+          break;
+        }
+        default:
+          MicroPrintf("Bias type %s (%d) not supported.",
+                      TfLiteTypeGetName(bias->type), bias->type);
+          return kTfLiteError;
+      }
+      break;
+    }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      reference_integer_ops::ConvPerChannel(
-          ConvParamsQuantized(params, data), data.per_channel_output_multiplier,
-          data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<int8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
+#endif
+      switch (filter->type) {
+        case kTfLiteInt4: {
+          int8_t* unpacked_filter_data = static_cast<int8_t*>(
+              context->GetScratchBuffer(context, data.filter_buffer_index));
+          tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter).FlatSize(),
+              unpacked_filter_data);
+          reference_integer_ops::ConvPerChannel(
+              ConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        case kTfLiteInt8: {
+          reference_integer_ops::ConvPerChannel(
+              ConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        default:
+          MicroPrintf("Weight type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+      }
       break;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -1807,14 +2205,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/ConvPrepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, ConvPrepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h
index 3399526..2a4b63d 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -45,6 +45,10 @@ struct OpDataConv {
   // uint8_t these would be 0 and 255.
   int32_t output_activation_min;
   int32_t output_activation_max;
+
+  // Index of the scratch buffer that holds unpacked filter values. Used if the
+  // source tensor has an n-bit precision that kernels cannot process directly.
+  int filter_buffer_index;
 };
 
 extern const int kConvInputTensor;
@@ -72,6 +76,41 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
 
 TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_CONV_2D();
+
+#if defined(XTENSA)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8 activations and int8 weights and always calls the reference
+// implementation.
+TfLiteRegistration Register_CONV_2D_INT8REF();
+#else
+inline TfLiteRegistration Register_CONV_2D_INT8REF() {
+  return Register_CONV_2D();
+}
+#endif
+
+#if defined(CMSIS_NN)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8 activations and int8 weights and uses the latency optimized
+// implementations.
+TfLiteRegistration Register_CONV_2D_INT8();
+
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int16 activations and int8 weights and uses the latency optimized
+// implementations.
+TfLiteRegistration Register_CONV_2D_INT16();
+
+#else
+inline TfLiteRegistration Register_CONV_2D_INT8() { return Register_CONV_2D(); }
+
+inline TfLiteRegistration Register_CONV_2D_INT16() {
+  return Register_CONV_2D();
+}
+#endif
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cc
index 8a21348..fe23085 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_common.cc
@@ -14,12 +14,8 @@ limitations under the License.
 ==============================================================================*/
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/conv.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h"
@@ -93,13 +89,18 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
       params.dilation_width_factor, height, width, filter_height, filter_width,
       padding, &out_height, &out_width);
 
-  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kConvBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   // Note that quantized inference requires that all tensors have their
@@ -111,8 +112,7 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
         context, input, filter, bias, output, params.activation,
         &data->output_multiplier, &data->output_shift,
         &data->output_activation_min, &data->output_activation_max,
-        data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        data->per_channel_output_multiplier, data->per_channel_output_shift,
         output_channels));
   }
 
@@ -120,6 +120,11 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
   data->filter_zero_point = filter->params.zero_point;
   data->output_zero_point = output->params.zero_point;
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+
   return kTfLiteOk;
 }
 
@@ -130,12 +135,16 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
   OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
   const auto& params =
       *(static_cast<const TfLiteConvParams*>(node->builtin_data));
+  MicroContext* micro_context = GetMicroContext(context);
 
-  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
-  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
 
   const int input_width = input->dims->data[2];
@@ -146,16 +155,18 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
   const int output_height = output->dims->data[1];
 
   // Dynamically allocate per-channel quantization parameters.
-  const int num_channels = filter->dims->data[kConvQuantizedDimension];
-  data->per_channel_output_multiplier =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
-  data->per_channel_output_shift =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
+  if (input->type != kTfLiteFloat32) {
+    const int num_channels = filter->dims->data[kConvQuantizedDimension];
+    data->per_channel_output_multiplier =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+    data->per_channel_output_shift =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+  }
 
   // All per-channel quantized tensors need valid zero point and scale arrays.
-  if (input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
 
@@ -169,14 +180,25 @@ TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node) {
                    affine_quantization->scale->size == 1 ||
                        affine_quantization->scale->size ==
                            filter->dims->data[kConvQuantizedDimension]);
-    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
-                      affine_quantization->zero_point->size);
   }
 
   TF_LITE_ENSURE_STATUS(CalculateOpDataConv(
       context, node, params, input_width, input_height, filter_width,
       filter_height, output_width, output_height, input->type, data));
 
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(context, filter_size,
+                                         &data->filter_buffer_index);
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h
index f7a2459..cdaaefa 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/conv_test.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
@@ -59,36 +59,56 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
                                  TfLiteRegistration registration,
                                  uint8_t* output_data, float tolerance = 1e-5);
 
-TfLiteStatus TestConvFloat(const int* input_dims_data, const float* input_data,
-                           const int* filter_dims_data,
-                           const float* filter_data, const int* bias_dims_data,
-                           const float* bias_data, const int* output_dims_data,
+TfLiteStatus TestConvFloat(int* input_dims_data, const float* input_data,
+                           int* filter_dims_data, const float* filter_data,
+                           int* bias_dims_data, const float* bias_data,
+                           int* output_dims_data,
                            const float* expected_output_data,
                            TfLiteConvParams* conv_params,
                            TfLiteRegistration registration, float* output_data);
 
 TfLiteStatus TestConvQuantizedPerLayer(
-    const int* input_dims_data, const float* input_data,
-    uint8_t* input_quantized, float input_scale, const int* filter_dims_data,
-    const float* filter_data, uint8_t* filter_quantized, float filter_scale,
-    const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
-    const int* output_dims_data, const float* expected_output_data,
-    uint8_t* expected_output_quantized, float output_scale,
-    TfLiteConvParams* conv_params, TfLiteRegistration registration,
-    uint8_t* output_data);
+    int* input_dims_data, const float* input_data, uint8_t* input_quantized,
+    float input_scale, int* filter_dims_data, const float* filter_data,
+    uint8_t* filter_quantized, float filter_scale, int* bias_dims_data,
+    const float* bias_data, int32_t* bias_quantized, int* output_dims_data,
+    const float* expected_output_data, uint8_t* expected_output_quantized,
+    float output_scale, TfLiteConvParams* conv_params,
+    TfLiteRegistration registration, uint8_t* output_data);
 
 TfLiteStatus TestConvQuantizedPerChannel(
-    const int* input_dims_data, const float* input_data,
-    int8_t* input_quantized, float input_scale, int input_zero_point,
-    const int* filter_dims_data, const float* filter_data,
-    int8_t* filter_data_quantized, const int* bias_dims_data,
-    const float* bias_data, int32_t* bias_data_quantized, float* bias_scales,
-    int* bias_zero_points, const int* output_dims_data,
+    int* input_dims_data, const float* input_data, int8_t* input_quantized,
+    float input_scale, int input_zero_point, int* filter_dims_data,
+    const float* filter_data, int8_t* filter_data_quantized,
+    int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
+    float* bias_scales, int* bias_zero_points, int* output_dims_data,
     const float* expected_output_data, int8_t* expected_output_data_quantized,
     float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
-    TfLiteRegistration registration, int8_t* output_data);
+    TfLiteRegistration registration, int8_t* output_data,
+    TfLiteType tensor_weight_type = kTfLiteNoType);
+
+TfLiteStatus TestConvQuantizedPerChannel(
+    int* input_dims_data, const float* input_data, int16_t* input_quantized,
+    float input_scale, int input_zero_point, int* filter_dims_data,
+    const float* filter_data, int8_t* filter_data_quantized,
+    int* bias_dims_data, const float* bias_data,
+    std::int64_t* bias_data_quantized, float* bias_scales,
+    int* bias_zero_points, int* output_dims_data,
+    const float* expected_output_data, int16_t* expected_output_data_quantized,
+    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
+    TfLiteRegistration registration, int16_t* output_data);
+
+TfLiteStatus TestConvQuantizedPerChannel(
+    int* input_dims_data, const float* input_data, int16_t* input_quantized,
+    float input_scale, int input_zero_point, int* filter_dims_data,
+    const float* filter_data, int8_t* filter_data_quantized,
+    int* bias_dims_data, const float* bias_data, int32_t* bias_data_quantized,
+    float* bias_scales, int* bias_zero_points, int* output_dims_data,
+    const float* expected_output_data, int16_t* expected_output_data_quantized,
+    float output_scale, int output_zero_point, TfLiteConvParams* conv_params,
+    TfLiteRegistration registration, int16_t* output_data);
 
 }  // namespace testing
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cc
new file mode 100644
index 0000000..bdc888b
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/cumsum.cc
@@ -0,0 +1,175 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/cumsum.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;   // node input index of the data tensor
+constexpr int kAxisTensor = 1;    // node input index of the int32 axis tensor
+constexpr int kOutputTensor = 0;  // node output index
+
+constexpr int kCumSumIntegerShift = 20;  // copied into OpData::left_shift
+
+// Quantization parameters filled in by CalculateOpData; INT8 tensors only.
+struct OpData {
+  int32_t output_activation_min;  // lower clamp for the quantized output
+  int32_t output_activation_max;  // upper clamp for the quantized output
+  int32_t input_offset;           // negated input zero point
+  int32_t output_offset;          // output zero point
+  int32_t input_multiplier;       // quantized real_input_multiplier
+  int32_t output_multiplier;      // quantized real_output_multiplier
+  int input_shift;                // shift paired with input_multiplier
+  int output_shift;               // shift paired with output_multiplier
+  int left_shift;                 // set to kCumSumIntegerShift
+};
+// Validates the node's tensors and, for INT8, precomputes OpData in user_data.
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);  // data tensor + axis tensor
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* axis =
+      micro_context->AllocateTempInputTensor(node, kAxisTensor);
+  // NOTE(review): input and axis are dereferenced below without a null check.
+  TF_LITE_ENSURE(context,
+                 input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
+  TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
+
+  TF_LITE_ENSURE_EQ(context, NumElements(axis), 1);  // exactly one axis value
+
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  // NOTE(review): output is likewise used without a null check.
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  TF_LITE_ENSURE(context, HaveSameShapes(input, output));
+
+  if (output->type == kTfLiteInt8) {  // OpData is only allocated for INT8
+    node->user_data =
+        context->AllocatePersistentBuffer(context, sizeof(OpData));
+    OpData* data = static_cast<OpData*>(node->user_data);
+
+    // INT8 path: store the offsets and the quantized input/output rescales.
+    data->input_offset = -input->params.zero_point;
+    data->output_offset = output->params.zero_point;
+    data->left_shift = kCumSumIntegerShift;
+    const double twice_max_input_scale =
+        2 * static_cast<double>(input->params.scale);
+    const double real_input_multiplier =
+        static_cast<double>(input->params.scale) / twice_max_input_scale;
+    const double real_output_multiplier =
+        twice_max_input_scale /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input_multiplier, &data->input_multiplier, &data->input_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, kTfLiteActNone, output, &data->output_activation_min,
+        &data->output_activation_max));
+  }
+  // Release the temp tensors allocated above.
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(axis);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpData(context, node);  // all checks and setup live there
+}
+// Runs CUMSUM along the axis read from the axis tensor (FLOAT32 or INT8).
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* axis_tensor =
+      tflite::micro::GetEvalInput(context, node, kAxisTensor);
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  auto* cs_params = static_cast<TfLiteCumsumParams*>(node->builtin_data);
+  auto input_shape = tflite::micro::GetTensorShape(input);
+
+  int32_t axis = *tflite::micro::GetTensorData<int32_t>(axis_tensor);
+  if (axis < 0) axis += input_shape.DimensionsCount();  // wrap negative axis
+
+  if (axis < 0 || axis >= input_shape.DimensionsCount()) {
+    MicroPrintf("CUMSUM Invalid axis: %d", axis);
+    return kTfLiteError;
+  }
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      reference_ops::CumSum(tflite::micro::GetTensorData<float>(input),
+                            input_shape, axis, cs_params->exclusive,
+                            cs_params->reverse,
+                            tflite::micro::GetTensorData<float>(output));
+      return kTfLiteOk;
+    } break;
+
+    case kTfLiteInt8: {
+      auto* data = static_cast<OpData*>(node->user_data);  // from Prepare
+      ArithmeticParams params;
+      params.left_shift = data->left_shift;
+      params.input1_offset = data->input_offset;
+      params.input1_multiplier = data->input_multiplier;
+      params.input1_shift = data->input_shift;
+      params.output_offset = data->output_offset;
+      params.output_multiplier = data->output_multiplier;
+      params.output_shift = data->output_shift;
+      SetActivationParams(data->output_activation_min,
+                          data->output_activation_max, &params);
+      reference_ops::CumSum(params, tflite::micro::GetTensorData<int8_t>(input),
+                            input_shape, axis, cs_params->exclusive,
+                            cs_params->reverse,
+                            tflite::micro::GetTensorData<int8_t>(output));
+      return kTfLiteOk;
+    } break;
+
+    default: {  // NOTE(review): logs output->type; the switch is on input->type
+      MicroPrintf("CUMSUM only supports FLOAT32 and INT8, got %s.",
+                  TfLiteTypeGetName(output->type));
+      return kTfLiteError;
+    }
+  }
+
+  return kTfLiteError;  // not reached: every switch case returns
+}
+
+}  // namespace
+// Registration for CUMSUM: null first RegisterOp argument, then Prepare, Eval.
+TfLiteRegistration Register_CUMSUM() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cc
new file mode 100644
index 0000000..72e1545
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depth_to_space.cc
@@ -0,0 +1,142 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;   // node input index
+constexpr int kOutputTensor = 0;  // node output index
+
+// Index of each dimension within the rank-4 input/output tensor shapes.
+constexpr int kBatchRank = 0;
+constexpr int kHeightRank = 1;
+constexpr int kWidthRank = 2;
+constexpr int kDepthRank = 3;
+// Validates the node and writes the block-expanded dimensions to the output.
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+
+  auto data_type = output->type;
+  TF_LITE_ENSURE(context,
+                 data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+
+  const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
+  const int input_height = input->dims->data[kHeightRank];
+  const int input_width = input->dims->data[kWidthRank];
+  const int input_channels = input->dims->data[kDepthRank];
+  int output_height = input_height * block_size;
+  int output_width = input_width * block_size;
+  int output_channels = input_channels / block_size / block_size;
+  // The channel check below fails when block_size^2 does not divide the depth.
+  TF_LITE_ENSURE_EQ(context, input_height, output_height / block_size);
+  TF_LITE_ENSURE_EQ(context, input_width, output_width / block_size);
+  TF_LITE_ENSURE_EQ(context, input_channels,
+                    output_channels * block_size * block_size);
+
+  // We must update the output tensor dimensions.
+  // The dims storage is expected to be the same area in memory
+  // for both TfLiteTensor and TfLiteEvalTensor.  This is important
+  // because TfLiteTensor in the MicroInterpreter is a temporary
+  // allocation.  For the KernelRunner interpreter, TfLiteEvalTensor
+  // is a temporary allocation.  We must therefore relocate the dims
+  // from the FlatBuffer to the persistent storage arena.
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+  output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
+  output->dims->data[kHeightRank] = output_height;
+  output->dims->data[kWidthRank] = output_width;
+  output->dims->data[kDepthRank] = output_channels;
+  // Release the temp tensors allocated above.
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpData(context, node);  // all checks and setup live there
+}
+// Dispatches DEPTH_TO_SPACE to the reference kernel for FLOAT32 or INT8 data.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteDepthToSpaceParams*>(node->builtin_data);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  tflite::DepthToSpaceParams op_params;
+  op_params.block_size = static_cast<int32_t>(params->block_size);
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      reference_ops::DepthToSpace(op_params,
+                                  tflite::micro::GetTensorShape(input),
+                                  tflite::micro::GetTensorData<float>(input),
+                                  tflite::micro::GetTensorShape(output),
+                                  tflite::micro::GetTensorData<float>(output));
+      break;
+    case kTfLiteInt8:
+      reference_ops::DepthToSpace(op_params,
+                                  tflite::micro::GetTensorShape(input),
+                                  tflite::micro::GetTensorData<int8_t>(input),
+                                  tflite::micro::GetTensorShape(output),
+                                  tflite::micro::GetTensorData<int8_t>(output));
+      break;
+    default:  // NOTE(review): logs output->type; the switch is on input->type
+      MicroPrintf("DEPTH_TO_SPACE only supports FLOAT32 and INT8, got %s.",
+                  TfLiteTypeGetName(output->type));
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+// Registration for DEPTH_TO_SPACE: null first RegisterOp argument, then hooks.
+TfLiteRegistration Register_DEPTH_TO_SPACE() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cc
index 55e5fb6..000bb0b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -26,13 +26,13 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -44,6 +44,17 @@ struct OpData {
   int buffer_idx;
 };
 
+// Always inline for optimal code size.
+void PopulateDwConvParams(
+    cmsis_nn_dw_conv_params* const dw_conv_params,
+    cmsis_nn_per_channel_quant_params* const quant_params,
+    cmsis_nn_dims* const input_dims, cmsis_nn_dims* const filter_dims,
+    cmsis_nn_dims* const bias_dims, cmsis_nn_dims* const output_dims,
+    const TfLiteDepthwiseConvParams& params, const OpData& data,
+    const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
+    const TfLiteEvalTensor* bias, TfLiteEvalTensor* output)
+    __attribute__((always_inline));
+
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
@@ -57,13 +68,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto& params =
       *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
 
-  const TfLiteTensor* input =
-      GetInput(context, node, kDepthwiseConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kDepthwiseConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   const TfLiteType data_type = input->type;
@@ -74,10 +88,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   int output_width = SizeOfDimension(output, 2);
   int output_height = SizeOfDimension(output, 1);
 
-  if (input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
 
+    if (input->type == kTfLiteInt16) {
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    }
+
     // All per-channel quantized tensors need valid zero point and scale arrays.
     const auto* affine_quantization =
         reinterpret_cast<TfLiteAffineQuantization*>(
@@ -91,17 +110,27 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                          filter->dims->data[kDepthwiseConvQuantizedDimension]);
     TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size,
                       affine_quantization->zero_point->size);
-  }
 
-  // Allocate memory for per-channel quantization parameters
-  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
+    // Allocate memory for per-channel quantization parameters
+    const int num_channels =
+        filter->dims->data[kDepthwiseConvQuantizedDimension];
 
-  data->reference_op_data.per_channel_output_multiplier =
-      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
-  data->reference_op_data.per_channel_output_shift =
-      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
+    data->reference_op_data.per_channel_output_multiplier =
+        reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+    data->reference_op_data.per_channel_output_shift =
+        reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+  }
+
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(
+        context, filter_size, &data->reference_op_data.filter_buffer_index);
+  }
 
   TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
       context, node, params, input_width, input_height, filter_width,
@@ -141,6 +170,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     cmsis_nn_dw_conv_params dw_conv_params;
     dw_conv_params.padding.h = data->reference_op_data.padding.height;
     dw_conv_params.padding.w = data->reference_op_data.padding.width;
+    dw_conv_params.dilation.h = params.dilation_height_factor;
+    dw_conv_params.dilation.w = params.dilation_width_factor;
 
     const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
         &dw_conv_params, &input_dims, &filter_dims, &output_dims);
@@ -152,9 +183,77 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       data->buffer_idx = -1;
     }
   }
+
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+
   return kTfLiteOk;
 }
 
+inline void PopulateDwConvParams(  // fills the CMSIS-NN structs passed in
+    cmsis_nn_dw_conv_params* const dw_conv_params,
+    cmsis_nn_per_channel_quant_params* const quant_params,
+    cmsis_nn_dims* const input_dims, cmsis_nn_dims* const filter_dims,
+    cmsis_nn_dims* const bias_dims, cmsis_nn_dims* const output_dims,
+    const TfLiteDepthwiseConvParams& params, const OpData& data,
+    const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter,
+    const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) {
+  dw_conv_params->dilation.h = params.dilation_height_factor;
+  dw_conv_params->dilation.w = params.dilation_width_factor;
+  // Input offset is the negated zero point; output offset is used as-is.
+  dw_conv_params->input_offset = -data.reference_op_data.input_zero_point;
+  dw_conv_params->output_offset = data.reference_op_data.output_zero_point;
+  dw_conv_params->stride.h = params.stride_height;
+  dw_conv_params->stride.w = params.stride_width;
+  dw_conv_params->padding.h = data.reference_op_data.padding.height;
+  dw_conv_params->padding.w = data.reference_op_data.padding.width;
+  // Clamp range comes from the precomputed op data.
+  dw_conv_params->activation.min = data.reference_op_data.output_activation_min;
+  dw_conv_params->activation.max = data.reference_op_data.output_activation_max;
+
+  dw_conv_params->ch_mult = params.depth_multiplier;
+  // Per-channel arrays are referenced, not copied.
+  quant_params->multiplier =
+      data.reference_op_data.per_channel_output_multiplier;
+  quant_params->shift = data.reference_op_data.per_channel_output_shift;
+
+  RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
+  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
+  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
+  RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
+
+  TFLITE_DCHECK_LE(dw_conv_params->activation.min,
+                   dw_conv_params->activation.max);
+
+  const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
+  const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  // Bias size is only checked when the bias data pointer is non-null.
+  if (tflite::micro::GetOptionalTensorData<int8_t>(bias)) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+
+  input_dims->n = batch_size;
+  input_dims->h = input_shape.Dims(1);
+  input_dims->w = input_shape.Dims(2);
+  input_dims->c = input_shape.Dims(3);
+
+  filter_dims->n = filter_shape.Dims(0);
+  filter_dims->h = filter_shape.Dims(1);
+  filter_dims->w = filter_shape.Dims(2);
+  filter_dims->c = output_depth;
+  // Bias dims are written as 1 x 1 x 1 x output_depth.
+  bias_dims->n = 1;
+  bias_dims->h = 1;
+  bias_dims->w = 1;
+  bias_dims->c = output_depth;
+
+  output_dims->n = batch_size;
+  output_dims->h = output_shape.Dims(1);
+  output_dims->w = output_shape.Dims(2);
+  output_dims->c = output_depth;
+}
+
 void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                              const TfLiteDepthwiseConvParams& params,
                              const OpData& data, const TfLiteEvalTensor* input,
@@ -162,97 +261,66 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                              const TfLiteEvalTensor* bias,
                              TfLiteEvalTensor* output) {
   cmsis_nn_dw_conv_params dw_conv_params;
-  dw_conv_params.dilation.h = params.dilation_height_factor;
-  dw_conv_params.dilation.w = params.dilation_width_factor;
-  // Call to reference implementation can be removed when dilation is supported
-  // in the optimized implementations.
-  if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) {
-    dw_conv_params.input_offset = -data.reference_op_data.input_zero_point;
-    dw_conv_params.output_offset = data.reference_op_data.output_zero_point;
-    dw_conv_params.stride.h = params.stride_height;
-    dw_conv_params.stride.w = params.stride_width;
-    dw_conv_params.padding.h = data.reference_op_data.padding.height;
-    dw_conv_params.padding.w = data.reference_op_data.padding.width;
-    // TODO(b/130439627): Use calculated value for clamping.
-    dw_conv_params.activation.min = std::numeric_limits<int8_t>::min();
-    dw_conv_params.activation.max = std::numeric_limits<int8_t>::max();
-    dw_conv_params.ch_mult = params.depth_multiplier;
-
-    cmsis_nn_per_channel_quant_params quant_params;
-    quant_params.multiplier =
-        data.reference_op_data.per_channel_output_multiplier;
-    quant_params.shift = data.reference_op_data.per_channel_output_shift;
-
-    RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
-    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
-    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-    RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias);
-
-    TFLITE_DCHECK_LE(dw_conv_params.activation.min,
-                     dw_conv_params.activation.max);
-
-    const int batch_size = MatchingDim(input_shape, 0, output_shape, 0);
-    const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+  cmsis_nn_per_channel_quant_params quant_params;
+  cmsis_nn_dims input_dims;
+  cmsis_nn_dims filter_dims;
+  cmsis_nn_dims bias_dims;
+  cmsis_nn_dims output_dims;
 
-    if (tflite::micro::GetTensorData<int8_t>(bias)) {
-      TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
-    }
+  PopulateDwConvParams(&dw_conv_params, &quant_params, &input_dims,
+                       &filter_dims, &bias_dims, &output_dims, params, data,
+                       input, filter, bias, output);
 
-    cmsis_nn_dims input_dims;
-    input_dims.n = batch_size;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = input_shape.Dims(3);
+  cmsis_nn_context ctx;
+  ctx.buf = nullptr;
+  /* 'size' is unused */
+  ctx.size = 0;
 
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = filter_shape.Dims(0);
-    filter_dims.h = filter_shape.Dims(1);
-    filter_dims.w = filter_shape.Dims(2);
-    filter_dims.c = output_depth;
+  if (data.buffer_idx > -1) {
+    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
+  }
 
-    cmsis_nn_dims bias_dims;
-    bias_dims.n = 1;
-    bias_dims.h = 1;
-    bias_dims.w = 1;
-    bias_dims.c = output_depth;
+  TFLITE_DCHECK_EQ(
+      arm_depthwise_conv_wrapper_s8(
+          &ctx, &dw_conv_params, &quant_params, &input_dims,
+          tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+          tflite::micro::GetOptionalTensorData<int32_t>(bias), &output_dims,
+          tflite::micro::GetTensorData<int8_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
+}
 
-    cmsis_nn_dims output_dims;
-    output_dims.n = batch_size;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = output_depth;
+void EvalQuantizedPerChannel16x8(TfLiteContext* context, TfLiteNode* node,
+                                 const TfLiteDepthwiseConvParams& params,
+                                 const OpData& data,
+                                 const TfLiteEvalTensor* input,
+                                 const TfLiteEvalTensor* filter,
+                                 const TfLiteEvalTensor* bias,
+                                 TfLiteEvalTensor* output) {
+  cmsis_nn_dw_conv_params dw_conv_params;
+  cmsis_nn_per_channel_quant_params quant_params;
+  cmsis_nn_dims input_dims;
+  cmsis_nn_dims filter_dims;
+  cmsis_nn_dims bias_dims;
+  cmsis_nn_dims output_dims;
 
-    cmsis_nn_context ctx;
-    ctx.buf = nullptr;
-    /* 'size' is unused */
-    ctx.size = 0;
+  PopulateDwConvParams(&dw_conv_params, &quant_params, &input_dims,
+                       &filter_dims, &bias_dims, &output_dims, params, data,
+                       input, filter, bias, output);
 
-    if (data.buffer_idx > -1) {
-      ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
-    }
+  cmsis_nn_context ctx;
+  ctx.buf = nullptr;
+  /* 'size' is unused */
+  ctx.size = 0;
 
-    TFLITE_DCHECK_EQ(
-        arm_depthwise_conv_wrapper_s8(
-            &ctx, &dw_conv_params, &quant_params, &input_dims,
-            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
-            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
-            tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
-            tflite::micro::GetTensorData<int8_t>(output)),
-        ARM_MATH_SUCCESS);
-  } else {
-    reference_integer_ops::DepthwiseConvPerChannel(
-        DepthwiseConvParamsQuantized(params, data.reference_op_data),
-        data.reference_op_data.per_channel_output_multiplier,
-        data.reference_op_data.per_channel_output_shift,
-        tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(filter),
-        tflite::micro::GetTensorData<int8_t>(filter),
-        tflite::micro::GetTensorShape(bias),
-        tflite::micro::GetTensorData<int32_t>(bias),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
-  }
+  TFLITE_DCHECK_EQ(
+      arm_depthwise_conv_s16(
+          &ctx, &dw_conv_params, &quant_params, &input_dims,
+          tflite::micro::GetTensorData<int16_t>(input), &filter_dims,
+          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
+          tflite::micro::GetOptionalTensorData<int64_t>(bias), &output_dims,
+          tflite::micro::GetTensorData<int16_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -274,14 +342,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
           : nullptr;
 
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
+      context, data.reference_op_data.filter_buffer_index, filter);
+
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::DepthwiseConv(
           DepthwiseConvParamsFloat(params, data.reference_op_data),
           tflite::micro::GetTensorShape(input),
@@ -289,46 +359,111 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       break;
     }
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
-                              output);
+#endif
+      switch (filter_int8.type) {
+        case kTfLiteInt8: {
+          EvalQuantizedPerChannel(context, node, params, data, input,
+                                  &filter_int8, bias, output);
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+        }
+      }
+      break;
+    case kTfLiteInt16:
+      EvalQuantizedPerChannel16x8(context, node, params, data, input, filter,
+                                  bias, output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
+TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {  // Invoke path with no switch on input->type.
+  TFLITE_DCHECK(node->user_data != nullptr);  // Dereferenced below as OpData.
+  TFLITE_DCHECK(node->builtin_data != nullptr);  // Dereferenced below as params.
+
+  const auto& params =
+      *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
+  const OpData& data = *(static_cast<OpData*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)  // Bias is fetched only when a third input exists.
+          ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
+          : nullptr;
+
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(  // NOTE(review): presumably expands int4 weights to int8 -- confirm.
+      context, data.reference_op_data.filter_buffer_index, filter);
+
+  EvalQuantizedPerChannel(context, node, params, data, input, &filter_int8,
+                          bias, output);
+  return kTfLiteOk;  // Unconditional: no status from the kernel call is checked.
+}
+
+TfLiteStatus EvalInt16x8(TfLiteContext* context, TfLiteNode* node) {  // Invoke path with no switch on input->type.
+  TFLITE_DCHECK(node->user_data != nullptr);  // Dereferenced below as OpData.
+  TFLITE_DCHECK(node->builtin_data != nullptr);  // Dereferenced below as params.
+
+  const auto& params =
+      *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
+  const OpData& data = *(static_cast<OpData*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)  // Bias is fetched only when a third input exists.
+          ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
+          : nullptr;
+
+  EvalQuantizedPerChannel16x8(context, node, params, data, input, filter, bias,
+                              output);  // Filter is passed as fetched (no int4 unpacking here).
+  return kTfLiteOk;  // Unconditional: no status from the kernel call is checked.
+}
+
 }  // namespace
 
 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() {  // Same Init/Prepare as the generic op.
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt8);  // Invoke is EvalInt8 rather than Eval.
+}
+
+TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() {  // Same Init/Prepare as the generic op.
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt16x8);  // Invoke is EvalInt16x8 rather than Eval.
 }
 
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -355,11 +490,13 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -389,6 +526,9 @@ struct OpData {
   // Per channel output multiplier and shift.
   int32_t* per_channel_output_multiplier;
   int32_t* per_channel_output_shift;
+#ifdef MLI_2_0
+  int8_t* per_channel_scale_frac_bits;
+#endif
 
   // The range of the fused activation layer. For example for kNone and
   // uint8_t these would be 0 and 255.
@@ -399,11 +539,15 @@ struct OpData {
   bool is_mli_applicable;
 
   // Tensors in MLI format.
-  mli_tensor* mli_in;
-  mli_tensor* mli_weights;
-  mli_tensor* mli_bias;
-  mli_tensor* mli_out;
+  mutable ops::micro::MliTensorInterface mli_in;
+  mutable ops::micro::MliTensorInterface mli_weights;
+  mutable ops::micro::MliTensorInterface mli_bias;
+  mutable ops::micro::MliTensorInterface mli_out;
   mli_conv2d_cfg* cfg;
+
+  // Pointer to the required depthwise function. For "channel multiplier"
+  // functionality, group convolution is used.
+  depthwise_func_ptr p_mli_krn_depthwise_conv2d_sa8_sa8_sa32;
 };
 
 bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
@@ -411,20 +555,28 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
                      const TfLiteDepthwiseConvParams* params) {
   const auto* affine_quantization =
       reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
+
+#ifndef MLI_2_0
   const int in_ch = SizeOfDimension(input, 3);
   const int filters_num = SizeOfDimension(filter, 3);
+#endif
 
   // MLI optimized version only supports int8_t datatype, dilation factor of 1
-  // and per-axis quantization of weights (no broadcasting/per-tensor) (in_ch ==
-  // filters_num) || (in_ch == 1)) is a forbidding of channel multiplier logic
-  // for multichannel input.
+  // and per-axis quantization of weights (no broadcasting/per-tensor). For
+  // MLI 1.1, ((in_ch == filters_num) || (in_ch == 1)) is also required, which
+  // rules out channel multiplier logic for multichannel input.
+
   bool ret_val = (filter->type == kTfLiteInt8) &&
                  (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) &&
                  (params->dilation_width_factor == 1) &&
                  (params->dilation_height_factor == 1) &&
                  (affine_quantization->scale->size ==
+#ifdef MLI_2_0
+                  filter->dims->data[kDepthwiseConvQuantizedDimension]);
+#else
                   filter->dims->data[kDepthwiseConvQuantizedDimension]) &&
                  ((in_ch == filters_num) || (in_ch == 1));
+#endif
   return ret_val;
 }
 
@@ -446,10 +598,16 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   // Note that quantized inference requires that all tensors have their
   // parameters set. This is usually done during quantized training.
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kFilterTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kBiasTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   if (data_type != kTfLiteFloat32 && !data->is_mli_applicable) {
     int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
@@ -459,8 +617,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
         &data->output_multiplier, &data->output_shift,
         &data->output_activation_min, &data->output_activation_max,
         data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift), num_channels);
+        reinterpret_cast<int32_t*>(data->per_channel_output_shift), num_channels);
   }
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
 #endif
   return kTfLiteOk;
 }
@@ -478,16 +641,23 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  MicroContext* micro_context = GetMicroContext(context);
 
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  const TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, kInputTensor);
+  const TfLiteTensor* filter = micro_context->AllocateTempInputTensor(node, kFilterTensor);
+  const TfLiteTensor* bias = micro_context->AllocateTempInputTensor(node, kBiasTensor);
   const TfLiteType data_type = input->type;
   int width = SizeOfDimension(input, 2);
   int height = SizeOfDimension(input, 1);
+
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+  int filter_width = SizeOfDimension(filter, 1);
+  int filter_height = SizeOfDimension(filter, 0);
+#else
   int filter_width = SizeOfDimension(filter, 2);
   int filter_height = SizeOfDimension(filter, 1);
+#endif
 
   // Per channel quantization is only needed for int8 inference. For other
   // quantized types, only a single scale and zero point is needed.
@@ -531,34 +701,96 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   data->output_zero_point = output->params.zero_point;
 
   if (data->is_mli_applicable) {
-    data->mli_in = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_weights = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_bias = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_out = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
+    data->mli_in = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_weights = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_bias = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_out = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
     data->cfg = static_cast<mli_conv2d_cfg*>(
         context->AllocatePersistentBuffer(context, sizeof(mli_conv2d_cfg)));
 
-    // reuse space allocated for OpData parameters
-    data->mli_weights->el_params.asym.scale.pi32 =
+#ifdef MLI_2_0
+    const int num_buffers = 2;
+    data->per_channel_scale_frac_bits =
+        static_cast<int8_t*>(context->AllocatePersistentBuffer(
+            context, num_buffers * num_channels * sizeof(int16_t)));
+#endif
+
+    // Reuse space allocated for OpData parameters.
+#ifdef MLI_2_0
+    *data->mli_weights.Scale<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_multiplier);
+    *data->mli_bias.Scale<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_multiplier) +
+        num_channels;
+#else
+    *data->mli_weights.Scale<int32_t**>() =
         static_cast<int32_t*>(data->per_channel_output_multiplier);
-    data->mli_bias->el_params.asym.scale.pi32 =
+    *data->mli_bias.Scale<int32_t**>() =
         static_cast<int32_t*>(data->per_channel_output_shift);
+#endif
 
-    data->mli_weights->el_params.asym.zero_point.pi16 =
+#ifdef MLI_2_0
+    *data->mli_weights.ZeroPoint<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_shift);
+    *data->mli_bias.ZeroPoint<int16_t**>() =
+        reinterpret_cast<int16_t*>(data->per_channel_output_shift) +
+        num_channels;
+#else
+    *data->mli_weights.ZeroPoint<int16_t**>() =
         reinterpret_cast<int16_t*>(&data->filter_zero_point);
-    data->mli_bias->el_params.asym.zero_point.pi16 =
+    *data->mli_bias.ZeroPoint<int16_t**>() =
         reinterpret_cast<int16_t*>(&data->filter_zero_point) + sizeof(int16_t);
+#endif
+
+#ifdef MLI_2_0
+    *data->mli_weights.ScaleFracBits<int8_t**>() =
+        reinterpret_cast<int8_t*>(data->per_channel_scale_frac_bits);
+    *data->mli_bias.ScaleFracBits<int8_t**>() =
+        reinterpret_cast<int8_t*>(data->per_channel_scale_frac_bits) +
+        num_channels;
+#endif
+
+    ops::micro::ConvertToMliTensor(input, &data->mli_in);
+    ops::micro::ConvertToMliTensorPerChannel(filter, &data->mli_weights,
+                                             /* is_bias_tensor = */ false);
+    ops::micro::ConvertToMliTensorPerChannel(bias, &data->mli_bias,
+                                             /* is_bias_tensor = */ true);
+#ifdef MLI_2_0
+    ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in,
+                                 &data->mli_weights);
+#endif
+    ops::micro::ConvertToMliTensor(output, &data->mli_out);
+
+#ifdef MLI_2_0
+    // Choose group convolution function for "channel multiplier" functionality.
+    const int in_ch = SizeOfDimension(input, 3);
+    const int filters_num = SizeOfDimension(filter, 3);
+    const int channels_num = SizeOfDimension(filter, 2);
+    if (in_ch == filters_num && channels_num == 1) {
+      data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 =
+          mli_krn_depthwise_conv2d(data->mli_weights.MliTensor());
+    } else {
+      data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 =
+          mli_krn_group_conv2d(data->mli_weights.MliTensor());
+    }
+#else
+    data->p_mli_krn_depthwise_conv2d_sa8_sa8_sa32 =
+        mli_krn_depthwise_conv2d(data->mli_weights.MliTensor(), data->cfg);
+#endif
 
-    ops::micro::ConvertToMliTensor(input, data->mli_in);
-    ops::micro::ConvertToMliTensorPerChannel(filter, data->mli_weights);
-    ops::micro::ConvertToMliTensorPerChannel(bias, data->mli_bias);
-    ops::micro::ConvertToMliTensor(output, data->mli_out);
+#ifdef MLI_2_0
+    data->cfg->dilation_width = 1;
+    data->cfg->dilation_height = 1;
+#endif
 
-    if (params->activation == kTfLiteActRelu) {
+    if (data->output_activation_min == -128 &&
+        data->output_activation_max == 127) {
+      data->cfg->relu.type = MLI_RELU_NONE;
+    } else if (params->activation == kTfLiteActRelu) {
       data->cfg->relu.type = MLI_RELU_GEN;
     } else if (params->activation == kTfLiteActRelu6) {
       data->cfg->relu.type = MLI_RELU_6;
@@ -619,9 +851,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(output),
       tflite::micro::GetTensorData<float>(output));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.",
+              TfLiteTypeGetName(input->type), input->type);
 #endif
 }
 TfLiteStatus EvalMliQuantizedPerChannel(
@@ -635,18 +866,24 @@ TfLiteStatus EvalMliQuantizedPerChannel(
     // Copy configuration data from external to local memory
     mli_conv2d_cfg cfg_local = *data.cfg;
 
-    ops::micro::MliTensorAttachBuffer<int8_t>(input, data.mli_in);
-    ops::micro::MliTensorAttachBuffer<int8_t>(filter, data.mli_weights);
-    ops::micro::MliTensorAttachBuffer<int32_t>(bias, data.mli_bias);
-    ops::micro::MliTensorAttachBuffer<int8_t>(output, data.mli_out);
+    ops::micro::MliTensorAttachBuffer<int8_t>(input, &data.mli_in);
+    ops::micro::MliTensorAttachBuffer<int8_t>(filter, &data.mli_weights);
+    ops::micro::MliTensorAttachBuffer<int32_t>(bias, &data.mli_bias);
+    ops::micro::MliTensorAttachBuffer<int8_t>(output, &data.mli_out);
 
     // for height slicing
-    const int heightDimension = 1;
-    int inSliceHeight = 0;
-    int outSliceHeight = 0;
-    const int kernelHeight =
-        static_cast<int>(data.mli_weights->shape[KRNL_DW_H_DIM_HWC]);
-    const int overlap = kernelHeight - cfg_local.stride_height;
+    const int height_dimension = 1;
+    int in_slice_height = 0;
+    int out_slice_height = 0;
+    uint32_t* mli_weights_shape = data.mli_weights.Shape();
+#ifdef MLI_2_0
+    const int kernel_height =
+        static_cast<int>(mli_weights_shape[KRNL_DW_H_DIM_HW1N]);
+#else
+    const int kernel_height =
+        static_cast<int>(mli_weights_shape[KRNL_DW_H_DIM_HWC]);
+#endif
+    const int overlap = kernel_height - cfg_local.stride_height;
 
     // for weight slicing (on output channels)
     // HWCN layout for weights, output channel dimension is the first dimension.
@@ -656,40 +893,52 @@ TfLiteStatus EvalMliQuantizedPerChannel(
     // Batch-Height-Width-Channel layout means last dimension is output
     // channels.
     const int out_tensor_ch_dimension = 3;
-    const int32_t in_channels = data.mli_in->shape[out_tensor_ch_dimension];
-    const int32_t out_channels = data.mli_out->shape[out_tensor_ch_dimension];
+    const int32_t in_channels = data.mli_in.Shape()[out_tensor_ch_dimension];
+    const int32_t out_channels = data.mli_out.Shape()[out_tensor_ch_dimension];
     int slice_channels =
-        static_cast<int>(data.mli_weights->shape[weight_out_ch_dimension]);
+        static_cast<int>(mli_weights_shape[weight_out_ch_dimension]);
 
     // Tensors for data in fast (local) memory
     // and config to copy data from external to local memory
-    mli_tensor weights_local = *data.mli_weights;
-    mli_tensor bias_local = *data.mli_bias;
-    mli_tensor in_local = *data.mli_in;
+    mli_tensor weights_local = *data.mli_weights.MliTensor();
+    mli_tensor bias_local = *data.mli_bias.MliTensor();
+    mli_tensor in_local = *data.mli_in.MliTensor();
     mli_tensor out_local =
-        *data.mli_out;  // this assumes that output shape
-                        // is already filled in the tensor struct.
+        *data.mli_out.MliTensor();  // this assumes that output shape
+                                    // is already filled in the tensor struct.
+
+    ops::micro::MliTensorInterface weights_local_interface(&weights_local);
+    ops::micro::MliTensorInterface bias_local_interface(&bias_local);
+    ops::micro::MliTensorInterface in_local_interface(&in_local);
+    ops::micro::MliTensorInterface out_local_interface(&out_local);
+
     mli_mov_cfg_t copy_config;
     mli_mov_cfg_for_copy(&copy_config);
 
     TF_LITE_ENSURE_STATUS(ops::micro::get_arc_scratch_buffer_for_conv_tensors(
-        context, &in_local, &weights_local, &bias_local, &out_local));
+        context, &in_local_interface, &weights_local_interface,
+        &bias_local_interface, &out_local_interface));
+
     /* is_local indicates that the tensor is already in local memory,
      so in that case the original tensor can be used,
      and there is no need to copy it to the local tensor*/
-    const bool in_is_local = in_local.data == data.mli_in->data;
-    const bool out_is_local = out_local.data == data.mli_out->data;
-    const bool w_is_local = weights_local.data == data.mli_weights->data;
-    const bool b_is_local = bias_local.data == data.mli_bias->data;
+    const bool in_is_local =
+        in_local_interface.Data<int8_t>() == data.mli_in.Data<int8_t>();
+    const bool out_is_local =
+        out_local_interface.Data<int8_t>() == data.mli_out.Data<int8_t>();
+    const bool w_is_local = weights_local_interface.Data<int8_t>() ==
+                            data.mli_weights.Data<int8_t>();
+    const bool b_is_local =
+        bias_local_interface.Data<int32_t>() == data.mli_bias.Data<int32_t>();
 
     TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_io(
-        &in_local, &out_local, kernelHeight, cfg_local.stride_height,
-        cfg_local.padding_top, cfg_local.padding_bottom, &inSliceHeight,
-        &outSliceHeight));
+        &in_local_interface, &out_local_interface, kernel_height,
+        cfg_local.stride_height, cfg_local.padding_top,
+        cfg_local.padding_bottom, &in_slice_height, &out_slice_height));
     TF_LITE_ENSURE_STATUS(
         ops::micro::arc_scratch_buffer_calc_slice_size_weights(
-            &weights_local, &bias_local, weight_out_ch_dimension,
-            &slice_channels));
+            &weights_local_interface, &bias_local_interface,
+            weight_out_ch_dimension, &slice_channels));
 
     /* if input channels is not equal to output channels, a channel multiplier
        is used. in this case the slice channels needs to be rounded down to a
@@ -698,13 +947,16 @@ TfLiteStatus EvalMliQuantizedPerChannel(
       slice_channels = (slice_channels / in_channels) * in_channels;
     }
 
-    ops::micro::TensorSlicer b_slice(data.mli_bias, bias_out_ch_dimension,
-                                     slice_channels);
-    ops::micro::TensorSlicer w_slice(data.mli_weights, weight_out_ch_dimension,
-                                     slice_channels, 0, 0, 0, true);
-    ops::micro::TensorSlicer out_ch_slice(data.mli_out, out_tensor_ch_dimension,
+    ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(),
+                                     bias_out_ch_dimension, slice_channels);
+    ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(),
+                                     weight_out_ch_dimension, slice_channels, 0,
+                                     0, 0, true);
+    ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(),
+                                          out_tensor_ch_dimension,
                                           slice_channels, 0, 0, 0, true);
-    ops::micro::TensorSlicer in_ch_slice(data.mli_in, out_tensor_ch_dimension,
+    ops::micro::TensorSlicer in_ch_slice(data.mli_in.MliTensor(),
+                                         out_tensor_ch_dimension,
                                          slice_channels, 0, 0, 0, true);
 
     mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
@@ -730,16 +982,16 @@ TfLiteStatus EvalMliQuantizedPerChannel(
       the sliceHeight has been calculated. The tensor slicer is configured that
       it will completely slice the nBatch dimension (0) and slice the height
       dimension (1) in chunks of 'sliceHeight' */
-      ops::micro::TensorSlicer in_slice(in_ch_slice.Sub(), heightDimension,
-                                        inSliceHeight, padding_top,
+      ops::micro::TensorSlicer in_slice(in_ch_slice.Sub(), height_dimension,
+                                        in_slice_height, padding_top,
                                         padding_bottom, overlap);
 
       /* output tensor is already sliced in the output channel dimension.
       out_ch_slice.Sub() is the tensor for the amount of output channels of this
       iteration of the weight slice loop. This tensor needs to be further
       sliced over the batch and height dimension. */
-      ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), heightDimension,
-                                         outSliceHeight);
+      ops::micro::TensorSlicer out_slice(out_ch_slice.Sub(), height_dimension,
+                                         out_slice_height);
 
       /* setup the pointers to the local or remote tensor to make the code
        * inside the loop easier. */
@@ -747,19 +999,48 @@ TfLiteStatus EvalMliQuantizedPerChannel(
       mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local;
 
       while (!out_slice.Done()) {
+        if (!out_is_local) {
+          ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local);
+          ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local);
+        }
         TF_LITE_ENSURE(context, !in_slice.Done());
         cfg_local.padding_top = in_slice.GetPaddingPre();
         cfg_local.padding_bottom = in_slice.GetPaddingPost();
 
         // if same input copy as previous iteration, skip the copy of input
+#ifdef MLI_2_0
+        if ((in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) ||
+            (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) {
+          mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
+          input_buffer_ptr = in_slice.Sub()->data.mem.pi8;
+          input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0);
+        }
+
+#ifdef MLI_2_0_KRNL_TEST
+        // Check conditions here to prevent the use of non-contiguous buffer
+        // memory.
+        if (mli_weights_shape[weight_out_ch_dimension] !=
+            w_slice.Sub()->shape[3]) {
+          MicroPrintf("Slicing is not supported with real-time permutation.");
+          return kTfLiteError;
+        }
+        uint8_t dim_order[] = {1, 2, 0, 3};
+        ops::micro::change_shape(w_ptr, dim_order);
+#endif
+
+        data.p_mli_krn_depthwise_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr,
+                                                     &cfg_local, out_ptr);
+#else
         if ((in_slice.Sub()->data != input_buffer_ptr) ||
             (mli_hlp_count_elem_num(in_slice.Sub(), 0) != input_buffer_size)) {
           mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
           input_buffer_ptr = in_slice.Sub()->data;
           input_buffer_size = mli_hlp_count_elem_num(in_slice.Sub(), 0);
         }
-        mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr,
-                                                   &cfg_local, out_ptr);
+        data.p_mli_krn_depthwise_conv2d_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr,
+                                                     &cfg_local, out_ptr);
+#endif
+
         mli_mov_tensor_sync(out_ptr, &copy_config, out_slice.Sub());
 
         in_slice.Next();
@@ -808,53 +1089,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(output),
       tflite::micro::GetTensorData<int8_t>(output));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Node configuration is not supported by ARC MLI Library.");
-#endif
-}
-
-void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                   TfLiteDepthwiseConvParams* params, const OpData& data,
-                   const TfLiteEvalTensor* input,
-                   const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias,
-                   TfLiteEvalTensor* output) {
-#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  const int32_t input_offset = -data.input_zero_point;
-  const int32_t filter_offset = -data.filter_zero_point;
-  const int32_t output_offset = data.output_zero_point;
-
-  tflite::DepthwiseParams op_params;
-  // Padding type is ignored, but still set.
-  op_params.padding_type = PaddingType::kSame;
-  op_params.padding_values.width = data.padding.width;
-  op_params.padding_values.height = data.padding.height;
-  op_params.stride_width = params->stride_width;
-  op_params.stride_height = params->stride_height;
-  op_params.dilation_width_factor = params->dilation_width_factor;
-  op_params.dilation_height_factor = params->dilation_height_factor;
-  op_params.depth_multiplier = params->depth_multiplier;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
-  op_params.output_multiplier = data.output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
-  op_params.output_shift = -data.output_shift;
-
-  tflite::reference_ops::DepthwiseConv(
-      op_params, tflite::micro::GetTensorShape(input),
-      tflite::micro::GetTensorData<uint8_t>(input),
-      tflite::micro::GetTensorShape(filter),
-      tflite::micro::GetTensorData<uint8_t>(filter),
-      tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<int32_t>(bias),
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<uint8_t>(output));
-#else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
 #endif
 }
 
@@ -879,21 +1114,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       EvalFloat(context, node, params, data, input, filter, bias, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       if (data.is_mli_applicable) {
         EvalMliQuantizedPerChannel(context, node, params, data, input, filter,
                                    bias, output);
@@ -902,18 +1135,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                 bias, output);
       }
       break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      EvalQuantized(context, node, params, data, input, filter, bias, output);
-      break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -922,14 +1146,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
@@ -1120,7 +1337,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node)
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_min),
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_max),
         data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        reinterpret_cast<int32_t*>(data->per_channel_output_shift),
         num_channels));
 
       if (data->op_params.dilation_height == 1 && data->op_params.dilation_width == 1) {
@@ -1403,6 +1620,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -1413,12 +1632,14 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
-#include "freertos/FreeRTOS.h"
 #include <esp_timer.h>
+
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
 
 long long dc_total_time = 0;
 
@@ -1427,7 +1648,9 @@ namespace {
 
 struct NodeData {
   OpDataConv op_data;
+#if ESP_NN
   int buffer_idx;
+#endif
 };
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -1435,6 +1658,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   return context->AllocatePersistentBuffer(context, sizeof(NodeData));
 }
 
+#if ESP_NN
 inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                                     const TfLiteDepthwiseConvParams& params,
                                     const NodeData& data,
@@ -1495,6 +1719,7 @@ inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
     if (data.buffer_idx > -1) {
       scratch_buf = context->GetScratchBuffer(context, data.buffer_idx);
     }
+
     esp_nn_set_depthwise_conv_scratch_buf(scratch_buf);
 
     data_dims_t input_dims =  {
@@ -1540,6 +1765,7 @@ inline void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
         tflite::micro::GetTensorData<int8_t>(output));
   }
 }
+#endif
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
@@ -1549,14 +1775,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteDepthwiseConvParams& params =
       *(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
 
-  TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-  const TfLiteTensor* input =
-      GetInput(context, node, kDepthwiseConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kDepthwiseConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   const int input_width = input->dims->data[2];
   const int input_height = input->dims->data[1];
@@ -1598,6 +1829,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       context, node, params, input_width, input_height, filter_width,
       filter_height, output_width, output_height, input->type, &data->op_data));
 
+#if ESP_NN
   if (input->type == kTfLiteInt8) {
     data_dims_t input_dims =  {
                                 .width = input_width, .height = input_height,
@@ -1625,11 +1857,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       data->buffer_idx = -1;
     }
   }
+#endif
 
-  //micro_context->DeallocateTempTfLiteTensor(input);
-  //micro_context->DeallocateTempTfLiteTensor(filter);
-  //micro_context->DeallocateTempTfLiteTensor(bias);
-  //micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
 
   return kTfLiteOk;
 }
@@ -1656,11 +1889,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   long long start_time = esp_timer_get_time();
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       tflite::reference_ops::DepthwiseConv(
           DepthwiseConvParamsFloat(params, data.op_data),
           tflite::micro::GetTensorShape(input),
@@ -1673,20 +1906,35 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorData<float>(output));
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
+#if ESP_NN
       EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                               output);
+#else  // !ESP_NN: fall back to the portable reference int8 kernel.
+      reference_integer_ops::DepthwiseConvPerChannel(
+          DepthwiseConvParamsQuantized(params, data.op_data),
+          data.op_data.per_channel_output_multiplier,
+          data.op_data.per_channel_output_shift,
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<int8_t>(filter),
+          tflite::micro::GetTensorShape(bias),
+          tflite::micro::GetOptionalTensorData<int32_t>(bias),  // may be null
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+#endif  // ESP_NN
       break;
     case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_U8
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_U8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       //EvalQuantized(context, node, params, &data, input, filter, bias, output);
       reference_ops::DepthwiseConv(
           DepthwiseConvParamsQuantized(params, data.op_data),
@@ -1704,7 +1952,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                          TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
   }
-
   long long time_this_instance = esp_timer_get_time() - start_time;
   dc_total_time += time_this_instance;
   // printf("time this instance: %llu\n", time_this_instance / 1000);
@@ -1715,17 +1962,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
+
 #else
 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 
@@ -1746,15 +1987,12 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -1785,12 +2023,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::DepthwiseConv(
           DepthwiseConvParamsFloat(params, data),
           tflite::micro::GetTensorShape(input),
@@ -1798,34 +2035,61 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       break;
     }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      reference_integer_ops::DepthwiseConvPerChannel(
-          DepthwiseConvParamsQuantized(params, data),
-          data.per_channel_output_multiplier, data.per_channel_output_shift,
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<int8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
+#endif
+      switch (filter->type) {
+        case kTfLiteInt4: {
+          int8_t* unpacked_filter_data = static_cast<int8_t*>(
+              context->GetScratchBuffer(context, data.filter_buffer_index));
+          tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter).FlatSize(),
+              unpacked_filter_data);
+          reference_integer_ops::DepthwiseConvPerChannel(
+              DepthwiseConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        case kTfLiteInt8: {
+          reference_integer_ops::DepthwiseConvPerChannel(
+              DepthwiseConvParamsQuantized(params, data),
+              data.per_channel_output_multiplier, data.per_channel_output_shift,
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        default:
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+      }
       break;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -1834,14 +2098,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_DEPTHWISE_CONV_2D() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/DepthwiseConvPrepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, DepthwiseConvPrepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h
index 049af09..000e792 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -49,6 +49,32 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
 
 TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_DEPTHWISE_CONV_2D();
+
+#if defined(CMSIS_NN)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8 activations and int8 weights and uses the latency optimized
+// implementations.
+TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8();
+
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int16 activations and int8 weights and uses the latency optimized
+// implementations.
+TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16();
+
+#else  // !defined(CMSIS_NN): typed variants forward to the generic kernel.
+inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT8() {  // Fallback.
+  return Register_DEPTHWISE_CONV_2D();
+}
+
+inline TfLiteRegistration Register_DEPTHWISE_CONV_2D_INT16() {  // Fallback.
+  return Register_DEPTHWISE_CONV_2D();
+}
+#endif  // defined(CMSIS_NN)
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_DEPTHWISE_CONV_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
index 4b444e8..5263961 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv_common.cc
@@ -18,7 +18,6 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
@@ -95,13 +94,18 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
       params.dilation_width_factor, height, width, filter_height, filter_width,
       padding, &out_height, &out_width);
 
-  const TfLiteTensor* input = GetInput(context, node, kConvInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kConvBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kConvOutputTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kConvBiasTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   // Note that quantized inference requires that all tensors have their
@@ -113,8 +117,7 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
         context, input, filter, bias, output, params.activation,
         &data->output_multiplier, &data->output_shift,
         &data->output_activation_min, &data->output_activation_max,
-        data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        data->per_channel_output_multiplier, data->per_channel_output_shift,
         output_channels));
   }
 
@@ -122,6 +125,11 @@ TfLiteStatus CalculateOpDataDepthwiseConv(
   data->filter_zero_point = filter->params.zero_point;
   data->output_zero_point = output->params.zero_point;
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
@@ -132,14 +140,16 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
   OpDataConv* data = static_cast<OpDataConv*>(node->user_data);
   const auto& params =
       *(static_cast<const TfLiteDepthwiseConvParams*>(node->builtin_data));
+  MicroContext* micro_context = GetMicroContext(context);
 
-  TfLiteTensor* output = GetOutput(context, node, kDepthwiseConvOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kDepthwiseConvOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
-  const TfLiteTensor* input =
-      GetInput(context, node, kDepthwiseConvInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kDepthwiseConvWeightsTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kDepthwiseConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
 
   const int input_width = input->dims->data[2];
@@ -150,13 +160,15 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
   const int output_height = output->dims->data[1];
 
   // Dynamically allocate per-channel quantization parameters.
-  const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
-  data->per_channel_output_multiplier =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
-  data->per_channel_output_shift =
-      static_cast<int32_t*>(context->AllocatePersistentBuffer(
-          context, num_channels * sizeof(int32_t)));
+  if (input->type != kTfLiteFloat32) {
+    const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
+    data->per_channel_output_multiplier =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+    data->per_channel_output_shift =
+        static_cast<int32_t*>(context->AllocatePersistentBuffer(
+            context, num_channels * sizeof(int32_t)));
+  }
 
   // All per-channel quantized tensors need valid zero point and scale arrays.
   if (input->type == kTfLiteInt8) {
@@ -178,10 +190,23 @@ TfLiteStatus DepthwiseConvPrepare(TfLiteContext* context, TfLiteNode* node) {
                       affine_quantization->zero_point->size);
   }
 
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(context, filter_size,
+                                         &data->filter_buffer_index);
+  }
+
   TF_LITE_ENSURE_STATUS(CalculateOpDataDepthwiseConv(
       context, node, params, input_width, input_height, filter_width,
       filter_height, output_width, output_height, input->type, data));
 
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+
   return kTfLiteOk;
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cc
index b31e913..c41036e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,118 +22,67 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace dequantize {
 
-struct OpData {
-  tflite::DequantizationParams quantization_params;
-  // The scaling factor from input to output (aka the 'real multiplier') can
-  // be represented as a fixed point multiplier plus a left shift.
-  int32_t output_multiplier;
-  int output_shift;
-  int32_t output_zero_point;
-};
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+void* DequantizeInit(TfLiteContext* context, const char* buffer,
+                     size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
-  const TfLiteTensor* input = GetInput(context, node, 0);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, 0);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
-                              input->type == kTfLiteInt8 ||
-                              input->type == kTfLiteInt16);
-  TF_LITE_ENSURE(context, output->type == kTfLiteFloat32);
-
-  if (output->type == kTfLiteInt32) {
-    const double effective_output_scale =
-        static_cast<double>(input->params.scale) /
-        static_cast<double>(output->params.scale);
-    QuantizeMultiplier(effective_output_scale, &data->output_multiplier,
-                       &data->output_shift);
-  }
-
-  data->quantization_params.zero_point = input->params.zero_point;
-  data->quantization_params.scale = static_cast<double>(input->params.scale);
-  data->output_zero_point = output->params.zero_point;
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(DequantizeOpData));
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus DequantizeEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
+  DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);
 
   const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
   TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
 
   if (output->type == kTfLiteFloat32) {
     switch (input->type) {
-      case kTfLiteUInt8:
+      case kTfLiteInt8:
         reference_ops::Dequantize(data->quantization_params,
                                   tflite::micro::GetTensorShape(input),
-                                  tflite::micro::GetTensorData<uint8_t>(input),
+                                  tflite::micro::GetTensorData<int8_t>(input),
                                   tflite::micro::GetTensorShape(output),
                                   tflite::micro::GetTensorData<float>(output));
         break;
-      case kTfLiteInt8:
+      case kTfLiteInt16:
         reference_ops::Dequantize(data->quantization_params,
                                   tflite::micro::GetTensorShape(input),
-                                  tflite::micro::GetTensorData<int8_t>(input),
+                                  tflite::micro::GetTensorData<int16_t>(input),
                                   tflite::micro::GetTensorShape(output),
                                   tflite::micro::GetTensorData<float>(output));
         break;
-      case kTfLiteInt16:
+      case kTfLiteUInt8:
         reference_ops::Dequantize(data->quantization_params,
                                   tflite::micro::GetTensorShape(input),
-                                  tflite::micro::GetTensorData<int16_t>(input),
+                                  tflite::micro::GetTensorData<uint8_t>(input),
                                   tflite::micro::GetTensorShape(output),
                                   tflite::micro::GetTensorData<float>(output));
         break;
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else {
-    TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                       TfLiteTypeGetName(input->type),
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("Input %s, output %s not supported.",
+                TfLiteTypeGetName(input->type),
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace dequantize
-
 TfLiteRegistration Register_DEQUANTIZE() {
-  return {/*init=*/dequantize::Init,
-          /*free=*/nullptr,
-          /*prepare=*/dequantize::Prepare,
-          /*invoke=*/dequantize::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(DequantizeInit, DequantizePrepare,
+                                   DequantizeEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h
new file mode 100644
index 0000000..ee45f36
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h
@@ -0,0 +1,38 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+struct DequantizeOpData {  // Per-node state, read from node->user_data.
+  tflite::DequantizationParams quantization_params;  // Read by DequantizeEval.
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+  int32_t output_zero_point;
+};
+
+TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_DEQUANTIZE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cc
new file mode 100644
index 0000000..e8ae297
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize_common.cc
@@ -0,0 +1,67 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/dequantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+
+// Checks that the node has one int8/int16/uint8 input and one float32 output,
+// then caches the input zero point/scale and output zero point in its op data.
+TfLiteStatus DequantizePrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  DequantizeOpData* data = static_cast<DequantizeOpData*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  // Give the input back before bailing out so the temp tensor is not leaked.
+  if (output == nullptr) micro_context->DeallocateTempTfLiteTensor(input);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  // Record the type checks first; the failing returns are deferred until
+  // both temp tensors have been released.
+  const bool input_type_ok = input->type == kTfLiteInt8 ||
+                             input->type == kTfLiteInt16 ||
+                             input->type == kTfLiteUInt8;
+  const bool output_type_ok = output->type == kTfLiteFloat32;
+  if (input_type_ok && output_type_ok) {
+    data->quantization_params.zero_point = input->params.zero_point;
+    data->quantization_params.scale = static_cast<double>(input->params.scale);
+    data->output_zero_point = output->params.zero_point;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  TF_LITE_ENSURE(context, input_type_ok);
+  TF_LITE_ENSURE(context, output_type_ok);
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cc
new file mode 100644
index 0000000..2209a58
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/detection_postprocess.cc
@@ -0,0 +1,807 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <algorithm>
+#include <numeric>
+#include <tuple>
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+namespace {
+
+/**
+ * This version of detection_postprocess is specific to TFLite Micro. It
+ * differs from the TFLite version in the following ways:
+ *
+ * 1.) Temporaries (temporary tensors) - Micro uses the scratch buffer API.
+ * 2.) Output dimensions - this version does not support undefined output
+ * dimensions, so the model must have static output dimensions.
+ */
+
+// Input tensor indices (positions in the node's input list)
+constexpr int kInputTensorBoxEncodings = 0;
+constexpr int kInputTensorClassPredictions = 1;
+constexpr int kInputTensorAnchors = 2;
+
+// Output tensor indices (positions in the node's output list)
+constexpr int kOutputTensorDetectionBoxes = 0;
+constexpr int kOutputTensorDetectionClasses = 1;
+constexpr int kOutputTensorDetectionScores = 2;
+constexpr int kOutputTensorNumDetections = 3;
+
+constexpr int kNumCoordBox = 4;  // Floats per box; see the static_asserts.
+constexpr int kBatchSize = 1;    // Only batch size accepted when decoding.
+
+constexpr int kNumDetectionsPerClass = 100;  // Default detections_per_class.
+
+// The object detection model produces axis-aligned boxes in two formats:
+// BoxCorner represents the lower left corner (xmin, ymin) and
+// the upper right corner (xmax, ymax).
+// CenterSize represents the center (xcenter, ycenter), height and width.
+// BoxCornerEncoding and CenterSizeEncoding are related as follows:
+// ycenter = y / y_scale * anchor.h + anchor.y;
+// xcenter = x / x_scale * anchor.w + anchor.x;
+// half_h = 0.5 * exp(h / h_scale) * anchor.h;
+// half_w = 0.5 * exp(w / w_scale) * anchor.w;
+// ymin = ycenter - half_h
+// ymax = ycenter + half_h
+// xmin = xcenter - half_w
+// xmax = xcenter + half_w
+struct BoxCornerEncoding {  // Four floats laid out as ymin, xmin, ymax, xmax.
+  float ymin;
+  float xmin;
+  float ymax;
+  float xmax;
+};
+
+struct CenterSizeEncoding {  // Box as center (y, x) plus height and width.
+  float y;
+  float x;
+  float h;
+  float w;
+};
+// Both encodings are cast to/from float arrays, so each must be 4 packed floats.
+static_assert(sizeof(BoxCornerEncoding) == sizeof(float) * kNumCoordBox,
+              "Size of BoxCornerEncoding is 4 float values");
+static_assert(sizeof(CenterSizeEncoding) == sizeof(float) * kNumCoordBox,
+              "Size of CenterSizeEncoding is 4 float values");
+
+struct OpData {  // Per-node state: parsed options, scratch handles, quant info.
+  int max_detections;
+  int max_classes_per_detection;  // Fast Non-Max-Suppression
+  int detections_per_class;       // Regular Non-Max-Suppression
+  float non_max_suppression_score_threshold;
+  float intersection_over_union_threshold;
+  int num_classes;
+  bool use_regular_non_max_suppression;
+  CenterSizeEncoding scale_values;  // y/x/h/w scales used when decoding boxes.
+
+  // Scratch buffer indexes (set by RequestScratchBufferInArena in Prepare)
+  int active_candidate_idx;  // One uint8 flag per box during NMS.
+  int decoded_boxes_idx;     // One BoxCornerEncoding per box.
+  int scores_idx;
+  int score_buffer_idx;  // Per-box scores handed to single-class NMS.
+  int keep_scores_idx;   // Scores that passed the score threshold.
+  int scores_after_regular_non_max_suppression_idx;
+  int sorted_values_idx;
+  int keep_indices_idx;  // Box indices that passed the score threshold.
+  int sorted_indices_idx;
+  int buffer_idx;
+  int selected_idx;  // Box indices selected by single-class NMS.
+
+  // Cached tensor scale and zero point values for quantized operations
+  TfLiteQuantizationParams input_box_encodings;
+  TfLiteQuantizationParams input_class_predictions;
+  TfLiteQuantizationParams input_anchors;
+};
+
+// Parses the flexbuffer-encoded custom options in `buffer` into an OpData
+// taken from the persistent allocator and returns it.
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  const auto* raw_options = reinterpret_cast<const uint8_t*>(buffer);
+  const flexbuffers::Map& options =
+      flexbuffers::GetRoot(raw_options, length).AsMap();
+  auto* parsed = reinterpret_cast<OpData*>(
+      context->AllocatePersistentBuffer(context, sizeof(OpData)));
+
+  parsed->max_detections = options["max_detections"].AsInt32();
+  parsed->max_classes_per_detection =
+      options["max_classes_per_detection"].AsInt32();
+  // These two options fall back to a default when their lookup is null.
+  const auto per_class = options["detections_per_class"];
+  parsed->detections_per_class =
+      per_class.IsNull() ? kNumDetectionsPerClass : per_class.AsInt32();
+  const auto regular_nms = options["use_regular_nms"];
+  parsed->use_regular_non_max_suppression =
+      regular_nms.IsNull() ? false : regular_nms.AsBool();
+  parsed->non_max_suppression_score_threshold =
+      options["nms_score_threshold"].AsFloat();
+  parsed->intersection_over_union_threshold =
+      options["nms_iou_threshold"].AsFloat();
+  parsed->num_classes = options["num_classes"].AsInt32();
+  parsed->scale_values.y = options["y_scale"].AsFloat();
+  parsed->scale_values.x = options["x_scale"].AsFloat();
+  parsed->scale_values.h = options["h_scale"].AsFloat();
+  parsed->scale_values.w = options["w_scale"].AsFloat();
+  return parsed;
+}
+
+// Validates the node, caches the input quantization parameters in OpData and
+// requests the Eval scratch buffers; temp tensors are freed on every path.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* op_data = static_cast<OpData*>(node->user_data);
+  // Inputs: box_encodings, scores, anchors
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
+  // Outputs: detection boxes, classes, scores and num_detections
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 4);
+
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input_box_encodings =
+      micro_context->AllocateTempInputTensor(node, kInputTensorBoxEncodings);
+  TfLiteTensor* input_class_predictions =
+      micro_context->AllocateTempInputTensor(node,
+                                             kInputTensorClassPredictions);
+  TfLiteTensor* input_anchors =
+      micro_context->AllocateTempInputTensor(node, kInputTensorAnchors);
+  const bool inputs_present = input_box_encodings != nullptr &&
+                              input_class_predictions != nullptr &&
+                              input_anchors != nullptr;
+
+  // Runs in a lambda so the early returns inside the TF_LITE_ENSURE* macros
+  // (bad rank, failed scratch request) cannot skip the deallocations below.
+  const auto configure = [&]() -> TfLiteStatus {
+    TF_LITE_ENSURE_EQ(context, NumDimensions(input_box_encodings), 3);
+    TF_LITE_ENSURE_EQ(context, NumDimensions(input_class_predictions), 3);
+    TF_LITE_ENSURE_EQ(context, NumDimensions(input_anchors), 2);
+    op_data->input_box_encodings.scale = input_box_encodings->params.scale;
+    op_data->input_box_encodings.zero_point =
+        input_box_encodings->params.zero_point;
+    op_data->input_class_predictions.scale =
+        input_class_predictions->params.scale;
+    op_data->input_class_predictions.zero_point =
+        input_class_predictions->params.zero_point;
+    op_data->input_anchors.scale = input_anchors->params.scale;
+    op_data->input_anchors.zero_point = input_anchors->params.zero_point;
+    const int num_boxes = input_box_encodings->dims->data[1];
+    const int max_detections = op_data->max_detections;
+    const int num_scores = input_class_predictions->dims->data[1] *
+                           input_class_predictions->dims->data[2];
+    const int merged_rows = std::max(op_data->num_classes, max_detections);
+    const int selected_rows = std::min(num_boxes, max_detections);
+    const auto request = [context](size_t bytes, int* buffer_idx) {
+      return context->RequestScratchBufferInArena(context, bytes, buffer_idx);
+    };
+    TF_LITE_ENSURE_STATUS(request(num_boxes, &op_data->active_candidate_idx));
+    TF_LITE_ENSURE_STATUS(request(num_boxes * kNumCoordBox * sizeof(float),
+                                  &op_data->decoded_boxes_idx));
+    TF_LITE_ENSURE_STATUS(
+        request(num_scores * sizeof(float), &op_data->scores_idx));
+    TF_LITE_ENSURE_STATUS(
+        request(num_boxes * sizeof(float), &op_data->score_buffer_idx));
+    TF_LITE_ENSURE_STATUS(
+        request(num_boxes * sizeof(float), &op_data->keep_scores_idx));
+    TF_LITE_ENSURE_STATUS(
+        request(max_detections * num_boxes * sizeof(float),
+                &op_data->scores_after_regular_non_max_suppression_idx));
+    TF_LITE_ENSURE_STATUS(request(max_detections * num_boxes * sizeof(float),
+                                  &op_data->sorted_values_idx));
+    TF_LITE_ENSURE_STATUS(
+        request(num_boxes * sizeof(int), &op_data->keep_indices_idx));
+    TF_LITE_ENSURE_STATUS(request(max_detections * num_boxes * sizeof(int),
+                                  &op_data->sorted_indices_idx));
+    TF_LITE_ENSURE_STATUS(request(merged_rows * num_boxes * sizeof(int),
+                                  &op_data->buffer_idx));
+    TF_LITE_ENSURE_STATUS(request(selected_rows * num_boxes * sizeof(int),
+                                  &op_data->selected_idx));
+    return kTfLiteOk;
+  };
+  const TfLiteStatus status = inputs_present ? configure() : kTfLiteError;
+  for (TfLiteTensor* tensor :
+       {input_box_encodings, input_class_predictions, input_anchors}) {
+    if (tensor != nullptr) micro_context->DeallocateTempTfLiteTensor(tensor);
+  }
+  TF_LITE_ENSURE(context, inputs_present);
+  return status;
+}
+
+class Dequantizer {  // Functor: quantized uint8 -> (x - zero_point) * scale.
+ public:
+  Dequantizer(int zero_point, float scale)
+      : zero_point_(zero_point), scale_(scale) {}
+  float operator()(uint8_t x) {
+    const float shifted = static_cast<float>(x) - zero_point_;
+    return shifted * scale_;
+  }
+ private:
+  int zero_point_;
+  float scale_;
+};
+
+template <class T>  // T is the pointer type to view the tensor's data as.
+T ReInterpretTensor(const TfLiteEvalTensor* tensor) {
+  // Read-only overload: the data is fetched as const float* and re-typed.
+  return reinterpret_cast<T>(tflite::micro::GetTensorData<float>(tensor));
+}
+
+template <class T>  // T is the pointer type to view the tensor's data as.
+T ReInterpretTensor(TfLiteEvalTensor* tensor) {
+  // Mutable overload: the data is fetched as float* and re-typed.
+  return reinterpret_cast<T>(tflite::micro::GetTensorData<float>(tensor));
+}
+
+// Decodes each float32 box encoding into corner form with its anchor and
+// stores it in the decoded_boxes scratch buffer; other types are an error.
+TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
+                                   OpData* op_data) {
+  // Parse input tensor boxencodings
+  const TfLiteEvalTensor* input_box_encodings =
+      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
+  TF_LITE_ENSURE_EQ(context, input_box_encodings->dims->data[0], kBatchSize);
+  const int num_boxes = input_box_encodings->dims->data[1];
+  TF_LITE_ENSURE(context, input_box_encodings->dims->data[2] >= kNumCoordBox);
+  const TfLiteEvalTensor* input_anchors =
+      tflite::micro::GetEvalInput(context, node, kInputTensorAnchors);
+  // Loop invariants, looked up once rather than once per box.
+  const int box_stride = input_box_encodings->dims->data[2];
+  auto* decoded_boxes = reinterpret_cast<BoxCornerEncoding*>(
+      context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
+
+  // Decode the boxes to get (ymin, xmin, ymax, xmax) based on the anchors
+  CenterSizeEncoding box_centersize;
+  CenterSizeEncoding scale_values = op_data->scale_values;
+  CenterSizeEncoding anchor;
+  for (int idx = 0; idx < num_boxes; ++idx) {
+    switch (input_box_encodings->type) {
+        // Float
+      case kTfLiteFloat32: {
+        // Only the first kNumCoordBox floats of each encoding row are read.
+        const float* boxes = &(tflite::micro::GetTensorData<float>(
+            input_box_encodings)[idx * box_stride]);
+        box_centersize = *reinterpret_cast<const CenterSizeEncoding*>(boxes);
+        anchor =
+            ReInterpretTensor<const CenterSizeEncoding*>(input_anchors)[idx];
+        break;
+      }
+      default:
+        // Unsupported type.
+        return kTfLiteError;
+    }
+
+    float ycenter = static_cast<float>(static_cast<double>(box_centersize.y) /
+                                           static_cast<double>(scale_values.y) *
+                                           static_cast<double>(anchor.h) +
+                                       static_cast<double>(anchor.y));
+    float xcenter = static_cast<float>(static_cast<double>(box_centersize.x) /
+                                           static_cast<double>(scale_values.x) *
+                                           static_cast<double>(anchor.w) +
+                                       static_cast<double>(anchor.x));
+    float half_h =
+        static_cast<float>(0.5 *
+                           (std::exp(static_cast<double>(box_centersize.h) /
+                                     static_cast<double>(scale_values.h))) *
+                           static_cast<double>(anchor.h));
+    float half_w =
+        static_cast<float>(0.5 *
+                           (std::exp(static_cast<double>(box_centersize.w) /
+                                     static_cast<double>(scale_values.w))) *
+                           static_cast<double>(anchor.w));
+    BoxCornerEncoding& box = decoded_boxes[idx];
+    box.ymin = ycenter - half_h;
+    box.xmin = xcenter - half_w;
+    box.ymax = ycenter + half_h;
+    box.xmax = xcenter + half_w;
+  }
+  return kTfLiteOk;
+}
+
+void DecreasingPartialArgSort(const float* values, int num_values,
+                              int num_to_sort, int* indices) {
+  int* const end = indices + num_values;
+  std::iota(indices, end, 0);  // identity permutation, then rank by value
+  std::partial_sort(indices, indices + num_to_sort, end, [=](int a, int b) {
+    return values[b] < values[a] || (!(values[a] < values[b]) && a < b);
+  });  // descending values; equal values keep the lower index first
+}
+
+template <typename Compare>  // Stable insertion sort of [start, end).
+void InsertionSort(int* start, int* end, Compare compare) {
+  for (int* next = start; next != end; ++next) {  // [start, next) is sorted
+    std::rotate(std::upper_bound(start, next, *next, compare), next, next + 1);
+  }
+}
+
+// Merges sorted runs values[0, half_num_values) and [half_num_values,
+// num_values) into scratch, then copies the merged result back into values.
+template <typename Compare>
+void TopDownMerge(int* values, int* scratch, const int half_num_values,
+                  int num_values, Compare compare) {
+  int left = 0;
+  int right = half_num_values;
+  for (int out = 0; out < num_values; ++out) {
+    // Right run wins when left is exhausted or it is strictly ahead (stable).
+    const bool take_right =
+        left >= half_num_values ||
+        (right < num_values && compare(values[right], values[left]));
+    scratch[out] = take_right ? values[right++] : values[left++];
+  }
+  std::copy_n(scratch, num_values, values);  // from <algorithm>
+}
+
+// Sorts values[0, num_values) under compare: fewer than 20 elements go to
+// InsertionSort, otherwise both halves are sorted recursively and merged.
+template <typename Compare>
+void MergeSort(int* values, int* scratch, const int num_values,
+               Compare compare) {
+  constexpr int kInsertionSortBelow = 20;
+  if (num_values >= kInsertionSortBelow) {
+    const int left_count = num_values / 2;
+    const int right_count = num_values - left_count;
+    int* const right_half = values + left_count;
+    MergeSort(values, scratch, left_count, compare);
+    MergeSort(right_half, scratch, right_count, compare);
+    TopDownMerge(values, scratch, left_count, num_values, compare);
+  } else {
+    InsertionSort(values, values + num_values, compare);
+  }
+}
+
+void DecreasingArgSort(const float* values, int num_values, int* indices,
+                       int* scratch) {
+  // Leaves indices as 0..num_values-1 ordered by decreasing values[index];
+  // scratch is working space passed through to MergeSort.
+  const auto desc = [values](int a, int b) { return values[a] > values[b]; };
+  std::iota(indices, indices + num_values, 0);
+  MergeSort(indices, scratch, num_values, desc);
+}
+
+// Copies each values[i] >= threshold and its index i to the front of
+// keep_values / keep_indices, in order; returns the number of entries kept.
+int SelectDetectionsAboveScoreThreshold(const float* values, int size,
+                                        const float threshold,
+                                        float* keep_values, int* keep_indices) {
+  int kept = 0;
+  for (int i = 0; i < size; ++i) {
+    if (!(values[i] >= threshold)) continue;
+    keep_values[kept] = values[i];
+    keep_indices[kept++] = i;
+  }
+  return kept;
+}
+
+// True unless some box has ymin >= ymax or xmin >= xmax (empty or inverted).
+bool ValidateBoxes(const float* decoded_boxes, const int num_boxes) {
+  auto* boxes = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes);
+  for (int i = 0; i < num_boxes; ++i) {
+    const bool bad_height = boxes[i].ymin >= boxes[i].ymax;
+    const bool bad_width = boxes[i].xmin >= boxes[i].xmax;
+    if (bad_height || bad_width) return false;
+  }
+  return true;
+}
+
+// IoU of boxes i and j (corner format); 0 if either area is non-positive.
+float ComputeIntersectionOverUnion(const float* decoded_boxes, const int i,
+                                   const int j) {
+  auto* boxes = reinterpret_cast<const BoxCornerEncoding*>(decoded_boxes);
+  const BoxCornerEncoding& a = boxes[i];
+  const BoxCornerEncoding& b = boxes[j];
+  const float area_a = (a.ymax - a.ymin) * (a.xmax - a.xmin);
+  const float area_b = (b.ymax - b.ymin) * (b.xmax - b.xmin);
+  if (area_a <= 0 || area_b <= 0) return 0.0;
+  const float overlap_h = std::max<float>(
+      std::min<float>(a.ymax, b.ymax) - std::max<float>(a.ymin, b.ymin), 0.0);
+  const float overlap_w = std::max<float>(
+      std::min<float>(a.xmax, b.xmax) - std::max<float>(a.xmin, b.xmin), 0.0);
+  const float overlap_area = overlap_h * overlap_w;
+  return overlap_area / (area_a + area_b - overlap_area);
+}
+
+// NonMaxSuppressionSingleClassHelper() prunes out box locations with high
+// overlap before selecting the highest scoring boxes (at most max_detections).
+// It assumes all boxes are good in the beginning and sorts them by score.
+// If a lower-scoring box has too much overlap with a higher-scoring box,
+// the lower-scoring box is dropped. Kept box indices go to `selected`.
+// Complexity is O(N^2): pairwise comparison between boxes.
+TfLiteStatus NonMaxSuppressionSingleClassHelper(
+    TfLiteContext* context, TfLiteNode* node, OpData* op_data,
+    const float* scores, int* selected, int* selected_size,
+    int max_detections) {
+  const TfLiteEvalTensor* input_box_encodings =
+      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
+  const int num_boxes = input_box_encodings->dims->data[1];
+  const float non_max_suppression_score_threshold =
+      op_data->non_max_suppression_score_threshold;
+  const float intersection_over_union_threshold =
+      op_data->intersection_over_union_threshold;
+  // Maximum detections must be non-negative.
+  TF_LITE_ENSURE(context, (max_detections >= 0));
+  // intersection_over_union_threshold must be positive
+  // and must not exceed 1.
+  TF_LITE_ENSURE(context, (intersection_over_union_threshold > 0.0f) &&
+                              (intersection_over_union_threshold <= 1.0f));
+  // Validate boxes
+  float* decoded_boxes = reinterpret_cast<float*>(
+      context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
+
+  TF_LITE_ENSURE(context, ValidateBoxes(decoded_boxes, num_boxes));
+
+  // Keep only the scores at or above the threshold, with their box indices.
+  int* keep_indices = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->keep_indices_idx));
+  float* keep_scores = reinterpret_cast<float*>(
+      context->GetScratchBuffer(context, op_data->keep_scores_idx));
+  int num_scores_kept = SelectDetectionsAboveScoreThreshold(
+      scores, num_boxes, non_max_suppression_score_threshold, keep_scores,
+      keep_indices);
+  int* sorted_indices = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->sorted_indices_idx));
+
+  // Reusing keep_indices for scratch buffer and write back its values
+  // after the sorting is done.
+  DecreasingArgSort(keep_scores, num_scores_kept, sorted_indices, keep_indices);
+  int counter = 0;
+  for (int i = 0; i < num_boxes; i++) {
+    if (scores[i] >= non_max_suppression_score_threshold) {
+      keep_indices[counter] = i;
+      counter++;
+    }
+  }
+
+  const int num_boxes_kept = num_scores_kept;
+  const int output_size = std::min(num_boxes_kept, max_detections);
+  *selected_size = 0;
+
+  int num_active_candidate = num_boxes_kept;
+  uint8_t* active_box_candidate = reinterpret_cast<uint8_t*>(
+      context->GetScratchBuffer(context, op_data->active_candidate_idx));
+  // Every kept box starts out as an active candidate (flag value 1).
+  for (int row = 0; row < num_boxes_kept; row++) {
+    active_box_candidate[row] = 1;
+  }
+  for (int i = 0; i < num_boxes_kept; ++i) {
+    if (num_active_candidate == 0 || *selected_size >= output_size) break;
+    if (active_box_candidate[i] == 1) {
+      selected[(*selected_size)++] = keep_indices[sorted_indices[i]];
+      active_box_candidate[i] = 0;
+      num_active_candidate--;
+    } else {
+      continue;
+    }
+    for (int j = i + 1; j < num_boxes_kept; ++j) {
+      if (active_box_candidate[j] == 1) {
+        float intersection_over_union = ComputeIntersectionOverUnion(
+            decoded_boxes, keep_indices[sorted_indices[i]],
+            keep_indices[sorted_indices[j]]);
+
+        if (intersection_over_union > intersection_over_union_threshold) {
+          active_box_candidate[j] = 0;
+          num_active_candidate--;
+        }
+      }
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+// This function implements a regular version of Non Maximal Suppression (NMS)
+// for multiple classes where
+// 1) we do NMS separately for each class across all anchors and
+// 2) keep only the highest anchor scores across all classes
+// 3) The worst runtime of the regular NMS is O(K*N^2)
+// where N is the number of anchors and K the number of
+// classes.
+TfLiteStatus NonMaxSuppressionMultiClassRegularHelper(TfLiteContext* context,
+                                                      TfLiteNode* node,
+                                                      OpData* op_data,
+                                                      const float* scores) {
+  const TfLiteEvalTensor* input_box_encodings =
+      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
+  const TfLiteEvalTensor* input_class_predictions =
+      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
+  TfLiteEvalTensor* detection_boxes =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
+  TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
+      context, node, kOutputTensorDetectionClasses);
+  TfLiteEvalTensor* detection_scores =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
+  TfLiteEvalTensor* num_detections =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);
+
+  const int num_boxes = input_box_encodings->dims->data[1];
+  const int num_classes = op_data->num_classes;
+  const int num_detections_per_class = op_data->detections_per_class;
+  const int max_detections = op_data->max_detections;
+  const int num_classes_with_background =
+      input_class_predictions->dims->data[2];
+  // The row index offset is 1 if background class is included and 0 otherwise.
+  int label_offset = num_classes_with_background - num_classes;
+  TF_LITE_ENSURE(context, num_detections_per_class > 0);
+
+  // For each class, perform non-max suppression.
+  float* class_scores = reinterpret_cast<float*>(
+      context->GetScratchBuffer(context, op_data->score_buffer_idx));
+  int* box_indices_after_regular_non_max_suppression = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->buffer_idx));
+  float* scores_after_regular_non_max_suppression =
+      reinterpret_cast<float*>(context->GetScratchBuffer(
+          context, op_data->scores_after_regular_non_max_suppression_idx));
+
+  int size_of_sorted_indices = 0;
+  int* sorted_indices = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->sorted_indices_idx));
+  float* sorted_values = reinterpret_cast<float*>(
+      context->GetScratchBuffer(context, op_data->sorted_values_idx));
+  // Each class's survivors are merged into the running list of top scores.
+  for (int col = 0; col < num_classes; col++) {
+    for (int row = 0; row < num_boxes; row++) {
+      // Get scores of boxes corresponding to all anchors for single class
+      class_scores[row] =
+          *(scores + row * num_classes_with_background + col + label_offset);
+    }
+    // Perform non-maximal suppression on single class
+    int selected_size = 0;
+    int* selected = reinterpret_cast<int*>(
+        context->GetScratchBuffer(context, op_data->selected_idx));
+    TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
+        context, node, op_data, class_scores, selected, &selected_size,
+        num_detections_per_class));
+    // Add selected indices from non-max suppression of boxes in this class
+    int output_index = size_of_sorted_indices;
+    for (int i = 0; i < selected_size; i++) {
+      int selected_index = selected[i];
+
+      box_indices_after_regular_non_max_suppression[output_index] =
+          (selected_index * num_classes_with_background + col + label_offset);
+      scores_after_regular_non_max_suppression[output_index] =
+          class_scores[selected_index];
+      output_index++;
+    }
+    // Sort the max scores among the selected indices
+    // Get the indices for top scores
+    int num_indices_to_sort = std::min(output_index, max_detections);
+    DecreasingPartialArgSort(scores_after_regular_non_max_suppression,
+                             output_index, num_indices_to_sort, sorted_indices);
+
+    // Copy values to temporary vectors
+    for (int row = 0; row < num_indices_to_sort; row++) {
+      int temp = sorted_indices[row];
+      sorted_indices[row] = box_indices_after_regular_non_max_suppression[temp];
+      sorted_values[row] = scores_after_regular_non_max_suppression[temp];
+    }
+    // Copy scores and indices from temporary vectors
+    for (int row = 0; row < num_indices_to_sort; row++) {
+      box_indices_after_regular_non_max_suppression[row] = sorted_indices[row];
+      scores_after_regular_non_max_suppression[row] = sorted_values[row];
+    }
+    size_of_sorted_indices = num_indices_to_sort;
+  }
+
+  // Fill the output tensors; slots past the kept detections are zeroed.
+  for (int output_box_index = 0; output_box_index < max_detections;
+       output_box_index++) {
+    if (output_box_index < size_of_sorted_indices) {
+      const int anchor_index = floor(  // no-op: the int division truncates
+          box_indices_after_regular_non_max_suppression[output_box_index] /
+          num_classes_with_background);
+      const int class_index =
+          box_indices_after_regular_non_max_suppression[output_box_index] -
+          anchor_index * num_classes_with_background - label_offset;
+      const float selected_score =
+          scores_after_regular_non_max_suppression[output_box_index];
+      // detection_boxes
+      float* decoded_boxes = reinterpret_cast<float*>(
+          context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
+      ReInterpretTensor<BoxCornerEncoding*>(detection_boxes)[output_box_index] =
+          reinterpret_cast<BoxCornerEncoding*>(decoded_boxes)[anchor_index];
+      // detection_classes
+      tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
+          class_index;
+      // detection_scores
+      tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
+          selected_score;
+    } else {
+      ReInterpretTensor<BoxCornerEncoding*>(
+          detection_boxes)[output_box_index] = {0.0f, 0.0f, 0.0f, 0.0f};
+      // detection_classes
+      tflite::micro::GetTensorData<float>(detection_classes)[output_box_index] =
+          0.0f;
+      // detection_scores
+      tflite::micro::GetTensorData<float>(detection_scores)[output_box_index] =
+          0.0f;
+    }
+  }
+  tflite::micro::GetTensorData<float>(num_detections)[0] =
+      size_of_sorted_indices;
+
+  return kTfLiteOk;
+}
+
+// Fast multi-class Non Maximal Suppression: each anchor is ranked by its
+// single best class score, one NonMaxSuppressionSingleClassHelper pass runs
+// over those maxima, and every surviving anchor then emits up to
+// min(max_classes_per_detection, num_classes) classes. Compared to standard
+// NMS, the worst runtime of this version is O(N^2) instead of O(KN^2), where
+// N is the number of anchors and K the number of classes. The number of rows
+// written (at most op_data->max_detections) is stored in num_detections.
+TfLiteStatus NonMaxSuppressionMultiClassFastHelper(TfLiteContext* context,
+                                                   TfLiteNode* node,
+                                                   OpData* op_data,
+                                                   const float* scores) {
+  const TfLiteEvalTensor* input_box_encodings =
+      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
+  const TfLiteEvalTensor* input_class_predictions =
+      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
+  TfLiteEvalTensor* detection_boxes =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionBoxes);
+
+  TfLiteEvalTensor* detection_classes = tflite::micro::GetEvalOutput(
+      context, node, kOutputTensorDetectionClasses);
+  TfLiteEvalTensor* detection_scores =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorDetectionScores);
+  TfLiteEvalTensor* num_detections =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensorNumDetections);
+
+  const int num_boxes = input_box_encodings->dims->data[1];
+  const int num_classes = op_data->num_classes;
+  const int max_categories_per_anchor = op_data->max_classes_per_detection;
+  const int num_classes_with_background =
+      input_class_predictions->dims->data[2];
+
+  // The row index offset is 1 if background class is included and 0 otherwise.
+  int label_offset = num_classes_with_background - num_classes;
+  TF_LITE_ENSURE(context, (max_categories_per_anchor > 0));
+  const int num_categories_per_anchor =
+      std::min(max_categories_per_anchor, num_classes);
+  float* max_scores = reinterpret_cast<float*>(
+      context->GetScratchBuffer(context, op_data->score_buffer_idx));
+  int* sorted_class_indices = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->buffer_idx));
+
+  for (int row = 0; row < num_boxes; row++) {
+    const float* box_scores =
+        scores + row * num_classes_with_background + label_offset;
+    int* class_indices = sorted_class_indices + row * num_classes;
+    DecreasingPartialArgSort(box_scores, num_classes, num_categories_per_anchor,
+                             class_indices);
+    max_scores[row] = box_scores[class_indices[0]];
+  }
+
+  // Perform non-maximal suppression on max scores
+  int selected_size = 0;
+  int* selected = reinterpret_cast<int*>(
+      context->GetScratchBuffer(context, op_data->selected_idx));
+  TF_LITE_ENSURE_STATUS(NonMaxSuppressionSingleClassHelper(
+      context, node, op_data, max_scores, selected, &selected_size,
+      op_data->max_detections));
+
+  // Emit the results densely: each (anchor, class) pair takes the next free
+  // output row. Deriving the row from num_categories_per_anchor *
+  // output_box_index + col while also advancing output_box_index per class
+  // left gaps between rows and reached indices beyond the rows counted in
+  // num_detections whenever an anchor emitted more than one class.
+  const int max_detections = op_data->max_detections;
+  const BoxCornerEncoding* decoded_boxes =
+      reinterpret_cast<const BoxCornerEncoding*>(
+          context->GetScratchBuffer(context, op_data->decoded_boxes_idx));
+  BoxCornerEncoding* out_boxes =
+      ReInterpretTensor<BoxCornerEncoding*>(detection_boxes);
+  float* out_classes = tflite::micro::GetTensorData<float>(detection_classes);
+  float* out_scores = tflite::micro::GetTensorData<float>(detection_scores);
+  int output_box_index = 0;
+
+  for (int i = 0; i < selected_size; i++) {
+    const int selected_index = selected[i];
+    const float* box_scores =
+        scores + selected_index * num_classes_with_background + label_offset;
+    const int* class_indices =
+        sorted_class_indices + selected_index * num_classes;
+    // Stop at max_detections rows so the writes stay bounded.
+    for (int col = 0; col < num_categories_per_anchor &&
+                      output_box_index < max_detections;
+         ++col) {
+      out_boxes[output_box_index] = decoded_boxes[selected_index];
+      out_classes[output_box_index] = class_indices[col];
+      out_scores[output_box_index] = box_scores[class_indices[col]];
+      output_box_index++;
+    }
+  }
+
+  tflite::micro::GetTensorData<float>(num_detections)[0] = output_box_index;
+  return kTfLiteOk;
+}
+
+// Entry point for the NMS stage. Verifies that the class-prediction tensor is
+// shaped [kBatchSize, num_boxes, num_classes (+ at most one extra class)] and
+// holds float32 data, then runs the regular or fast multi-class helper chosen
+// by op_data->use_regular_non_max_suppression and returns its status.
+TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
+                                         TfLiteNode* node, OpData* op_data) {
+  const TfLiteEvalTensor* input_class_predictions =
+      tflite::micro::GetEvalInput(context, node, kInputTensorClassPredictions);
+  const TfLiteEvalTensor* input_box_encodings =
+      tflite::micro::GetEvalInput(context, node, kInputTensorBoxEncodings);
+
+  // Dimension 0 must equal kBatchSize and dimension 1 the number of boxes.
+  const int num_boxes = input_box_encodings->dims->data[1];
+  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[0],
+                    kBatchSize);
+  TF_LITE_ENSURE_EQ(context, input_class_predictions->dims->data[1], num_boxes);
+
+  // The last dimension may exceed num_classes by at most one entry.
+  const int num_classes = op_data->num_classes;
+  const int num_classes_with_background =
+      input_class_predictions->dims->data[2];
+  TF_LITE_ENSURE(context, (num_classes_with_background - num_classes <= 1));
+  TF_LITE_ENSURE(context, (num_classes_with_background >= num_classes));
+
+  // Float32 is the only score type handled here.
+  if (input_class_predictions->type != kTfLiteFloat32) {
+    // Unsupported type.
+    return kTfLiteError;
+  }
+  const float* scores =
+      tflite::micro::GetTensorData<float>(input_class_predictions);
+
+  // Delegate to the selected helper; its status is propagated unchanged.
+  if (op_data->use_regular_non_max_suppression) {
+    return NonMaxSuppressionMultiClassRegularHelper(context, node, op_data,
+                                                    scores);
+  }
+  return NonMaxSuppressionMultiClassFastHelper(context, node, op_data, scores);
+}
+
+// Invoke entry point of the kernel: decodes the input boxes and then applies
+// Non Maximal Suppression to fill the output tensors. The first failing step's
+// status is returned; the kernel requires kBatchSize to be 1.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, (kBatchSize == 1));
+  OpData* const data = static_cast<OpData*>(node->user_data);
+
+  // The two calls below correspond to two blocks in the Object Detection
+  // model. Ideally the custom op would be split into two ops, but that is not
+  // currently feasible: the inputs should be allowed to be quantized while all
+  // calculations run in float, and TFLite does not support mixed
+  // quantized/float calculations.
+
+  // Stage 1: fill the temporary decoded_boxes by transforming
+  // input_box_encodings and input_anchors from CenterSizeEncodings to
+  // BoxCornerEncoding.
+  TF_LITE_ENSURE_STATUS(DecodeCenterSizeBoxes(context, node, data));
+
+  // Stage 2: fill the output tensors by choosing the effective set of decoded
+  // boxes via Non Maximal Suppression, i.e. the highest scoring
+  // non-overlapping boxes. Its status is returned directly.
+  return NonMaxSuppressionMultiClass(context, node, data);
+}
+}  // namespace
+// Returns the DETECTION_POSTPROCESS registration built from Init/Prepare/Eval.
+TfLiteRegistration* Register_DETECTION_POSTPROCESS() {
+  static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval);
+  return &r;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cc
index 4defb74..e5fb262 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/div.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -29,7 +30,7 @@ constexpr int kInputTensor1 = 0;
 constexpr int kInputTensor2 = 1;
 constexpr int kOutputTensor = 0;
 
-struct OpData {
+struct OpDataDiv {
   // Parameters used in the quantized paths where the output is 8bit
   int32_t input1_zero_point;
   int32_t input2_zero_point;
@@ -42,21 +43,9 @@ struct OpData {
   int output_shift;
 };
 
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteDivParams* params, OpData* data) {
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  const TfLiteTensor* input1;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensor1, &input1));
-  const TfLiteTensor* input2;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensor2, &input2));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
+TfLiteStatus CalculateOpDataDiv(TfLiteContext* context, TfLiteTensor* input1,
+                                TfLiteTensor* input2, TfLiteTensor* output,
+                                TfLiteDivParams* params, OpDataDiv* data) {
   TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
   TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
 
@@ -78,17 +67,38 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataDiv));
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
-  auto* data = static_cast<OpData*>(node->user_data);
-  return CalculateOpData(context, node, params, data);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  OpDataDiv* data = static_cast<OpDataDiv*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_STATUS(
+      CalculateOpDataDiv(context, input1, input2, output, params, data));
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
 }
 
 void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
+             const OpDataDiv* data, const TfLiteEvalTensor* input1,
              const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
   tflite::ArithmeticParams op_params = {};
 
@@ -118,7 +128,7 @@ void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
 }
 
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                           TfLiteDivParams* params, const OpData* data,
+                           TfLiteDivParams* params, const OpDataDiv* data,
                            const TfLiteEvalTensor* input1,
                            const TfLiteEvalTensor* input2,
                            TfLiteEvalTensor* output) {
@@ -153,8 +163,7 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     }
 #undef TF_LITE_DIV
   } else {
-    TF_LITE_KERNEL_LOG(
-        context, "Unsupported combination of input and output types in DIV.");
+    MicroPrintf("Unsupported combination of input and output types in DIV.");
     return kTfLiteError;
   }
 
@@ -165,7 +174,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = static_cast<TfLiteDivParams*>(node->builtin_data);
   TFLITE_DCHECK(node->user_data != nullptr);
-  auto* data = static_cast<OpData*>(node->user_data);
+  auto* data = static_cast<OpDataDiv*>(node->user_data);
 
   const TfLiteEvalTensor* input1 =
       tflite::micro::GetEvalInput(context, node, kInputTensor1);
@@ -180,10 +189,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, data,
                                              input1, input2, output));
   } else {
-    TF_LITE_KERNEL_LOG(context,
-                       "DIV only supports FLOAT32, quantized INT8 "
-                       "now, got type %s (%d).",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf(
+        "DIV only supports FLOAT32, quantized INT8 "
+        "now, got type %s (%d).",
+        TfLiteTypeGetName(output->type), output->type);
     return kTfLiteError;
   }
 
@@ -193,14 +202,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_DIV() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cc
index 1b9f889..4ee7f2c 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/elementwise.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -79,34 +80,41 @@ inline void SetRsqrtOutputMultiplier(const float input_scale,
 typedef bool (*IsSupportedType)(TfLiteType);
 template <IsSupportedType>
 TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, kElementwiseInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kElementwiseInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kElementwiseOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kElementwiseOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   if (!IsSupportedType(input->type)) {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(input->type), input->type);
+    MicroPrintf("Input data type %s (%d) is not supported.",
+                TfLiteTypeGetName(input->type), input->type);
     return kTfLiteError;
   }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
 typedef bool (*IsSupportedType)(TfLiteType);
 template <IsSupportedType, const int op_nameid>
 TfLiteStatus PrepareAbsRsqrt(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   if (!IsSupportedType(input->type)) {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(input->type), input->type);
+    MicroPrintf("Input data type %s (%d) is not supported.",
+                TfLiteTypeGetName(input->type), input->type);
     return kTfLiteError;
   }
 
@@ -155,6 +163,8 @@ TfLiteStatus PrepareAbsRsqrt(TfLiteContext* context, TfLiteNode* node) {
                                &op_data->shift);
     }
   }
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -308,8 +318,8 @@ TfLiteStatus AbsEval(TfLiteContext* context, TfLiteNode* node) {
                                            type);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(type), type);
+      MicroPrintf("Current data type %s is not supported.",
+                  TfLiteTypeGetName(type));
       return kTfLiteError;
       break;
   }
@@ -324,10 +334,6 @@ TfLiteStatus CosEval(TfLiteContext* context, TfLiteNode* node) {
   return EvalNumeric(context, node, std::cos);
 }
 
-TfLiteStatus ExpEval(TfLiteContext* context, TfLiteNode* node) {
-  return EvalNumeric(context, node, std::exp);
-}
-
 TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
   return EvalNumeric(context, node, std::log);
 }
@@ -350,8 +356,8 @@ TfLiteStatus RsqrtEval(TfLiteContext* context, TfLiteNode* node) {
                                        elementwise::validate_input_func, type);
 
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(type), type);
+      MicroPrintf("Current data type %s is not supported.",
+                  TfLiteTypeGetName(type));
       return kTfLiteError;
   }
 }
@@ -367,116 +373,56 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 }  // namespace elementwise
 
-
-
 TfLiteRegistration Register_ABS() {
-  return {/*init=*/elementwise::ElementWiseAbsRsqrtInit,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::PrepareAbsRsqrt<elementwise::IsAbsSupportedType,
+  return tflite::micro::RegisterOp(
+      elementwise::ElementWiseAbsRsqrtInit,
+      elementwise::PrepareAbsRsqrt<elementwise::IsAbsSupportedType,
                                    elementwise::kAbsNameId>,
-          /*invoke=*/elementwise::AbsEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+      elementwise::AbsEval);
 }
 
 TfLiteRegistration Register_SIN() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::SinEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::SinEval);
 }
 
 TfLiteRegistration Register_COS() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::CosEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
-}
-
-TfLiteRegistration Register_EXP() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::ExpEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::CosEval);
 }
 
 TfLiteRegistration Register_LOG() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::LogEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::LogEval);
 }
 
 TfLiteRegistration Register_SQRT() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::SqrtEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::SqrtEval);
 }
 
 TfLiteRegistration Register_RSQRT() {
-  return {/*init=*/elementwise::ElementWiseAbsRsqrtInit,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::PrepareAbsRsqrt<elementwise::IsRsqrtSupportedType,
+  return tflite::micro::RegisterOp(
+      elementwise::ElementWiseAbsRsqrtInit,
+      elementwise::PrepareAbsRsqrt<elementwise::IsRsqrtSupportedType,
                                    elementwise::kRsrqtNameId>,
-          /*invoke=*/elementwise::RsqrtEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+      elementwise::RsqrtEval);
 }
 
 TfLiteRegistration Register_SQUARE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
-          /*invoke=*/elementwise::SquareEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsNumericSupportedType>,
+      elementwise::SquareEval);
 }
 
 TfLiteRegistration Register_LOGICAL_NOT() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/
-          elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
-          /*invoke=*/elementwise::LogicalNotEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      nullptr, elementwise::GenericPrepare<elementwise::IsLogicalSupportedType>,
+      elementwise::LogicalNotEval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cc
index 0e5f83b..7581772 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/elu.cc
@@ -25,6 +25,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -45,7 +46,10 @@ using TransformFunc = float (*)(float);
 template <typename T>
 void PopulateLookupTable(const TfLiteTensor* input, const TfLiteTensor* output,
                          const TransformFunc transform, OpData* data) {
-  if (sizeof(T) != 1) TF_LITE_FATAL("Lookup table valid only for 8bit");
+  if (sizeof(T) != 1) {
+    MicroPrintf("Lookup table valid only for 8bit");
+    TFLITE_ABORT;
+  }
 
   const float inverse_scale = 1 / output->params.scale;
   int32_t maxval = std::numeric_limits<T>::max();
@@ -76,13 +80,16 @@ void EvalUsingLookupTable(const OpData* data, const TfLiteEvalTensor* input,
 }
 
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
   // Use LUT to handle quantized elu path.
@@ -93,7 +100,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
     };
     PopulateLookupTable<int8_t>(input, output, transform, data);
   }
-
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -128,9 +136,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     }
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "ELU only supports float32 and int8 currently, got %s.",
-          TfLiteTypeGetName(input->type));
+      MicroPrintf("ELU only supports float32 and int8 currently, got %s.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 }
@@ -138,14 +145,7 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_ELU() {
-  return {/*init=*/EluInit,
-          /*free=*/nullptr,
-          /*prepare=*/EluPrepare,
-          /*invoke=*/EluEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(EluInit, EluPrepare, EluEval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cc
index cf0bdd5..e2bccde 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@ limitations under the License.
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 #if EI_CLASSIFIER_TFLITE_ETHOSU_POLYFILL || EI_ETHOS
 
@@ -67,16 +69,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
   int num_base_addr = node->inputs->size + node->outputs->size;
 
-  // Request arrays for the base address pointers and sizes
+  // Request arrays for the base address pointers and sizes.
   TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
       context, num_base_addr * sizeof(uint64_t), &data->base_addr_idx));
   TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
       context, num_base_addr * sizeof(size_t), &data->base_addr_size_idx));
 
-  // Get command stream data size
-  TfLiteTensor* tensor = context->GetTensor(context, node->inputs->data[0]);
+  // Get command stream data size.
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* tensor = micro_context->AllocateTempInputTensor(node, 0);
   data->cms_data_size = tensor->bytes;
-
+  micro_context->DeallocateTempTfLiteTensor(tensor);
   return kTfLiteOk;
 }
 
@@ -85,7 +88,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(context != nullptr);
   TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
 
-  // Get base addresses
+  // Get base addresses.
   TfLiteEvalTensor* tensor;
   int i = 0;
   int num_tensors = 0;
@@ -103,15 +106,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto root = flexbuffers::GetRoot(custom_data, node->custom_initial_data_size);
   co_type = root.AsInt8();
   if (co_type != CO_TYPE_ETHOSU) {
-    TF_LITE_KERNEL_LOG(context, "CO_TYPE != ETHOSU");
+    MicroPrintf("CO_TYPE != ETHOSU");
     return kTfLiteError;
   }
 
-  // Get command stream data address
+  // Get command stream data address.
   tensor = context->GetEvalTensor(context, node->inputs->data[0]);
   cms_data = reinterpret_cast<void*>(tensor->data.uint8);
 
-  // Get addresses to weights/scratch/input data
+  // Get addresses to weights/scratch/input data.
   for (i = 1; i < node->inputs->size; ++i) {
     tensor = context->GetEvalTensor(context, node->inputs->data[i]);
     base_addrs[num_tensors] =
@@ -124,7 +127,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     num_tensors++;
   }
 
-  // Get addresses to output data
+  // Get addresses to output data.
   for (i = 0; i < node->outputs->size; ++i) {
     tensor = context->GetEvalTensor(context, node->outputs->data[i]);
     base_addrs[num_tensors] =
@@ -139,6 +142,35 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   // Ethos-U guarantees that the tensors that require a base pointer are among
   // the 8 first tensors
+  // When Vela optimizes a tflite file it will assign the tensors like this:
+  //
+  // +-------+------------------------+  +--------+-------------+
+  // | INPUT | Description            |  | OUTPUT | Description |
+  // +-------+------------------------+  +--------+-------------+
+  // |     0 | Ethos-U command stream |  |   0..m | Outputs     |
+  // |     1 | TFLM model             |  +--------+-------------+
+  // |     2 | TFLM arena             |
+  // |     3 | Ethos-U fast scratch   |
+  // |  4..n | Inputs                 |
+  // +-------+------------------------+
+  //
+  // This code will assign the NPU base addresses like this:
+  //
+  // +--------------+----------------------+
+  // | Base address | Description          |
+  // +--------------+----------------------+
+  // |            0 | TFLM model           |
+  // |            1 | TFLM arena           |
+  // |            2 | Ethos-U fast scratch |
+  // |         3..n | Input tensors        |
+  // |         n..m | Output tensors       |
+  // +--------------+----------------------+
+  //
+  // The number of base address will be limited to 8.
+  //
+  // NOTE! The command stream produced by Vela will access the IFM and OFM
+  // buffers using base address 1. This means that it is not possible to point
+  // the input and output tensors outside of the TFLM arena.
   num_tensors = std::min(num_tensors, 8);
 
   struct ethosu_driver* drv = ethosu_reserve_driver();
@@ -156,14 +188,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration* Register_ETHOSU() {
-  static TfLiteRegistration r = {Init,
-                                 Free,
-                                 Prepare,
-                                 Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
+  static TfLiteRegistration r = tflite::micro::RegisterOp(Init, Prepare, Eval);
   return &r;
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cc
new file mode 100644
index 0000000..c727cb9
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/exp.cc
@@ -0,0 +1,87 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/exp.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+
+// Prepare-time validation: the node must have exactly one input and one
+// output, the input must be float32, and the output must match the input in
+// type, byte size and every dimension. Returns kTfLiteOk once all checks pass.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
+  TF_LITE_ENSURE_TYPES_EQ(context, output->type, input->type);
+  TF_LITE_ENSURE_EQ(context, output->bytes, input->bytes);
+  TF_LITE_ENSURE_EQ(context, output->dims->size, input->dims->size);
+  for (int i = 0; i < output->dims->size; ++i) {
+    TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
+  }
+  // The temporary tensor views were only needed for the checks above.
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+// Invoke-time hook: runs reference_ops::Exp over flat_size float32 elements of
+// the input and writes the results to the output. Any other input type is
+// logged via MicroPrintf and reported as kTfLiteError.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input),
+                                   tflite::micro::GetTensorShape(output));
+
+  if (input->type == kTfLiteFloat32) {
+    reference_ops::Exp(tflite::micro::GetTensorData<float>(input),
+                       static_cast<size_t>(flat_size),
+                       tflite::micro::GetTensorData<float>(output));
+  } else {
+    MicroPrintf("Type %s (%d) currently not supported by Exp.",
+                TfLiteTypeGetName(input->type), input->type);
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+}  // namespace
+
+// Builds the EXP registration from Prepare and Eval; no Init hook is supplied.
+TfLiteRegistration Register_EXP() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cc
index 90c1808..f2b638b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/expand_dims.cc
@@ -17,8 +17,8 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
 
 namespace tflite {
 namespace {
@@ -83,13 +83,18 @@ TfLiteStatus VerifyTensorDim(TfLiteContext* context, const TfLiteTensor* input,
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* axis = GetInput(context, node, kAxisTensor);
+  TfLiteTensor* axis =
+      micro_context->AllocateTempInputTensor(node, kAxisTensor);
   TF_LITE_ENSURE(context, axis != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   output->type = input->type;
   if (IsDynamicTensor(axis)) {
@@ -98,6 +103,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
   TF_LITE_ENSURE_OK(context, VerifyTensorDim(context, input, axis, output));
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(axis);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -135,14 +143,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_EXPAND_DIMS() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cc
index 90a235c..202caef 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fill.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
@@ -46,8 +47,6 @@ TfLiteStatus EnsureEq(TfLiteContext* context, const TfLiteIntArray* array,
   switch (tensor->type) {
     case kTfLiteInt8:
       return EnsureEqImpl<int8_t>(context, array, tensor);
-    case kTfLiteUInt8:
-      return EnsureEqImpl<uint8_t>(context, array, tensor);
     case kTfLiteInt16:
       return EnsureEqImpl<int16_t>(context, array, tensor);
     case kTfLiteInt32:
@@ -55,9 +54,8 @@ TfLiteStatus EnsureEq(TfLiteContext* context, const TfLiteIntArray* array,
     case kTfLiteInt64:
       return EnsureEqImpl<int64_t>(context, array, tensor);
     default:
-      TF_LITE_KERNEL_LOG(context,
-                         "cannot compare int array to tensor of type %d.",
-                         tensor->type);
+      MicroPrintf("cannot compare int array to tensor of type %d.",
+                  tensor->type);
       return kTfLiteError;
   }
 }
@@ -67,14 +65,18 @@ constexpr int kValueTensor = 1;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   // Ensure inputs and outputs exist.
-  const TfLiteTensor* dims;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kDimsTensor, &dims));
-  const TfLiteTensor* value;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kValueTensor, &value));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  TfLiteTensor* dims =
+      micro_context->AllocateTempInputTensor(node, kDimsTensor);
+  TF_LITE_ENSURE(context, dims != nullptr);
+  TfLiteTensor* value =
+      micro_context->AllocateTempInputTensor(node, kValueTensor);
+  TF_LITE_ENSURE(context, value != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   // The value tensor must be a scalar.
   TF_LITE_ENSURE_EQ(context, NumDimensions(value), 0);
@@ -82,10 +84,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // The value type and output type must match.
   TF_LITE_ENSURE_EQ(context, value->type, output->type);
 
-  // The dims tensor must match the output tensor shape. As a byproduct,
-  // ensures the dims tensor is of an integer type.
-  TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims));
+  // The dimension of the output tensor is known in model already.
+  TFLITE_DCHECK(output->dims != nullptr);
+
+  if (dims->data.data != nullptr) {
+    // When the dims tensor is specified in model already (i.e. is not an
+    // activation tensor), the dims tensor must match the output tensor shape.
+    // As a byproduct, ensures the dims tensor is of an integer type.
+    TF_LITE_ENSURE_OK(context, EnsureEq(context, output->dims, dims));
+  }
 
+  micro_context->DeallocateTempTfLiteTensor(dims);
+  micro_context->DeallocateTempTfLiteTensor(value);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -105,10 +116,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteFloat32:
       FillImpl<float>(value, output);
       break;
+    case kTfLiteInt32:
+      FillImpl<int32_t>(value, output);
+      break;
+    case kTfLiteInt8:
+      FillImpl<int8_t>(value, output);
+      break;
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "Fill only currently supports float32 for input 1, got %d.",
-          TfLiteTypeGetName(value->type));
+      MicroPrintf("Fill only currently supports float32 for input 1, got %s.",
+                  TfLiteTypeGetName(value->type));
       return kTfLiteError;
   }
 
@@ -118,14 +134,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_FILL() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cc
index 9fa0b2b..76c1a19 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,9 +20,8 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace floor {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
@@ -39,19 +38,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                        tflite::micro::GetTensorData<float>(output));
   return kTfLiteOk;
 }
-}  // namespace floor
+
+}  // namespace
 
 TfLiteRegistration Register_FLOOR() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/floor::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cc
new file mode 100644
index 0000000..9fc135c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_div.cc
@@ -0,0 +1,144 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_div.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+namespace {
+
+// Input/output tensor index.
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+// Checks that the node has exactly two inputs and one output and that both
+// inputs and the output share one element type. The temporary tensor views
+// allocated for the checks are released before kTfLiteOk is returned.
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+// FLOOR_DIV keeps no per-node state, so Init has nothing to allocate.
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+// Prepare-time hook: delegates all validation to CalculateOpData.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpData(context, node);
+}
+
+// Applies reference_ops::FloorDiv<T> to input1 (numerator) and input2
+// (denominator). Every denominator element is scanned first; a zero anywhere
+// is logged and returns kTfLiteError before any output is written. Operands
+// whose shapes differ take the BroadcastBinaryFunction4DSlow path.
+template <typename T>
+TfLiteStatus EvalFloorDiv(TfLiteContext* context,
+                          const TfLiteEvalTensor* input1,
+                          const TfLiteEvalTensor* input2,
+                          TfLiteEvalTensor* output) {
+  const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
+
+  // Validate the denominator.
+  // The element count is loop-invariant, so compute it once up front.
+  const auto denominator_count = tflite::ElementCount(*input2->dims);
+  for (int i = 0; i < denominator_count; ++i) {
+    if (std::equal_to<T>()(denominator_data[i], 0)) {
+      MicroPrintf("Division by 0");
+      return kTfLiteError;
+    }
+  }
+
+  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
+
+  if (requires_broadcast) {
+    reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<T>(input1),
+        tflite::micro::GetTensorShape(input2), denominator_data,
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
+  } else {
+    reference_ops::BinaryFunction<T, T, T>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<T>(input1),
+        tflite::micro::GetTensorShape(input2), denominator_data,
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<T>(output), reference_ops::FloorDiv<T>);
+  }
+
+  return kTfLiteOk;
+}
+
+// Invoke-time hook: dispatches on the type of input 1. Only float32 is
+// handled; any other type is logged and reported as kTfLiteError.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  switch (input1->type) {
+    case kTfLiteFloat32: {
+      return EvalFloorDiv<float>(context, input1, input2, output);
+    }
+    default: {
+      MicroPrintf("Type '%s' is not supported by FLOOR_DIV.",
+                  TfLiteTypeGetName(input1->type));
+      return kTfLiteError;
+    }
+  }
+}
+
+}  // namespace
+
+// Builds the FLOOR_DIV registration from Init, Prepare and Eval.
+TfLiteRegistration Register_FLOOR_DIV() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cc
new file mode 100644
index 0000000..acf4bbc
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/floor_mod.cc
@@ -0,0 +1,140 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/floor_mod.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+// OLD-TODO(b/117523611): We should factor out a binary_op and put binary ops
+// there.
+namespace tflite {
+namespace {
+
+// Input/output tensor index.
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+// OLD-TODO(b/117912880): Support quantization.
+
+// Checks that the node has exactly two inputs and one output and that both
+// inputs and the output share one element type. The temporary tensor views
+// allocated for the checks are released before kTfLiteOk is returned.
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, output->type);
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+// FLOOR_MOD keeps no per-node state, so Init has nothing to allocate.
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+// Prepare-time hook: delegates all validation to CalculateOpData.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpData(context, node);
+}
+
+// Applies reference_ops::FloorMod<T> to input1 and input2 (the denominator).
+// When requires_broadcast is set the BroadcastBinaryFunction4DSlow path is
+// used; otherwise the operands are processed with BinaryFunction.
+template <typename T>
+TfLiteStatus EvalFloorMod(TfLiteContext* context, bool requires_broadcast,
+                          const TfLiteEvalTensor* input1,
+                          const TfLiteEvalTensor* input2,
+                          TfLiteEvalTensor* output) {
+  const T* denominator_data = tflite::micro::GetTensorData<T>(input2);
+
+  if (requires_broadcast) {
+    reference_ops::BroadcastBinaryFunction4DSlow<T, T, T>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<T>(input1),
+        tflite::micro::GetTensorShape(input2), denominator_data,
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
+  } else {
+    reference_ops::BinaryFunction<T, T, T>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<T>(input1),
+        tflite::micro::GetTensorShape(input2), denominator_data,
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<T>(output), reference_ops::FloorMod<T>);
+  }
+
+  return kTfLiteOk;
+}
+
+// Invoke-time hook: decides whether broadcasting is needed by comparing the
+// input shapes, then dispatches on the type of input 1. Only float32 is
+// handled; any other type is logged and reported as kTfLiteError.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);
+
+  switch (input1->type) {
+    case kTfLiteFloat32: {
+      return EvalFloorMod<float>(context, requires_broadcast, input1, input2,
+                                 output);
+    }
+    default: {
+      MicroPrintf("Type '%s' is not supported by FLOOR_MOD.",
+                  TfLiteTypeGetName(input1->type));
+      return kTfLiteError;
+    }
+  }
+}
+
+}  // namespace
+
+// Builds the FLOOR_MOD registration from Init, Prepare and Eval.
+TfLiteRegistration Register_FLOOR_MOD() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cc
index 34fcd5e..27ef622 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,12 +24,14 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -37,14 +39,18 @@ namespace {
 struct OpData {
   OpDataFullyConnected reference_op_data;
 
+  // Conv 1x1 that may be invoked in some cases currently need per channel
+  // quantization.
+  int32_t* per_channel_output_multiplier;
+  int32_t* per_channel_output_shift;
+
   // Index to buffer for optimizations if applicable.
   int buffer_idx;
-};
 
-// TODO(b/169801227): This global struct is needed for the linker to drop unused
-// code (for example, by using Register_FULLY_CONNECTED_INT8 instead of
-// Register_FULLY_CONNECTED).
-TfLiteRegistration fully_connected_registration;
+  int32_t batches;       // Set in Prepare: output flat size, last dim skipped.
+  int32_t accum_depth;   // Set in Prepare: size of the filter's last dim.
+  int32_t output_depth;  // Set in Prepare: size of the output's last dim.
+};
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
@@ -59,20 +65,34 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto params =
       static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
-  const TfLiteTensor* input =
-      GetInput(context, node, kFullyConnectedInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kFullyConnectedWeightsTensor);
+  TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
+      node, kFullyConnectedWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
+      node, kFullyConnectedOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
-                     "Hybrid models are not supported on TFLite Micro.");
+
+  const RuntimeShape filter_shape = GetTensorShape(filter);
+  const RuntimeShape output_shape = GetTensorShape(output);
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int output_dim_count = output_shape.DimensionsCount();
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = filter_shape.Dims(filter_dim_count - 1);
+  filter_dims.h = 1;
+  filter_dims.w = 1;
+  filter_dims.c = output_shape.Dims(output_dim_count - 1);
+
+  data->accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  data->batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  data->output_depth = output_shape.Dims(output_dim_count - 1);
 
   // Set buffer index to a reset value
   data->buffer_idx = -1;
@@ -80,110 +100,243 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       context, params->activation, input->type, input, filter, bias, output,
       &(data->reference_op_data)));
 
-  if (input->type == kTfLiteInt8) {
-    #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(output->type), output->type);
-    return kTfLiteError;
-    #endif
-
-    RuntimeShape filter_shape = GetTensorShape(filter);
-    RuntimeShape output_shape = GetTensorShape(output);
+  int32_t buf_size = 0;
 
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-    const int filter_dim_count = filter_shape.DimensionsCount();
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = filter_shape.Dims(filter_dim_count - 1);
-    filter_dims.h = 1;
-    filter_dims.w = 1;
-    filter_dims.c = output_shape.Dims(1);
+  if (input->type == kTfLiteInt16) {
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I16
+        MicroPrintf("Filter data type %s currently not supported.",
+                              TfLiteTypeGetName(input->type));
+        return kTfLiteError;
+#endif
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    buf_size = arm_fully_connected_s16_get_buffer_size(&filter_dims);
+  } else if (input->type == kTfLiteInt8) {
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
+        MicroPrintf("Filter data type %s currently not supported.",
+                              TfLiteTypeGetName(input->type));
+        return kTfLiteError;
+#endif
+    const RuntimeShape input_shape = GetTensorShape(input);
 
-    const int32_t buf_size =
-        arm_fully_connected_s8_get_buffer_size(&filter_dims);
+    TFLITE_DCHECK_GE(output_dim_count, 2);
+    TFLITE_DCHECK_LE(output_dim_count, 4);
 
-    if (buf_size > 0) {
-      TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
-          context, buf_size, &data->buffer_idx));
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+    buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+#else
+    if (output_dim_count > 2 && data->accum_depth % 4 == 0) {
+      data->per_channel_output_multiplier =
+          static_cast<int32_t*>(context->AllocatePersistentBuffer(
+              context, data->output_depth * sizeof(int32_t)));
+      data->per_channel_output_shift =
+          static_cast<int32_t*>(context->AllocatePersistentBuffer(
+              context, data->output_depth * sizeof(int32_t)));
+
+      cmsis_nn_dims input_dims;
+      input_dims.n = data->batches;
+      input_dims.h = 1;
+      input_dims.w = 1;
+      input_dims.c = data->accum_depth;
+
+      buf_size = arm_convolve_1x1_s8_fast_get_buffer_size(&input_dims);
     } else {
-      data->buffer_idx = -1;
+      buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
     }
+#endif
   }
+
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(
+        context, filter_size, &data->reference_op_data.filter_buffer_index);
+  }
+
+  if (buf_size > 0) {
+    TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
+        context, buf_size, &data->buffer_idx));
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  if (bias != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(bias);
+  }
+
   return kTfLiteOk;
 }
 
+void PopulateCommonParams(TfLiteContext* context,
+                          cmsis_nn_per_tensor_quant_params* const quant_params,
+                          cmsis_nn_dims* const input_dims,
+                          cmsis_nn_dims* const filter_dims,
+                          cmsis_nn_dims* const bias_dims,
+                          cmsis_nn_dims* const output_dims,
+                          cmsis_nn_context* const ctx, const OpData& data) {  // Fills the CMSIS-NN quant params, tensor dims and scratch context from OpData.
+  quant_params->multiplier = data.reference_op_data.output_multiplier;  // per-tensor requantization values come from the reference op data
+  quant_params->shift = data.reference_op_data.output_shift;
+
+  input_dims->n = data.batches;  // input described as batches x 1 x 1 x accum_depth
+  input_dims->h = 1;
+  input_dims->w = 1;
+  input_dims->c = data.accum_depth;
+
+  filter_dims->n = data.accum_depth;  // filter described as accum_depth x 1 x 1 x output_depth
+  filter_dims->h = 1;
+  filter_dims->w = 1;
+  filter_dims->c = data.output_depth;
+
+  bias_dims->n = 1;  // bias described as 1 x 1 x 1 x output_depth
+  bias_dims->h = 1;
+  bias_dims->w = 1;
+  bias_dims->c = data.output_depth;
+
+  output_dims->n = data.batches;  // output described as batches x 1 x 1 x output_depth
+  output_dims->h = 1;
+  output_dims->w = 1;
+  output_dims->c = data.output_depth;
+
+  ctx->buf = nullptr;  // default: no scratch buffer attached
+  ctx->size = 0;  // size is left at 0 even when a buffer is attached below
+  if (data.buffer_idx > -1) {  // Prepare resets buffer_idx to -1 and only assigns it when a scratch buffer is requested
+    ctx->buf = context->GetScratchBuffer(context, data.buffer_idx);
+  }
+}
+
 TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
                                const OpData& data,
                                const TfLiteEvalTensor* input,
                                const TfLiteEvalTensor* filter,
                                const TfLiteEvalTensor* bias,
                                TfLiteEvalTensor* output) {
-  #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_I8
-  TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                  TfLiteTypeGetName(output->type), output->type);
-  return kTfLiteError;
-  #endif
-
   const RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2);
-  const int batches = output_shape.Dims(0);
-  const int output_depth = output_shape.Dims(1);
-  const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter);
-  const int filter_dim_count = filter_shape.DimensionsCount();
-  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
-  const RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
-
-  cmsis_nn_fc_params fc_params;
-  fc_params.input_offset = -data.reference_op_data.input_zero_point;
-  fc_params.output_offset = data.reference_op_data.output_zero_point;
-  fc_params.filter_offset = -data.reference_op_data.filter_zero_point;
-  fc_params.activation.min = data.reference_op_data.output_activation_min;
-  fc_params.activation.max = data.reference_op_data.output_activation_max;
+  const int output_dim_count = output_shape.DimensionsCount();  // int8 path: the output rank selects between the 1x1-convolution kernel and the fully-connected kernel below
+  TFLITE_DCHECK_GE(output_dim_count, 2);  // debug check: output rank must lie in 2..4
+  TFLITE_DCHECK_LE(output_dim_count, 4);
 
   cmsis_nn_per_tensor_quant_params quant_params;
-  quant_params.multiplier = data.reference_op_data.output_multiplier;
-  quant_params.shift = data.reference_op_data.output_shift;
-
   cmsis_nn_dims input_dims;
-  input_dims.n = batches;
-  input_dims.h = 1;
-  input_dims.w = 1;
-  input_dims.c = accum_depth;
-
   cmsis_nn_dims filter_dims;
-  filter_dims.n = accum_depth;
-  filter_dims.h = 1;
-  filter_dims.w = 1;
-  filter_dims.c = output_depth;
-
   cmsis_nn_dims bias_dims;
-  bias_dims.n = 1;
-  bias_dims.h = 1;
-  bias_dims.w = 1;
-  bias_dims.c = output_depth;
-
   cmsis_nn_dims output_dims;
-  output_dims.n = batches;
-  output_dims.h = 1;
-  output_dims.w = 1;
-  output_dims.c = output_depth;
-
   cmsis_nn_context ctx;
-  ctx.buf = nullptr;
-  ctx.size = 0;
 
-  if (data.buffer_idx > -1) {
-    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
+  PopulateCommonParams(context, &quant_params, &input_dims, &filter_dims,
+                       &bias_dims, &output_dims, &ctx, data);  // dims, per-tensor quant params and scratch context are all derived from OpData
+
+  const int32_t* bias_data =
+      tflite::micro::GetOptionalTensorData<int32_t>(bias);  // bias is read as int32; presumably nullptr when the optional bias is absent - confirm
+
+#if EI_TFLITE_DISABLE_CONV_2D_IN_I8
+    cmsis_nn_fc_params fc_params;  // convolution path compiled out: always use the fully-connected kernel
+    fc_params.input_offset = -data.reference_op_data.input_zero_point;
+    fc_params.output_offset = data.reference_op_data.output_zero_point;
+    fc_params.filter_offset = 0;  // filter offset is fixed at 0 rather than taken from the filter zero point
+    fc_params.activation.min = data.reference_op_data.output_activation_min;
+    fc_params.activation.max = data.reference_op_data.output_activation_max;
+
+    TF_LITE_ENSURE_EQ(
+        context,
+        arm_fully_connected_s8(
+            &ctx, &fc_params, &quant_params, &input_dims,
+            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims, bias_data,
+            &output_dims, tflite::micro::GetTensorData<int8_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
+#else
+
+  if (output_dim_count > 2 && data.accum_depth % 4 == 0) {  // same condition Prepare uses to allocate the per-channel buffers and size the conv scratch buffer
+    cmsis_nn_conv_params conv_params;  // express the op as a 1x1 convolution: unit stride and dilation, no padding
+    conv_params.dilation.h = 1;
+    conv_params.dilation.w = 1;
+    conv_params.input_offset = -data.reference_op_data.input_zero_point;
+    conv_params.output_offset = data.reference_op_data.output_zero_point;
+    conv_params.stride.h = 1;
+    conv_params.stride.w = 1;
+    conv_params.padding.h = 0;
+    conv_params.padding.w = 0;
+    conv_params.activation.min = data.reference_op_data.output_activation_min;
+    conv_params.activation.max = data.reference_op_data.output_activation_max;
+
+    cmsis_nn_per_channel_quant_params per_channel_quant_params;
+    per_channel_quant_params.multiplier =
+        const_cast<int32_t*>(data.per_channel_output_multiplier);  // buffer obtained in Prepare via AllocatePersistentBuffer (output_depth int32 entries)
+    per_channel_quant_params.shift =
+        const_cast<int32_t*>(data.per_channel_output_shift);  // buffer obtained in Prepare via AllocatePersistentBuffer (output_depth int32 entries)
+
+    for (int i = 0; i < data.output_depth; i++) {  // replicate the per-tensor multiplier/shift into every output channel on each call
+      per_channel_quant_params.multiplier[i] = quant_params.multiplier;
+      per_channel_quant_params.shift[i] = quant_params.shift;
+    }
+
+    TF_LITE_ENSURE_EQ(
+        context,
+        arm_convolve_1x1_s8_fast(
+            &ctx, &conv_params, &per_channel_quant_params, &input_dims,
+            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims, bias_data,
+            &output_dims, tflite::micro::GetTensorData<int8_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
+  } else {  // rank-2 output, or accum_depth not a multiple of 4: plain fully-connected kernel
+    cmsis_nn_fc_params fc_params;
+    fc_params.input_offset = -data.reference_op_data.input_zero_point;
+    fc_params.output_offset = data.reference_op_data.output_zero_point;
+    fc_params.filter_offset = 0;  // filter offset is fixed at 0 rather than taken from the filter zero point
+    fc_params.activation.min = data.reference_op_data.output_activation_min;
+    fc_params.activation.max = data.reference_op_data.output_activation_max;
+
+    TF_LITE_ENSURE_EQ(
+        context,
+        arm_fully_connected_s8(
+            &ctx, &fc_params, &quant_params, &input_dims,
+            tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
+            tflite::micro::GetTensorData<int8_t>(filter), &bias_dims, bias_data,
+            &output_dims, tflite::micro::GetTensorData<int8_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
   }
+#endif
+
+  return kTfLiteOk;  // reached only when the selected kernel reported ARM_CMSIS_NN_SUCCESS
+}
+
+TfLiteStatus EvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
+                                const OpData& data,
+                                const TfLiteEvalTensor* input,
+                                const TfLiteEvalTensor* filter,
+                                const TfLiteEvalTensor* bias,
+                                TfLiteEvalTensor* output) {  // int16 path: int16 input/output, int8 filter, int64 bias, run through arm_fully_connected_s16
+  cmsis_nn_per_tensor_quant_params quant_params;
+  cmsis_nn_dims input_dims;
+  cmsis_nn_dims filter_dims;
+  cmsis_nn_dims bias_dims;
+  cmsis_nn_dims output_dims;
+  cmsis_nn_context ctx;
+
+  PopulateCommonParams(context, &quant_params, &input_dims, &filter_dims,
+                       &bias_dims, &output_dims, &ctx, data);  // dims, per-tensor quant params and scratch context are all derived from OpData
+
+  const int64_t* bias_data =
+      tflite::micro::GetOptionalTensorData<int64_t>(bias);  // bias is read as int64; presumably nullptr when the optional bias is absent - confirm
+
+  cmsis_nn_fc_params fc_params;
+  fc_params.input_offset = -data.reference_op_data.input_zero_point;  // Prepare requires the int16 input zero point to be 0
+  fc_params.output_offset = data.reference_op_data.output_zero_point;  // Prepare requires the int16 output zero point to be 0
+  fc_params.filter_offset = 0;  // filter offset is fixed at 0
+  fc_params.activation.min = data.reference_op_data.output_activation_min;
+  fc_params.activation.max = data.reference_op_data.output_activation_max;
 
   TF_LITE_ENSURE_EQ(
       context,
-      arm_fully_connected_s8(
+      arm_fully_connected_s16(
           &ctx, &fc_params, &quant_params, &input_dims,
-          tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
-          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims,
-          tflite::micro::GetTensorData<int32_t>(bias), &output_dims,
-          tflite::micro::GetTensorData<int8_t>(output)),
-      ARM_MATH_SUCCESS);
+          tflite::micro::GetTensorData<int16_t>(input), &filter_dims,
+          tflite::micro::GetTensorData<int8_t>(filter), &bias_dims, bias_data,
+          &output_dims, tflite::micro::GetTensorData<int16_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
 
   return kTfLiteOk;
 }
@@ -205,59 +358,59 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
+      context, data.reference_op_data.filter_buffer_index, filter);
+
   // Checks in Prepare ensure input, output and filter types are all the same.
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
+      MicroPrintf("Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
+      const float* bias_data =
+          tflite::micro::GetOptionalTensorData<float>(bias);
       tflite::reference_ops::FullyConnected(
           FullyConnectedParamsFloat(params->activation),
           tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetTensorShape(bias), bias_data,
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       break;
     }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      return EvalQuantizedInt8(context, node, data, input, filter, bias,
-                               output);
-    }
-    case kTfLiteUInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      tflite::reference_ops::FullyConnected(
-          FullyConnectedParamsQuantized(data.reference_op_data),
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<uint8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<uint8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
+      switch (filter_int8.type) {
+        case kTfLiteInt8:
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
+        MicroPrintf("Filter data type %s currently not supported.",
+                              TfLiteTypeGetName(filter->type));
+        return kTfLiteError;
+#endif
+          return EvalQuantizedInt8(context, node, data, input, &filter_int8,
+                                   bias, output);
+        default:
+          MicroPrintf("Filter Type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+      }
       break;
     }
+    case kTfLiteInt16: {
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I16
+        MicroPrintf("Filter data type %s currently not supported.",
+                              TfLiteTypeGetName(filter->type));
+        return kTfLiteError;
+#endif
+      return EvalQuantizedInt16(context, node, data, input, filter, bias,
+                                output);
+    }
     default: {
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
     }
   }
@@ -285,44 +438,59 @@ TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {
 
   // Checks in Prepare ensure input, output and filter types are all the same.
   if (input->type != kTfLiteInt8) {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(input->type), input->type);
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
     return kTfLiteError;
   }
 
-  return EvalQuantizedInt8(context, node, data, input, filter, bias, output);
+  TfLiteEvalTensor filter_int8 = tflite::micro::MakeUnpackedInt4Tensor(
+      context, data.reference_op_data.filter_buffer_index, filter);
+
+  return EvalQuantizedInt8(context, node, data, input, &filter_int8, bias,
+                           output);
+}
+
+TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) {  // Invoke entry point that accepts only int16 input and forwards to EvalQuantizedInt16.
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kFullyConnectedInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kFullyConnectedWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      tflite::micro::GetEvalInput(context, node, kFullyConnectedBiasTensor);  // NOTE(review): Prepare treats bias as possibly null; confirm GetEvalInput tolerates an absent bias
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  // Prepare only enforces that input and output types match; the filter is read as int8 by EvalQuantizedInt16.
+  if (input->type != kTfLiteInt16) {
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
+    return kTfLiteError;
+  }
+
+  return EvalQuantizedInt16(context, node, data, input, filter, bias, output);
 }
 
 }  // namespace
 
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  fully_connected_registration.init = Init;
-  fully_connected_registration.free = nullptr;
-  fully_connected_registration.prepare = Prepare;
-  fully_connected_registration.invoke = Eval;
-  fully_connected_registration.profiling_string = nullptr;
-  fully_connected_registration.builtin_code = 0;
-  fully_connected_registration.custom_name = nullptr;
-  fully_connected_registration.version = 0;
-  return fully_connected_registration;
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);  // generic registration: Eval dispatches on the input type
 }
 
 TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
-  fully_connected_registration.init = Init;
-  fully_connected_registration.free = nullptr;
-  fully_connected_registration.prepare = Prepare;
-  fully_connected_registration.invoke = EvalInt8;
-  fully_connected_registration.profiling_string = nullptr;
-  fully_connected_registration.builtin_code = 0;
-  fully_connected_registration.custom_name = nullptr;
-  fully_connected_registration.version = 0;
-  return fully_connected_registration;
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt8);  // int8-only registration: EvalInt8 rejects any other input type
+}
+
+TfLiteRegistration Register_FULLY_CONNECTED_INT16() {
+  return tflite::micro::RegisterOp(Init, Prepare, EvalInt16);  // int16-only registration: EvalInt16 rejects any other input type
 }
 
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -352,6 +520,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -376,10 +545,14 @@ struct OpData {
   bool is_mli_applicable;
 
   // Tensors in MLI format.
-  mli_tensor* mli_in;
-  mli_tensor* mli_weights;
-  mli_tensor* mli_bias;
-  mli_tensor* mli_out;
+  mutable ops::micro::MliTensorInterface mli_in;
+  mutable ops::micro::MliTensorInterface mli_weights;
+  mutable ops::micro::MliTensorInterface mli_bias;
+  mutable ops::micro::MliTensorInterface mli_out;
+
+#ifdef MLI_2_0
+  mli_fully_connected_cfg* cfg;
+#endif
 };
 
 constexpr int kInputTensor = 0;
@@ -389,13 +562,19 @@ constexpr int kOutputTensor = 0;
 
 bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input,
                      const TfLiteTensor* filter, const TfLiteTensor* bias,
-                     const TfLiteFullyConnectedParams* params) {
+                     const TfLiteFullyConnectedParams* params,
+                     int32_t output_activation_min,  // activation range; only consulted when MLI_2_0 is not defined
+                     int32_t output_activation_max) {
   // MLI optimized version only supports int8_t datatype and no fused Relu and
   // symmetric per-tensor quantization of weights (not per-axis)
-  bool ret_val = (filter->type == kTfLiteInt8) &&
-                 (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) &&
-                 (params->activation == kTfLiteActNone) &&
-                 (filter->params.zero_point == 0);
+  bool ret_val =
+      (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) &&
+      (bias->type == kTfLiteInt32) &&  // NOTE(review): bias is dereferenced without a null check - confirm callers always supply a bias tensor
+#ifndef MLI_2_0
+      (params->activation == kTfLiteActNone ||  // without MLI_2_0: accept no fused activation, or a clamp range of exactly -128..127
+       (output_activation_min == -128 && output_activation_max == 127)) &&
+#endif
+      (filter->params.zero_point == 0);  // weights must have a zero point of 0; with MLI_2_0 defined the activation is not checked here
   return ret_val;
 }
 
@@ -437,10 +616,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto params =
       static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kWeightsTensor);
+  TfLiteTensor* bias = micro_context->AllocateTempInputTensor(node, kBiasTensor);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   TF_LITE_ENSURE_MSG(context, input->type == filter->type,
@@ -450,43 +633,74 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   data->filter_zero_point = filter->params.zero_point;
   data->output_zero_point = output->params.zero_point;
 
+  TfLiteStatus status = CalculateOpData(context, params, input->type, input,
+                                        filter, bias, output, data);
+
   data->is_mli_applicable =
-      IsMliApplicable(context, input, filter, bias, params);
+      IsMliApplicable(context, input, filter, bias, params,
+                      data->output_activation_min, data->output_activation_max);
 
   if (input->type == kTfLiteInt8 && data->is_mli_applicable) {
-    #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_I8
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
     TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                     TfLiteTypeGetName(output->type), output->type);
     return kTfLiteError;
-    #endif
-
-    data->mli_in = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_weights = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_bias = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_out = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-
-    ops::micro::ConvertToMliTensor(input, data->mli_in);
-    ops::micro::ConvertToMliTensor(filter, data->mli_weights);
-    ops::micro::ConvertToMliTensor(bias, data->mli_bias);
-    ops::micro::ConvertToMliTensor(output, data->mli_out);
+#endif
+    data->mli_in = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_weights = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_bias = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_out = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+
+    ops::micro::ConvertToMliTensor(input, &data->mli_in);
+    ops::micro::ConvertToMliTensor(filter, &data->mli_weights);
+    ops::micro::ConvertToMliTensor(bias, &data->mli_bias);
+#ifdef MLI_2_0
+    ops::micro::AdjustBiasTensor(&data->mli_bias, &data->mli_in,
+                                 &data->mli_weights);
+#endif
+    ops::micro::ConvertToMliTensor(output, &data->mli_out);
+
+#ifdef MLI_2_0
+    if (data->output_activation_min == -128 &&
+        data->output_activation_max == 127) {
+      data->cfg->relu.type = MLI_RELU_NONE;
+    } else if (params->activation == kTfLiteActRelu) {
+      data->cfg->relu.type = MLI_RELU_GEN;
+    } else if (params->activation == kTfLiteActRelu6) {
+      data->cfg->relu.type = MLI_RELU_6;
+    } else if (params->activation == kTfLiteActReluN1To1) {
+      data->cfg->relu.type = MLI_RELU_1;
+    } else {
+      data->cfg->relu.type = MLI_RELU_NONE;
+    }
+#endif
 
     /* The input tensor can have more than 2 dimensions. for the compute this
    doesn't make any difference because all the inputs or a batch entry will
    be used anyway. because the MLI kernel doesn't recognize the multiple
    dimensions, the tensor shape is casted to a {batchnum, inputsize} shape. */
-    data->mli_in->shape[0] = data->mli_out->shape[0];
-    data->mli_in->shape[1] = data->mli_weights->shape[1];
-    data->mli_in->shape[2] = 0;
-    data->mli_in->shape[3] = 0;
-    data->mli_in->rank = 2;
+    data->mli_in.Shape()[0] = data->mli_out.Shape()[0];
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+    data->mli_in.Shape()[1] = data->mli_weights.Shape()[0];
+#else
+    data->mli_in.Shape()[1] = data->mli_weights.Shape()[1];
+#endif
+    data->mli_in.Shape()[2] = 0;
+    data->mli_in.Shape()[3] = 0;
+    data->mli_in.MemStride()[0] = data->mli_in.Shape()[1];
+    data->mli_in.MemStride()[1] = 0;
+    *data->mli_in.Rank() = 2;
   }
 
-  return (CalculateOpData(context, params, input->type, input, filter, bias,
-                          output, data));
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  micro_context->DeallocateTempTfLiteTensor(bias);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return status;
 }
 
 TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
@@ -496,62 +710,103 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
                                   const TfLiteEvalTensor* filter,
                                   const TfLiteEvalTensor* bias,
                                   TfLiteEvalTensor* output) {
-  ops::micro::MliTensorAttachBuffer<int8_t>(input, data.mli_in);
-  ops::micro::MliTensorAttachBuffer<int8_t>(filter, data.mli_weights);
-  ops::micro::MliTensorAttachBuffer<int32_t>(bias, data.mli_bias);
-  ops::micro::MliTensorAttachBuffer<int8_t>(output, data.mli_out);
+  ops::micro::MliTensorAttachBuffer<int8_t>(input, &data.mli_in);
+  ops::micro::MliTensorAttachBuffer<int8_t>(filter, &data.mli_weights);
+  ops::micro::MliTensorAttachBuffer<int32_t>(bias, &data.mli_bias);
+  ops::micro::MliTensorAttachBuffer<int8_t>(output, &data.mli_out);
 
   // Tensors for data in fast (local) memory and config to copy data from
   // external to local memory
-  mli_tensor weights_local = *data.mli_weights;
-  mli_tensor bias_local = *data.mli_bias;
-  mli_tensor in_local = *data.mli_in;
-  mli_tensor out_local = *data.mli_out;
+  mli_tensor weights_local = *data.mli_weights.MliTensor();
+  mli_tensor bias_local = *data.mli_bias.MliTensor();
+  mli_tensor in_local = *data.mli_in.MliTensor();
+  mli_tensor out_local = *data.mli_out.MliTensor();
+
+  ops::micro::MliTensorInterface weights_local_interface(&weights_local);
+  ops::micro::MliTensorInterface bias_local_interface(&bias_local);
+  ops::micro::MliTensorInterface in_local_interface(&in_local);
+  ops::micro::MliTensorInterface out_local_interface(&out_local);
+
   mli_mov_cfg_t copy_config;
   mli_mov_cfg_for_copy(&copy_config);
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+  const int weight_out_dimension = 1;
+#else
   const int weight_out_dimension = 0;
+#endif
+  // bias has only 1 dimension
+  const int bias_out_ch_dimension = 0;
   const int out_tensor_dimension = 1;
   const int input_size_dimension = 1;
-  int slice_size = data.mli_weights->shape[weight_out_dimension];
+  int slice_size = data.mli_weights.Shape()[weight_out_dimension];
 
   /* allocate the local buffers, and compute the slice size */
   TF_LITE_ENSURE_STATUS(
       ops::micro::get_arc_scratch_buffer_for_fully_connect_tensors(
-          context, &in_local, &weights_local, &bias_local, &out_local));
+          context, &in_local_interface, &weights_local_interface,
+          &bias_local_interface, &out_local_interface));
   TF_LITE_ENSURE_STATUS(ops::micro::arc_scratch_buffer_calc_slice_size_weights(
-      &weights_local, &bias_local, weight_out_dimension, &slice_size));
-  int max_out_slice_size =
-      out_local.capacity / mli_hlp_tensor_element_size(&out_local);
+      &weights_local_interface, &bias_local_interface, weight_out_dimension,
+      &slice_size));
+
+  int max_out_slice_size = *out_local_interface.DataCapacity() /
+                           mli_hlp_tensor_element_size(&out_local);
+
   if (slice_size > max_out_slice_size) slice_size = max_out_slice_size;
 
   /* is_local indicates that the tensor is already in local memory,
      so in that case the original tensor can be used,
      and there is no need to copy it to the local tensor*/
-  const bool in_is_local = in_local.data == data.mli_in->data;
-  const bool out_is_local = out_local.data == data.mli_out->data;
-  const bool w_is_local = weights_local.data == data.mli_weights->data;
-  const bool b_is_local = bias_local.data == data.mli_bias->data;
-
-  ops::micro::TensorSlicer w_slice(data.mli_weights, weight_out_dimension,
-                                   slice_size);
-  ops::micro::TensorSlicer b_slice(data.mli_bias, weight_out_dimension,
-                                   slice_size);
-  ops::micro::TensorSlicer out_ch_slice(data.mli_out, out_tensor_dimension,
-                                        slice_size, 0, 0, 0, true);
+  const bool in_is_local =
+      in_local_interface.Data<int8_t>() == data.mli_in.Data<int8_t>();
+  const bool out_is_local =
+      out_local_interface.Data<int8_t>() == data.mli_out.Data<int8_t>();
+  const bool b_is_local =
+      bias_local_interface.Data<int32_t>() == data.mli_bias.Data<int32_t>();
+#ifndef MLI_2_0_KRNL_TEST
+  const bool w_is_local =
+      weights_local_interface.Data<int8_t>() == data.mli_weights.Data<int8_t>();
+#endif
 
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+  ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(),
+                                   weight_out_dimension, slice_size, 0, 0, 0,
+                                   true);
+#else
+  ops::micro::TensorSlicer w_slice(data.mli_weights.MliTensor(),
+                                   weight_out_dimension, slice_size);
+#endif
+  ops::micro::TensorSlicer b_slice(data.mli_bias.MliTensor(),
+                                   bias_out_ch_dimension, slice_size);
+  ops::micro::TensorSlicer out_ch_slice(data.mli_out.MliTensor(),
+                                        out_tensor_dimension, slice_size, 0, 0,
+                                        0, true);
+
+#ifdef MLI_2_0_KRNL_TEST
+  mli_tensor* w_ptr = &weights_local;
+#else
   mli_tensor* w_ptr = w_is_local ? w_slice.Sub() : &weights_local;
+#endif
   mli_tensor* b_ptr = b_is_local ? b_slice.Sub() : &bias_local;
 
   void* input_buffer_ptr = NULL;
 
   while (!w_slice.Done()) {
+#if defined(MLI_2_0) && !defined(MLI_2_0_KRNL_TEST)
+    w_ptr->el_params.sa.scale.mem.pi16 = NULL;
+    b_ptr->el_params.sa.scale.mem.pi16 = NULL;
+#endif
+
+#ifndef MLI_2_0_KRNL_TEST
     mli_mov_tensor_sync(w_slice.Sub(), &copy_config, w_ptr);
+#endif
     mli_mov_tensor_sync(b_slice.Sub(), &copy_config, b_ptr);
 
     // Slice the input over the batches (one at a time with the size of a
     // complete input)
-    ops::micro::TensorSlicer in_slice(data.mli_in, input_size_dimension,
-                                      data.mli_in->shape[input_size_dimension]);
+    ops::micro::TensorSlicer in_slice(
+        data.mli_in.MliTensor(), input_size_dimension,
+        data.mli_in.Shape()[input_size_dimension]);
 
     /* output tensor is already sliced in the output size dimension.
     out_ch_slice.Sub() is the tensor for the amount of output size of this
@@ -565,13 +820,41 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
     mli_tensor* in_ptr = in_is_local ? in_slice.Sub() : &in_local;
     mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local;
 
+#ifdef MLI_2_0_KRNL_TEST
+    /* Permute weights tensor to the HWCN layout */
+    // Assertion here to prevent usage of non-contiguous buffer memory.
+    if (data.mli_out.Shape()[out_tensor_dimension] !=
+        out_slice.Sub()->shape[0]) {
+      MicroPrintf("Slicing is not supported with real-time permutation.");
+      return kTfLiteError;
+    }
+    mli_permute_cfg permute_cfg = {{1, 0, 2, 3}};
+    ops::micro::permute_weights(data.mli_weights.MliTensor(), &permute_cfg,
+                                w_ptr, &out_ptr->data);
+#endif
+
     while (!out_slice.Done()) {
+      if (!out_is_local) {
+        ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local);
+        ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local);
+      }
       // if same input copy as previous iteration, skip the copy of input
+#ifdef MLI_2_0
+      if (in_slice.Sub()->data.mem.pi8 != input_buffer_ptr) {
+        mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
+        input_buffer_ptr = in_slice.Sub()->data.mem.pi8;
+      }
+      mli_fully_connected_cfg cfg;
+      cfg.relu.type = MLI_RELU_NONE;
+      mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, &cfg, out_ptr);
+#else
       if (in_slice.Sub()->data != input_buffer_ptr) {
         mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
         input_buffer_ptr = in_slice.Sub()->data;
       }
       mli_krn_fully_connected_sa8_sa8_sa32(in_ptr, w_ptr, b_ptr, out_ptr);
+#endif
+
       mli_mov_tensor_sync(out_ptr, &copy_config, out_slice.Sub());
 
       in_slice.Next();
@@ -584,55 +867,17 @@ TfLiteStatus EvalMliQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }
 
-TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
-                               const OpData& data,
-                               const TfLiteEvalTensor* input,
-                               const TfLiteEvalTensor* filter,
-                               const TfLiteEvalTensor* bias,
-                               TfLiteEvalTensor* output) {
-#if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  tflite::FullyConnectedParams op_params;
-  op_params.input_offset = -data.input_zero_point;
-  op_params.weights_offset = -data.filter_zero_point;
-  op_params.output_offset = data.output_zero_point;
-  op_params.output_multiplier = data.output_multiplier;
-  op_params.output_shift = -data.output_shift;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
-
-  reference_integer_ops::FullyConnected(
-      op_params, tflite::micro::GetTensorShape(input),
-      tflite::micro::GetTensorData<int8_t>(input),
-      tflite::micro::GetTensorShape(filter),
-      tflite::micro::GetTensorData<int8_t>(filter),
-      tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<int32_t>(bias),
-      tflite::micro::GetTensorShape(output),
-      tflite::micro::GetTensorData<int8_t>(output));
-  return kTfLiteOk;
-#else
-  TF_LITE_KERNEL_LOG(context,
-                     "Node configuration is not supported by ARC MLI Library.");
-  return kTfLiteError;
-#endif
-}
-
 TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                            const OpData& data, const TfLiteEvalTensor* input,
                            const TfLiteEvalTensor* filter,
                            const TfLiteEvalTensor* bias,
                            TfLiteEvalTensor* output) {
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  const int32_t input_offset = -data.input_zero_point;
-  const int32_t filter_offset = -data.filter_zero_point;
-  const int32_t output_offset = data.output_zero_point;
-
   tflite::FullyConnectedParams op_params;
-  op_params.input_offset = input_offset;
-  op_params.weights_offset = filter_offset;
-  op_params.output_offset = output_offset;
+  op_params.input_offset = -data.input_zero_point;
+  op_params.weights_offset = -data.filter_zero_point;
+  op_params.output_offset = data.output_zero_point;
   op_params.output_multiplier = data.output_multiplier;
-  // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.output_shift = -data.output_shift;
   op_params.quantized_activation_min = data.output_activation_min;
   op_params.quantized_activation_max = data.output_activation_max;
@@ -646,12 +891,13 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(bias),             \
       tflite::micro::GetTensorData<int32_t>(bias),     \
       tflite::micro::GetTensorShape(output),           \
-      tflite::micro::GetTensorData<output_data_type>(output))
+      tflite::micro::GetTensorData<output_data_type>(output))
+
   switch (output->type) {
     case kTfLiteUInt8:
       #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(output->type), output->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
       return kTfLiteError;
       #endif
 
@@ -659,26 +905,24 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
       break;
     case kTfLiteInt16:
       #if EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_I16
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(output->type), output->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
       return kTfLiteError;
       #endif
 
       TF_LITE_FULLY_CONNECTED(int16_t);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+      MicroPrintf("Type %s (%d) not supported.",
                          TfLiteTypeGetName(output->type), output->type);
       return kTfLiteError;
-  }
 
   return kTfLiteOk;
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Node configuration is not supported by ARC MLI Library.");
   return kTfLiteError;
 #endif
+  }
 }
 
 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
@@ -699,14 +943,13 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(filter),
       tflite::micro::GetTensorData<float>(filter),
       tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<float>(bias),
+      tflite::micro::GetOptionalTensorData<float>(bias),
       tflite::micro::GetTensorShape(output),
       tflite::micro::GetTensorData<float>(output));
   return kTfLiteOk;
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.",
+              TfLiteTypeGetName(input->type), input->type);
   return kTfLiteError;
 #endif
 }
@@ -732,9 +975,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {
     case kTfLiteFloat32:
       #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
-      context->ReportError(context,
-                            "Filter data type %s currently not supported.",
-                            TfLiteTypeGetName(filter->type));
+      MicroPrintf("Type %s (%d) not supported.",
+                      TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
       #endif
 
@@ -742,9 +984,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                        output);
     case kTfLiteInt8:
       #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
-      context->ReportError(context,
-                            "Filter data type %s currently not supported.",
-                            TfLiteTypeGetName(filter->type));
+      MicroPrintf("Type %s (%d) not supported.",
+                      TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
       #endif
 
@@ -752,37 +993,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         return EvalMliQuantizedInt8(context, node, params, data, input, filter,
                                     bias, output);
       } else {
-        return EvalQuantizedInt8(context, node, data, input, filter, bias,
-                                 output);
+        return EvalQuantized(context, node, data, input, filter, bias, output);
       }
 
     case kTfLiteUInt8:
       #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8
-      context->ReportError(context,
-                            "Filter data type %s currently not supported.",
-                            TfLiteTypeGetName(filter->type));
+      MicroPrintf("Type %s (%d) not supported.",
+                      TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
       #endif
 
       return EvalQuantized(context, node, data, input, filter, bias, output);
 
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
@@ -1050,7 +1282,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
       tflite::micro::GetTensorShape(filter),
       tflite::micro::GetTensorData<float>(filter),
       tflite::micro::GetTensorShape(bias),
-      tflite::micro::GetTensorData<float>(bias),
+      tflite::micro::GetOptionalTensorData<float>(bias),
       tflite::micro::GetTensorShape(output),
       tflite::micro::GetTensorData<float>(output));
   return kTfLiteOk;
@@ -1173,6 +1405,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
@@ -1181,10 +1415,13 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
+
 #include <esp_timer.h>
 
 long long fc_total_time = 0;
@@ -1199,6 +1436,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TFLITE_DCHECK(node->user_data != nullptr);
   TFLITE_DCHECK(node->builtin_data != nullptr);
 
@@ -1206,23 +1445,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto params =
       static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
-  const TfLiteTensor* input =
-      GetInput(context, node, kFullyConnectedInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kFullyConnectedWeightsTensor);
+  TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
+      node, kFullyConnectedWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
+      node, kFullyConnectedOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   TF_LITE_ENSURE_MSG(context, input->type == filter->type,
                      "Hybrid models are not supported on TFLite Micro.");
 
-  return CalculateOpDataFullyConnected(context, params->activation, input->type,
-                                       input, filter, bias, output, data);
+  TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
+                                 context, params->activation, input->type,
+                                 input, filter, bias, output, data));
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  if (bias != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(bias);
+  }
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -1247,11 +1496,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Checks in Prepare ensure input, output and filter types are all the same.
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       tflite::reference_ops::FullyConnected(
           FullyConnectedParamsFloat(params->activation),
           tflite::micro::GetTensorShape(input),
@@ -1259,21 +1508,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       break;
     }
 
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       const int32_t* bias_data =
           nullptr != bias ? tflite::micro::GetTensorData<int32_t>(bias)
                           : nullptr;
+#if ESP_NN
       const RuntimeShape& filter_shape = tflite::micro::GetTensorShape(filter);
       const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
       const int filter_dim_count = filter_shape.DimensionsCount();
@@ -1298,15 +1548,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         input_data += accum_depth;
         output_data += output_depth;
       }
+#else
+      tflite::reference_integer_ops::FullyConnected(
+          FullyConnectedParamsQuantized(data),
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(filter),
+          tflite::micro::GetTensorData<int8_t>(filter),
+          tflite::micro::GetTensorShape(bias), bias_data,
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+#endif
       break;
     }
 
     case kTfLiteUInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       tflite::reference_ops::FullyConnected(
           FullyConnectedParamsQuantized(data),
           tflite::micro::GetTensorShape(input),
@@ -1314,12 +1575,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<uint8_t>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
+          tflite::micro::GetOptionalTensorData<int32_t>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<uint8_t>(output));
       break;
     }
-    
     default: {
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                          TfLiteTypeGetName(input->type), input->type);
@@ -1333,19 +1593,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
+
 #else
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -1364,13 +1618,11 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -1382,6 +1634,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TFLITE_DCHECK(node->user_data != nullptr);
   TFLITE_DCHECK(node->builtin_data != nullptr);
 
@@ -1389,23 +1643,39 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto params =
       static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
-  const TfLiteTensor* input =
-      GetInput(context, node, kFullyConnectedInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter =
-      GetInput(context, node, kFullyConnectedWeightsTensor);
+  TfLiteTensor* filter = micro_context->AllocateTempInputTensor(
+      node, kFullyConnectedWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kFullyConnectedBiasTensor);
-  TfLiteTensor* output = GetOutput(context, node, kFullyConnectedOutputTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kFullyConnectedBiasTensor);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(
+      node, kFullyConnectedOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
-
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
-                     "Hybrid models are not supported on TFLite Micro.");
 
-  return CalculateOpDataFullyConnected(context, params->activation, input->type,
-                                       input, filter, bias, output, data);
+  if (filter->type == kTfLiteInt4) {
+    int filter_size =
+        RuntimeShape(filter->dims->size,
+                     reinterpret_cast<const int32_t*>(filter->dims->data))
+            .FlatSize();
+    context->RequestScratchBufferInArena(context, filter_size,
+                                         &data->filter_buffer_index);
+  }
+
+  TF_LITE_ENSURE_OK(context, CalculateOpDataFullyConnected(
+                                 context, params->activation, input->type,
+                                 input, filter, bias, output, data));
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
+  if (bias != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(bias);
+  }
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
@@ -1423,18 +1693,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       tflite::micro::GetEvalOutput(context, node, kFullyConnectedOutputTensor);
 
   TFLITE_DCHECK(node->user_data != nullptr);
+
   const auto& data =
       *(static_cast<const OpDataFullyConnected*>(node->user_data));
 
   // Checks in Prepare ensure input, output and filter types are all the same.
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32
+      MicroPrintf("Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::FullyConnected(
           FullyConnectedParamsFloat(params->activation),
           tflite::micro::GetTensorShape(input),
@@ -1442,54 +1712,86 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       break;
     }
 
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+#if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I8
+      MicroPrintf("Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
-      tflite::reference_integer_ops::FullyConnected(
-          FullyConnectedParamsQuantized(data),
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<int8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
+#endif
+      switch (filter->type) {
+        case kTfLiteInt4: {
+          int8_t* unpacked_filter_data = static_cast<int8_t*>(
+              context->GetScratchBuffer(context, data.filter_buffer_index));
+          tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(filter).FlatSize(),
+              unpacked_filter_data);
+          tflite::reference_integer_ops::FullyConnected(
+              FullyConnectedParamsQuantized(data),
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        case kTfLiteInt8: {
+          tflite::reference_integer_ops::FullyConnected(
+              FullyConnectedParamsQuantized(data),
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int8_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int32_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int8_t>(output));
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+        }
+      }
       break;
     }
 
-    case kTfLiteUInt8: {
-      #if EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      tflite::reference_ops::FullyConnected(
-          FullyConnectedParamsQuantized(data),
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<uint8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<uint8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
+    case kTfLiteInt16: {
+      switch (filter->type) {
+        case kTfLiteInt8: {
+          tflite::reference_integer_ops::FullyConnected(
+              FullyConnectedParamsQuantized(data),
+              tflite::micro::GetTensorShape(input),
+              tflite::micro::GetTensorData<int16_t>(input),
+              tflite::micro::GetTensorShape(filter),
+              tflite::micro::GetTensorData<int8_t>(filter),
+              tflite::micro::GetTensorShape(bias),
+              tflite::micro::GetOptionalTensorData<int64_t>(bias),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<int16_t>(output));
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(filter->type), filter->type);
+          return kTfLiteError;
+        }
+      }
       break;
     }
+
     default: {
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
     }
   }
@@ -1499,14 +1801,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_FULLY_CONNECTED() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h
index d60c157..b245abe 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -38,6 +38,14 @@ struct OpDataFullyConnected {
   int32_t input_zero_point;
   int32_t filter_zero_point;
   int32_t output_zero_point;
+
+// TODO(b/258710417): enable by default once optimized fully-connected works for
+// all targets.
+#if !defined(HEXAGON)
+  // A buffer used to store unpacked filter values. This is used if the source
+  // tensor is of n-bit precision that cannot be easily processed by kernels.
+  int filter_buffer_index;
+#endif
 };
 
 extern const int kFullyConnectedInputTensor;
@@ -65,14 +73,9 @@ TfLiteStatus CalculateOpDataFullyConnected(
 // (reference or optimized) must define this function.
 TfLiteRegistration Register_FULLY_CONNECTED();
 
-#if defined(CMSIS_NN) || defined(ARDUINO) || defined(EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP)
-// The Arduino is a special case where we use the CMSIS kernels, but because of
-// the current approach to building for Arduino, we do not support -DCMSIS_NN as
-// part of the build. As a result, we use defined(ARDUINO) as proxy for the
-// CMSIS kernels for this one special case.
-
-// Returns a TfLiteRegistration struct for cmsis_nn kernel variant that only
-// supports int8.
+#if defined(CMSIS_NN) || defined(HEXAGON) || defined(EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8.
 TfLiteRegistration Register_FULLY_CONNECTED_INT8();
 
 #else
@@ -86,6 +89,24 @@ inline TfLiteRegistration Register_FULLY_CONNECTED_INT8() {
 }
 
 #endif
+
+#if defined(CMSIS_NN)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int16.
+TfLiteRegistration Register_FULLY_CONNECTED_INT16();
+
+#else
+// Note that while this block gets used for both reference and optimized kernels
+// that do not have any specialized implementations, the only goal here is to
+// define a fallback that allows reference kernels to still be used
+// from applications that call a more specific kernel variant.
+
+inline TfLiteRegistration Register_FULLY_CONNECTED_INT16() {
+  return Register_FULLY_CONNECTED();
+}
+
+#endif
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_FULLY_CONNECTED_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cc
index 5e1fca1..d38ea3e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected_common.cc
@@ -65,6 +65,11 @@ TfLiteStatus CalculateOpDataFullyConnected(
                        &data->output_shift);
 
     data->input_zero_point = input->params.zero_point;
+    // Filter weights will always be symmetric quantized since we only support
+    // int8 quantization. See
+    // https://github.com/tensorflow/tensorflow/issues/44912 for additional
+    // context.
+    TFLITE_DCHECK(filter->params.zero_point == 0);
     data->filter_zero_point = filter->params.zero_point;
     data->output_zero_point = output->params.zero_point;
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cc
index 47d7fe8..4fb05d8 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,86 +14,116 @@ limitations under the License.
 ==============================================================================*/
 #ifndef TF_LITE_STATIC_MEMORY
 
-#include <stdint.h>
-
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace gather {
-
-template <typename T, typename CoordsT = int32>
-inline void Gather(const tflite::GatherParams& op_params,
-                   const RuntimeShape& input_shape, const T* input_data,
-                   const RuntimeShape& coords_shape, const CoordsT* coords_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  int axis = op_params.axis;
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kInputPositions = 1;
+constexpr int kOutputTensor = 0;
+
+// Gathers slices of `input` along `axis` at the positions given by `coords`.
+template <typename InputT, typename CoordsT = int32_t>
+TfLiteStatus Gather(const TfLiteGatherParams* params,
+                    const TfLiteEvalTensor* input,
+                    const TfLiteEvalTensor* coords, TfLiteEvalTensor* output) {
+  const InputT* input_data = tflite::micro::GetTensorData<InputT>(input);
+  const CoordsT* coords_data = tflite::micro::GetTensorData<CoordsT>(coords);
+  InputT* output_data = tflite::micro::GetTensorData<InputT>(output);
+  const TfLiteIntArray* input_dims = input->dims;
+  const int input_dims_size = input_dims->size;
+  int axis = params->axis;
   if (axis < 0) {
-    axis += input_shape.DimensionsCount();
+    axis += input_dims_size;
   }
   TFLITE_DCHECK_GE(axis, 0);
-  TFLITE_DCHECK_LT(axis, input_shape.DimensionsCount());
-  const int axis_size = input_shape.Dims(axis);
-  const int coords_count = coords_shape.FlatSize();
+  TFLITE_DCHECK_LT(axis, input_dims_size);
+
+  int batch_dims = params->batch_dims;
+  // batch_dims should be in range: [-rank(coords), rank(coords)].
+  // Negative batch_dims is added with rank of coords.
+  const TfLiteIntArray* coords_dims = coords->dims;
+  const int coords_dims_size = coords_dims->size;
+  if (batch_dims < 0) {
+    batch_dims += coords_dims_size;
+  }
+  TFLITE_DCHECK_GE(batch_dims, 0);
+  TFLITE_DCHECK_LT(batch_dims, input_dims_size);
+  TFLITE_DCHECK_LE(batch_dims, coords_dims_size);
+  TFLITE_DCHECK_GE(axis, batch_dims);
+  for (int i = 0; i < batch_dims; ++i) {
+    TFLITE_DCHECK_EQ(input_dims->data[i], coords_dims->data[i]);
+  }
 
+  const int axis_size = input_dims->data[axis];
+  int batch_size = 1;
+  for (int i = 0; i < batch_dims; ++i) {
+    batch_size *= input_dims->data[i];
+  }
   int outer_size = 1;
-  for (int i = 0; i < axis; ++i) {
-    outer_size *= input_shape.Dims(i);
+  for (int i = batch_dims; i < axis; ++i) {
+    outer_size *= input_dims->data[i];
   }
-
   int inner_size = 1;
-  for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i) {
-    inner_size *= input_shape.Dims(i);
-  }
-
-  for (int outer = 0; outer < outer_size; ++outer) {
-    for (int i = 0; i < coords_count; ++i) {
-      TFLITE_DCHECK_GE(coords_data[i], 0);
-      TFLITE_DCHECK_LT(coords_data[i], axis_size);
-      // TODO(rsun): replace memcpy with a for loop
-      std::memcpy(
-          output_data + (outer * coords_count + i) * inner_size,
-          input_data + (outer * axis_size + coords_data[i]) * inner_size,
-          sizeof(T) * inner_size);
+  for (int i = axis + 1; i < input_dims_size; ++i) {
+    inner_size *= input_dims->data[i];
+  }
+  int coord_size = 1;
+  for (int i = batch_dims; i < coords_dims_size; ++i) {
+    coord_size *= coords_dims->data[i];
+  }
+  // coords are laid out per batch, so validate the exact index being read.
+  for (int batch = 0; batch < batch_size; ++batch) {
+    for (int outer = 0; outer < outer_size; ++outer) {
+      for (int coord = 0; coord < coord_size; ++coord) {
+        TFLITE_DCHECK_GE(coords_data[batch * coord_size + coord], 0);
+        TFLITE_DCHECK_LT(coords_data[batch * coord_size + coord], axis_size);
+        std::memcpy(output_data +
+                        (((batch * outer_size) + outer) * coord_size + coord) *
+                            inner_size,
+                    input_data + (((batch * outer_size) + outer) * axis_size +
+                                  coords_data[batch * coord_size + coord]) *
+                                     inner_size,
+                    sizeof(InputT) * inner_size);
+      }
     }
   }
+  return kTfLiteOk;
 }
 
-constexpr int kInputTensor = 0;
-constexpr int kInputPositions = 1;
-constexpr int kOutputTensor = 0;
-
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const auto* params =
       reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
-
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  const TfLiteTensor* positions;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputPositions, &positions));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
-  switch (positions->type) {
-    case kTfLiteInt64:
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* coords =
+      micro_context->AllocateTempInputTensor(node, kInputPositions);
+  TF_LITE_ENSURE(context, coords != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  switch (coords->type) {
     case kTfLiteInt32:
       break;
     default:
-      context->ReportError(
-          context, "Positions of type '%s' are not supported by gather.",
-          TfLiteTypeGetName(positions->type));
+      MicroPrintf("Positions of type '%s' are not supported by gather.",
+                  TfLiteTypeGetName(coords->type));
       return kTfLiteError;
+      break;
   }
 
   // Assign to output the input type.
@@ -102,17 +132,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // Check conditions for different types.
   switch (input->type) {
     case kTfLiteFloat32:
-    case kTfLiteUInt8:
     case kTfLiteInt8:
-    case kTfLiteInt16:
-    case kTfLiteInt64:
-    case kTfLiteInt32:
-    case kTfLiteBool:
       break;
     default:
-      context->ReportError(context, "Type '%s' is not supported by gather.",
-                           TfLiteTypeGetName(input->type));
+      MicroPrintf("Type '%s' is not supported by gather.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
+      break;
   }
 
   int axis = params->axis;
@@ -121,112 +147,80 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
   TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input));
 
-  const int num_dimensions =
-      NumDimensions(input) + NumDimensions(positions) - 1;
-  TfLiteIntArray* output_shape = TfLiteIntArrayCreate(num_dimensions);
+  int batch_dims = params->batch_dims;
+  // batch_dims should be in range: [-rank(coords), rank(coords)].
+  // Negative batch_dims is added with rank of coords.
+  if (batch_dims < 0) {
+    batch_dims += NumDimensions(coords);
+  }
+  TF_LITE_ENSURE(context, batch_dims <= axis);
+  TF_LITE_ENSURE(context, 0 <= batch_dims && batch_dims < NumDimensions(input));
+  TF_LITE_ENSURE(context, batch_dims <= NumDimensions(coords));
+  for (int i = 0; i < batch_dims; ++i) {
+    TF_LITE_ENSURE_EQ(context, input->dims->data[i], coords->dims->data[i]);
+  }
+
+  // GATHER updates the output tensor dimensions, but TfLiteTensor in the
+  // MicroInterpreter is a temporary allocation. We must therefore relocate the
+  // dims from the FlatBuffer to the persistent storage arena.
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+
+  TfLiteIntArray* output_shape = output->dims;
+  output_shape->size =
+      NumDimensions(input) + NumDimensions(coords) - 1 - batch_dims;
   int output_index = 0;
   for (int i = 0; i < axis; ++i) {
     output_shape->data[output_index++] = input->dims->data[i];
   }
-  for (int i = 0; i < positions->dims->size; ++i) {
-    output_shape->data[output_index++] = positions->dims->data[i];
+  for (int i = batch_dims; i < coords->dims->size; ++i) {
+    output_shape->data[output_index++] = coords->dims->data[i];
   }
   for (int i = axis + 1; i < input->dims->size; ++i) {
     output_shape->data[output_index++] = input->dims->data[i];
   }
 
-  return kTfLiteOk;
-}
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(coords);
+  micro_context->DeallocateTempTfLiteTensor(output);
 
-template <typename InputT, typename PositionsT>
-TfLiteStatus Gather(const TfLiteGatherParams& params, const TfLiteTensor* input,
-                    const TfLiteTensor* positions, TfLiteTensor* output) {
-  tflite::GatherParams op_params;
-  op_params.axis = params.axis;
-  Gather(op_params, GetTensorShape(input),
-                    GetTensorData<InputT>(input), GetTensorShape(positions),
-                    GetTensorData<PositionsT>(positions),
-                    GetTensorShape(output), GetTensorData<InputT>(output));
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params =
       reinterpret_cast<const TfLiteGatherParams*>(node->builtin_data);
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  const TfLiteTensor* positions;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputPositions, &positions));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
-  if (positions->type == kTfLiteInt32) {
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* coords =
+      tflite::micro::GetEvalInput(context, node, kInputPositions);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  if (coords->type == kTfLiteInt32) {
     switch (input->type) {
       case kTfLiteFloat32:
-        return Gather<float, int32_t>(*params, input, positions, output);
-      case kTfLiteUInt8:
-        return Gather<uint8_t, int32_t>(*params, input, positions, output);
+        return Gather<float, int32_t>(params, input, coords, output);
+        break;
       case kTfLiteInt8:
-        return Gather<int8_t, int32_t>(*params, input, positions, output);
-      case kTfLiteInt16:
-        return Gather<int16_t, int32_t>(*params, input, positions, output);
-      case kTfLiteInt32:
-        return Gather<int32_t, int32_t>(*params, input, positions, output);
-      case kTfLiteInt64:
-        return Gather<int64_t, int32_t>(*params, input, positions, output);
-      case kTfLiteBool:
-        return Gather<bool, int32_t>(*params, input, positions, output);
+        return Gather<int8_t, int32_t>(params, input, coords, output);
+        break;
       default:
-        context->ReportError(context, "Type '%s' is not supported by gather.",
-                             TfLiteTypeGetName(input->type));
+        MicroPrintf("Type '%s' is not supported by gather.",
+                    TfLiteTypeGetName(input->type));
         return kTfLiteError;
+        break;
     }
   }
-  if (positions->type == kTfLiteInt64) {
-    switch (input->type) {
-      case kTfLiteFloat32:
-        return Gather<float, int64_t>(*params, input, positions, output);
-      case kTfLiteUInt8:
-        return Gather<uint8_t, int64_t>(*params, input, positions, output);
-      case kTfLiteInt8:
-        return Gather<int8_t, int64_t>(*params, input, positions, output);
-      case kTfLiteInt16:
-        return Gather<int16_t, int64_t>(*params, input, positions, output);
-      case kTfLiteInt32:
-        return Gather<int32_t, int64_t>(*params, input, positions, output);
-      case kTfLiteInt64:
-        return Gather<int64_t, int64_t>(*params, input, positions, output);
-      case kTfLiteBool:
-        return Gather<bool, int64_t>(*params, input, positions, output);
-      default:
-        context->ReportError(context, "Type '%s' is not supported by gather.",
-                             TfLiteTypeGetName(input->type));
-        return kTfLiteError;
-    }
-  }
-  context->ReportError(context,
-                       "Positions of type '%s' are not supported by gather.",
-                       TfLiteTypeGetName(positions->type));
-  return kTfLiteError;
+  return kTfLiteOk;
 }
-
-}  // namespace gather
-}  // namespace micro
-}  // namespace ops
+}  // namespace
 
 TfLiteRegistration Register_GATHER() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/ops::micro::gather::Prepare,
-          /*invoke=*/ops::micro::gather::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
-
-#endif // TF_LITE_STATIC_MEMORY
+#endif // TF_LITE_STATIC_MEMORY
\ No newline at end of file
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cc
new file mode 100644
index 0000000..5e4b261
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/gather_nd.cc
@@ -0,0 +1,212 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kParams = 0;
+constexpr int kIndices = 1;
+constexpr int kOutputTensor = 0;
+constexpr int MAX_INDICES_ND = 5;
+
+// Validates GATHER_ND inputs and writes the resulting output shape.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* params = micro_context->AllocateTempInputTensor(node, kParams);
+  TF_LITE_ENSURE(context, params != nullptr);
+  TfLiteTensor* indices =
+      micro_context->AllocateTempInputTensor(node, kIndices);
+  TF_LITE_ENSURE(context, indices != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  switch (params->type) {
+    case kTfLiteFloat32:
+    case kTfLiteInt8:
+      break;
+    default:
+      MicroPrintf("Params of type '%s' are not supported by gather_nd.",
+                  TfLiteTypeGetName(params->type));
+      return kTfLiteError;
+  }
+  switch (indices->type) {
+    case kTfLiteInt32:
+      break;
+    default:
+      MicroPrintf("Indices of type '%s' are not supported by gather_nd.",
+                  TfLiteTypeGetName(indices->type));
+      return kTfLiteError;
+  }
+
+  const int params_rank = NumDimensions(params);
+  const int indices_rank = NumDimensions(indices);
+  if (params_rank < 1) {
+    MicroPrintf("Params must be at least a vector.");
+    return kTfLiteError;
+  }
+  if (indices_rank < 1) {
+    MicroPrintf("Indices must be at least a vector.");
+    return kTfLiteError;
+  }
+  // Read the innermost dimension only now that indices_rank >= 1 is known.
+  const int indices_nd = SizeOfDimension(indices, indices_rank - 1);
+  if (indices_nd > params_rank) {
+    MicroPrintf("Index innermost dimension length must be <= params rank.");
+    return kTfLiteError;
+  }
+  if (indices_nd > MAX_INDICES_ND) {
+    MicroPrintf("Index innermost dimension length must not exceed %d.",
+                MAX_INDICES_ND);
+    return kTfLiteError;
+  }
+
+  // Assign to output the input type.
+  output->type = params->type;
+
+  // The tensor output dims must be relocated
+  // from the FlatBuffer to the persistent storage arena.
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+
+  // TFLM gather_nd does not create the output tensor, but it needs to ensure
+  // that the output shape is correct. The result shape is
+  // indices.shape[:-1] + params.shape[indices.shape[-1]:]
+  TfLiteIntArray* output_shape = output->dims;
+  int output_index = 0;
+  for (int i = 0; i < indices_rank - 1; ++i) {
+    output_shape->data[output_index++] = indices->dims->data[i];
+  }
+  for (int i = indices_nd; i < params_rank; ++i) {
+    output_shape->data[output_index++] = params->dims->data[i];
+  }
+  output_shape->size = output_index;
+
+  micro_context->DeallocateTempTfLiteTensor(params);
+  micro_context->DeallocateTempTfLiteTensor(indices);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+// Copies one `params` slice per index tuple; kTfLiteError if out of bounds.
+template <typename ParamsT, typename IndicesT>
+TfLiteStatus GatherNd(const TfLiteEvalTensor* params,
+                      const TfLiteEvalTensor* indices,
+                      TfLiteEvalTensor* output) {
+  const int indices_dims = indices->dims->size;
+  const int indices_nd = indices->dims->data[indices_dims - 1];
+  const int params_dims = params->dims->size;
+  const IndicesT* index_data = tflite::micro::GetTensorData<IndicesT>(indices);
+  const ParamsT* param_data = tflite::micro::GetTensorData<ParamsT>(params);
+  ParamsT* output_data = tflite::micro::GetTensorData<ParamsT>(output);
+
+  int n_slices = 1;
+  for (int i = 0; i < indices_dims - 1; ++i) {
+    n_slices *= indices->dims->data[i];
+  }
+
+  // If indices[-1] == params.rank, fetch single elements.
+  // If indices[-1] < params.rank, fetch slices.
+  int slice_size = 1;
+  for (int i = indices_nd; i < params_dims; ++i) {
+    slice_size *= params->dims->data[i];
+  }
+
+  int params_flat_size = ElementCount(*params->dims);
+  int remain_flat_size = params_flat_size;
+  // Number of elements per dimension
+  int dims_to_count[MAX_INDICES_ND];
+  for (int i = 0; i < indices_nd; ++i) {
+    dims_to_count[i] = remain_flat_size / params->dims->data[i];
+    remain_flat_size = dims_to_count[i];
+  }
+
+  for (int i = 0; i < n_slices; ++i) {
+    // 64-bit accumulation keeps each index * stride product from wrapping.
+    int64_t from_pos = 0;
+    for (int j = 0; j < indices_nd; ++j) {
+      const IndicesT index = index_data[i * indices_nd + j];
+      from_pos += static_cast<int64_t>(index) * dims_to_count[j];
+    }
+    if (from_pos < 0 || from_pos + slice_size > params_flat_size) {
+      return kTfLiteError;
+    }
+    std::memcpy(output_data + i * slice_size, param_data + from_pos,
+                sizeof(ParamsT) * slice_size);
+  }
+  return kTfLiteOk;
+}
+
+// Runs GatherNd for the params element type and logs a failed gather.
+template <typename IndicesT>
+TfLiteStatus EvalGatherNd(TfLiteContext* context,
+                          const TfLiteEvalTensor* params,
+                          const TfLiteEvalTensor* indices,
+                          TfLiteEvalTensor* output) {
+  const auto params_type = params->type;
+  if (params_type != kTfLiteFloat32 && params_type != kTfLiteInt8) {
+    MicroPrintf("Params type '%s' are not supported by gather_nd.",
+                TfLiteTypeGetName(params_type));
+    return kTfLiteError;
+  }
+
+  // GatherNd only fails when an index tuple points outside of params.
+  const TfLiteStatus result =
+      (params_type == kTfLiteFloat32)
+          ? GatherNd<float, IndicesT>(params, indices, output)
+          : GatherNd<int8_t, IndicesT>(params, indices, output);
+  if (result != kTfLiteOk) {
+    MicroPrintf("gather_nd index out of bounds");
+  }
+  return result;
+}
+
+// Evaluates GATHER_ND for a node; only int32 indices are dispatched.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* params_tensor =
+      tflite::micro::GetEvalInput(context, node, kParams);
+  const TfLiteEvalTensor* indices_tensor =
+      tflite::micro::GetEvalInput(context, node, kIndices);
+  TfLiteEvalTensor* output_tensor =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  // Guard clause: anything other than int32 indices is reported and rejected.
+  if (indices_tensor->type != kTfLiteInt32) {
+    MicroPrintf("Indices of type '%s' are not supported by gather_nd.",
+                TfLiteTypeGetName(indices_tensor->type));
+    return kTfLiteError;
+  }
+  return EvalGatherNd<int32_t>(context, params_tensor, indices_tensor,
+                               output_tensor);
+}
+}  // namespace
+
+TfLiteRegistration Register_GATHER_ND() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);  // no init hook
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cc
index b0c179e..0f8a718 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,72 +23,23 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace hard_swish {
-
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
+namespace {
 void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams));
 }
 
-TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
-    HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
-
-    params->input_zero_point = input->params.zero_point;
-    params->output_zero_point = output->params.zero_point;
-
-    const float input_scale = input->params.scale;
-    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
-    const float reluish_scale = 3.0f / 32768.0f;
-    const float output_scale = output->params.scale;
-
-    const double output_multiplier =
-        static_cast<double>(hires_input_scale / output_scale);
-    int32_t output_multiplier_fixedpoint_int32;
-    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
-                       &params->output_multiplier_exponent);
-    DownScaleInt32ToInt16Multiplier(
-        output_multiplier_fixedpoint_int32,
-        &params->output_multiplier_fixedpoint_int16);
-
-    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
-
-    const double reluish_multiplier =
-        static_cast<double>(hires_input_scale / reluish_scale);
-    int32_t reluish_multiplier_fixedpoint_int32;
-    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
-                       &params->reluish_multiplier_exponent);
-    DownScaleInt32ToInt16Multiplier(
-        reluish_multiplier_fixedpoint_int32,
-        &params->reluish_multiplier_fixedpoint_int16);
-  }
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kHardSwishInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kHardSwishOutputTensor);
   HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
 
   switch (input->type) {
@@ -99,13 +50,6 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
     } break;
-    case kTfLiteUInt8: {
-      tflite::reference_ops::HardSwish<uint8_t>(
-          *params, tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<uint8_t>(input),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
-    } break;
     case kTfLiteInt8: {
       tflite::reference_ops::HardSwish<int8_t>(
           *params, tflite::micro::GetTensorShape(input),
@@ -114,29 +58,18 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorData<int8_t>(output));
     } break;
     default: {
-      TF_LITE_KERNEL_LOG(
-          context,
-          "Only float32/int8_t/uint8_t are supported currently, got %s",
-          TfLiteTypeGetName(input->type));
+      MicroPrintf("Unsupported type %s", TfLiteTypeGetName(input->type));
       return kTfLiteError;
     }
   }
   return kTfLiteOk;
 }
 
-}  // namespace hard_swish
+}  // namespace
 
 TfLiteRegistration Register_HARD_SWISH() {
-  return {/*init=*/hard_swish::HardSwishInit,
-          /*free=*/nullptr,
-          /*prepare=*/hard_swish::HardSwishPrepare,
-          /*invoke=*/hard_swish::HardSwishEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(HardSwishInit, tflite::HardSwishPrepare,
+                                   HardSwishEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h
new file mode 100644
index 0000000..cb34f13
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h
@@ -0,0 +1,30 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+extern const int kHardSwishInputTensor;   // defined in hard_swish_common.cc
+extern const int kHardSwishOutputTensor;  // defined in hard_swish_common.cc
+
+TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node);
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_HARD_SWISH_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cc
new file mode 100644
index 0000000..1b82154
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish_common.cc
@@ -0,0 +1,86 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/hard_swish.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+const int kHardSwishInputTensor = 0;
+const int kHardSwishOutputTensor = 0;
+
+TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kHardSwishInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kHardSwishOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  if (input->type == kTfLiteInt8) {
+    HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
+
+    params->input_zero_point = input->params.zero_point;
+    params->output_zero_point = output->params.zero_point;
+
+    const float input_scale = input->params.scale;
+    const float hires_input_scale = (1.0f / 128.0f) * input_scale;
+    const float reluish_scale = 3.0f / 32768.0f;
+    const float output_scale = output->params.scale;
+
+    const double output_multiplier =
+        static_cast<double>(hires_input_scale / output_scale);
+    int32_t output_multiplier_fixedpoint_int32;
+    QuantizeMultiplier(output_multiplier, &output_multiplier_fixedpoint_int32,
+                       &params->output_multiplier_exponent);
+    DownScaleInt32ToInt16Multiplier(
+        output_multiplier_fixedpoint_int32,
+        &params->output_multiplier_fixedpoint_int16);
+
+    TF_LITE_ENSURE(context, params->output_multiplier_exponent <= 0);
+
+    const double reluish_multiplier =
+        static_cast<double>(hires_input_scale / reluish_scale);
+    int32_t reluish_multiplier_fixedpoint_int32;
+    QuantizeMultiplier(reluish_multiplier, &reluish_multiplier_fixedpoint_int32,
+                       &params->reluish_multiplier_exponent);
+    DownScaleInt32ToInt16Multiplier(
+        reluish_multiplier_fixedpoint_int32,
+        &params->reluish_multiplier_fixedpoint_int16);
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cc
new file mode 100644
index 0000000..afa9920
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/if.cc
@@ -0,0 +1,121 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+struct OpData {
+  int then_subgraph_index;
+  int else_subgraph_index;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  const auto* params =
+      reinterpret_cast<const TfLiteIfParams*>(node->builtin_data);
+  op_data->then_subgraph_index = params->then_subgraph_index;
+  op_data->else_subgraph_index = params->else_subgraph_index;
+
+  TF_LITE_ENSURE(context, node->inputs->size > 0);
+
+  // The first input is the condition.
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
+
+  TF_LITE_ENSURE(context, cond != nullptr);
+  TF_LITE_ENSURE_EQ(context, cond->type, kTfLiteBool);
+  TF_LITE_ENSURE_EQ(context, NumElements(cond), 1);
+
+  micro_context->DeallocateTempTfLiteTensor(cond);
+
+  // The first input of the node is the condition. The rest of inputs are
+  // passed to the branch subgraphs. Therefore, the number of subgraph inputs
+  // will be the number of node inputs - 1.
+  size_t num_inputs = node->inputs->size - 1;
+  size_t num_outputs = node->outputs->size;
+
+  MicroGraph& graph_info = micro_context->graph();
+
+  TF_LITE_ENSURE(context,
+                 op_data->then_subgraph_index < graph_info.NumSubgraphs());
+  TF_LITE_ENSURE(context,
+                 op_data->else_subgraph_index < graph_info.NumSubgraphs());
+
+  TF_LITE_ENSURE_EQ(context, num_inputs,
+                    graph_info.NumSubgraphInputs(op_data->then_subgraph_index));
+  TF_LITE_ENSURE_EQ(
+      context, num_outputs,
+      graph_info.NumSubgraphOutputs(op_data->then_subgraph_index));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  TfLiteTensor* cond = micro_context->AllocateTempInputTensor(node, 0);
+
+  TF_LITE_ENSURE(context, cond != nullptr);
+  bool cond_value = cond->data.b[0];
+  micro_context->DeallocateTempTfLiteTensor(cond);
+
+  MicroGraph* graph_info = &micro_context->graph();
+  // Currently we copy the input / output between the subgraphs.
+  int active_branch_subgraph_index =
+      cond_value ? op_data->then_subgraph_index : op_data->else_subgraph_index;
+
+  TF_LITE_ENSURE_OK(context,
+                    tflite::micro::CopyOpInputsToSubgraphInputs(
+                        context, node, graph_info, active_branch_subgraph_index,
+                        /*first_tensor_idx=*/1));
+
+  TF_LITE_ENSURE_OK(context,
+                    graph_info->InvokeSubgraph(active_branch_subgraph_index));
+
+  TF_LITE_ENSURE_OK(
+      context, tflite::micro::CopySubgraphOutputsToOpOutputs(
+                   context, node, graph_info, active_branch_subgraph_index));
+
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+TfLiteRegistration Register_IF() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cc
index bf7dd92..e731f4e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.cc
@@ -15,147 +15,107 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h"
 
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h"
 
 namespace tflite {
 namespace micro {
 
-namespace {
-constexpr size_t kBufferAlignment = 16;
-}  // namespace
-
 // TODO(b/161841696): Consider moving away from global arena buffers:
-constexpr int KernelRunner::kNumScratchBuffers_;
 constexpr int KernelRunner::kKernelRunnerBufferSize_;
 uint8_t KernelRunner::kKernelRunnerBuffer_[];
 
+void ClearBufferApi(TfLiteContext* context_) {
+  context_->GetScratchBuffer = nullptr;
+  context_->GetExternalContext = nullptr;
+  context_->AllocatePersistentBuffer = nullptr;
+  context_->RequestScratchBufferInArena = nullptr;
+}
+
 KernelRunner::KernelRunner(const TfLiteRegistration& registration,
                            TfLiteTensor* tensors, int tensors_size,
                            TfLiteIntArray* inputs, TfLiteIntArray* outputs,
-                           void* builtin_data)
-    : allocator_(SimpleMemoryAllocator::Create(GetMicroErrorReporter(),
-                                               kKernelRunnerBuffer_,
-                                               kKernelRunnerBufferSize_)),
-      registration_(registration),
-      tensors_(tensors) {
+                           void* builtin_data, TfLiteIntArray* intermediates)
+    : registration_(registration),
+      allocator_(SingleArenaBufferAllocator::Create(kKernelRunnerBuffer_,
+                                                    kKernelRunnerBufferSize_)),
+      mock_micro_graph_(allocator_),
+      fake_micro_context_(tensors, allocator_, &mock_micro_graph_) {
   // Prepare TfLiteContext:
-  context_.impl_ = static_cast<void*>(this);
-  context_.ReportError = ReportOpError;
+  context_.impl_ = static_cast<void*>(&fake_micro_context_);
+  context_.ReportError = MicroContextReportOpError;
   context_.recommended_num_threads = 1;
-  context_.GetTensor = GetTensor;
-  context_.GetEvalTensor = GetEvalTensor;
-  context_.AllocatePersistentBuffer = AllocatePersistentBuffer;
-  context_.RequestScratchBufferInArena = RequestScratchBufferInArena;
-  context_.GetScratchBuffer = GetScratchBuffer;
+  context_.GetTensor = MicroContextGetTensor;
+  context_.GetEvalTensor = MicroContextGetEvalTensor;
+  tflite::micro::ClearBufferApi(&context_);
+  context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
+
+  context_.recommended_num_threads = 0;
 
   // Prepare TfLiteNode:
   node_.inputs = inputs;
   node_.outputs = outputs;
   node_.builtin_data = builtin_data;
+  node_.intermediates = intermediates;
+}
+
+bool KernelRunner::ValidateTempBufferDeallocated() {
+  return fake_micro_context_.IsAllTempTfLiteTensorDeallocated();
 }
 
 TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data,
                                           size_t length) {
   if (registration_.init) {
+    tflite::micro::ClearBufferApi(&context_);
+    context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
     node_.user_data = registration_.init(&context_, init_data, length);
   }
+
+  TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
+
   if (registration_.prepare) {
+    tflite ::micro::ClearBufferApi(&context_);
+    context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
+    context_.RequestScratchBufferInArena =
+        MicroContextRequestScratchBufferInArena;
+    context_.GetExternalContext = MicroContextGetExternalContext;
     TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_));
   }
+
+  TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
+
   return kTfLiteOk;
 }
 
 TfLiteStatus KernelRunner::Invoke() {
+  tflite::micro::ClearBufferApi(&context_);
+  context_.GetScratchBuffer = MicroContextGetScratchBuffer;
+
   if (registration_.invoke == nullptr) {
     MicroPrintf("TfLiteRegistration missing invoke function pointer!");
     return kTfLiteError;
   }
-  return registration_.invoke(&context_, &node_);
-}
-
-TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context,
-                                      int tensor_index) {
-  TFLITE_DCHECK(context != nullptr);
-  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
-  TFLITE_DCHECK(runner != nullptr);
 
-  return &runner->tensors_[tensor_index];
-}
-
-TfLiteEvalTensor* KernelRunner::GetEvalTensor(
-    const struct TfLiteContext* context, int tensor_index) {
-  TFLITE_DCHECK(context != nullptr);
-  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
-  TFLITE_DCHECK(runner != nullptr);
-
-  TfLiteEvalTensor* eval_tensor =
-      reinterpret_cast<TfLiteEvalTensor*>(runner->allocator_->AllocateTemp(
-          sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
-  TFLITE_DCHECK(eval_tensor != nullptr);
-
-  // In unit tests, the TfLiteTensor pointer contains the source of truth for
-  // buffers and values:
-  eval_tensor->data = runner->tensors_[tensor_index].data;
-  eval_tensor->dims = runner->tensors_[tensor_index].dims;
-  eval_tensor->type = runner->tensors_[tensor_index].type;
-  return eval_tensor;
-}
+  TF_LITE_ENSURE_STATUS(registration_.invoke(&context_, &node_));
 
-void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context,
-                                             size_t bytes) {
-  TFLITE_DCHECK(context != nullptr);
-  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
-  TFLITE_DCHECK(runner != nullptr);
+  TF_LITE_ENSURE(&context_, ValidateTempBufferDeallocated());
 
-  return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
+  return kTfLiteOk;
 }
 
-TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context,
-                                                       size_t bytes,
-                                                       int* buffer_index) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(buffer_index != nullptr);
+TfLiteStatus KernelRunner::Free() {
+  tflite::micro::ClearBufferApi(&context_);
+  context_.GetScratchBuffer = MicroContextGetScratchBuffer;
 
-  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
-  TFLITE_DCHECK(runner != nullptr);
-
-  if (runner->scratch_buffer_count_ == kNumScratchBuffers_) {
-    MicroPrintf("Exceeded the maximum number of scratch tensors allowed (%d).",
-                kNumScratchBuffers_);
+  if (registration_.free == nullptr) {
+    MicroPrintf("TfLiteRegistration missing free function pointer!");
     return kTfLiteError;
   }
 
-  // For tests, we allocate scratch buffers from the tail and keep them around
-  // for the lifetime of model. This means that the arena size in the tests will
-  // be more than what we would have if the scratch buffers could share memory.
-  runner->scratch_buffers_[runner->scratch_buffer_count_] =
-      runner->allocator_->AllocateFromTail(bytes, kBufferAlignment);
-  TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] !=
-                nullptr);
-
-  *buffer_index = runner->scratch_buffer_count_++;
+  registration_.free(&context_, node_.user_data);
   return kTfLiteOk;
 }
-
-void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) {
-  TFLITE_DCHECK(context != nullptr);
-  KernelRunner* runner = reinterpret_cast<KernelRunner*>(context->impl_);
-  TFLITE_DCHECK(runner != nullptr);
-
-  TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_);
-  if (buffer_index >= runner->scratch_buffer_count_) {
-    return nullptr;
-  }
-  return runner->scratch_buffers_[buffer_index];
-}
-
-void KernelRunner::ReportOpError(struct TfLiteContext* context,
-                                 const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  GetMicroErrorReporter()->Report(format, args);
-  va_end(args);
-}
-
 }  // namespace micro
-}  // namespace tflite
+}  // namespace tflite
\ No newline at end of file
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h
index 7a29c86..cf3c690 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_runner.h
@@ -18,7 +18,9 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/fake_micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h"
 
 namespace tflite {
 namespace micro {
@@ -33,7 +35,8 @@ class KernelRunner {
  public:
   KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors,
                int tensors_size, TfLiteIntArray* inputs,
-               TfLiteIntArray* outputs, void* builtin_data);
+               TfLiteIntArray* outputs, void* builtin_data,
+               TfLiteIntArray* intermediates = nullptr);
 
   // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any
   // exceptions will be DebugLog'd and returned as a status code.
@@ -45,34 +48,31 @@ class KernelRunner {
   // passed into the constructor of this class.
   TfLiteStatus Invoke();
 
- protected:
-  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
-                                 int tensor_index);
-  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
-                                         int tensor_index);
-  static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes);
-  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context,
-                                                  size_t bytes,
-                                                  int* buffer_index);
-  static void* GetScratchBuffer(TfLiteContext* context, int buffer_index);
-  static void ReportOpError(struct TfLiteContext* context, const char* format,
-                            ...);
+  // Calls Free on a given TfLiteRegistration pointer (if it's implemented).
+  // After successful Free, kTfLiteOk status will be returned. If Free is not
+  // implemented for a given kernel, kTfLiteError will be returned.
+  TfLiteStatus Free();
 
- private:
-  static constexpr int kNumScratchBuffers_ = 12;
+  // Returns a pointer to the internal MockMicroGraph which KernelRunner uses
+  // to stub out MicroGraph methods and track invocations on each subgraph.
+  MockMicroGraph* GetMockGraph() { return &mock_micro_graph_; }
+
+  // Returns true if all temp buffers in tests are deallocated.
+  // TODO(b/209453859): move this function to private after deallocation checks
+  // are enabled for all kernel tests.
+  bool ValidateTempBufferDeallocated();
 
+ private:
   static constexpr int kKernelRunnerBufferSize_ = 10000;
   static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_];
 
-  SimpleMemoryAllocator* allocator_ = nullptr;
-  const TfLiteRegistration& registration_;
-  TfLiteTensor* tensors_ = nullptr;
-
   TfLiteContext context_ = {};
   TfLiteNode node_ = {};
+  const TfLiteRegistration& registration_;
 
-  int scratch_buffer_count_ = 0;
-  uint8_t* scratch_buffers_[kNumScratchBuffers_];
+  SingleArenaBufferAllocator* allocator_;
+  MockMicroGraph mock_micro_graph_;
+  FakeMicroContext fake_micro_context_;
 };
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h
index b44f4a5..616e7ff 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,48 +21,71 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
 
 namespace tflite {
 namespace micro {
 
+TfLiteRegistration RegisterOp(
+    void* (*init)(TfLiteContext* context, const char* buffer, size_t length),
+    TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node),
+    TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node),
+    void (*free)(TfLiteContext* context, void* buffer) = nullptr);
+
+// Prints out n bytes in an int8_t buffer as hex
+void PrintNBytes(const int8_t* tensor_data, int n_bytes,
+                 const char* prefix = nullptr);
+
+// Prints out the n bytes in a TfLiteEvalTensor as hex
+void PrintNBytes(const TfLiteEvalTensor* tensor, int n_bytes,
+                 const char* prefix = nullptr);
+
+// Prints out the n bytes in a TfLiteTensor as hex
+void PrintNBytes(const TfLiteTensor* tensor, int n_bytes,
+                 const char* prefix = nullptr);
+
 // Returns a mutable tensor for a given input index. is_variable must be checked
 // during prepare when the full TfLiteTensor is available.
-inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
-                                             const TfLiteNode* node,
-                                             int index) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
-  return context->GetEvalTensor(context, node->inputs->data[index]);
-}
+TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
+                                      const TfLiteNode* node, int index);
 
 // Returns the TfLiteEvalTensor struct for a given input index in a node.
-inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
-                                            const TfLiteNode* node, int index) {
-  return GetMutableEvalInput(context, node, index);
-}
+const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
+                                     const TfLiteNode* node, int index);
 
 // Returns the TfLiteEvalTensor struct for a given output index in a node.
-inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
-                                       const TfLiteNode* node, int index) {
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(node != nullptr);
-  return context->GetEvalTensor(context, node->outputs->data[index]);
-}
+TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
+                                const TfLiteNode* node, int index);
 
-// Returns data for a TfLiteEvalTensor struct.
+// Returns data for a TfLiteEvalTensor struct that is expected to exist.
 template <typename T>
 T* GetTensorData(TfLiteEvalTensor* tensor) {
-  return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr;
+  TFLITE_DCHECK(tensor != nullptr);
+  return reinterpret_cast<T*>(tensor->data.raw);
 }
 
-// Returns const data for a TfLiteEvalTensor struct.
+// Returns const data for a TfLiteEvalTensor struct that is expected to exist.
 template <typename T>
 const T* GetTensorData(const TfLiteEvalTensor* tensor) {
   TFLITE_DCHECK(tensor != nullptr);
   return reinterpret_cast<const T*>(tensor->data.raw);
 }
 
+// Returns data for a TfLiteEvalTensor struct that could be null.
+template <typename T>
+T* GetOptionalTensorData(TfLiteEvalTensor* tensor) {
+  return tensor == nullptr ? nullptr : reinterpret_cast<T*>(tensor->data.raw);
+}
+
+// Returns const data for a TfLiteEvalTensor struct that could be null.
+template <typename T>
+const T* GetOptionalTensorData(const TfLiteEvalTensor* tensor) {
+  return tensor == nullptr ? nullptr
+                           : reinterpret_cast<const T*>(tensor->data.raw);
+}
+
 // Returns the shape of a TfLiteEvalTensor struct.
 const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor);
 
@@ -72,10 +95,50 @@ bool HaveSameShapes(const TfLiteEvalTensor* input1,
 
 PaddingType RuntimePaddingType(TfLitePadding padding);
 
+// Relocate tensor dims from FlatBuffer to the persistent storage arena.
+// The old dims data is copied to the new storage area.
+// The tensor and eval_tensor must be the same tensor.
+// Only use during Prepare phase.
 TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
                                               TfLiteTensor* tensor,
                                               TfLiteEvalTensor* eval_tensor);
 
+// Copy all op input tensors to op output tensors. Requires all op input tensor
+// shapes and types to be identical to op output tensor shapes and types.
+TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node);
+
+// Copy all op input tensors to subgraph input tensors. Requires all op input
+// tensor shapes and types to be identical to subgraph input tensor shapes and
+// types.
+TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
+                                          TfLiteNode* node,
+                                          MicroGraph* graph_info,
+                                          int subgraph_idx,
+                                          int first_tensor_idx);
+
+// Copy all op output tensors to subgraph input tensors. Requires all op output
+// tensor shapes and types to be identical to subgraph input tensor shapes and
+// types.
+TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
+                                           TfLiteNode* node,
+                                           MicroGraph* graph_info,
+                                           int subgraph_idx);
+
+// Copy all subgraph output tensors to op outputs. Requires all subgraph output
+// tensor shapes and types to be identical to op output tensor shapes and types.
+TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
+                                            TfLiteNode* node,
+                                            MicroGraph* graph_info,
+                                            int subgraph_idx);
+
+// If tensor is INT4, make a new TfLiteEvalTensor with data unpacked into
+// a scratch buffer. The returned tensor will have the kTfLiteInt8 type.
+// Assume scratch buffer is previously requested in Prepare, and
+// scratch_buffer_index can be used to retrieve that buffer.
+// If the tensor is not INT4, a shallow copy is returned.
+TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context,
+                                        int scratch_buffer_index,
+                                        const TfLiteEvalTensor* tensor);
 }  // namespace micro
 }  // namespace tflite
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cc
index 35fe072..73ab130 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util_micro.cc
@@ -16,10 +16,74 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace micro {
 
+namespace {
+
+int ValidateTensorIndexing(const TfLiteContext* context, int index,
+                           int max_size, const int* tensor_indices) {
+  if (index >= 0 && index < max_size) {
+    const int tensor_index = tensor_indices[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      return tensor_index;
+    }
+  }
+  return -1;
+}
+
+}  // namespace
+
+TfLiteRegistration RegisterOp(
+    void* (*init)(TfLiteContext* context, const char* buffer, size_t length),
+    TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node),
+    TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node),
+    void (*free)(TfLiteContext* context, void* buffer)) {
+  return {/*init=*/init,
+          /*free=*/free,
+          /*prepare=*/prepare,
+          /*invoke=*/invoke,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0,
+          /*registration_external=*/nullptr};
+}
+
+// Returns a mutable tensor for a given input index. is_variable must be checked
+// during prepare when the full TfLiteTensor is available.
+TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context,
+                                      const TfLiteNode* node, int index) {
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(node != nullptr);
+  const int tensor_index = ValidateTensorIndexing(
+      context, index, node->inputs->size, node->inputs->data);
+
+  if (tensor_index < 0) {
+    return nullptr;
+  }
+
+  return context->GetEvalTensor(context, node->inputs->data[index]);
+}
+
+// Returns the TfLiteEvalTensor struct for a given input index in a node.
+const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context,
+                                     const TfLiteNode* node, int index) {
+  return GetMutableEvalInput(context, node, index);
+}
+
+// Returns the TfLiteEvalTensor struct for a given output index in a node.
+TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context,
+                                const TfLiteNode* node, int index) {
+  TFLITE_DCHECK(context != nullptr);
+  TFLITE_DCHECK(node != nullptr);
+  return context->GetEvalTensor(context, node->outputs->data[index]);
+}
+
 bool HaveSameShapes(const TfLiteEvalTensor* input1,
                     const TfLiteEvalTensor* input2) {
   TFLITE_DCHECK(input1 != nullptr);
@@ -60,8 +124,7 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
   TF_LITE_ENSURE(context, eval_tensor != nullptr);
   TF_LITE_ENSURE(context, context->AllocatePersistentBuffer != nullptr);
   int ranks = tensor->dims->size;
-  // always allocate max ranks to allow for reshaping
-  size_t alloc_size = TfLiteIntArrayGetSizeInBytes(RuntimeShape::kMaxSmallSize);
+  size_t alloc_size = TfLiteIntArrayGetSizeInBytes(ranks);
   TfLiteIntArray* new_dims = static_cast<TfLiteIntArray*>(
       context->AllocatePersistentBuffer(context, alloc_size));
   TfLiteIntArray* old_dims = tensor->dims;
@@ -71,8 +134,147 @@ TfLiteStatus CreateWritableTensorDimsWithCopy(TfLiteContext* context,
   for (int i = 0; i < ranks; i++) {
     new_dims->data[i] = old_dims->data[i];
   }
+
+  return kTfLiteOk;
+}
+
+// Asserts (TFLITE_DCHECK) matching types, then yields the shared byte length
+// of the two tensors; a length mismatch yields -1 converted to size_t.
+size_t ValidateAndGetTensorSizes(const TfLiteEvalTensor* tensor1,
+                                 const TfLiteEvalTensor* tensor2) {
+  TFLITE_DCHECK(tensor1->type == tensor2->type);
+  size_t bytes1 = 0;
+  size_t bytes2 = 0;
+  TfLiteEvalTensorByteLength(tensor1, &bytes1);
+  TfLiteEvalTensorByteLength(tensor2, &bytes2);
+  return (bytes1 != bytes2) ? static_cast<size_t>(-1) : bytes1;
+}
+
+TfLiteStatus CopyOpInputsToOpOutputs(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE(context, node->inputs->size == node->outputs->size);
+  for (int i = 0; i < node->inputs->size; ++i) {
+    // Input i is copied into output i; their byte sizes must match.
+    const TfLiteEvalTensor* src = tflite::micro::GetEvalInput(context, node, i);
+    TfLiteEvalTensor* dst = tflite::micro::GetEvalOutput(context, node, i);
+    int bytes = ValidateAndGetTensorSizes(src, dst);
+    TF_LITE_ENSURE(context, bytes >= 0);
+    memcpy(dst->data.raw, src->data.raw, bytes);
+  }
   return kTfLiteOk;
 }
 
+//  Args:
+//    1. tensor_data - int8_t buffer of unknown size whose data you'd like to
+//    print.
+//    2. n_bytes - a small int giving the number of bytes to print to the
+//    debug output. It should always be <= tensor_data's size; values <= 0
+//    print no bytes.
+//    3. prefix - optional message printed before the bytes; pass nullptr to
+//    omit it.
+//
+//  Purpose:
+//    Prints n_bytes bytes from the tensor_data buffer in hexadecimal, one
+//    MicroPrintf call per byte. This can be used to debug the output of a
+//    model and its ops.
+
+void PrintNBytes(const int8_t* tensor_data, int n_bytes, const char* prefix) {
+  if (prefix != nullptr) {
+    MicroPrintf("%s", prefix);
+  }
+  // Print each byte as unsigned so negative values are not sign-extended.
+  for (int i = 0; i < n_bytes; ++i) {
+    MicroPrintf(" %x", static_cast<uint8_t>(tensor_data[i]));
+  }
+  MicroPrintf("\n");
+}
+
+// Same as the int8_t* PrintNBytes above, but the buffer is first extracted
+// from a TfLiteEvalTensor*.
+void PrintNBytes(const TfLiteEvalTensor* tensor, int n_bytes,
+                 const char* prefix) {
+  const int8_t* tensor_data = tflite::micro::GetTensorData<int8_t>(tensor);
+  PrintNBytes(tensor_data, n_bytes, prefix);
+}
+
+// Same as the int8_t* PrintNBytes above, but the buffer is first extracted
+// from a TfLiteTensor*.
+void PrintNBytes(const TfLiteTensor* tensor, int n_bytes, const char* prefix) {
+  const int8_t* tensor_data = tflite::GetTensorData<int8_t>(tensor);
+  PrintNBytes(tensor_data, n_bytes, prefix);
+}
+
+TfLiteStatus CopyOpInputsToSubgraphInputs(TfLiteContext* context,
+                                          TfLiteNode* node,
+                                          MicroGraph* graph_info,
+                                          int subgraph_idx,
+                                          int first_tensor_idx) {
+  TF_LITE_ENSURE(context,
+                 static_cast<size_t>(node->inputs->size - first_tensor_idx) ==
+                     graph_info->NumSubgraphInputs(subgraph_idx));
+  for (int i = 0; i < node->inputs->size - first_tensor_idx; ++i) {
+    // Op input (first_tensor_idx + i) is copied into subgraph input i.
+    const TfLiteEvalTensor* src =
+        tflite::micro::GetEvalInput(context, node, first_tensor_idx + i);
+    TfLiteEvalTensor* dst = graph_info->GetSubgraphInput(subgraph_idx, i);
+    int bytes = ValidateAndGetTensorSizes(src, dst);
+    TF_LITE_ENSURE(context, bytes >= 0);
+    memcpy(dst->data.raw, src->data.raw, bytes);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus CopyOpOutputsToSubgraphInputs(TfLiteContext* context,
+                                           TfLiteNode* node,
+                                           MicroGraph* graph_info,
+                                           int subgraph_idx) {
+  TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
+                              graph_info->NumSubgraphInputs(subgraph_idx));
+  for (int i = 0; i < node->outputs->size; ++i) {
+    // Op output i is copied into subgraph input i; byte sizes must match.
+    TfLiteEvalTensor* src = tflite::micro::GetEvalOutput(context, node, i);
+    TfLiteEvalTensor* dst = graph_info->GetSubgraphInput(subgraph_idx, i);
+    int bytes = ValidateAndGetTensorSizes(src, dst);
+    TF_LITE_ENSURE(context, bytes >= 0);
+    memcpy(dst->data.raw, src->data.raw, bytes);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus CopySubgraphOutputsToOpOutputs(TfLiteContext* context,
+                                            TfLiteNode* node,
+                                            MicroGraph* graph_info,
+                                            int subgraph_idx) {
+  TF_LITE_ENSURE(context, static_cast<size_t>(node->outputs->size) ==
+                              graph_info->NumSubgraphOutputs(subgraph_idx));
+  for (int i = 0; i < node->outputs->size; ++i) {
+    // Subgraph output i is copied into op output i; byte sizes must match.
+    TfLiteEvalTensor* dst = tflite::micro::GetEvalOutput(context, node, i);
+    TfLiteEvalTensor* src = graph_info->GetSubgraphOutput(subgraph_idx, i);
+    int bytes = ValidateAndGetTensorSizes(dst, src);
+    TF_LITE_ENSURE(context, bytes >= 0);
+    memcpy(dst->data.raw, src->data.raw, bytes);
+  }
+  return kTfLiteOk;
+}
+
+TfLiteEvalTensor MakeUnpackedInt4Tensor(TfLiteContext* context,
+                                        int scratch_buffer_index,
+                                        const TfLiteEvalTensor* tensor) {
+  // Anything that is not int4 is handed back as an unmodified copy.
+  if (tensor->type != kTfLiteInt4) return *tensor;
+
+  // Build an int8 tensor sharing the input's dims, stored in the scratch buffer.
+  TfLiteEvalTensor unpacked;
+  unpacked.type = kTfLiteInt8;
+  unpacked.dims = tensor->dims;
+  unpacked.data.data = static_cast<int8_t*>(
+      context->GetScratchBuffer(context, scratch_buffer_index));
+  tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+      tflite::micro::GetTensorData<int8_t>(tensor),
+      tflite::micro::GetTensorShape(tensor).FlatSize(),
+      tflite::micro::GetTensorData<int8_t>(&unpacked));
+  return unpacked;
+}
+
 }  // namespace micro
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cc
index a693eba..8cd1e7e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2_pool_2d.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -36,15 +37,18 @@ constexpr int kTensorShapeRank = 4;
 enum { kBatchRank = 0, kHeightRank, kWidthRank, kChannelRank };
 
 TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   auto* params = static_cast<TfLitePoolParams*>(node->builtin_data);
 
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE_EQ(context, NumDimensions(input), kTensorShapeRank);
   TF_LITE_ENSURE_EQ(context, NumDimensions(output), kTensorShapeRank);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
@@ -70,12 +74,21 @@ TfLiteStatus L2Prepare(TfLiteContext* context, TfLiteNode* node) {
   // The dims storage is expected to be the same area in memory
   // for both TfLiteTensor and TfLiteEvalTensor.  This is important
   // because TfLiteTensor in the MicroInterpreter is a temporary
-  // allocation.
+  // allocation.  For the KernelRunner interpreter, TfLiteEvalTensor
+  // is a temporary allocation.  We must therefore relocate the dims
+  // from the FlatBuffer to the persistent storage arena.
+  TfLiteEvalTensor* output_eval =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, tflite::micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
   output->dims->data[kBatchRank] = batches;
   output->dims->data[kHeightRank] = out_height;
   output->dims->data[kWidthRank] = out_width;
   output->dims->data[kChannelRank] = channels_out;
 
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+
   return kTfLiteOk;
 }
 
@@ -113,9 +126,8 @@ TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) {
       L2EvalFloat(*params, *input, &op_params, output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context,
-                         "L2_POOL_2D only supports float32 currently, got %s.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("L2_POOL_2D only supports float32 currently, got %s.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -124,14 +136,7 @@ TfLiteStatus L2Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_L2_POOL_2D() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/L2Prepare,
-          /*invoke=*/L2Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, L2Prepare, L2Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cc
index 764929c..ede02db 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/l2norm.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/l2normalization.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace ops {
@@ -49,28 +50,31 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
-
   TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
 
-  TF_LITE_ENSURE(context, output->type == kTfLiteFloat32 ||
-                              output->type == kTfLiteUInt8 ||
-                              output->type == kTfLiteInt8);
+  TF_LITE_ENSURE(context,
+                 output->type == kTfLiteFloat32 || output->type == kTfLiteInt8);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  if (output->type == kTfLiteInt8) {
     data->input_zero_point = input->params.zero_point;
   } else if (output->type == kTfLiteFloat32) {
     data->input_zero_point = 0;
   }
 
-  // TODO(ahentz): For some reason our implementations don't support
-  // activations.
+  // Our implementations don't currently support activations.
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -110,12 +114,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                    tflite::micro::GetTensorShape(output),
                                    tflite::micro::GetTensorData<float>(output),
                                    epsilon);
-  } else if (output->type == kTfLiteUInt8) {
-    reference_ops::L2Normalization(
-        data, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<uint8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<uint8_t>(output));
   } else if (output->type == kTfLiteInt8) {
     const auto input_shape = tflite::micro::GetTensorShape(input);
     const auto output_shape = tflite::micro::GetTensorShape(output);
@@ -129,8 +127,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         tflite::micro::GetTensorData<int8_t>(input),
         tflite::micro::GetTensorData<int8_t>(output));
   } else {
-    TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.",
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("Output type is %s, requires float.",
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
 
@@ -140,14 +138,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace l2norm
 
 TfLiteRegistration Register_L2NORM_REF() {
-  return {/*init=*/l2norm::Init,
-          /*free=*/nullptr,
-          /*prepare=*/l2norm::Prepare,
-          /*invoke=*/l2norm::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(l2norm::Init, l2norm::Prepare, l2norm::Eval);
 }
 
 TfLiteRegistration Register_L2_NORMALIZATION() { return Register_L2NORM_REF(); }
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cc
index 1222f30..042528d 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,23 +21,10 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace {
-
-// Input/output tensor index.
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-struct LeakyReluOpData {
-  // quantization parameters
-  int32_t output_multiplier_alpha;
-  int32_t output_shift_alpha;
-  int32_t output_multiplier_identity;
-  int32_t output_shift_identity;
-  int32_t input_zero_point;
-  int32_t output_zero_point;
-};
 
 template <typename T>
 void QuantizeLeakyRelu(const LeakyReluOpData& data,
@@ -58,51 +45,11 @@ void QuantizeLeakyRelu(const LeakyReluOpData& data,
                                    tflite::micro::GetTensorData<T>(output));
 }
 
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-
-  if (output->type == kTfLiteInt8) {
-    LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
-    const auto* params =
-        static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
-
-    data->input_zero_point = input->params.zero_point;
-    data->output_zero_point = output->params.zero_point;
-
-    int output_shift_alpha;
-    double alpha_multiplier = static_cast<double>(
-        input->params.scale * params->alpha / output->params.scale);
-    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
-                       &output_shift_alpha);
-    data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);
-
-    int output_shift_identity;
-    double identity_multiplier =
-        static_cast<double>(input->params.scale / output->params.scale);
-    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
-                       &output_shift_identity);
-    data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
-  }
-
-  return kTfLiteOk;
-}
-
 void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData));
 }
 
-TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
-  return CalculateOpData(context, node);
-}
-
 TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
       tflite::micro::GetEvalInput(context, node, kInputTensor);
@@ -127,27 +74,22 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
       QuantizeLeakyRelu<int8_t>(data, input, output);
       return kTfLiteOk;
     } break;
+    case kTfLiteInt16: {
+      QuantizeLeakyRelu<int16_t>(data, input, output);
+      return kTfLiteOk;
+    } break;
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "Only float32, int8 are supported by LEAKY_RELU, got %s.",
-          TfLiteTypeGetName(input->type));
+      MicroPrintf("Only float32, int8, int16 are supported by LEAKY_RELU, "
+                  "got %s.", TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 
   return kTfLiteError;
 }
 
-}  // namespace
-
 TfLiteRegistration Register_LEAKY_RELU() {
-  return {/*init=*/LeakyReluInit,
-          /*free=*/nullptr,
-          /*prepare=*/LeakyReluPrepare,
-          /*invoke=*/LeakyReluEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare,
+                                   LeakyReluEval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h
new file mode 100644
index 0000000..fe43060
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h
@@ -0,0 +1,43 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+// Input/output tensor index.
+extern const int kInputTensor;
+extern const int kOutputTensor;
+
+struct LeakyReluOpData {
+  // Quantization parameters, populated by CalculateOpDataLeakyRelu().
+  int32_t output_multiplier_alpha;
+  int32_t output_shift_alpha;
+  int32_t output_multiplier_identity;
+  int32_t output_shift_identity;
+  int32_t input_zero_point;
+  int32_t output_zero_point;
+};
+
+TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context, TfLiteNode* node);
+
+TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LEAKY_RELU_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cc
new file mode 100644
index 0000000..b71b743
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu_common.cc
@@ -0,0 +1,78 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/leaky_relu.h"
+
+namespace tflite {
+
+// Input/output tensor index.
+const int kInputTensor = 0;
+const int kOutputTensor = 0;
+
+TfLiteStatus CalculateOpDataLeakyRelu(TfLiteContext* context,
+                                      TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+  // Validate arity and types, then precompute quantization data if needed.
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+  // Only int8/int16 outputs populate LeakyReluOpData; other types skip this.
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    LeakyReluOpData* data = static_cast<LeakyReluOpData*>(node->user_data);
+    const auto* params =
+        static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
+    // Zero points are copied unchanged from the tensors' quantization params.
+    data->input_zero_point = input->params.zero_point;
+    data->output_zero_point = output->params.zero_point;
+    // Alpha multiplier = input_scale * alpha / output_scale.
+    int output_shift_alpha;
+    double alpha_multiplier = static_cast<double>(
+        input->params.scale * params->alpha / output->params.scale);
+    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
+                       &output_shift_alpha);
+    data->output_shift_alpha = static_cast<int32_t>(output_shift_alpha);
+    // Identity multiplier = input_scale / output_scale.
+    int output_shift_identity;
+    double identity_multiplier =
+        static_cast<double>(input->params.scale / output->params.scale);
+    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
+                       &output_shift_identity);
+    data->output_shift_identity = static_cast<int32_t>(output_shift_identity);
+  }
+  // Hand the temporary tensors back to the MicroContext before returning.
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  // NOTE(review): TF_LITE_ENSURE* failures above presumably skip this cleanup.
+  return kTfLiteOk;
+}
+// Prepare entry point for LEAKY_RELU; forwards to CalculateOpDataLeakyRelu().
+TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return CalculateOpDataLeakyRelu(context, node);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cc
index bdf5ea3..4cfccb2 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/log_softmax.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -43,11 +44,15 @@ constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
@@ -88,6 +93,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
     data->depth = static_cast<size_t>(input_shape.Dims(trailing_dim));
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -126,8 +133,8 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("LOG_SOFTMAX only supports float32, int8, got %s.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 }
@@ -135,14 +142,7 @@ TfLiteStatus LogSoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_LOG_SOFTMAX() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/LogSoftmaxPrepare,
-          /*invoke=*/LogSoftmaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, LogSoftmaxPrepare, LogSoftmaxEval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cc
index 2f3a062..2b38501 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -19,87 +21,24 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace logical {
 namespace {
 
-// Input/output tensor index.
-constexpr int kInputTensor1 = 0;
-constexpr int kInputTensor2 = 1;
-constexpr int kOutputTensor = 0;
-
-TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
-                         bool (*func)(bool, bool)) {
-  const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor1);
-  const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor2);
-  TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
-
-  if (tflite::micro::HaveSameShapes(input1, input2)) {
-    reference_ops::BinaryFunction<bool, bool, bool>(
-        tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<bool>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<bool>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<bool>(output), func);
-  } else {
-    reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
-        tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<bool>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<bool>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<bool>(output), func);
-  }
-
-  return kTfLiteOk;
-}
-
-bool LogicalOr(bool x, bool y) { return x || y; }
-
 TfLiteStatus LogicalOrEval(TfLiteContext* context, TfLiteNode* node) {
   return LogicalImpl(context, node, LogicalOr);
 }
 
-bool LogicalAnd(bool x, bool y) { return x && y; }
-
 TfLiteStatus LogicalAndEval(TfLiteContext* context, TfLiteNode* node) {
   return LogicalImpl(context, node, LogicalAnd);
 }
 
 }  // namespace
-}  // namespace logical
 
 TfLiteRegistration Register_LOGICAL_OR() {
-  // Init, Free, Prepare, Eval are satisfying the Interface required by
-  // TfLiteRegistration.
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/logical::LogicalOrEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, LogicalOrEval);
 }
 
 TfLiteRegistration Register_LOGICAL_AND() {
-  // Init, Free, Prepare, Eval are satisfying the Interface required by
-  // TfLiteRegistration.
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/logical::LogicalAndEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, LogicalAndEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h
new file mode 100644
index 0000000..8dadde4
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h
@@ -0,0 +1,35 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+// Input/output tensor index.
+extern const int kLogicalInputTensor1;
+extern const int kLogicalInputTensor2;
+extern const int kLogicalOutputTensor;
+
+TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
+                         bool (*func)(bool, bool));
+
+bool LogicalOr(bool x, bool y);
+bool LogicalAnd(bool x, bool y);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LOGICAL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cc
new file mode 100644
index 0000000..1586d2f
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logical_common.cc
@@ -0,0 +1,63 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logical.h"
+
+namespace tflite {
+
+// Input/output tensor index.
+const int kLogicalInputTensor1 = 0;
+const int kLogicalInputTensor2 = 1;
+const int kLogicalOutputTensor = 0;
+
+TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node,
+                         bool (*func)(bool, bool)) {
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kLogicalInputTensor1);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kLogicalInputTensor2);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kLogicalOutputTensor);
+
+  if (tflite::micro::HaveSameShapes(input1, input2)) {
+    reference_ops::BinaryFunction<bool, bool, bool>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<bool>(input1),
+        tflite::micro::GetTensorShape(input2),
+        tflite::micro::GetTensorData<bool>(input2),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<bool>(output), func);
+  } else {
+    reference_ops::BroadcastBinaryFunction4DSlow<bool, bool, bool>(
+        tflite::micro::GetTensorShape(input1),
+        tflite::micro::GetTensorData<bool>(input1),
+        tflite::micro::GetTensorShape(input2),
+        tflite::micro::GetTensorData<bool>(input2),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<bool>(output), func);
+  }
+
+  return kTfLiteOk;
+}
+
+bool LogicalOr(bool x, bool y) { return x || y; }
+
+bool LogicalAnd(bool x, bool y) { return x && y; }
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cc
index 8959178..82579ea 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,71 +24,25 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace activations {
 namespace {
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  int32_t input_zero_point;
-  int32_t input_range_radius;
-  int32_t input_multiplier;
-  int input_left_shift;
-};
-
-TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
-                                       OpData* data) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  if (input->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
-                      std::numeric_limits<int8_t>::min());
-
-    static constexpr int kInputIntegerBits = 4;
-    const double input_real_multiplier =
-        static_cast<double>(input->params.scale) *
-        static_cast<double>(1 << (31 - kInputIntegerBits));
-
-    data->input_zero_point = input->params.zero_point;
-
-    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
-    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
-
-    data->input_range_radius =
-        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
-  }
-  return kTfLiteOk;
-}
-}  // namespace
 
 void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  return CalculateArithmeticOpData(context, node, data);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
 }
 
 TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
+  OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
 
   if (input->type == kTfLiteFloat32) {
     switch (output->type) {
@@ -100,9 +54,25 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
         return kTfLiteOk;
       }
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
+        return kTfLiteError;
+    }
+  } else if (input->type == kTfLiteInt16) {
+    switch (output->type) {
+      case kTfLiteInt16: {
+        reference_integer_ops::Logistic(
+            data->input_multiplier, data->input_left_shift,
+            NumElements(input->dims),
+            tflite::micro::GetTensorData<int16_t>(input),
+            tflite::micro::GetTensorData<int16_t>(output));
+        return kTfLiteOk;
+      }
+      default:
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else if (input->type == kTfLiteInt8) {
@@ -117,34 +87,25 @@ TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
         return kTfLiteOk;
       }
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else {
     // TODO(b/141211002): Also support other data types once we have supported
     // temporary tensors in TFLM.
-    TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                       TfLiteTypeGetName(input->type),
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("Input %s, output %s not supported.",
+                TfLiteTypeGetName(input->type),
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace activations
+}  // namespace
 
 TfLiteRegistration Register_LOGISTIC() {
-  return {/*init=*/activations::LogisticInit,
-          /*free=*/nullptr,
-          /*prepare=*/activations::LogisticPrepare,
-          /*invoke=*/activations::LogisticEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval);
 }
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h
new file mode 100644
index 0000000..43325e1
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h
@@ -0,0 +1,42 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+extern const int kLogisticInputTensor;
+extern const int kLogisticOutputTensor;
+
+struct OpDataLogistic {
+  int32_t input_zero_point;
+  int32_t input_range_radius;
+  int32_t input_multiplier;
+  int input_left_shift;
+};
+
+TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
+                                               TfLiteNode* node,
+                                               OpDataLogistic* data);
+
+TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LOGISTIC_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cc
new file mode 100644
index 0000000..9f27a91
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic_common.cc
@@ -0,0 +1,119 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/logistic.h"
+
+namespace tflite {
+const int kLogisticInputTensor = 0;
+const int kLogisticOutputTensor = 0;
+
+TfLiteStatus CalculateArithmeticOpDataLogistic(TfLiteContext* context,
+                                               TfLiteNode* node,
+                                               OpDataLogistic* data) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kLogisticInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kLogisticOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                      std::numeric_limits<int8_t>::min());
+
+    static constexpr int kInputIntegerBits = 4;
+    const double input_real_multiplier =
+        static_cast<double>(input->params.scale) *
+        static_cast<double>(1 << (31 - kInputIntegerBits));
+
+    data->input_zero_point = input->params.zero_point;
+
+    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
+    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
+
+    data->input_range_radius =
+        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
+  }
+
+  if (input->type == kTfLiteInt16) {
+    static constexpr int kInputIntegerBits = 3;
+    static constexpr int kOutputFractionalBits = 15;
+
+    // See comments in TanhPrepare about requiring zero_point==0
+    // and a power-of-two ("POT") scale.
+
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
+    int input_scale_log2_rounded;
+    bool param_scale_pot =
+        CheckedLog2(input->params.scale, &input_scale_log2_rounded);
+
+    data->input_left_shift =
+        (15 - kInputIntegerBits) + input_scale_log2_rounded;
+    param_scale_pot &= (data->input_left_shift == 0);
+
+    if (param_scale_pot) {
+      data->input_multiplier = 0;
+    } else {
+      // Calculate multiplier to change input scale to 1/(3*4096)
+      // as required by the table lookup.
+      // In this scaling +/-2^17 represents +/-10.7
+      double multiplier =
+          static_cast<double>(input->params.scale) * 4096.0 * 3.0;
+
+      data->input_left_shift = 0;
+
+      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+      }
+
+      data->input_multiplier = static_cast<int32_t>(multiplier);
+    }
+
+    int output_scale_log2_rounded;
+    TF_LITE_ENSURE(
+        context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
+    TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
+                      -kOutputFractionalBits);
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
+
+  return CalculateArithmeticOpDataLogistic(context, node, data);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cc
new file mode 100644
index 0000000..037caf7
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.cc
@@ -0,0 +1,222 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h"
+
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace lstm_internal {
+
+const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
+const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
+
+void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
+                    int n_input, int16_t* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      int32_t sum = input_1[index] + input_2[index];
+      const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum));
+      output[index] = static_cast<int16_t>(sum_clamped);
+    }
+  }
+}
+
+void AddElementWise(const float* input_1, const float* input_2, int n_batch,
+                    int n_input, float* output) {
+  for (int batch = 0; batch < n_batch; ++batch) {
+    for (int i = 0; i < n_input; ++i) {
+      const int index = batch * n_input + i;
+      output[index] = input_1[index] + input_2[index];
+    }
+  }
+}
+
+void Sigmoid(const RuntimeShape& data_shape, int16_t* data) {
+  reference_integer_ops::Logistic(
+      0 /*data->input_multiplier*/, 0 /*data->input_left_shift */,
+      data_shape.FlatSize() /*NumElements(input->dims)*/,
+      data /* tflite::micro::GetTensorData<int16_t>(input) */,
+      data /*tflite::micro::GetTensorData<int16_t>(output) */);
+}
+
+void Sigmoid(const RuntimeShape& data_shape, float* data) {
+  reference_ops::Logistic(data_shape, data, data_shape, data);
+}
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          int16_t* input_data, const RuntimeShape& output_data_shape,
+          int16_t* output_data) {
+  int32_t tanh_input_left_shift = (15 + cell_state_scale_power) - 3;
+  if (tanh_input_left_shift < 0) /* handling negative shift value */
+  {
+    int32_t i;
+    tanh_input_left_shift = -tanh_input_left_shift;
+    for (i = 0; i < input_data_shape.FlatSize(); i++) {
+      input_data[i] = input_data[i] >> tanh_input_left_shift;
+    }
+    tanh_input_left_shift = 0;
+  }
+  reference_integer_ops::Tanh(0, tanh_input_left_shift, input_data_shape,
+                              input_data, output_data_shape, output_data);
+}
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          float* input_data, const RuntimeShape& output_data_shape,
+          float* output_data) {
+  reference_ops::Tanh(input_data_shape, input_data, output_data_shape,
+                      output_data);
+}
+
+// Input and output have the same shape in LSTM
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int8_t* output_data) {
+  return reference_integer_ops::MulElementwise(
+      shape.FlatSize(), params, input1_data, input2_data, output_data);
+}
+
+// Input and output have the same shape in LSTM
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int16_t* output_data) {
+  return reference_integer_ops::MulElementwise(
+      shape.FlatSize(), params, input1_data, input2_data, output_data);
+}
+
+// Input and output have the same shape in LSTM
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const float* input1_data, const float* input2_data,
+         float* output_data) {
+  return reference_ops::Mul(params, shape, input1_data, shape, input2_data,
+                            shape, output_data);
+}
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int8_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int32_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data) {
+  return tflite::reference_integer_ops::FullyConnected(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data);
+}
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int16_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int64_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data) {
+  return tflite::reference_integer_ops::FullyConnected(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data);
+}
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& filter_shape, const float* filter_data,
+                    const RuntimeShape& bias_shape, const float* bias_data,
+                    const RuntimeShape& output_shape, float* output_data) {
+  return tflite::reference_ops::FullyConnected(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data);
+}
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              int16_t* vector) {
+  for (int i = 0; i < v_size; i++) {
+    vector[i] =
+        std::max(std::min(cell_state_info.quantized_cell_clip, vector[i]),
+                 static_cast<int16_t>(-cell_state_info.quantized_cell_clip));
+  }
+}
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              float* vector) {
+  for (int i = 0; i < v_size; i++) {
+    vector[i] = std::max(std::min(cell_state_info.cell_clip, vector[i]),
+                         -cell_state_info.cell_clip);
+  }
+}
+
+// Increment the data offset so the single time step invocation call can access
+// the corresponding input/output tensor data at the time step
+void LstmStepManager::UpdateTime() {
+  current_time_ += 1;
+  TFLITE_DCHECK_LE(current_time_, size_info_.time_steps);
+  // default as one batch per inference
+  int input_step = size_info_.input_dimension;
+  int output_step = size_info_.state_dimension;
+  // time major: batch inference
+  if (size_info_.time_major) {
+    input_step = input_step * size_info_.batch_size;
+    output_step = output_step * size_info_.batch_size;
+  }
+
+  input_offset_ += input_step;
+  output_offset_ += output_step;
+}
+
+// Increment the data offset so the single time step invocation call can access
+// the corresponding hidden/cell state tensor data at the time step (for single
+// batch inference only)
+void LstmStepManager::UpdateBatch() {
+  current_batch_ += 1;
+  TFLITE_DCHECK_LE(current_batch_, size_info_.batch_size);
+  // batch inference for time major: no action needed
+  if (size_info_.time_major) {
+    return;
+  }
+  // otherwise: single batch inference, go to the next batch
+  hidden_state_offset_ += size_info_.state_dimension;
+  cell_state_offset_ += size_info_.state_dimension;
+}
+
+// Input shape for each single time LSTM invocation.
+// Multi-batch for time_major input
+RuntimeShape LstmStepManager::InputShape() const {
+  int batch_size = 1;
+  if (size_info_.time_major) {
+    batch_size = size_info_.batch_size;
+  }
+  const int dims[2] = {batch_size, size_info_.input_dimension};
+  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims);
+  return RuntimeShape(2, dims_data);
+}
+
+// State shape (both hidden and cell) for each single time LSTM invocation.
+// Multi-batch for time_major input
+RuntimeShape LstmStepManager::StateShape() const {
+  int batch_size = 1;
+  if (size_info_.time_major) {
+    batch_size = size_info_.batch_size;
+  }
+  const int dims[2] = {batch_size, size_info_.state_dimension};
+  const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims);
+  return RuntimeShape(2, dims_data);
+}
+
+}  // namespace lstm_internal
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h
new file mode 100644
index 0000000..fcdbfe8
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h
@@ -0,0 +1,417 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Functions to perform integer evaluation for standard LSTM (e.g., defined in
+// the keras lstm layer, no peephole etc.). Currently used by the 16 bits
+// activation case only
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_
+#include <algorithm>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+// Since LSTM includes multiple intermediate stages, introducing the internal
+// namespace to expose them for testing
+namespace lstm_internal {
+
+void Sigmoid(const RuntimeShape& data_shape, int16_t* data);
+
+void Sigmoid(const RuntimeShape& data_shape, float* data);
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          int16_t* input_data, const RuntimeShape& output_data_shape,
+          int16_t* output_data);
+
+void Tanh(int32_t cell_state_scale_power, const RuntimeShape& input_data_shape,
+          float* input_data, const RuntimeShape& output_data_shape,
+          float* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int8_t* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const int16_t* input1_data, const int16_t* input2_data,
+         int16_t* output_data);
+
+void Mul(const RuntimeShape& shape, const ArithmeticParams& params,
+         const float* input1_data, const float* input2_data,
+         float* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int8_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int32_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const int16_t* input_data,
+                    const RuntimeShape& filter_shape, const int8_t* filter_data,
+                    const RuntimeShape& bias_shape, const int64_t* bias_data,
+                    const RuntimeShape& output_shape, int16_t* output_data);
+
+void FullyConnected(const FullyConnectedParams& params,
+                    const RuntimeShape& input_shape, const float* input_data,
+                    const RuntimeShape& filter_shape, const float* filter_data,
+                    const RuntimeShape& bias_shape, const float* bias_data,
+                    const RuntimeShape& output_shape, float* output_data);
+
+void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
+                    int n_input, int16_t* output);
+
+void AddElementWise(const float* input_1, const float* input_2, int n_batch,
+                    int n_input, float* output);
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              int16_t* vector);
+
+void Clipping(const int v_size, const CellStateInfo& cell_state_info,
+              float* vector);
+
+// Manages the slice position (offset), slice length (sliced tensor shape),
+// and update rules for input/output/hidden state/cell state tensors at each
+// time step.
+class LstmStepManager {
+ public:
+  LstmStepManager() = delete;
+  // Does not take any ownership; size_info must be non-null and must refer to
+  // an object that outlives the constructed manager (it is kept by reference).
+  explicit LstmStepManager(const LstmSizeInfo* size_info)
+      : size_info_(*size_info) {}
+
+  void UpdateTime();
+  void UpdateBatch();
+
+  void ResetTime() { current_time_ = 0; }  // only the step counter; offsets stay
+  RuntimeShape InputShape() const;
+  RuntimeShape StateShape() const;
+
+  int InputOffset() const { return input_offset_; }
+  int OutputOffset() const { return output_offset_; }
+  int HiddenStateOffset() const { return hidden_state_offset_; }
+  int CellStateOffset() const { return cell_state_offset_; }
+
+ private:
+  int current_time_ = 0;
+  int current_batch_ = 0;
+  int input_offset_ = 0;
+  int output_offset_ = 0;
+  int hidden_state_offset_ = 0;
+  int cell_state_offset_ = 0;
+  // size_info_ comes from LstmOpData, which resides in the memory arena
+  // (guaranteed to outlast LstmStepManager, which resides on the stack)
+  const LstmSizeInfo& size_info_;
+};
+
+// Calculates a single LSTM gate for the time step described by step_info.
+// Implements the following formula:
+//   gate = activate(FC(input) + FC(recurrent))
+// Activation is sigmoid except for the "cell" gate (configurable, usually tanh)
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType>
+void CalculateLstmGate(
+    const LstmStepManager& step_info, const GateParameters& gate_params,
+    // Input FC
+    const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight,
+    const TfLiteEvalTensor* input_bias,
+    // Recurrent FC
+    const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight,
+    const TfLiteEvalTensor* recurrent_bias,
+    // Output
+    CellType* gate_output,
+    // Scratch arrays
+    CellType* fc_output_buffer, const TfLiteFusedActivation activation) {
+  const auto gate_output_shape = step_info.StateShape();
+  // Check that the current slices stay inside the input/recurrent tensors
+  TFLITE_DCHECK_LE(step_info.InputOffset() + step_info.InputShape().FlatSize(),
+                   tflite::micro::GetTensorShape(input).FlatSize());
+  TFLITE_DCHECK_LE(
+      step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(recurrent).FlatSize());
+
+  // Input FC: the result is written straight into gate_output
+  FullyConnected(gate_params.input_fc_params, step_info.InputShape(),
+                 tflite::micro::GetTensorData<ActivationType>(input) +
+                     step_info.InputOffset(),
+                 micro::GetTensorShape(input_weight),
+                 tflite::micro::GetTensorData<WeightType>(input_weight),
+                 tflite::micro::GetTensorShape(input_bias),
+                 tflite::micro::GetOptionalTensorData<BiasType>(input_bias),
+                 gate_output_shape, gate_output);
+
+  // Recurrent FC: the result goes to the scratch array fc_output_buffer
+  FullyConnected(gate_params.recurrent_fc_params, step_info.StateShape(),
+                 tflite::micro::GetTensorData<ActivationType>(recurrent) +
+                     step_info.HiddenStateOffset(),
+                 tflite::micro::GetTensorShape(recurrent_weight),
+                 tflite::micro::GetTensorData<WeightType>(recurrent_weight),
+                 tflite::micro::GetTensorShape(recurrent_bias),
+                 tflite::micro::GetOptionalTensorData<BiasType>(recurrent_bias),
+                 gate_output_shape, fc_output_buffer);
+
+  AddElementWise(gate_output, fc_output_buffer,  // sum lands in gate_output
+                 /*n_batch=*/gate_output_shape.DimsData()[0],
+                 /*n_input=*/gate_output_shape.DimsData()[1], gate_output);
+  // Apply activation (in place on gate_output)
+  switch (activation) {
+    case kTfLiteActSigmoid:
+      Sigmoid(gate_output_shape, gate_output);
+      break;
+    case kTfLiteActTanh: {
+      // Set the scale power to -12 to avoid shift
+      Tanh(/*cell_state_scale_power=*/-12, gate_output_shape, gate_output,
+           gate_output_shape, gate_output);
+    } break;
+    default:
+      // Only Sigmoid or Tanh is used.
+      TFLITE_ASSERT_FALSE;
+  }
+}
+
+// Update the cell state using the outputs of the forget, input and cell gates.
+// Formula: updated_cell_state = forget_gate_output * cell_state +
+// input_gate_output * cell_gate_output, where * denotes element wise
+// multiplication. buffer receives the input gate x cell gate product.
+template <typename CellType>
+void UpdateLstmCell(const LstmStepManager& step_info,
+                    TfLiteEvalTensor* cell_state,
+                    // Gate outputs
+                    CellType* forget_gate_output,
+                    const CellType* input_gate_output,
+                    const CellType* cell_gate_output,
+                    // Mul parameters
+                    const ArithmeticParams& forget_cell_mul_params,
+                    const ArithmeticParams& input_mul_params,
+                    const CellStateInfo& cell_state_info, CellType* buffer) {
+  // Check that the current slice stays inside the cell state tensor
+  TFLITE_DCHECK_LE(
+      step_info.CellStateOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(cell_state).FlatSize());
+
+  auto cell_state_shape = step_info.StateShape();
+  // Forget Gate x Cell State (the product overwrites the cell state slice)
+  Mul(cell_state_shape, forget_cell_mul_params, forget_gate_output,
+      tflite::micro::GetTensorData<CellType>(cell_state) +
+          step_info.CellStateOffset(),
+      tflite::micro::GetTensorData<CellType>(cell_state) +
+          step_info.CellStateOffset());
+  // Input Gate x Cell Gate (the product is stored in buffer)
+  Mul(cell_state_shape, input_mul_params, input_gate_output, cell_gate_output,
+      buffer);
+
+  // Update the cell state: add buffer to the cell state slice in place
+  AddElementWise(tflite::micro::GetTensorData<CellType>(cell_state) +
+                     step_info.CellStateOffset(),
+                 buffer,
+                 /*n_batch=*/cell_state_shape.DimsData()[0],
+                 /*n_input=*/cell_state_shape.DimsData()[1],
+                 tflite::micro::GetTensorData<CellType>(cell_state) +
+                     step_info.CellStateOffset());
+
+  if (cell_state_info.cell_clip > 0) {  // clipping only for a positive clip
+    Clipping(cell_state_shape.FlatSize(), cell_state_info,
+             tflite::micro::GetTensorData<CellType>(cell_state) +
+                 step_info.CellStateOffset());
+  }
+}
+
+// Update the hidden state of the LSTM kernel using the following formula:
+// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means
+// element wise multiplication. buffer receives the Tanh result.
+template <typename CellType, typename ActivationType>
+void UpdateLstmHidden(const LstmStepManager& step_info,
+                      TfLiteEvalTensor* cell_state,
+                      TfLiteEvalTensor* hidden_state,
+                      const CellType* output_gate_output,
+                      const ArithmeticParams& mul_params,
+                      int32_t cell_state_scale_power, CellType* buffer) {
+  // Check that the current slices stay inside the cell/hidden state tensors
+  TFLITE_DCHECK_LE(
+      step_info.CellStateOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(cell_state).FlatSize());
+  TFLITE_DCHECK_LE(
+      step_info.HiddenStateOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(hidden_state).FlatSize());
+
+  auto cell_state_shape = step_info.StateShape();
+  CellType* cell_state_data =
+      tflite::micro::GetTensorData<CellType>(cell_state) +
+      step_info.CellStateOffset();
+  // Tanh(cell_state): output goes to buffer, not back into the cell state
+  Tanh(cell_state_scale_power, cell_state_shape, cell_state_data,
+       cell_state_shape, buffer);
+  // Update the hidden state slice: buffer x output_gate_output
+  Mul(cell_state_shape, mul_params, buffer, output_gate_output,
+      tflite::micro::GetTensorData<ActivationType>(hidden_state) +
+          step_info.HiddenStateOffset());
+}
+
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType>
+void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
+              LSTMKernelContents& kernel_content,
+              LSTMBuffers<CellType>& buffers) {
+  /*Step1: Calculate gate outputs to prepare cell state update*/
+  CellType* gate_internal_buffer = buffers.buffer3;  // scratch for every gate
+  CellType* forget_gate_output = buffers.buffer0;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.forget_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToForgetWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmForgetGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToForgetWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      forget_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+
+  // Input Gate calculation
+  CellType* input_gate_output = buffers.buffer1;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.input_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToInputWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToInputWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      input_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+
+  // Cell Gate calculation (activation taken from op_data, not fixed)
+  CellType* cell_gate_output = buffers.buffer2;
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.cell_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToCellWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmCellGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToCellWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      cell_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, op_data.cell_gate_nonlinear_type);
+
+  /*Step2: update the cell state */
+  const InterGateParameters& inter_gate_params = op_data.inter_gate_parameters;
+  CellType* updated_input_buffer = buffers.buffer1;  // reuse: same memory as input_gate_output
+
+  UpdateLstmCell<CellType>(step_info, kernel_content.CellStateTensor(),
+                           forget_gate_output, input_gate_output,
+                           cell_gate_output,
+                           inter_gate_params.forget_cell_mul_params,
+                           inter_gate_params.input_mul_params,
+                           op_data.cell_state_info, updated_input_buffer);
+
+  /*Step3: update the hidden state */
+  CellType* output_gate_output = buffers.buffer1;  // reuse buffer (cell state is already updated)
+  CalculateLstmGate<ActivationType, WeightType, CellType, BiasType>(
+      step_info, op_data.output_gate_parameters,
+      // Input FC
+      kernel_content.GetInternalTensor(tflite::kLstmInputTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmInputToOutputWeightsTensor),
+      kernel_content.GetInternalTensor(tflite::kLstmOutputGateBiasTensor),
+      // Recurrent FC
+      kernel_content.HiddenStateTensor(),
+      kernel_content.GetInternalTensor(
+          tflite::kLstmRecurrentToOutputWeightsTensor),
+      /*recurrent_bias*/ nullptr,
+      // Output
+      output_gate_output,
+      // Scratch arrays
+      gate_internal_buffer, kTfLiteActSigmoid);
+
+  CellType* tanh_activated_cell_buffer = buffers.buffer0;  // reuse buffer (forget gate output is no longer read)
+  tflite::lstm_internal::UpdateLstmHidden<CellType, ActivationType>(
+      step_info, kernel_content.CellStateTensor(),
+      kernel_content.HiddenStateTensor(), output_gate_output,
+      inter_gate_params.output_mul_params,
+      op_data.cell_state_info.cell_state_scale_power,
+      tanh_activated_cell_buffer);
+
+  /*Step4: copy the updated hidden state to the output*/
+  // Check that the current slice stays inside the output tensor
+  TFLITE_DCHECK_LE(
+      step_info.OutputOffset() + step_info.StateShape().FlatSize(),
+      tflite::micro::GetTensorShape(kernel_content.output_tensor).FlatSize());
+  // record the output (from the updated hidden state)
+  ActivationType* output_ptr = tflite::micro::GetTensorData<ActivationType>(
+      kernel_content.output_tensor);
+  const auto* hidden_state = kernel_content.HiddenStateTensor();
+  std::memcpy(output_ptr + step_info.OutputOffset(),
+              tflite::micro::GetTensorData<ActivationType>(hidden_state) +
+                  step_info.HiddenStateOffset(),
+              step_info.StateShape().FlatSize() * sizeof(ActivationType));
+}
+
+}  // namespace lstm_internal
+
+// Evaluates the LSTM kernel over all time steps (and batches) of the input by
+// invoking LstmStep once per step. Always returns kTfLiteOk.
+template <typename ActivationType, typename WeightType, typename CellType,
+          typename BiasType>
+TfLiteStatus EvalLstm(const OpDataLSTM& op_data,
+                      LSTMKernelContents& kernel_content,
+                      LSTMBuffers<CellType>& buffers) {
+  const auto& sizes = op_data.size_info;
+  lstm_internal::LstmStepManager stepper(&sizes);
+  if (!sizes.time_major) {
+    // Batch is the first dimension: the input data cannot be sliced across
+    // batches for one step, so run single batch inference, batch after batch.
+    for (int batch = 0; batch < sizes.batch_size; ++batch) {
+      for (int step = 0; step < sizes.time_steps; ++step) {
+        lstm_internal::LstmStep<ActivationType, WeightType, CellType, BiasType>(
+            stepper, op_data, kernel_content, buffers);
+        stepper.UpdateTime();  // prepare for the next time step
+      }
+      // Prepare for the next batch and restart the time step counter.
+      stepper.UpdateBatch();
+      stepper.ResetTime();
+    }
+    return kTfLiteOk;
+  }
+  // Time is the first dimension: batch computation is enabled, every step
+  // covers all batches.
+  for (int step = 0; step < sizes.time_steps; ++step) {
+    lstm_internal::LstmStep<ActivationType, WeightType, CellType, BiasType>(
+        stepper, op_data, kernel_content, buffers);
+    stepper.UpdateTime();  // prepare for the next time step
+  }
+  return kTfLiteOk;
+}
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_GENERAL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h
new file mode 100644
index 0000000..cfaec49
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval_test.h
@@ -0,0 +1,817 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
+
+#include <algorithm>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/testing/micro_test.h"
+
+namespace tflite {
+namespace testing {
+
+/*Helper Functions (mainly about mimicking the kernel preparation)*/
+
+// Builds the quantized fully connected parameters for one gate FC from the
+// quantization settings of its input and weight tensors.
+// TfLiteContext is not available during the kernel test, so this mimics (in
+// stack memory) CalculateOpDataFullyConnected in
+// tensorflow/lite/micro/kernels/fully_connected_common.cc
+template <typename CellType>
+tflite::FullyConnectedParams CreateFCParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& weight_quant_params,
+    const float nonlinear_activation_input_scale) {
+  OpDataFullyConnected fc_data;
+  // Only the input carries a zero point; filter and output are symmetrically
+  // quantized.
+  fc_data.input_zero_point = input_quant_params.zero_point;
+  fc_data.filter_zero_point = 0;
+  fc_data.output_zero_point = 0;
+
+  // The output may span the full range of the cell type.
+  fc_data.output_activation_min = std::numeric_limits<CellType>::min();
+  fc_data.output_activation_max = std::numeric_limits<CellType>::max();
+
+  // effective scale = (input scale * weight scale) / activation input scale
+  const double product_scale =
+      input_quant_params.scale * weight_quant_params.scale;
+  const double rescale =
+      product_scale / static_cast<double>(nonlinear_activation_input_scale);
+  QuantizeMultiplier(rescale, &fc_data.output_multiplier,
+                     &fc_data.output_shift);
+  return tflite::FullyConnectedParamsQuantized(fc_data);
+}
+
+inline tflite::FullyConnectedParams CreateFCParamsFloat() {
+  FullyConnectedParams float_fc;  // float range computed for kTfLiteActNone
+  CalculateActivationRange(kTfLiteActNone, &float_fc.float_activation_min,
+                           &float_fc.float_activation_max);
+  return float_fc;
+}
+
+// Wrapper that builds the parameters for one of the four internal LSTM gates
+template <typename CellType>
+tflite::GateParameters CreateGateParams(
+    const TensorQuantizationParameters& input_quant_params,
+    const TensorQuantizationParameters& hidden_state_quant_params,
+    const GateQuantizationParameters& gate_quantization_settings,
+    const float nonlinear_activation_input_scale) {
+  tflite::GateParameters params = {};
+  params.recurrent_fc_params = CreateFCParams<CellType>(
+      hidden_state_quant_params, gate_quantization_settings.recurrent_weight,
+      nonlinear_activation_input_scale);
+  params.input_fc_params = CreateFCParams<CellType>(
+      input_quant_params, gate_quantization_settings.activation_weight,
+      nonlinear_activation_input_scale);
+  return params;
+}
+
+inline tflite::GateParameters CreateGateParamsFloat() {
+  tflite::GateParameters float_gate = {};
+  float_gate.recurrent_fc_params = CreateFCParamsFloat();
+  float_gate.input_fc_params = CreateFCParamsFloat();
+  return float_gate;
+}
+// Create the parameters for the element wise multiplications that happen in
+// a) the cell state update and b) the hidden state update.
+// All gate outputs are symmetrically quantized, so the inputs only need their
+// scales. The hidden state update, however, produces the updated hidden
+// state, which is asymmetrically quantized, hence the optional output zero
+// point (output_zp).
+template <typename OutputType>
+tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale,
+                                                  const float input2_scale,
+                                                  const float output_scale,
+                                                  const int output_zp = 0) {
+  // effective scale = (input1 scale * input2 scale) / output scale
+  const double scale_product =
+      static_cast<double>(input1_scale) * static_cast<double>(input2_scale);
+  const double rescale = scale_product / static_cast<double>(output_scale);
+
+  tflite::ArithmeticParams mul_params = {};
+  QuantizeMultiplier(rescale, &mul_params.output_multiplier,
+                     &mul_params.output_shift);
+  mul_params.input1_offset = 0;
+  mul_params.input2_offset = 0;
+  mul_params.output_offset = output_zp;
+  // The output may span the full range of the output type.
+  mul_params.quantized_activation_min = std::numeric_limits<OutputType>::min();
+  mul_params.quantized_activation_max = std::numeric_limits<OutputType>::max();
+  return mul_params;
+}
+
+inline tflite::ArithmeticParams CreateInterGateMulParamsFloat() {
+  tflite::ArithmeticParams float_mul = {};  // float range for kTfLiteActNone
+  CalculateActivationRange(kTfLiteActNone, &float_mul.float_activation_min,
+                           &float_mul.float_activation_max);
+  return float_mul;
+}
+
+// Create the additional information about the cell state, which include:
+// cell_state_scale_power: used in integer nonlinear function (e.g., tanh)
+// quantized_cell_clip: cell_clip / cell_state_scale, saturated to int16 range
+inline CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale,
+                                             const float cell_clip) {
+  // inline: this function is defined in a header, so it must be inline to be
+  // includable from more than one translation unit without link errors.
+  CellStateInfo cell_state_info;
+  int scale_power = 0;  // defined value even if CheckedLog2 does not write it
+  tflite::CheckedLog2(cell_state_scale, &scale_power);
+  cell_state_info.cell_state_scale_power = scale_power;
+  // Cell state specifics
+  cell_state_info.cell_clip = cell_clip;
+  cell_state_info.quantized_cell_clip = static_cast<int16_t>(
+      std::min(std::max(static_cast<double>(cell_clip) /
+                            static_cast<double>(cell_state_scale),
+                        -32768.0),
+               32767.0));
+  return cell_state_info;
+}
+
+// Create LSTMKernelContents from LstmNodeContent by copying TfLiteEvalTensor
+// pointers: the returned struct only refers to the eval tensors handed out by
+// node_contents. Internal tensors that are not used (cell-to-gate weights,
+// projection, layer norm coefficients) are set to nullptr.
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+LSTMKernelContents CreateLSTMKernelContent(
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  LSTMKernelContents kernel_content;
+  // Point to correct tensors (each index is assigned exactly once)
+  kernel_content.internal_tensors[kLstmInputTensor] =
+      node_contents.GetEvalTensor(kLstmInputTensor);
+  kernel_content.internal_tensors[kLstmInputToInputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToInputWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToForgetWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToForgetWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToCellWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToCellWeightsTensor);
+  kernel_content.internal_tensors[kLstmInputToOutputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmInputToOutputWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToInputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToInputWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToForgetWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToForgetWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToCellWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToCellWeightsTensor);
+  kernel_content.internal_tensors[kLstmRecurrentToOutputWeightsTensor] =
+      node_contents.GetEvalTensor(kLstmRecurrentToOutputWeightsTensor);
+  // Gate biases
+  kernel_content.internal_tensors[kLstmInputGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmInputGateBiasTensor);
+  kernel_content.internal_tensors[kLstmForgetGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmForgetGateBiasTensor);
+  kernel_content.internal_tensors[kLstmCellGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmCellGateBiasTensor);
+  kernel_content.internal_tensors[kLstmOutputGateBiasTensor] =
+      node_contents.GetEvalTensor(kLstmOutputGateBiasTensor);
+  // Output (hidden) state and cell state
+  kernel_content.internal_tensors[kLstmOutputStateTensor] =
+      node_contents.GetEvalTensor(kLstmOutputStateTensor);
+  kernel_content.internal_tensors[kLstmCellStateTensor] =
+      node_contents.GetEvalTensor(kLstmCellStateTensor);
+  // Not used internal tensors
+  kernel_content.internal_tensors[kLstmCellToInputWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmCellToForgetWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmCellToOutputWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmProjectionWeightsTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmProjectionBiasTensor] = nullptr;
+  kernel_content.internal_tensors[kLstmInputLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmForgetLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmCellLayerNormCoefficientsTensor] =
+      nullptr;
+  kernel_content.internal_tensors[kLstmOutputLayerNormCoefficientsTensor] =
+      nullptr;
+  // Output tensor
+  kernel_content.output_tensor = node_contents.OutputEvalTensor();
+  return kernel_content;
+}
+
+// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I),
+// State dimension (S)) that defines the LSTM using the input and hidden state
+// tensor. inline because it is defined in a header (avoids duplicate symbols).
+inline LstmSizeInfo CreateLstmSizeInfo(
+    const bool time_major, const TfLiteIntArray* input_tensor_shape,
+    const TfLiteIntArray* hidden_state_tensor_shape) {
+  LstmSizeInfo size_info;
+  size_info.time_major = time_major;
+  size_info.batch_size =  // input dims: [T, B, I] if time_major, else [B, T, I]
+      time_major ? input_tensor_shape->data[1] : input_tensor_shape->data[0];
+  size_info.time_steps =
+      time_major ? input_tensor_shape->data[0] : input_tensor_shape->data[1];
+  size_info.input_dimension = input_tensor_shape->data[2];
+  size_info.state_dimension = hidden_state_tensor_shape->data[1];
+  return size_info;
+}
+
+// Create the OpDataLSTM using the LstmNodeContent and its quantization
+// settings (NodeQuantizationParameters, defined in test_data/lstm_test_data).
+// During the actual inference phase, OpDataLSTM is created using information
+// from the flatbuffer file. The test divides the complete LSTM node
+// information into LstmNodeContent and NodeQuantizationParameters for easy
+// construction purposes.
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+OpDataLSTM CreateLstmOpData(
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  const auto& builtin_data = node_contents.BuiltinData();
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  OpDataLSTM op_data;
+
+  op_data.cell_gate_nonlinear_type = builtin_data.activation;
+  op_data.size_info =
+      CreateLstmSizeInfo(builtin_data.time_major,
+                         node_contents.GetEvalTensor(kLstmInputTensor)->dims,
+                         node_contents.HiddenStateEvalTensor()->dims);
+
+  op_data.cell_state_info = CreateLstmCellStateInfo(
+      quantization_settings.cell_state.scale, builtin_data.cell_clip);
+
+  // Gate Parameters (all four gates share the input/hidden state settings)
+  op_data.forget_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.forget_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.input_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.input_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.cell_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.cell_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  op_data.output_gate_parameters = CreateGateParams<CellType>(
+      quantization_settings.input, quantization_settings.hidden_state,
+      quantization_settings.output_gate,
+      quantization_settings.nonlinear_activation_input_scale);
+  // Inter gate multiplication parameters (input1 scale, input2 scale, output)
+  op_data.inter_gate_parameters.forget_cell_mul_params =
+      CreateInterGateMulParams<CellType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.cell_state.scale,
+          quantization_settings.cell_state.scale);
+  op_data.inter_gate_parameters.input_mul_params =
+      CreateInterGateMulParams<CellType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.cell_state.scale);
+  op_data.inter_gate_parameters.output_mul_params =  // only one with a zero point
+      CreateInterGateMulParams<ActivationType>(
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.nonlinear_activation_output_scale,
+          quantization_settings.hidden_state.scale,
+          quantization_settings.hidden_state.zero_point);
+  return op_data;
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+OpDataLSTM CreateLstmOpDataFloat(
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  const auto& builtin_data = node_contents.BuiltinData();
+  OpDataLSTM op_data;
+
+  op_data.cell_gate_nonlinear_type = builtin_data.activation;
+  op_data.size_info =
+      CreateLstmSizeInfo(builtin_data.time_major,
+                         node_contents.GetEvalTensor(kLstmInputTensor)->dims,
+                         node_contents.HiddenStateEvalTensor()->dims);
+  op_data.cell_state_info.cell_clip = builtin_data.cell_clip;
+  op_data.cell_state_info.quantized_cell_clip = 0;     // No quantization
+  op_data.cell_state_info.cell_state_scale_power = 0;  // No quantization
+
+  // Gate Parameters
+  op_data.forget_gate_parameters = CreateGateParamsFloat();
+  op_data.input_gate_parameters = CreateGateParamsFloat();
+  op_data.cell_gate_parameters = CreateGateParamsFloat();
+  op_data.output_gate_parameters = CreateGateParamsFloat();
+  // Inter gate multiplication parameters
+  op_data.inter_gate_parameters.forget_cell_mul_params =
+      CreateInterGateMulParamsFloat();
+  op_data.inter_gate_parameters.input_mul_params =
+      CreateInterGateMulParamsFloat();
+  op_data.inter_gate_parameters.output_mul_params =
+      CreateInterGateMulParamsFloat();
+  return op_data;
+}
+
+/*Test Functions Below Here*/
+template <typename T>
+void ValidateResultGoldens(const T* golden, const T* output_data,
+                           const int output_len, const float tolerance) {
+  for (int i = 0; i < output_len; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], tolerance);
+  }
+}
+
+template <int batch_size, int state_dimension>
+void TestCalculateLstmGateFloat(const TfLiteEvalTensor* input,
+                                const TfLiteEvalTensor* input_weight,
+                                const TfLiteEvalTensor* input_bias,
+                                // Recurrent FC
+                                const TfLiteEvalTensor* recurrent,
+                                const TfLiteEvalTensor* recurrent_weight,
+                                const TfLiteEvalTensor* recurrent_bias,
+                                // Result comparison
+                                TfLiteFusedActivation nonlinear_type,
+                                const float* expected_vals, float tolerance) {
+  float gate_output[batch_size * state_dimension] = {};
+  float fc_output_buffer[batch_size * state_dimension] = {};
+
+  tflite::GateParameters gate_params = CreateGateParamsFloat();
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false, input->dims, recurrent->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  tflite::lstm_internal::CalculateLstmGate<float, float, float, float>(
+      step_info, gate_params,
+      // Input FC
+      input, input_weight, input_bias,
+      // Recurrent FC
+      recurrent, recurrent_weight, recurrent_bias,
+      // Output
+      gate_output,
+      // Scratch arrays
+      fc_output_buffer, nonlinear_type);
+
+  ValidateResultGoldens(expected_vals, gate_output,
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int state_dimension>
+void TestCalculateLstmGateInteger(
+    const TfLiteEvalTensor* input, const TfLiteEvalTensor* input_weight,
+    const TfLiteEvalTensor* input_bias,
+    // Recurrent FC
+    const TfLiteEvalTensor* recurrent, const TfLiteEvalTensor* recurrent_weight,
+    const TfLiteEvalTensor* recurrent_bias,
+    // Quantization settings
+    const NodeQuantizationParameters& node_quantization_settings,
+    const GateQuantizationParameters& gate_quantization_settings,
+    // Result comparison
+    TfLiteFusedActivation nonlinear_type, const float* expected_vals,
+    float tolerance) {
+  CellType gate_output[batch_size * state_dimension] = {};
+  CellType fc_output_buffer[batch_size * state_dimension] = {};
+
+  tflite::GateParameters gate_params = CreateGateParams<CellType>(
+      node_quantization_settings.input, node_quantization_settings.hidden_state,
+      gate_quantization_settings,
+      node_quantization_settings.nonlinear_activation_input_scale);
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false, input->dims, recurrent->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // only int8 weight is supported now
+  tflite::lstm_internal::CalculateLstmGate<ActivationType, WeightType, CellType,
+                                           BiasType>(
+      step_info, gate_params,
+      // Input FC
+      input, input_weight, input_bias,
+      // Recurrent FC
+      recurrent, recurrent_weight, recurrent_bias,
+      // Output
+      gate_output,
+      // Scratch arrays
+      fc_output_buffer, nonlinear_type);
+
+  float gate_output_float[batch_size * state_dimension] = {};
+  Dequantize(gate_output, batch_size * state_dimension,
+             node_quantization_settings.nonlinear_activation_output_scale, 0,
+             gate_output_float);
+
+  ValidateResultGoldens(expected_vals, gate_output_float,
+                        batch_size * state_dimension, tolerance);
+}
+
+// Feeds the golden gate outputs to UpdateLstmCell<float> and compares the
+// updated cell state of node_content with expected_updated_cell.
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestUpdateLstmCellFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  float buffer[batch_size * state_dimension] = {};
+  auto forget_cell_mul_params = CreateInterGateMulParamsFloat();
+  auto input_mul_params = CreateInterGateMulParamsFloat();
+  auto cell_state = node_content.CellStateEvalTensor();
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // copy the data since it will be updated
+  float forget_gate[batch_size * state_dimension] = {};
+  std::memcpy(forget_gate, gate_output_data.expected_forget_gate_output,
+              batch_size * state_dimension * sizeof(float));
+  // Zero-initialize so the fields not set below are not indeterminate
+  CellStateInfo cell_state_info = {};
+  cell_state_info.cell_clip = node_content.BuiltinData().cell_clip;
+  // Call the function to be tested
+  tflite::lstm_internal::UpdateLstmCell<float>(
+      step_info, cell_state, forget_gate,
+      gate_output_data.expected_input_gate_output,
+      gate_output_data.expected_cell_gate_output, forget_cell_mul_params,
+      input_mul_params, cell_state_info, buffer);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        tflite::micro::GetTensorData<float>(cell_state),
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestUpdateLstmCellInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  const auto& quantization_settings = node_content.QuantizationSettings();
+  CellType quantized_forget_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_forget_gate_output,
+                   quantized_forget_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType quantized_input_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_input_gate_output,
+                   quantized_input_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType quantized_cell_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_cell_gate_output,
+                   quantized_cell_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType buffer[batch_size * state_dimension] = {};
+
+  auto forget_cell_mul_params = CreateInterGateMulParams<CellType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.cell_state.scale,
+      quantization_settings.cell_state.scale);
+  auto input_mul_params = CreateInterGateMulParams<CellType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.cell_state.scale);
+
+  auto cell_state_info =
+      CreateLstmCellStateInfo(quantization_settings.cell_state.scale,
+                              node_content.BuiltinData().cell_clip);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  // Call the function to be tested
+  tflite::lstm_internal::UpdateLstmCell<CellType>(
+      step_info, cell_state, quantized_forget_gate, quantized_input_gate,
+      quantized_cell_gate, forget_cell_mul_params, input_mul_params,
+      cell_state_info, buffer);
+
+  float cell_state_float[batch_size * state_dimension] = {};
+  Dequantize(tflite::micro::GetTensorData<CellType>(cell_state),
+             batch_size * state_dimension,
+             quantization_settings.cell_state.scale,
+             quantization_settings.cell_state.zero_point, cell_state_float);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        cell_state_float, batch_size * state_dimension,
+                        tolerance);
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestUpdateLstmHiddenFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  float buffer[batch_size * state_dimension] = {};
+
+  auto mul_params = CreateInterGateMulParamsFloat();
+
+  int32_t cell_state_scale_power = 0;
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  auto hidden_state = node_content.HiddenStateEvalTensor();
+
+  tflite::lstm_internal::UpdateLstmHidden<float, float>(
+      step_info, cell_state, hidden_state,
+      gate_output_data.expected_output_gate_output, mul_params,
+      cell_state_scale_power, buffer);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        tflite::micro::GetTensorData<float>(hidden_state),
+                        batch_size * state_dimension, tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestUpdateLstmHiddenInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>& node_content,
+    const float tolerance) {
+  const auto& quantization_settings = node_content.QuantizationSettings();
+  CellType quantized_output_gate[batch_size * state_dimension] = {};
+  tflite::Quantize(gate_output_data.expected_output_gate_output,
+                   quantized_output_gate, batch_size * state_dimension,
+                   quantization_settings.nonlinear_activation_output_scale, 0);
+
+  CellType buffer[batch_size * state_dimension] = {};
+
+  auto mul_params = CreateInterGateMulParams<ActivationType>(
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.nonlinear_activation_output_scale,
+      quantization_settings.hidden_state.scale,
+      quantization_settings.hidden_state.zero_point);
+
+  int cell_state_scale_power_buffer;
+  tflite::CheckedLog2(quantization_settings.cell_state.scale,
+                      &cell_state_scale_power_buffer);
+  int32_t cell_state_scale_power = cell_state_scale_power_buffer;
+
+  // Create step information: only one time step, no need to update
+  auto size_info = tflite::testing::CreateLstmSizeInfo(
+      /*time_major*/ false,
+      node_content.GetEvalTensor(tflite::kLstmInputTensor)->dims,
+      node_content.HiddenStateEvalTensor()->dims);
+  // revise time_major = true to enable batch inference
+  size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&size_info);
+
+  auto cell_state = node_content.CellStateEvalTensor();
+  auto hidden_state = node_content.HiddenStateEvalTensor();
+
+  tflite::lstm_internal::UpdateLstmHidden<CellType, ActivationType>(
+      step_info, cell_state, hidden_state, quantized_output_gate, mul_params,
+      cell_state_scale_power, buffer);
+
+  float hidden_state_float[batch_size * state_dimension] = {};
+  Dequantize(tflite::micro::GetTensorData<ActivationType>(hidden_state),
+             batch_size * state_dimension,
+             quantization_settings.hidden_state.scale,
+             quantization_settings.hidden_state.zero_point, hidden_state_float);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        hidden_state_float, batch_size * state_dimension,
+                        tolerance);
+}
+
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestLstmStepFloat(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    /*can not be const, state will be updated*/
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximate the node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  LSTMBuffers<float> buffers;
+  // Scratch buffers on the stack
+  float buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  float buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  float buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  float buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents);
+  // set time_major to true to test batch inference
+  op_data.size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  tflite::lstm_internal::LstmStep<float, float, float, float>(
+      step_info, op_data, kernel_content, buffers);
+
+  ValidateResultGoldens(
+      gate_output_data.expected_updated_hidden,
+      tflite::micro::GetTensorData<float>(kernel_content.HiddenStateTensor()),
+      batch_size * state_dimension, hidden_state_tolerance);
+  ValidateResultGoldens(
+      gate_output_data.expected_updated_cell,
+      tflite::micro::GetTensorData<float>(kernel_content.CellStateTensor()),
+      batch_size * state_dimension, cell_state_tolerance);
+}
+
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestLstmStepInteger(
+    const GateOutputCheckData<batch_size * input_dimension,
+                              batch_size * state_dimension>& gate_output_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    /*can not be const, state will be updated*/
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximate the node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  LSTMBuffers<CellType> buffers;
+
+  // Scratch buffers on the stack
+  CellType buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  CellType buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  CellType buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  CellType buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+
+  OpDataLSTM op_data = CreateLstmOpData(node_contents);
+  // set time_major to true to test batch inference
+  op_data.size_info.time_major = true;
+  tflite::lstm_internal::LstmStepManager step_info(&op_data.size_info);
+  tflite::lstm_internal::LstmStep<ActivationType, WeightType, CellType,
+                                  BiasType>(step_info, op_data, kernel_content,
+                                            buffers);
+
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  float dequantized_hidden_state[batch_size * state_dimension] = {};
+  Dequantize(
+      tflite::micro::GetTensorData<ActivationType>(
+          kernel_content.HiddenStateTensor()),
+      batch_size * state_dimension, quantization_settings.hidden_state.scale,
+      quantization_settings.hidden_state.zero_point, dequantized_hidden_state);
+
+  float dequantized_cell_state[batch_size * state_dimension] = {};
+  Dequantize(
+      tflite::micro::GetTensorData<CellType>(kernel_content.CellStateTensor()),
+      batch_size * state_dimension, quantization_settings.cell_state.scale,
+      quantization_settings.cell_state.zero_point, dequantized_cell_state);
+
+  ValidateResultGoldens(gate_output_data.expected_updated_hidden,
+                        dequantized_hidden_state, batch_size * state_dimension,
+                        hidden_state_tolerance);
+  ValidateResultGoldens(gate_output_data.expected_updated_cell,
+                        dequantized_cell_state, batch_size * state_dimension,
+                        cell_state_tolerance);
+}
+
+// Runs tflite::EvalLstm on the float node_contents and compares the resulting
+// hidden state, cell state and output with the goldens in eval_check_data,
+// using hidden_state_tolerance for the output as well.
+template <int batch_size, int time_steps, int input_dimension,
+          int state_dimension>
+void TestEvalLstmFloat(
+    const LstmEvalCheckData<
+        batch_size * time_steps * input_dimension, batch_size * state_dimension,
+        batch_size * state_dimension * time_steps>& eval_check_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    LstmNodeContent<float, float, float, float, batch_size, time_steps,
+                    input_dimension, state_dimension>& node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximate the node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  // Scratch buffers on the stack
+  LSTMBuffers<float> buffers;
+  float buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  float buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  float buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  float buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+  OpDataLSTM op_data = CreateLstmOpDataFloat(node_contents);
+  tflite::EvalLstm<float, float, float, float>(op_data, kernel_content,
+                                               buffers);
+
+  ValidateResultGoldens(eval_check_data.expected_hidden_state,
+                        node_contents.GetHiddenStateData(),
+                        batch_size * state_dimension, hidden_state_tolerance);
+  ValidateResultGoldens(eval_check_data.expected_cell_state,
+                        node_contents.GetCellStateData(),
+                        batch_size * state_dimension, cell_state_tolerance);
+  // Compare every output element rather than only the leading ones
+  ValidateResultGoldens(
+      eval_check_data.expected_output, node_contents.GetOutputData(),
+      batch_size * state_dimension * time_steps, hidden_state_tolerance);
+}
+
+// Runs tflite::EvalLstm on the quantized node_contents and checks the
+// dequantized hidden state, cell state and output against eval_check_data.
+template <typename ActivationType, typename WeightType, typename BiasType,
+          typename CellType, int batch_size, int time_steps,
+          int input_dimension, int state_dimension>
+void TestEvalLstmInteger(
+    const LstmEvalCheckData<
+        batch_size * time_steps * input_dimension, batch_size * state_dimension,
+        batch_size * state_dimension * time_steps>& eval_check_data,
+    const float hidden_state_tolerance, const float cell_state_tolerance,
+    LstmNodeContent<ActivationType, WeightType, BiasType, CellType, batch_size,
+                    time_steps, input_dimension, state_dimension>&
+        node_contents) {
+  // Mimicking the kernel preparation phase, node_contents approximate the node
+  LSTMKernelContents kernel_content = CreateLSTMKernelContent(node_contents);
+  // Scratch buffers on the stack
+  LSTMBuffers<CellType> buffers;
+  CellType buffer0[batch_size * state_dimension] = {};
+  buffers.buffer0 = buffer0;
+  CellType buffer1[batch_size * state_dimension] = {};
+  buffers.buffer1 = buffer1;
+  CellType buffer2[batch_size * state_dimension] = {};
+  buffers.buffer2 = buffer2;
+  CellType buffer3[batch_size * state_dimension] = {};
+  buffers.buffer3 = buffer3;
+  OpDataLSTM op_data = CreateLstmOpData(node_contents);
+  tflite::EvalLstm<ActivationType, WeightType, CellType, BiasType>(
+      op_data, kernel_content, buffers);
+
+  const auto& quantization_settings = node_contents.QuantizationSettings();
+  float dequantized_hidden_state[batch_size * state_dimension] = {};
+  Dequantize(node_contents.GetHiddenStateData(), batch_size * state_dimension,
+             quantization_settings.hidden_state.scale,
+             quantization_settings.hidden_state.zero_point,
+             dequantized_hidden_state);
+  ValidateResultGoldens(eval_check_data.expected_hidden_state,
+                        dequantized_hidden_state, batch_size * state_dimension,
+                        hidden_state_tolerance);
+
+  float dequantized_cell_state[batch_size * state_dimension] = {};
+  Dequantize(node_contents.GetCellStateData(), batch_size * state_dimension,
+             quantization_settings.cell_state.scale,
+             quantization_settings.cell_state.zero_point,
+             dequantized_cell_state);
+  ValidateResultGoldens(eval_check_data.expected_cell_state,
+                        dequantized_cell_state, batch_size * state_dimension,
+                        cell_state_tolerance);
+  // Every dequantized output element is compared, not just the leading ones
+  float dequantized_output[batch_size * state_dimension * time_steps] = {};
+  Dequantize(node_contents.GetOutputData(),
+             batch_size * state_dimension * time_steps,
+             quantization_settings.output.scale,
+             quantization_settings.output.zero_point, dequantized_output);
+  ValidateResultGoldens(eval_check_data.expected_output, dequantized_output,
+                        batch_size * state_dimension * time_steps,
+                        hidden_state_tolerance);
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_EVAL_TEST_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h
new file mode 100644
index 0000000..54020f6
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h
@@ -0,0 +1,150 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// Input Tensors of size {n_batch, n_input}
+constexpr int kLstmInputTensor = 0;
+
+// Input weight tensors of size: {n_cell, n_input}
+constexpr int kLstmInputToInputWeightsTensor = 1;  // Optional
+constexpr int kLstmInputToForgetWeightsTensor = 2;
+constexpr int kLstmInputToCellWeightsTensor = 3;
+constexpr int kLstmInputToOutputWeightsTensor = 4;
+
+// Recurrent weight tensors of size {n_cell, n_output}
+constexpr int kLstmRecurrentToInputWeightsTensor = 5;  // Optional
+constexpr int kLstmRecurrentToForgetWeightsTensor = 6;
+constexpr int kLstmRecurrentToCellWeightsTensor = 7;
+constexpr int kLstmRecurrentToOutputWeightsTensor = 8;
+
+// Peephole weights tensors of size {n_cell}, representing a diagonal matrix.
+constexpr int kLstmCellToInputWeightsTensor = 9;    // Optional
+constexpr int kLstmCellToForgetWeightsTensor = 10;  // Optional
+constexpr int kLstmCellToOutputWeightsTensor = 11;  // Optional
+
+// Gates bias tensors of size {n_cell}
+constexpr int kLstmInputGateBiasTensor = 12;  // Optional
+constexpr int kLstmForgetGateBiasTensor = 13;
+constexpr int kLstmCellGateBiasTensor = 14;
+constexpr int kLstmOutputGateBiasTensor = 15;
+
+// Projection weight tensor of size {n_output, n_cell}
+constexpr int kLstmProjectionWeightsTensor = 16;  // Optional
+// Projection bias tensor of size {n_output}
+constexpr int kLstmProjectionBiasTensor = 17;  // Optional
+
+// These state tensors are defined as variable tensors, and will be modified by
+// this op.
+constexpr int kLstmOutputStateTensor = 18;
+constexpr int kLstmCellStateTensor = 19;
+
+// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
+// matrix.
+constexpr int kLstmInputLayerNormCoefficientsTensor = 20;   // Optional
+constexpr int kLstmForgetLayerNormCoefficientsTensor = 21;  // Optional
+constexpr int kLstmCellLayerNormCoefficientsTensor = 22;    // Optional
+constexpr int kLstmOutputLayerNormCoefficientsTensor = 23;  // Optional
+
+// Output tensors.
+constexpr int kLstmOutputTensor = 0;
+
+// Parameters for the two fully connected computations inside each gate
+struct GateParameters {
+  FullyConnectedParams input_fc_params;
+  FullyConnectedParams recurrent_fc_params;
+};
+
+// Parameters for the element wise multiplications between gate outputs
+struct InterGateParameters {
+  ArithmeticParams forget_cell_mul_params;
+  ArithmeticParams input_mul_params;
+  ArithmeticParams output_mul_params;
+};
+
+// Size information about the LSTM kernel, which is deduced from tensors stored
+// in the flat buffer file.
+struct LstmSizeInfo {
+  bool time_major;
+  int batch_size;
+  int time_steps;
+  int input_dimension;
+  int state_dimension;
+};
+
+// Contains information about the cell state tensor
+struct CellStateInfo {
+  float cell_clip;
+  // clipping range for the cell state; only 16-bit cells are supported (could
+  // be generalized through templating)
+  int16_t quantized_cell_clip;
+  // 2^-cell_state_scale_power = cell state scale, required by integer tanh
+  // computation
+  int32_t cell_state_scale_power;
+};
+
+// Contains required computation information for LSTM kernel evaluation.
+// Specifically, it includes shape and quantization settings for the LSTM
+// internal operations. Formatted to support operations defined in the
+// tensorflow/lite/kernels/internal/reference/integer_ops
+// Should be constructed during the preparation phase
+struct OpDataLSTM {
+  LstmSizeInfo size_info;
+  CellStateInfo cell_state_info;
+  TfLiteFusedActivation cell_gate_nonlinear_type;
+  GateParameters forget_gate_parameters;
+  GateParameters input_gate_parameters;
+  GateParameters cell_gate_parameters;
+  GateParameters output_gate_parameters;
+  InterGateParameters inter_gate_parameters;
+  int buffer_indices[4];  // TFLM only
+};
+
+// Provide an interface to access the internal tensors and buffers used for LSTM
+// invocation. Constructed during the invocation phase
+struct LSTMKernelContents {
+ public:
+  // Internal tensors, fixed (const). See the kLstm*Tensor indexes above
+  const TfLiteEvalTensor* GetInternalTensor(const int tensor_index) const {
+    return internal_tensors[tensor_index];
+  }
+  // Variable tensors (will be changed, can not be const)
+  TfLiteEvalTensor* HiddenStateTensor() {
+    return internal_tensors[kLstmOutputStateTensor];
+  }
+  TfLiteEvalTensor* CellStateTensor() {
+    return internal_tensors[kLstmCellStateTensor];
+  }
+  // Node internal tensors with indexes defined at the beginning of the file
+  TfLiteEvalTensor* internal_tensors[24];
+  TfLiteEvalTensor* output_tensor;
+};
+
+template <typename CellType>
+struct LSTMBuffers {
+  // TFLM buffers require the buffer indices from OpDataLSTM.
+  CellType* buffer0;
+  CellType* buffer1;
+  CellType* buffer2;
+  CellType* buffer3;
+};
+
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_LSTM_SHARED_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cc
index c253928..c003e68 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/maximum_minimum.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,11 +23,10 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace maximum_minimum {
+
 namespace {
 
 // This file has a reference implementation of TFMaximum/TFMinimum.
@@ -64,8 +63,6 @@ struct MinimumOp {
   }
 };
 
-}  // namespace
-
 template <typename data_type, typename op_type>
 void TFLiteOperation(TfLiteContext* context, TfLiteNode* node,
                      const OpContext& op_context) {
@@ -88,9 +85,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       case kTfLiteFloat32:
         TFLiteOperation<float, OpType>(context, node, op_context);
         break;
-      case kTfLiteUInt8:
-        TFLiteOperation<uint8_t, OpType>(context, node, op_context);
-        break;
       case kTfLiteInt8:
         TFLiteOperation<int8_t, OpType>(context, node, op_context);
         break;
@@ -101,48 +95,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         TFLiteOperation<int64_t, OpType>(context, node, op_context);
         break;
       default:
-        TF_LITE_KERNEL_LOG(context,
-                           "Type %s (%d) is not supported by Maximum/Minimum.",
-                           TfLiteTypeGetName(op_context.output->type),
-                           op_context.output->type);
+        MicroPrintf("Type %s (%d) is not supported by Maximum/Minimum.",
+                    TfLiteTypeGetName(op_context.output->type),
+                    op_context.output->type);
         return kTfLiteError;
     }
   } else {
-    TF_LITE_KERNEL_LOG(context,
-                       "Kernel type not supported by Maximum/Minimum.");
+    MicroPrintf("Kernel type not supported by Maximum/Minimum.");
     return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace maximum_minimum
+}  // namespace
 
 TfLiteRegistration Register_MAXIMUM() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/
-          maximum_minimum::Eval<maximum_minimum::kReference,
-                                maximum_minimum::MaximumOp>,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr,
+                                   Eval<kReference, MaximumOp>);
 }
 
 TfLiteRegistration Register_MINIMUM() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/
-          maximum_minimum::Eval<maximum_minimum::kReference,
-                                maximum_minimum::MinimumOp>,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr,
+                                   Eval<kReference, MinimumOp>);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h
index baa4f17..fd28a32 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -31,26 +31,76 @@ namespace tflite {
 // (https://abseil.io/tips/130). Any new ops (or cleanup of existing ops should
 // have their Register function declarations in the tflite namespace.
 
+TfLiteRegistration Register_ADD();
 TfLiteRegistration Register_ADD_N();
+TfLiteRegistration Register_ARG_MAX();
+TfLiteRegistration Register_ARG_MIN();
+TfLiteRegistration Register_ASSIGN_VARIABLE();
+TfLiteRegistration Register_AVERAGE_POOL_2D();
 TfLiteRegistration Register_BATCH_MATMUL();
 TfLiteRegistration Register_BATCH_TO_SPACE_ND();
+TfLiteRegistration Register_BROADCAST_ARGS();
+TfLiteRegistration Register_BROADCAST_TO();
+TfLiteRegistration Register_CALL_ONCE();
 TfLiteRegistration Register_CAST();
+TfLiteRegistration Register_CEIL();
 TfLiteRegistration Register_COMPLEX_ABS();
+// TODO(b/160234179): Change custom OPs to also return by value.
+TfLiteRegistration* Register_CIRCULAR_BUFFER();
+TfLiteRegistration Register_CONCATENATION();
 TfLiteRegistration Register_CONV_2D();
+TfLiteRegistration Register_CUMSUM();
+TfLiteRegistration Register_DEPTH_TO_SPACE();
 TfLiteRegistration Register_DEPTHWISE_CONV_2D();
+TfLiteRegistration Register_DEQUANTIZE();
 TfLiteRegistration Register_DIV();
 TfLiteRegistration Register_ELU();
+TfLiteRegistration Register_EQUAL();
+TfLiteRegistration* Register_ETHOSU();
+TfLiteRegistration Register_EXP();
 TfLiteRegistration Register_EXPAND_DIMS();
 TfLiteRegistration Register_FILL();
+TfLiteRegistration Register_FLOOR();
+TfLiteRegistration Register_FLOOR_DIV();
+TfLiteRegistration Register_FLOOR_MOD();
+TfLiteRegistration Register_FULLY_CONNECTED();
 #ifndef TF_LITE_STATIC_MEMORY
 TfLiteRegistration Register_GATHER();
 #endif // TF_LITE_STATIC_MEMORY
+TfLiteRegistration Register_GATHER_ND();
+TfLiteRegistration Register_GREATER();
+TfLiteRegistration Register_GREATER_EQUAL();
+TfLiteRegistration Register_HARD_SWISH();
 TfLiteRegistration Register_IMAG();
+TfLiteRegistration Register_IF();
 TfLiteRegistration Register_L2_POOL_2D();
 TfLiteRegistration Register_LEAKY_RELU();
+TfLiteRegistration Register_LESS();
+TfLiteRegistration Register_LESS_EQUAL();
 TfLiteRegistration Register_LOG_SOFTMAX();
+TfLiteRegistration Register_LOGICAL_AND();
+TfLiteRegistration Register_LOGICAL_OR();
+TfLiteRegistration Register_LOGISTIC();
+TfLiteRegistration Register_MAX_POOL_2D();
+TfLiteRegistration Register_MAXIMUM();
+TfLiteRegistration Register_MEAN();
+TfLiteRegistration Register_MINIMUM();
+TfLiteRegistration Register_MIRROR_PAD();
+TfLiteRegistration Register_MUL();
+TfLiteRegistration Register_NEG();
+TfLiteRegistration Register_NOT_EQUAL();
+TfLiteRegistration Register_PACK();
+TfLiteRegistration Register_PAD();
+TfLiteRegistration Register_PADV2();
+TfLiteRegistration Register_PRELU();
 TfLiteRegistration Register_QUANTIZE();
+TfLiteRegistration Register_READ_VARIABLE();
 TfLiteRegistration Register_REAL();
+TfLiteRegistration Register_REDUCE_MAX();
+TfLiteRegistration Register_REDUCE_MIN();
+TfLiteRegistration Register_RELU();
+TfLiteRegistration Register_RELU6();
+TfLiteRegistration Register_RESIZE_BILINEAR();
 TfLiteRegistration Register_RFFT2D();
 #ifndef TF_LITE_STATIC_MEMORY
 TfLiteRegistration Register_SELECT();
@@ -60,67 +110,37 @@ TfLiteRegistration Register_SHAPE();
 TfLiteRegistration Register_SLICE();
 TfLiteRegistration Register_SOFTMAX();
 TfLiteRegistration Register_SPACE_TO_BATCH_ND();
+TfLiteRegistration Register_SPACE_TO_DEPTH();
+TfLiteRegistration Register_SPLIT_V();
 TfLiteRegistration Register_SQUARED_DIFFERENCE();
 TfLiteRegistration Register_SQUEEZE();
+TfLiteRegistration Register_STRIDED_SLICE();
+TfLiteRegistration Register_SUB();
+TfLiteRegistration Register_SUM();
 TfLiteRegistration Register_SVDF();
 TfLiteRegistration Register_TRANSPOSE();
 TfLiteRegistration Register_TRANSPOSE_CONV();
+// TODO(b/230666079): resolve conflict with xtensa implementation
+TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM();
+TfLiteRegistration Register_VAR_HANDLE();
+TfLiteRegistration Register_WHILE();
 TfLiteRegistration Register_ZEROS_LIKE();
 
 namespace ops {
 namespace micro {
 
 TfLiteRegistration Register_ABS();
-TfLiteRegistration Register_ADD();
-TfLiteRegistration Register_ARG_MAX();
-TfLiteRegistration Register_ARG_MIN();
-TfLiteRegistration Register_AVERAGE_POOL_2D();
-TfLiteRegistration Register_CEIL();
-// TODO(b/160234179): Change custom OPs to also return by value.
-TfLiteRegistration* Register_CIRCULAR_BUFFER();
-TfLiteRegistration Register_CONCATENATION();
 TfLiteRegistration Register_COS();
-TfLiteRegistration Register_DEQUANTIZE();
-TfLiteRegistration Register_EQUAL();
-TfLiteRegistration Register_EXP();
-TfLiteRegistration Register_FLOOR();
-TfLiteRegistration Register_GREATER();
-TfLiteRegistration Register_GREATER_EQUAL();
-TfLiteRegistration Register_HARD_SWISH();
-TfLiteRegistration Register_LESS();
-TfLiteRegistration Register_LESS_EQUAL();
 TfLiteRegistration Register_LOG();
-TfLiteRegistration Register_LOGICAL_AND();
 TfLiteRegistration Register_LOGICAL_NOT();
-TfLiteRegistration Register_LOGICAL_OR();
-TfLiteRegistration Register_LOGISTIC();
-TfLiteRegistration Register_MAXIMUM();
-TfLiteRegistration Register_MAX_POOL_2D();
-TfLiteRegistration Register_MEAN();
-TfLiteRegistration Register_MINIMUM();
-TfLiteRegistration Register_MUL();
-TfLiteRegistration Register_NEG();
-TfLiteRegistration Register_NOT_EQUAL();
-TfLiteRegistration Register_PACK();
-TfLiteRegistration Register_PAD();
-TfLiteRegistration Register_PADV2();
-TfLiteRegistration Register_PRELU();
-TfLiteRegistration Register_REDUCE_MAX();
-TfLiteRegistration Register_REDUCE_MIN();
-TfLiteRegistration Register_RELU();
-TfLiteRegistration Register_RELU6();
 TfLiteRegistration Register_RESHAPE();
 TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR();
 TfLiteRegistration Register_ROUND();
 TfLiteRegistration Register_RSQRT();
 TfLiteRegistration Register_SIN();
 TfLiteRegistration Register_SPLIT();
-TfLiteRegistration Register_SPLIT_V();
 TfLiteRegistration Register_SQRT();
 TfLiteRegistration Register_SQUARE();
-TfLiteRegistration Register_STRIDED_SLICE();
-TfLiteRegistration Register_SUB();
-TfLiteRegistration Register_SUM();
 TfLiteRegistration Register_UNPACK();
 TfLiteRegistration Register_L2_NORMALIZATION();
 TfLiteRegistration Register_TANH();
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cc
new file mode 100644
index 0000000..14be12c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.cc
@@ -0,0 +1,67 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include "edge-impulse-sdk/third_party/gemmlowp/fixedpoint/fixedpoint.h"  // from @gemmlowp
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+
+namespace tflite {
+
+// Stores 1 / (1 + exp(-x)) for each of the v_size elements of vector in result.
+void PortableApplySigmoidToVector(const float* vector, int v_size,
+                                  float* result) {
+  for (int v = 0; v < v_size; v++) {
+    result[v] = 1.0f / (1.0f + std::exp(-vector[v]));
+  }
+}
+
+void PortableApplyTanhToVector(const float* vector, int v_size, float* result) {
+  for (int v = 0; v < v_size; v++) {  // one output per input element
+    result[v] = std::tanh(vector[v]);
+  }
+}
+
+void PortableApplyActivationToVector(const float* vector, int v_size,
+                                     TfLiteFusedActivation activation,
+                                     float* result) {
+  switch (activation) {  // dispatch on the fused activation kind
+    case kTfLiteActNone:
+      return;  // NOTE: result is not written here (vector is not copied)
+    case kTfLiteActRelu:
+      return tflite::tensor_utils::ApplyReluToVector(vector, v_size, result);
+    case kTfLiteActReluN1To1:
+      return tflite::tensor_utils::ApplyRelu1ToVector(vector, v_size, result);
+    case kTfLiteActRelu6:
+      return tflite::tensor_utils::ApplyRelu6ToVector(vector, v_size, result);
+    case kTfLiteActTanh:
+      return PortableApplyTanhToVector(vector, v_size, result);
+    case kTfLiteActSignBit:
+      return tflite::tensor_utils::ApplySignbitToVector(vector, v_size, result);
+    case kTfLiteActSigmoid:
+      return PortableApplySigmoidToVector(vector, v_size, result);
+  }  // no default case: any other value falls through and writes nothing
+}
+
+}  // namespace tflite
\ No newline at end of file
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h
new file mode 100644
index 0000000..fb3d97f
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_tensor_utils.h
@@ -0,0 +1,56 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file and the associated .cc file are branched from
+// tensorflow/lite/kernels/internal/reference_portable_tensor_utils*
+// TFLM needs to create its own because the original files are coupled with
+// the tensor_utils module, which we cannot reuse due to its use of the
+// Eigen library.
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+
+#if defined(_MSC_VER)
+#define __restrict__ __restrict
+#endif
+
+namespace tflite {
+
+// Not all backends support CpuBackendContext usage, so forward declare to avoid
+// pulling in its implementation.
+// TODO(b/230666277): consider removing this since micro does not utilize it
+class CpuBackendContext;
+
+// Apply sigmoid to elements of a vector.
+void PortableApplySigmoidToVector(const float* vector, int v_size,
+                                  float* result);
+// Apply tanh to elements of a vector.
+void PortableApplyTanhToVector(const float* vector, int v_size, float* result);
+// Apply appropriate activation function to elements of a vector.
+void PortableApplyActivationToVector(const float* vector, int v_size,
+                                     TfLiteFusedActivation activation,
+                                     float* result);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MICRO_TENSOR_UTILS_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cc
new file mode 100644
index 0000000..c409fcc
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mirror_pad.cc
@@ -0,0 +1,215 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+namespace {
+
+struct OpDataMirrorPad {  // per-node state: written by Prepare, read by Eval
+  int input_dims;   // NumDimensions() of the input tensor
+  int output_size;  // NumElements() of the output tensor
+  int offset;       // 1 when mode is kTfLiteMirrorPaddingReflect, otherwise 0
+  int output_dims_num_elements_buffer_index;  // scratch buffer index
+  int input_dims_num_elements_buffer_index;   // scratch buffer index
+};
+
+// Reads the (left, right) pad pair stored at data[2*offset], data[2*offset+1].
+template <typename T>
+inline void GetPadding(const T* data, int offset, int64_t* left_pad,
+                       int64_t* right_pad) {
+  *left_pad = static_cast<int64_t>(*(data + offset * 2));
+  *right_pad = static_cast<int64_t>(*(data + offset * 2 + 1));
+}
+
+// Maps an index along one padded (output) dimension to the matching index
+// along the same dimension of the input. right_pad is not read by the body.
+inline int GetInputDimension(int padded_dimension, int left_pad, int right_pad,
+                             int input_dim_size, int offset) {
+  if (padded_dimension < left_pad) {  // index lies in the left padding
+    const int original_ind = left_pad + offset - 1;
+    return original_ind - (std::min(padded_dimension, original_ind - offset));
+  }
+  padded_dimension -= left_pad;  // shift into input coordinates
+  if (padded_dimension >= input_dim_size) {  // index lies in the right padding
+    padded_dimension -= input_dim_size;
+    const int original_ind = input_dim_size - (1 + offset);
+    return original_ind - std::min(padded_dimension, original_ind);
+  }
+  return padded_dimension;  // index lies inside the unpadded data
+}
+
+// Given a flat index into the output array, returns the flat index of the
+// corresponding value in the input array.
+int GetFlatIndex(int index, int num_dims,
+                 const TfLiteEvalTensor* padding_matrix,
+                 const TfLiteIntArray* input_dims,
+                 int* output_dims_num_elements, int* input_dims_num_elements,
+                 const int offset) {
+  int flat_index = 0;
+  int64_t left_pad = 0, right_pad = 0, dimension_index, index_in_input;
+
+  for (int i = 0; i < num_dims; ++i) {
+    switch (padding_matrix->type) {  // pad pair for dimension i
+      case kTfLiteInt32:
+        GetPadding(padding_matrix->data.i32, i, &left_pad, &right_pad);
+        break;
+      case kTfLiteInt64:
+        GetPadding(padding_matrix->data.i64, i, &left_pad, &right_pad);
+        break;
+      default:
+        break;  // other types: pads keep their current values (initially 0)
+    }
+    dimension_index = index / output_dims_num_elements[i];  // output coord i
+
+    index_in_input = GetInputDimension(dimension_index, left_pad, right_pad,
+                                       input_dims->data[i], offset);
+
+    flat_index += index_in_input * (input_dims_num_elements)[i];
+    index %= output_dims_num_elements[i];  // remainder for the next dimension
+  }
+
+  return flat_index;
+}
+
+template <typename T>  // T is the element type of input_data and output_data
+void MirrorPad(const TfLiteEvalTensor* padding_matrix,
+               const TfLiteIntArray* input_dims, int* output_dims_num_elements,
+               int* input_dims_num_elements, const T* input_data,
+               T* output_data, const int offset, const int num_dims,
+               const int output_size) {
+  for (int i = 0; i < output_size; ++i) {  // one gather per output element
+    output_data[i] = input_data[GetFlatIndex(
+        i, num_dims, padding_matrix, input_dims, output_dims_num_elements,
+        input_dims_num_elements, offset)];
+  }
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TfLiteStatus status = kTfLiteOk;
+  const OpDataMirrorPad* data =
+      static_cast<const OpDataMirrorPad*>(node->user_data);
+
+  const TfLiteEvalTensor* input_tensor =
+      tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* padding_matrix =
+      tflite::micro::GetEvalInput(context, node, 1);
+
+  TfLiteEvalTensor* output_tensor =
+      tflite::micro::GetEvalOutput(context, node, 0);
+  const int input_dims = data->input_dims;  // number of dimensions
+  const int output_size = data->output_size;
+
+  int* input_dims_num_elements = (int*)context->GetScratchBuffer(
+      context, data->input_dims_num_elements_buffer_index);
+  int* output_dims_num_elements = (int*)context->GetScratchBuffer(
+      context, data->output_dims_num_elements_buffer_index);
+
+  for (int i = 0; i < input_dims; i++) {  // start every entry at 1
+    output_dims_num_elements[i] = 1;
+    input_dims_num_elements[i] = 1;
+  }
+
+  for (int i = input_dims - 2; i >= 0; i--) {  // entry[i] = entry[i+1]*dim[i+1]
+    output_dims_num_elements[i] =
+        output_dims_num_elements[i + 1] * output_tensor->dims->data[i + 1];
+
+    input_dims_num_elements[i] =
+        input_dims_num_elements[i + 1] * input_tensor->dims->data[i + 1];
+  }
+
+  switch (output_tensor->type) {  // element type selects the MirrorPad<T>
+    case kTfLiteFloat32: {
+      MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
+                input_dims_num_elements,
+                tflite::micro::GetTensorData<float>(input_tensor),
+                tflite::micro::GetTensorData<float>(output_tensor),
+                data->offset, input_dims, output_size);
+      break;
+    }
+    case kTfLiteInt8: {
+      MirrorPad(padding_matrix, input_tensor->dims, output_dims_num_elements,
+                input_dims_num_elements,
+                tflite::micro::GetTensorData<int8_t>(input_tensor),
+                tflite::micro::GetTensorData<int8_t>(output_tensor),
+                data->offset, input_dims, output_size);
+      break;
+    }
+    default:
+      status = kTfLiteError;  // only float32 and int8 outputs are handled
+      break;
+  }
+
+#undef TF_LITE_MIRROR_PAD  // NOTE(review): no matching #define in this file
+
+  return status;
+}
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);  // hook is set
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataMirrorPad));
+}  // buffer and length are not used by the body
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataMirrorPad* data = static_cast<OpDataMirrorPad*>(node->user_data);
+
+  TfLiteTensor* input_tensor = micro_context->AllocateTempInputTensor(node, 0);
+  TfLiteTensor* padding_matrix =
+      micro_context->AllocateTempInputTensor(node, 1);
+  TfLiteTensor* output_tensor =
+      micro_context->AllocateTempOutputTensor(node, 0);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(padding_matrix), 2);
+  TF_LITE_ENSURE_EQ(context, SizeOfDimension(padding_matrix, 0),
+                    NumDimensions(input_tensor));  // one pad row per input dim
+  auto* params =
+      reinterpret_cast<TfLiteMirrorPaddingParams*>(node->builtin_data);
+  if (params == nullptr) {  // NOTE(review): temp tensors are not released here
+    return kTfLiteError;
+  }
+
+  data->offset =
+      params->mode != TfLiteMirrorPaddingMode::kTfLiteMirrorPaddingReflect ? 0
+                                                                           : 1;
+  data->input_dims = NumDimensions(input_tensor);
+  data->output_size = NumElements(output_tensor);
+
+  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
+      context, data->input_dims * sizeof(int),  // one int per input dimension
+      &data->output_dims_num_elements_buffer_index));
+  TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
+      context, data->input_dims * sizeof(int),  // one int per input dimension
+      &data->input_dims_num_elements_buffer_index));
+
+  micro_context->DeallocateTempTfLiteTensor(input_tensor);
+  micro_context->DeallocateTempTfLiteTensor(padding_matrix);
+  micro_context->DeallocateTempTfLiteTensor(output_tensor);
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_MIRROR_PAD() {  // wires up Init, Prepare and Eval
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h
new file mode 100644
index 0000000..1b7b038
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h
@@ -0,0 +1,145 @@
+// Patched by Edge Impulse to include reference and hardware-accelerated kernels
+#include "../../../../classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mli_api.h"  // NOLINT
+
+namespace tflite {
+
+// Convolution: kernel function-pointer type and kernel selector.
+typedef mli_status (*conv_func_ptr)(const mli_tensor* /*in*/,
+                                    const mli_tensor* /*weights*/,
+                                    const mli_tensor* /*bias*/,
+                                    const mli_conv2d_cfg* /*cfg*/,
+                                    mli_tensor* /*out*/);
+
+#ifdef MLI_2_0  // choose a kernel from the filter width and height
+conv_func_ptr __attribute__((weak))
+mli_krn_conv2d_hwcn(const mli_tensor* weights) {
+  int filter_w = weights->shape[KRNL_W_DIM_HWCN];
+  int filter_h = weights->shape[KRNL_H_DIM_HWCN];
+
+  if (filter_w == 1 && filter_h == 1) {
+    return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k1x1;
+  } else if (filter_w == 3 && filter_h == 3) {
+    return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k3x3;
+  } else if (filter_w == 5 && filter_h == 5) {
+    return mli_krn_conv2d_hwcn_sa8_sa8_sa32_k5x5;
+  } else {
+    return mli_krn_conv2d_hwcn_sa8_sa8_sa32;  // any other filter size
+  }
+}
+#else  // !MLI_2_0: same kernel every time; weights and cfg are not read
+conv_func_ptr __attribute__((weak))
+mli_krn_conv2d_hwcn(const mli_tensor* weights, const mli_conv2d_cfg* cfg) {
+  return mli_krn_conv2d_nhwc_sa8_sa8_sa32;
+}
+#endif  // MLI_2_0
+
+// Depthwise convolution: kernel function-pointer type and kernel selector.
+typedef mli_status (*depthwise_func_ptr)(const mli_tensor* /*in*/,
+                                         const mli_tensor* /*weights*/,
+                                         const mli_tensor* /*bias*/,
+                                         const mli_conv2d_cfg* /*cfg*/,
+                                         mli_tensor* /*out*/);
+
+#ifdef MLI_2_0  // choose a kernel from the filter width and height
+depthwise_func_ptr __attribute__((weak))
+mli_krn_depthwise_conv2d(const mli_tensor* weights) {
+  int filter_w = weights->shape[KRNL_DW_W_DIM_HW1N];
+  int filter_h = weights->shape[KRNL_DW_H_DIM_HW1N];
+
+  if (filter_w == 3 && filter_h == 3) {
+    return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k3x3;
+  } else if (filter_w == 5 && filter_h == 5) {
+    return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32_k5x5;
+  } else {
+    return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32;  // any other size
+  }
+}
+#else  // !MLI_2_0: same kernel every time; weights and cfg are not read
+depthwise_func_ptr __attribute__((weak))
+mli_krn_depthwise_conv2d(const mli_tensor* weights, const mli_conv2d_cfg* cfg) {
+  return mli_krn_depthwise_conv2d_hwcn_sa8_sa8_sa32;
+}
+#endif  // MLI_2_0
+
+#ifdef MLI_2_0  // group convolution selector; defined only for MLI 2.0
+depthwise_func_ptr __attribute__((weak))
+mli_krn_group_conv2d(const mli_tensor* weights) {
+  int filter_w = weights->shape[KRNL_DW_W_DIM_HW1N];
+  int filter_h = weights->shape[KRNL_DW_H_DIM_HW1N];
+
+  if (filter_w == 3 && filter_h == 3) {
+    return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32_k3x3;
+  } else if (filter_w == 5 && filter_h == 5) {
+    return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32_k5x5;
+  } else {
+    return mli_krn_group_conv2d_hwcn_sa8_sa8_sa32;  // any other filter size
+  }
+}
+#endif  // MLI_2_0
+
+// Pooling: kernel function-pointer type shared by the pooling selectors.
+typedef mli_status (*pooling_func_ptr)(const mli_tensor* /*in*/,
+                                       const mli_pool_cfg* /*cfg*/,
+                                       mli_tensor* /*out*/);
+
+#ifdef MLI_2_0  // choose an average-pool kernel from the kernel size in cfg
+pooling_func_ptr __attribute__((weak))
+mli_krn_avepool(const mli_pool_cfg* cfg) {
+  int filter_w = cfg->kernel_width;
+  int filter_h = cfg->kernel_height;
+
+  if (filter_w == 2 && filter_h == 2) {
+    return mli_krn_avepool_hwc_sa8_k2x2;
+  } else if (filter_w == 3 && filter_h == 3) {
+    return mli_krn_avepool_hwc_sa8_k3x3;
+  } else {
+    return mli_krn_avepool_hwc_sa8;  // any other kernel size
+  }
+}
+#else  // !MLI_2_0: same kernel every time; cfg is not read
+pooling_func_ptr __attribute__((weak))
+mli_krn_avepool(const mli_pool_cfg* cfg) {
+  return mli_krn_avepool_hwc_sa8;
+}
+#endif  // MLI_2_0
+
+#ifdef MLI_2_0  // choose a max-pool kernel from the kernel size in cfg
+pooling_func_ptr __attribute__((weak))
+mli_krn_maxpool(const mli_pool_cfg* cfg) {
+  int filter_w = cfg->kernel_width;
+  int filter_h = cfg->kernel_height;
+
+  if (filter_w == 2 && filter_h == 2) {
+    return mli_krn_maxpool_hwc_sa8_k2x2;
+  } else if (filter_w == 3 && filter_h == 3) {
+    return mli_krn_maxpool_hwc_sa8_k3x3;
+  } else {
+    return mli_krn_maxpool_hwc_sa8;  // any other kernel size
+  }
+}
+#else  // !MLI_2_0: same kernel every time; cfg is not read
+pooling_func_ptr __attribute__((weak))
+mli_krn_maxpool(const mli_pool_cfg* cfg) {
+  return mli_krn_maxpool_hwc_sa8;
+}
+#endif  // MLI_2_0
+
+}  // namespace tflite
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cc
new file mode 100644
index 0000000..bbd5e3a
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.cc
@@ -0,0 +1,160 @@
+// Patched by Edge Impulse to include reference and hardware-accelerated kernels
+#include "../../../../classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mli_interface.h"  // NOLINT
+
+#include <math.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+
+#ifndef MLI_2_0
+template <>
+int8_t* MliTensorInterface::Data<int8_t>(void) {
+  TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I8);
+  return static_cast<int8_t*>(tensor_->data);
+}
+
+template <>
+int32_t* MliTensorInterface::Data<int32_t>(void) {
+  TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I32);
+  return static_cast<int32_t*>(tensor_->data);
+}
+
+template <>
+int32_t* MliTensorInterface::Scale(void) {
+  return &tensor_->el_params.asym.scale.i32;
+}
+
+template <>
+int32_t** MliTensorInterface::Scale(void) {
+  return &tensor_->el_params.asym.scale.pi32;
+}
+
+template <>
+void MliTensorInterface::SetData(int8_t* data, uint32_t capacity) const {
+  TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I8);
+  tensor_->data = data;
+  tensor_->capacity = capacity;
+}
+
+template <>
+void MliTensorInterface::SetData(int32_t* data, uint32_t capacity) const {
+  TFLITE_DCHECK(tensor_->el_type == MLI_EL_ASYM_I32);
+  tensor_->data = data;
+  tensor_->capacity = capacity;
+}
+
+mli_tensor* MliTensorInterface::MliTensor(void) { return tensor_; }
+
+const mli_tensor* MliTensorInterface::MliTensor(void) const {
+  return static_cast<const mli_tensor*>(
+      const_cast<MliTensorInterface*>(this)->MliTensor());
+}
+
+uint32_t* MliTensorInterface::Rank(void) { return &tensor_->rank; }
+
+const uint32_t* MliTensorInterface::DataCapacity(void) const {
+  return &tensor_->capacity;
+}
+
+mli_element_type* MliTensorInterface::ElType(void) { return &tensor_->el_type; }
+
+template <>
+int16_t* MliTensorInterface::ZeroPoint(void) {
+  return &tensor_->el_params.asym.zero_point.i16;
+}
+
+template <>
+int16_t** MliTensorInterface::ZeroPoint(void) {
+  return &tensor_->el_params.asym.zero_point.pi16;
+}
+
+uint32_t* MliTensorInterface::ZeroPointCapacity(void) { return nullptr; }
+
+int32_t* MliTensorInterface::Dim(void) { return &tensor_->el_params.asym.dim; }
+
+uint32_t* MliTensorInterface::ScaleCapacity(void) { return nullptr; }
+
+template <>
+int8_t* MliTensorInterface::ScaleFracBits(void) {
+  return &tensor_->el_params.asym.scale_frac_bits;
+}
+
+uint32_t* MliTensorInterface::ScaleFracBitsCapacity(void) { return nullptr; }
+
+int32_t* MliTensorInterface::MemStride(void) { return tensor_->mem_stride; }
+
+uint32_t* MliTensorInterface::Shape(void) { return tensor_->shape; }
+
+const uint32_t* MliTensorInterface::Shape(void) const {
+  return static_cast<const uint32_t*>(
+      const_cast<MliTensorInterface*>(this)->Shape());
+}
+
+void MliTensorInterface::SetScale(float fscale) {
+  int exp;
+  frexpf(fscale, &exp);
+  int frac_bits = 31 - exp;
+  int32_t iscale = (int32_t)((1ll << frac_bits) * fscale + 0.5f);
+  *(this->ScaleFracBits<int8_t*>()) = frac_bits;
+  *(this->Scale<int32_t*>()) = (int32_t)iscale;
+}
+
+void MliTensorInterface::SetScalePerChannel(float* fscale,
+                                            const int num_channels) {
+  int min_frac_bits;
+  for (int i = 0; i < num_channels; i++) {
+    int exp;
+    frexpf(fscale[i], &exp);
+    int cur_frac_bits = 31 - exp;
+    if (i == 0) {
+      min_frac_bits = cur_frac_bits;
+    } else {
+      min_frac_bits =
+          min_frac_bits < cur_frac_bits ? min_frac_bits : cur_frac_bits;
+    }
+  }
+  *this->ScaleFracBits<int8_t*>() = min_frac_bits;
+
+  for (int i = 0; i < num_channels; i++) {
+    int32_t iscale = (int32_t)((1ll << min_frac_bits) * fscale[i] + 0.5f);
+    (*this->Scale<int32_t**>())[i] = iscale;
+  }
+}
+
+void MliTensorInterface::SetElType(TfLiteType type) {
+  if (type == kTfLiteInt8) {
+    *this->ElType() = MLI_EL_ASYM_I8;
+  } else if (type == kTfLiteInt32) {
+    *this->ElType() = MLI_EL_ASYM_I32;
+  } else {
+    MicroPrintf("Wrong data type. Expected int8_t or int32_t.");
+    TFLITE_ABORT;
+  }
+}
+#endif
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
+
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h
new file mode 100644
index 0000000..e08f84a
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_interface.h
@@ -0,0 +1,80 @@
+// Patched by Edge Impulse to include reference and hardware-accelerated kernels
+#include "../../../../classifier/ei_classifier_config.h"
+#if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_INTERFACE_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_INTERFACE_H_
+
+#include "mli_api.h"  // NOLINT
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+namespace tflite {
+namespace ops {
+namespace micro {
+
+// Abstracts access to mli_tensor fields to use different versions of MLI
+// Library (1.x and 2.x)
+// Example:
+//    ops::micro::MliTensorInterface mli_in =
+//    ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+//        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+
+class MliTensorInterface {
+ public:
+  // Make sure that the lifetime of a MliTensorInterface instance does not
+  // exceed that of the related mli_tensor.
+  MliTensorInterface(mli_tensor* tensor) : tensor_(tensor){};
+  MliTensorInterface() = default;
+  ~MliTensorInterface() = default;
+
+  template <typename T>
+  T* Data();
+  template <typename T>
+  T Scale();
+  template <typename T>
+  T ZeroPoint();
+  template <typename T>
+  T ScaleFracBits();
+  mli_tensor* MliTensor();
+  const mli_tensor* MliTensor() const;
+  int32_t* Dim();
+  uint32_t* Rank();
+  uint32_t* Shape();
+  const uint32_t* Shape() const;
+  const uint32_t* DataCapacity() const;
+  uint32_t* ScaleCapacity();
+  mli_element_type* ElType();
+  uint32_t* ScaleFracBitsCapacity();
+  int32_t* MemStride();
+  uint32_t* ZeroPointCapacity();
+
+  template <typename T>
+  void SetData(T* data, uint32_t capacity) const;
+  void SetScale(float fscale);
+  void SetScalePerChannel(float* fscale, const int num_channels);
+  void SetElType(TfLiteType type);
+
+ private:
+  mli_tensor* tensor_;
+};
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_INTERFACE_H_
+
+#endif // EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h
index a227340..4179c74 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h
@@ -1,7 +1,7 @@
 // Patched by Edge Impulse to include reference and hardware-accelerated kernels
 #include "../../../../classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,102 +20,292 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_KERNELS_ARC_MLI_TF_UTILS_H_
 
 #include "mli_api.h"  // NOLINT
+#include "mli_interface.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
-constexpr int kFracBitsQ15 = 15;
-constexpr int kFracBitsQ31 = 31;
+#define KRNL_C_DIM_NHWC 0  // output channels
 
 namespace tflite {
 namespace ops {
 namespace micro {
 
-inline void ConvertToMliTensorData(const TfLiteTensor* tfT, mli_tensor* mliT) {
+inline void ConvertToMliTensorData(const TfLiteTensor* tfT,
+                                   MliTensorInterface* mliT,
+                                   bool is_bias_tensor) {
   // Data is NULL until MliTensorAttachBuffer is called.
-  mliT->data = NULL;
+  mliT->SetElType(tfT->type);
   if (tfT->type == kTfLiteInt8) {
-    mliT->el_type = MLI_EL_ASYM_I8;
+    mliT->SetData<int8_t>(nullptr, tfT->bytes);
   } else if (tfT->type == kTfLiteInt32) {
-    mliT->el_type = MLI_EL_ASYM_I32;
+    mliT->SetData<int32_t>(nullptr, tfT->bytes);
   } else {
-    TF_LITE_FATAL("Wrong data type. Expected int8_t or int32_t.");
+    MicroPrintf("Wrong data type. Expected int8_t or int32_t.");
+    TFLITE_ABORT;
   }
+  const int32_t dims_count = GetTensorShape(tfT).DimensionsCount();
+  *mliT->Rank() = is_bias_tensor ? 1 : dims_count;
 
-  mliT->capacity = tfT->bytes;
-  mliT->rank = GetTensorShape(tfT).DimensionsCount();
-  for (int i = 0; i < GetTensorShape(tfT).DimensionsCount(); i++) {
-    mliT->shape[i] = GetTensorShape(tfT).Dims(i);
+  int mli_tensor_memstride = 1;
+  if (is_bias_tensor) {
+    mliT->Shape()[0] = GetTensorShape(tfT).Dims(dims_count - 1);
+    mliT->MemStride()[0] = mli_tensor_memstride;
+  } else {
+    for (int i = dims_count - 1; i >= 0; --i) {
+      mliT->Shape()[i] = GetTensorShape(tfT).Dims(i);
+      mliT->MemStride()[i] = mli_tensor_memstride;
+      mli_tensor_memstride *= GetTensorShape(tfT).Dims(i);
+    }
   }
 }
 
-inline void ConvertToMliQuantParams(const TfLiteTensor* tfT, mli_tensor* mliT) {
-  mliT->el_params.asym.dim = -1;
-  mliT->el_params.asym.zero_point.i16 = tfT->params.zero_point;
+inline void ConvertToMliQuantParams(const TfLiteTensor* tfT,
+                                    MliTensorInterface* mliT) {
+  *mliT->Dim() = -1;
+#ifdef MLI_2_0
+  *mliT->ZeroPointCapacity() = 0;
+#endif
+  *mliT->ZeroPoint<int16_t*>() = tfT->params.zero_point;
   float fscale = tfT->params.scale;
-  int exp;
-  frexpf(fscale, &exp);
-  int frac_bits = kFracBitsQ31 - exp;
-  int32_t iscale = (int32_t)((1ll << frac_bits) * fscale + 0.5f);
-  mliT->el_params.asym.scale_frac_bits = frac_bits;
-  mliT->el_params.asym.scale.i32 = (int32_t)iscale;
+  mliT->SetScale(fscale);
 }
 
 inline void ConvertToMliQuantParamsPerChannel(const TfLiteTensor* tfT,
-                                              mli_tensor* mliT) {
+                                              MliTensorInterface* mliT,
+                                              bool is_bias_tensor) {
   // mli tensor scale and zero_point arrays should be allocated at this point
-  TFLITE_DCHECK_NE(mliT->el_params.asym.scale.pi16, 0);
-  TFLITE_DCHECK_NE(mliT->el_params.asym.zero_point.pi16, 0);
+#ifdef MLI_2_0
+  TFLITE_DCHECK_NE(*mliT->Scale<int16_t**>(), 0);
+  TFLITE_DCHECK_NE(*mliT->ZeroPoint<int16_t**>(), 0);
+#else
+  TFLITE_DCHECK_NE(*mliT->Scale<int32_t**>(), 0);
+  TFLITE_DCHECK_NE(*mliT->ZeroPoint<int16_t**>(), 0);
+#endif
 
   // get per channel quantization parameters
   const auto* affine_quantization =
       reinterpret_cast<TfLiteAffineQuantization*>(tfT->quantization.params);
-  mliT->el_params.asym.dim = affine_quantization->quantized_dimension;
+  int32_t quantized_dimension =
+      is_bias_tensor ? 0 : affine_quantization->quantized_dimension;
+  const int num_channels = mliT->Shape()[quantized_dimension];
 
-  // find frac_bits
-  const int num_channels =
-      mliT->shape[affine_quantization->quantized_dimension];
-  int min_frac_bits;
+  *mliT->Dim() = quantized_dimension;
+
+  // set capacities
+#ifdef MLI_2_0
+  *mliT->ScaleFracBitsCapacity() = num_channels * sizeof(int8_t);
+  *mliT->ScaleCapacity() = num_channels * sizeof(int16_t);
+  *mliT->ZeroPointCapacity() = num_channels * sizeof(int16_t);
+#endif
   float* fscale = affine_quantization->scale->data;
-  for (int i = 0; i < num_channels; i++) {
-    int exp;
-    frexpf(fscale[i], &exp);
-    int cur_frac_bits = kFracBitsQ31 - exp;
-    if (i == 0) {
-      min_frac_bits = cur_frac_bits;
-    } else {
-      min_frac_bits =
-          min_frac_bits < cur_frac_bits ? min_frac_bits : cur_frac_bits;
-    }
-  }
-  mliT->el_params.asym.scale_frac_bits = min_frac_bits;
+  mliT->SetScalePerChannel(fscale, num_channels);
 
+#ifdef MLI_2_0
+  int16_t* zero_point = *mliT->ZeroPoint<int16_t**>();
   for (int i = 0; i < num_channels; i++) {
-    int32_t iscale = (int32_t)((1ll << min_frac_bits) * fscale[i] + 0.5f);
-    mliT->el_params.asym.scale.pi32[i] = iscale;
+    zero_point[i] = tfT->params.zero_point;
   }
+#endif
 }
 
 template <typename datatype>
-inline void MliTensorAttachBuffer(const TfLiteEvalTensor* tfT,
-                                  mli_tensor* mliT) {
+inline void MliTensorAttachBuffer(const TfLiteEvalTensor*,
+                                  const MliTensorInterface*);
+
+template <>
+inline void MliTensorAttachBuffer<int8_t>(const TfLiteEvalTensor* tfT,
+                                          const MliTensorInterface* mliT) {
   // "const_cast" here used to attach const data buffer to the initially
   // non-const mli_tensor. This is required by current implementation of MLI
   // backend and planned for redesign due to this and some other aspects.
-  mliT->data = const_cast<void*>(
-      static_cast<const void*>(tflite::micro::GetTensorData<datatype>(tfT)));
+  mliT->SetData<int8_t>(
+      const_cast<int8_t*>(tflite::micro::GetTensorData<int8_t>(tfT)),
+      *mliT->DataCapacity());
 }
 
-inline void ConvertToMliTensor(const TfLiteTensor* tfT, mli_tensor* mliT) {
-  ConvertToMliTensorData(tfT, mliT);
+template <>
+inline void MliTensorAttachBuffer<int32_t>(const TfLiteEvalTensor* tfT,
+                                           const MliTensorInterface* mliT) {
+  // "const_cast" here used to attach const data buffer to the initially
+  // non-const mli_tensor. This is required by current implementation of MLI
+  // backend and planned for redesign due to this and some other aspects.
+  mliT->SetData<int32_t>(
+      const_cast<int32_t*>(tflite::micro::GetTensorData<int32_t>(tfT)),
+      *mliT->DataCapacity());
+}
+
+inline void ConvertToMliTensor(const TfLiteTensor* tfT,
+                               MliTensorInterface* mliT) {
+  ConvertToMliTensorData(tfT, mliT, false);
   ConvertToMliQuantParams(tfT, mliT);
 }
 
 inline void ConvertToMliTensorPerChannel(const TfLiteTensor* tfT,
-                                         mli_tensor* mliT) {
-  ConvertToMliTensorData(tfT, mliT);
-  ConvertToMliQuantParamsPerChannel(tfT, mliT);
+                                         MliTensorInterface* mliT,
+                                         bool is_bias_tensor) {
+  ConvertToMliTensorData(tfT, mliT, is_bias_tensor);
+  ConvertToMliQuantParamsPerChannel(tfT, mliT, is_bias_tensor);
+}
+
+inline void PrepareLocalTensor(mli_tensor* tensor, mli_tensor* tensor_local) {
+#ifdef MLI_2_0
+  int8_t* local_data = tensor_local->data.mem.pi8;
+  *tensor_local = *tensor;
+  tensor_local->data.mem.pi8 = local_data;
+#else
+  int8_t* local_data = static_cast<int8_t*>(tensor_local->data);
+  *tensor_local = *tensor;
+  tensor_local->data = local_data;
+#endif
 }
+
+inline void AdjustBiasTensor(MliTensorInterface* bias, MliTensorInterface* in,
+                             MliTensorInterface* weights) {
+  int32_t quantized_dimension = *bias->Dim();
+  const int num_channels =
+      quantized_dimension < 0 ? 1 : bias->Shape()[quantized_dimension];
+  for (int i = 0; i < num_channels; i++) {
+    int32_t adjusted_bias_scale =
+        (*in->Scale<int16_t*>()) * (*weights->Scale<int16_t**>())[i];
+    int in_shift = *in->ScaleFracBits<int8_t*>();
+    int w_shift = (*weights->ScaleFracBits<int8_t**>())[i];
+    int b_shift = (*bias->ScaleFracBits<int8_t**>())[i];
+    int bias_shift = in_shift + w_shift - b_shift;
+    (*bias->Scale<int16_t**>())[i] =
+        (int16_t)(adjusted_bias_scale >> bias_shift);
+  }
+}
+
+#ifdef MLI_2_0_KRNL_TEST
+// Reorder an array according to given indexes. If backward is true, order of
+// index array must be reversed.
+inline static void reorder(uint32_t* arr, const uint8_t index[],
+                           bool backward) {
+  uint32_t temp[MLI_MAX_RANK];
+  for (int8_t i = 0; i < MLI_MAX_RANK; i++) {
+    if (backward)
+      temp[index[i]] = arr[i];
+    else
+      temp[i] = arr[index[i]];
+  }
+  for (int8_t i = 0; i < MLI_MAX_RANK; i++) {
+    arr[i] = temp[i];
+  }
+}
+
+// Change shape of mli tensor and recalculate mem strides.
+inline void change_shape(mli_tensor* mliT, const uint8_t dim_order[]) {
+  reorder(mliT->shape, dim_order, false);
+
+  // Calculate strides for new layout
+  int mli_tensor_memstride = 1;
+  for (int shape_idx = mliT->rank - 1; shape_idx >= 0; --shape_idx) {
+    mliT->mem_stride[shape_idx] = mli_tensor_memstride;
+    mli_tensor_memstride *= mliT->shape[shape_idx];
+  }
+}
+
+inline void permute_weights(const mli_tensor* weights_src,
+                            const mli_permute_cfg* permute_cfg,
+                            mli_tensor* weights_dst,
+                            mli_data_container* buffer_data) {
+  mli_tensor buffer = {};
+  buffer.el_params = weights_dst->el_params;
+  buffer.data = *buffer_data;
+  // Compare weights tensor size and available buffer capacity.
+  int buffer_size = buffer_data->capacity;
+  int weights_size = mli_hlp_count_elem_num(weights_src, 0) *
+                     mli_hlp_tensor_element_size(weights_src);
+
+  // Need to change shape of destination weights buffer according to permute
+  // dimensions order to calculate slice sizes
+  change_shape(weights_dst, permute_cfg->perm_dim);
+
+  if (buffer_size >= weights_size) {
+    mli_mov_cfg_t copy_config;
+    mli_mov_cfg_for_copy(&copy_config);
+    mli_mov_tensor_sync(weights_src, &copy_config, &buffer);
+    mli_krn_permute_sa8(&buffer, permute_cfg, weights_dst);
+  } else {
+    // Weights shape is NHWC and output (buffer) shape is HWC where N_w = C_o.
+    // Buffer size (H_o * W_o) must be greater than or equal to the weights
+    // size (H_w * W_w * C_w). This is why the buffer size (output tensor) is
+    // divided by the channel shape.
+    uint32_t slice_size = buffer_size / weights_src->shape[KRNL_C_DIM_NHWC];
+
+    mli_mov_cfg_t copy_config = {};
+    uint32_t src_offsets[] = {0, 0, 0, 0};
+    uint32_t src_sizes[] = {0, 0, 0, 0};
+    int dst_mem_stride[] = {0, 0, 0, 0};
+
+    mli_tensor weights_dst_sub_tensor;
+    mli_sub_tensor_cfg sub_tensor_cfg = {};
+    sub_tensor_cfg.sub_tensor_rank = weights_src->rank;
+
+    // Calculate dimensions for slice according to buffer capacity.
+    // Now, after calling change_shape() function, dst weights buffer has the
+    // MLI layout (HWCN). This means, the innermost dimension (N) of dst weights
+    // tensor is equal to the innermost dimension of output tensor (N).
+    sub_tensor_cfg.size[weights_dst->rank - 1] =
+        src_sizes[weights_dst->rank - 1] = weights_src->shape[KRNL_C_DIM_NHWC];
+    // Now need to calculate other shapes for weights slice. Total slice size is
+    // H*W*C*N, so to calculate sizes for each axis, available slice size is
+    // divided by shape for each axis.
+    uint32_t slice_size_left = slice_size;
+    for (uint32_t i = 0; i < weights_dst->rank - 1; i++) {
+      sub_tensor_cfg.size[i] = src_sizes[i] =
+          slice_size_left / weights_dst->shape[i] > 0 ? weights_dst->shape[i]
+                                                      : slice_size_left;
+      slice_size_left /= weights_dst->shape[i];
+      slice_size_left = slice_size_left > 0 ? slice_size_left : 1;
+    }
+    // Need to reorder src tensor sizes because it is still in TFLM format
+    // (NHWC) and src_sizes array calculated as (HWCN).
+    reorder(src_sizes, permute_cfg->perm_dim, true);
+
+    sub_tensor_cfg.offset[KRNL_C_DIM_HWCN] = src_offsets[KRNL_H_DIM_HWCN] = 0;
+    sub_tensor_cfg.offset[KRNL_H_DIM_HWCN] = src_offsets[KRNL_W_DIM_HWCN] = 0;
+    sub_tensor_cfg.offset[KRNL_W_DIM_HWCN] = src_offsets[KRNL_D_DIM_HWCN] = 0;
+    sub_tensor_cfg.offset[KRNL_D_DIM_HWCN] = src_offsets[KRNL_C_DIM_HWCN] = 0;
+    do {
+      do {
+        do {
+          do {
+            mli_mov_cfg_for_slice(&copy_config, (int*)src_offsets,
+                                  (int*)src_sizes, dst_mem_stride);
+            mli_mov_tensor_sync(weights_src, &copy_config, &buffer);
+
+            mli_hlp_create_subtensor(weights_dst, &sub_tensor_cfg,
+                                     &weights_dst_sub_tensor);
+            mli_krn_permute_sa8(&buffer, permute_cfg, &weights_dst_sub_tensor);
+
+            // For each axis, it is necessary to recalculate the offsets and
+            // slice sizes.
+            sub_tensor_cfg.offset[2] = src_offsets[3] += src_sizes[3];
+            src_sizes[3] =
+                std::min(src_sizes[3], weights_src->shape[3] - src_offsets[3]);
+          } while (src_offsets[3] < weights_src->shape[3]);
+
+          sub_tensor_cfg.offset[1] = src_offsets[2] += src_sizes[2];
+          src_sizes[2] =
+              std::min(src_sizes[2], weights_src->shape[2] - src_offsets[2]);
+        } while (src_offsets[2] < weights_src->shape[2]);
+
+        sub_tensor_cfg.offset[0] = src_offsets[1] += src_sizes[1];
+        src_sizes[1] =
+            std::min(src_sizes[1], weights_src->shape[1] - src_offsets[1]);
+      } while (src_offsets[1] < weights_src->shape[1]);
+
+      sub_tensor_cfg.offset[3] = src_offsets[0] += src_sizes[0];
+      src_sizes[0] =
+          std::min(src_sizes[0], weights_src->shape[0] - src_offsets[0]);
+    } while (src_offsets[0] < weights_src->shape[0]);
+  }
+}
+#endif
+
 }  // namespace micro
 }  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cc
index 16863d6..9f00d2e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -27,113 +27,52 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace mul {
-
-constexpr int kInput1Tensor = 0;
-constexpr int kInput2Tensor = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-
-  int32_t output_multiplier;
-  int output_shift;
-
-  // Cached tensor zero point values for quantized operations.
-  int32_t input1_zero_point;
-  int32_t input2_zero_point;
-  int32_t output_zero_point;
-
-  float output_activation_min_f32;
-  float output_activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteMulParams* params, OpData* data) {
-  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
-
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-
-    double real_multiplier = static_cast<double>(input1->params.scale) *
-                             static_cast<double>(input2->params.scale) /
-                             static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
-                       &data->output_shift);
-
-    data->input1_zero_point = input1->params.zero_point;
-    data->input2_zero_point = input2->params.zero_point;
-    data->output_zero_point = output->params.zero_point;
-  } else {
-    CalculateActivationRange(params->activation,
-                             &data->output_activation_min_f32,
-                             &data->output_activation_max_f32);
-  }
-
-  return kTfLiteOk;
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  return CalculateOpData(context, node, params, data);
-}
+namespace {
 
-void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData& data,
-                   const TfLiteEvalTensor* input1,
+void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                   const OpDataMul* data, const TfLiteEvalTensor* input1,
                    const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params;
-  op_params.quantized_activation_min = data.output_activation_min;
-  op_params.quantized_activation_max = data.output_activation_max;
-  op_params.input1_offset = -data.input1_zero_point;
-  op_params.input2_offset = -data.input2_zero_point;
-  op_params.output_offset = data.output_zero_point;
-  op_params.output_multiplier = data.output_multiplier;
-  op_params.output_shift = data.output_shift;
+  tflite::ArithmeticParams op_params = {};
+
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.float_activation_max = data->output_activation_max_f32;
+  op_params.input1_offset = -data->input1_zero_point;
+  op_params.input2_offset = -data->input2_zero_point;
+  op_params.output_offset = data->output_zero_point;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = data->output_shift;
 
   bool need_broadcast = reference_ops::ProcessBroadcastShapes(
       tflite::micro::GetTensorShape(input1),
       tflite::micro::GetTensorShape(input2), &op_params);
 
-#define TF_LITE_MUL(type, opname, dtype)                         \
-  type::opname(op_params, tflite::micro::GetTensorShape(input1), \
-               tflite::micro::GetTensorData<dtype>(input1),      \
-               tflite::micro::GetTensorShape(input2),            \
-               tflite::micro::GetTensorData<dtype>(input2),      \
-               tflite::micro::GetTensorShape(output),            \
-               tflite::micro::GetTensorData<dtype>(output));
-
-  if (output->type == kTfLiteInt8) {
-    if (need_broadcast) {
-      TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t);
-    } else {
+  if (need_broadcast) {
+    if (input1->type == kTfLiteInt8) {
+      reference_integer_ops::BroadcastMul4DSlow(
+          op_params, tflite::micro::GetTensorShape(input1),
+          tflite::micro::GetTensorData<int8_t>(input1),
+          tflite::micro::GetTensorShape(input2),
+          tflite::micro::GetTensorData<int8_t>(input2),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+    } else if (input1->type == kTfLiteInt16) {
+      reference_integer_ops::BroadcastMul4DSlow(
+          op_params, tflite::micro::GetTensorShape(input1),
+          tflite::micro::GetTensorData<int16_t>(input1),
+          tflite::micro::GetTensorShape(input2),
+          tflite::micro::GetTensorData<int16_t>(input2),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
+    }
+
+  } else {
+    if (input1->type == kTfLiteInt8) {
       arm_elementwise_mul_s8(
           tflite::micro::GetTensorData<int8_t>(input1),
           tflite::micro::GetTensorData<int8_t>(input2), op_params.input1_offset,
@@ -144,89 +83,109 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData& data,
           MatchingElementsSize(tflite::micro::GetTensorShape(input1),
                                tflite::micro::GetTensorShape(input2),
                                tflite::micro::GetTensorShape(output)));
-    }
-  } else if (output->type == kTfLiteUInt8) {
-    if (need_broadcast) {
-      TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, uint8_t);
-    } else {
-      TF_LITE_MUL(reference_integer_ops, Mul, uint8_t);
+    } else if (input1->type == kTfLiteInt16) {
+      arm_elementwise_mul_s16(
+          tflite::micro::GetTensorData<int16_t>(input1),
+          tflite::micro::GetTensorData<int16_t>(input2),
+          op_params.input1_offset, op_params.input2_offset,
+          tflite::micro::GetTensorData<int16_t>(output),
+          op_params.output_offset, op_params.output_multiplier,
+          op_params.output_shift, op_params.quantized_activation_min,
+          op_params.quantized_activation_max,
+          MatchingElementsSize(tflite::micro::GetTensorShape(input1),
+                               tflite::micro::GetTensorShape(input2),
+                               tflite::micro::GetTensorShape(output)));
     }
   }
-#undef TF_LITE_MUL
 }
 
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData& data,
-               const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
-               TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params;
-  op_params.float_activation_min = data.output_activation_min_f32;
-  op_params.float_activation_max = data.output_activation_max_f32;
-
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      tflite::micro::GetTensorShape(input1),
-      tflite::micro::GetTensorShape(input2), &op_params);
-#define TF_LITE_MUL(opname)                                               \
-  reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \
-                        tflite::micro::GetTensorData<float>(input1),      \
-                        tflite::micro::GetTensorShape(input2),            \
-                        tflite::micro::GetTensorData<float>(input2),      \
-                        tflite::micro::GetTensorShape(output),            \
-                        tflite::micro::GetTensorData<float>(output));
-
-  if (need_broadcast) {
-    TF_LITE_MUL(BroadcastMul4DSlow);
-  } else {
-    TF_LITE_MUL(Mul);
-  }
-#undef TF_LITE_MUL
-}
+}  // namespace
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
 
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
+  // Dispatch below is keyed on input1's type only.
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInput2Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
 
   switch (input1->type) {
-    case kTfLiteUInt8:
     case kTfLiteInt8:
       EvalQuantized(context, node, data, input1, input2, output);
       break;
+    case kTfLiteInt16:  // Same EvalQuantized path as int8.
+      EvalQuantized(context, node, data, input1, input2, output);
+      break;
+    case kTfLiteInt32:  // NOTE(review): the TfLiteStatus returned below is dropped.
+      EvalMulQuantizedReference(context, node, data, input1, input2, output);
+      break;
     case kTfLiteFloat32:
-      EvalFloat(context, node, params, data, input1, input2, output);
+      EvalMulFloatReference(context, node, params, data, input1, input2,
+                            output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
-}  // namespace mul
+
+TfLiteStatus EvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  // Int8-only invoke function: no type switch, goes straight to EvalQuantized.
+  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
+  TFLITE_DCHECK(input1->type == kTfLiteInt8);
+  // The type is only asserted above; there is no error return for other types.
+  EvalQuantized(context, node, data, input1, input2, output);
+  // No status is propagated from EvalQuantized; always reports success.
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalInt16(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  // Int16-only invoke function: no type switch, goes straight to EvalQuantized.
+  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
+  TFLITE_DCHECK(input1->type == kTfLiteInt16);
+  // The type is only asserted above; there is no error return for other types.
+  EvalQuantized(context, node, data, input1, input2, output);
+  // No status is propagated from EvalQuantized; always reports success.
+  return kTfLiteOk;
+}
 
 TfLiteRegistration Register_MUL() {
-  return {/* Init=*/mul::Init,
-          /* Free=*/nullptr,
-          /* Prepare=*/mul::Prepare,
-          /*invoke=*/mul::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(MulInit, MulPrepare, Eval);  // Type-dispatching Eval.
+}
+
+TfLiteRegistration Register_MUL_INT8() {  // MUL registration bound to EvalInt8.
+  return tflite::micro::RegisterOp(MulInit, MulPrepare, EvalInt8);
+}
+
+TfLiteRegistration Register_MUL_INT16() {  // MUL registration bound to EvalInt16.
+  return tflite::micro::RegisterOp(MulInit, MulPrepare, EvalInt16);  // NOTE(review): not declared in kernels/mul.h - confirm where callers get the declaration.
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ESP_NN == 1
@@ -245,6 +204,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
@@ -254,78 +215,20 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
+
 #include <esp_timer.h>
 
 long long mul_total_time = 0;
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace mul {
-
-namespace {
-
-constexpr int kInput1Tensor = 0;
-constexpr int kInput2Tensor = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  int32_t input1_zero_point;
-  int32_t input2_zero_point;
-
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-  int32_t output_zero_point;
-  int32_t output_multiplier;
-  int output_shift;
-
-  float output_activation_min_f32;
-  float output_activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteMulParams* params, OpData* data) {
-  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
-
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-
-    double real_multiplier = static_cast<double>(input1->params.scale) *
-                             static_cast<double>(input2->params.scale) /
-                             static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
-                       &data->output_shift);
-
-    data->input1_zero_point = input1->params.zero_point;
-    data->input2_zero_point = input2->params.zero_point;
-    data->output_zero_point = output->params.zero_point;
-  } else {
-    CalculateActivationRange(params->activation,
-                             &data->output_activation_min_f32,
-                             &data->output_activation_max_f32);
-  }
-
-  return kTfLiteOk;
-}
-
-}  // namespace
-
-void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                      const OpData* data, const TfLiteEvalTensor* input1,
+#if ESP_NN
+void MulEvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                      const OpDataMul* data, const TfLiteEvalTensor* input1,
                       const TfLiteEvalTensor* input2,
                       TfLiteEvalTensor* output) {
   tflite::ArithmeticParams op_params = {};
@@ -364,163 +267,55 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                                                     tflite::micro::GetTensorShape(output)));
   }
 }
+#endif
 
-void EvalQuantizedReference(TfLiteContext* context, TfLiteNode* node, const OpData* data,
-                   const TfLiteEvalTensor* input1,
-                   const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params = {};
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
-  op_params.float_activation_max = data->output_activation_max_f32;
-  op_params.input1_offset = -data->input1_zero_point;
-  op_params.input2_offset = -data->input2_zero_point;
-  op_params.output_offset = data->output_zero_point;
-  op_params.output_multiplier = data->output_multiplier;
-  op_params.output_shift = data->output_shift;
-
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      tflite::micro::GetTensorShape(input1),
-      tflite::micro::GetTensorShape(input2), &op_params);
-
-  if (output->type == kTfLiteInt8) {
-    if (need_broadcast) {
-      reference_integer_ops::BroadcastMul4DSlow(
-          op_params, tflite::micro::GetTensorShape(input1),
-          tflite::micro::GetTensorData<int8_t>(input1),
-          tflite::micro::GetTensorShape(input2),
-          tflite::micro::GetTensorData<int8_t>(input2),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
-    } else {
-      reference_integer_ops::Mul(op_params,
-                                 tflite::micro::GetTensorShape(input1),
-                                 tflite::micro::GetTensorData<int8_t>(input1),
-                                 tflite::micro::GetTensorShape(input2),
-                                 tflite::micro::GetTensorData<int8_t>(input2),
-                                 tflite::micro::GetTensorShape(output),
-                                 tflite::micro::GetTensorData<int8_t>(output));
-    }
-  } else if (output->type == kTfLiteUInt8) {
-    if (need_broadcast) {
-      reference_integer_ops::BroadcastMul4DSlow(
-          op_params, tflite::micro::GetTensorShape(input1),
-          tflite::micro::GetTensorData<uint8_t>(input1),
-          tflite::micro::GetTensorShape(input2),
-          tflite::micro::GetTensorData<uint8_t>(input2),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
-    } else {
-      reference_integer_ops::Mul(op_params,
-                                 tflite::micro::GetTensorShape(input1),
-                                 tflite::micro::GetTensorData<uint8_t>(input1),
-                                 tflite::micro::GetTensorShape(input2),
-                                 tflite::micro::GetTensorData<uint8_t>(input2),
-                                 tflite::micro::GetTensorShape(output),
-                                 tflite::micro::GetTensorData<uint8_t>(output));
-    }
-  }
-}
-
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData* data,
-               const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
-               TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params = {};
-  op_params.float_activation_min = data->output_activation_min_f32;
-  op_params.float_activation_max = data->output_activation_max_f32;
-
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      tflite::micro::GetTensorShape(input1),
-      tflite::micro::GetTensorShape(input2), &op_params);
-
-  if (need_broadcast) {
-    reference_ops::BroadcastMul4DSlow(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
-  } else {
-    reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
-                       tflite::micro::GetTensorData<float>(input1),
-                       tflite::micro::GetTensorShape(input2),
-                       tflite::micro::GetTensorData<float>(input2),
-                       tflite::micro::GetTensorShape(output),
-                       tflite::micro::GetTensorData<float>(output));
-  }
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  return CalculateOpData(context, node, params, data);
-}
-
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {  // Dispatches int8/int32/float32 only; no int16 case here.
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
 
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kInput1Tensor);
+      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
 
   long long start_time = esp_timer_get_time();
   switch (input1->type) {
     case kTfLiteInt8:
-      EvalQuantized(context, node, data, input1, input2, output);
+#if ESP_NN
+      MulEvalQuantized(context, node, data, input1, input2, output);
+#else  // !ESP_NN: use the shared reference kernel instead.
+      EvalMulQuantizedReference(context, node, data, input1, input2, output);
+#endif  // ESP_NN
       break;
     case kTfLiteInt32:
-      EvalQuantizedReference(context, node, data, input1, input2, output);
+      EvalMulQuantizedReference(context, node, data, input1, input2, output);
       break;
     case kTfLiteFloat32:
-      EvalFloat(context, node, params, data, input1, input2,
+      EvalMulFloatReference(context, node, params, data, input1, input2,
                             output);
       break;
     default:
-
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
-
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);  // The return below skips the mul_total_time update.
       return kTfLiteError;
   }
   mul_total_time += esp_timer_get_time() - start_time;
   return kTfLiteOk;
 }
 
-}  // namespace mul
-
 TfLiteRegistration Register_MUL() {
-  return {/*init=*/mul::Init,
-          /*free=*/nullptr,
-          /*prepare=*/mul::Prepare,
-          /*invoke=*/mul::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);  // Shared init/prepare, local MulEval.
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
+
 #else
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -535,226 +330,58 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace mul {
-namespace {
 
-constexpr int kInput1Tensor = 0;
-constexpr int kInput2Tensor = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  int32_t input1_zero_point;
-  int32_t input2_zero_point;
-
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-  int32_t output_zero_point;
-  int32_t output_multiplier;
-  int output_shift;
-
-  float output_activation_min_f32;
-  float output_activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             TfLiteMulParams* params, OpData* data) {
-  const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
-
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-
-    double real_multiplier = static_cast<double>(input1->params.scale) *
-                             static_cast<double>(input2->params.scale) /
-                             static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
-                       &data->output_shift);
-
-    data->input1_zero_point = input1->params.zero_point;
-    data->input2_zero_point = input2->params.zero_point;
-    data->output_zero_point = output->params.zero_point;
-  } else {
-    CalculateActivationRange(params->activation,
-                             &data->output_activation_min_f32,
-                             &data->output_activation_max_f32);
-  }
-
-  return kTfLiteOk;
-}
-
-}  // namespace
-
-void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data,
-                   const TfLiteEvalTensor* input1,
-                   const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params = {};
-  op_params.quantized_activation_min = data->output_activation_min;
-  op_params.quantized_activation_max = data->output_activation_max;
-  op_params.float_activation_max = data->output_activation_max_f32;
-  op_params.input1_offset = -data->input1_zero_point;
-  op_params.input2_offset = -data->input2_zero_point;
-  op_params.output_offset = data->output_zero_point;
-  op_params.output_multiplier = data->output_multiplier;
-  op_params.output_shift = data->output_shift;
-
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      tflite::micro::GetTensorShape(input1),
-      tflite::micro::GetTensorShape(input2), &op_params);
-
-  if (output->type == kTfLiteInt8) {
-    if (need_broadcast) {
-      reference_integer_ops::BroadcastMul4DSlow(
-          op_params, tflite::micro::GetTensorShape(input1),
-          tflite::micro::GetTensorData<int8_t>(input1),
-          tflite::micro::GetTensorShape(input2),
-          tflite::micro::GetTensorData<int8_t>(input2),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
-    } else {
-      reference_integer_ops::Mul(op_params,
-                                 tflite::micro::GetTensorShape(input1),
-                                 tflite::micro::GetTensorData<int8_t>(input1),
-                                 tflite::micro::GetTensorShape(input2),
-                                 tflite::micro::GetTensorData<int8_t>(input2),
-                                 tflite::micro::GetTensorShape(output),
-                                 tflite::micro::GetTensorData<int8_t>(output));
-    }
-  } else if (output->type == kTfLiteUInt8) {
-    if (need_broadcast) {
-      reference_integer_ops::BroadcastMul4DSlow(
-          op_params, tflite::micro::GetTensorShape(input1),
-          tflite::micro::GetTensorData<uint8_t>(input1),
-          tflite::micro::GetTensorShape(input2),
-          tflite::micro::GetTensorData<uint8_t>(input2),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
-    } else {
-      reference_integer_ops::Mul(op_params,
-                                 tflite::micro::GetTensorShape(input1),
-                                 tflite::micro::GetTensorData<uint8_t>(input1),
-                                 tflite::micro::GetTensorShape(input2),
-                                 tflite::micro::GetTensorData<uint8_t>(input2),
-                                 tflite::micro::GetTensorShape(output),
-                                 tflite::micro::GetTensorData<uint8_t>(output));
-    }
-  }
-}
-
-void EvalFloat(TfLiteContext* context, TfLiteNode* node,
-               TfLiteMulParams* params, const OpData* data,
-               const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2,
-               TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params = {};
-  op_params.float_activation_min = data->output_activation_min_f32;
-  op_params.float_activation_max = data->output_activation_max_f32;
-
-  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-      tflite::micro::GetTensorShape(input1),
-      tflite::micro::GetTensorShape(input2), &op_params);
-
-  if (need_broadcast) {
-    reference_ops::BroadcastMul4DSlow(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
-  } else {
-    reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
-                       tflite::micro::GetTensorData<float>(input1),
-                       tflite::micro::GetTensorShape(input2),
-                       tflite::micro::GetTensorData<float>(input2),
-                       tflite::micro::GetTensorShape(output),
-                       tflite::micro::GetTensorData<float>(output));
-  }
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {  // Calls only the shared reference kernels.
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  return CalculateOpData(context, node, params, data);
-}
-
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-  auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
 
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInput1Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInput2Tensor);
+      tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
 
   switch (input1->type) {
-    case kTfLiteUInt8:
     case kTfLiteInt8:
-      EvalQuantized(context, node, data, input1, input2, output);
+    case kTfLiteInt16:  // int8/int16/int32 all share the call below.
+    case kTfLiteInt32:  // NOTE(review): the TfLiteStatus it returns is dropped.
+      EvalMulQuantizedReference(context, node, data, input1, input2, output);
       break;
     case kTfLiteFloat32:
-      EvalFloat(context, node, params, data, input1, input2, output);
+      EvalMulFloatReference(context, node, params, data, input1, input2,
+                            output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input1->type), input1->type);
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(input1->type), input1->type);
       return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
-}  // namespace mul
 
 TfLiteRegistration Register_MUL() {
-  return {/*init=*/mul::Init,
-          /*free=*/nullptr,
-          /*prepare=*/mul::Prepare,
-          /*invoke=*/mul::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);  // Shared init/prepare, local MulEval.
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #endif
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h
new file mode 100644
index 0000000..61d4605
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h
@@ -0,0 +1,74 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+extern const int kMulInput1Tensor;  // Defined in mul_common.cc.
+extern const int kMulInput2Tensor;  // Defined in mul_common.cc.
+extern const int kMulOutputTensor;  // Defined in mul_common.cc.
+
+struct OpDataMul {  // MUL kernel state: MulInit allocates it, Eval reads it via node->user_data.
+  int32_t input1_zero_point;
+  int32_t input2_zero_point;
+  // Quantized-output parameters (presumably filled by CalculateOpDataMul - confirm).
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_zero_point;
+  int32_t output_multiplier;
+  int output_shift;
+  // Float activation bounds (per field names - confirm against CalculateOpDataMul).
+  float output_activation_min_f32;
+  float output_activation_max_f32;
+};
+// Init hook: returns a persistent buffer of sizeof(OpDataMul).
+void* MulInit(TfLiteContext* context, const char* buffer, size_t length);
+// Presumably fills *data from the node's tensors and params - confirm in mul_common.cc.
+TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
+                                TfLiteMulParams* params, OpDataMul* data);
+// Prepare hook handed to RegisterOp by the Register_MUL* functions.
+TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node);
+// Shared integer kernel; the MUL eval functions route int8/int16/int32 inputs to it.
+TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node,
+                                       const OpDataMul* data,
+                                       const TfLiteEvalTensor* input1,
+                                       const TfLiteEvalTensor* input2,
+                                       TfLiteEvalTensor* output);
+// Shared float kernel; the MUL eval functions route kTfLiteFloat32 inputs to it.
+void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteMulParams* params, const OpDataMul* data,
+                           const TfLiteEvalTensor* input1,
+                           const TfLiteEvalTensor* input2,
+                           TfLiteEvalTensor* output);
+
+// Generic registration function; every MUL kernel variant must define it.
+TfLiteRegistration Register_MUL();
+// NOTE(review): the arm_elementwise_mul-based variant also defines Register_MUL_INT16, which is not declared here.
+#if defined(CMSIS_NN)
+TfLiteRegistration Register_MUL_INT8();
+#else
+// Fallback: without CMSIS_NN the int8 registration is simply the generic one.
+inline TfLiteRegistration Register_MUL_INT8() { return Register_MUL(); }
+#endif  // defined(CMSIS_NN)
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_MUL_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cc
new file mode 100644
index 0000000..187fae2
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/mul_common.cc
@@ -0,0 +1,213 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mul.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+
+namespace tflite {
+
+const int kMulInput1Tensor = 0;
+const int kMulInput2Tensor = 1;
+const int kMulOutputTensor = 0;
+// Allocates a persistent buffer sized for OpDataMul; buffer/length unused.
+void* MulInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataMul));
+}
+
+TfLiteStatus CalculateOpDataMul(TfLiteContext* context, TfLiteNode* node,
+                                TfLiteMulParams* params, OpDataMul* data) {
+  // Fills `data` for a MUL node. Arity is checked before any temp tensor is
+  // allocated so that a failed check cannot return with tensors outstanding.
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kMulInput1Tensor);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kMulInput2Tensor);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kMulOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+    // Rescale factor in1_scale * in2_scale / out_scale, then quantized.
+    double real_multiplier = static_cast<double>(input1->params.scale) *
+                             static_cast<double>(input2->params.scale) /
+                             static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+
+    data->input1_zero_point = input1->params.zero_point;
+    data->input2_zero_point = input2->params.zero_point;
+    data->output_zero_point = output->params.zero_point;
+    // int16 tensors are only accepted when every zero point equals 0.
+    if (input1->type == kTfLiteInt16) {
+      TF_LITE_ENSURE_EQ(context, data->input1_zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, data->input2_zero_point, 0);
+      TF_LITE_ENSURE_EQ(context, data->output_zero_point, 0);
+    }
+  } else if (output->type == kTfLiteInt32) {
+    CalculateActivationRange(params->activation, &data->output_activation_min,
+                             &data->output_activation_max);
+  } else {
+    CalculateActivationRange(params->activation,
+                             &data->output_activation_min_f32,
+                             &data->output_activation_max_f32);
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus MulPrepare(TfLiteContext* context, TfLiteNode* node) {
+  // Prepare step for MUL: both node payloads must exist before they are cast.
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+
+  auto* mul_params = static_cast<TfLiteMulParams*>(node->builtin_data);
+  auto* op_data = static_cast<OpDataMul*>(node->user_data);
+  return CalculateOpDataMul(context, node, mul_params, op_data);
+}
+
+TfLiteStatus EvalMulQuantizedReference(TfLiteContext* context, TfLiteNode* node,
+                                       const OpDataMul* data,
+                                       const TfLiteEvalTensor* input1,
+                                       const TfLiteEvalTensor* input2,
+                                       TfLiteEvalTensor* output) {
+  tflite::ArithmeticParams op_params = {};  // Integer MUL kernel parameters.
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  op_params.float_activation_max = data->output_activation_max_f32;
+  op_params.input1_offset = -data->input1_zero_point;
+  op_params.input2_offset = -data->input2_zero_point;
+  op_params.output_offset = data->output_zero_point;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = data->output_shift;
+  // NOTE(review): output_activation_max_f32 is unset for integer outputs.
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
+  // Kernel is picked by input1's type; other types fall through to kTfLiteOk.
+  if (input1->type == kTfLiteInt8) {
+    if (need_broadcast) {
+      reference_integer_ops::BroadcastMul4DSlow(
+          op_params, tflite::micro::GetTensorShape(input1),
+          tflite::micro::GetTensorData<int8_t>(input1),
+          tflite::micro::GetTensorShape(input2),
+          tflite::micro::GetTensorData<int8_t>(input2),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+    } else {
+      reference_integer_ops::Mul(op_params,
+                                 tflite::micro::GetTensorShape(input1),
+                                 tflite::micro::GetTensorData<int8_t>(input1),
+                                 tflite::micro::GetTensorShape(input2),
+                                 tflite::micro::GetTensorData<int8_t>(input2),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<int8_t>(output));
+    }
+  } else if (input1->type == kTfLiteInt32) {
+    if (need_broadcast) {
+      reference_ops::BroadcastMul4DSlow(
+          op_params, tflite::micro::GetTensorShape(input1),
+          tflite::micro::GetTensorData<int32_t>(input1),
+          tflite::micro::GetTensorShape(input2),
+          tflite::micro::GetTensorData<int32_t>(input2),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int32_t>(output));
+    } else {
+      reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1),
+                         tflite::micro::GetTensorData<int32_t>(input1),
+                         tflite::micro::GetTensorShape(input2),
+                         tflite::micro::GetTensorData<int32_t>(input2),
+                         tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int32_t>(output));
+    }
+  } else if (input1->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, op_params.input1_offset, 0);
+    TF_LITE_ENSURE_EQ(context, op_params.input2_offset, 0);
+    TF_LITE_ENSURE_EQ(context, op_params.output_offset, 0);
+
+    if (need_broadcast) {
+      reference_integer_ops::BroadcastMul4DSlow(
+          op_params, tflite::micro::GetTensorShape(input1),
+          tflite::micro::GetTensorData<int16_t>(input1),
+          tflite::micro::GetTensorShape(input2),
+          tflite::micro::GetTensorData<int16_t>(input2),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
+    } else {
+      reference_integer_ops::Mul(op_params,
+                                 tflite::micro::GetTensorShape(input1),
+                                 tflite::micro::GetTensorData<int16_t>(input1),
+                                 tflite::micro::GetTensorShape(input2),
+                                 tflite::micro::GetTensorData<int16_t>(input2),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<int16_t>(output));
+    }
+  }
+  return kTfLiteOk;
+}
+
+void EvalMulFloatReference(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteMulParams* params, const OpDataMul* data,
+                           const TfLiteEvalTensor* input1,
+                           const TfLiteEvalTensor* input2,
+                           TfLiteEvalTensor* output) {
+  // Float MUL with the clamp range stored in `data`; `context`, `node` and
+  // `params` are not used here.
+  const auto lhs_shape = tflite::micro::GetTensorShape(input1);
+  const auto rhs_shape = tflite::micro::GetTensorShape(input2);
+  const auto out_shape = tflite::micro::GetTensorShape(output);
+  const float* lhs = tflite::micro::GetTensorData<float>(input1);
+  const float* rhs = tflite::micro::GetTensorData<float>(input2);
+  float* out = tflite::micro::GetTensorData<float>(output);
+
+  tflite::ArithmeticParams op_params = {};
+  op_params.float_activation_min = data->output_activation_min_f32;
+  op_params.float_activation_max = data->output_activation_max_f32;
+
+  // ProcessBroadcastShapes receives op_params by pointer and reports whether
+  // the broadcasting kernel is required.
+  const bool broadcast_required = reference_ops::ProcessBroadcastShapes(
+      lhs_shape, rhs_shape, &op_params);
+  if (broadcast_required) {
+    reference_ops::BroadcastMul4DSlow(op_params, lhs_shape, lhs, rhs_shape, rhs,
+                                      out_shape, out);
+  } else {
+    reference_ops::Mul(op_params, lhs_shape, lhs, rhs_shape, rhs, out_shape,
+                       out);
+  }
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cc
index cc19d42..249f7ad 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/neg.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,11 +18,11 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace neg {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
@@ -41,26 +41,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                             tflite::micro::GetTensorData<float>(output));
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace neg
+}  // namespace
 
 TfLiteRegistration Register_NEG() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/neg::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cc
index 4130f7c..79615bd 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pack.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,11 +17,10 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pack {
+
 namespace {
 
 constexpr int kOutputTensor = 0;
@@ -82,10 +81,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       return PackImpl<float>(context, node, output, data->values_count,
                              data->axis);
     }
-    case kTfLiteUInt8: {
-      return PackImpl<uint8_t>(context, node, output, data->values_count,
-                               data->axis);
-    }
     case kTfLiteInt8: {
       return PackImpl<int8_t>(context, node, output, data->values_count,
                               data->axis);
@@ -99,8 +94,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                data->axis);
     }
     default: {
-      TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by pack.",
-                         TfLiteTypeGetName(output->type));
+      MicroPrintf("Type '%s' is not supported by pack.",
+                  TfLiteTypeGetName(output->type));
       return kTfLiteError;
     }
   }
@@ -109,19 +104,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }
 
 }  // namespace
-}  // namespace pack
 
 TfLiteRegistration Register_PACK() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/pack::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cc
index ec59d19..a7d7edd 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.cc
@@ -23,11 +23,9 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pad {
 namespace {
 
 struct OpData {
@@ -35,27 +33,115 @@ struct OpData {
   int32_t output_zero_point;
 };
 
-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
 }
 
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+  // Input 0 is the tensor to pad; input 2, if present, holds the pad value.
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3
+          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+  // Without input 2 the pad value is 0, or the output zero point for int8.
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      float pad_value =
+          constant_values == nullptr
+              ? 0.f
+              : *tflite::micro::GetTensorData<float>(constant_values);
+      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
+        reference_ops::PadImageStyle(
+            data->params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<float>(input), &pad_value,
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      } else {
+        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                           tflite::micro::GetTensorData<float>(input),
+                           &pad_value, tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<float>(output));
+      }
+    } break;
+    case kTfLiteInt8: {
+      int8_t pad_value;
+      if (constant_values == nullptr) {
+        pad_value = static_cast<int8_t>(data->output_zero_point);
+      } else {
+        pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
+      }
+      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
+        reference_ops::PadImageStyle(
+            data->params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<int8_t>(input), &pad_value,
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int8_t>(output));
+      } else {
+        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                           tflite::micro::GetTensorData<int8_t>(input),
+                           &pad_value, tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<int8_t>(output));
+      }
+    } break;
+    case kTfLiteInt16: {
+      int16_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int16_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int16_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int16_t>(output));
+    } break;
+    case kTfLiteInt32: {
+      int32_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int32_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int32_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int32_t>(output));
+    } break;
+    default:
+      // Remaining element types are rejected with an error.
+      MicroPrintf("Type %s not currently supported by Pad.",
+                  TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TFLITE_DCHECK(node->user_data != nullptr);
   OpData* data = static_cast<OpData*>(node->user_data);
 
   TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, /*index=*/0);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
+  TfLiteTensor* paddings =
+      micro_context->AllocateTempInputTensor(node, /*index=*/1);
   TF_LITE_ENSURE(context, paddings != nullptr);
-  const TfLiteTensor* constant_values =
-      NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
-  TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
+  TfLiteTensor* constant_values =
+      NumInputs(node) == 3
+          ? micro_context->AllocateTempInputTensor(node, /*index=*/2)
+          : nullptr;
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
@@ -103,21 +189,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
   }
 
-  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
+  if (input->type == kTfLiteInt8) {
     if (constant_values == nullptr) {
       // Quantized Pad requires that 0 is represented in the quantized
       // range.
-      if (input->type == kTfLiteUInt8) {
-        TF_LITE_ENSURE(context, output->params.zero_point >=
-                                    std::numeric_limits<uint8_t>::min());
-        TF_LITE_ENSURE(context, output->params.zero_point <=
-                                    std::numeric_limits<uint8_t>::max());
-      } else {
-        TF_LITE_ENSURE(context, output->params.zero_point >=
-                                    std::numeric_limits<int8_t>::min());
-        TF_LITE_ENSURE(context, output->params.zero_point <=
-                                    std::numeric_limits<int8_t>::max());
-      }
+      TF_LITE_ENSURE(context, output->params.zero_point >=
+                                  std::numeric_limits<int8_t>::min());
+      TF_LITE_ENSURE(context, output->params.zero_point <=
+                                  std::numeric_limits<int8_t>::max());
     } else {
       // Quantized Pad requires that 'constant_values' is represented in the
       // same quantized range as the input and output tensors.
@@ -129,126 +208,23 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     data->output_zero_point = output->params.zero_point;
   }
 
-  return kTfLiteOk;
-}
-
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
-
-  const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, /*index=*/0);
-  const TfLiteEvalTensor* constant_values =
-      NumInputs(node) == 3
-          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
-          : nullptr;
-  TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
-
-  switch (input->type) {
-    case kTfLiteFloat32: {
-      float pad_value =
-          constant_values == nullptr
-              ? 0.f
-              : *tflite::micro::GetTensorData<float>(constant_values);
-      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
-        reference_ops::PadImageStyle(
-            data->params, tflite::micro::GetTensorShape(input),
-            tflite::micro::GetTensorData<float>(input), &pad_value,
-            tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<float>(output));
-      } else {
-        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
-                           tflite::micro::GetTensorData<float>(input),
-                           &pad_value, tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<float>(output));
-      }
-    } break;
-    case kTfLiteUInt8: {
-      uint8_t pad_value;
-      if (constant_values == nullptr) {
-        pad_value = static_cast<uint8_t>(data->output_zero_point);
-      } else {
-        pad_value = *tflite::micro::GetTensorData<uint8_t>(constant_values);
-      }
-      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
-        reference_ops::PadImageStyle(
-            data->params, tflite::micro::GetTensorShape(input),
-            tflite::micro::GetTensorData<uint8_t>(input), &pad_value,
-            tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<uint8_t>(output));
-      } else {
-        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
-                           tflite::micro::GetTensorData<uint8_t>(input),
-                           &pad_value, tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<uint8_t>(output));
-      }
-    } break;
-    case kTfLiteInt8: {
-      int8_t pad_value;
-      if (constant_values == nullptr) {
-        pad_value = static_cast<uint8_t>(data->output_zero_point);
-      } else {
-        pad_value = *tflite::micro::GetTensorData<int8_t>(constant_values);
-      }
-      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
-        reference_ops::PadImageStyle(
-            data->params, tflite::micro::GetTensorShape(input),
-            tflite::micro::GetTensorData<int8_t>(input), &pad_value,
-            tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<int8_t>(output));
-      } else {
-        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
-                           tflite::micro::GetTensorData<int8_t>(input),
-                           &pad_value, tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<int8_t>(output));
-      }
-    } break;
-    case kTfLiteInt32: {
-      int32_t pad_value =
-          constant_values == nullptr
-              ? 0
-              : *tflite::micro::GetTensorData<int32_t>(constant_values);
-      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
-                         tflite::micro::GetTensorData<int32_t>(input),
-                         &pad_value, tflite::micro::GetTensorShape(output),
-                         tflite::micro::GetTensorData<int32_t>(output));
-    } break;
-    default:
-
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.",
-                         TfLiteTypeGetName(input->type));
-      return kTfLiteError;
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(paddings);
+  if (constant_values != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(constant_values);
   }
-#undef TF_LITE_PAD
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
-}  // namespace pad
-
 TfLiteRegistration Register_PAD() {
-  return {/*init=*/pad::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pad::Prepare,
-          /*invoke=*/pad::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PadPrepare, Eval);
 }
 
 // Also register Pad as PadV2.
 TfLiteRegistration Register_PADV2() {
-  return {/*init=*/pad::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pad::Prepare,
-          /*invoke=*/pad::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PadPrepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h
new file mode 100644
index 0000000..81d1a9f
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pad.h
@@ -0,0 +1,27 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+// Prepare callback that pad.cc registers for both PAD and PADV2.
+TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cc
index bc956db..8b6f9e0 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,289 +20,158 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h"  // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pooling {
 
 namespace {
 
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
 struct OpData {
-  TfLitePaddingValues padding;
+  OpDataPooling reference_op_data;
+
   // Index to buffer for optimizations if applicable.
   int buffer_idx;
-
-  int32_t activation_min;
-  int32_t activation_max;
-  float activation_min_f32;
-  float activation_max_f32;
 };
 
-TfLiteStatus CalculateOpData(TfLiteContext* context,
-                             const TfLitePoolParams* params,
-                             const TfLiteTensor* input, TfLiteTensor* output,
-                             OpData* data) {
-  // input: batch, height, width, channel
-  int height = SizeOfDimension(input, 1);
-  int width = SizeOfDimension(input, 2);
-
-  int out_height, out_width;
-
-  data->padding = ComputePaddingHeightWidth(
-      params->stride_height, params->stride_width,
-      /*dilation_rate_height=*/1,
-      /*dilation_rate_width=*/1, height, width, params->filter_height,
-      params->filter_width, params->padding, &out_height, &out_width);
+void PopulateCommonParams(
+    TfLiteContext* const context, cmsis_nn_dims* const input_dims,
+    cmsis_nn_dims* const output_dims, cmsis_nn_pool_params* const pool_params,
+    cmsis_nn_context* const ctx, cmsis_nn_dims* const filter_dims,
+    const OpData& data, const RuntimeShape& input_shape,
+    const RuntimeShape& output_shape, const TfLitePoolParams* params) {
+  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
 
-  if (input->type == kTfLiteFloat32) {
-    CalculateActivationRange(params->activation, &data->activation_min_f32,
-                             &data->activation_max_f32);
-  } else {
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->activation_min,
-        &data->activation_max));
-    TFLITE_DCHECK_LE(data->activation_min, data->activation_max);
+  input_dims->n = 1;
+  input_dims->h = input_shape.Dims(1);
+  input_dims->w = input_shape.Dims(2);
+  input_dims->c = depth;
+
+  output_dims->n = 1;
+  output_dims->h = output_shape.Dims(1);
+  output_dims->w = output_shape.Dims(2);
+  output_dims->c = depth;
+
+  pool_params->stride.h = params->stride_height;
+  pool_params->stride.w = params->stride_width;
+  pool_params->padding.h = data.reference_op_data.padding.height;
+  pool_params->padding.w = data.reference_op_data.padding.width;
+  pool_params->activation.min = data.reference_op_data.activation_min;
+  pool_params->activation.max = data.reference_op_data.activation_max;
+
+  filter_dims->n = 1;
+  filter_dims->h = params->filter_height;
+  filter_dims->w = params->filter_width;
+  filter_dims->c = 1;
+  ctx->buf = nullptr;
+  ctx->size = 0;
+  if (data.buffer_idx > -1) {
+    ctx->buf = context->GetScratchBuffer(context, data.buffer_idx);
   }
-
-  // Set buffer index to a reset value
-  data->buffer_idx = -1;
-
-  return kTfLiteOk;
-}
-
-void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
-                      const TfLitePoolParams* params, const OpData& data,
-                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  float activation_min, activation_max;
-  CalculateActivationRange(params->activation, &activation_min,
-                           &activation_max);
-
-  PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data.padding.height;
-  op_params.padding_values.width = data.padding.width;
-  op_params.float_activation_min = activation_min;
-  op_params.float_activation_max = activation_max;
-  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                             tflite::micro::GetTensorData<float>(input),
-                             tflite::micro::GetTensorShape(output),
-                             tflite::micro::GetTensorData<float>(output));
 }
 
 void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                           const TfLitePoolParams* params, const OpData& data,
                           const TfLiteEvalTensor* input,
                           TfLiteEvalTensor* output) {
-  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
-
-  if (input->type == kTfLiteUInt8) {
-    PoolParams op_params;
-    op_params.stride_height = params->stride_height;
-    op_params.stride_width = params->stride_width;
-    op_params.filter_height = params->filter_height;
-    op_params.filter_width = params->filter_width;
-    op_params.padding_values.height = data.padding.height;
-    op_params.padding_values.width = data.padding.width;
-    op_params.quantized_activation_min = data.activation_min;
-    op_params.quantized_activation_max = data.activation_max;
-
-    reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                               tflite::micro::GetTensorData<uint8_t>(input),
-                               tflite::micro::GetTensorShape(output),
-                               tflite::micro::GetTensorData<uint8_t>(output));
-  } else {
-    RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
-    TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK((input->type == kTfLiteInt8) || (input->type == kTfLiteInt16));
 
-    RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-    TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
-
-    const int depth = MatchingDim(input_shape, 3, output_shape, 3);
-
-    cmsis_nn_dims input_dims;
-    input_dims.n = 1;
-    input_dims.h = input_shape.Dims(1);
-    input_dims.w = input_shape.Dims(2);
-    input_dims.c = depth;
-
-    cmsis_nn_dims output_dims;
-    output_dims.n = 1;
-    output_dims.h = output_shape.Dims(1);
-    output_dims.w = output_shape.Dims(2);
-    output_dims.c = depth;
+  RuntimeShape input_shape = micro::GetTensorShape(input);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 
-    cmsis_nn_pool_params pool_params;
-    pool_params.stride.h = params->stride_height;
-    pool_params.stride.w = params->stride_width;
-    pool_params.padding.h = data.padding.height;
-    pool_params.padding.w = data.padding.width;
-    pool_params.activation.min = data.activation_min;
-    pool_params.activation.max = data.activation_max;
+  RuntimeShape output_shape = micro::GetTensorShape(output);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 
-    cmsis_nn_dims filter_dims;
-    filter_dims.n = 1;
-    filter_dims.h = params->filter_height;
-    filter_dims.w = params->filter_width;
-    filter_dims.c = 1;
+  cmsis_nn_dims input_dims;
+  cmsis_nn_dims output_dims;
+  cmsis_nn_pool_params pool_params;
+  cmsis_nn_dims filter_dims;
+  cmsis_nn_context ctx;
 
-    cmsis_nn_context ctx;
-    ctx.buf = nullptr;
-    ctx.size = 0;
-    if (data.buffer_idx > -1) {
-      ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
-    }
+  PopulateCommonParams(context, &input_dims, &output_dims, &pool_params, &ctx,
+                       &filter_dims, data, input_shape, output_shape, params);
 
+  if (input->type == kTfLiteInt8) {
     TFLITE_DCHECK_EQ(
         arm_avgpool_s8(&ctx, &pool_params, &input_dims,
-                       tflite::micro::GetTensorData<int8_t>(input),
-                       &filter_dims, &output_dims,
-                       tflite::micro::GetTensorData<int8_t>(output)),
-        ARM_MATH_SUCCESS);
+                       micro::GetTensorData<int8_t>(input), &filter_dims,
+                       &output_dims, micro::GetTensorData<int8_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
+  } else {
+    TFLITE_DCHECK_EQ(
+        arm_avgpool_s16(&ctx, &pool_params, &input_dims,
+                        micro::GetTensorData<int16_t>(input), &filter_dims,
+                        &output_dims, micro::GetTensorData<int16_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
   }
 }
 
-void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
-                  TfLitePoolParams* params, const OpData& data,
-                  const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  float activation_min, activation_max;
-  CalculateActivationRange(params->activation, &activation_min,
-                           &activation_max);
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data.padding.height;
-  op_params.padding_values.width = data.padding.width;
-  op_params.float_activation_min = data.activation_min_f32;
-  op_params.float_activation_max = data.activation_max_f32;
-  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                         tflite::micro::GetTensorData<float>(input),
-                         tflite::micro::GetTensorShape(output),
-                         tflite::micro::GetTensorData<float>(output));
-}
+TfLiteStatus MaxEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
+                              const TfLitePoolParams* params,
+                              const OpData& data, const TfLiteEvalTensor* input,
+                              TfLiteEvalTensor* output) {
+  TFLITE_DCHECK((input->type == kTfLiteInt8) || (input->type == kTfLiteInt16));
 
-void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node,
-                           TfLitePoolParams* params, const OpData& data,
-                           const TfLiteEvalTensor* input,
-                           TfLiteEvalTensor* output) {
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data.padding.height;
-  op_params.padding_values.width = data.padding.width;
-  op_params.quantized_activation_min = data.activation_min;
-  op_params.quantized_activation_max = data.activation_max;
-  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                         tflite::micro::GetTensorData<uint8_t>(input),
-                         tflite::micro::GetTensorShape(output),
-                         tflite::micro::GetTensorData<uint8_t>(output));
-}
+  RuntimeShape input_shape = micro::GetTensorShape(input);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 
-TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node,
-                         const TfLitePoolParams* params, const OpData& data,
-                         const TfLiteEvalTensor* input,
-                         TfLiteEvalTensor* output) {
-  RuntimeShape input_shape = tflite::micro::GetTensorShape(input);
-  RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-  const int depth = MatchingDim(input_shape, 3, output_shape, 3);
+  RuntimeShape output_shape = micro::GetTensorShape(output);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
 
   cmsis_nn_dims input_dims;
-  input_dims.n = 1;
-  input_dims.h = input_shape.Dims(1);
-  input_dims.w = input_shape.Dims(2);
-  input_dims.c = depth;
-
   cmsis_nn_dims output_dims;
-  output_dims.n = 1;
-  output_dims.h = output_shape.Dims(1);
-  output_dims.w = output_shape.Dims(2);
-  output_dims.c = depth;
-
   cmsis_nn_pool_params pool_params;
-  pool_params.stride.h = params->stride_height;
-  pool_params.stride.w = params->stride_width;
-  pool_params.padding.h = data.padding.height;
-  pool_params.padding.w = data.padding.width;
-  pool_params.activation.min = data.activation_min;
-  pool_params.activation.max = data.activation_max;
-
   cmsis_nn_dims filter_dims;
-  filter_dims.n = 1;
-  filter_dims.h = params->filter_height;
-  filter_dims.w = params->filter_width;
-  filter_dims.c = 1;
-
   cmsis_nn_context ctx;
-  ctx.buf = nullptr;
-  ctx.size = 0;
-  if (data.buffer_idx > -1) {
-    ctx.buf = context->GetScratchBuffer(context, data.buffer_idx);
-  }
 
-  TFLITE_DCHECK_EQ(
-      arm_max_pool_s8(&ctx, &pool_params, &input_dims,
-                      tflite::micro::GetTensorData<int8_t>(input), &filter_dims,
-                      &output_dims,
-                      tflite::micro::GetTensorData<int8_t>(output)),
-      ARM_MATH_SUCCESS);
+  PopulateCommonParams(context, &input_dims, &output_dims, &pool_params, &ctx,
+                       &filter_dims, data, input_shape, output_shape, params);
+
+  if (input->type == kTfLiteInt8) {
+    TFLITE_DCHECK_EQ(
+        arm_max_pool_s8(&ctx, &pool_params, &input_dims,
+                        micro::GetTensorData<int8_t>(input), &filter_dims,
+                        &output_dims, micro::GetTensorData<int8_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
+  } else {
+    TFLITE_DCHECK_EQ(
+        arm_max_pool_s16(&ctx, &pool_params, &input_dims,
+                         micro::GetTensorData<int16_t>(input), &filter_dims,
+                         &output_dims, micro::GetTensorData<int16_t>(output)),
+        ARM_CMSIS_NN_SUCCESS);
+  }
 
   return kTfLiteOk;
 }
 
-}  // namespace
-
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
 }
 
 TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  OpData* data = static_cast<OpData*>(node->user_data);
-  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
-
+  TF_LITE_ENSURE_STATUS(PoolingPrepare(context, node));
+  // Set buffer index to a reset value
+  static_cast<OpData*>(node->user_data)->buffer_idx = -1;
   return kTfLiteOk;
 }
 
 TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  OpData* data = static_cast<OpData*>(node->user_data);
-  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+  TF_LITE_ENSURE_STATUS(PoolingPrepare(context, node));
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
 
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
 
-  if (input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     RuntimeShape input_shape = GetTensorShape(input);
     TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
 
@@ -313,8 +182,11 @@ TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) {
     const int output_width = output_shape.Dims(2);
 
     const int32_t buffer_size =
-        arm_avgpool_s8_get_buffer_size(output_width, depth);
+        input->type == kTfLiteInt16
+            ? arm_avgpool_s16_get_buffer_size(output_width, depth)
+            : arm_avgpool_s8_get_buffer_size(output_width, depth);
 
+    auto* data = static_cast<OpData*>(node->user_data);
     if (buffer_size > 0) {
       TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena(
           context, buffer_size, &data->buffer_idx));
@@ -322,6 +194,9 @@ TfLiteStatus AveragePrepare(TfLiteContext* context, TfLiteNode* node) {
       data->buffer_idx = -1;
     }
   }
+
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
   return kTfLiteOk;
 }
 
@@ -333,48 +208,56 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
   // Inputs and outputs share the same type, guaranteed by the converter.
-  switch (input->type) {
-    case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+  if (input->type == kTfLiteFloat32) {
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      AverageEvalFloat(context, node, params, data, input, output);
-      break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#endif
+    AveragePoolingEvalFloat(context, node, params, &data.reference_op_data,
+                            input, output);
+  } else if (input->type == kTfLiteInt8) {
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
+#endif
+    AverageEvalQuantized(context, node, params, data, input, output);
+  } else if (input->type == kTfLiteInt16) {
+    AverageEvalQuantized(context, node, params, data, input, output);
+  } else {
+    MicroPrintf("Input type %s is not currently supported",
+                TfLiteTypeGetName(input->type));
+    return kTfLiteError;
+  }
 
-      AverageEvalQuantized(context, node, params, data, input, output);
-      break;
-    case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
+  return kTfLiteOk;
+}
+
+TfLiteStatus AverageEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
+  TFLITE_DCHECK(input->type == kTfLiteInt8);
+  TfLiteEvalTensor* output =
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+  AverageEvalQuantized(context, node, params, data, input, output);
 
-      AverageEvalQuantized(context, node, params, data, input, output);
-      break;
-    default:
-      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
-                         TfLiteTypeGetName(input->type));
-      return kTfLiteError;
-  }
   return kTfLiteOk;
 }
 
-TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus AverageEvalInt16(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
@@ -382,76 +265,117 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
+  TFLITE_DCHECK(input->type == kTfLiteInt16);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
-  switch (input->type) {
-    case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
+  AverageEvalQuantized(context, node, params, data, input, output);
 
-      MaxEvalFloat(context, node, params, data, input, output);
-      break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
+  return kTfLiteOk;
+}
+TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
-      MaxEvalQuantizedUInt8(context, node, params, data, input, output);
-      break;
-    case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
-      MaxEvalInt8(context, node, params, data, input, output);
-      break;
-    default:
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
-                         TfLiteTypeGetName(input->type));
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
+  TfLiteEvalTensor* output =
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+  if (input->type == kTfLiteFloat32) {
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
+#endif
+    MaxPoolingEvalFloat(context, node, params, &data.reference_op_data, input,
+                        output);
+  } else if (input->type == kTfLiteInt8) {
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
+      return kTfLiteError;
+#endif
+    MaxEvalQuantized(context, node, params, data, input, output);
+  } else if (input->type == kTfLiteInt16) {
+    MaxEvalQuantized(context, node, params, data, input, output);
+  } else {
+    MicroPrintf("Input type %s is not currently supported",
+                TfLiteTypeGetName(input->type));
+    return kTfLiteError;
   }
+
   return kTfLiteOk;
 }
 
-}  // namespace pooling
+TfLiteStatus MaxEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
+  TFLITE_DCHECK(input->type == kTfLiteInt8);
+  TfLiteEvalTensor* output =
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+  MaxEvalQuantized(context, node, params, data, input, output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus MaxEvalInt16(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
+  TFLITE_DCHECK(input->type == kTfLiteInt16);
+  TfLiteEvalTensor* output =
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+  MaxEvalQuantized(context, node, params, data, input, output);
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
+  return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEvalInt8);
+}
+
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() {
+  return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEvalInt16);
+}
 
 TfLiteRegistration Register_AVERAGE_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::AveragePrepare,
-          /*invoke=*/pooling::AverageEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, AveragePrepare, AverageEval);
+}
+
+TfLiteRegistration Register_MAX_POOL_2D_INT8() {
+  return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEvalInt8);
+}
+
+TfLiteRegistration Register_MAX_POOL_2D_INT16() {
+  return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEvalInt16);
 }
 
 TfLiteRegistration Register_MAX_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::MaxPrepare,
-          /*invoke=*/pooling::MaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, MaxPrepare, MaxEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -473,16 +397,15 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_function_specializations.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_slicers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/mli_tf_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pooling {
 
 namespace {
 
@@ -500,9 +423,13 @@ struct OpData {
   bool is_mli_applicable;
 
   // Tensors in MLI format.
-  mli_tensor* mli_in;
-  mli_tensor* mli_out;
+  mutable ops::micro::MliTensorInterface mli_in;
+  mutable ops::micro::MliTensorInterface mli_out;
   mli_pool_cfg* cfg;
+
+  // Pointers to the specialized mli pooling functions (average and max).
+  pooling_func_ptr p_mli_krn_avepool_hwc_sa8;
+  pooling_func_ptr p_mli_krn_maxpool_hwc_sa8;
 };
 
 enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 };
@@ -543,9 +470,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   data->is_mli_applicable = IsMliApplicable(context, input, params);
@@ -555,22 +486,22 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (input->type == kTfLiteFloat32) {
     CalculateActivationRange(params->activation, &data->activation_min_f32,
                              &data->activation_max_f32);
-  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
+  } else if (input->type == kTfLiteInt8) {
     CalculateActivationRangeQuantized(context, params->activation, output,
                                       &data->activation_min,
                                       &data->activation_max);
   }
 
   if (data->is_mli_applicable) {
-    data->mli_in = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
-    data->mli_out = static_cast<mli_tensor*>(
-        context->AllocatePersistentBuffer(context, sizeof(mli_tensor)));
+    data->mli_in = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
+    data->mli_out = ops::micro::MliTensorInterface(static_cast<mli_tensor*>(
+        context->AllocatePersistentBuffer(context, sizeof(mli_tensor))));
     data->cfg = static_cast<mli_pool_cfg*>(
         context->AllocatePersistentBuffer(context, sizeof(mli_pool_cfg)));
 
-    ops::micro::ConvertToMliTensor(input, data->mli_in);
-    ops::micro::ConvertToMliTensor(output, data->mli_out);
+    ops::micro::ConvertToMliTensor(input, &data->mli_in);
+    ops::micro::ConvertToMliTensor(output, &data->mli_out);
 
     data->cfg->kernel_width = params->filter_width;
     data->cfg->kernel_height = params->filter_height;
@@ -590,7 +521,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       data->cfg->padding_bottom =
           data->padding.height + data->padding.height_offset;
     }
+
+    // Choose pooling mli specialized functions.
+    data->p_mli_krn_avepool_hwc_sa8 = mli_krn_avepool(data->cfg);
+    data->p_mli_krn_maxpool_hwc_sa8 = mli_krn_maxpool(data->cfg);
   }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -616,9 +554,8 @@ void AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node,
                              tflite::micro::GetTensorShape(output),
                              tflite::micro::GetTensorData<float>(output));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.",
+              TfLiteTypeGetName(input->type), input->type);
 #endif
 }
 
@@ -629,8 +566,8 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params,
                      const MliPoolingType pooling_type) {
   mli_pool_cfg cfg_local = *data.cfg;
 
-  ops::micro::MliTensorAttachBuffer<int8_t>(input, data.mli_in);
-  ops::micro::MliTensorAttachBuffer<int8_t>(output, data.mli_out);
+  ops::micro::MliTensorAttachBuffer<int8_t>(input, &data.mli_in);
+  ops::micro::MliTensorAttachBuffer<int8_t>(output, &data.mli_out);
 
   const int height_dimension = 1;
   int in_slice_height = 0;
@@ -639,18 +576,26 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params,
 
   // Tensors for data in fast (local) memory and config to copy data from
   // external to local memory
-  mli_tensor in_local = *data.mli_in;
-  mli_tensor out_local = *data.mli_out;
+  mli_tensor in_local = *data.mli_in.MliTensor();
+  mli_tensor out_local = *data.mli_out.MliTensor();
+
+  ops::micro::MliTensorInterface in_local_interface(&in_local);
+  ops::micro::MliTensorInterface out_local_interface(&out_local);
+
   mli_mov_cfg_t copy_config;
   mli_mov_cfg_for_copy(&copy_config);
   TF_LITE_ENSURE_STATUS(get_arc_scratch_buffer_for_pooling_tensors(
-      context, &in_local, &out_local));
-  bool in_is_local = in_local.data == data.mli_in->data;
-  bool out_is_local = out_local.data == data.mli_out->data;
+      context, &in_local_interface, &out_local_interface));
+
+  bool in_is_local =
+      in_local_interface.Data<int8_t>() == data.mli_in.Data<int8_t>();
+  bool out_is_local =
+      out_local_interface.Data<int8_t>() == data.mli_out.Data<int8_t>();
+
   TF_LITE_ENSURE_STATUS(arc_scratch_buffer_calc_slice_size_io(
-      &in_local, &out_local, cfg_local.kernel_height, cfg_local.stride_height,
-      cfg_local.padding_top, cfg_local.padding_bottom, &in_slice_height,
-      &out_slice_height));
+      &in_local_interface, &out_local_interface, cfg_local.kernel_height,
+      cfg_local.stride_height, cfg_local.padding_top, cfg_local.padding_bottom,
+      &in_slice_height, &out_slice_height));
 
   /* mli_in tensor contains batches of HWC tensors. so it is a 4 dimensional
      tensor. because the mli kernel will process one HWC tensor at a time, the 4
@@ -659,10 +604,11 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params,
      for that the sliceHeight has been calculated. The tensor slicer is
      configured that it will completely slice the nBatch dimension (0) and slice
      the height dimension (1) in chunks of 'sliceHeight' */
-  TensorSlicer in_slice(data.mli_in, height_dimension, in_slice_height,
-                        cfg_local.padding_top, cfg_local.padding_bottom,
-                        overlap);
-  TensorSlicer out_slice(data.mli_out, height_dimension, out_slice_height);
+  ops::micro::TensorSlicer in_slice(data.mli_in.MliTensor(), height_dimension,
+                                    in_slice_height, cfg_local.padding_top,
+                                    cfg_local.padding_bottom, overlap);
+  ops::micro::TensorSlicer out_slice(data.mli_out.MliTensor(), height_dimension,
+                                     out_slice_height);
 
   /* is_local indicates that the tensor is already in local memory,
      so in that case the original tensor can be used,
@@ -671,14 +617,21 @@ TfLiteStatus EvalMli(TfLiteContext* context, const TfLitePoolParams* params,
   mli_tensor* out_ptr = out_is_local ? out_slice.Sub() : &out_local;
 
   while (!out_slice.Done()) {
+    if (!out_is_local) {
+      ops::micro::PrepareLocalTensor(out_slice.Sub(), &out_local);
+      ops::micro::PrepareLocalTensor(in_slice.Sub(), &in_local);
+    }
     cfg_local.padding_top = in_slice.GetPaddingPre();
     cfg_local.padding_bottom = in_slice.GetPaddingPost();
 
     mli_mov_tensor_sync(in_slice.Sub(), &copy_config, in_ptr);
-    if (pooling_type == AveragePooling)
-      mli_krn_avepool_hwc_sa8(in_ptr, &cfg_local, out_ptr);
-    else if (pooling_type == MaxPooling)
-      mli_krn_maxpool_hwc_sa8(in_ptr, &cfg_local, out_ptr);
+    if (pooling_type == AveragePooling) {
+      TFLITE_DCHECK(data.p_mli_krn_avepool_hwc_sa8 != nullptr);
+      data.p_mli_krn_avepool_hwc_sa8(in_ptr, &cfg_local, out_ptr);
+    } else if (pooling_type == MaxPooling) {
+      TFLITE_DCHECK(data.p_mli_krn_maxpool_hwc_sa8 != nullptr);
+      data.p_mli_krn_maxpool_hwc_sa8(in_ptr, &cfg_local, out_ptr);
+    }
     mli_mov_tensor_sync(out_ptr, &copy_config, out_slice.Sub());
 
     in_slice.Next();
@@ -692,7 +645,7 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
                           const TfLiteEvalTensor* input,
                           TfLiteEvalTensor* output) {
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
-  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
+  TFLITE_DCHECK(input->type == kTfLiteInt8);
 
   PoolParams op_params;
   op_params.stride_height = params->stride_height;
@@ -704,22 +657,14 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
   op_params.quantized_activation_min = data.activation_min;
   op_params.quantized_activation_max = data.activation_max;
 
-  if (input->type == kTfLiteUInt8) {
-    reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                               tflite::micro::GetTensorData<uint8_t>(input),
-                               tflite::micro::GetTensorShape(output),
-                               tflite::micro::GetTensorData<uint8_t>(output));
-  } else {
-    reference_integer_ops::AveragePool(
-        op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
-  }
+  reference_integer_ops::AveragePool(
+      op_params, tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int8_t>(output));
 #else
-  TF_LITE_KERNEL_LOG(context,
-                     "Type %s (%d) is not supported by ARC MLI Library.",
-                     TfLiteTypeGetName(input->type), input->type);
+  MicroPrintf("Type %s (%d) is not supported by ARC MLI Library.",
+              TfLiteTypeGetName(input->type), input->type);
 #endif
 }
 
@@ -741,8 +686,8 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
                          tflite::micro::GetTensorShape(output),
                          tflite::micro::GetTensorData<float>(output));
 #else
-  TF_LITE_KERNEL_LOG(
-      context,
+  // Fallback branch: no pooling runs here; only this message is logged.
+  MicroPrintf(
       "Node configuration or type %s (%d) is not supported by ARC MLI Library.",
       TfLiteTypeGetName(input->type), input->type);
 #endif
@@ -752,6 +697,7 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
                       TfLitePoolParams* params, const OpData& data,
                       const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
 #if !defined(TF_LITE_STRIP_REFERENCE_IMPL)
+  TFLITE_DCHECK(input->type == kTfLiteInt8);
   tflite::PoolParams op_params;
   op_params.stride_height = params->stride_height;
   op_params.stride_width = params->stride_width;
@@ -762,26 +708,18 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
   op_params.quantized_activation_min = data.activation_min;
   op_params.quantized_activation_max = data.activation_max;
 
-  if (input->type == kTfLiteUInt8) {
-    reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                           tflite::micro::GetTensorData<uint8_t>(input),
-                           tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<uint8_t>(output));
-  } else {
-    reference_integer_ops::MaxPool(
-        op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
-  }
+  reference_integer_ops::MaxPool(op_params,
+                                 tflite::micro::GetTensorShape(input),
+                                 tflite::micro::GetTensorData<int8_t>(input),
+                                 tflite::micro::GetTensorShape(output),
+                                 tflite::micro::GetTensorData<int8_t>(output));
 #else
-  TF_LITE_KERNEL_LOG(
-      context,
+  // TF_LITE_STRIP_REFERENCE_IMPL build: no pooling runs; only log the type.
+  MicroPrintf(
       "Node configuration or type %s (%d) is not supported by ARC MLI Library.",
       TfLiteTypeGetName(input->type), input->type);
 #endif
 }
-}  // namespace
 
 TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
@@ -798,34 +736,19 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   // Inputs and outputs share the same type, guaranteed by the converter.
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       AverageEvalFloat(context, node, params, data, input, output);
       break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      if (data.is_mli_applicable) {
-        EvalMli(context, params, data, input, output, AveragePooling);
-      } else {
-        AverageEvalQuantized(context, node, params, data, input, output);
-      }
-      break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       if (data.is_mli_applicable) {
         EvalMli(context, params, data, input, output, AveragePooling);
       } else {
@@ -833,8 +756,8 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
       }
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Input type %s is not currently supported",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -853,34 +776,19 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       MaxEvalFloat(context, node, params, data, input, output);
       break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      if (data.is_mli_applicable) {
-        EvalMli(context, params, data, input, output, MaxPooling);
-      } else {
-        MaxEvalQuantized(context, node, params, data, input, output);
-      }
-      break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       if (data.is_mli_applicable) {
         EvalMli(context, params, data, input, output, MaxPooling);
       } else {
@@ -888,39 +796,23 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
       }
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Type %s not currently supported.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace pooling
+}  // namespace
 
 TfLiteRegistration Register_AVERAGE_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::AverageEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, AverageEval);
 }
 
 TfLiteRegistration Register_MAX_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::MaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, MaxEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #elif EI_CLASSIFIER_TFLITE_ENABLE_SILABS_MVP == 1
@@ -1254,9 +1146,6 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node)
 }  // namespace pooling
 }  // namespace sl
 
-namespace ops {
-namespace micro {
-
 TfLiteRegistration Register_MAX_POOL_2D() {
   static TfLiteRegistration max_pool_registration = {
     /*init=*/sl::pooling::Init,
@@ -1272,9 +1161,6 @@ TfLiteRegistration Register_MAX_POOL_2D() {
   return max_pool_registration;
 }
 
-}  // namespace micro
-}  // namespace ops
-
 // Just to keep all_ops_resolver() happy during development ...
 TfLiteRegistration Register_AVERAGE_POOL_2D() {
   static TfLiteRegistration avg_pool_registration = {
@@ -1308,75 +1194,28 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
+
 #include <esp_timer.h>
 
 long long pooling_total_time = 0;
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pooling {
 
 namespace {
-
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  TfLitePaddingValues padding;
-  int32_t activation_min;
-  int32_t activation_max;
-  float activation_min_f32;
-  float activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(const TfLiteContext* context,
-                             const TfLitePoolParams* params,
-                             const TfLiteTensor* input,
-                             const TfLiteTensor* output, OpData* data) {
-  // input: batch, height, width, channel
-  int height = SizeOfDimension(input, 1);
-  int width = SizeOfDimension(input, 2);
-
-  int out_height, out_width;
-
-  data->padding = ComputePaddingHeightWidth(
-      params->stride_height, params->stride_width,
-      /*dilation_rate_height=*/1,
-      /*dilation_rate_width=*/1, height, width, params->filter_height,
-      params->filter_width, params->padding, &out_height, &out_width);
-
-  return kTfLiteOk;
-}
-
-void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
-                      const TfLitePoolParams* params, const OpData* data,
-                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.float_activation_min = data->activation_min_f32;
-  op_params.float_activation_max = data->activation_max_f32;
-  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                             tflite::micro::GetTensorData<float>(input),
-                             tflite::micro::GetTensorShape(output),
-                             tflite::micro::GetTensorData<float>(output));
-}
-
+#if ESP_NN
 void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
-                          const TfLitePoolParams* params, const OpData* data,
+                          const TfLitePoolParams* params, const OpDataPooling* data,
                           const TfLiteEvalTensor* input,
                           TfLiteEvalTensor* output) {
 
@@ -1432,26 +1271,8 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
   }
 }
 
-void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
-                  TfLitePoolParams* params, const OpData* data,
-                  const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.float_activation_min = data->activation_min_f32;
-  op_params.float_activation_max = data->activation_max_f32;
-  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                         tflite::micro::GetTensorData<float>(input),
-                         tflite::micro::GetTensorShape(output),
-                         tflite::micro::GetTensorData<float>(output));
-}
-
 void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                      TfLitePoolParams* params, const OpData* data,
+                      TfLitePoolParams* params, const OpDataPooling* data,
                       const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
 
   const int stride_height = params->stride_height;
@@ -1504,40 +1325,43 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
     }
   }
 }
-
-}  // namespace
+#endif
 
 TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data =
-      static_cast<const OpData*>(node->user_data);
+  const OpDataPooling* data =
+      static_cast<const OpDataPooling*>(node->user_data);
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
   long long start_time = esp_timer_get_time();
   // Inputs and outputs share the same type, guaranteed by the converter.
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-      AverageEvalFloat(context, node, params, data, input, output);
+#endif
+      AveragePoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
+#if ESP_NN
       AverageEvalQuantized(context, node, params, data, input, output);
+#else
+      AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input, output);
+#endif
       break;
     default:
       TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
@@ -1553,31 +1377,35 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data =
-      static_cast<const OpData*>(node->user_data);
+  const OpDataPooling* data =
+      static_cast<const OpDataPooling*>(node->user_data);
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
   long long start_time = esp_timer_get_time();
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-      MaxEvalFloat(context, node, params, data, input, output);
+#endif
+      MaxPoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
+#if ESP_NN
       MaxEvalQuantized(context, node, params, data, input, output);
+#else
+      MaxPoolingEvalQuantized<int8_t>(context, node, params, data, input, output);
+#endif
       break;
     default:
       TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
@@ -1590,64 +1418,23 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
-
-  if (input->type == kTfLiteFloat32) {
-    CalculateActivationRange(params->activation, &data->activation_min_f32,
-                             &data->activation_max_f32);
-  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
-    CalculateActivationRangeQuantized(context, params->activation, output,
-                                      &data->activation_min,
-                                      &data->activation_max);
-  }
-
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
 }
 
-}  // namespace pooling
+}  // namespace
 
 TfLiteRegistration Register_AVERAGE_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::AverageEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval);
 }
 
 TfLiteRegistration Register_MAX_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::MaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
+
 #else
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -1664,187 +1451,54 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace pooling {
 
 namespace {
 
-constexpr int kInputTensor = 0;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  TfLitePaddingValues padding;
-  int32_t activation_min;
-  int32_t activation_max;
-  float activation_min_f32;
-  float activation_max_f32;
-};
-
-TfLiteStatus CalculateOpData(const TfLiteContext* context,
-                             const TfLitePoolParams* params,
-                             const TfLiteTensor* input,
-                             const TfLiteTensor* output, OpData* data) {
-  // input: batch, height, width, channel
-  int height = SizeOfDimension(input, 1);
-  int width = SizeOfDimension(input, 2);
-
-  int out_height, out_width;
-
-  data->padding = ComputePaddingHeightWidth(
-      params->stride_height, params->stride_width,
-      /*dilation_rate_height=*/1,
-      /*dilation_rate_width=*/1, height, width, params->filter_height,
-      params->filter_width, params->padding, &out_height, &out_width);
-
-  return kTfLiteOk;
-}
-
-void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node,
-                      const TfLitePoolParams* params, const OpData* data,
-                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.float_activation_min = data->activation_min_f32;
-  op_params.float_activation_max = data->activation_max_f32;
-  reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                             tflite::micro::GetTensorData<float>(input),
-                             tflite::micro::GetTensorShape(output),
-                             tflite::micro::GetTensorData<float>(output));
-}
-
-void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
-                          const TfLitePoolParams* params, const OpData* data,
-                          const TfLiteEvalTensor* input,
-                          TfLiteEvalTensor* output) {
-  TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8);
-
-  PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->activation_min;
-  op_params.quantized_activation_max = data->activation_max;
-
-  if (input->type == kTfLiteUInt8) {
-    reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input),
-                               tflite::micro::GetTensorData<uint8_t>(input),
-                               tflite::micro::GetTensorShape(output),
-                               tflite::micro::GetTensorData<uint8_t>(output));
-  } else {
-    reference_integer_ops::AveragePool(
-        op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
-  }
-}
-
-void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node,
-                  TfLitePoolParams* params, const OpData* data,
-                  const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.float_activation_min = data->activation_min_f32;
-  op_params.float_activation_max = data->activation_max_f32;
-  reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                         tflite::micro::GetTensorData<float>(input),
-                         tflite::micro::GetTensorShape(output),
-                         tflite::micro::GetTensorData<float>(output));
-}
-
-void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node,
-                      TfLitePoolParams* params, const OpData* data,
-                      const TfLiteEvalTensor* input, TfLiteEvalTensor* output) {
-  tflite::PoolParams op_params;
-  op_params.stride_height = params->stride_height;
-  op_params.stride_width = params->stride_width;
-  op_params.filter_height = params->filter_height;
-  op_params.filter_width = params->filter_width;
-  op_params.padding_values.height = data->padding.height;
-  op_params.padding_values.width = data->padding.width;
-  op_params.quantized_activation_min = data->activation_min;
-  op_params.quantized_activation_max = data->activation_max;
-
-  if (input->type == kTfLiteUInt8) {
-    reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
-                           tflite::micro::GetTensorData<uint8_t>(input),
-                           tflite::micro::GetTensorShape(output),
-                           tflite::micro::GetTensorData<uint8_t>(output));
-  } else {
-    reference_integer_ops::MaxPool(
-        op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
-  }
-}
-}  // namespace
-
 TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataPooling* data =
+      static_cast<const OpDataPooling*>(node->user_data);
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
   // Inputs and outputs share the same type, guaranteed by the converter.
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      AverageEvalFloat(context, node, params, data, input, output);
-      break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      AverageEvalQuantized(context, node, params, data, input, output);
+#endif
+      AveragePoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      AverageEvalQuantized(context, node, params, data, input, output);
+#endif
+      AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                          output);
+      break;
+    case kTfLiteInt16:
+      AveragePoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                           output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Input type %s is not currently supported",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -1855,44 +1509,39 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const OpDataPooling* data =
+      static_cast<const OpDataPooling*>(node->user_data);
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      micro::GetEvalInput(context, node, kPoolingInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      micro::GetEvalOutput(context, node, kPoolingOutputTensor);
 
   switch (input->type) {
     case kTfLiteFloat32:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      MaxEvalFloat(context, node, params, data, input, output);
-      break;
-    case kTfLiteUInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      MaxEvalQuantized(context, node, params, data, input, output);
+#endif
+      MaxPoolingEvalFloat(context, node, params, data, input, output);
       break;
     case kTfLiteInt8:
-      #if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      MaxEvalQuantized(context, node, params, data, input, output);
+#endif
+      MaxPoolingEvalQuantized<int8_t>(context, node, params, data, input,
+                                      output);
+      break;
+    case kTfLiteInt16:
+      MaxPoolingEvalQuantized<int16_t>(context, node, params, data, input,
+                                       output);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Type %s not currently supported.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -1900,61 +1549,19 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
-
-  if (input->type == kTfLiteFloat32) {
-    CalculateActivationRange(params->activation, &data->activation_min_f32,
-                             &data->activation_max_f32);
-  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
-    CalculateActivationRangeQuantized(context, params->activation, output,
-                                      &data->activation_min,
-                                      &data->activation_max);
-  }
-
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
 }
 
-}  // namespace pooling
+}  // namespace
 
 TfLiteRegistration Register_AVERAGE_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::AverageEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval);
 }
 
 TfLiteRegistration Register_MAX_POOL_2D() {
-  return {/*init=*/pooling::Init,
-          /*free=*/nullptr,
-          /*prepare=*/pooling::Prepare,
-          /*invoke=*/pooling::MaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
 
 #endif
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h
new file mode 100644
index 0000000..d33aa23
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h
@@ -0,0 +1,142 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+extern const int kPoolingInputTensor;   // node input index used by the kernels
+extern const int kPoolingOutputTensor;  // node output index used by the kernels
+
+struct OpDataPooling {  // per-node state, filled in by PoolingPrepare
+  TfLitePaddingValues padding;  // result of ComputePaddingHeightWidth
+  int32_t activation_min;       // integer clamp; set for int8/int16 inputs
+  int32_t activation_max;
+  float activation_min_f32;     // float clamp; set for float32 inputs
+  float activation_max_f32;
+};
+// Stores in data->padding the padding derived from params and input H/W.
+TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
+                                    const TfLitePoolParams* params,
+                                    const TfLiteTensor* input,
+                                    const TfLiteTensor* output,
+                                    OpDataPooling* data);
+// Prepare hook: fills the node's OpDataPooling (padding + activation range).
+TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node);
+// Float32 average pooling, delegating to reference_ops::AveragePool.
+void AveragePoolingEvalFloat(const TfLiteContext* context,
+                             const TfLiteNode* node,
+                             const TfLitePoolParams* params,
+                             const OpDataPooling* data,
+                             const TfLiteEvalTensor* input,
+                             TfLiteEvalTensor* output);
+
+template <typename T>
+void AveragePoolingEvalQuantized(TfLiteContext* context, const TfLiteNode* node,
+                                 const TfLitePoolParams* params,
+                                 const OpDataPooling* data,
+                                 const TfLiteEvalTensor* input,
+                                 TfLiteEvalTensor* output) {
+  // Integer average pooling; T is int8_t or int16_t at the call sites.
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+  PoolParams pool_cfg;
+  pool_cfg.filter_height = params->filter_height;
+  pool_cfg.filter_width = params->filter_width;
+  pool_cfg.stride_height = params->stride_height;
+  pool_cfg.stride_width = params->stride_width;
+  pool_cfg.padding_values.height = data->padding.height;
+  pool_cfg.padding_values.width = data->padding.width;
+  pool_cfg.quantized_activation_min = data->activation_min;
+  pool_cfg.quantized_activation_max = data->activation_max;
+
+  const T* src = tflite::micro::GetTensorData<T>(input);
+  T* dst = tflite::micro::GetTensorData<T>(output);
+  reference_integer_ops::AveragePool(
+      pool_cfg, tflite::micro::GetTensorShape(input), src,
+      tflite::micro::GetTensorShape(output), dst);
+}
+// Float32 max pooling, delegating to reference_ops::MaxPool.
+void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
+                         TfLitePoolParams* params, const OpDataPooling* data,
+                         const TfLiteEvalTensor* input,
+                         TfLiteEvalTensor* output);
+
+template <typename T>
+void MaxPoolingEvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                             TfLitePoolParams* params,
+                             const OpDataPooling* data,
+                             const TfLiteEvalTensor* input,
+                             TfLiteEvalTensor* output) {
+  // Integer max pooling; T is int8_t or int16_t at the call sites.
+  TFLITE_DCHECK(input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+  tflite::PoolParams pool_cfg;
+  pool_cfg.filter_height = params->filter_height;
+  pool_cfg.filter_width = params->filter_width;
+  pool_cfg.stride_height = params->stride_height;
+  pool_cfg.stride_width = params->stride_width;
+  pool_cfg.padding_values.height = data->padding.height;
+  pool_cfg.padding_values.width = data->padding.width;
+  pool_cfg.quantized_activation_min = data->activation_min;
+  pool_cfg.quantized_activation_max = data->activation_max;
+
+  const T* src = tflite::micro::GetTensorData<T>(input);
+  T* dst = tflite::micro::GetTensorData<T>(output);
+  reference_integer_ops::MaxPool(pool_cfg, tflite::micro::GetTensorShape(input),
+                                 src, tflite::micro::GetTensorShape(output),
+                                 dst);
+}
+
+#if defined(CMSIS_NN)  // typed registrations are only declared in this branch
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT8();
+
+TfLiteRegistration Register_MAX_POOL_2D_INT8();
+
+TfLiteRegistration Register_AVERAGE_POOL_2D_INT16();
+
+TfLiteRegistration Register_MAX_POOL_2D_INT16();
+#else  // each typed name forwards to the generic registration
+inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT8() {
+  return tflite::Register_AVERAGE_POOL_2D();
+}
+
+inline TfLiteRegistration Register_MAX_POOL_2D_INT8() {
+  return tflite::Register_MAX_POOL_2D();
+}
+
+inline TfLiteRegistration Register_AVERAGE_POOL_2D_INT16() {
+  return tflite::Register_AVERAGE_POOL_2D();
+}
+
+inline TfLiteRegistration Register_MAX_POOL_2D_INT16() {
+  return tflite::Register_MAX_POOL_2D();
+}
+#endif  // defined(CMSIS_NN)
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_POOLING_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cc
new file mode 100644
index 0000000..8eb66e7
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling_common.cc
@@ -0,0 +1,128 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+
+namespace tflite {
+
+const int kPoolingInputTensor = 0;   // pooling reads the node's input 0
+const int kPoolingOutputTensor = 0;  // and writes the node's output 0
+
+TfLiteStatus CalculateOpDataPooling(const TfLiteContext* context,
+                                    const TfLitePoolParams* params,
+                                    const TfLiteTensor* input,
+                                    const TfLiteTensor* output,
+                                    OpDataPooling* data) {
+  // Input is laid out batch/height/width/channel: dims 1 and 2 are spatial.
+  const int in_height = SizeOfDimension(input, 1);
+  const int in_width = SizeOfDimension(input, 2);
+  constexpr int kDilationRate = 1;  // passed for both dilation arguments
+
+  // The helper also reports the output size; only the padding is kept here.
+  int unused_out_height = 0;
+  int unused_out_width = 0;
+  data->padding = ComputePaddingHeightWidth(
+      params->stride_height, params->stride_width, kDilationRate, kDilationRate,
+      in_height, in_width, params->filter_height, params->filter_width,
+      params->padding, &unused_out_height, &unused_out_width);
+  return kTfLiteOk;
+}
+
+// Shared Prepare for the pooling kernels: stores padding and the activation
+// clamp range in OpDataPooling; temp tensors are freed on every exit path.
+TfLiteStatus PoolingPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpDataPooling* data = static_cast<OpDataPooling*>(node->user_data);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kPoolingInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kPoolingOutputTensor);
+  if (output == nullptr) micro_context->DeallocateTempTfLiteTensor(input);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TfLiteStatus status =
+      CalculateOpDataPooling(context, params, input, output, data);
+  if (status != kTfLiteOk) {
+    // Skip the range computation and go straight to the shared cleanup.
+  } else if (input->type == kTfLiteFloat32) {
+    CalculateActivationRange(params->activation, &data->activation_min_f32,
+                             &data->activation_max_f32);
+  } else if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
+    CalculateActivationRangeQuantized(context, params->activation, output,
+                                      &data->activation_min,
+                                      &data->activation_max);
+  } else {
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
+    status = kTfLiteError;  // previously returned here without deallocating
+  }
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return status;
+}
+
+void AveragePoolingEvalFloat(const TfLiteContext* context,
+                             const TfLiteNode* node,
+                             const TfLitePoolParams* params,
+                             const OpDataPooling* data,
+                             const TfLiteEvalTensor* input,
+                             TfLiteEvalTensor* output) {
+  PoolParams pool_cfg;  // window/stride from params, padding/clamp from data
+  pool_cfg.filter_height = params->filter_height;
+  pool_cfg.filter_width = params->filter_width;
+  pool_cfg.stride_height = params->stride_height;
+  pool_cfg.stride_width = params->stride_width;
+  pool_cfg.padding_values.height = data->padding.height;
+  pool_cfg.padding_values.width = data->padding.width;
+  pool_cfg.float_activation_min = data->activation_min_f32;
+  pool_cfg.float_activation_max = data->activation_max_f32;
+  reference_ops::AveragePool(pool_cfg, tflite::micro::GetTensorShape(input),
+                             tflite::micro::GetTensorData<float>(input),
+                             tflite::micro::GetTensorShape(output),
+                             tflite::micro::GetTensorData<float>(output));
+}
+
+void MaxPoolingEvalFloat(TfLiteContext* context, TfLiteNode* node,
+                         TfLitePoolParams* params, const OpDataPooling* data,
+                         const TfLiteEvalTensor* input,
+                         TfLiteEvalTensor* output) {
+  tflite::PoolParams pool_cfg;  // window/stride from params, rest from data
+  pool_cfg.filter_height = params->filter_height;
+  pool_cfg.filter_width = params->filter_width;
+  pool_cfg.stride_height = params->stride_height;
+  pool_cfg.stride_width = params->stride_width;
+  pool_cfg.padding_values.height = data->padding.height;
+  pool_cfg.padding_values.width = data->padding.width;
+  pool_cfg.float_activation_min = data->activation_min_f32;
+  pool_cfg.float_activation_max = data->activation_max_f32;
+  reference_ops::MaxPool(pool_cfg, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<float>(input),
+                         tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<float>(output));
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cc
index b53956f..bceb7ff 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,88 +22,16 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace activations {
-namespace {
-
-TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
-                                  const TfLiteTensor* alpha,
-                                  TfLiteTensor* output, PreluParams* params) {
-  if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 ||
-      output->type == kTfLiteInt16) {
-    double real_multiplier_1 = static_cast<double>(input->params.scale) /
-                               static_cast<double>(output->params.scale);
-    double real_multiplier_2 = static_cast<double>(input->params.scale) *
-                               static_cast<double>(alpha->params.scale) /
-                               static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier_1, &params->output_multiplier_1,
-                       &params->output_shift_1);
-    QuantizeMultiplier(real_multiplier_2, &params->output_multiplier_2,
-                       &params->output_shift_2);
-
-    params->input_offset = -input->params.zero_point;
-    params->alpha_offset = -alpha->params.zero_point;
-    params->output_offset = output->params.zero_point;
-  }
-
-  return kTfLiteOk;
-}
-
-}  // namespace
-
-inline void BroadcastPrelu4DSlowFloat(
-    const RuntimeShape& unextended_input1_shape, const float* input1_data,
-    const RuntimeShape& unextended_input2_shape, const float* input2_data,
-    const RuntimeShape& unextended_output_shape, float* output_data) {
-  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
-  const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
-
-  NdArrayDesc<4> desc1;
-  NdArrayDesc<4> desc2;
-  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
-                                      unextended_input2_shape, &desc1, &desc2);
-
-  for (int b = 0; b < output_shape.Dims(0); ++b) {
-    for (int y = 0; y < output_shape.Dims(1); ++y) {
-      for (int x = 0; x < output_shape.Dims(2); ++x) {
-        for (int c = 0; c < output_shape.Dims(3); ++c) {
-          auto out_idx = Offset(output_shape, b, y, x, c);
-          auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
-          auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
-          auto in1_val = input1_data[in1_idx];
-          auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
-        }
-      }
-    }
-  }
-}
 
 void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(PreluParams));
 }
 
-TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  PreluParams* params = static_cast<PreluParams*>(node->user_data);
-
-  const TfLiteTensor* input = GetInput(context, node, 0);
-  TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* alpha = GetInput(context, node, 1);
-  TF_LITE_ENSURE(context, alpha != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, 0);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  return CalculatePreluParams(input, alpha, output, params);
-}
-
 TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const PreluParams& params =
@@ -123,16 +51,6 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
                                 tflite::micro::GetTensorData<float>(output));
       return kTfLiteOk;
     } break;
-    case kTfLiteUInt8: {
-      reference_ops::BroadcastPrelu4DSlow(
-          params, tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<uint8_t>(input),
-          tflite::micro::GetTensorShape(alpha),
-          tflite::micro::GetTensorData<uint8_t>(alpha),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<uint8_t>(output));
-      return kTfLiteOk;
-    } break;
     case kTfLiteInt8: {
       reference_ops::BroadcastPrelu4DSlow(
           params, tflite::micro::GetTensorShape(input),
@@ -144,26 +62,14 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     default:
-      TF_LITE_KERNEL_LOG(
-          context, "Only float32 and uint8_t are supported currently, got %d.",
-          TfLiteTypeGetName(input->type));
+      MicroPrintf("Only float32 and int8_t are supported currently, got %s.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 }
 
-}  // namespace activations
-
 TfLiteRegistration Register_PRELU() {
-  return {/*init=*/activations::PreluInit,
-          /*free=*/nullptr,
-          /*prepare=*/activations::PreluPrepare,
-          /*invoke=*/activations::PreluEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(PreluInit, PreluPrepare, PreluEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h
new file mode 100644
index 0000000..d5b780a
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h
@@ -0,0 +1,39 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+// Fills `params` for int8/int16 outputs (no-op otherwise); returns kTfLiteOk.
+TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
+                                  const TfLiteTensor* alpha,
+                                  TfLiteTensor* output, PreluParams* params);
+// Broadcasting float PRELU: out = in1 >= 0 ? in1 : in1 * in2, up to 4-D.
+void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
+                               const float* input1_data,
+                               const RuntimeShape& unextended_input2_shape,
+                               const float* input2_data,
+                               const RuntimeShape& unextended_output_shape,
+                               float* output_data);
+// Prepare hook: runs CalculatePreluParams on the node's input/alpha/output.
+TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_PRELU_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cc
new file mode 100644
index 0000000..8c1f2ef
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu_common.cc
@@ -0,0 +1,105 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/prelu.h"
+
+namespace tflite {
+
+TfLiteStatus CalculatePreluParams(const TfLiteTensor* input,
+                                  const TfLiteTensor* alpha,
+                                  TfLiteTensor* output, PreluParams* params) {
+  // Outputs other than int8/int16 need nothing: leave `params` untouched.
+  if (output->type != kTfLiteInt8 && output->type != kTfLiteInt16) {
+    return kTfLiteOk;
+  }
+  const double in_scale = static_cast<double>(input->params.scale);
+  const double alpha_scale = static_cast<double>(alpha->params.scale);
+  const double out_scale = static_cast<double>(output->params.scale);
+
+  // Multiplier 1 is in/out; multiplier 2 additionally folds in alpha's scale.
+  QuantizeMultiplier(in_scale / out_scale, &params->output_multiplier_1,
+                     &params->output_shift_1);
+  QuantizeMultiplier(in_scale * alpha_scale / out_scale,
+                     &params->output_multiplier_2, &params->output_shift_2);
+  params->input_offset = -input->params.zero_point;
+  params->alpha_offset = -alpha->params.zero_point;
+  params->output_offset = output->params.zero_point;
+  return kTfLiteOk;
+}
+
+void BroadcastPrelu4DSlowFloat(const RuntimeShape& unextended_input1_shape,
+                               const float* input1_data,
+                               const RuntimeShape& unextended_input2_shape,
+                               const float* input2_data,
+                               const RuntimeShape& unextended_output_shape,
+                               float* output_data) {
+  // All three shapes must fit in four dimensions.
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  NdArrayDesc<4> in1_desc;
+  NdArrayDesc<4> in2_desc;
+  NdArrayDescsForElementwiseBroadcast(
+      unextended_input1_shape, unextended_input2_shape, &in1_desc, &in2_desc);
+
+  for (int batch = 0; batch < output_shape.Dims(0); ++batch) {
+    for (int row = 0; row < output_shape.Dims(1); ++row) {
+      for (int col = 0; col < output_shape.Dims(2); ++col) {
+        for (int ch = 0; ch < output_shape.Dims(3); ++ch) {
+          const float in1 =
+              input1_data[SubscriptToIndex(in1_desc, batch, row, col, ch)];
+          const float in2 =
+              input2_data[SubscriptToIndex(in2_desc, batch, row, col, ch)];
+          output_data[Offset(output_shape, batch, row, col, ch)] =
+              in1 >= 0.0f ? in1 : in1 * in2;
+        }
+      }
+    }
+  }
+}
+
+// Prepare hook for PRELU: derives the quantization parameters into the node's
+// PreluParams. Temp tensors are released on every exit path.
+TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  PreluParams* params = static_cast<PreluParams*>(node->user_data);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TfLiteTensor* alpha = micro_context->AllocateTempInputTensor(node, 1);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  const bool have_tensors =
+      input != nullptr && alpha != nullptr && output != nullptr;
+  const TfLiteStatus status =
+      have_tensors ? CalculatePreluParams(input, alpha, output, params)
+                   : kTfLiteError;
+  if (input != nullptr) micro_context->DeallocateTempTfLiteTensor(input);
+  if (alpha != nullptr) micro_context->DeallocateTempTfLiteTensor(alpha);
+  if (output != nullptr) micro_context->DeallocateTempTfLiteTensor(output);
+  TF_LITE_ENSURE(context, have_tensors);  // log + fail after cleanup
+  return status;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cc
index ad02ec3..487f502 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.cc
@@ -34,14 +34,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 }  // namespace
 
 TfLiteRegistration Register_QUANTIZE() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/PrepareQuantizeReference,
-          /*invoke=*/EvalQuantizeReference,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PrepareQuantizeReference,
+                                   EvalQuantizeReference);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cc
index 8cb0455..5ba29f4 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize_common.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <limits>
+
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/quantize.h"
@@ -21,6 +23,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/quantize.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -33,9 +36,11 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, 0);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
   TF_LITE_ENSURE(context, output != nullptr);
 
   // TODO(b/128934713): Add support for fixed-point per-channel quantization.
@@ -48,10 +53,10 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
   TF_LITE_ENSURE(context, affine_quantization->scale);
   TF_LITE_ENSURE(context, affine_quantization->scale->size == 1);
 
-  TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 ||
-                              input->type == kTfLiteInt16 ||
-                              input->type == kTfLiteInt8 ||
-                              input->type == kTfLiteUInt8);
+  TF_LITE_ENSURE(
+      context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt32 ||
+                   input->type == kTfLiteInt16 || input->type == kTfLiteInt8 ||
+                   input->type == kTfLiteUInt8);
   TF_LITE_ENSURE(context, output->type == kTfLiteInt8 ||
                               output->type == kTfLiteInt16 ||
                               output->type == kTfLiteInt32 ||
@@ -78,6 +83,9 @@ TfLiteStatus PrepareQuantizeReference(TfLiteContext* context,
   data->quantization_params.scale = static_cast<double>(output->params.scale);
 
   data->input_zero_point = input->params.zero_point;
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -105,9 +113,32 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorData<int16_t>(output));
         return kTfLiteOk;
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
+        return kTfLiteError;
+    }
+  } else if (input->type == kTfLiteInt32) {
+    size_t size = ElementCount(*input->dims);
+    switch (output->type) {
+      case kTfLiteInt8:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int32_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int8_t>(output));
+        break;
+      case kTfLiteInt16:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int32_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int16_t>(output));
+        break;
+      default:
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else if (input->type == kTfLiteInt16) {
@@ -135,9 +166,9 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorData<int32_t>(output));
         return kTfLiteOk;
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else if (input->type == kTfLiteInt8) {
@@ -159,6 +190,13 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
             data->input_zero_point, data->quantization_params.zero_point,
             tflite::micro::GetTensorData<uint8_t>(output));
         break;
+      case kTfLiteInt16:
+        reference_ops::Requantize(
+            tflite::micro::GetTensorData<int8_t>(input), size,
+            data->requantize_output_multiplier, data->requantize_output_shift,
+            data->input_zero_point, data->quantization_params.zero_point,
+            tflite::micro::GetTensorData<int16_t>(output));
+        break;
       case kTfLiteInt32:
         reference_ops::Requantize(
             tflite::micro::GetTensorData<int8_t>(input), size,
@@ -167,9 +205,9 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorData<int32_t>(output));
         break;
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                           TfLiteTypeGetName(input->type),
-                           TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else if (input->type == kTfLiteUInt8) {
@@ -183,15 +221,15 @@ TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
             tflite::micro::GetTensorData<int8_t>(output));
         break;
       default:
-        TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                          TfLiteTypeGetName(input->type),
-                          TfLiteTypeGetName(output->type));
+        MicroPrintf("Input %s, output %s not supported.",
+                    TfLiteTypeGetName(input->type),
+                    TfLiteTypeGetName(output->type));
         return kTfLiteError;
     }
   } else {
-    TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                       TfLiteTypeGetName(input->type),
-                       TfLiteTypeGetName(output->type));
+    MicroPrintf("Input %s, output %s not supported.",
+                TfLiteTypeGetName(input->type),
+                TfLiteTypeGetName(output->type));
     return kTfLiteError;
   }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cc
new file mode 100644
index 0000000..ba1fe4a
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/read_variable.cc
@@ -0,0 +1,87 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+constexpr int kInputVariableId = 0;
+constexpr int kOutputValue = 0;
+
+// Prepare: checks node arity and the resource-id input of READ_VARIABLE.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(NumInputs(node) == 1);
+  TFLITE_DCHECK(NumOutputs(node) == 1);
+
+  // Borrow the id input as a temp tensor; it is released before returning.
+  MicroContext* micro_ctx = GetMicroContext(context);
+  TfLiteTensor* resource_id =
+      micro_ctx->AllocateTempInputTensor(node, kInputVariableId);
+
+  // The id input must exist, have resource type and hold a single element.
+  TFLITE_DCHECK(resource_id != nullptr);
+  TFLITE_DCHECK(resource_id->type == kTfLiteResource);
+  TFLITE_DCHECK(NumElements(resource_id) == 1);
+  micro_ctx->DeallocateTempTfLiteTensor(resource_id);
+  return kTfLiteOk;
+}
+
+// Eval: passes the input's id and the output tensor to Read() on the graph's
+// MicroResourceVariables; returns kTfLiteError when the graph has none.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* resource_id =
+      tflite::micro::GetEvalInput(context, node, kInputVariableId);
+  TFLITE_DCHECK(resource_id != nullptr);
+  TfLiteEvalTensor* value_out =
+      tflite::micro::GetEvalOutput(context, node, kOutputValue);
+  TFLITE_DCHECK(value_out != nullptr);
+
+  MicroResourceVariables* variables =
+      tflite::GetMicroContext(context)->graph().GetResourceVariables();
+  if (variables == nullptr) {
+    MicroPrintf(
+        "READ_VARIABLE requires resource variables. Please create "
+        "ResourceVariables and pass it to the interpreter.");
+    return kTfLiteError;
+  }
+
+  // data.i32[0] of the input tensor is used as the variable id.
+  TF_LITE_ENSURE_OK(context,
+                    variables->Read(resource_id->data.i32[0], value_out));
+  return kTfLiteOk;
+}
+
+}  // namespace
+// Returns the READ_VARIABLE op registration (Prepare/Eval, null first arg).
+TfLiteRegistration Register_READ_VARIABLE() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cc
index 7bf121c..6ec5aad 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/real.cc
@@ -26,7 +26,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
 namespace ops {
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cc
index c152beb..b346282 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,531 +23,64 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace reduce {
-
-constexpr int kMaxNumberOfAxis = 4;
-constexpr int kMaxNumberOfReducedAxis = 2;
-
-struct OpData {
-  int32_t multiplier;
-  int shift;
-  int temp_buffer_idx;
-  int resolved_axis_idx;
-  int input_zp;
-  float input_scale;
-  int output_zp;
-  float output_scale;
-  int num_output_elements;
-};
 
 void* InitReduce(TfLiteContext* context, const char* buffer, size_t length) {
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataReduce));
 }
 
-TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
-  // Inputs Tensor (dtype depends on quantization):
-  // [0] = Input
-  // [1] = Axis
-  const TfLiteTensor* input = GetInput(context, node, 0);
-
-  // Outputs Tensor (dtype depends on quantization):
-  // [0] = Output
-
-  // Validate number of inputs and outputs
-  TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
-
-  // Validate axis type
-  const TfLiteTensor* axis = GetInput(context, node, 1);
-  TF_LITE_ENSURE(context, axis != nullptr);
-  TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
-
-  if (input->type == kTfLiteInt8) {
-    OpData* data = static_cast<OpData*>(node->user_data);
-    const TfLiteTensor* output = GetOutput(context, node, 0);
-    const double real_multiplier = static_cast<double>(input->params.scale) /
-                                   static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier, &data->multiplier, &data->shift);
-  }
-
-  return kTfLiteOk;
+TfLiteStatus PrepareMax(TfLiteContext* context, TfLiteNode* node) {
+  OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
+  return PrepareMinMaxHelper(context, node, op_data);
 }
 
-TfLiteStatus PrepareMinMax(TfLiteContext* context, TfLiteNode* node) {
-  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
-
-  OpData* op_data = static_cast<OpData*>(node->user_data);
-  const TfLiteTensor* input = GetInput(context, node, 0);
-  const TfLiteTensor* output = GetOutput(context, node, 0);
-  const TfLiteTensor* axis = GetInput(context, node, 1);
-
-  op_data->input_scale = input->params.scale;
-  op_data->output_scale = output->params.scale;
-  op_data->num_output_elements = NumElements(output);
-
-  context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,
-                                       &op_data->temp_buffer_idx);
-  context->RequestScratchBufferInArena(
-      context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
-      &op_data->resolved_axis_idx);
-
-  return kTfLiteOk;
+TfLiteStatus PrepareMin(TfLiteContext* context, TfLiteNode* node) {
+  OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
+  return PrepareMinMaxHelper(context, node, op_data);
 }
 
 TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, 0);
-  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
-  const TfLiteTensor* output = GetOutput(context, node, 0);
-  if (input->type == kTfLiteInt8) {
-    const double real_multiplier = static_cast<double>(input->params.scale) /
-                                   static_cast<double>(output->params.scale);
-    QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
-  }
-
-  int output_size = NumElements(output);
-  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
-    context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
-                                         &op_data->temp_buffer_idx);
-    op_data->input_zp = input->params.zero_point;
-    op_data->input_scale = input->params.scale;
-    op_data->output_zp = output->params.zero_point;
-    op_data->output_scale = output->params.scale;
-  }
-
-  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
-  // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
-  return kTfLiteOk;
-}
-
-void ResolveAxis(const int* axis_data, int axis_count,
-                 tflite::MeanParams* op_params) {
-  int i = 0;
-  for (; i < axis_count; ++i) {
-    op_params->axis[i] = static_cast<int16_t>(axis_data[i]);
-  }
-  for (; i < 4; ++i) {
-    op_params->axis[i] = 1;
-  }
-  op_params->axis_count = axis_count;
+  return PrepareMeanOrSumHelper(context, node,
+                                static_cast<OpDataReduce*>(node->user_data));
 }
 
 TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
-  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
-  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
-  TfLiteReducerParams* params =
-      reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
-  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
-
-  int num_axis = static_cast<int>(ElementCount(*axis->dims));
-  int temp_index[kMaxNumberOfAxis];
-  int resolved_axis[kMaxNumberOfReducedAxis];
-
-  tflite::MeanParams op_params;
-  ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
-
-  // Special case mean implementation exists for 4D mean across axes 1 and 2.
-  bool special_case_4d_axes_1_and_2 =
-      input->dims->size == 4 && op_params.axis_count == 2 &&
-      ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-       (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-
-  switch (input->type) {
-    case kTfLiteFloat32: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_ops::MeanOrSum(op_params, tflite::micro::GetTensorShape(input),
-                            tflite::micro::GetTensorData<float>(input),
-                            tflite::micro::GetTensorShape(output),
-                            tflite::micro::GetTensorData<float>(output),
-                            false);
-      } else {
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(
-                tflite::micro::GetTensorData<float>(input), input->dims->data,
-                input->dims->size, tflite::micro::GetTensorData<float>(output),
-                output->dims->data, output->dims->size,
-                tflite::micro::GetTensorData<int>(axis), num_axis,
-                params->keep_dims, temp_index, resolved_axis,
-                tflite::micro::GetTensorData<float>(output),
-                false));
-      }
-    } break;
-    case kTfLiteInt8: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_integer_ops::MeanOrSum(
-            op_params, op_data->multiplier, op_data->shift,
-            tflite::micro::GetTensorShape(input),
-            tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
-            tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp,
-            false);
-      } else if (op_data->input_zp == op_data->output_zp &&
-                 op_data->input_scale == op_data->output_scale) {
-        int32_t* temp_buffer = static_cast<int32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(
-                tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
-                input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
-                output->dims->data, output->dims->size,
-                tflite::micro::GetTensorData<int>(axis), num_axis,
-                params->keep_dims, temp_index, resolved_axis, temp_buffer,
-                false));
-      } else {
-        int32_t* temp_buffer = static_cast<int32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::QuantizedMeanOrSum(
-                tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
-                op_data->input_scale, input->dims->data, input->dims->size,
-                tflite::micro::GetTensorData<int8_t>(output),
-                op_data->output_zp, op_data->output_scale, output->dims->data,
-                output->dims->size, tflite::micro::GetTensorData<int>(axis),
-                num_axis, params->keep_dims, temp_index, resolved_axis,
-                temp_buffer, false));
-      }
-    } break;
-    case kTfLiteUInt8: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_ops::MeanOrSum(op_params, tflite::micro::GetTensorShape(input),
-                            tflite::micro::GetTensorData<uint8_t>(input),
-                            op_data->input_zp, op_data->input_scale,
-                            tflite::micro::GetTensorShape(output),
-                            tflite::micro::GetTensorData<uint8_t>(output),
-                            op_data->output_zp, op_data->output_scale,
-                            false);
-      } else if (op_data->input_zp == op_data->output_zp &&
-                 op_data->input_scale == op_data->output_scale) {
-        uint32_t* temp_buffer = static_cast<uint32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(tflite::micro::GetTensorData<uint8_t>(input),
-                                input->dims->data, input->dims->size,
-                                tflite::micro::GetTensorData<uint8_t>(output),
-                                output->dims->data, output->dims->size,
-                                tflite::micro::GetTensorData<int>(axis),
-                                num_axis, params->keep_dims, temp_index,
-                                resolved_axis, temp_buffer,
-                                false));
-      } else {
-        uint32_t* temp_buffer = static_cast<uint32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::QuantizedMeanOrSum(
-                tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
-                op_data->input_scale, input->dims->data, input->dims->size,
-                tflite::micro::GetTensorData<uint8_t>(output),
-                op_data->output_zp, op_data->output_scale, output->dims->data,
-                output->dims->size, tflite::micro::GetTensorData<int>(axis),
-                num_axis, params->keep_dims, temp_index, resolved_axis,
-                temp_buffer, false));
-      }
-    } break;
-    default:
-      TF_LITE_ENSURE_MSG(context, false,
-                         "Currently, only float32, int8 or uint8 input type "
-                         "is supported.");
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
-  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
-  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
-  TfLiteReducerParams* params =
-      reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
-  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
-
-  int num_axis = static_cast<int>(ElementCount(*axis->dims));
-  int temp_index[kMaxNumberOfAxis];
-  int resolved_axis[kMaxNumberOfReducedAxis];
-
-  tflite::MeanParams op_params;
-  ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
-
-  // Special case mean implementation exists for 4D mean across axes 1 and 2.
-  bool special_case_4d_axes_1_and_2 =
-      input->dims->size == 4 && op_params.axis_count == 2 &&
-      ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
-       (op_params.axis[0] == 2 && op_params.axis[1] == 1));
-
-  switch (input->type) {
-    case kTfLiteFloat32: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_ops::MeanOrSum(op_params, tflite::micro::GetTensorShape(input),
-                            tflite::micro::GetTensorData<float>(input),
-                            tflite::micro::GetTensorShape(output),
-                            tflite::micro::GetTensorData<float>(output),
-                            true);
-      } else {
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(
-                tflite::micro::GetTensorData<float>(input), input->dims->data,
-                input->dims->size, tflite::micro::GetTensorData<float>(output),
-                output->dims->data, output->dims->size,
-                tflite::micro::GetTensorData<int>(axis), num_axis,
-                params->keep_dims, temp_index, resolved_axis,
-                tflite::micro::GetTensorData<float>(output),
-                true));
-      }
-    } break;
-    case kTfLiteInt8: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_integer_ops::MeanOrSum(
-            op_params, op_data->multiplier, op_data->shift,
-            tflite::micro::GetTensorShape(input),
-            tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
-            tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp,
-            true);
-      } else if (op_data->input_zp == op_data->output_zp &&
-                 op_data->input_scale == op_data->output_scale) {
-        int32_t* temp_buffer = static_cast<int32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(
-                tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
-                input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
-                output->dims->data, output->dims->size,
-                tflite::micro::GetTensorData<int>(axis), num_axis,
-                params->keep_dims, temp_index, resolved_axis, temp_buffer,
-                true));
-      } else {
-        int32_t* temp_buffer = static_cast<int32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::QuantizedMeanOrSum(
-                tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
-                op_data->input_scale, input->dims->data, input->dims->size,
-                tflite::micro::GetTensorData<int8_t>(output),
-                op_data->output_zp, op_data->output_scale, output->dims->data,
-                output->dims->size, tflite::micro::GetTensorData<int>(axis),
-                num_axis, params->keep_dims, temp_index, resolved_axis,
-                temp_buffer, true));
-      }
-    } break;
-    case kTfLiteUInt8: {
-      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
-      if (params->keep_dims && special_case_4d_axes_1_and_2) {
-        reference_ops::MeanOrSum(op_params, tflite::micro::GetTensorShape(input),
-                            tflite::micro::GetTensorData<uint8_t>(input),
-                            op_data->input_zp, op_data->input_scale,
-                            tflite::micro::GetTensorShape(output),
-                            tflite::micro::GetTensorData<uint8_t>(output),
-                            op_data->output_zp, op_data->output_scale,
-                            true);
-      } else if (op_data->input_zp == op_data->output_zp &&
-                 op_data->input_scale == op_data->output_scale) {
-        uint32_t* temp_buffer = static_cast<uint32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::MeanOrSum(tflite::micro::GetTensorData<uint8_t>(input),
-                                input->dims->data, input->dims->size,
-                                tflite::micro::GetTensorData<uint8_t>(output),
-                                output->dims->data, output->dims->size,
-                                tflite::micro::GetTensorData<int>(axis),
-                                num_axis, params->keep_dims, temp_index,
-                                resolved_axis, temp_buffer,
-                                true));
-      } else {
-        uint32_t* temp_buffer = static_cast<uint32_t*>(
-            context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-        TF_LITE_ENSURE(
-            context,
-            reference_ops::QuantizedMeanOrSum(
-                tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
-                op_data->input_scale, input->dims->data, input->dims->size,
-                tflite::micro::GetTensorData<uint8_t>(output),
-                op_data->output_zp, op_data->output_scale, output->dims->data,
-                output->dims->size, tflite::micro::GetTensorData<int>(axis),
-                num_axis, params->keep_dims, temp_index, resolved_axis,
-                temp_buffer, true));
-      }
-    } break;
-    default:
-      TF_LITE_ENSURE_MSG(context, false,
-                         "Currently, only float32, int8 or uint8 input type "
-                         "is supported.");
-  }
-  return kTfLiteOk;
+  return EvalMeanHelper(context, node,
+                        static_cast<OpDataReduce*>(node->user_data));
 }
 
 TfLiteStatus EvalMax(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
-  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
-  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  TfLiteReducerParams* params =
-      static_cast<TfLiteReducerParams*>(node->builtin_data);
-  OpData* op_data = static_cast<OpData*>(node->user_data);
-
-  // Interpret an axis tensor with null dimensions as a scalar
-  int num_axis = static_cast<int>(ElementCount(*axis->dims));
-  int* temp_buffer = static_cast<int*>(
-      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-  int* resolved_axis = static_cast<int*>(
-      context->GetScratchBuffer(context, op_data->resolved_axis_idx));
-  switch (input->type) {
-    case kTfLiteFloat32:
-      TF_LITE_ENSURE(
-          context,
-          reference_ops::ReduceGeneric<float>(
-              tflite::micro::GetTensorData<float>(input), input->dims->data,
-              input->dims->size, tflite::micro::GetTensorData<float>(output),
-              output->dims->data, output->dims->size,
-              tflite::micro::GetTensorData<int>(axis), num_axis,
-              params->keep_dims, temp_buffer, resolved_axis,
-              std::numeric_limits<float>::lowest(),
-              [](const float current, const float in) -> float {
-                return (in > current) ? in : current;
-              }));
-      break;
-    case kTfLiteInt8:
-      TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
-                        static_cast<double>(op_data->output_scale));
-      TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
-      TF_LITE_ENSURE(
-          context,
-          reference_ops::ReduceGeneric<int8_t>(
-              tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
-              input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
-              output->dims->data, output->dims->size,
-              tflite::micro::GetTensorData<int>(axis), num_axis,
-              params->keep_dims, temp_buffer, resolved_axis,
-              std::numeric_limits<int8_t>::lowest(),
-              [](const int8_t current, const int8_t in) -> int8_t {
-                return (in > current) ? in : current;
-              }));
-      break;
-    default:
-      TF_LITE_KERNEL_LOG(context,
-                         "Only float32 and int8 types are supported.\n");
-      return kTfLiteError;
-  }
-  return kTfLiteOk;
+  OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
+  return EvalMaxHelper(context, node, op_data);
 }
 
 TfLiteStatus EvalMin(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
-  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
-  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  TfLiteReducerParams* params =
-      static_cast<TfLiteReducerParams*>(node->builtin_data);
-  OpData* op_data = static_cast<OpData*>(node->user_data);
-
-  // Interpret an axis tensor with null dimensions as a scalar
-  int num_axis = static_cast<int>(ElementCount(*axis->dims));
-  int* temp_buffer = static_cast<int*>(
-      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
-  int* resolved_axis = static_cast<int*>(
-      context->GetScratchBuffer(context, op_data->resolved_axis_idx));
-  switch (input->type) {
-    case kTfLiteFloat32:
-      TF_LITE_ENSURE(
-          context,
-          reference_ops::ReduceGeneric<float>(
-              tflite::micro::GetTensorData<float>(input), input->dims->data,
-              input->dims->size, tflite::micro::GetTensorData<float>(output),
-              output->dims->data, output->dims->size,
-              tflite::micro::GetTensorData<int>(axis), num_axis,
-              params->keep_dims, temp_buffer, resolved_axis,
-              std::numeric_limits<float>::max(),
-              [](const float current, const float in) -> float {
-                return (in < current) ? in : current;
-              }));
-      break;
-    case kTfLiteInt8:
-      TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),
-                        static_cast<double>(op_data->output_scale));
-      TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);
-      TF_LITE_ENSURE(
-          context,
-          reference_ops::ReduceGeneric<int8_t>(
-              tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
-              input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
-              output->dims->data, output->dims->size,
-              tflite::micro::GetTensorData<int>(axis), num_axis,
-              params->keep_dims, temp_buffer, resolved_axis,
-              std::numeric_limits<int8_t>::max(),
-              [](const int8_t current, const int8_t in) -> int8_t {
-                return (in < current) ? in : current;
-              }));
-      break;
-    default:
-      TF_LITE_KERNEL_LOG(context,
-                         "Only float32 and int8 types are supported.\n");
-      return kTfLiteError;
-  }
-  return kTfLiteOk;
+  OpDataReduce* op_data = static_cast<OpDataReduce*>(node->user_data);
+  return EvalMinHelper(context, node, op_data);
 }
 
-}  // namespace reduce
+TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {  // invoke callback handed to RegisterOp by Register_SUM
+  return EvalSumHelper(context, node,  // all work is done by the shared helper
+                       static_cast<OpDataReduce*>(node->user_data));  // user_data is read as this node's OpDataReduce
+}
 
 TfLiteRegistration Register_MEAN() {
-  return {/*init=*/reduce::InitReduce,
-          /*free=*/nullptr,
-          /*prepare=*/reduce::PrepareMeanOrSum,
-          /*invoke=*/reduce::EvalMean,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean);
 }
 
-TfLiteRegistration Register_SUM() {
-  return {/*init=*/reduce::InitReduce,
-          /*free=*/nullptr,
-          /*prepare=*/reduce::PrepareMeanOrSum,
-          /*invoke=*/reduce::EvalSum,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+TfLiteRegistration Register_REDUCE_MAX() {
+  return tflite::micro::RegisterOp(InitReduce, PrepareMax, EvalMax);
 }
 
 TfLiteRegistration Register_REDUCE_MIN() {
-  return {/*init=*/reduce::InitReduce,
-          /*free=*/nullptr,
-          /*prepare=*/reduce::PrepareMinMax,
-          /*invoke=*/reduce::EvalMin,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(InitReduce, PrepareMin, EvalMin);
 }
 
-TfLiteRegistration Register_REDUCE_MAX() {
-  return {/*init=*/reduce::InitReduce,
-          /*free=*/nullptr,
-          /*prepare=*/reduce::PrepareMinMax,
-          /*invoke=*/reduce::EvalMax,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+TfLiteRegistration Register_SUM() {
+  return tflite::micro::RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h
new file mode 100644
index 0000000..6780df4
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h
@@ -0,0 +1,71 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+extern const int kMaxNumberOfAxis;  // defined in reduce_common.cc; length of the temp_index stack arrays there
+extern const int kMaxNumberOfReducedAxis;  // defined in reduce_common.cc; length of the resolved_axis stack arrays there
+
+struct OpDataReduce {  // per-node state written by the Prepare*Helper functions and read by the Eval*Helper functions
+  int32_t multiplier;  // fixed-point multiplier from QuantizeMultiplier(input scale / output scale)
+  int shift;  // shift produced together with `multiplier`
+  int temp_buffer_idx;  // scratch-buffer index obtained from RequestScratchBufferInArena
+  int resolved_axis_idx;  // scratch-buffer index for the resolved axis list (requested by PrepareMinMaxHelper)
+  int input_zp;  // input tensor zero point
+  float input_scale;  // input tensor quantization scale
+  int output_zp;  // output tensor zero point
+  float output_scale;  // output tensor quantization scale
+  int num_output_elements;  // NumElements(output), written by PrepareMinMaxHelper
+  int num_axis;  // NumElements(axis), written by PrepareMeanOrSumHelper
+};
+
+TfLiteStatus PrepareMinMaxHelper(TfLiteContext* context, TfLiteNode* node,  // prepare step for min/max: validates the node, fills op_data, requests scratch buffers
+                              OpDataReduce* op_data);
+
+TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node,  // prepare step for mean/sum: fills op_data, validates the node
+                                    OpDataReduce* op_data);
+
+TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node,  // max reduction over the axis tensor; float32 and int8 only
+                           OpDataReduce* op_data);
+
+TfLiteStatus EvalMinHelper(TfLiteContext* context, TfLiteNode* node,  // min reduction over the axis tensor; float32 and int8 only
+                           OpDataReduce* op_data);
+
+TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node,  // mean reduction; float32, int8 and int16
+                            OpDataReduce* op_data);
+
+TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node,  // sum reduction; float32, int8 and int16
+                           OpDataReduce* op_data);
+
+void ReduceResolveAxis(const int* axis_data, int axis_count,  // NOTE(review): reduce_common.cc in this patch defines `ResolveAxis`, not this name — confirm a definition exists
+                       MeanParams* op_params);
+
+TfLiteRegistration Register_MEAN();  // reduce.cc: RegisterOp(InitReduce, PrepareMeanOrSum, EvalMean)
+TfLiteRegistration Register_REDUCE_MAX();  // reduce.cc: RegisterOp(InitReduce, PrepareMax, EvalMax)
+TfLiteRegistration Register_REDUCE_MIN();  // reduce.cc: RegisterOp(InitReduce, PrepareMin, EvalMin)
+TfLiteRegistration Register_SUM();  // reduce.cc: RegisterOp(InitReduce, PrepareMeanOrSum, EvalSum)
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_REDUCE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cc
new file mode 100644
index 0000000..a2c5c38
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce_common.cc
@@ -0,0 +1,417 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+const int kMaxNumberOfAxis = 5;  // length of the temp_index stack arrays in EvalMeanHelper / EvalSumHelper
+const int kMaxNumberOfReducedAxis = 2;  // length of the resolved_axis stack arrays in EvalMeanHelper / EvalSumHelper
+
+// Shared validation for the reduce Prepare helpers: requires two inputs, one
+// output and an int32 axis tensor. For int8 input it also quantizes
+// input_scale / output_scale into *multiplier and *shift.
+TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node,
+                           int32_t* multiplier, int* shift) {
+  // Check the tensor counts first so a malformed node is rejected before any
+  // temp tensor is allocated or dereferenced.
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  // Inputs: [0] = data (dtype depends on quantization), [1] = axis.
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
+  TF_LITE_ENSURE(context, axis != nullptr);
+  TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
+
+  if (input->type == kTfLiteInt8) {
+    // Output [0] is only needed here for its scale.
+    TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+    TF_LITE_ENSURE(context, output != nullptr);
+    const double real_multiplier = static_cast<double>(input->params.scale) /
+                                   static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier, multiplier, shift);
+    micro_context->DeallocateTempTfLiteTensor(output);
+  }
+  micro_context->DeallocateTempTfLiteTensor(axis);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  return kTfLiteOk;
+}
+
+// Min/max prepare: runs PrepareSimple, caches quantization params in op_data
+// and requests the two scratch buffers the min/max Eval helpers fetch.
+TfLiteStatus PrepareMinMaxHelper(TfLiteContext* context, TfLiteNode* node,
+                                 OpDataReduce* op_data) {
+  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node, &op_data->multiplier,
+                                           &op_data->shift));
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
+  // The int8 Eval paths compare zero points as well as scales; store both.
+  op_data->input_zp = input->params.zero_point;
+  op_data->input_scale = input->params.scale;
+  op_data->output_zp = output->params.zero_point;
+  op_data->output_scale = output->params.scale;
+  op_data->num_output_elements = NumElements(output);
+  context->RequestScratchBufferInArena(context, sizeof(int) * input->dims->size,  // one int per input dimension
+                                       &op_data->temp_buffer_idx);
+  context->RequestScratchBufferInArena(  // one int per element of the axis tensor
+      context, sizeof(int) * static_cast<int>(ElementCount(*axis->dims)),
+      &op_data->resolved_axis_idx);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(axis);
+  return kTfLiteOk;
+}
+
+// Mean/sum prepare: validates the node, then caches the axis count and, for
+// int8/int16 input, the rescale factor, quantization params and scratch index.
+TfLiteStatus PrepareMeanOrSumHelper(TfLiteContext* context, TfLiteNode* node,
+                                    OpDataReduce* op_data) {
+  // Validate before any tensor below is dereferenced; a failure here returns
+  // before this function's temp tensors are allocated.
+  TF_LITE_ENSURE_OK(context, PrepareSimple(context, node, &op_data->multiplier,
+                                           &op_data->shift));
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 1);
+
+  const int output_size = NumElements(output);
+  op_data->num_axis = NumElements(axis);
+
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {  // PrepareSimple only computes the multiplier for int8
+    const double real_multiplier = static_cast<double>(input->params.scale) /
+                                   static_cast<double>(output->params.scale);
+    QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
+    context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),  // one int32 accumulator per output element
+                                         &op_data->temp_buffer_idx);
+    op_data->input_zp = input->params.zero_point;
+    op_data->input_scale = input->params.scale;
+    op_data->output_zp = output->params.zero_point;
+    op_data->output_scale = output->params.scale;
+  }
+  // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(axis);
+  return kTfLiteOk;
+}
+
+// Copies the axis list into op_params->axis, pads unused slots with 1 and records axis_count.
+void ResolveAxis(const int* axis_data, int axis_count,
+                 tflite::MeanParams* op_params) {
+  constexpr int kAxisSlots = 4;  // number of axis slots this function fills
+  for (int i = 0; i < kAxisSlots; ++i) {
+    // Entries beyond the last slot are not copied, so a long axis list cannot overrun the array.
+    op_params->axis[i] =
+        static_cast<int16_t>((i < axis_count) ? axis_data[i] : 1);
+  }
+  op_params->axis_count = axis_count;  // the full count is kept, so the two-axis special case still requires exactly two
+}
+
+template <typename T>  // T is the element type of the input and output tensors
+TfLiteStatus QuantizedMeanOrSum(TfLiteContext* context, TfLiteNode* node,  // rescaling mean/sum via reference_ops::QuantizedMeanOrSumExtraArgs
+                                int* temp_index, int* resolved_axis,  // caller-provided work arrays, passed through unchanged
+                                int32_t* temp_sum, OpDataReduce* op_data,  // temp_sum: caller-provided int32 accumulator buffer
+                                bool compute_sum) {  // forwarded as-is; callers pass true for SUM and false for MEAN
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TfLiteReducerParams* params =
+      static_cast<TfLiteReducerParams*>(node->builtin_data);
+
+  bool result = reference_ops::QuantizedMeanOrSumExtraArgs<T, int32_t>(  // quantization parameters come from op_data
+      tflite::micro::GetTensorData<T>(input), op_data->input_zp,
+      op_data->input_scale, &input->dims->data[0], input->dims->size,
+      tflite::micro::GetTensorData<T>(output), op_data->output_scale,
+      op_data->multiplier, op_data->shift, op_data->output_zp,
+      &output->dims->data[0], output->dims->size,
+      tflite::micro::GetTensorData<int>(axis), op_data->num_axis,
+      params->keep_dims, temp_index, resolved_axis, temp_sum, compute_sum);
+  TF_LITE_ENSURE(context, result);  // returns an error status when the reference kernel reports failure
+
+  return kTfLiteOk;
+}
+
+// Mean without rescaling via reference_ops::Mean<T, U>; U is the accumulator type of temp_sum.
+template <typename T, typename U>
+TfLiteStatus Mean(TfLiteContext* context, TfLiteNode* node,
+                  OpDataReduce* op_data, int* temp_index, int* resolved_axis,
+                  U* temp_sum) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TfLiteReducerParams* params =
+      static_cast<TfLiteReducerParams*>(node->builtin_data);
+  const bool ok = reference_ops::Mean<T, U>(
+      tflite::micro::GetTensorData<T>(input), &input->dims->data[0],
+      input->dims->size, tflite::micro::GetTensorData<T>(output),
+      &output->dims->data[0], output->dims->size,
+      tflite::micro::GetTensorData<int>(axis), op_data->num_axis,
+      params->keep_dims, temp_index, resolved_axis, temp_sum);
+  TF_LITE_ENSURE(context, ok);  // surface a failed reduction to the caller, as the float path in EvalMeanHelper does
+  return kTfLiteOk;
+}
+
+// Integer MEAN: plain mean when input/output quantization match, else the rescaling path; returns that path's status.
+template <typename integer_type>
+TfLiteStatus EvalIntegerMean(TfLiteContext* context, TfLiteNode* node,
+                             int num_axis, OpDataReduce* op_data,  // num_axis is unused here; the callees read op_data->num_axis
+                             int* temp_index, int* resolved_axis) {
+  int32_t* temp_sum = static_cast<int32_t*>(
+      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
+  if (op_data->input_zp == op_data->output_zp &&
+      op_data->input_scale == op_data->output_scale) {
+    return Mean<integer_type, int32_t>(context, node, op_data, temp_index,
+                                       resolved_axis, temp_sum);
+  }
+  // Quantization differs: rescale with op_data->multiplier / op_data->shift.
+  return QuantizedMeanOrSum<integer_type>(context, node, temp_index,
+                                          resolved_axis, temp_sum, op_data,
+                                          /*compute_sum=*/false);
+}
+
+TfLiteStatus EvalMeanHelper(TfLiteContext* context, TfLiteNode* node,  // MEAN kernel body: dispatches on input->type (float32, int8, int16)
+                            OpDataReduce* op_data) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TfLiteReducerParams* params =
+      reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
+
+  int num_axis = static_cast<int>(ElementCount(*axis->dims));  // number of entries in the axis tensor
+  int temp_index[kMaxNumberOfAxis];  // NOTE(review): fixed-size stack array; nothing here checks input->dims->size against it — confirm
+  int resolved_axis[kMaxNumberOfReducedAxis];  // NOTE(review): fixed-size stack array; nothing here checks num_axis against it — confirm
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      tflite::MeanParams op_params;
+      ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,  // op_params is only consumed by the special-case call below
+                  &op_params);
+
+      // Special case mean implementation exists for 4D mean across axes 1
+      // and 2.
+      bool special_case_4d_axes_1_and_2 =
+          input->dims->size == 4 && op_params.axis_count == 2 &&
+          ((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
+           (op_params.axis[0] == 2 && op_params.axis[1] == 1));
+
+      // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+      if (params->keep_dims && special_case_4d_axes_1_and_2) {
+        reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
+                            tflite::micro::GetTensorData<float>(input),
+                            tflite::micro::GetTensorShape(output),
+                            tflite::micro::GetTensorData<float>(output));
+      } else {
+        TF_LITE_ENSURE(  // generic path: a false result becomes an error status
+            context,
+            reference_ops::Mean(
+                tflite::micro::GetTensorData<float>(input), input->dims->data,
+                input->dims->size, tflite::micro::GetTensorData<float>(output),
+                output->dims->data, output->dims->size,
+                tflite::micro::GetTensorData<int>(axis), num_axis,
+                params->keep_dims, temp_index, resolved_axis,
+                tflite::micro::GetTensorData<float>(output)));  // the output buffer is also passed as the final (temp sum) argument
+      }
+    } break;
+    case kTfLiteInt8: {
+      TF_LITE_ENSURE_OK(
+          context, EvalIntegerMean<int8_t>(context, node, num_axis, op_data,
+                                           temp_index, resolved_axis));
+    } break;
+    case kTfLiteInt16: {
+      TF_LITE_ENSURE_OK(
+          context, EvalIntegerMean<int16_t>(context, node, num_axis, op_data,
+                                            temp_index, resolved_axis));
+    } break;
+    default:
+      TF_LITE_ENSURE_MSG(context, false,
+                         "Currently, only float32, int8 or int16 input type "
+                         "is supported.");
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalMaxHelper(TfLiteContext* context, TfLiteNode* node,  // max reduction via reference_ops::ReduceGeneric; float32 and int8 only
+                           OpDataReduce* op_data) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+  TfLiteReducerParams* params =
+      static_cast<TfLiteReducerParams*>(node->builtin_data);
+
+  // Interpret an axis tensor with null dimensions as a scalar
+  int num_axis = static_cast<int>(ElementCount(*axis->dims));
+  int* temp_buffer = static_cast<int*>(  // scratch buffers are looked up by the indices stored in op_data
+      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
+  int* resolved_axis = static_cast<int*>(
+      context->GetScratchBuffer(context, op_data->resolved_axis_idx));
+  switch (input->type) {
+    case kTfLiteFloat32:
+      TF_LITE_ENSURE(
+          context,
+          reference_ops::ReduceGeneric<float>(
+              tflite::micro::GetTensorData<float>(input), input->dims->data,
+              input->dims->size, tflite::micro::GetTensorData<float>(output),
+              output->dims->data, output->dims->size,
+              tflite::micro::GetTensorData<int>(axis), num_axis,
+              params->keep_dims, temp_buffer, resolved_axis,
+              std::numeric_limits<float>::lowest(),  // initial value: lowest float, so any input replaces it
+              [](const float current, const float in) -> float {
+                return (in > current) ? in : current;  // keep the larger value
+              }));
+      break;
+    case kTfLiteInt8:
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),  // int8 path requires identical input/output scale...
+                        static_cast<double>(op_data->output_scale));
+      TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);  // ...and zero point, since no rescaling is done below
+      TF_LITE_ENSURE(
+          context,
+          reference_ops::ReduceGeneric<int8_t>(
+              tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
+              input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
+              output->dims->data, output->dims->size,
+              tflite::micro::GetTensorData<int>(axis), num_axis,
+              params->keep_dims, temp_buffer, resolved_axis,
+              std::numeric_limits<int8_t>::lowest(),
+              [](const int8_t current, const int8_t in) -> int8_t {
+                return (in > current) ? in : current;
+              }));
+      break;
+    default:
+      MicroPrintf("Only float32 and int8 types are supported.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalMinHelper(TfLiteContext* context, TfLiteNode* node,  // min reduction via reference_ops::ReduceGeneric; float32 and int8 only
+                           OpDataReduce* op_data) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+  TfLiteReducerParams* params =
+      static_cast<TfLiteReducerParams*>(node->builtin_data);
+
+  // Interpret an axis tensor with null dimensions as a scalar
+  int num_axis = static_cast<int>(ElementCount(*axis->dims));
+  int* temp_buffer = static_cast<int*>(  // scratch buffers are looked up by the indices stored in op_data
+      context->GetScratchBuffer(context, op_data->temp_buffer_idx));
+  int* resolved_axis = static_cast<int*>(
+      context->GetScratchBuffer(context, op_data->resolved_axis_idx));
+  switch (input->type) {
+    case kTfLiteFloat32:
+      TF_LITE_ENSURE(
+          context,
+          reference_ops::ReduceGeneric<float>(
+              tflite::micro::GetTensorData<float>(input), input->dims->data,
+              input->dims->size, tflite::micro::GetTensorData<float>(output),
+              output->dims->data, output->dims->size,
+              tflite::micro::GetTensorData<int>(axis), num_axis,
+              params->keep_dims, temp_buffer, resolved_axis,
+              std::numeric_limits<float>::max(),  // initial value: largest float, so any input replaces it
+              [](const float current, const float in) -> float {
+                return (in < current) ? in : current;  // keep the smaller value
+              }));
+      break;
+    case kTfLiteInt8:
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(op_data->input_scale),  // int8 path requires identical input/output scale...
+                        static_cast<double>(op_data->output_scale));
+      TF_LITE_ENSURE_EQ(context, op_data->input_zp, op_data->output_zp);  // ...and zero point, since no rescaling is done below
+      TF_LITE_ENSURE(
+          context,
+          reference_ops::ReduceGeneric<int8_t>(
+              tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
+              input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
+              output->dims->data, output->dims->size,
+              tflite::micro::GetTensorData<int>(axis), num_axis,
+              params->keep_dims, temp_buffer, resolved_axis,
+              std::numeric_limits<int8_t>::max(),
+              [](const int8_t current, const int8_t in) -> int8_t {
+                return (in < current) ? in : current;
+              }));
+      break;
+    default:
+      MicroPrintf("Only float32 and int8 types are supported.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+// SUM kernel body. Returns kTfLiteError when the reduction fails or the input type is unsupported.
+TfLiteStatus EvalSumHelper(TfLiteContext* context, TfLiteNode* node,
+                           OpDataReduce* op_data) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 1);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+  TfLiteReducerParams* params =
+      static_cast<TfLiteReducerParams*>(node->builtin_data);
+  // Interpret an axis tensor with null dimensions as a scalar.
+  int num_axis = static_cast<int>(ElementCount(*axis->dims));
+  int temp_index[kMaxNumberOfAxis];
+  int resolved_axis[kMaxNumberOfReducedAxis];
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      TF_LITE_ENSURE(
+          context,
+          reference_ops::ReduceGeneric<float>(
+              tflite::micro::GetTensorData<float>(input), input->dims->data,
+              input->dims->size, tflite::micro::GetTensorData<float>(output),
+              output->dims->data, output->dims->size,
+              tflite::micro::GetTensorData<int>(axis), num_axis,
+              params->keep_dims, temp_index, resolved_axis, /*init_value=*/0.f,
+              [](const float current, const float in) -> float {
+                return in + current;
+              }));
+    } break;
+    case kTfLiteInt8: {
+      int32_t* temp_sum = static_cast<int32_t*>(
+          context->GetScratchBuffer(context, op_data->temp_buffer_idx));
+      TF_LITE_ENSURE_OK(context, QuantizedMeanOrSum<int8_t>(  // propagate a failed reduction; last argument is compute_sum
+          context, node, temp_index, resolved_axis, temp_sum, op_data, true));
+    } break;
+    case kTfLiteInt16: {
+      int32_t* temp_sum = static_cast<int32_t*>(
+          context->GetScratchBuffer(context, op_data->temp_buffer_idx));
+      TF_LITE_ENSURE_OK(context, QuantizedMeanOrSum<int16_t>(  // propagate a failed reduction; last argument is compute_sum
+          context, node, temp_index, resolved_axis, temp_sum, op_data, true));
+    } break;
+    default:
+      MicroPrintf("Only float32, int8, and int16 types are supported.");
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cc
index 877ba7e..f71298c 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/reshape.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cstring>
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
@@ -31,9 +33,13 @@ constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   // Tensorflow's Reshape allows one of the shape components to have the
   // special -1 value, meaning it will be calculated automatically based on the
@@ -68,6 +74,9 @@ TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -93,9 +102,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Do nothing for in-place reshape.
   if (input->data.raw != output->data.raw) {
     // Otherwise perform reshape with copy.
-    for (size_t i = 0; i < input_bytes; ++i) {
-      output->data.raw[i] = input->data.raw[i];
-    }
+    memcpy(output->data.raw, input->data.raw, input_bytes);
   }
   return kTfLiteOk;
 }
@@ -103,14 +110,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace reshape
 
 TfLiteRegistration Register_RESHAPE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/reshape::Prepare,
-          /*invoke=*/reshape::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, reshape::Prepare, reshape::Eval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cc
new file mode 100644
index 0000000..01399ee
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_bilinear.cc
@@ -0,0 +1,116 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kSizeTensor = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* size =
+      micro_context->AllocateTempInputTensor(node, kSizeTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
+
+  TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
+  output->type = input->type;
+
+  TF_LITE_ENSURE_MSG(context, IsConstantTensor(size),
+                     "Non constant size tensor not supported");
+
+  // Ensure params are valid.
+  auto* params =
+      reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
+  if (params->half_pixel_centers && params->align_corners) {
+    MicroPrintf("If half_pixel_centers is True, align_corners must be False.");
+    return kTfLiteError;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(size);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* size =
+      tflite::micro::GetEvalInput(context, node, kSizeTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  if (output->type == kTfLiteFloat32) {
+    tflite::ResizeBilinearParams op_params;
+    op_params.align_corners = params->align_corners;
+    op_params.half_pixel_centers = params->half_pixel_centers;
+    reference_ops::ResizeBilinear(op_params,
+                                  tflite::micro::GetTensorShape(input),
+                                  tflite::micro::GetTensorData<float>(input),
+                                  tflite::micro::GetTensorShape(size),
+                                  tflite::micro::GetTensorData<int32_t>(size),
+                                  tflite::micro::GetTensorShape(output),
+                                  tflite::micro::GetTensorData<float>(output));
+  } else if (output->type == kTfLiteInt8) {
+    tflite::ResizeBilinearParams op_params;
+    op_params.align_corners = params->align_corners;
+    op_params.half_pixel_centers = params->half_pixel_centers;
+    reference_ops::ResizeBilinearInteger(
+        op_params, tflite::micro::GetTensorShape(input),
+        tflite::micro::GetTensorData<int8_t>(input),
+        tflite::micro::GetTensorShape(size),
+        tflite::micro::GetTensorData<int32_t>(size),
+        tflite::micro::GetTensorShape(output),
+        tflite::micro::GetTensorData<int8_t>(output));
+  } else {
+    MicroPrintf("Output type is %d, requires float or int8.", output->type);
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_RESIZE_BILINEAR() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc
index b716e80..d6f3df3 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace ops {
@@ -32,12 +33,17 @@ constexpr int kSizeTensor = 1;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  const TfLiteTensor* size = GetInput(context, node, kSizeTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* size =
+      micro_context->AllocateTempInputTensor(node, kSizeTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   // Our current implementations rely on the input being 4D,
   // and the size being 1D tensor with exactly 2 elements.
@@ -49,9 +55,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   output->type = input->type;
 
   if (!IsConstantTensor(size)) {
-    TF_LITE_KERNEL_LOG(context, "Dynamic tensors are unsupported in tfmicro.");
+    MicroPrintf("Dynamic tensors are unsupported in tfmicro.");
     return kTfLiteError;
   }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(size);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
@@ -78,26 +89,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         tflite::micro::GetTensorData<int32_t>(size),
         tflite::micro::GetTensorShape(output),
         tflite::micro::GetTensorData<int32_t>(output));
-  } else if (output->type == kTfLiteUInt8) {
+  } else if (output->type == kTfLiteInt8) {
     reference_ops::ResizeNearestNeighbor(
         op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<uint8_t>(input),
+        tflite::micro::GetTensorData<int8_t>(input),
         tflite::micro::GetTensorShape(size),
         tflite::micro::GetTensorData<int32_t>(size),
         tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<uint8_t>(output));
-  } else if (output->type == kTfLiteInt8) {
+        tflite::micro::GetTensorData<int8_t>(output));
+  } else if (output->type == kTfLiteInt16) {
     reference_ops::ResizeNearestNeighbor(
         op_params, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int8_t>(input),
+        tflite::micro::GetTensorData<int16_t>(input),
         tflite::micro::GetTensorShape(size),
         tflite::micro::GetTensorData<int32_t>(size),
         tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int8_t>(output));
+        tflite::micro::GetTensorData<int16_t>(output));
   } else {
-    TF_LITE_KERNEL_LOG(context,
-                       "Output type is %d, requires float, uint8_t or int8_t.",
-                       output->type);
+    MicroPrintf("Output tensor type %s (%d) not supported.",
+                TfLiteTypeGetName(output->type), output->type);
+
     return kTfLiteError;
   }
 
@@ -106,14 +117,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace resize_nearest_neighbor
 
 TfLiteRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/resize_nearest_neighbor::Prepare,
-          /*invoke=*/resize_nearest_neighbor::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, resize_nearest_neighbor::Prepare,
+                                   resize_nearest_neighbor::Eval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cc
index 4bd5018..fe4a16c 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/rfft2d.cc
@@ -26,7 +26,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 #include "edge-impulse-sdk/dsp/kissfft/kiss_fftr.h"
 
 namespace tflite {
@@ -82,8 +82,18 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   // Check type and shape of the input tensor
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+
+  TfLiteTensor* fft_length =
+      micro_context->AllocateTempInputTensor(node, kFftLengthTensor);
+  const int32_t* fft_length_data = GetTensorData<int32_t>(fft_length);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+
   TF_LITE_ENSURE(context, NumDimensions(input) >= 2);
   if (input->type != kTfLiteFloat32) {
     context->ReportError(context,
@@ -93,11 +103,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   // Check type and shape of the fft_length tensor
-  const TfLiteTensor* fft_length;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kFftLengthTensor, &fft_length));
   const RuntimeShape fft_length_shape = GetTensorShape(fft_length);
-
   TF_LITE_ENSURE_EQ(context, NumDimensions(fft_length), 1);
   TF_LITE_ENSURE_EQ(context, fft_length_shape.Dims(0), 2);
   if (fft_length->type != kTfLiteInt32) {
@@ -107,10 +113,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     return kTfLiteError;
   }
 
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
   OpData* data = static_cast<OpData*>(node->user_data);
 
   size_t output_els = output->bytes / sizeof(TfLiteComplex64);
@@ -119,6 +121,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     context->RequestScratchBufferInArena(
               context, output_els * sizeof(kiss_fft_cpx), &data->kiss_fft_output_buffer_index));
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(fft_length);
+  micro_context->DeallocateTempTfLiteTensor(output);
 
   return kTfLiteOk;
 }
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cc
index 6c4f23f..56e30d3 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/round.cc
@@ -29,9 +29,13 @@ constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -42,6 +46,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   for (int i = 0; i < output->dims->size; ++i) {
     TF_LITE_ENSURE_EQ(context, output->dims->data[i], input->dims->data[i]);
   }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -61,14 +68,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace round
 
 TfLiteRegistration Register_ROUND() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/round::Prepare,
-          /*invoke=*/round::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, round::Prepare, round::Eval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cc
index 3ec861f..b119d67 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.cc
@@ -1,7 +1,7 @@
 // Patched by Edge Impulse to include reference and hardware-accelerated kernels
 #include "../../../../classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@ namespace tflite {
 namespace ops {
 namespace micro {
 
-#ifdef __Xxy
+#if (defined(__Xxy)) || (defined(__Xvdsp))
 static void get_arc_two_buffer_sizes(int request_size_1, int request_size_2,
                                      int* grant_size_1, int* grant_size_2) {
   int maxrequest = 0;
@@ -69,202 +69,250 @@ static void get_arc_two_buffer_sizes(int request_size_1, int request_size_2,
 }
 
 static TfLiteStatus get_arc_scratch_buffer_for_io_tensors(
-    TfLiteContext* context, mli_tensor* in, mli_tensor* out) {
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out) {
   int request_size_in = 0;
   int request_size_out = 0;
   int grant_size_in = 0;
   int grant_size_out = 0;
-  if (!inside_arc_ccm(in->data)) {
+  if (!inside_arc_ccm(in->Data<int8_t>())) {
     // In case the input tensor contains multiple batches, it has rank 4
     // because the mli kernel cannot operate on batches, we need to have the
     // size of a single HWC tensor. that is why the start_rank is 1 in case of
     // input rank 4
-    int start_rank = in->rank - 3;
-    request_size_in = mli_hlp_count_elem_num(in, start_rank) *
-                      mli_hlp_tensor_element_size(in);
+    int start_rank = *in->Rank() - 3;
+    request_size_in = mli_hlp_count_elem_num(in->MliTensor(), start_rank) *
+                      mli_hlp_tensor_element_size(in->MliTensor());
   }
-  if (!inside_arc_ccm(out->data)) {
+  if (!inside_arc_ccm(out->Data<int8_t>())) {
     // In case the input tensor contains multiple batches, it has rank 4
     // because the mli kernel cannot operate on batches, we need to have the
     // size of a single batch. that is why the start_rank is 1 in case of input
     // rank 4
-    int start_rank = out->rank - 3;
-    request_size_out = mli_hlp_count_elem_num(out, start_rank) *
-                       mli_hlp_tensor_element_size(out);
+    int start_rank = *out->Rank() - 3;
+    request_size_out = mli_hlp_count_elem_num(out->MliTensor(), start_rank) *
+                       mli_hlp_tensor_element_size(out->MliTensor());
   }
 
   get_arc_two_buffer_sizes(request_size_in, request_size_out, &grant_size_in,
                            &grant_size_out);
-
-  if (!inside_arc_ccm(in->data)) {
-    in->data = get_arc_scratch_buffer(grant_size_in);
-    in->capacity = grant_size_in;
-    if (in->data == NULL) return kTfLiteError;
+  if (!inside_arc_ccm(in->Data<int8_t>())) {
+    in->SetData<int8_t>(
+        static_cast<int8_t*>(get_arc_scratch_buffer(grant_size_in)),
+        grant_size_in);
+    if (in->Data<int8_t>() == NULL) return kTfLiteError;
   }
-  if (!inside_arc_ccm(out->data)) {
-    out->data = get_arc_scratch_buffer(grant_size_out);
-    out->capacity = grant_size_out;
-    if (out->data == NULL) return kTfLiteError;
+
+  if (!inside_arc_ccm(out->Data<int8_t>())) {
+    out->SetData<int8_t>(
+        static_cast<int8_t*>(get_arc_scratch_buffer(grant_size_out)),
+        grant_size_out);
+    if (out->Data<int8_t>() == NULL) return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 #endif
 
-TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context,
-                                                     mli_tensor* in,
-                                                     mli_tensor* weights,
-                                                     mli_tensor* bias,
-                                                     mli_tensor* out) {
+TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights,
+    MliTensorInterface* bias, MliTensorInterface* out) {
   TfLiteStatus ret_val = kTfLiteOk;
-#ifdef __Xxy
+#if (defined(__Xxy)) || (defined(__Xvdsp))
   init_arc_scratch_buffers();
-  if (!inside_arc_ccm(weights->data)) {
-    int weights_size = mli_hlp_count_elem_num(weights, 0) *
-                       mli_hlp_tensor_element_size(weights);
-    int max_weights_size = 0;
-    weights->data = get_arc_scratch_buffer(weights_size);
-    weights->capacity = weights_size;
-    if (weights->data == NULL) {
-      get_arc_scratch_buffer_max_size(&max_weights_size);
-      weights->data = get_arc_scratch_buffer(max_weights_size);
-      weights->capacity = max_weights_size;
-      if (max_weights_size == 0) ret_val = kTfLiteError;
-    }
-    if (weights->data == NULL) ret_val = kTfLiteError;
-  }
 
-  if (!inside_arc_ccm(bias->data)) {
+  if (!inside_arc_ccm(bias->Data<int32_t>())) {
     uint32_t bias_mem_requirements =
-        mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias);
-    bias->data = get_arc_scratch_buffer(bias_mem_requirements);
-    bias->capacity = bias_mem_requirements;
+        mli_hlp_count_elem_num(bias->MliTensor(), 0) *
+        mli_hlp_tensor_element_size(bias->MliTensor());
+    bias->SetData<int32_t>(
+        static_cast<int32_t*>(get_arc_scratch_buffer(bias_mem_requirements)),
+        bias_mem_requirements);
   }
 
-  if (ret_val == kTfLiteOk) {
-    ret_val = get_arc_scratch_buffer_for_io_tensors(context, in, out);
-  }
-
-  if (bias->data == NULL) {
+  if (bias->Data<int32_t>() == NULL) {
     int max_bias_size = 0;
     get_arc_scratch_buffer_max_size(&max_bias_size);
-    bias->data = get_arc_scratch_buffer(max_bias_size);
-    bias->capacity = max_bias_size;
+    bias->SetData<int32_t>(
+        static_cast<int32_t*>(get_arc_scratch_buffer(max_bias_size)),
+        max_bias_size);
     if (max_bias_size == 0) ret_val = kTfLiteError;
   }
-  if (bias->data == NULL) ret_val = kTfLiteError;
+  if (bias->Data<int32_t>() == NULL) ret_val = kTfLiteError;
 
+  if (!inside_arc_ccm(weights->Data<int8_t>())) {
+    int weights_size = mli_hlp_count_elem_num(weights->MliTensor(), 0) *
+                       mli_hlp_tensor_element_size(weights->MliTensor());
+    int max_weights_size = 0;
+    weights->SetData<int8_t>(
+        static_cast<int8_t*>(get_arc_scratch_buffer(weights_size)),
+        weights_size);
+    if (weights->Data<int8_t>() == NULL) {
+      get_arc_scratch_buffer_max_size(&max_weights_size);
+      weights->SetData<int8_t>(
+          static_cast<int8_t*>(get_arc_scratch_buffer(max_weights_size)),
+          max_weights_size);
+      if (max_weights_size == 0) ret_val = kTfLiteError;
+    }
+    if (weights->Data<int8_t>() == NULL) ret_val = kTfLiteError;
+  }
+
+  if (ret_val == kTfLiteOk) {
+    ret_val = get_arc_scratch_buffer_for_io_tensors(context, in, out);
+  }
 #endif
   return ret_val;
 }
 
 TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(
-    TfLiteContext* context, mli_tensor* in, mli_tensor* weights,
-    mli_tensor* bias, mli_tensor* out) {
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights,
+    MliTensorInterface* bias, MliTensorInterface* out) {
   TfLiteStatus ret_val = kTfLiteOk;
-#ifdef __Xxy
+
+#if (defined(__Xxy)) || (defined(__Xvdsp))
   init_arc_scratch_buffers();
-  /* strategy for FC kernels:
-     first allocate input, because this cannot be sliced. (in case of batch
-     processing, only a single input needs to be allocated) then weights & bias
-     because if fully loaded, they can be reused over batches. then output.
-     The number of output channels (for weights slicing) depends on size of
-     output and size of weights&bias */
 
-  if (!inside_arc_ccm(in->data)) {
-    /* In case the input tensor contains multiple batches,
-       only count the size if the inner most dimension */
-    int size_in = mli_hlp_count_elem_num(in, in->rank - 1) *
-                  mli_hlp_tensor_element_size(in);
-    in->data = get_arc_scratch_buffer(size_in);
-    in->capacity = size_in;
-    if (in->data == NULL) {
-      in->capacity = 0;
-      ret_val = kTfLiteError;
-    }
+  if (!inside_arc_ccm(bias->Data<int32_t>())) {
+    int bias_mem_requirements = mli_hlp_count_elem_num(bias->MliTensor(), 0) *
+                                mli_hlp_tensor_element_size(bias->MliTensor());
+    bias->SetData<int32_t>(
+        static_cast<int32_t*>(get_arc_scratch_buffer(bias_mem_requirements)),
+        bias_mem_requirements);
+  }
+
+  if (bias->Data<int32_t>() == NULL) {
+    int max_bias_size = 0;
+    get_arc_scratch_buffer_max_size(&max_bias_size);
+    bias->SetData<int32_t>(
+        static_cast<int32_t*>(get_arc_scratch_buffer(max_bias_size)),
+        max_bias_size);
+    if (max_bias_size == 0) ret_val = kTfLiteError;
   }
+  if (bias->Data<int32_t>() == NULL) ret_val = kTfLiteError;
 
-  if (!inside_arc_ccm(weights->data)) {
-    int weights_size = mli_hlp_count_elem_num(weights, 0) *
-                       mli_hlp_tensor_element_size(weights);
+  if (!inside_arc_ccm(weights->Data<int8_t>())) {
+    int weights_size = mli_hlp_count_elem_num(weights->MliTensor(), 0) *
+                       mli_hlp_tensor_element_size(weights->MliTensor());
     int max_weights_size = 0;
-    weights->data = get_arc_scratch_buffer(weights_size);
-    weights->capacity = weights_size;
-    if (weights->data == NULL) {
+    weights->SetData<int8_t>(
+        static_cast<int8_t*>(get_arc_scratch_buffer(weights_size)),
+        weights_size);
+    if (weights->Data<int8_t>() == NULL) {
       get_arc_scratch_buffer_max_size(&max_weights_size);
-      weights->data = get_arc_scratch_buffer(max_weights_size);
-      weights->capacity = max_weights_size;
+      weights->SetData<int8_t>(
+          static_cast<int8_t*>(get_arc_scratch_buffer(max_weights_size)),
+          max_weights_size);
       if (max_weights_size == 0) ret_val = kTfLiteError;
     }
-    if (weights->data == NULL) ret_val = kTfLiteError;
+    if (weights->Data<int8_t>() == NULL) ret_val = kTfLiteError;
   }
 
-  if (!inside_arc_ccm(bias->data)) {
-    int bias_mem_requirements =
-        mli_hlp_count_elem_num(bias, 0) * mli_hlp_tensor_element_size(bias);
-    bias->data = get_arc_scratch_buffer(bias_mem_requirements);
-    bias->capacity = bias_mem_requirements;
-  }
+  /* strategy for FC kernels:
+     first allocate input, because this cannot be sliced. (in case of batch
+     processing, only a single input needs to be allocated) then weights &
+     bias because if fully loaded, they can be reused over batches. then
+     output. The number of output channels (for weights slicing) depends on
+     size of output and size of weights&bias */
 
-  if (!inside_arc_ccm(out->data)) {
+  if (!inside_arc_ccm(in->Data<int8_t>())) {
     /* In case the input tensor contains multiple batches,
        only count the size if the inner most dimension */
-    int out_size = mli_hlp_count_elem_num(out, out->rank - 1) *
-                   mli_hlp_tensor_element_size(out);
+    int size_in = mli_hlp_count_elem_num(in->MliTensor(), *in->Rank() - 1) *
+                  mli_hlp_tensor_element_size(in->MliTensor());
+    in->SetData<int8_t>(static_cast<int8_t*>(get_arc_scratch_buffer(size_in)),
+                        size_in);
+    if (in->Data<int8_t>() == NULL) {
+      in->SetData<int8_t>(nullptr, 0);
+      ret_val = kTfLiteError;
+    }
+  }
+  if (!inside_arc_ccm(out->Data<int8_t>())) {
+    /* In case the input tensor contains multiple batches,
+       only count the size if the inner most dimension */
+    int out_size = mli_hlp_count_elem_num(out->MliTensor(), *out->Rank() - 1) *
+                   mli_hlp_tensor_element_size(out->MliTensor());
     int max_out_size = 0;
-    out->data = get_arc_scratch_buffer(out_size);
-    out->capacity = out_size;
-    if (out->data == NULL) {
+    out->SetData<int8_t>(static_cast<int8_t*>(get_arc_scratch_buffer(out_size)),
+                         out_size);
+    if (out->Data<int8_t>() == NULL) {
       get_arc_scratch_buffer_max_size(&max_out_size);
-      out->data = get_arc_scratch_buffer(max_out_size);
-      out->capacity = max_out_size;
+      out->SetData<int8_t>(
+          static_cast<int8_t*>(get_arc_scratch_buffer(max_out_size)),
+          max_out_size);
       if (max_out_size == 0) ret_val = kTfLiteError;
     }
-    if (out->data == NULL) ret_val = kTfLiteError;
+    if (out->Data<int8_t>() == NULL) ret_val = kTfLiteError;
   }
+#endif
+  return ret_val;
+}
 
-  if (bias->data == NULL) {
-    int max_bias_size = 0;
-    get_arc_scratch_buffer_max_size(&max_bias_size);
-    bias->data = get_arc_scratch_buffer(max_bias_size);
-    bias->capacity = max_bias_size;
-    if (max_bias_size == 0) ret_val = kTfLiteError;
+TfLiteStatus get_arc_scratch_buffer_for_eltwise_tensors(
+    TfLiteContext* context, MliTensorInterface* in1, MliTensorInterface* in2,
+    MliTensorInterface* out) {
+  TfLiteStatus ret_val = kTfLiteOk;
+#if (defined(__Xxy)) || (defined(__Xvdsp))
+  init_arc_scratch_buffers();
+  constexpr int tsr_num = 3;
+  int in1_size = mli_hlp_count_elem_num(in1->MliTensor(), 0) *
+                 mli_hlp_tensor_element_size(in1->MliTensor());
+  int in2_size = mli_hlp_count_elem_num(in2->MliTensor(), 0) *
+                 mli_hlp_tensor_element_size(in2->MliTensor());
+  int out_size = mli_hlp_count_elem_num(out->MliTensor(), 0) *
+                 mli_hlp_tensor_element_size(out->MliTensor());
+  int sizes[tsr_num] = {in1_size, in2_size, out_size};
+  MliTensorInterface* in_tensors[tsr_num] = {in1, in2, out};
+  for (int i = 0; i < tsr_num; ++i) {
+    if (!inside_arc_ccm(in_tensors[i]->Data<int8_t>())) {
+      auto* data_ptr = get_arc_scratch_buffer(sizes[i]);
+      if (data_ptr == nullptr) {
+        get_arc_scratch_buffer_max_size(&sizes[i]);
+        data_ptr = get_arc_scratch_buffer(sizes[i]);
+      }
+      if (data_ptr == nullptr || sizes[i] == 0) {
+        in_tensors[i]->SetData<int8_t>(nullptr, 0);
+        ret_val = kTfLiteError;
+      } else {
+        in_tensors[i]->SetData<int8_t>(static_cast<int8_t*>(data_ptr),
+                                       sizes[i]);
+      }
+    }
   }
-  if (bias->data == NULL) ret_val = kTfLiteError;
-
 #endif
   return ret_val;
 }
 
 TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
-    const mli_tensor* in, const mli_tensor* out, const int kernel_height,
-    const int stride_height, const int padding_top, const int padding_bot,
-    int* in_slice_height, int* out_slice_height) {
+    const MliTensorInterface* in, const MliTensorInterface* out,
+    const int kernel_height, const int stride_height, const int padding_top,
+    const int padding_bot, int* in_slice_height, int* out_slice_height) {
   const int height_dimension = 1;
-  const int in_height = in->shape[height_dimension];
-  const int out_height = out->shape[height_dimension];
-  const int line_size_in = mli_hlp_count_elem_num(in, height_dimension + 1) *
-                           mli_hlp_tensor_element_size(in);
-  const int line_size_out = mli_hlp_count_elem_num(out, height_dimension + 1) *
-                            mli_hlp_tensor_element_size(out);
+  const int in_height = in->Shape()[height_dimension];
+  const int out_height = out->Shape()[height_dimension];
+  const int line_size_in =
+      mli_hlp_count_elem_num(in->MliTensor(), height_dimension + 1) *
+      mli_hlp_tensor_element_size(in->MliTensor());
+  const int line_size_out =
+      mli_hlp_count_elem_num(out->MliTensor(), height_dimension + 1) *
+      mli_hlp_tensor_element_size(out->MliTensor());
   int max_lines_in = 0;
   int max_lines_out = 0;
   int max_out_lines_for_input = 0;
-  bool fit = (static_cast<int>(in->capacity) >= in_height * line_size_in) &&
-             (static_cast<int>(out->capacity) >= out_height * line_size_out);
+  bool fit =
+      (static_cast<int>(*in->DataCapacity()) >= in_height * line_size_in) &&
+      (static_cast<int>(*out->DataCapacity()) >= out_height * line_size_out);
   if (fit) {
-    // in case both tensors completely fit in the capacity, there is no need for
-    // slicing. As padding can affect effective input region, we also derive it
-    // from output height, and rely on a clipping logic which intend to reduce
-    // last smaller slice. I.e the only slice is a kind of
-    // "smaller last slice that need to be corrected"
+    // in case both tensors completely fit in the capacity, there is no need
+    // for slicing. As padding can affect effective input region, we also
+    // derive it from output height, and rely on a clipping logic which intend
+    // to reduce last smaller slice. I.e the only slice is a kind of "smaller
+    // last slice that need to be corrected"
     *in_slice_height = std::max(in_height, out_height * stride_height);
     *out_slice_height = out_height;
   } else {
     // First compute how many lines fit into the input tensor, and compute how
     // many output lines can be computed with that.
-    max_lines_in =
-        std::min(in_height, static_cast<int>(in->capacity) / line_size_in);
+    max_lines_in = std::min(
+        in_height, static_cast<int>(*in->DataCapacity()) / line_size_in);
     if (max_lines_in >= in_height) {
       max_out_lines_for_input = out_height;
     } else if (2 * max_lines_in >= in_height) {
@@ -279,8 +327,8 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
           (max_lines_in - kernel_height + 1) / stride_height;
     }
     // Then compute how many output lines fit into the output tensor.
-    max_lines_out =
-        std::min(out_height, static_cast<int>(out->capacity) / line_size_out);
+    max_lines_out = std::min(
+        out_height, static_cast<int>(*out->DataCapacity()) / line_size_out);
     // the smallest of the two determines the slice height for the output, and
     // the derived sliceheight for the input.
     *out_slice_height = std::min(max_out_lines_for_input, max_lines_out);
@@ -295,29 +343,32 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
 }
 
 TfLiteStatus arc_scratch_buffer_calc_slice_size_weights(
-    const mli_tensor* weights, const mli_tensor* bias,
+    const MliTensorInterface* weights, const MliTensorInterface* bias,
     const int weight_out_ch_dimension, int* slice_channels) {
-  const int channels = weights->shape[weight_out_ch_dimension];
-  const int ch_size_w = (mli_hlp_count_elem_num(weights, 0) / channels) *
-                        mli_hlp_tensor_element_size(weights);
-  const int ch_size_b = (mli_hlp_count_elem_num(bias, 0) / channels) *
-                        mli_hlp_tensor_element_size(bias);
+  const int channels = weights->Shape()[weight_out_ch_dimension];
+  const int ch_size_w =
+      (mli_hlp_count_elem_num(weights->MliTensor(), 0) / channels) *
+      mli_hlp_tensor_element_size(weights->MliTensor());
+  const int ch_size_b =
+      (mli_hlp_count_elem_num(bias->MliTensor(), 0) / channels) *
+      mli_hlp_tensor_element_size(bias->MliTensor());
   int max_ch_weigths = 0;
   int max_ch_bias = 0;
 
-  bool fit = (static_cast<int>(weights->capacity) >= channels * ch_size_w) &&
-             (static_cast<int>(bias->capacity) >= channels * ch_size_b);
+  bool fit =
+      (static_cast<int>(*weights->DataCapacity()) >= channels * ch_size_w) &&
+      (static_cast<int>(*bias->DataCapacity()) >= channels * ch_size_b);
   if (fit) {
-    // in case both tensors completely fit in the capacity, there is no need for
-    // slicing
+    // in case both tensors completely fit in the capacity, there is no need
+    // for slicing
     *slice_channels = channels;
   } else {
     // First compute how many channels fit into the weights tensor
-    max_ch_weigths =
-        std::min(channels, static_cast<int>(weights->capacity) / ch_size_w);
+    max_ch_weigths = std::min(
+        channels, static_cast<int>(*weights->DataCapacity()) / ch_size_w);
     // Ten compute how many channels fit into the bias tensor.
     max_ch_bias =
-        std::min(channels, static_cast<int>(bias->capacity) / ch_size_b);
+        std::min(channels, static_cast<int>(*bias->DataCapacity()) / ch_size_b);
     // the smallest of the two determines the slice size
     *slice_channels = std::min(max_ch_weigths, max_ch_bias);
   }
@@ -329,10 +380,9 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_weights(
   }
 }
 
-TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(TfLiteContext* context,
-                                                        mli_tensor* in,
-                                                        mli_tensor* out) {
-#ifdef __Xxy
+TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out) {
+#if (defined(__Xxy)) || (defined(__Xvdsp))
   init_arc_scratch_buffers();
   return get_arc_scratch_buffer_for_io_tensors(context, in, out);
 #else
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h
index 38846fa..2f60948 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buf_mgr.h
@@ -1,7 +1,7 @@
 // Patched by Edge Impulse to include reference and hardware-accelerated kernels
 #include "../../../../classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_ARC_SCRATCH_BUF_MGR_H_
 
 #include "mli_api.h"  // NOLINT
+#include "mli_interface.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 
 namespace tflite {
@@ -32,19 +33,17 @@ namespace micro {
  * @detail This function will update the data pointers in the 4 tensors with
  * pointers to scratch buffers in fast local memory.
  *
- * @param context  [I] pointer to TfLite context (needed for error handling)
- * @param in [IO] pointer to the input tensor
- * @param weights [IO] pointer to the weights tensor
- * @param bias [IO] pointer to the bias tensor
- * @param output [IO] pointer to the output tensor
+ * @param context   [I] pointer to TfLite context (needed for error handling)
+ * @param in        [IO] pointer to the input tensor
+ * @param weights   [IO] pointer to the weights tensor
+ * @param bias      [IO] pointer to the bias tensor
+ * @param output    [IO] pointer to the output tensor
  *
  * @return Tf Lite status code
  */
-TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context,
-                                                     mli_tensor* in,
-                                                     mli_tensor* weights,
-                                                     mli_tensor* bias,
-                                                     mli_tensor* out);
+TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights,
+    MliTensorInterface* bias, MliTensorInterface* out);
 
 /**
  * @brief Function to allocate scratch buffers for pooling kernels with only
@@ -53,15 +52,14 @@ TfLiteStatus get_arc_scratch_buffer_for_conv_tensors(TfLiteContext* context,
  * @detail This function will update the data pointers in the 2 tensors with
  * pointers to scratch buffers in fast local memory.
  *
- * @param context  [I] pointer to TfLite context (needed for error handling)
- * @param in [IO] pointer to the input tensor
- * @param output [IO] pointer to the output tensor
+ * @param context   [I] pointer to TfLite context (needed for error handling)
+ * @param in        [IO] pointer to the input tensor
+ * @param output    [IO] pointer to the output tensor
  *
  * @return Tf Lite status code
  */
-TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(TfLiteContext* context,
-                                                        mli_tensor* in,
-                                                        mli_tensor* out);
+TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* out);
 
 /**
  * @brief Function to allocate scratch buffers for the fully connect tensors
@@ -69,17 +67,34 @@ TfLiteStatus get_arc_scratch_buffer_for_pooling_tensors(TfLiteContext* context,
  * @detail This function will update the data pointers in the 4 tensors with
  * pointers to scratch buffers in fast local memory.
  *
- * @param context  [I] pointer to TfLite context (needed for error handling)
- * @param in [IO] pointer to the input tensor
- * @param weights [IO] pointer to the weights tensor
- * @param bias [IO] pointer to the bias tensor
- * @param output [IO] pointer to the output tensor
+ * @param context   [I] pointer to TfLite context (needed for error handling)
+ * @param in        [IO] pointer to the input tensor
+ * @param weights   [IO] pointer to the weights tensor
+ * @param bias      [IO] pointer to the bias tensor
+ * @param output    [IO] pointer to the output tensor
  *
  * @return Tf Lite status code
  */
 TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(
-    TfLiteContext* context, mli_tensor* in, mli_tensor* weights,
-    mli_tensor* bias, mli_tensor* out);
+    TfLiteContext* context, MliTensorInterface* in, MliTensorInterface* weights,
+    MliTensorInterface* bias, MliTensorInterface* out);
+
+/**
+ * @brief Function to allocate scratch buffers for the eltwise function tensors
+ *
+ * @detail This function will update the data pointers in the 3 tensors with
+ * pointers to scratch buffers in fast local memory.
+ *
+ * @param context   [I] pointer to TfLite context (needed for error handling)
+ * @param in1       [IO] pointer to the first input tensor
+ * @param in2       [IO] pointer to the second input tensor
+ * @param output    [IO] pointer to the output tensor
+ *
+ * @return Tf Lite status code
+ */
+TfLiteStatus get_arc_scratch_buffer_for_eltwise_tensors(
+    TfLiteContext* context, MliTensorInterface* in1, MliTensorInterface* in2,
+    MliTensorInterface* out);
 
 /**
  * @brief Function to calculate slice size for io tensors
@@ -89,22 +104,23 @@ TfLiteStatus get_arc_scratch_buffer_for_fully_connect_tensors(
  * padding. the function will look at the capacity filed in the in and out
  * tensor to determine the available buffersize.
  *
- * @param in [I] pointer to the input tensor
- * @param out [I] pointer to the output tensor
- * @param kernelHeight [I] size of the kernel in height dimension
- * @param strideHeight [I] input stride in height dimension
- * @param padding_top [I] number of lines with zeros at the top
- * @param padding_bot [I] number of lines with zeros at the bottom
- * @param inSliceHeight [O] slice size in height dimension for the input tensor
- * @param outSliceHeight [O] slice size in height dimension for the output
+ * @param in                [I] pointer to the input tensor
+ * @param out               [I] pointer to the output tensor
+ * @param kernelHeight      [I] size of the kernel in height dimension
+ * @param strideHeight      [I] input stride in height dimension
+ * @param padding_top       [I] number of lines with zeros at the top
+ * @param padding_bot       [I] number of lines with zeros at the bottom
+ * @param in_slice_height   [O] slice size in height dimension for the input
+ * tensor
+ * @param out_slice_height  [O] slice size in height dimension for the output
  * tensor
  *
  * @return Tf Lite status code
  */
 TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
-    const mli_tensor* in, const mli_tensor* out, const int kernelHeight,
-    const int strideHeight, const int padding_top, const int padding_bot,
-    int* in_slice_height, int* out_slice_height);
+    const MliTensorInterface* in, const MliTensorInterface* out,
+    const int kernelHeight, const int strideHeight, const int padding_top,
+    const int padding_bot, int* in_slice_height, int* out_slice_height);
 
 /**
  * @brief Function to calculate slice size for weight slicing
@@ -113,16 +129,16 @@ TfLiteStatus arc_scratch_buffer_calc_slice_size_io(
  * dimension for weight and bias tensors. the function will look at the capacity
  * filed in the weights and bias tensor to determine the available buffersize.
  *
- * @param weights [I] pointer to the input tensor
- * @param bias [I] pointer to the output tensor
- * @param weightOutChDimension [I] dimension of the output channels in the
+ * @param weights               [I] pointer to the weights tensor
+ * @param bias                  [I] pointer to the bias tensor
+ * @param weightOutChDimension  [I] dimension of the output channels in the
  * weights tensor
- * @param sliceChannels [O] slice size in output channel dimension
+ * @param sliceChannels         [O] slice size in output channel dimension
  *
  * @return Tf Lite status code
  */
 TfLiteStatus arc_scratch_buffer_calc_slice_size_weights(
-    const mli_tensor* weights, const mli_tensor* bias,
+    const MliTensorInterface* weights, const MliTensorInterface* bias,
     const int weight_out_ch_dimension, int* slice_channels);
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cc
index 5e5a619..924cc41 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.cc
@@ -1,7 +1,7 @@
 // Patched by Edge Impulse to include reference and hardware-accelerated kernels
 #include "../../../../classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -28,30 +28,43 @@ namespace micro {
  * used for the data section and the stack. the values can be overruled by
  * adding a -D option to the makefile of the application
  */
+
+#ifdef __Xxy
+
 #ifndef SCRATCH_MEM_X_SIZE
 #ifdef core_config_xy_size
 #define SCRATCH_MEM_X_SIZE (core_config_xy_size)
-#else
-#define SCRATCH_MEM_X_SIZE (0)
 #endif
 #endif
 
 #ifndef SCRATCH_MEM_Y_SIZE
 #ifdef core_config_xy_size
 #define SCRATCH_MEM_Y_SIZE (core_config_xy_size)
-#else
-#define SCRATCH_MEM_Y_SIZE (0)
 #endif
 #endif
 
 #ifndef SCRATCH_MEM_Z_SIZE
 #ifdef core_config_dccm_size
 #define SCRATCH_MEM_Z_SIZE ((core_config_dccm_size) / 2)
-#else
-#define SCRATCH_MEM_Z_SIZE (0)
 #endif
 #endif
 
+#elif defined(__Xvdsp)
+
+#ifndef SCRATCH_MEM_VEC_SIZE
+#ifdef core_config_vec_mem_size
+#define SCRATCH_MEM_VEC_SIZE ((core_config_vec_mem_size * 3) / 4)
+#endif
+#endif
+
+#else
+
+#define SCRATCH_MEM_SIZE (65536)
+
+#endif
+
+#ifdef __Xxy
+
 // Patched by Edge Impulse, ARC GCC fixes
 namespace {
 #if defined (__GNUC__)
@@ -76,13 +89,44 @@ static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE] __attribute__((section(".Zdata")
 #pragma Bss(".Zdata")
 static int8_t scratch_mem_z[SCRATCH_MEM_Z_SIZE];
 #pragma Bss()
+#endif
+
+#elif defined(__Xvdsp)
+
+#pragma Bss(".vecmem_data")
+static int8_t scratch_mem_vec_1[SCRATCH_MEM_VEC_SIZE / 4];
+static int8_t scratch_mem_vec_2[SCRATCH_MEM_VEC_SIZE / 4];
+static int8_t scratch_mem_vec_3[SCRATCH_MEM_VEC_SIZE / 2];
+#pragma Bss()
+
+#else
+
+static int8_t scratch_mem_stack[SCRATCH_MEM_SIZE];
+
 #endif
 }  // namespace
 
+#ifdef __Xxy
+
 static int8_t* scratch_mem[] = {scratch_mem_x, scratch_mem_y, scratch_mem_z};
 static uint32_t scratch_sizes[] = {SCRATCH_MEM_X_SIZE, SCRATCH_MEM_Y_SIZE,
                                    SCRATCH_MEM_Z_SIZE};
 
+#elif defined(__Xvdsp)
+
+static int8_t* scratch_mem[] = {scratch_mem_vec_1, scratch_mem_vec_2,
+                                scratch_mem_vec_3};
+static uint32_t scratch_sizes[] = {SCRATCH_MEM_VEC_SIZE / 4,
+                                   SCRATCH_MEM_VEC_SIZE / 4,
+                                   SCRATCH_MEM_VEC_SIZE / 2};
+
+#else
+
+static int8_t* scratch_mem[] = {scratch_mem_stack};
+static uint32_t scratch_sizes[] = {SCRATCH_MEM_SIZE};
+
+#endif
+
 void* get_arc_scratch_buffer(int size) {
   // Function to asign fast memory from one of 3 scratch buffers.
   // Best Fit strategy - memory is allocated from that memory bank that leaves
@@ -101,7 +145,7 @@ void* get_arc_scratch_buffer(int size) {
     }
   }
   if (best_mem_idx >= 0) {
-    buf = static_cast<void*>(scratch_mem[best_mem_idx]);
+    buf = scratch_mem[best_mem_idx];
     scratch_mem[best_mem_idx] += size;
     scratch_sizes[best_mem_idx] -= size;
   }
@@ -138,12 +182,24 @@ void get_arc_scratch_buffer_two_max_sizes(int* size1, int* size2) {
 }
 
 void init_arc_scratch_buffers(void) {
+#ifdef __Xxy
   scratch_mem[0] = scratch_mem_x;
   scratch_mem[1] = scratch_mem_y;
   scratch_mem[2] = scratch_mem_z;
   scratch_sizes[0] = SCRATCH_MEM_X_SIZE;
   scratch_sizes[1] = SCRATCH_MEM_Y_SIZE;
   scratch_sizes[2] = SCRATCH_MEM_Z_SIZE;
+#elif defined(__Xvdsp)
+  scratch_mem[0] = scratch_mem_vec_1;
+  scratch_mem[1] = scratch_mem_vec_2;
+  scratch_mem[2] = scratch_mem_vec_3;
+  scratch_sizes[0] = SCRATCH_MEM_VEC_SIZE / 4;
+  scratch_sizes[1] = SCRATCH_MEM_VEC_SIZE / 4;
+  scratch_sizes[2] = SCRATCH_MEM_VEC_SIZE / 2;
+#else
+  scratch_mem[0] = scratch_mem_stack;
+  scratch_sizes[0] = SCRATCH_MEM_SIZE;
+#endif
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h
index bb80b4f..dc704aa 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/scratch_buffers.h
@@ -1,7 +1,7 @@
 // Patched by Edge Impulse to include reference and hardware-accelerated kernels
 #include "../../../../classifier/ei_classifier_config.h"
 #if EI_CLASSIFIER_TFLITE_ENABLE_ARC == 1
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ static inline bool inside_arc_xccm(void* p) {
 }
 
 static inline bool inside_arc_yccm(void* p) {
-#if core_config_xy
+#if core_config_xy_size
   return ((unsigned)p >= core_config_xy_y_base) &&
          ((unsigned)p < core_config_xy_y_base + core_config_xy_size);
 #else
@@ -60,8 +60,18 @@ static inline bool inside_arc_yccm(void* p) {
 #endif
 }
 
+static inline bool inside_arc_vccm(void* p) {
+#if core_config_vec_mem_size
+  return ((unsigned)p >= core_config_vec_mem_base) &&
+         ((unsigned)p < core_config_vec_mem_base + core_config_vec_mem_size);
+#else
+  return false;
+#endif
+}
+
 static inline bool inside_arc_ccm(void* p) {
-  return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p);
+  return inside_arc_dccm(p) || inside_arc_xccm(p) || inside_arc_yccm(p) ||
+         inside_arc_vccm(p);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cc
index 360d503..dcfed8c 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/select.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,25 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-
 #ifndef TF_LITE_STATIC_MEMORY
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/select.h"
 
 #include <stddef.h>
 #include <stdint.h>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace select {
 
 constexpr int kInputTensorCondition = 0;
 constexpr int kInputTensorX = 1;
@@ -49,106 +43,42 @@ struct OpData {
   bool has_low_rank_input_condition;
 };
 
-template <typename D, typename T>
-void Select(const RuntimeShape& input_condition_shape,
-            const D* input_condition_data, const RuntimeShape& input_x_shape,
-            const T* input_x_data, const RuntimeShape& input_y_shape,
-            const T* input_y_data, const RuntimeShape& output_shape,
-            T* output_data) {
-  const int64_t flatsize = MatchingFlatSize(
-      input_condition_shape, input_x_shape, input_y_shape, output_shape);
-  for (int64_t i = 0; i < flatsize; ++i) {
-    output_data[i] =
-        input_condition_data[i] ? input_x_data[i] : input_y_data[i];
-  }
-}
-
-template <typename D, typename T>
-void RankOneSelect(const RuntimeShape& input_condition_shape,
-                   const D* input_condition_data,
-                   const RuntimeShape& input_x_shape, const T* input_x_data,
-                   const RuntimeShape& input_y_shape, const T* input_y_data,
-                   const RuntimeShape& output_shape, T* output_data) {
-  const int64_t outer_size = input_condition_shape.FlatSize();
-  int64_t inner_size;
-  if (input_condition_shape.DimensionsCount() == 0) {
-    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
-  } else {
-    TFLITE_DCHECK_EQ(
-        MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0),
-        outer_size);
-    inner_size =
-        MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
-  }
-
-  int64_t offset = 0;
-  for (int64_t i = 0; i < outer_size; i++) {
-    const T* input_data = input_condition_data[i] ? input_x_data : input_y_data;
-    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
-    offset += inner_size;
-  }
-}
-
-template <typename D, typename T>
-void BroadcastSelect4DSlow(const RuntimeShape& input_condition_shape,
-                           const D* input_condition_data,
-                           const RuntimeShape& input_x_shape,
-                           const T* input_x_data,
-                           const RuntimeShape& input_y_shape,
-                           const T* input_y_data,
-                           const RuntimeShape& output_shape, T* output_data) {
-  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
-  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
-
-  const RuntimeShape extended_output_shape =
-      RuntimeShape::ExtendedShape(4, output_shape);
-
-  NdArrayDesc<4> desc_condition;
-  NdArrayDesc<4> desc_x;
-  NdArrayDesc<4> desc_y;
-  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape,
-                                      input_y_shape, &desc_condition, &desc_x,
-                                      &desc_y);
-
-  // In Tensorflow, the dimensions are canonically named (batch_number, row,
-  // col, channel), with extents (batches, height, width, depth), with the
-  // trailing dimension changing most rapidly (channels has the smallest
-  // stride, typically 1 element).
-  //
-  // In generated C code, we store arrays with the dimensions reversed. The
-  // first dimension has smallest stride.
-  //
-  // We name our variables by their Tensorflow convention, but generate C code
-  // nesting loops such that the innermost loop has the smallest stride for
-  // the best cache behavior.
-  for (int b = 0; b < extended_output_shape.Dims(0); ++b) {
-    for (int y = 0; y < extended_output_shape.Dims(1); ++y) {
-      for (int x = 0; x < extended_output_shape.Dims(2); ++x) {
-        for (int c = 0; c < extended_output_shape.Dims(3); ++c) {
-          const int condition_index =
-              SubscriptToIndex(desc_condition, b, y, x, c);
-          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
-          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
-          output_data[Offset(extended_output_shape, b, y, x, c)] =
-              input_condition_data[condition_index] ? input_x_data[x_index]
-                                                    : input_y_data[y_index];
-        }
-      }
-    }
-  }
-}
-
 void* SelectInit(TfLiteContext* context, const char* buffer, size_t length) {
-  auto* data = new OpData;
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  auto* data = static_cast<OpData*>(
+      context->AllocatePersistentBuffer(context, sizeof(OpData)));
   data->requires_broadcast = false;
   data->has_low_rank_input_condition = false;
   return data;
 }
 
-void SelectFree(TfLiteContext* context, void* buffer) {
-  delete reinterpret_cast<OpData*>(buffer);
+TfLiteStatus CheckBroadcastShape(TfLiteContext* context,
+                                 const TfLiteTensor* input1,
+                                 const TfLiteTensor* input2,
+                                 const TfLiteTensor* input3,
+                                 const TfLiteIntArray* output_shape) {
+  const int dims1 = NumDimensions(input1);
+  const int dims2 = NumDimensions(input2);
+  const int dims3 = NumDimensions(input3);
+  const int out_dims = std::max(std::max(dims1, dims2), dims3);
+  TF_LITE_ENSURE_EQ(context, out_dims, output_shape->size);
+
+  for (int i = 0; i < out_dims; ++i) {
+    const int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
+    const int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
+    const int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1);
+    const int min_value = std::min(std::min(d1, d2), d3);
+    int max_value = std::max(std::max(d1, d2), d3);
+    // If one dimension is 0, others must be 0 or 1.
+    if (min_value == 0) max_value = 0;
+    if (!(d1 == 1 || d1 == max_value) || !(d2 == 1 || d2 == max_value) ||
+        !(d3 == 1 || d3 == max_value)) {
+      MicroPrintf("Given shapes are not broadcastable.");
+      return kTfLiteError;
+    }
+    TF_LITE_ENSURE_EQ(context, output_shape->data[out_dims - i - 1], max_value);
+  }
+  return kTfLiteOk;
 }
 
 template <KernelType kernel_type>
@@ -158,24 +88,33 @@ TfLiteStatus SelectPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input_condition;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensorCondition,
-                                          &input_condition));
-  const TfLiteTensor* input_x;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensorX, &input_x));
-  const TfLiteTensor* input_y;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensorY, &input_y));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input_condition =
+      micro_context->AllocateTempInputTensor(node, kInputTensorCondition);
+
+  TfLiteTensor* input_x =
+      micro_context->AllocateTempInputTensor(node, kInputTensorX);
+
+  TfLiteTensor* input_y =
+      micro_context->AllocateTempInputTensor(node, kInputTensorY);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
 
   // Input must be bool.
   TF_LITE_ENSURE_TYPES_EQ(context, input_condition->type, kTfLiteBool);
   TF_LITE_ENSURE_TYPES_EQ(context, input_x->type, input_y->type);
   output->type = input_x->type;
 
+  // Respect the original output shape when there are mixed shapes to represent
+  // a scalar data.
+  if (GetTensorShape(input_condition).FlatSize() == 1 &&
+      GetTensorShape(input_x).FlatSize() == 1 &&
+      GetTensorShape(input_y).FlatSize() == 1 &&
+      GetTensorShape(output).FlatSize() == 1) {
+    return kTfLiteOk;
+  }
+
   bool same_shape = HaveSameShapes(input_condition, input_x) &&
                     HaveSameShapes(input_x, input_y);
   TfLiteIntArray* output_size;
@@ -197,9 +136,9 @@ TfLiteStatus SelectPrepare(TfLiteContext* context, TfLiteNode* node) {
         break;
       }
       case kVersionTwo: {
-        TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
-                                       context, input_condition, input_x,
-                                       input_y, &output_size));
+        TF_LITE_ENSURE_OK(
+          context, CheckBroadcastShape(context, input_condition, input_x, input_y,
+                                     output->dims));
         data->requires_broadcast = true;
         break;
       }
@@ -210,102 +149,90 @@ TfLiteStatus SelectPrepare(TfLiteContext* context, TfLiteNode* node) {
     output_size = TfLiteIntArrayCopy(input_x->dims);
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input_condition);
+  micro_context->DeallocateTempTfLiteTensor(input_x);
+  micro_context->DeallocateTempTfLiteTensor(input_y);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  TfLiteIntArrayFree(output_size);
+
   return kTfLiteOk;
 }
 
-TfLiteStatus SelectEval(TfLiteContext* context, TfLiteNode* node) {
-  OpData* data = reinterpret_cast<OpData*>(node->user_data);
-  const TfLiteTensor* input_condition;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensorCondition,
-                                          &input_condition));
-  const TfLiteTensor* input_x;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensorX, &input_x));
-  const TfLiteTensor* input_y;
-  TF_LITE_ENSURE_OK(context,
-                    GetInputSafe(context, node, kInputTensorY, &input_y));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
-#define TF_LITE_SELECT(type, op)                                           \
-  op(GetTensorShape(input_condition),                       \
-     GetTensorData<bool>(input_condition),                  \
-     GetTensorShape(input_x), GetTensorData<type>(input_x), \
-     GetTensorShape(input_y), GetTensorData<type>(input_y), \
-     GetTensorShape(output), GetTensorData<type>(output));
-
-#define TF_LITE_SWITCH(type, op)                                               \
-  switch (type) {                                                              \
-    break;                                                                     \
-    case kTfLiteBool:                                                          \
-      TF_LITE_SELECT(bool, op);                                                \
-      break;                                                                   \
-    case kTfLiteFloat32:                                                       \
-      TF_LITE_SELECT(float, op);                                               \
-      break;                                                                   \
-    case kTfLiteUInt8:                                                         \
-      TF_LITE_SELECT(uint8_t, op);                                             \
-      break;                                                                   \
-    case kTfLiteInt8:                                                          \
-      TF_LITE_SELECT(int8_t, op);                                              \
-      break;                                                                   \
-    case kTfLiteInt16:                                                         \
-      TF_LITE_SELECT(int16_t, op);                                             \
-      break;                                                                   \
-    case kTfLiteInt32:                                                         \
-      TF_LITE_SELECT(int32_t, op);                                             \
-      break;                                                                   \
-    case kTfLiteInt64:                                                         \
-      TF_LITE_SELECT(int64_t, op);                                             \
-      break;                                                                   \
-    default:                                                                   \
-      context->ReportError(context,                                            \
-                           "Does not support type other than bool|float|int, " \
-                           "got %d",                                           \
-                           type);                                              \
-      return kTfLiteError;                                                     \
+template <typename T>
+void CallSelect(const TfLiteEvalTensor* input_condition,
+                const TfLiteEvalTensor* input_x,
+                const TfLiteEvalTensor* input_y, TfLiteEvalTensor* output,
+                bool need_broadcast) {
+  using Func = decltype(reference_ops::Select<bool, T>)*;
+  Func select_func;
+  if (need_broadcast) {
+    select_func = reference_ops::BroadcastSelect5DSlow<bool, T>;
+  } else {
+    select_func = reference_ops::Select<bool, T>;
   }
 
-  if (data->has_low_rank_input_condition) {
-    TF_LITE_SWITCH(input_x->type, RankOneSelect);
-  } else if (data->requires_broadcast) {
-    TF_LITE_SWITCH(input_x->type, BroadcastSelect4DSlow);
-  } else {
-    TF_LITE_SWITCH(input_x->type, Select);
+  select_func(tflite::micro::GetTensorShape(input_condition),
+              tflite::micro::GetTensorData<bool>(input_condition),
+              tflite::micro::GetTensorShape(input_x),
+              tflite::micro::GetTensorData<T>(input_x),
+              tflite::micro::GetTensorShape(input_y),
+              tflite::micro::GetTensorData<T>(input_y),
+              tflite::micro::GetTensorShape(output),
+              tflite::micro::GetTensorData<T>(output));
+}
+
+TfLiteStatus SelectEval(TfLiteContext* context, TfLiteNode* node) {
+  OpData* data = static_cast<OpData*>(node->user_data);
+  // Operand order is (condition, x, y); keep it in sync with SelectPrepare.
+  const TfLiteEvalTensor* input_condition =
+      tflite::micro::GetEvalInput(context, node, kInputTensorCondition);
+
+  const TfLiteEvalTensor* input_x =
+      tflite::micro::GetEvalInput(context, node, kInputTensorX);
+
+  const TfLiteEvalTensor* input_y =
+      tflite::micro::GetEvalInput(context, node, kInputTensorY);
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+  switch (input_x->type) {
+    case kTfLiteFloat32:
+      CallSelect<float>(input_condition, input_x, input_y, output,
+                        data->requires_broadcast);
+      break;
+    case kTfLiteInt8:
+      CallSelect<int8_t>(input_condition, input_x, input_y, output,
+                         data->requires_broadcast);
+      break;
+    case kTfLiteInt16:
+      CallSelect<int16_t>(input_condition, input_x, input_y, output,
+                          data->requires_broadcast);
+      break;
+    default:
+      MicroPrintf("Does not support type other than %s, but got %s",
+                  "int8|int16|float32", TfLiteTypeGetName(input_x->type));
+      return kTfLiteError;
   }
 
-#undef TF_LITE_SELECT
-#undef TF_LITE_SWITCH
   return kTfLiteOk;
 }
 
-}  // namespace select
-}  // namespace micro
-}  // namespace ops
-
 TfLiteRegistration Register_SELECT() {
-  return {/*init=*/ops::micro::select::SelectInit,
-          /*free=*/ops::micro::select::SelectFree,
-          /*prepare=*/ops::micro::select::SelectPrepare<ops::micro::select::kVersionOne>,
-          /*invoke=*/ops::micro::select::SelectEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(tflite::SelectInit, tflite::SelectPrepare<kVersionOne>,
+                                   tflite::SelectEval);
 }
 
+// SelectV2 op selects values of 'x' if the corresponding value of 'condition'
+// is true or the value of 'y' if false. Two condition input shapes are valid:
+//
+// 1. Either the same shape (in which case the select is elementwise), or
+// 2. Broadcastable shapes between 'condition', 'x' and 'y'.
 TfLiteRegistration Register_SELECT_V2() {
-  return {/*init=*/ops::micro::select::SelectInit,
-          /*free=*/ops::micro::select::SelectFree,
-          /*prepare=*/ops::micro::select::SelectPrepare<ops::micro::select::kVersionTwo>,
-          /*invoke=*/ops::micro::select::SelectEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(tflite::SelectInit, tflite::SelectPrepare<kVersionTwo>,
+                                   tflite::SelectEval);
 }
 
 }  // namespace tflite
-
 #endif // TF_LITE_STATIC_MEMORY
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cc
index d0cf78b..21af290 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/shape.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -47,8 +48,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteEvalTensor* output =
       tflite::micro::GetEvalOutput(context, node, kOutputTensor);
   if (output->type != kTfLiteInt32) {
-    TF_LITE_KERNEL_LOG(context, "Output type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf("Output type %s (%d) not supported.",
+                TfLiteTypeGetName(output->type), output->type);
     return kTfLiteError;
   } else {
     ExtractShape(input, tflite::micro::GetTensorData<int32_t>(output));
@@ -60,14 +61,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_SHAPE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cc
index 841efa2..16ce966 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/slice.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,246 +13,145 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <array>
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/slice.h"
+
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/portable_tensor.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace slice {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kBeginTensor = 1;
 constexpr int kSizeTensor = 2;
 constexpr int kOutputTensor = 0;
 
-// This Op only supports 1-5D cases and since we use the optimized ops 5D
-// implementation, the 1-4D tensors are mapped to 5D.
 const int kMaxDim = 5;
 
-
-template <typename T>
-TfLiteStatus CalculateOutputShapeVector(TfLiteContext* context,
-                                        const TfLiteTensor* input,
-                                        const TfLiteTensor* begin,
-                                        const TfLiteTensor* size,
-                                        std::vector<int>* output_shape_vector) {
-  for (int idx = 0; idx < NumDimensions(input); ++idx) {
-    T size_value = GetTensorData<T>(size)[idx];
-    if (size_value < 0) {
-      if (size_value != -1) {
-        context->ReportError(context, "Invalid size.");
-        return kTfLiteError;
-      }
-      size_value = SizeOfDimension(input, idx) - GetTensorData<T>(begin)[idx];
-    } else {
-      if (SizeOfDimension(input, idx) <
-          GetTensorData<T>(begin)[idx] + size_value) {
-        context->ReportError(context, "Invalid begin and size.");
-        return kTfLiteError;
-      }
-    }
-    output_shape_vector->push_back(static_cast<int>(size_value));
-  }
-  return kTfLiteOk;
-}
-
 template <typename T>
-void GetBeginAndSizeVectors(int dimensions, const TfLiteTensor* begin,
-                            const TfLiteTensor* size, std::vector<int>* begins,
-                            std::vector<int>* sizes) {
+void GetBeginAndSizeVectors(int dimensions, const TfLiteEvalTensor* begin,
+                            const TfLiteEvalTensor* size, int32_t* begins,
+                            int32_t* sizes) {
+  int offset = kMaxDim - dimensions;
   for (int idx = 0; idx < dimensions; ++idx) {
-    begins->push_back(GetTensorData<T>(begin)[idx]);
-    sizes->push_back(GetTensorData<T>(size)[idx]);
-  }
-}
-
-template <typename T>
-inline void Slice(const tflite::SliceParams& op_params,
-                  const RuntimeShape& input_shape,
-                  const RuntimeShape& output_shape,
-                  SequentialTensorWriter<T>* writer) {
-  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
-  TFLITE_DCHECK_LE(op_params.begin_count, 5);
-  TFLITE_DCHECK_LE(op_params.size_count, 5);
-  const int begin_count = op_params.begin_count;
-  const int size_count = op_params.size_count;
-  // We front-pad the begin and size vectors.
-  std::array<int, 5> start;
-  std::array<int, 5> stop;
-  for (int i = 0; i < 5; ++i) {
-    int padded_i = 5 - i;
-    start[i] =
-        begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
-    stop[i] =
-        (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
-            ? ext_shape.Dims(i)
-            : start[i] + op_params.size[size_count - padded_i];
-  }
-
-  for (int i0 = start[0]; i0 < stop[0]; ++i0) {
-    for (int i1 = start[1]; i1 < stop[1]; ++i1) {
-      for (int i2 = start[2]; i2 < stop[2]; ++i2) {
-        for (int i3 = start[3]; i3 < stop[3]; ++i3) {
-          for (int i4 = start[4]; i4 < stop[4]; ++i4) {
-            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
-          }
-        }
-      }
-    }
+    begins[offset + idx] = tflite::micro::GetTensorData<T>(begin)[idx];
+    sizes[offset + idx] = tflite::micro::GetTensorData<T>(size)[idx];
   }
 }
 
-template <typename T>
-inline void Slice(const tflite::SliceParams& op_params,
-                  const RuntimeShape& input_shape, const T* input_data,
-                  const RuntimeShape& output_shape, T* output_data) {
-  SequentialTensorWriter<T> writer(input_data, output_data);
-  return Slice(op_params, input_shape, output_shape, &writer);
-}
-
-template <typename T>
-inline void Slice(const tflite::SliceParams& op_params,
-                  const RuntimeShape& input_shape, const TfLiteTensor* input,
-                  const RuntimeShape& output_shape, TfLiteTensor* output) {
-  SequentialTensorWriter<T> writer(input, output);
-  return Slice(op_params, input_shape, output_shape, &writer);
-}
-
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  const TfLiteTensor* begin;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kBeginTensor, &begin));
-  const TfLiteTensor* size;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kSizeTensor, &size));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TFLITE_DCHECK(input != nullptr);
+  TfLiteTensor* begin =
+      micro_context->AllocateTempInputTensor(node, kBeginTensor);
+  TFLITE_DCHECK(begin != nullptr);
+  TfLiteTensor* size =
+      micro_context->AllocateTempInputTensor(node, kSizeTensor);
+  TFLITE_DCHECK(size != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TFLITE_DCHECK(output != nullptr);
 
   // Ensure validity of input tensor and its dimension.
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE(context,
-                 begin->type == kTfLiteInt32 || begin->type == kTfLiteInt64);
-  TF_LITE_ENSURE(context,
-                 size->type == kTfLiteInt32 || size->type == kTfLiteInt64);
-  TF_LITE_ENSURE_EQ(context, NumDimensions(begin), 1);
-  TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
-  TF_LITE_ENSURE_EQ(context, NumElements(begin), NumElements(size));
-  TF_LITE_ENSURE_MSG(context, NumDimensions(input) <= kMaxDim,
-                     "Slice op only supports 1D-5D input arrays.");
+  TFLITE_DCHECK(input->type == output->type);
+  TFLITE_DCHECK(begin->type == size->type);
+  TFLITE_DCHECK(begin->type == kTfLiteInt32 || begin->type == kTfLiteInt64);
+  TFLITE_DCHECK(size->type == kTfLiteInt32 || size->type == kTfLiteInt64);
+  TFLITE_DCHECK(NumDimensions(begin) == 1);
+  TFLITE_DCHECK(NumDimensions(size) == 1);
+  TFLITE_DCHECK(NumElements(begin) == NumElements(size));
+  TFLITE_DCHECK(NumDimensions(input) <= kMaxDim);
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(begin);
+  micro_context->DeallocateTempTfLiteTensor(size);
+  micro_context->DeallocateTempTfLiteTensor(output);
 
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  const TfLiteTensor* begin;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kBeginTensor, &begin));
-  const TfLiteTensor* size;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kSizeTensor, &size));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
-
-  std::vector<int> begins;
-  begins.reserve(kMaxDim);
-  std::vector<int> sizes;
-  sizes.reserve(kMaxDim);
-
-  for (int i = NumDimensions(input); i < kMaxDim; ++i) {
-    begins.push_back(0);
-    sizes.push_back(1);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  const TfLiteEvalTensor* begin =
+      tflite::micro::GetEvalInput(context, node, kBeginTensor);
+  const TfLiteEvalTensor* size =
+      tflite::micro::GetEvalInput(context, node, kSizeTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+  // Default every axis to begin=0/size=1 so inputs below 5-D are front-padded.
+  tflite::SliceParams op_params;
+  op_params.begin_count = kMaxDim;
+  op_params.size_count = kMaxDim;
+  for (int i = 0; i < kMaxDim; ++i) {
+    op_params.begin[i] = 0;
+    op_params.size[i] = 1;
   }
 
   if (begin->type == kTfLiteInt32) {
-    GetBeginAndSizeVectors<int32_t>(NumDimensions(input), begin, size, &begins,
-                                    &sizes);
+    GetBeginAndSizeVectors<int32_t>(input->dims->size, begin, size,
+                                    op_params.begin, op_params.size);
   } else if (begin->type == kTfLiteInt64) {
-    GetBeginAndSizeVectors<int64_t>(NumDimensions(input), begin, size, &begins,
-                                    &sizes);
+    GetBeginAndSizeVectors<int64_t>(input->dims->size, begin, size,
+                                    op_params.begin, op_params.size);
   } else {
-    context->ReportError(
-        context, "Type %d is currently not supported by Slice.", begin->type);
+    MicroPrintf("Begin tensor type %s (%d) not supported.",
+                TfLiteTypeGetName(begin->type), begin->type);
     return kTfLiteError;
   }
 
-  // The Slice op implementation only accepts 5-D sizes. That constraint is, for
-  // the present, maintained here.
-  //
-  // The dimensions in the kernel used to be in reverse-order, and TFLite
-  // arranged the begins and sizes vectors accordingly. This macro incorporates
-  // the needed reversing.
-#define TF_LITE_SLICE(data_type)                                               \
-  {                                                                            \
-    TF_LITE_ENSURE_EQ(context, begins.size(), kMaxDim);                        \
-    TF_LITE_ENSURE_EQ(context, sizes.size(), kMaxDim);                         \
-    tflite::SliceParams op_params;                                             \
-    op_params.begin_count = kMaxDim;                                           \
-    op_params.size_count = kMaxDim;                                            \
-    for (int i = 0; i < kMaxDim; ++i) {                                        \
-      op_params.begin[i] = begins[i];                                          \
-      op_params.size[i] = sizes[i];                                            \
-    }                                                                          \
-                                                                               \
-    Slice<data_type>(op_params, GetTensorShape(input), input, \
-                     GetTensorShape(output), output);         \
-  }
-
   switch (input->type) {
     case kTfLiteFloat32:
-      TF_LITE_SLICE(float);
+      reference_ops::Slice<float>(op_params,
+                                  tflite::micro::GetTensorShape(input),
+                                  tflite::micro::GetTensorData<float>(input),
+                                  tflite::micro::GetTensorShape(output),
+                                  tflite::micro::GetTensorData<float>(output));
       break;
     case kTfLiteInt32:
-      TF_LITE_SLICE(int32_t);
-      break;
-    case kTfLiteInt64:
-      TF_LITE_SLICE(int64_t);
+      reference_ops::Slice<int32_t>(
+          op_params, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int32_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int32_t>(output));
       break;
     case kTfLiteInt8:
-      TF_LITE_SLICE(int8_t);
+      reference_ops::Slice<int8_t>(
+          op_params, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
       break;
     case kTfLiteInt16:
-      TF_LITE_SLICE(int16_t);
-      break;
-    case kTfLiteUInt8:
-      TF_LITE_SLICE(uint8_t);
-      break;
-    case kTfLiteBool:
-      TF_LITE_SLICE(bool);
+      reference_ops::Slice<int16_t>(
+          op_params, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int16_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
       break;
     default:
-      context->ReportError(
-          context, "Type %d is currently not supported by Slice.", input->type);
+      MicroPrintf("Input tensor type %s (%d) not supported.",
+                  TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
   }
-#undef TF_LITE_SLICE
   return kTfLiteOk;
 }
 
-}  // namespace slice
-}  // namespace micro
-}  // namespace ops
+}  // namespace
 
 TfLiteRegistration Register_SLICE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/ops::micro::slice::Prepare,
-          /*invoke=*/ops::micro::slice::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cc
index e0ce24c..d5d6355 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -29,66 +29,55 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
 
-void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
-                      const SoftmaxParams& op_data) {
-  if (input->type == kTfLiteUInt8) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_U8
-    return;
-    #endif
-
-    tflite::reference_ops::Softmax(
-        op_data, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<uint8_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<uint8_t>(output));
-  } else if (input->type == kTfLiteInt8) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-    return;
-    #endif
+struct CMSISNNSoftmaxParams {
+  SoftmaxParams softmax_params;
+  int32_t num_rows;
+  int32_t row_size;
+};
 
-    if (output->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16
-      return;
-      #endif
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context,
+                                           sizeof(CMSISNNSoftmaxParams));
+}
 
-      tflite::reference_ops::Softmax(
-          op_data, tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int16_t>(output));
-    } else {
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8
-      return;
-      #endif
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
 
-      const auto input_shape = tflite::micro::GetTensorShape(input);
-      const auto output_shape = tflite::micro::GetTensorShape(output);
-      const int trailing_dim = input_shape.DimensionsCount() - 1;
-      const int outer_size =
-          MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-      const int depth =
-          MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
-      arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), outer_size,
-                     depth, op_data.input_multiplier, op_data.input_left_shift,
-                     op_data.diff_min,
-                     tflite::micro::GetTensorData<int8_t>(output));
-    }
-  } else {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    return;
-    #endif
+  TF_LITE_ENSURE(context, node->user_data != nullptr);
+  CMSISNNSoftmaxParams* op_data =
+      static_cast<CMSISNNSoftmaxParams*>(node->user_data);
 
-    tflite::reference_ops::SoftmaxInt16(
-        op_data, tflite::micro::GetTensorShape(input),
-        tflite::micro::GetTensorData<int16_t>(input),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<int16_t>(output));
-  }
+  auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+  auto ret_val = CalculateSoftmaxParams(context, input, output, params,
+                                        &op_data->softmax_params);
+
+  const auto input_shape = GetTensorShape(input);
+  const auto output_shape = GetTensorShape(output);
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  op_data->num_rows = outer_size;
+  op_data->row_size = depth;
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return ret_val;
 }
 
 TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
@@ -96,72 +85,155 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const SoftmaxParams data =
-      *static_cast<const SoftmaxParams*>(node->user_data);
+  const CMSISNNSoftmaxParams op_data =
+      *static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
 
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::Softmax(
-          data, tflite::micro::GetTensorShape(input),
+          op_data.softmax_params, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      SoftmaxQuantized(input, output, data);
-      return kTfLiteOk;
-    }
-    case kTfLiteUInt8: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_U8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#endif
+      if (output->type == kTfLiteInt8) {  // int8 input -> int8 output
+#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
+#endif
+        arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input),
+                       op_data.num_rows, op_data.row_size,
+                       op_data.softmax_params.input_multiplier,
+                       op_data.softmax_params.input_left_shift,
+                       op_data.softmax_params.diff_min,
+                       tflite::micro::GetTensorData<int8_t>(output));
+      } else {  // any other output type is written as int16
+#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
+#endif
+        arm_softmax_s8_s16(tflite::micro::GetTensorData<int8_t>(input),
+                           op_data.num_rows, op_data.row_size,
+                           op_data.softmax_params.input_multiplier,
+                           op_data.softmax_params.input_left_shift,
+                           op_data.softmax_params.diff_min,
+                           tflite::micro::GetTensorData<int16_t>(output));
+      }
+      return kTfLiteOk;
+    }
+    case kTfLiteInt16: {
       return kTfLiteOk;
     }
     case kTfLiteInt16: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
-      SoftmaxQuantized(input, output, data);
+#endif
+      const cmsis_nn_softmax_lut_s16 softmax_params = {
+          .exp_lut = op_data.softmax_params.exp_lut,
+          .one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut};
+
+      TFLITE_DCHECK_EQ(
+          arm_softmax_s16(
+              tflite::micro::GetTensorData<int16_t>(input), op_data.num_rows,
+              op_data.row_size, op_data.softmax_params.input_multiplier,
+              op_data.softmax_params.input_left_shift, &softmax_params,
+              tflite::micro::GetTensorData<int16_t>(output)),
+          ARM_CMSIS_NN_SUCCESS);
       return kTfLiteOk;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
 }
 
+TfLiteStatus SoftmaxEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const CMSISNNSoftmaxParams op_data =
+      *static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
+
+  arm_softmax_s8(tflite::micro::GetTensorData<int8_t>(input), op_data.num_rows,
+                 op_data.row_size, op_data.softmax_params.input_multiplier,
+                 op_data.softmax_params.input_left_shift,
+                 op_data.softmax_params.diff_min,
+                 tflite::micro::GetTensorData<int8_t>(output));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus SoftmaxEvalInt8_Int16(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const CMSISNNSoftmaxParams op_data =
+      *static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
+
+  arm_softmax_s8_s16(
+      tflite::micro::GetTensorData<int8_t>(input), op_data.num_rows,
+      op_data.row_size, op_data.softmax_params.input_multiplier,
+      op_data.softmax_params.input_left_shift, op_data.softmax_params.diff_min,
+      tflite::micro::GetTensorData<int16_t>(output));
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus SoftmaxEvalInt16(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const CMSISNNSoftmaxParams op_data =
+      *static_cast<const CMSISNNSoftmaxParams*>(node->user_data);
+
+  const cmsis_nn_softmax_lut_s16 softmax_params = {
+      .exp_lut = op_data.softmax_params.exp_lut,
+      .one_by_one_lut = op_data.softmax_params.one_over_one_plus_x_lut};
+
+  TFLITE_DCHECK_EQ(
+      arm_softmax_s16(tflite::micro::GetTensorData<int16_t>(input),
+                      op_data.num_rows, op_data.row_size,
+                      op_data.softmax_params.input_multiplier,
+                      op_data.softmax_params.input_left_shift, &softmax_params,
+                      tflite::micro::GetTensorData<int16_t>(output)),
+      ARM_CMSIS_NN_SUCCESS);
+
+  return kTfLiteOk;
+}
+
 }  // namespace
 
 TfLiteRegistration Register_SOFTMAX() {
-  return {/*init=*/SoftmaxInit,
-          /*free=*/nullptr,
-          /*prepare=*/SoftmaxPrepare,
-          /*invoke=*/SoftmaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEval);
+}
+
+TfLiteRegistration Register_SOFTMAX_INT8() {
+  return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8);
+}
+
+TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
+  return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt8_Int16);
+}
+
+TfLiteRegistration Register_SOFTMAX_INT16() {
+  return tflite::micro::RegisterOp(Init, Prepare, SoftmaxEvalInt16);
 }
 
 }  // namespace tflite
@@ -181,6 +253,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
@@ -192,110 +265,28 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
-#include "freertos/FreeRTOS.h"
 #include <esp_timer.h>
 
+#if ESP_NN
 #include "edge-impulse-sdk/porting/espressif/ESP-NN/include/esp_nn.h"
+#endif
 
 long long softmax_total_time = 0;
 
 namespace tflite {
 namespace {
+// Softmax parameter data that persists in user_data
+const int kInt16LUTArraySize = 513;
 
 struct NodeData {
   SoftmaxParams op_data;
+#if ESP_NN
   int buffer_idx;
+#endif
 };
 
-// Softmax parameter data that persists in user_data
-const int kInt16LUTArraySize = 513;
-
-TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
-                                    const TfLiteTensor* input,
-                                    TfLiteTensor* output,
-                                    const TfLiteSoftmaxParams* params,
-                                    SoftmaxParams* op_data) {
-  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
-    if (input->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
-      TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
-                          (0.001f * 1.f / 32768));
-    } else {  // input->type == kTfLiteInt8
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
-      if (output->type == kTfLiteInt16) {
-        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
-        TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 65536,
-                            (0.001f * 1.f / 65536));
-      } else {  // output->type == kTfLiteint8
-        TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
-        TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
-        TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
-      }
-    }
-
-    static const int kScaledDiffIntegerBits = 5;
-
-    // Calculate input_multiplier and input_left_shift
-    if (input->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      int input_left_shift;
-      double input_scale_beta_rescale =
-          static_cast<double>(input->params.scale) *
-          static_cast<double>(params->beta) /
-          (10.0 / 65535.0);  // scale the input_diff such that [-65535, 0]
-                             // correspond to [-10.0, 0.0]
-      QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
-                         &input_left_shift);
-      op_data->input_left_shift = input_left_shift;
-    } else { // kTfLiteInt8
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
-      return kTfLiteError;
-      #endif
-
-      int input_left_shift;
-      tflite::PreprocessSoftmaxScaling(
-          static_cast<double>(params->beta),
-          static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
-          &op_data->input_multiplier, &input_left_shift);
-      op_data->input_left_shift = input_left_shift;
-      op_data->diff_min =
-          -1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
-                                              op_data->input_left_shift);
-    }
-  } else {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return kTfLiteError;
-    #endif
-
-    TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
-    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
-    op_data->beta = static_cast<double>(params->beta);
-  }
-  return kTfLiteOk;
-}
-
 static void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(NodeData));
@@ -304,31 +295,29 @@ static void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 void SoftmaxQuantized(TfLiteContext* context, const TfLiteEvalTensor* input,
                       TfLiteEvalTensor* output, const NodeData* data) {
   if (input->type == kTfLiteInt8) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return;
-    #endif
-
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
+                      TfLiteTypeGetName(input->type), input->type);
+      return;
+#endif
     if (output->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16
+#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(output->type), output->type);
       return;
-      #endif
-
+#endif
       tflite::reference_ops::Softmax(
           data->op_data, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<int8_t>(input),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<int16_t>(output));
     } else {
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8
+#if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(output->type), output->type);
       return;
-      #endif
-
+#endif
+#if ESP_NN
       const int32_t input_beta_multiplier = data->op_data.input_multiplier;
       const int32_t input_beta_left_shift = data->op_data.input_left_shift;
       const int diff_min = data->op_data.diff_min;
@@ -348,14 +337,20 @@ void SoftmaxQuantized(TfLiteContext* context, const TfLiteEvalTensor* input,
       esp_nn_set_softmax_scratch_buf(scratch_buf);
       esp_nn_softmax_s8(in_ptr, outer_size, depth, input_beta_multiplier,
                         input_beta_left_shift, diff_min, out_ptr);
+#else
+      tflite::reference_ops::Softmax(
+          data->op_data, tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int8_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int8_t>(output));
+#endif
     }
   } else {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
     TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                     TfLiteTypeGetName(input->type), input->type);
     return;
-    #endif
-
+#endif
     tflite::reference_ops::SoftmaxInt16(
         data->op_data, tflite::micro::GetTensorShape(input),
         tflite::micro::GetTensorData<int16_t>(input),
@@ -374,38 +369,35 @@ static TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   long long start_time = esp_timer_get_time();
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
+#endif
       tflite::reference_ops::Softmax(
           data.op_data, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output));
-      break;
     }
-    case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+    break;
+    case kTfLiteInt8:
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       SoftmaxQuantized(context, input, output, &data);
-      break;
-    }
+    break;
     case kTfLiteInt16: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       SoftmaxQuantized(context, input, output, &data);
-      break;
     }
+    break;
     default:
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                          TfLiteTypeGetName(input->type), input->type);
@@ -415,67 +407,26 @@ static TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   return kTfLiteOk;
 }
 
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE(context, node->user_data != nullptr);
   NodeData* data = static_cast<NodeData*>(node->user_data);
-
-  // Only allocate LUTs for KTfLiteInt16 data type
-  if (input->type == kTfLiteInt16) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return kTfLiteError;
-    #endif
-
-    void* raw_exp_lut = context->AllocatePersistentBuffer(
-        context, sizeof(int16_t) * kInt16LUTArraySize);
-    TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
-    data->op_data.exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
-    void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
-        context, sizeof(int16_t) * kInt16LUTArraySize);
-    TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
-    data->op_data.one_over_one_plus_x_lut =
-        reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
-  }
-
-  if (output->type == kTfLiteInt16) {
-    TF_LITE_ENSURE(context,
-                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
-  } else {
-    TF_LITE_ENSURE_EQ(context, input->type, output->type);
-  }
-
-  // Populate LUT if required
-  if (input->type == kTfLiteInt16) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return kTfLiteError;
-    #endif
-
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
-    // exp LUT only used on negative values
-    // we consider exp(-10.0) is insignificant to accumulation
-    gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
-            data->op_data.exp_lut, kInt16LUTArraySize);
-    gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
-            data->op_data.one_over_one_plus_x_lut, kInt16LUTArraySize);
-    data->op_data.zero_point = output->params.zero_point;
-    data->op_data.scale = output->params.scale;
-  }
+  SoftmaxParams* op_data = static_cast<SoftmaxParams*>(&data->op_data);
 
   auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
   auto ret_val =
-      CalculateSoftmaxParams(context, input, output, params, &data->op_data);
+      CalculateSoftmaxParams(context, input, output, params, op_data);
 
+#if ESP_NN
   if (output->type == kTfLiteInt8 && input->type == kTfLiteInt8) {
     const int32_t input_width = input->dims->data[1];
     const int32_t input_height = input->dims->data[2];
@@ -486,26 +437,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         context, scratch_buf_size, &data->buffer_idx));
     }
   }
+#endif
 
-  //micro_context->DeallocateTempTfLiteTensor(input);
-  //micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return ret_val;
 }
 
 }  // namespace
 
 TfLiteRegistration Register_SOFTMAX() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
+
 #else
 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
@@ -533,6 +479,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -540,25 +487,13 @@ namespace {
 void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
                       const SoftmaxParams& op_data) {
   if (input->type == kTfLiteInt8) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-      return;
-      #endif
-
     if (output->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I16
-      return;
-      #endif
-
       tflite::reference_ops::Softmax(
           op_data, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<int8_t>(input),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<int16_t>(output));
-    } else { // kTfLiteInt8
-      #if EI_TFLITE_DISABLE_SOFTMAX_OUT_I8
-      return;
-      #endif
-
+    } else {
       tflite::reference_ops::Softmax(
           op_data, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<int8_t>(input),
@@ -566,10 +501,6 @@ void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output,
           tflite::micro::GetTensorData<int8_t>(output));
     }
   } else {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    return;
-    #endif
-
     tflite::reference_ops::SoftmaxInt16(
         op_data, tflite::micro::GetTensorShape(input),
         tflite::micro::GetTensorData<int16_t>(input),
@@ -587,12 +518,11 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
 
   switch (input->type) {
     case kTfLiteFloat32: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       tflite::reference_ops::Softmax(
           op_data, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
@@ -601,42 +531,33 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     }
     case kTfLiteInt8: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       SoftmaxQuantized(input, output, op_data);
       return kTfLiteOk;
     }
     case kTfLiteInt16: {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                      TfLiteTypeGetName(input->type), input->type);
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       SoftmaxQuantized(input, output, op_data);
       return kTfLiteOk;
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
 }
 }  // namespace
 
 TfLiteRegistration Register_SOFTMAX() {
-  return {/*init=*/SoftmaxInit,
-          /*free=*/nullptr,
-          /*prepare=*/SoftmaxPrepare,
-          /*invoke=*/SoftmaxEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(SoftmaxInit, SoftmaxPrepare, SoftmaxEval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h
index 4235510..fb15d38 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,8 +24,47 @@ namespace tflite {
 
 void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);
 
+// Common helper function to SoftmaxPrepare.
+TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
+                                    const TfLiteTensor* input,
+                                    TfLiteTensor* output,
+                                    const TfLiteSoftmaxParams* params,
+                                    SoftmaxParams* op_data);
+
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_SOFTMAX();
+
+#if defined(XTENSA) || defined(CMSIS_NN)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8 input and int16 output.
+TfLiteRegistration Register_SOFTMAX_INT8_INT16();
+#else
+inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
+  return Register_SOFTMAX();
+}
+#endif
+
+#if defined(CMSIS_NN)
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int8 input/output and uses the latency optimized implementations.
+TfLiteRegistration Register_SOFTMAX_INT8();
+
+// Returns a TfLiteRegistration struct for kernel variant that only supports
+// int16 input/output and uses the latency optimized implementations.
+TfLiteRegistration Register_SOFTMAX_INT16();
+
+#else
+inline TfLiteRegistration Register_SOFTMAX_INT8() { return Register_SOFTMAX(); }
+
+inline TfLiteRegistration Register_SOFTMAX_INT16() {
+  return Register_SOFTMAX();
+}
+#endif
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cc
index 7ca41b8..82ec071 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,36 +20,91 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
 
 namespace tflite {
 
 namespace {
 // Softmax parameter data that persists in user_data
-const int kInt16LUTArraySize = 513;
+const int kInt16LUTArraySize = LUTSize<int16_t>();
+
+TfLiteStatus InitializeLutForInt16(TfLiteContext* context,
+                                   const TfLiteTensor* input,
+                                   TfLiteTensor* output,
+                                   SoftmaxParams* op_data) {
+  // Only allocate LUTs for kTfLiteInt16 data type
+  if (input->type == kTfLiteInt16) {
+    void* raw_exp_lut = context->AllocatePersistentBuffer(
+        context, sizeof(int16_t) * kInt16LUTArraySize);
+    TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
+    op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
+    void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
+        context, sizeof(int16_t) * kInt16LUTArraySize);
+    TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
+    op_data->one_over_one_plus_x_lut =
+        reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
+  }
+
+  if (output->type == kTfLiteInt16) {
+    TF_LITE_ENSURE(context,
+                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
+  } else {
+    TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  }
+
+  // Populate LUT if required
+  if (input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    // exp LUT only used on negative values
+    // we consider exp(-10.0) is insignificant to accumulation
+    const int32_t range = std::numeric_limits<int16_t>::max() -
+                          std::numeric_limits<int16_t>::min();
+    LUTPopulate<int16_t>(
+        10.0f / range, std::numeric_limits<int16_t>::max(), 2.0f / range, 0,
+        [](float value) { return std::exp(value); }, op_data->exp_lut);
+
+    LUTPopulate<int16_t>(
+        1.0f / range, std::numeric_limits<int16_t>::min(), 2.0f / range, 0,
+        [](float value) { return 1.0f / (1.0f + value); },
+        op_data->one_over_one_plus_x_lut);
+
+    op_data->zero_point = output->params.zero_point;
+    op_data->scale = output->params.scale;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
 
 TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
                                     const TfLiteTensor* input,
                                     TfLiteTensor* output,
                                     const TfLiteSoftmaxParams* params,
                                     SoftmaxParams* op_data) {
+#if !EI_TFLITE_DISABLE_SOFTMAX_IN_I16  // value test, same as the #if guards below
+  if (InitializeLutForInt16(context, input, output, op_data) != kTfLiteOk) {
+    return kTfLiteError;
+  }
+#endif
+
   if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     if (input->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+                      TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
+      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
       TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
       TF_LITE_ENSURE_NEAR(context, output->params.scale, 1.f / 32768,
                           (0.001f * 1.f / 32768));
     } else {  // input->type == kTfLiteInt8
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+                      TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
       if (output->type == kTfLiteInt16) {
         TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
@@ -66,12 +121,11 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
 
     // Calculate input_multiplier and input_left_shift
     if (input->type == kTfLiteInt16) {
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       int input_left_shift;
       double input_scale_beta_rescale =
           static_cast<double>(input->params.scale) *
@@ -81,13 +135,12 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
       QuantizeMultiplier(input_scale_beta_rescale, &op_data->input_multiplier,
                          &input_left_shift);
       op_data->input_left_shift = input_left_shift;
-    } else { // kTfLiteInt8
-      #if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
+    } else {
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_I8
       TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                       TfLiteTypeGetName(input->type), input->type);
       return kTfLiteError;
-      #endif
-
+#endif
       int input_left_shift;
       tflite::PreprocessSoftmaxScaling(
           static_cast<double>(params->beta),
@@ -99,12 +152,11 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
                                               op_data->input_left_shift);
     }
   } else {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
+#if EI_TFLITE_DISABLE_SOFTMAX_IN_F32
     TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                     TfLiteTypeGetName(input->type), input->type);
     return kTfLiteError;
-    #endif
-
+#endif
     TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
     TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
     op_data->beta = static_cast<double>(params->beta);
@@ -112,71 +164,32 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
   return kTfLiteOk;
 }
 
-}  // namespace
-
 void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams));
 }
 
 TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE(context, node->user_data != nullptr);
   SoftmaxParams* op_data = static_cast<SoftmaxParams*>(node->user_data);
-  // Only allocate LUTs for KTfLiteInt16 data type
-  if (input->type == kTfLiteInt16) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return kTfLiteError;
-    #endif
-
-    void* raw_exp_lut = context->AllocatePersistentBuffer(
-        context, sizeof(int16_t) * kInt16LUTArraySize);
-    TF_LITE_ENSURE(context, raw_exp_lut != nullptr);
-    op_data->exp_lut = reinterpret_cast<int16_t*>(raw_exp_lut);
-    void* one_over_one_plus_x_lut = context->AllocatePersistentBuffer(
-        context, sizeof(int16_t) * kInt16LUTArraySize);
-    TF_LITE_ENSURE(context, one_over_one_plus_x_lut != nullptr);
-    op_data->one_over_one_plus_x_lut =
-        reinterpret_cast<int16_t*>(one_over_one_plus_x_lut);
-  }
-
-  if (output->type == kTfLiteInt16) {
-    TF_LITE_ENSURE(context,
-                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
-  } else {
-    TF_LITE_ENSURE_EQ(context, input->type, output->type);
-  }
-
-  // Populate LUT if required
-  if (input->type == kTfLiteInt16) {
-    #if EI_TFLITE_DISABLE_SOFTMAX_IN_I16
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                    TfLiteTypeGetName(input->type), input->type);
-    return kTfLiteError;
-    #endif
-
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
-    // exp LUT only used on negative values
-    // we consider exp(-10.0) is insignificant to accumulation
-    gen_lut([](float value) { return std::exp(value); }, -10.0f, 0.0f,
-            op_data->exp_lut, kInt16LUTArraySize);
-    gen_lut([](float value) { return 1.0f / (1.0f + value); }, 0.0f, 1.0f,
-            op_data->one_over_one_plus_x_lut, kInt16LUTArraySize);
-    op_data->zero_point = output->params.zero_point;
-    op_data->scale = output->params.scale;
-  }
 
   auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);
-  return CalculateSoftmaxParams(context, input, output, params, op_data);
+  auto ret_val =
+      CalculateSoftmaxParams(context, input, output, params, op_data);
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return ret_val;
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cc
index 8b5659f..5a7f414 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_batch_nd.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -44,11 +45,15 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, input != nullptr && output != nullptr);
 
   TF_LITE_ENSURE(context, NumDimensions(input) >= kInputOutputMinDimensionNum);
@@ -57,6 +62,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE(context, NumDimensions(output) <= kInputOutputMaxDimensionNum);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -98,8 +105,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorData<int8_t>(output));
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -108,14 +115,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace.
 
 TfLiteRegistration Register_SPACE_TO_BATCH_ND() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cc
new file mode 100644
index 0000000..2ab0faa
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/space_to_depth.cc
@@ -0,0 +1,142 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+constexpr int kBatchRank = 0;
+constexpr int kHeightRank = 1;
+constexpr int kWidthRank = 2;
+constexpr int kDepthRank = 3;
+
+// Validates a SPACE_TO_DEPTH node and records its output shape.
+//
+// Requires one rank-4 input and one output of the same type (float32 or
+// int8), a positive block size, and input height/width that are exact
+// multiples of it. Returns kTfLiteOk once the output dims have been written.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  // block_size is the divisor below, so reject zero or negative values up
+  // front. Checking it before any temp tensor is allocated keeps this exit
+  // path free of allocations that would need releasing.
+  const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
+
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+
+  auto data_type = output->type;
+  TF_LITE_ENSURE(context,
+                 data_type == kTfLiteFloat32 || data_type == kTfLiteInt8);
+  TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+
+  const int input_height = input->dims->data[kHeightRank];
+  const int input_width = input->dims->data[kWidthRank];
+  int output_height = input_height / block_size;
+  int output_width = input_width / block_size;
+
+  // The spatial dims must divide evenly into blocks.
+  TF_LITE_ENSURE_EQ(context, input_height, output_height * block_size);
+  TF_LITE_ENSURE_EQ(context, input_width, output_width * block_size);
+
+  // Relocate dims to the persistent storage arena before changing them,
+  // otherwise we'd be modifying temporary copies made by the interpreters each
+  // time they process the layer.
+  TfLiteEvalTensor* output_eval =
+      micro::GetEvalOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE_OK(context, micro::CreateWritableTensorDimsWithCopy(
+                                 context, output, output_eval));
+
+  output->dims->data[kBatchRank] = input->dims->data[kBatchRank];
+  output->dims->data[kHeightRank] = output_height;
+  output->dims->data[kWidthRank] = output_width;
+  output->dims->data[kDepthRank] =
+      input->dims->data[kDepthRank] * block_size * block_size;
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+// Applies reference_ops::SpaceToDepth to float32 or int8 tensors using the
+// node's block size. Any other input type is logged and yields kTfLiteError;
+// otherwise kTfLiteOk is returned.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params =
+      reinterpret_cast<TfLiteSpaceToDepthParams*>(node->builtin_data);
+
+  const TfLiteEvalTensor* input =
+      micro::GetEvalInput(context, node, kInputTensor);
+  TfLiteEvalTensor* output = micro::GetEvalOutput(context, node, kOutputTensor);
+
+  SpaceToDepthParams op_params;
+  op_params.block_size = params->block_size;
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
+                                  micro::GetTensorData<float>(input),
+                                  micro::GetTensorShape(output),
+                                  micro::GetTensorData<float>(output));
+      break;
+    case kTfLiteInt8:
+      reference_ops::SpaceToDepth(op_params, micro::GetTensorShape(input),
+                                  micro::GetTensorData<int8_t>(input),
+                                  micro::GetTensorShape(output),
+                                  micro::GetTensorData<int8_t>(output));
+      break;
+    default:
+      MicroPrintf("SPACE_TO_DEPTH only supports FLOAT32 and INT8, got %s.",
+                  TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+// Builds the SPACE_TO_DEPTH registration from Prepare and Eval (no init).
+TfLiteRegistration Register_SPACE_TO_DEPTH() {
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cc
index 7071f5c..d4d5280 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/split.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace ops {
@@ -68,7 +69,8 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
 }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* axis = GetInput(context, node, 0);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, axis != nullptr);
 
   // Dynamic output tensors are needed if axis tensor is not constant.
@@ -76,6 +78,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // constant axis tensor for now.
   TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                      "Non constant axis tensor not supported");
+
+  micro_context->DeallocateTempTfLiteTensor(axis);
   return kTfLiteOk;
 }
 
@@ -95,9 +99,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteFloat32: {
       return SplitImpl<float>(context, node, input, axis_value);
     }
-    case kTfLiteUInt8: {
-      return SplitImpl<uint8_t>(context, node, input, axis_value);
-    }
     case kTfLiteInt8: {
       return SplitImpl<int8_t>(context, node, input, axis_value);
     }
@@ -108,11 +109,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       return SplitImpl<int32_t>(context, node, input, axis_value);
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Type %s currently not supported.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
-#undef TF_LITE_SPLIT
 
   return kTfLiteOk;
 }
@@ -120,14 +120,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace split
 
 TfLiteRegistration Register_SPLIT() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/split::Prepare,
-          /*invoke=*/split::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, split::Prepare, split::Eval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cc
index eb68496..caf6083 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/split_v.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,11 +19,11 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace split_v {
+
+namespace {
 
 template <typename T>
 TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
@@ -74,13 +74,18 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // Verifies the node has three inputs and that the axis tensor (input 2) is
+  // present and constant. Returns kTfLiteOk when both hold.
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 3);
 
+  MicroContext* micro_context = GetMicroContext(context);
   // Dynamic output tensors are needed if axis tensor is not constant.
   // But Micro doesn't support dynamic memory allocation, so we only support
   // constant axis tensor for now.
-  const TfLiteTensor* axis = GetInput(context, node, 2);
+  TfLiteTensor* axis = micro_context->AllocateTempInputTensor(node, 2);
+  // Reject a missing axis tensor before it is inspected below.
+  TF_LITE_ENSURE(context, axis != nullptr);
   TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
                      "Non constant axis tensor not supported");
-
+  micro_context->DeallocateTempTfLiteTensor(axis);
   return kTfLiteOk;
 }
 
@@ -110,26 +111,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       return SplitImpl<int32_t>(context, node, input, axis_value);
     }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s currently not supported.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Type %s currently not supported.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
-}  // namespace split_v
+}  // namespace
 
 TfLiteRegistration Register_SPLIT_V() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/split_v::Prepare,
-          /*invoke=*/split_v::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cc
index c4ef5be..e45cbbe 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/squared_difference.cc
@@ -1,8 +1,11 @@
 /* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
     http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,7 +18,8 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -49,9 +53,17 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
   output->type = input2->type;
@@ -116,6 +128,9 @@ TfLiteStatus SquaredDifferencePrepare(TfLiteContext* context,
 
   data->requires_broadcast = !HaveSameShapes(input1, input2);
 
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -225,14 +240,8 @@ TfLiteStatus SquaredDifferenceEval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_SQUARED_DIFFERENCE() {
-  return {/*init=*/SquaredDifferenceInit,
-          /*free=*/nullptr,
-          /*prepare=*/SquaredDifferencePrepare,
-          /*invoke=*/SquaredDifferenceEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      SquaredDifferenceInit, SquaredDifferencePrepare, SquaredDifferenceEval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cc
index 2cfb39d..8a42410 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/squeeze.cc
@@ -22,17 +22,25 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
 
 struct SqueezeContext {
-  SqueezeContext(TfLiteContext* context, TfLiteNode* node)
-      : params(reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data)),
-        input(GetInput(context, node, 0)),
-        output(GetOutput(context, node, 0)) {}
+  SqueezeContext(TfLiteContext* context, TfLiteNode* node) {
+    params = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
+    micro_context = GetMicroContext(context);
+    input = micro_context->AllocateTempInputTensor(node, 0);
+    output = micro_context->AllocateTempOutputTensor(node, 0);
+  }
+  ~SqueezeContext() {
+    micro_context->DeallocateTempTfLiteTensor(input);
+    micro_context->DeallocateTempTfLiteTensor(output);
+  }
+  MicroContext* micro_context;
   TfLiteSqueezeParams* params;
-  const TfLiteTensor* const input;
+  TfLiteTensor* input;
   TfLiteTensor* output;
 };
 
@@ -80,32 +88,31 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  SqueezeContext op_context(context, node);
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
 
-  if (op_context.input->type == kTfLiteString) {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(op_context.input->type),
-                       op_context.input->type);
+  if (input->type == kTfLiteString) {
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                input->type);
     return kTfLiteError;
   }
 
-  TF_LITE_ENSURE_EQ(context, op_context.input->bytes, op_context.output->bytes);
-  memcpy(op_context.output->data.raw, op_context.input->data.raw,
-         op_context.input->bytes);
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  size_t input_byte_size;
+  size_t output_byte_size;
+  TF_LITE_ENSURE_OK(context,
+                    TfLiteEvalTensorByteLength(input, &input_byte_size));
+  TF_LITE_ENSURE_OK(context,
+                    TfLiteEvalTensorByteLength(output, &output_byte_size));
+
+  TF_LITE_ENSURE_EQ(context, input_byte_size, output_byte_size);
+  memcpy(output->data.raw, input->data.raw, input_byte_size);
   return kTfLiteOk;
 }
 
 }  // namespace
 
 TfLiteRegistration Register_SQUEEZE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cc
index 57cbc59..b8c5d71 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/strided_slice.cc
@@ -1,4 +1,4 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -23,11 +23,11 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace strided_slice {
+
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kBeginTensor = 1;
@@ -38,18 +38,27 @@ constexpr int kOutputTensor = 0;
 struct StridedSliceContext {
   StridedSliceContext(TfLiteContext* context, TfLiteNode* node) {
     params = reinterpret_cast<TfLiteStridedSliceParams*>(node->builtin_data);
-    input = GetInput(context, node, kInputTensor);
-    begin = GetInput(context, node, kBeginTensor);
-    end = GetInput(context, node, kEndTensor);
-    strides = GetInput(context, node, kStridesTensor);
-    output = GetOutput(context, node, kOutputTensor);
+    micro_context = GetMicroContext(context);
+    input = micro_context->AllocateTempInputTensor(node, kInputTensor);
+    begin = micro_context->AllocateTempInputTensor(node, kBeginTensor);
+    end = micro_context->AllocateTempInputTensor(node, kEndTensor);
+    strides = micro_context->AllocateTempInputTensor(node, kStridesTensor);
+    output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
     dims = NumDimensions(input);
   }
+  ~StridedSliceContext() {
+    micro_context->DeallocateTempTfLiteTensor(input);
+    micro_context->DeallocateTempTfLiteTensor(begin);
+    micro_context->DeallocateTempTfLiteTensor(end);
+    micro_context->DeallocateTempTfLiteTensor(strides);
+    micro_context->DeallocateTempTfLiteTensor(output);
+  }
   const TfLiteStridedSliceParams* params;
-  const TfLiteTensor* input;
-  const TfLiteTensor* begin;
-  const TfLiteTensor* end;
-  const TfLiteTensor* strides;
+  MicroContext* micro_context;
+  TfLiteTensor* input;
+  TfLiteTensor* begin;
+  TfLiteTensor* end;
+  TfLiteTensor* strides;
   TfLiteTensor* output;
   int dims;
 };
@@ -225,25 +234,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                                   tflite::micro::GetTensorData<bool>(output));
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
-}  // namespace strided_slice
+
+}  // namespace
 
 TfLiteRegistration Register_STRIDED_SLICE() {
-  return {/*init=*/strided_slice::Init,
-          /*free=*/nullptr,
-          /*prepare=*/strided_slice::Prepare,
-          /*invoke=*/strided_slice::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cc
index 0ca8dd1..266d6b5 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,114 +13,31 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace micro {
-namespace sub {
-
-constexpr int kInputTensor1 = 0;
-constexpr int kInputTensor2 = 1;
-constexpr int kOutputTensor = 0;
-
-struct OpData {
-  bool requires_broadcast;
-
-  // These fields are used in both the general 8-bit -> 8bit quantized path,
-  // and the special 16-bit -> 16bit quantized path
-  int input1_shift;
-  int input2_shift;
-  int32_t output_activation_min;
-  int32_t output_activation_max;
-
-  // These fields are used only in the general 8-bit -> 8bit quantized path
-  int32_t input1_multiplier;
-  int32_t input2_multiplier;
-  int32_t output_multiplier;
-  int output_shift;
-  int left_shift;
-  int32_t input1_offset;
-  int32_t input2_offset;
-  int32_t output_offset;
-};
-
-TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params,
-                             const TfLiteTensor* input1,
-                             const TfLiteTensor* input2, TfLiteTensor* output,
-                             OpData* data) {
-  data->requires_broadcast = !HaveSameShapes(input1, input2);
-
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    // 8bit -> 8bit general quantized path, with general rescalings
-    data->input1_offset = -input1->params.zero_point;
-    data->input2_offset = -input2->params.zero_point;
-    data->output_offset = output->params.zero_point;
-    data->left_shift = 20;
-    const float twice_max_input_scale =
-        2 * std::max(input1->params.scale, input2->params.scale);
-    const double real_input1_multiplier =
-        static_cast<double>(input1->params.scale / twice_max_input_scale);
-    const double real_input2_multiplier =
-        static_cast<double>(input2->params.scale / twice_max_input_scale);
-    const double real_output_multiplier =
-        static_cast<double>(twice_max_input_scale /
-                            ((1 << data->left_shift) * output->params.scale));
 
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
-
-    QuantizeMultiplierSmallerThanOneExp(
-        real_output_multiplier, &data->output_multiplier, &data->output_shift);
-
-    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
-        context, params->activation, output, &data->output_activation_min,
-        &data->output_activation_max));
-  }
-
-  return kTfLiteOk;
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+void* SubInit(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->user_data != nullptr);
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  OpData* data = static_cast<OpData*>(node->user_data);
-  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
-
-  const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
-  TF_LITE_ENSURE(context, input1 != nullptr);
-  const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
-  TF_LITE_ENSURE(context, input2 != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_STATUS(
-      CalculateOpData(context, params, input1, input2, output, data));
-  return kTfLiteOk;
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataSub));
 }
 
 void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
-             const OpData* data, const TfLiteEvalTensor* input1,
+             const OpDataSub* data, const TfLiteEvalTensor* input1,
              const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
   float output_activation_min, output_activation_max;
   CalculateActivationRange(params->activation, &output_activation_min,
@@ -147,31 +64,31 @@ void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
 }
 
 TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
-                              TfLiteSubParams* params, const OpData* data,
+                              TfLiteSubParams* params, const OpDataSub* data,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    tflite::ArithmeticParams op_params;
-    op_params.left_shift = data->left_shift;
-    op_params.input1_offset = data->input1_offset;
-    op_params.input1_multiplier = data->input1_multiplier;
-    op_params.input1_shift = data->input1_shift;
-    op_params.input2_offset = data->input2_offset;
-    op_params.input2_multiplier = data->input2_multiplier;
-    op_params.input2_shift = data->input2_shift;
-    op_params.output_offset = data->output_offset;
-    op_params.output_multiplier = data->output_multiplier;
-    op_params.output_shift = data->output_shift;
-    SetActivationParams(data->output_activation_min,
-                        data->output_activation_max, &op_params);
-    bool need_broadcast = reference_ops::ProcessBroadcastShapes(
-        tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorShape(input2), &op_params);
-
-    if (output->type == kTfLiteInt8) {
+  tflite::ArithmeticParams op_params;
+  op_params.left_shift = data->left_shift;
+  op_params.input1_offset = data->input1_offset;
+  op_params.input1_multiplier = data->input1_multiplier;
+  op_params.input1_shift = data->input1_shift;
+  op_params.input2_offset = data->input2_offset;
+  op_params.input2_multiplier = data->input2_multiplier;
+  op_params.input2_shift = data->input2_shift;
+  op_params.output_offset = data->output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = data->output_shift;
+  SetActivationParams(data->output_activation_min, data->output_activation_max,
+                      &op_params);
+  bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+      tflite::micro::GetTensorShape(input1),
+      tflite::micro::GetTensorShape(input2), &op_params);
+
+  switch (output->type) {
+    case kTfLiteInt8: {
       if (need_broadcast) {
-        tflite::reference_ops::BroadcastSubSlow(
+        tflite::reference_ops::BroadcastQuantSubSlow(
             op_params, tflite::micro::GetTensorShape(input1),
             tflite::micro::GetTensorData<int8_t>(input1),
             tflite::micro::GetTensorShape(input2),
@@ -187,70 +104,65 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int8_t>(output));
       }
-    } else {
+      break;
+    }
+    case kTfLiteInt16: {
       if (need_broadcast) {
-        tflite::reference_ops::BroadcastSubSlow(
+        tflite::reference_ops::BroadcastQuantSubSlow(
             op_params, tflite::micro::GetTensorShape(input1),
-            tflite::micro::GetTensorData<uint8_t>(input1),
+            tflite::micro::GetTensorData<int16_t>(input1),
             tflite::micro::GetTensorShape(input2),
-            tflite::micro::GetTensorData<uint8_t>(input2),
+            tflite::micro::GetTensorData<int16_t>(input2),
             tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<uint8_t>(output));
+            tflite::micro::GetTensorData<int16_t>(output));
       } else {
         tflite::reference_ops::Sub(
             op_params, tflite::micro::GetTensorShape(input1),
-            tflite::micro::GetTensorData<uint8_t>(input1),
+            tflite::micro::GetTensorData<int16_t>(input1),
             tflite::micro::GetTensorShape(input2),
-            tflite::micro::GetTensorData<uint8_t>(input2),
+            tflite::micro::GetTensorData<int16_t>(input2),
             tflite::micro::GetTensorShape(output),
-            tflite::micro::GetTensorData<uint8_t>(output));
+            tflite::micro::GetTensorData<int16_t>(output));
       }
+      break;
     }
+    default:
+      MicroPrintf("Quantized type %s not currently supported.",
+                  TfLiteTypeGetName(output->type));
+      return kTfLiteError;
   }
-
   return kTfLiteOk;
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
 
   const TfLiteEvalTensor* input1 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor1);
+      tflite::micro::GetEvalInput(context, node, kSubInputTensor1);
   const TfLiteEvalTensor* input2 =
-      tflite::micro::GetEvalInput(context, node, kInputTensor2);
+      tflite::micro::GetEvalInput(context, node, kSubInputTensor2);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kSubOutputTensor);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+  const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));
 
   if (output->type == kTfLiteFloat32) {
     EvalSub(context, node, params, &data, input1, input2, output);
-  } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
+  } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
                                                 input1, input2, output));
   } else {
-    TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                       TfLiteTypeGetName(output->type), output->type);
+    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(output->type),
+                output->type);
     return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace sub
-
 TfLiteRegistration Register_SUB() {
-  return {/*init=*/sub::Init,
-          /*free=*/nullptr,
-          /*prepare=*/sub::Prepare,
-          /*invoke=*/sub::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(SubInit, SubPrepare, SubEval);
 }
 
-}  // namespace micro
-}  // namespace ops
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h
new file mode 100644
index 0000000..36608d5
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h
@@ -0,0 +1,60 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+
+namespace tflite {
+
+extern const int kSubInputTensor1;
+extern const int kSubInputTensor2;
+extern const int kSubOutputTensor;
+
+struct OpDataSub {
+  bool requires_broadcast;
+
+  // The fields below are set by CalculateOpDataSub() only when the output type
+  // is int8 or int16: per-input shifts and the output activation range.
+  int input1_shift;
+  int input2_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+
+  // Integer multipliers, shifts and zero-point offsets (int8 and int16 paths).
+  int32_t input1_multiplier;
+  int32_t input2_multiplier;
+  int32_t output_multiplier;
+  int output_shift;
+  int left_shift;
+  int32_t input1_offset;
+  int32_t input2_offset;
+  int32_t output_offset;
+};
+
+TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
+                                const TfLiteTensor* input1,
+                                const TfLiteTensor* input2,
+                                TfLiteTensor* output, OpDataSub* data);
+
+TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_SUB_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cc
new file mode 100644
index 0000000..fcb8d4b
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/sub_common.cc
@@ -0,0 +1,109 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/sub.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/sub.h"
+
+namespace tflite {
+
+const int kSubInputTensor1 = 0;
+const int kSubInputTensor2 = 1;
+const int kSubOutputTensor = 0;
+
+TfLiteStatus CalculateOpDataSub(TfLiteContext* context, TfLiteSubParams* params,
+                                const TfLiteTensor* input1,
+                                const TfLiteTensor* input2,
+                                TfLiteTensor* output, OpDataSub* data) {
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
+    // int8/int16 general quantized path, with general rescalings
+    data->input1_offset = -input1->params.zero_point;
+    data->input2_offset = -input2->params.zero_point;
+    data->output_offset = output->params.zero_point;
+
+    // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit,
+    // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1
+    // << 31, therefore the addition will still fit in a 32 bit accumulator.
+    data->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
+    const float twice_max_input_scale =
+        2 * std::max(input1->params.scale, input2->params.scale);
+    const double real_input1_multiplier =
+        static_cast<double>(input1->params.scale) /
+        static_cast<double>(twice_max_input_scale);
+    const double real_input2_multiplier =
+        static_cast<double>(input2->params.scale) /
+        static_cast<double>(twice_max_input_scale);
+    const double real_output_multiplier =
+        static_cast<double>(twice_max_input_scale) /
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_input2_multiplier, &data->input2_multiplier, &data->input2_shift);
+
+    QuantizeMultiplierSmallerThanOneExp(
+        real_output_multiplier, &data->output_multiplier, &data->output_shift);
+
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  // Fills node->user_data (OpDataSub) from the node's input/output tensors.
+  OpDataSub* data = static_cast<OpDataSub*>(node->user_data);
+  auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
+
+  MicroContext* micro_context = GetMicroContext(context);
+  // Temp tensors are only needed to read shapes/types/quantization params.
+  TfLiteTensor* input1 =
+      micro_context->AllocateTempInputTensor(node, kSubInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TfLiteTensor* input2 =
+      micro_context->AllocateTempInputTensor(node, kSubInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kSubOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
+  // Save the status instead of returning early so the temps below are freed.
+  const TfLiteStatus status =
+      CalculateOpDataSub(context, params, input1, input2, output, data);
+
+  micro_context->DeallocateTempTfLiteTensor(input1);
+  micro_context->DeallocateTempTfLiteTensor(input2);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  return status;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cc
index 64c4ced..e9b50e5 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.cc
@@ -3,7 +3,7 @@
 #if 0 == 1
 /* noop */
 #elif EI_CLASSIFIER_TFLITE_ENABLE_CMSIS_NN == 1
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,8 +18,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <cmath>
-#include <cstdint>
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h"
 
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nn_types.h"
 #include "edge-impulse-sdk/CMSIS/NN/Include/arm_nnfunctions.h"
@@ -32,190 +31,26 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
 namespace {
 
-struct OpData {
-  int32_t effective_scale_1_a;
-  int32_t effective_scale_2_a;
-  // b versions of each scale are kept at int since the numbers are just the
-  // shift value - typically between [-32, 32].
-  int effective_scale_1_b;
-  int effective_scale_2_b;
-  int scratch_tensor_index;
-  int scratch_output_tensor_index;
-
-  // Cached tensor zero point values for quantized operations.
-  int input_zero_point;
-  int output_zero_point;
-};
-
-// Input tensors.
-constexpr int kInputTensor = 0;
-constexpr int kWeightsFeatureTensor = 1;
-constexpr int kWeightsTimeTensor = 2;
-constexpr int kBiasTensor = 3;
-// This is a variable tensor, and will be modified by this op.
-constexpr int kInputActivationStateTensor = 4;
-
-// Output tensor.
-constexpr int kOutputTensor = 0;
-
-/**
- * This version of SVDF is specific to TFLite Micro. It contains the following
- * differences between the TFLite version:
- *
- * 1.) Scratch tensor allocation - scratch tensors must be known ahead of time
- * for the Micro interpreter.
- * 2.) Output dimensions - the TFLite version determines output size and runtime
- * and resizes the output tensor. Micro runtime does not support tensor
- * resizing.
- */
-static inline void ApplyTimeWeightsBiasAndActivation(
-    int batch_size, int memory_size, int num_filters, int num_units, int rank,
-    const float* const __restrict__ weights_time_ptr,
-    const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
-    float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
-    float* const __restrict__ output_ptr) {
-  // Compute matmul(activation_state, weights_time).
-  for (int b = 0; b < batch_size; ++b) {
-    // Perform batched vector dot product:
-    float* scratch_ptr_batch = scratch_ptr + b * num_filters;
-    const float* vector1_ptr = weights_time_ptr;
-    const float* vector2_ptr = state_ptr + b * memory_size * num_filters;
-    for (int i = 0; i < num_filters; ++i) {
-      *scratch_ptr_batch = 0.f;
-      for (int j = 0; j < memory_size; ++j) {
-        *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
-      }
-      scratch_ptr_batch++;
-    }
-  }
-
-  // Initialize output with bias if provided.
-  if (bias_ptr) {
-    // VectorBatchVectorAssign
-    for (int i = 0; i < batch_size; ++i) {
-      float* output_data = output_ptr + i * num_units;
-      const float* bias_data = bias_ptr;
-      for (int j = 0; j < num_units; ++j) {
-        *output_data++ = *bias_data++;
-      }
-    }
-  } else {
-    float* output_data = output_ptr;
-    for (int i = 0; i < batch_size * num_units; ++i) {
-      *output_data++ = 0.0f;
-    }
-  }
-
-  // Reduction sum.
-  for (int b = 0; b < batch_size; ++b) {
-    float* output_ptr_batch = output_ptr + b * num_units;
-    float* scratch_ptr_batch = scratch_ptr + b * num_filters;
-
-    // Reduction sum vector
-    for (int i = 0; i < num_units; ++i) {
-      for (int j = 0; j < rank; j++) {
-        output_ptr_batch[i] += *scratch_ptr_batch++;
-      }
-    }
-  }
-
-  // Apply activation.
-  for (int b = 0; b < batch_size; ++b) {
-    float* output_ptr_batch = output_ptr + b * num_units;
-    for (int i = 0; i < num_units; ++i) {
-      *output_ptr_batch =
-          tflite::ops::micro::ActivationValFloat(activation, *output_ptr_batch);
-      ++output_ptr_batch;
-    }
-  }
-}
-
-inline void EvalFloatSVDF(
-    TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
-    const TfLiteEvalTensor* weights_feature,
-    const TfLiteEvalTensor* weights_time, const TfLiteEvalTensor* bias,
-    const TfLiteSVDFParams* params, int scratch_tensor_index,
-    TfLiteEvalTensor* activation_state, TfLiteEvalTensor* output) {
-  const int rank = params->rank;
-  const int batch_size = input->dims->data[0];
-  const int input_size = input->dims->data[1];
-  const int num_filters = weights_feature->dims->data[0];
-  const int num_units = num_filters / rank;
-  const int memory_size = weights_time->dims->data[1];
-
-  const float* weights_feature_ptr =
-      tflite::micro::GetTensorData<float>(weights_feature);
-  const float* weights_time_ptr =
-      tflite::micro::GetTensorData<float>(weights_time);
-  const float* bias_ptr = tflite::micro::GetTensorData<float>(bias);
-  const float* input_ptr = tflite::micro::GetTensorData<float>(input);
-
-  float* state_ptr = tflite::micro::GetTensorData<float>(activation_state);
-
-  TFLITE_DCHECK(context != nullptr);
-  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
-
-  float* scratch_ptr = static_cast<float*>(
-      context->GetScratchBuffer(context, scratch_tensor_index));
-
-  float* output_ptr = tflite::micro::GetTensorData<float>(output);
-
-  // Left shift the activation_state.
-  {
-    float* new_state_start = state_ptr;
-    const float* old_state_start = state_ptr + 1;
-    const float* old_state_end =
-        state_ptr + batch_size * num_filters * memory_size;
-    while (old_state_start != old_state_end) {
-      *new_state_start++ = *old_state_start++;
-    }
-  }
-
-  // Note: no need to clear the latest activation, matmul is not accumulative.
-
-  // Compute conv1d(inputs, weights_feature).
-  // The activation_state's rightmost column is used to save current cycle
-  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
-  // having the stride equal to memory_size.
-
-  // Perform batched matrix vector multiply operation:
-  {
-    const float* matrix = weights_feature_ptr;
-    const float* vector = input_ptr;
-    float* result = &state_ptr[memory_size - 1];
-    float* result_in_batch = result;
-    for (int i = 0; i < batch_size; ++i) {
-      const float* matrix_ptr = matrix;
-      for (int j = 0; j < num_filters; ++j) {
-        float dot_prod = 0.0f;
-        const float* vector_in_batch = vector + i * input_size;
-        for (int k = 0; k < input_size; ++k) {
-          dot_prod += *matrix_ptr++ * *vector_in_batch++;
-        }
-        *result_in_batch = dot_prod;
-        result_in_batch += memory_size;
-      }
-    }
-  }
-
-  ApplyTimeWeightsBiasAndActivation(
-      batch_size, memory_size, num_filters, num_units, rank, weights_time_ptr,
-      bias_ptr, params->activation, state_ptr, scratch_ptr, output_ptr);
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf));
 }
 
-void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
-                     const TfLiteEvalTensor* input_tensor,
-                     const TfLiteEvalTensor* weights_feature_tensor,
-                     const TfLiteEvalTensor* weights_time_tensor,
-                     const TfLiteEvalTensor* bias_tensor,
-                     const TfLiteSVDFParams* params,
-                     TfLiteEvalTensor* activation_state_tensor,
-                     TfLiteEvalTensor* output_tensor, const OpData& data) {
+TfLiteStatus EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
+                             const TfLiteEvalTensor* input_tensor,
+                             const TfLiteEvalTensor* weights_feature_tensor,
+                             const TfLiteEvalTensor* weights_time_tensor,
+                             const TfLiteEvalTensor* bias_tensor,
+                             const TfLiteSVDFParams* params,
+                             TfLiteEvalTensor* activation_state_tensor,
+                             TfLiteEvalTensor* output_tensor,
+                             const OpDataSvdf& data) {
   cmsis_nn_dims input_dims;
   input_dims.n = input_tensor->dims->data[0];
   input_dims.h = input_tensor->dims->data[1];
@@ -270,216 +105,125 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node,
       context->GetScratchBuffer(context, data.scratch_output_tensor_index));
 
   int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output_tensor);
-  arm_svdf_s8(
-      &scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params,
-      &out_quant_params, &input_dims,
-      (int8_t*)tflite::micro::GetTensorData<int8_t>(input_tensor), &state_dims,
-      (int16_t*)tflite::micro::GetTensorData<int16_t>(activation_state_tensor),
-      &weights_feature_dims,
-      (int8_t*)tflite::micro::GetTensorData<int8_t>(weights_feature_tensor),
-      &weights_time_dims,
-      (int16_t*)tflite::micro::GetTensorData<int16_t>(weights_time_tensor),
-      &bias_dims, (int32_t*)tflite::micro::GetTensorData<int32_t>(bias_tensor),
-      &output_dims, output_data);
-}
-
-void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
-TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
-  TFLITE_DCHECK(node->builtin_data != nullptr);
-
-  const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
-
-  // Validate Tensor Inputs (dtype depends on quantization):
-  // [0] = Input, {2, batch_size, input_size}
-  // [1] = Weights Feature, {2, num_filters, input_size}
-  // [2] = Weights Time, {2, num_filters, memory_size}
-  // [3] = Bias (optional), {1, num_units}
-  // [4] = Activation State (variable),
-  //         {2, batch_size, memory_size * num_filters}
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* weights_feature =
-      GetInput(context, node, kWeightsFeatureTensor);
-  TF_LITE_ENSURE(context, weights_feature != nullptr);
-  const TfLiteTensor* weights_time =
-      GetInput(context, node, kWeightsTimeTensor);
-  TF_LITE_ENSURE(context, weights_time != nullptr);
-  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
-  const TfLiteTensor* activation_state =
-      GetInput(context, node, kInputActivationStateTensor);
-  TF_LITE_ENSURE(context, activation_state != nullptr);
-
-  // Define input constants based on input tensor definition above:
-  const int rank = params->rank;
-  const int input_size = input->dims->data[1];
-  const int batch_size = input->dims->data[0];
-  const int num_filters = weights_feature->dims->data[0];
-  TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
-  const int num_units = num_filters / rank;
-  const int memory_size = weights_time->dims->data[1];
-
-  // Validate Input Tensor:
-  TF_LITE_ENSURE(context,
-                 input->type == kTfLiteFloat32 || input->type == kTfLiteInt8);
-  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2);
-
-  // Validate Tensor Output:
-  // [0] = float/int8, {2, batch_size, num_units}
-  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
-  TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
-  TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
-
-  // Validate Weights Feature Input Tensor:
-  TF_LITE_ENSURE_EQ(context, NumDimensions(weights_feature), 2);
-  TF_LITE_ENSURE_EQ(context, weights_feature->dims->data[1], input_size);
-
-  // Validate Weights Time Input Tensor:
-  TF_LITE_ENSURE_EQ(context, NumDimensions(weights_time), 2);
-  TF_LITE_ENSURE_EQ(context, weights_time->dims->data[0], num_filters);
-  TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size);
-
-  // Validate Optional Bias Input Tensor:
-  if (bias != nullptr) {
-    TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units);
-  }
-
-  // Validate Activation State Input Tensor:
-  TF_LITE_ENSURE_EQ(context, NumDimensions(activation_state), 2);
-  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[0], batch_size);
-  TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1],
-                    memory_size * num_filters);
-  // Since is_variable is not part of TFLiteEvalTensor, check is_variable here.
-  TF_LITE_ENSURE_EQ(context, activation_state->is_variable, true);
-
-  TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
 
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  if (input->type == kTfLiteInt8) {
-    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
-    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
-    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
-    if (bias != nullptr) {
-      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
+  switch (weights_time_tensor->type) {
+    case kTfLiteInt8: {
+      arm_svdf_s8(
+          &scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params,
+          &out_quant_params, &input_dims,
+          tflite::micro::GetTensorData<int8_t>(input_tensor), &state_dims,
+          tflite::micro::GetTensorData<int8_t>(activation_state_tensor),
+          &weights_feature_dims,
+          tflite::micro::GetTensorData<int8_t>(weights_feature_tensor),
+          &weights_time_dims,
+          tflite::micro::GetTensorData<int8_t>(weights_time_tensor), &bias_dims,
+          tflite::micro::GetTensorData<int32_t>(bias_tensor), &output_dims,
+          output_data);
+      return kTfLiteOk;
     }
 
-    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
-
-    const double effective_scale_1 = static_cast<double>(
-        input->params.scale * weights_feature->params.scale /
-        activation_state->params.scale);
-    const double effective_scale_2 =
-        static_cast<double>(activation_state->params.scale *
-                            weights_time->params.scale / output->params.scale);
-
-    // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
-    TF_LITE_ENSURE(
-        context,
-        std::abs(static_cast<double>(bias->params.scale) -
-                 static_cast<double>(activation_state->params.scale *
-                                     weights_time->params.scale)) < 1e-5);
-
-    QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
-                       &(data->effective_scale_1_b));
-    QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a),
-                       &(data->effective_scale_2_b));
-
-    data->input_zero_point = input->params.zero_point;
-    data->output_zero_point = output->params.zero_point;
-
-    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-
-    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
-        context, batch_size * num_filters * sizeof(int32_t),
-        &(data->scratch_tensor_index));
-    TF_LITE_ENSURE_OK(context, scratch_status);
-
-    const TfLiteStatus scratch_output_status =
-        context->RequestScratchBufferInArena(
-            context, batch_size * num_units * sizeof(int32_t),
-            &(data->scratch_output_tensor_index));
-    TF_LITE_ENSURE_OK(context, scratch_output_status);
-  } else {
-    TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32);
-    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32);
-    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32);
-    if (bias != nullptr) {
-      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
+    case kTfLiteInt16: {
+      arm_svdf_state_s16_s8(
+          &scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params,
+          &out_quant_params, &input_dims,
+          tflite::micro::GetTensorData<int8_t>(input_tensor), &state_dims,
+          tflite::micro::GetTensorData<int16_t>(activation_state_tensor),
+          &weights_feature_dims,
+          tflite::micro::GetTensorData<int8_t>(weights_feature_tensor),
+          &weights_time_dims,
+          tflite::micro::GetTensorData<int16_t>(weights_time_tensor),
+          &bias_dims, tflite::micro::GetTensorData<int32_t>(bias_tensor),
+          &output_dims, output_data);
+      return kTfLiteOk;
     }
-    TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
 
-    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-    const TfLiteStatus scratch_status = context->RequestScratchBufferInArena(
-        context, batch_size * num_filters * sizeof(float),
-        &(data->scratch_tensor_index));
-    TF_LITE_ENSURE_OK(context, scratch_status);
+    default:
+      MicroPrintf("Could not find matching function for type %s.",
+                  TfLiteTypeGetName(weights_time_tensor->type));
+      return kTfLiteError;
   }
-
-  return kTfLiteOk;
 }
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus EvalSvdf(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+  const OpDataSvdf& data = *(static_cast<const OpDataSvdf*>(node->user_data));
 
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
   const TfLiteEvalTensor* weights_feature =
-      tflite::micro::GetEvalInput(context, node, kWeightsFeatureTensor);
+      tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor);
   const TfLiteEvalTensor* weights_time =
-      tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor);
+      tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor);
   const TfLiteEvalTensor* bias =
       (NumInputs(node) == 5)
-          ? tflite::micro::GetEvalInput(context, node, kBiasTensor)
+          ? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor)
           : nullptr;
   TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
-      context, node, kInputActivationStateTensor);
+      context, node, kSvdfInputActivationStateTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor);
 
-  switch (weights_feature->type) {
+  switch (weights_time->type) {  // Dispatch on the time-weights type.
     case kTfLiteFloat32: {
-      EvalFloatSVDF(context, node, input, weights_feature, weights_time, bias,
-                    params, data.scratch_tensor_index, activation_state,
-                    output);
+      EvalFloatSvdfReference(
+          context, node, input, weights_feature, weights_time, bias, params,
+          data.scratch_tensor_index, activation_state, output);
       return kTfLiteOk;
-      break;
     }
 
-    case kTfLiteInt8: {
-      EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias,
-                      params, activation_state, output, data);
-      return kTfLiteOk;
-      break;
+    case kTfLiteInt8:
+    case kTfLiteInt16: {
+      return EvalIntegerSVDF(context, node, input, weights_feature,
+                             weights_time, bias, params, activation_state,
+                             output, data);
     }
 
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
-                         TfLiteTypeGetName(weights_feature->type));
+      MicroPrintf("Type %s not currently supported.",
+                  TfLiteTypeGetName(weights_time->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
 }
 
+TfLiteStatus EvalSvdfInt8(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpDataSvdf& data = *(static_cast<const OpDataSvdf*>(node->user_data));
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
+  const TfLiteEvalTensor* weights_feature =
+      tflite::micro::GetEvalInput(context, node, kSvdfWeightsFeatureTensor);
+  const TfLiteEvalTensor* weights_time =
+      tflite::micro::GetEvalInput(context, node, kSvdfWeightsTimeTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 5)
+          ? tflite::micro::GetEvalInput(context, node, kSvdfBiasTensor)
+          : nullptr;
+  TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput(
+      context, node, kSvdfInputActivationStateTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kSvdfOutputTensor);
+
+  TFLITE_DCHECK((weights_time->type == kTfLiteInt8) ||
+                (weights_time->type == kTfLiteInt16));
+  // Because of the TODO mentioned below, the int16 weight data type is not
+  // split into a separate registration.
+  // TODO(#523): remove 16-bit code when no longer needed.
+  return EvalIntegerSVDF(context, node, input, weights_feature, weights_time,
+                         bias, params, activation_state, output, data);
+}
+
 }  // namespace
 
 TfLiteRegistration Register_SVDF() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PrepareSvdf, EvalSvdf);
+}
+
+TfLiteRegistration Register_SVDF_INT8() {
+  return tflite::micro::RegisterOp(Init, PrepareSvdf, EvalSvdfInt8);
 }
 
 }  // namespace tflite
@@ -513,6 +257,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/activation_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -520,13 +265,13 @@ namespace {
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataSvdf));
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSVDFParams*>(node->builtin_data);
   TFLITE_DCHECK(node->user_data != nullptr);
-  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+  const OpDataSvdf& data = *(static_cast<const OpDataSvdf*>(node->user_data));
 
   const TfLiteEvalTensor* input =
       tflite::micro::GetEvalInput(context, node, kSvdfInputTensor);
@@ -553,16 +298,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     }
 
     case kTfLiteInt8: {
-      EvalIntegerSvdfReference(context, node, input, weights_feature,
-                               weights_time, bias, params, activation_state,
-                               output, data);
-      return kTfLiteOk;
-      break;
+      switch (weights_time->type) {
+        case kTfLiteInt16: {
+          EvalInt16SvdfReference(context, node, input, weights_feature,
+                                 weights_time, bias, params, activation_state,
+                                 output, data);
+          return kTfLiteOk;
+          break;
+        }
+        case kTfLiteInt8: {
+          EvalInt8SvdfReference(context, node, input, weights_feature,
+                                weights_time, bias, params, activation_state,
+                                output, data);
+          return kTfLiteOk;
+          break;
+        }
+        default:
+          MicroPrintf("Type %s not currently supported.",
+                      TfLiteTypeGetName(weights_time->type));
+          return kTfLiteError;
+      }
     }
 
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.",
-                         TfLiteTypeGetName(weights_feature->type));
+      MicroPrintf("Type %s not currently supported.",
+                  TfLiteTypeGetName(weights_feature->type));
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -571,14 +331,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_SVDF() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/PrepareSvdf,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, PrepareSvdf, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h
index 8a7eb0f..8bc068e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ limitations under the License.
 
 namespace tflite {
 
-struct OpData {
+struct OpDataSvdf {
   int32_t effective_scale_1_a;
   int32_t effective_scale_2_a;
   // b versions of each scale are kept at int since the numbers are just the
@@ -33,6 +33,7 @@ struct OpData {
   // Cached tensor zero point values for quantized operations.
   int input_zero_point;
   int output_zero_point;
+  int activation_state_zero_point;
 };
 
 // Input tensors.
@@ -46,16 +47,26 @@ extern const int kSvdfInputActivationStateTensor;
 // Output tensor.
 extern const int kSvdfOutputTensor;
 
-// TensorflowLite Micro-specific reference implementation for Integer SVDF.
-void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
-                              const TfLiteEvalTensor* input_tensor,
-                              const TfLiteEvalTensor* weights_feature_tensor,
-                              const TfLiteEvalTensor* weights_time_tensor,
-                              const TfLiteEvalTensor* bias_tensor,
-                              const TfLiteSVDFParams* params,
-                              TfLiteEvalTensor* activation_state_tensor,
-                              TfLiteEvalTensor* output_tensor,
-                              const OpData& data);
+void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node,
+                           const TfLiteEvalTensor* input_tensor,
+                           const TfLiteEvalTensor* weights_feature_tensor,
+                           const TfLiteEvalTensor* weights_time_tensor,
+                           const TfLiteEvalTensor* bias_tensor,
+                           const TfLiteSVDFParams* params,
+                           TfLiteEvalTensor* activation_state_tensor,
+                           TfLiteEvalTensor* output_tensor,
+                           const OpDataSvdf& data);
+
+// TODO(#523): remove 16-bit code when no longer needed.
+void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node,
+                            const TfLiteEvalTensor* input_tensor,
+                            const TfLiteEvalTensor* weights_feature_tensor,
+                            const TfLiteEvalTensor* weights_time_tensor,
+                            const TfLiteEvalTensor* bias_tensor,
+                            const TfLiteSVDFParams* params,
+                            TfLiteEvalTensor* activation_state_tensor,
+                            TfLiteEvalTensor* output_tensor,
+                            const OpDataSvdf& data);
 
 void EvalFloatSvdfReference(
     TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
@@ -66,6 +77,23 @@ void EvalFloatSvdfReference(
 
 TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_SVDF();
+
+#if defined(HEXAGON) || defined(CMSIS_NN)
+TfLiteRegistration Register_SVDF_INT8();
+
+#else
+// Note that while this block gets used for both reference and optimized kernels
+// that do not have any specialized implementations, the only goal here is to
+// define a fallback implementation that allows reference kernels to still be
+// used from applications that call a more specific kernel variant.
+
+inline TfLiteRegistration Register_SVDF_INT8() { return Register_SVDF(); }
+
+#endif
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_SVDF_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cc
index 038dac1..bdc36b8 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/svdf_common.cc
@@ -48,6 +48,7 @@ const int kSvdfInputActivationStateTensor =
     4;  // This is a variable tensor, and will be modified by this op.
 const int kSvdfOutputTensor = 0;
 
+template <typename T>
 void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteEvalTensor* input_tensor,
                               const TfLiteEvalTensor* weights_feature_tensor,
@@ -56,7 +57,7 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteSVDFParams* params,
                               TfLiteEvalTensor* activation_state_tensor,
                               TfLiteEvalTensor* output_tensor,
-                              const OpData& data) {
+                              const OpDataSvdf& data) {
   const int n_rank = params->rank;
   const int n_batch = input_tensor->dims->data[0];
   const int n_input = input_tensor->dims->data[1];
@@ -73,14 +74,13 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
       context->GetScratchBuffer(context, data.scratch_output_tensor_index));
 
   // Shift states.
-  int16_t* const state_ptr =
-      tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
+  T* const state_ptr = tflite::micro::GetTensorData<T>(activation_state_tensor);
 
   // Left shift the activation_state.
   {
-    int16_t* new_state_start = state_ptr;
-    const int16_t* old_state_start = state_ptr + 1;
-    const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
+    T* new_state_start = state_ptr;
+    const T* old_state_start = state_ptr + 1;
+    const T* old_state_end = state_ptr + n_batch * n_filter * n_memory;
     while (old_state_start != old_state_end) {
       *new_state_start++ = *old_state_start++;
     }
@@ -90,14 +90,13 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
 
   // Feature matmul.
   {
-    int16_t* state =
-        tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
+    T* state = tflite::micro::GetTensorData<T>(activation_state_tensor);
     const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
     const int8_t* weight_feature =
         tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
-    const int32_t output_max = std::numeric_limits<int16_t>::max();
-    const int32_t output_min = std::numeric_limits<int16_t>::min();
-    int16_t* result_in_batch = state + (n_memory - 1);
+    const int32_t output_max = std::numeric_limits<T>::max();
+    const int32_t output_min = std::numeric_limits<T>::min();
+    T* result_in_batch = state + (n_memory - 1);
     for (int b = 0; b < n_batch; b++) {
       const int8_t* matrix_ptr = weight_feature;
       for (int r = 0; r < n_filter; r++) {
@@ -110,13 +109,10 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
         dot_prod = MultiplyByQuantizedMultiplier(
             dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
         dot_prod = std::min(std::max(output_min, dot_prod), output_max);
-        // This assumes state is symmetrically quantized. Otherwise last bit of
-        // state should be initialized to its zero point and accumulate the
-        // dot_prod.
-        // Equivalent as the following:
-        //     result_in_batch = zero point, which happens to be zero.
-        //     result_in_batch += dot_prod_56.
-        *result_in_batch = dot_prod;
+        // The int16 version of the op assumes a zero_point of 0.  This
+        // code accounts for the potentially non-zero zero_point for the int8
+        // version of the op.
+        *result_in_batch = data.activation_state_zero_point + dot_prod;
         result_in_batch += n_memory;
       }
     }
@@ -128,16 +124,18 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
       int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
 
       // Perform batched vector dot product:
-      const int16_t* vector1_ptr =
-          tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
-      const int16_t* vector2_ptr =
-          tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
+      const T* vector1_ptr =
+          tflite::micro::GetTensorData<T>(weights_time_tensor);
+      const T* vector2_ptr =
+          tflite::micro::GetTensorData<T>(activation_state_tensor) +
           b * n_memory * n_filter;
 
       for (int i = 0; i < n_filter; i++) {
         *scratch_ptr_batch = 0;
         for (int j = 0; j < n_memory; j++) {
-          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+          *scratch_ptr_batch +=
+              *vector1_ptr++ *
+              (*vector2_ptr++ - data.activation_state_zero_point);
         }
         scratch_ptr_batch++;
       }
@@ -192,12 +190,46 @@ void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
     }
   }
 }
+
+/**
+ * Generate two versions of the integer code.  One with int16_t type for the
+ * time weights and the activation state, and another one with int8_t for the
+ * same.
+ */
+
+void EvalInt16SvdfReference(TfLiteContext* context, TfLiteNode* node,
+                            const TfLiteEvalTensor* input_tensor,
+                            const TfLiteEvalTensor* weights_feature_tensor,
+                            const TfLiteEvalTensor* weights_time_tensor,
+                            const TfLiteEvalTensor* bias_tensor,
+                            const TfLiteSVDFParams* params,
+                            TfLiteEvalTensor* activation_state_tensor,
+                            TfLiteEvalTensor* output_tensor,
+                            const OpDataSvdf& data) {
+  EvalIntegerSvdfReference<int16_t>(
+      context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
+      bias_tensor, params, activation_state_tensor, output_tensor, data);
+}
+
+void EvalInt8SvdfReference(TfLiteContext* context, TfLiteNode* node,
+                           const TfLiteEvalTensor* input_tensor,
+                           const TfLiteEvalTensor* weights_feature_tensor,
+                           const TfLiteEvalTensor* weights_time_tensor,
+                           const TfLiteEvalTensor* bias_tensor,
+                           const TfLiteSVDFParams* params,
+                           TfLiteEvalTensor* activation_state_tensor,
+                           TfLiteEvalTensor* output_tensor,
+                           const OpDataSvdf& data) {
+  EvalIntegerSvdfReference<int8_t>(
+      context, node, input_tensor, weights_feature_tensor, weights_time_tensor,
+      bias_tensor, params, activation_state_tensor, output_tensor, data);
+}
+
 static inline void ApplyTimeWeightsBiasAndActivation(
     int batch_size, int memory_size, int num_filters, int num_units, int rank,
-    const float* const __restrict__ weights_time_ptr,
-    const float* const __restrict__ bias_ptr, TfLiteFusedActivation activation,
-    float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr,
-    float* const __restrict__ output_ptr) {
+    const float* const weights_time_ptr, const float* const bias_ptr,
+    TfLiteFusedActivation activation, float* const state_ptr,
+    float* const scratch_ptr, float* const output_ptr) {
   // Compute matmul(activation_state, weights_time).
   for (int b = 0; b < batch_size; ++b) {
     // Perform batched vector dot product:
@@ -332,6 +364,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
 
   const auto* params = static_cast<const TfLiteSVDFParams*>(node->builtin_data);
 
+  MicroContext* micro_context = GetMicroContext(context);
+
   // Validate Tensor Inputs (dtype depends on quantization):
   // [0] = Input, {2, batch_size, input_size}
   // [1] = Weights Feature, {2, num_filters, input_size}
@@ -339,18 +373,19 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
   // [3] = Bias (optional), {1, num_units}
   // [4] = Activation State (variable),
   //         {2, batch_size, memory_size * num_filters}
-  const TfLiteTensor* input = GetInput(context, node, kSvdfInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kSvdfInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* weights_feature =
-      GetInput(context, node, kSvdfWeightsFeatureTensor);
+  TfLiteTensor* weights_feature =
+      micro_context->AllocateTempInputTensor(node, kSvdfWeightsFeatureTensor);
   TF_LITE_ENSURE(context, weights_feature != nullptr);
-  const TfLiteTensor* weights_time =
-      GetInput(context, node, kSvdfWeightsTimeTensor);
+  TfLiteTensor* weights_time =
+      micro_context->AllocateTempInputTensor(node, kSvdfWeightsTimeTensor);
   TF_LITE_ENSURE(context, weights_time != nullptr);
-  const TfLiteTensor* bias =
-      GetOptionalInputTensor(context, node, kSvdfBiasTensor);
-  const TfLiteTensor* activation_state =
-      GetInput(context, node, kSvdfInputActivationStateTensor);
+  TfLiteTensor* bias =
+      micro_context->AllocateTempInputTensor(node, kSvdfBiasTensor);
+  TfLiteTensor* activation_state = micro_context->AllocateTempInputTensor(
+      node, kSvdfInputActivationStateTensor);
   TF_LITE_ENSURE(context, activation_state != nullptr);
 
   // Define input constants based on input tensor definition above:
@@ -370,7 +405,8 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
   // Validate Tensor Output:
   // [0] = float/int8_t, {2, batch_size, num_units}
   TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
-  TfLiteTensor* output = GetOutput(context, node, kSvdfOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kSvdfOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
   TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
@@ -401,31 +437,35 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, node->inputs->size, 5);
 
   TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
+  OpDataSvdf* data = static_cast<OpDataSvdf*>(node->user_data);
 
   if (input->type == kTfLiteInt8) {
     TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8);
-    TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16);
-    TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16);
+    TF_LITE_ENSURE(context, (weights_time->type == kTfLiteInt16) ||
+                                (weights_time->type == kTfLiteInt8));
+    TF_LITE_ENSURE(context, (activation_state->type == kTfLiteInt16) ||
+                                (activation_state->type == kTfLiteInt8));
     if (bias != nullptr) {
       TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
     }
 
     TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
 
-    const double effective_scale_1 = static_cast<double>(
-        input->params.scale * weights_feature->params.scale /
-        activation_state->params.scale);
+    const double effective_scale_1 =
+        static_cast<double>(input->params.scale) *
+        static_cast<double>(weights_feature->params.scale) /
+        static_cast<double>(activation_state->params.scale);
     const double effective_scale_2 =
-        static_cast<double>(activation_state->params.scale *
-                            weights_time->params.scale / output->params.scale);
+        static_cast<double>(activation_state->params.scale) *
+        static_cast<double>(weights_time->params.scale) /
+        static_cast<double>(output->params.scale);
 
     // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready.
     TF_LITE_ENSURE(
         context,
         std::abs(static_cast<double>(bias->params.scale) -
-                 static_cast<double>(activation_state->params.scale *
-                                     weights_time->params.scale)) < 1e-5);
+                 (static_cast<double>(activation_state->params.scale) *
+                  static_cast<double>(weights_time->params.scale))) < 1e-5);
 
     QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a),
                        &(data->effective_scale_1_b));
@@ -434,6 +474,7 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
 
     data->input_zero_point = input->params.zero_point;
     data->output_zero_point = output->params.zero_point;
+    data->activation_state_zero_point = activation_state->params.zero_point;
 
     TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
 
@@ -463,6 +504,15 @@ TfLiteStatus PrepareSvdf(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_OK(context, scratch_status);
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(weights_feature);
+  micro_context->DeallocateTempTfLiteTensor(weights_time);
+  micro_context->DeallocateTempTfLiteTensor(activation_state);
+  micro_context->DeallocateTempTfLiteTensor(output);
+  // The bias input is optional: only release it when it was allocated.
+  if (bias != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(bias);
+  }
   return kTfLiteOk;
 }
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cc
index fa100ea..2ae32b6 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/tanh.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
@@ -48,16 +49,19 @@ void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) {
 
 TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
                                        OpData* data) {
+  MicroContext* micro_context = GetMicroContext(context);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
-  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8) {
     static constexpr int kInputIntegerBits = 4;
     const double input_real_multiplier =
         static_cast<double>(input->params.scale) *
@@ -69,6 +73,62 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
     data->input_range_radius =
         CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
   }
+
+  if (input->type == kTfLiteInt16) {
+    static constexpr int kInputIntegerBits = 3;
+    static constexpr int kOutputFractionalBits = 15;
+
+    // These operators are implemented in fixed-point arithmetic,
+    // which intrinsically wants symmetric ranges (zero_point==0)
+    // and power-of-two scales (power-of-two is abbreviated below as POT).
+    // While more general support would be possible by means of rescaling,
+    // that would add some overhead and some loss of accuracy and wouldn't
+    // be used at the moment as current quantized LSTM applications are
+    // happy with symmetric, power-of-two-scales quantization. So we just
+    // implement that narrow case only for now.
+
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+
+    int input_scale_log2_rounded;
+    bool param_scale_pot =
+        CheckedLog2(input->params.scale, &input_scale_log2_rounded);
+
+    data->input_left_shift =
+        (15 - kInputIntegerBits) + input_scale_log2_rounded;
+    param_scale_pot &=
+        (data->input_left_shift == 0 || data->input_left_shift == 1);
+
+    if (param_scale_pot) {
+      data->input_multiplier = 0;
+    } else {
+      // Calculate multiplier to change input scale to 1/(3*4096)
+      // as required by the table lookup.
+      // The number 3.0 in the multiplier comes from here,
+      // because the interval is [-10.7, 10.7] instead of [-8, 8].
+      // So, in this scaling +/-2^17 represents +/-10.7.
+
+      double multiplier =
+          static_cast<double>(input->params.scale) * 4096.0 * 3.0;
+      data->input_left_shift = 0;
+
+      while (multiplier <= 32767.0 / 2.0 && data->input_left_shift <= 30) {
+        data->input_left_shift++;
+        multiplier = multiplier * 2.0;
+      }
+
+      data->input_multiplier = static_cast<int32_t>(multiplier);
+    }
+
+    int output_scale_log2_rounded;
+    TF_LITE_ENSURE(
+        context, CheckedLog2(output->params.scale, &output_scale_log2_rounded));
+    TF_LITE_ENSURE_EQ(context, output_scale_log2_rounded,
+                      -kOutputFractionalBits);
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -77,10 +137,15 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
 
   OpData* data = static_cast<OpData*>(node->user_data);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
   data->input_zero_point = input->params.zero_point;
-  return CalculateArithmeticOpData(context, node, data);
+  TF_LITE_ENSURE_OK(context, CalculateArithmeticOpData(context, node, data));
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  return kTfLiteOk;
 }
 
 }  // namespace
@@ -103,25 +168,12 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     case kTfLiteInt16: {
-      TanhParams params;
-      params.input_left_shift = data.input_left_shift;
-      reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
-                          tflite::micro::GetTensorData<int16_t>(input),
-                          tflite::micro::GetTensorShape(output),
-                          tflite::micro::GetTensorData<int16_t>(output));
-      return kTfLiteOk;
-    } break;
-    case kTfLiteUInt8: {
-      TanhParams params;
-      params.input_zero_point = data.input_zero_point;
-      params.input_range_radius = data.input_range_radius;
-      params.input_multiplier = data.input_multiplier;
-      params.input_left_shift = data.input_left_shift;
-      reference_ops::Tanh(params, tflite::micro::GetTensorShape(input),
-                          tflite::micro::GetTensorData<uint8_t>(input),
-                          tflite::micro::GetTensorShape(output),
-                          tflite::micro::GetTensorData<uint8_t>(output));
-
+      reference_integer_ops::Tanh(
+          data.input_multiplier, data.input_left_shift,
+          tflite::micro::GetTensorShape(input),
+          tflite::micro::GetTensorData<int16_t>(input),
+          tflite::micro::GetTensorShape(output),
+          tflite::micro::GetTensorData<int16_t>(output));
       return kTfLiteOk;
     } break;
     case kTfLiteInt8: {
@@ -134,9 +186,9 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
       return kTfLiteOk;
     } break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
-                         TfLiteTypeGetName(input->type),
-                         TfLiteTypeGetName(output->type));
+      MicroPrintf("Input %s, output %s not supported.",
+                  TfLiteTypeGetName(input->type),
+                  TfLiteTypeGetName(output->type));
       return kTfLiteError;
   }
 }
@@ -144,14 +196,8 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace activations
 
 TfLiteRegistration Register_TANH() {
-  return {/*init=*/activations::TanhInit,
-          /*free=*/nullptr,
-          /*prepare=*/activations::TanhPrepare,
-          /*invoke=*/activations::TanhEval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(
+      activations::TanhInit, activations::TanhPrepare, activations::TanhEval);
 }
 }  // namespace micro
 }  // namespace ops
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cc
index 5500d9b..c0bd6e4 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose.cc
@@ -12,48 +12,58 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include <stdint.h>
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h"
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/reference/transpose.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
-namespace ops {
-namespace builtin {
-namespace transpose {
+namespace {
 
 constexpr int kInputTensor = 0;
 constexpr int kPermTensor = 1;
 constexpr int kOutputTensor = 0;
 
+struct TransposeContext {
+  TransposeContext(TfLiteContext* context, TfLiteNode* node) {
+    micro_context = GetMicroContext(context);
+    input = micro_context->AllocateTempInputTensor(node, kInputTensor);
+    perm = micro_context->AllocateTempInputTensor(node, kPermTensor);
+    output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  }
+  ~TransposeContext() {
+    micro_context->DeallocateTempTfLiteTensor(input);
+    micro_context->DeallocateTempTfLiteTensor(perm);
+    micro_context->DeallocateTempTfLiteTensor(output);
+  }
+  MicroContext* micro_context;
+  TfLiteTensor* input;
+  TfLiteTensor* perm;
+  TfLiteTensor* output;
+};
+
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
-  TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* perm = GetInput(context, node, kPermTensor);
-  TF_LITE_ENSURE(context, perm != nullptr);
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
-  TF_LITE_ENSURE(context, output != nullptr);
+  TransposeContext op_context(context, node);
 
   // Ensure validity of input tensor.
-  TF_LITE_ENSURE_MSG(context, NumDimensions(input) <= 5,
+  TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5,
                      "Transpose op only supports 1D-5D input arrays.");
-  TF_LITE_ENSURE_TYPES_EQ(context, input->type,
-                          output->type);
+  TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type,
+                          op_context.output->type);
 
-  int dims = NumDimensions(input);
-  const int32_t* perm_data = perm->data.i32;
+  int dims = NumDimensions(op_context.input);
+  const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
 
   // Ensure validity of the permutations tensor as a 1D tensor.
-  TF_LITE_ENSURE_EQ(context, NumDimensions(perm), 1);
-  TF_LITE_ENSURE_EQ(context, perm->dims->data[0], dims);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1);
+  TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims);
   for (int idx = 0; idx < dims; ++idx) {
     TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims),
                        "Transpose op permutations array is out of bounds.");
@@ -83,44 +93,30 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   switch (input->type) {
     case kTfLiteFloat32:
       reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
-                               input->data.f,
+                               tflite::micro::GetTensorData<float>(input),
                                tflite::micro::GetTensorShape(output),
-                               output->data.f);
+                               tflite::micro::GetTensorData<float>(output));
       break;
     case kTfLiteInt8:
       reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
-                               input->data.int8,
+                               tflite::micro::GetTensorData<int8_t>(input),
                                tflite::micro::GetTensorShape(output),
-                               output->data.int8);
-      break;
-    case kTfLiteInt32:
-      reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
-                               input->data.i32,
-                               tflite::micro::GetTensorShape(output),
-                               output->data.i32);
+                               tflite::micro::GetTensorData<int8_t>(output));
       break;
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf(
+          "Type %s is currently not supported by Transpose. "
+          "Only float32 and int8 is supported",
+          TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
 
   return kTfLiteOk;
 }
 
-}  // namespace transpose
-}  // namespace builtin
-}  // namespace ops
+}  // namespace
 
 TfLiteRegistration Register_TRANSPOSE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/ops::builtin::transpose::Prepare,
-          /*invoke=*/ops::builtin::transpose::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
-
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cc
index e25236a..411d4e0 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/transpose_conv.cc
@@ -191,7 +191,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node)
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_min),
         reinterpret_cast<int32_t*>(&data->op_params.output_activation_max),
         data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        reinterpret_cast<int32_t*>(data->per_channel_output_shift),
         num_channels));
     }
 
@@ -353,7 +353,7 @@ TfLiteRegistration Register_TRANSPOSE_CONV() {
 }  // namespace tflite
 
 #else
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -379,6 +379,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/padding.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -402,6 +403,10 @@ struct OpData {
   // A scratch buffer is required for quantized implementations.
   int scratch_buffer_index;
 
+  // TODO(b/192090531): Remove this once all 8x16 transpose conv models use
+  // 64-bit biases.
+  int bias_converted_buffer_index;
+
   // Multiplier and shift arrays are required for the int8 implementation.
   int32_t* per_channel_output_multiplier;
   int32_t* per_channel_output_shift;
@@ -420,9 +425,8 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) {
 }
 
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
-                             const TfLiteConvParams* params, int width,
+                             const TfLiteTransposeConvParams* params, int width,
                              int height, int filter_width, int filter_height,
-                             int out_width, int out_height,
                              const TfLiteType data_type, OpData* data) {
   bool has_bias = node->inputs->size == 4;
   // Check number of inputs/outputs
@@ -431,10 +435,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
 
   // Matching GetWindowedOutputSize in TensorFlow.
   auto padding = params->padding;
+  int unused_output_width;
+  int unused_output_height;
   TfLitePaddingValues padding_values = ComputePaddingHeightWidth(
-      params->stride_height, params->stride_width,
-      params->dilation_height_factor, params->dilation_width_factor, height,
-      width, filter_height, filter_width, padding, &out_height, &out_width);
+      params->stride_height, params->stride_width, 1,
+      1,  // Dilation height and width are always 1 for transpose_conv.
+      height, width, filter_height, filter_width, padding,
+      &unused_output_height, &unused_output_width);
 
   data->params.padding_type = RuntimePaddingType(padding);
   data->params.padding_values.width = padding_values.width;
@@ -443,24 +450,48 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   // Note that quantized inference requires that all tensors have their
   // parameters set. This is usually done during quantized training.
   if (data_type != kTfLiteFloat32) {
-    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    MicroContext* micro_context = GetMicroContext(context);
+
+    TfLiteTensor* input =
+        micro_context->AllocateTempInputTensor(node, kInputTensor);
     TF_LITE_ENSURE(context, input != nullptr);
-    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    TfLiteTensor* filter =
+        micro_context->AllocateTempInputTensor(node, kFilterTensor);
     TF_LITE_ENSURE(context, filter != nullptr);
-    const TfLiteTensor* bias =
-        GetOptionalInputTensor(context, node, kBiasTensor);
-    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+    TfLiteTensor* bias =
+        micro_context->AllocateTempInputTensor(node, kBiasTensor);
+    TfLiteTensor* output =
+        micro_context->AllocateTempOutputTensor(node, kOutputTensor);
     TF_LITE_ENSURE(context, output != nullptr);
     int output_channels = filter->dims->data[kConvQuantizedDimension];
 
     TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
-        context, input, filter, bias, output, params->activation,
+        context, input, filter, bias, output, kTfLiteActNone,
         &data->params.output_multiplier, &data->params.output_shift,
         &data->params.quantized_activation_min,
         &data->params.quantized_activation_max,
-        data->per_channel_output_multiplier,
-        reinterpret_cast<int*>(data->per_channel_output_shift),
+        data->per_channel_output_multiplier, data->per_channel_output_shift,
         output_channels));
+
+    // TODO(b/192090531): Remove this once all 8x16 transpose conv models use
+    // 64-bit biases.
+    if (input->type == kTfLiteInt16) {
+      TFLITE_DCHECK(filter->type == kTfLiteInt8);
+      TFLITE_DCHECK(output->type == kTfLiteInt16);
+      if (bias != nullptr && bias->type == kTfLiteInt16) {
+        TFLITE_DCHECK(
+            context->RequestScratchBufferInArena(
+                context, GetTensorShape(bias).FlatSize() * sizeof(std::int64_t),
+                &(data->bias_converted_buffer_index)) == kTfLiteOk);
+      }
+    }
+
+    micro_context->DeallocateTempTfLiteTensor(input);
+    micro_context->DeallocateTempTfLiteTensor(filter);
+    micro_context->DeallocateTempTfLiteTensor(output);
+    if (bias != nullptr) {
+      micro_context->DeallocateTempTfLiteTensor(bias);
+    }
   }
   return kTfLiteOk;
 }
@@ -475,21 +506,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
 
   OpData* data = static_cast<OpData*>(node->user_data);
-  const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
+  const auto params =
+      static_cast<const TfLiteTransposeConvParams*>(node->builtin_data);
 
-  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  MicroContext* micro_context = GetMicroContext(context);
+
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
   TF_LITE_ENSURE(context, output != nullptr);
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
   TF_LITE_ENSURE(context, input != nullptr);
-  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TfLiteTensor* filter =
+      micro_context->AllocateTempInputTensor(node, kFilterTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
 
-  int input_width = input->dims->data[2];
-  int input_height = input->dims->data[1];
-  int filter_width = filter->dims->data[2];
-  int filter_height = filter->dims->data[1];
-  int output_width = output->dims->data[2];
-  int output_height = output->dims->data[1];
+  // Get height and width of the output.
+  const int width = SizeOfDimension(output, 2);
+  const int height = SizeOfDimension(output, 1);
+  const int filter_width = SizeOfDimension(filter, 2);
+  const int filter_height = SizeOfDimension(filter, 1);
 
   // Dynamically allocate per-channel quantization parameters.
   const int num_channels = filter->dims->data[kConvQuantizedDimension];
@@ -501,7 +537,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
           context, num_channels * sizeof(int32_t)));
 
   // Quantized kernels use an int32 scratch buffer.
-  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8) {
     TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
     TFLITE_DCHECK(context->RequestScratchBufferInArena(
                       context,
@@ -509,8 +545,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                       &(data->scratch_buffer_index)) == kTfLiteOk);
   }
 
+  // Quantized 16x8 kernels use an int64 scratch buffer.
+  if (input->type == kTfLiteInt16) {
+    TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
+    TFLITE_DCHECK(context->RequestScratchBufferInArena(
+                      context,
+                      GetTensorShape(output).FlatSize() * sizeof(std::int64_t),
+                      &(data->scratch_buffer_index)) == kTfLiteOk);
+  }
+
   // All per-channel quantized tensors need valid zero point and scale arrays.
-  if (input->type == kTfLiteInt8) {
+  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
 
@@ -528,28 +573,24 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                       affine_quantization->zero_point->size);
   }
 
-  TF_LITE_ENSURE_STATUS(CalculateOpData(
-      context, node, params, input_width, input_height, filter_width,
-      filter_height, output_width, output_height, input->type, data));
+  TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height,
+                                        filter_width, filter_height,
+                                        input->type, data));
 
   // Offsets (zero points)
   data->params.input_offset = -input->params.zero_point;
   data->params.weights_offset = -filter->params.zero_point;
   data->params.output_offset = output->params.zero_point;
 
-  // Stride + dilation
+  // Stride
   data->params.stride_width = params->stride_width;
   data->params.stride_height = params->stride_height;
-  data->params.dilation_width_factor = params->dilation_width_factor;
-  data->params.dilation_height_factor = params->dilation_height_factor;
-
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-  data->params.float_activation_min = output_activation_min;
-  data->params.float_activation_max = output_activation_max;
+
+  micro_context->DeallocateTempTfLiteTensor(output);
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(filter);
   return kTfLiteOk;
-}  // namespace conv
+}
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* input =
@@ -567,18 +608,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const OpData& data = *(static_cast<const OpData*>(node->user_data));
 
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
-  TF_LITE_ENSURE_MSG(context, input->type == filter->type,
-                     "Hybrid models are not supported on TFLite Micro.");
+  TF_LITE_ENSURE_MSG(
+      context,
+      input->type == filter->type ||
+          (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8),
+      "Hybrid models are not supported on TFLite Micro.");
 
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32: {
+      const auto& params =
+          *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+      ConvParams op_params = data.params;
+      CalculateActivationRange(params.activation,
+                               &op_params.float_activation_min,
+                               &op_params.float_activation_max);
+
       reference_ops::TransposeConv(
-          data.params, tflite::micro::GetTensorShape(input),
+          op_params, tflite::micro::GetTensorShape(input),
           tflite::micro::GetTensorData<float>(input),
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<float>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<float>(bias),
+          tflite::micro::GetOptionalTensorData<float>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<float>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr);
@@ -594,15 +645,53 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           tflite::micro::GetTensorShape(filter),
           tflite::micro::GetTensorData<int8_t>(filter),
           tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
+          tflite::micro::GetOptionalTensorData<int32_t>(bias),
           tflite::micro::GetTensorShape(output),
           tflite::micro::GetTensorData<int8_t>(output),
           tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
       break;
     }
+    case kTfLiteInt16: {
+      std::int64_t* scratch_buffer = static_cast<int64_t*>(
+          context->GetScratchBuffer(context, data.scratch_buffer_index));
+      // TODO(b/192090531): Remove this once all 8x16 transpose conv models use
+      // 64-bit biases.
+      if (bias != nullptr && bias->type == kTfLiteInt16) {
+        std::int64_t* bias_converted_buffer =
+            static_cast<int64_t*>(context->GetScratchBuffer(
+                context, data.bias_converted_buffer_index));
+        for (int i = 0; i < tflite::micro::GetTensorShape(bias).FlatSize();
+             i++) {
+          bias_converted_buffer[i] = bias->data.i16[i];
+        }
+        reference_integer_ops::TransposeConv(
+            data.params, data.per_channel_output_multiplier,
+            data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<int16_t>(input),
+            tflite::micro::GetTensorShape(filter),
+            tflite::micro::GetTensorData<int8_t>(filter),
+            tflite::micro::GetTensorShape(bias), bias_converted_buffer,
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int16_t>(output),
+            tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
+      } else {
+        reference_integer_ops::TransposeConv(
+            data.params, data.per_channel_output_multiplier,
+            data.per_channel_output_shift, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<int16_t>(input),
+            tflite::micro::GetTensorShape(filter),
+            tflite::micro::GetTensorData<int8_t>(filter),
+            tflite::micro::GetTensorShape(bias),
+            tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int16_t>(output),
+            tflite::micro::GetTensorShape(nullptr), nullptr, scratch_buffer);
+      }
+      break;
+    }
     default:
-      TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
-                         TfLiteTypeGetName(input->type), input->type);
+      MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+                  input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -611,14 +700,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_TRANSPOSE_CONV() {
-  return {/*init=*/Init,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cc
index 7197bad..c0f4317 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.cc
@@ -69,6 +69,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
+  MicroContext* micro_context = GetMicroContext(context);
   const OpDataTree* data = static_cast<const OpDataTree*>(node->user_data);
   const flexbuffers::Map& m = flexbuffers::GetRoot(data->buffer_t, data->buffer_length).AsMap();
 
@@ -108,10 +109,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* input = micro_context->AllocateTempInputTensor(node, 0);
   TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE(context, NumDimensions(input) == 2);
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output = micro_context->AllocateTempOutputTensor(node, 0);
   TF_LITE_ENSURE(context, output != nullptr);
 
   int input_width = SizeOfDimension(input, 1);
@@ -121,42 +122,59 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   for (uint32_t i = 0; i < data->num_internal_nodes; i++) {
     TF_LITE_ENSURE(context, data->nodes_featureids[i] < input_width);
     TF_LITE_ENSURE(context, data->nodes_featureids[i] >= 0);
-    if (data->nodes_modes[i] == 0) {
-      TF_LITE_ENSURE(context, data->nodes_classids[i] < output_width);
-      TF_LITE_ENSURE(context, data->nodes_classids[i] >= 0);
+    if (!m["nodes_modes"].AsBlob().IsTheEmptyBlob()) {
+        if (data->nodes_modes[i] == 0) {
+            TF_LITE_ENSURE(context, data->nodes_classids[i] < output_width);
+            TF_LITE_ENSURE(context, data->nodes_classids[i] >= 0);
+        }
     }
   }
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
+
   return kTfLiteOk;
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   const OpDataTree* data = static_cast<const OpDataTree*>(node->user_data);
+
   const TfLiteEvalTensor* input =
       tflite::micro::GetEvalInput(context, node, 0);
-  const TfLiteEvalTensor* output =
+  const float *in_data = tflite::micro::GetTensorData<float>(input);
+
+  TfLiteEvalTensor* output =
       tflite::micro::GetEvalOutput(context, node, 0);
+  float *out_data = tflite::micro::GetTensorData<float>(output);
 
   const tflite::RuntimeShape output_shape = tflite::micro::GetTensorShape(output);
-  memset(output->data.f, 0, output_shape.FlatSize() * sizeof(float));
+  memset(out_data, 0, output_shape.FlatSize() * sizeof(float));
 
   for (uint32_t i = 0; i < data->num_trees; i++) {
     uint16_t ix = data->tree_root_ids[i];
+
     while (ix < data->num_internal_nodes) {
-      if (input->data.f[data->nodes_featureids[ix]] <= data->nodes_values[ix]) {
+      float node_val = 0;
+      memcpy(&node_val, (data->nodes_values + ix), sizeof(float));
+
+      if (in_data[data->nodes_featureids[ix]] <= node_val) {
         ix = data->nodes_truenodeids[ix];
       } else {
         ix = data->nodes_falsenodeids[ix];
       }
     }
     ix -= data->num_internal_nodes;
-    output->data.f[data->nodes_classids[ix]] += data->nodes_weights[ix];
+
+    float weight = 0;
+    memcpy(&weight, (data->nodes_weights + ix), sizeof(float));
+    out_data[data->nodes_classids[ix]] += weight;
   }
 
   return kTfLiteOk;
 }
 
+
 }  // namespace
 
 TfLiteRegistration* Register_TreeEnsembleClassifier() {
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
new file mode 100644
index 0000000..7ff9a2f
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc
@@ -0,0 +1,589 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Integer version of unidirectional sequence lstm. Only the standard LSTM
+// (defined in the keras LSTM layer, e.g., no peephole etc.) is supported here.
+// Currently used by the 16 bits activation case only
+
+#include <algorithm>
+#include <limits>
+
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/quantization_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_eval.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/lstm_shared.h"
+
+namespace tflite {
+
+namespace {
+/*Helper Functions*/
+
+// Interface to access all the TempTfLiteTensors of the LSTM kernel during the
+// preparation phase. Can only be constructed through the constructor to avoid
+// memory leakage. All TempTfLiteTensors will be deallocated through the
+// destructor.
+class LstmTensors {
+ public:
+  LstmTensors(const LstmTensors& other) = delete;
+  LstmTensors& operator=(const LstmTensors& other) = delete;
+
+  LstmTensors(TfLiteContext* context, TfLiteNode* node) {
+    micro_context_ = GetMicroContext(context);
+    // 24 internal tensors. see lstm_shared.h for tensor names
+    for (size_t i = 0; i < 24; i++) {
+      internal_tensors_[i] = micro_context_->AllocateTempInputTensor(node, i);
+    }
+    output_tensor_ =
+        micro_context_->AllocateTempOutputTensor(node, kLstmOutputTensor);
+  }
+
+  ~LstmTensors() {
+    for (TfLiteTensor* tensor : internal_tensors_) {
+      if (tensor != nullptr) micro_context_->DeallocateTempTfLiteTensor(tensor);
+    }
+    if (output_tensor_ != nullptr) {  // can be null, see ValidateTensorStatus
+      micro_context_->DeallocateTempTfLiteTensor(output_tensor_);
+    }
+  }
+
+  // Verify the LSTM internal tensor properties (e.g., type checks)
+  // Input/output/states/fc weights tensors are required for kernel evaluation.
+  // The state tensors should be variables. Variants of the standard LSTM
+  // are not supported here, therefore their corresponding tensors should be
+  // invalid
+  TfLiteStatus ValidateTensorStatus(TfLiteContext* context) const {
+    // Verify certain tensor properties
+    // input tensor
+    TF_LITE_ENSURE(context, internal_tensors_[kLstmInputTensor] != nullptr);
+    // hidden state
+    TF_LITE_ENSURE(context,
+                   internal_tensors_[kLstmOutputStateTensor] != nullptr);
+    TF_LITE_ENSURE(context,
+                   internal_tensors_[kLstmOutputStateTensor]->is_variable);
+    // hidden state becomes input so they must have the same type
+    TF_LITE_ENSURE_EQ(context, internal_tensors_[kLstmOutputStateTensor]->type,
+                      internal_tensors_[kLstmInputTensor]->type);
+    // cell state
+    TF_LITE_ENSURE(context, internal_tensors_[kLstmCellStateTensor] != nullptr);
+    TF_LITE_ENSURE(context,
+                   internal_tensors_[kLstmCellStateTensor]->is_variable);
+    // output
+    TF_LITE_ENSURE(context, output_tensor_ != nullptr);
+    // output type is the same as the input type (activations)
+    TF_LITE_ENSURE_EQ(context, output_tensor_->type,
+                      internal_tensors_[kLstmInputTensor]->type);
+
+    // weight tensors (1-8, see lstm_shared for index definition); every tensor
+    // is null-checked before its type is compared with the first weight tensor
+    for (size_t i = 1; i < 9; i++) {
+      TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr);
+      TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type,
+                        internal_tensors_[1]->type);
+    }
+
+    // bias tensors (12-15, see lstm_shared for index definition)
+    for (size_t i = 12; i < 16; i++) {
+      TF_LITE_ENSURE(context, internal_tensors_[i] != nullptr);
+      TF_LITE_ENSURE_EQ(context, internal_tensors_[i]->type,
+                        internal_tensors_[12]->type);
+    }
+    // Tensors from LSTM variants are invalid
+    // No peephole
+    for (size_t i = 9; i < 12; i++) {
+      TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr);
+    }
+    // No projection
+    for (size_t i = 16; i < 18; i++) {
+      TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr);
+    }
+    // No internal layer norm
+    for (size_t i = 20; i < 24; i++) {
+      TF_LITE_ENSURE(context, internal_tensors_[i] == nullptr);
+    }
+    return kTfLiteOk;
+  }
+
+  // Internal tensors. see lstm_shared.h for tensor names
+  const TfLiteTensor* GetInternalTensor(const int tensor_index) const {
+    return internal_tensors_[tensor_index];
+  }
+
+  const TfLiteTensor* HiddenStateTensor() const {
+    return internal_tensors_[kLstmOutputStateTensor];
+  }
+  const TfLiteTensor* CellStateTensor() const {
+    return internal_tensors_[kLstmCellStateTensor];
+  }
+  const TfLiteTensor* OutputTensor() const { return output_tensor_; }
+
+ private:
+  // see lstm_shared.h for tensor names
+  MicroContext* micro_context_;
+  TfLiteTensor* internal_tensors_[24];
+  TfLiteTensor* output_tensor_;
+};
+
+// Deduce the size information (Batch (B), Time Steps (T), Input dimension (I),
+// State dimension (S)) that defines the LSTM using the input and hidden state
+// tensor
+LstmSizeInfo CreateLstmSizeInfo(
+    const bool time_major, const TfLiteIntArray* input_tensor_shape,
+    const TfLiteIntArray* hidden_state_tensor_shape) {
+  const int batch_axis = time_major ? 1 : 0;  // input is [T, B, I] or [B, T, I]
+  const int time_axis = time_major ? 0 : 1;
+  LstmSizeInfo info;
+  info.time_major = time_major;
+  info.batch_size = input_tensor_shape->data[batch_axis];
+  info.time_steps = input_tensor_shape->data[time_axis];
+  info.input_dimension = input_tensor_shape->data[2];
+  info.state_dimension = hidden_state_tensor_shape->data[1];
+  return info;
+}
+
+TfLiteStatus ValidateWeightTensorSize(TfLiteContext* context,
+                                      const TfLiteTensor* tensor, int dim1_size,
+                                      int dim2_size) {
+  TF_LITE_ENSURE_EQ(context, tensor->dims->size, 2);  // must be 2-D
+  TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], dim1_size);
+  TF_LITE_ENSURE_EQ(context, tensor->dims->data[1], dim2_size);
+  return kTfLiteOk;  // shape is exactly [dim1_size, dim2_size]
+}
+
+TfLiteStatus ValidateBiasTensorSize(TfLiteContext* context,
+                                    const TfLiteTensor* tensor, int size) {
+  TF_LITE_ENSURE_EQ(context, tensor->dims->size, 1);  // must be 1-D
+  TF_LITE_ENSURE_EQ(context, tensor->dims->data[0], size);
+  return kTfLiteOk;  // shape is exactly [size]
+}
+
+// Go through every tensor and make sure its shape matches the kernel
+// configuration
+TfLiteStatus ValidateTensorSize(TfLiteContext* context,
+                                const LstmTensors& tensors,
+                                const LstmSizeInfo& size_info) {
+  // Input FC weights (tensors 1-4): [state_dimension, input_dimension]
+  for (size_t i = 1; i < 5; i++) {
+    TF_LITE_ENSURE_OK(
+        context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i),
+                                          size_info.state_dimension,
+                                          size_info.input_dimension));
+  }
+  // Recurrent FC weights (tensors 5-8): [state_dimension, state_dimension]
+  for (size_t i = 5; i < 9; i++) {
+    TF_LITE_ENSURE_OK(
+        context, ValidateWeightTensorSize(context, tensors.GetInternalTensor(i),
+                                          size_info.state_dimension,
+                                          size_info.state_dimension));
+  }
+  // Biases (tensors 12-15): [state_dimension]
+  for (size_t i = 12; i < 16; i++) {
+    TF_LITE_ENSURE_OK(
+        context, ValidateBiasTensorSize(context, tensors.GetInternalTensor(i),
+                                        size_info.state_dimension));
+  }
+
+  // Check the shape of input state tensors.
+  // These tensors may be 1D or 2D. It's fine as long as the total size is
+  // correct.
+  TF_LITE_ENSURE_EQ(context, NumElements(tensors.HiddenStateTensor()),
+                    size_info.batch_size * size_info.state_dimension);
+  TF_LITE_ENSURE_EQ(context, NumElements(tensors.CellStateTensor()),
+                    size_info.batch_size * size_info.state_dimension);
+
+  // Output must be 3-D: first two dims equal the input's, last is the state dim
+  TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->size, 3);
+  TF_LITE_ENSURE_EQ(context,
+                    tensors.GetInternalTensor(kLstmInputTensor)->dims->data[0],
+                    tensors.OutputTensor()->dims->data[0]);
+  TF_LITE_ENSURE_EQ(context,
+                    tensors.GetInternalTensor(kLstmInputTensor)->dims->data[1],
+                    tensors.OutputTensor()->dims->data[1]);
+  TF_LITE_ENSURE_EQ(context, tensors.OutputTensor()->dims->data[2],
+                    size_info.state_dimension);
+  return kTfLiteOk;
+}
+
+// Fills gate_params with the input and recurrent FC parameters of one LSTM gate
+TfLiteStatus CreateGateParams(
+    TfLiteContext* context,
+    /*Input tensors*/
+    const TfLiteTensor* input, const TfLiteTensor* input_weight,
+    const TfLiteTensor* input_bias,
+    /*Hidden state tensors*/
+    const TfLiteTensor* hidden_state, const TfLiteTensor* hidden_state_weight,
+    const TfLiteTensor* hidden_state_bias,
+    /*Scale of the fc output (input to non-linear activation)*/
+    const float nonlinear_activation_input_scale, const TfLiteType cell_type,
+    tflite::GateParameters& gate_params) {
+  // A temp tflite tensor to represent the output of fc operation. Only the data
+  // type and quantization parameters are set since it is only used for
+  // parameter calculations
+  TfLiteTensor fc_output_temp;
+  fc_output_temp.type = cell_type;
+  fc_output_temp.params.scale = nonlinear_activation_input_scale;
+  fc_output_temp.params.zero_point = 0;  // symmetrical quantized
+
+  // A temp fc opdata to reuse the helper function on creating fc parameters
+  tflite::OpDataFullyConnected fc_data_temp;
+  // TODO(b/265853320): due to the lack of precision for the float scale,
+  // scale_diff / output_scale <= 0.02 (potentially requires 1e-8 precision) can
+  // not be satisfied for the bias. Here we rely on the correctness of the
+  // conversion process (set input_bias=nullptr to avoid checking) for
+  // tensor scales, so the input_bias parameter is not read in this function
+  TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected(
+      context, kTfLiteActNone, input->type, input, input_weight,
+      /*input_bias=*/nullptr, &fc_output_temp, &fc_data_temp));
+  gate_params.input_fc_params = FullyConnectedParamsQuantized(fc_data_temp);
+  double real_multiplier = 0.0;  // NOTE(review): written below, never read
+  GetQuantizedConvolutionMultipler(context, input, input_weight, nullptr,
+                                   &fc_output_temp, &real_multiplier);
+
+  TF_LITE_ENSURE_STATUS(CalculateOpDataFullyConnected(
+      context, kTfLiteActNone, hidden_state->type, hidden_state,
+      hidden_state_weight, hidden_state_bias, &fc_output_temp, &fc_data_temp));
+  gate_params.recurrent_fc_params = FullyConnectedParamsQuantized(fc_data_temp);
+  return kTfLiteOk;
+}
+
+// Create parameters for element wise multiplication that happens in a) cell
+// state update ; b) hidden state update
+// Note that all the output of gates are symmetrically quantized so only scales
+// are required for input. However, during the hidden state update phase, the
+// output is the updated hidden state, which is asymmetrically quantized. Thus
+// output may require zero point
+tflite::ArithmeticParams CreateInterGateMulParams(const float input1_scale,
+                                                  const float input2_scale,
+                                                  const float output_scale,
+                                                  const TfLiteType output_type,
+                                                  const int output_zp = 0) {
+  // Requantization factor: (input1_scale * input2_scale) / output_scale
+  const double scale_in1 = static_cast<double>(input1_scale);
+  const double scale_in2 = static_cast<double>(input2_scale);
+  const double scale_out = static_cast<double>(output_scale);
+  const double requant_scale = (scale_in1 * scale_in2) / scale_out;
+
+  tflite::ArithmeticParams mul_params = {};
+  mul_params.input1_offset = 0;  // symmetric
+  mul_params.input2_offset = 0;  // symmetric
+  mul_params.output_offset = output_zp;
+  QuantizeMultiplier(requant_scale, &mul_params.output_multiplier,
+                     &mul_params.output_shift);
+  // Clamp range follows the output type; other types keep the zeroed range
+  if (output_type == kTfLiteInt8) {
+    mul_params.quantized_activation_min = std::numeric_limits<int8_t>::min();
+    mul_params.quantized_activation_max = std::numeric_limits<int8_t>::max();
+  } else if (output_type == kTfLiteInt16) {
+    mul_params.quantized_activation_min = std::numeric_limits<int16_t>::min();
+    mul_params.quantized_activation_max = std::numeric_limits<int16_t>::max();
+  }
+  return mul_params;
+}
+
+// Create the additional information about the cell state, which include:
+// cell_state_scale_power: used in integer nonlinear function (e.g., tanh)
+// quantized_cell_clip: quantized cell clip range
+CellStateInfo CreateLstmCellStateInfo(const float cell_state_scale,
+                                      const float cell_clip) {
+  CellStateInfo info;
+  info.cell_clip = cell_clip;
+
+  // cell_state_scale_power: the integer CheckedLog2 writes for the scale
+  int scale_log2;
+  tflite::CheckedLog2(cell_state_scale, &scale_log2);
+  info.cell_state_scale_power = scale_log2;
+  // quantized_cell_clip: cell_clip / scale, saturated to the int16 range
+  const double clip_in_units =
+      static_cast<double>(cell_clip) / static_cast<double>(cell_state_scale);
+  info.quantized_cell_clip = static_cast<int16_t>(
+      std::min(std::max(clip_in_units, -32768.0), 32767.0));
+  return info;
+}
+
+CellStateInfo CreateLstmCellStateInfoFloat(const float cell_clip) {
+  CellStateInfo float_info;
+  float_info.quantized_cell_clip = 0;     // float path: no quantization
+  float_info.cell_state_scale_power = 0;  // float path: no quantization
+  float_info.cell_clip = cell_clip;
+  return float_info;
+}
+
+tflite::FullyConnectedParams CreateFCParamsFloat() {
+  FullyConnectedParams op_params = {};  // zero every field not set below
+  CalculateActivationRange(kTfLiteActNone, &op_params.float_activation_min,
+                           &op_params.float_activation_max);
+  return op_params;
+}
+
+tflite::GateParameters CreateGateParamsFloat() {
+  tflite::GateParameters float_gate = {};  // both FC stages share one setup
+  float_gate.recurrent_fc_params = CreateFCParamsFloat();
+  float_gate.input_fc_params = CreateFCParamsFloat();
+  return float_gate;
+}
+
+tflite::ArithmeticParams CreateInterGateMulParamsFloat() {
+  tflite::ArithmeticParams float_mul = {};  // only the float range is set
+  CalculateActivationRange(kTfLiteActNone, &float_mul.float_activation_min,
+                           &float_mul.float_activation_max);
+  return float_mul;
+}
+
+TfLiteStatus PrepareGateParametersFloat(TfLiteContext* context,
+                                        const LstmTensors& lstm_tensors,
+                                        OpDataLSTM* op_data) {
+  // context and lstm_tensors are not read here: every gate receives the same
+  // default float parameters
+  op_data->output_gate_parameters = CreateGateParamsFloat();
+  op_data->cell_gate_parameters = CreateGateParamsFloat();
+  op_data->input_gate_parameters = CreateGateParamsFloat();
+  op_data->forget_gate_parameters = CreateGateParamsFloat();
+
+  // Element-wise multiplication parameters between gates
+  auto& inter_gate = op_data->inter_gate_parameters;
+  inter_gate.output_mul_params = CreateInterGateMulParamsFloat();
+  inter_gate.input_mul_params = CreateInterGateMulParamsFloat();
+  inter_gate.forget_cell_mul_params = CreateInterGateMulParamsFloat();
+  return kTfLiteOk;
+}
+
+TfLiteStatus PrepareGateParametersInteger(TfLiteContext* context,
+                                          const LstmTensors& lstm_tensors,
+                                          OpDataLSTM* op_data) {
+  float nonlinear_input_scale = 0.00024414062;  // 2^-12 Q3.12 -> Q0.15
+  TF_LITE_ENSURE_OK(
+      context,
+      CreateGateParams(
+          context, lstm_tensors.GetInternalTensor(kLstmInputTensor),
+          lstm_tensors.GetInternalTensor(kLstmInputToForgetWeightsTensor),
+          lstm_tensors.GetInternalTensor(kLstmForgetGateBiasTensor),
+          lstm_tensors.GetInternalTensor(kLstmOutputStateTensor),
+          lstm_tensors.GetInternalTensor(kLstmRecurrentToForgetWeightsTensor),
+          /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16,
+          op_data->forget_gate_parameters));  // forget gate
+  TF_LITE_ENSURE_OK(
+      context,
+      CreateGateParams(
+          context, lstm_tensors.GetInternalTensor(kLstmInputTensor),
+          lstm_tensors.GetInternalTensor(kLstmInputToInputWeightsTensor),
+          lstm_tensors.GetInternalTensor(kLstmInputGateBiasTensor),
+          lstm_tensors.GetInternalTensor(kLstmOutputStateTensor),
+          lstm_tensors.GetInternalTensor(kLstmRecurrentToInputWeightsTensor),
+          /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16,
+          op_data->input_gate_parameters));  // input gate
+  TF_LITE_ENSURE_OK(
+      context,
+      CreateGateParams(
+          context, lstm_tensors.GetInternalTensor(kLstmInputTensor),
+          lstm_tensors.GetInternalTensor(kLstmInputToCellWeightsTensor),
+          lstm_tensors.GetInternalTensor(kLstmCellGateBiasTensor),
+          lstm_tensors.GetInternalTensor(kLstmOutputStateTensor),
+          lstm_tensors.GetInternalTensor(kLstmRecurrentToCellWeightsTensor),
+          /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16,
+          op_data->cell_gate_parameters));  // cell gate
+  TF_LITE_ENSURE_OK(
+      context,
+      CreateGateParams(
+          context, lstm_tensors.GetInternalTensor(kLstmInputTensor),
+          lstm_tensors.GetInternalTensor(kLstmInputToOutputWeightsTensor),
+          lstm_tensors.GetInternalTensor(kLstmOutputGateBiasTensor),
+          lstm_tensors.GetInternalTensor(kLstmOutputStateTensor),
+          lstm_tensors.GetInternalTensor(kLstmRecurrentToOutputWeightsTensor),
+          /*hidden_state_bias=*/nullptr, nonlinear_input_scale, kTfLiteInt16,
+          op_data->output_gate_parameters));  // output gate
+
+  // Inter gate multiplication parameters
+  float nonlinear_output_scale = 0.00003051757;  // 2^-15 (Q0.15 output)
+  float cell_state_scale = lstm_tensors.CellStateTensor()->params.scale;
+  // forget gate output (nonlinear output) x cell state -> cell state
+  op_data->inter_gate_parameters.forget_cell_mul_params =
+      CreateInterGateMulParams(nonlinear_output_scale, cell_state_scale,
+                               cell_state_scale, kTfLiteInt16);
+  // input gate output x cell gate output -> cell state
+  op_data->inter_gate_parameters.input_mul_params =
+      CreateInterGateMulParams(nonlinear_output_scale, nonlinear_output_scale,
+                               cell_state_scale, kTfLiteInt16);
+  // tanh output x output gate output -> hidden state (potentially asymmetric)
+  op_data->inter_gate_parameters.output_mul_params = CreateInterGateMulParams(
+      nonlinear_output_scale, nonlinear_output_scale,
+      lstm_tensors.HiddenStateTensor()->params.scale,
+      lstm_tensors.HiddenStateTensor()->type,
+      lstm_tensors.HiddenStateTensor()->params.zero_point);
+  return kTfLiteOk;
+}
+
+LSTMKernelContents CreateLSTMKernelContent(TfLiteContext* context,
+                                           TfLiteNode* node) {
+  LSTMKernelContents contents;
+  // Point every one of the 24 input slots at its eval tensor
+  for (size_t slot = 0; slot < 24; ++slot) {
+    contents.internal_tensors[slot] =
+        tflite::micro::GetMutableEvalInput(context, node, slot);
+  }
+  // The op's output is looked up at output index 0
+  contents.output_tensor = tflite::micro::GetEvalOutput(context, node, 0);
+  return contents;
+}
+
+template <typename CellType>
+LSTMBuffers<CellType> CreateLSTMBuffers(TfLiteContext* context,
+                                        const int* buffer_indices) {
+  const auto scratch = [context](const int index) {  // index -> typed pointer
+    return reinterpret_cast<CellType*>(
+        context->GetScratchBuffer(context, index));
+  };
+  LSTMBuffers<CellType> result;
+  result.buffer0 = scratch(buffer_indices[0]);
+  result.buffer1 = scratch(buffer_indices[1]);
+  result.buffer2 = scratch(buffer_indices[2]);
+  result.buffer3 = scratch(buffer_indices[3]);
+  return result;
+}
+
+/*Kernel functions*/
+
+void* UnidirectionalSequenceLstmInit(TfLiteContext* context, const char* buffer,
+                                     size_t length) {  // buffer/length unused
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataLSTM));
+}
+
+TfLiteStatus UnidirectionalSequenceLstmPrepare(TfLiteContext* context,
+                                               TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);  // exactly one output
+  TF_LITE_ENSURE_EQ(context, node->inputs->size, 24);  // all 24 input slots
+
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  TFLITE_DCHECK(node->user_data != nullptr);
+
+  OpDataLSTM* op_data = reinterpret_cast<OpDataLSTM*>(node->user_data);
+  const auto* builtin_data =
+      static_cast<TfLiteUnidirectionalSequenceLSTMParams*>(node->builtin_data);
+  // All TempTfLiteTensors will be deallocated through the destructor.
+  LstmTensors lstm_tensors(context, node);
+  TF_LITE_ENSURE_OK(context, lstm_tensors.ValidateTensorStatus(context));
+
+  op_data->cell_gate_nonlinear_type = builtin_data->activation;
+  op_data->size_info =
+      CreateLstmSizeInfo(builtin_data->time_major,
+                         lstm_tensors.GetInternalTensor(kLstmInputTensor)->dims,
+                         lstm_tensors.HiddenStateTensor()->dims);
+  TF_LITE_ENSURE_OK(
+      context, ValidateTensorSize(context, lstm_tensors, op_data->size_info));
+
+  // Create cell state information and gate parameters (Fully Connected and Mul)
+  auto cell_state_type =
+      lstm_tensors.GetInternalTensor(kLstmCellStateTensor)->type;
+  if (cell_state_type == kTfLiteFloat32) {  // float path
+    op_data->cell_state_info =
+        CreateLstmCellStateInfoFloat(builtin_data->cell_clip);
+    TF_LITE_ENSURE_OK(
+        context, PrepareGateParametersFloat(context, lstm_tensors, op_data));
+  } else if (cell_state_type == kTfLiteInt16) {  // quantized path
+    op_data->cell_state_info = CreateLstmCellStateInfo(
+        lstm_tensors.CellStateTensor()->params.scale, builtin_data->cell_clip);
+    TF_LITE_ENSURE_OK(
+        context, PrepareGateParametersInteger(context, lstm_tensors, op_data));
+  } else {
+    MicroPrintf(
+        "Cell state type %s (%d) not supported. The quantized Unidirectional "
+        "Sequence LSTM Op only support int16 cell state",
+        TfLiteTypeGetName(cell_state_type), cell_state_type);
+    return kTfLiteError;
+  }
+  // request four scratch buffers of batch_size * state_dimension cell values
+  for (size_t i = 0; i < 4; i++) {
+    TF_LITE_ENSURE_OK(context, context->RequestScratchBufferInArena(
+                                   context,
+                                   op_data->size_info.batch_size *
+                                       op_data->size_info.state_dimension *
+                                       TfLiteTypeGetSize(cell_state_type),
+                                   &(op_data->buffer_indices[i])));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus UnidirectionalSequenceLstmEval(TfLiteContext* context,
+                                            TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpDataLSTM& op_data = *reinterpret_cast<OpDataLSTM*>(node->user_data);
+  auto kernel_content = CreateLSTMKernelContent(context, node);
+
+  const auto activation_type =
+      kernel_content.internal_tensors[kLstmInputTensor]->type;
+  const auto weight_type =
+      kernel_content.internal_tensors[kLstmInputToInputWeightsTensor]->type;
+  // Dispatch on the activation type first, then on the weight type
+  switch (activation_type) {
+    case kTfLiteFloat32: {
+      LSTMBuffers<float> buffers =
+          CreateLSTMBuffers<float>(context, op_data.buffer_indices);
+      EvalLstm<float, float, float, float>(op_data, kernel_content, buffers);
+      break;
+    }
+    case kTfLiteInt8: {
+      switch (weight_type) {
+        case kTfLiteInt8: {
+          // 8(activation)x8(weight)->16(cell) LSTM with 32 bits bias
+          LSTMBuffers<int16_t> buffers =
+              CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices);
+          EvalLstm<int8_t, int8_t, int16_t, int32_t>(op_data, kernel_content,
+                                                     buffers);
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(weight_type), weight_type);
+          return kTfLiteError;
+        }
+      }
+      break;
+    }
+    case kTfLiteInt16: {
+      switch (weight_type) {
+        case kTfLiteInt8: {
+          // 16(activation)x8(weight)->16(cell) LSTM with 64 bits bias
+          LSTMBuffers<int16_t> buffers =
+              CreateLSTMBuffers<int16_t>(context, op_data.buffer_indices);
+          EvalLstm<int16_t, int8_t, int16_t, int64_t>(op_data, kernel_content,
+                                                      buffers);
+          break;
+        }
+        default: {
+          MicroPrintf("Filter type %s (%d) not supported.",
+                      TfLiteTypeGetName(weight_type), weight_type);
+          return kTfLiteError;
+        }
+      }
+      break;
+    }
+    default: {
+      MicroPrintf("Input type %s (%d) not supported.",
+                  TfLiteTypeGetName(activation_type), activation_type);
+      return kTfLiteError;
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteRegistration Register_UNIDIRECTIONAL_SEQUENCE_LSTM() {
+  return tflite::micro::RegisterOp(UnidirectionalSequenceLstmInit,     // init
+                                   UnidirectionalSequenceLstmPrepare,  // prepare
+                                   UnidirectionalSequenceLstmEval);    // invoke
+}
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cc
index 26da0ce..c0d3d8b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/unpack.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace ops {
@@ -87,15 +88,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteInt32: {
       return UnpackImpl<int32_t>(context, node, input, data->num, data->axis);
     }
-    case kTfLiteUInt8: {
-      return UnpackImpl<uint8_t>(context, node, input, data->num, data->axis);
-    }
     case kTfLiteInt8: {
       return UnpackImpl<int8_t>(context, node, input, data->num, data->axis);
     }
     default: {
-      TF_LITE_KERNEL_LOG(context, "Type '%s' is not supported by unpack.",
-                         TfLiteTypeGetName(input->type));
+      MicroPrintf("Type '%s' is not supported by unpack.",
+                  TfLiteTypeGetName(input->type));
       return kTfLiteError;
     }
   }
@@ -106,14 +104,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace unpack
 
 TfLiteRegistration Register_UNPACK() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/nullptr,
-          /*invoke=*/unpack::Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, nullptr, unpack::Eval);
 }
 
 }  // namespace micro
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cc
new file mode 100644
index 0000000..2329f2c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/var_handle.cc
@@ -0,0 +1,93 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+namespace {
+
+struct OpData {
+  int32_t resource_id;  // Id from CreateIdIfNoneFound(), set in Prepare().
+};
+
+// Allocates persistent OpData for the node; buffer/length are unused.
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+// Points output tensor 0 of `node` at the resource id saved in its OpData, so
+// this output tensor will always return the correct resource id.
+void BindOutputToResourceId(TfLiteContext* context, TfLiteNode* node) {
+  OpData* node_state = reinterpret_cast<OpData*>(node->user_data);
+  TfLiteEvalTensor* id_tensor = tflite::micro::GetEvalOutput(context, node, 0);
+  TFLITE_DCHECK(id_tensor != nullptr);
+  id_tensor->data.i32 = &node_state->resource_id;
+}
+
+// Stores the id returned by CreateIdIfNoneFound() in OpData and binds the
+// output to it. Errors if there are no resource variables or the id is < 0.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* node_state = reinterpret_cast<OpData*>(node->user_data);
+  const auto* handle_params =
+      reinterpret_cast<const TfLiteVarHandleParams*>(node->builtin_data);
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph& graph = micro_context->graph();
+  MicroResourceVariables* variables = graph.GetResourceVariables();
+  if (variables == nullptr) {
+    MicroPrintf(
+        "VAR_HANDLE requires resource variables. Please create "
+        "ResourceVariables and pass it to the interpreter.");
+    return kTfLiteError;
+  }
+
+  node_state->resource_id = variables->CreateIdIfNoneFound(
+      handle_params->container, handle_params->shared_name);
+  if (node_state->resource_id < 0) {
+    return kTfLiteError;
+  }
+  BindOutputToResourceId(context, node);
+  return kTfLiteOk;
+}
+
+// Re-binds the output tensor to the saved resource id each time it runs.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  BindOutputToResourceId(context, node);
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+// Registration wiring Init, Prepare and Eval together for VAR_HANDLE.
+TfLiteRegistration Register_VAR_HANDLE() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cc
new file mode 100644
index 0000000..ba18ba6
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/while.cc
@@ -0,0 +1,133 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stddef.h>
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/builtin_op_data.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+namespace {
+
+struct OpData {
+  int cond_subgraph_index;  // Subgraph whose output 0 is the loop condition.
+  int body_subgraph_index;  // Subgraph invoked once per loop iteration.
+};
+
+// Allocates persistent OpData for the node; buffer/length are unused.
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+// Caches the cond/body subgraph indices and validates them against the graph:
+// index range, plus the op's input/output counts versus the subgraphs'.
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  const auto* params =
+      reinterpret_cast<const TfLiteWhileParams*>(node->builtin_data);
+
+  op_data->cond_subgraph_index = params->cond_subgraph_index;
+  op_data->body_subgraph_index = params->body_subgraph_index;
+
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph& graph_info = micro_context->graph();
+  size_t num_inputs = node->inputs->size;
+  size_t num_outputs = node->outputs->size;
+
+  // Indices must be non-negative as well as below the subgraph count.
+  TF_LITE_ENSURE(context, op_data->cond_subgraph_index >= 0);
+  TF_LITE_ENSURE(context, op_data->body_subgraph_index >= 0);
+  TF_LITE_ENSURE(context,
+                 op_data->cond_subgraph_index < graph_info.NumSubgraphs());
+  TF_LITE_ENSURE(context,
+                 op_data->body_subgraph_index < graph_info.NumSubgraphs());
+
+  TF_LITE_ENSURE_EQ(context, num_inputs,
+                    graph_info.NumSubgraphInputs(op_data->cond_subgraph_index));
+  TF_LITE_ENSURE_EQ(context, num_inputs,
+                    graph_info.NumSubgraphInputs(op_data->body_subgraph_index));
+  TF_LITE_ENSURE_EQ(context, num_inputs, num_outputs);
+  TF_LITE_ENSURE_EQ(
+      context, num_outputs,
+      graph_info.NumSubgraphOutputs(op_data->body_subgraph_index));
+  return kTfLiteOk;
+}
+
+// Runs the body subgraph for as long as the cond subgraph's output 0 is true.
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  tflite::MicroContext* micro_context = tflite::GetMicroContext(context);
+  MicroGraph* graph_info = &micro_context->graph();
+
+  // Initial condition check on the op's own inputs.
+  TF_LITE_ENSURE_OK(context,
+                    tflite::micro::CopyOpInputsToSubgraphInputs(
+                        context, node, graph_info, op_data->cond_subgraph_index,
+                        /*first_tensor_idx=*/0));
+  TF_LITE_ENSURE_OK(context,
+                    graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
+  TfLiteEvalTensor* cond_subgraph_output = graph_info->GetSubgraphOutput(
+      op_data->cond_subgraph_index, /*tensor_idx=*/0);
+  bool cond_value = cond_subgraph_output->data.b[0];
+  // Seed the body inputs and the op outputs from the op inputs before looping.
+  TF_LITE_ENSURE_OK(context,
+                    tflite::micro::CopyOpInputsToSubgraphInputs(
+                        context, node, graph_info, op_data->body_subgraph_index,
+                        /*first_tensor_idx=*/0));
+  TF_LITE_ENSURE_OK(context,
+                    tflite::micro::CopyOpInputsToOpOutputs(context, node));
+
+  while (cond_value) {
+    // Feed the current op outputs into the body subgraph's inputs.
+    TF_LITE_ENSURE_OK(
+        context, tflite::micro::CopyOpOutputsToSubgraphInputs(
+                     context, node, graph_info, op_data->body_subgraph_index));
+    TF_LITE_ENSURE_OK(context,
+                      graph_info->InvokeSubgraph(op_data->body_subgraph_index));
+    // Publish the body results, then re-evaluate the condition on them.
+    TF_LITE_ENSURE_OK(
+        context, tflite::micro::CopySubgraphOutputsToOpOutputs(
+                     context, node, graph_info, op_data->body_subgraph_index));
+    TF_LITE_ENSURE_OK(
+        context, tflite::micro::CopyOpOutputsToSubgraphInputs(
+                     context, node, graph_info, op_data->cond_subgraph_index));
+    TF_LITE_ENSURE_OK(context,
+                      graph_info->InvokeSubgraph(op_data->cond_subgraph_index));
+    cond_subgraph_output = graph_info->GetSubgraphOutput(
+        op_data->cond_subgraph_index, /*tensor_idx=*/0);
+    cond_value = cond_subgraph_output->data.b[0];
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace.
+
+// Registration wiring Init, Prepare and Eval together for WHILE.
+TfLiteRegistration Register_WHILE() {
+  return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cc b/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cc
index 73b9508..c868341 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/kernels/zeros_like.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 namespace {
@@ -25,15 +26,20 @@ constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kOutputTensor, &output));
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   output->type = input->type;
 
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(output);
   return kTfLiteOk;
 }
 
@@ -65,10 +71,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       resetZeros(tflite::micro::GetTensorData<float>(output), flat_size);
       break;
     default:
-      TF_LITE_KERNEL_LOG(context,
-                         "ZerosLike only currently supports int64, int32, "
-                         "and float32, got %d.",
-                         input->type);
+      MicroPrintf(
+          "ZerosLike only currently supports int64, int32, "
+          "and float32, got %d.",
+          input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
@@ -76,14 +82,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace
 
 TfLiteRegistration Register_ZEROS_LIKE() {
-  return {/*init=*/nullptr,
-          /*free=*/nullptr,
-          /*prepare=*/Prepare,
-          /*invoke=*/Eval,
-          /*profiling_string=*/nullptr,
-          /*builtin_code=*/0,
-          /*custom_name=*/nullptr,
-          /*version=*/0};
+  return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cc b/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cc
index d767e89..486b68e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.cc
@@ -20,9 +20,8 @@ limitations under the License.
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -81,12 +80,18 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
     case kTfLiteBool:
       *size = sizeof(bool);
       break;
+    case kTfLiteResource:
+      *size = sizeof(int32_t);
+      break;
     case kTfLiteComplex64:
       *size = sizeof(float) * 2;
       break;
     case kTfLiteComplex128:
       *size = sizeof(double) * 2;
       break;
+    case kTfLiteInt4:
+      *size = sizeof(int8_t);
+      break;
     default:
       return kTfLiteError;
   }
@@ -94,8 +99,7 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
 }
 
 TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
-                                    size_t* bytes, size_t* type_size,
-                                    ErrorReporter* error_reporter) {
+                                    size_t* bytes, size_t* type_size) {
   int element_count = 1;
   // If flatbuffer_tensor.shape == nullptr, then flatbuffer_tensor is a scalar
   // so has 1 element.
@@ -106,8 +110,8 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
   }
 
   TfLiteType tf_lite_type;
-  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
-                                          &tf_lite_type, error_reporter));
+  TF_LITE_ENSURE_STATUS(
+      ConvertTensorType(flatbuffer_tensor.type(), &tf_lite_type));
   TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(tf_lite_type, type_size));
   *bytes = element_count * (*type_size);
   return kTfLiteOk;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h
index cd3c697..2ceb2bc 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h
@@ -19,8 +19,8 @@ limitations under the License.
 #include <cstdint>
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
 
 namespace tflite {
 
@@ -33,13 +33,19 @@ uint8_t* AlignPointerDown(uint8_t* data, size_t alignment);
 // Returns an increased size that's a multiple of alignment.
 size_t AlignSizeUp(size_t size, size_t alignment);
 
+// Templated AlignSizeUp: the byte size of `count` objects of type T (one by
+// default), passed through AlignSizeUp() with alignof(T) as the alignment.
+template <typename T>
+size_t AlignSizeUp(size_t count = 1) {
+  return AlignSizeUp(sizeof(T) * count, alignof(T));
+}
+
 // Returns size in bytes for a given TfLiteType.
 TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size);
 
 // How many bytes are needed to hold a tensor's contents.
 TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor,
-                                    size_t* bytes, size_t* type_size,
-                                    ErrorReporter* error_reporter);
+                                    size_t* bytes, size_t* type_size);
 
 // How many bytes are used in a TfLiteEvalTensor instance. The byte length is
 // returned in out_bytes.
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
index e623ac5..ff98fc2 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.cc
@@ -15,8 +15,28 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h"
+
 namespace tflite {
 
+namespace {
+
+// Maps a buffer index to the single character that stands for it in the
+// output of GreedyMemoryPlanner::PrintMemoryPlan(): indices below 10 are
+// offset from '0', 10..35 map to 'a'..'z', 36..61 map to 'A'..'Z', and
+// anything from 62 upwards is shown as '*'.
+char GetOrdinalCharacter(int i) {
+  // Checked from the highest range down so each test needs one bound only.
+  if (i >= 62) return '*';
+  if (i >= 36) return 'A' + (i - 36);
+  if (i >= 10) return 'a' + (i - 10);
+  // Everything below 10 (the digits) is an offset from '0'.
+  return '0' + i;
+}
+
+}  // namespace
+
 // Simple stable in-place sort function. Not time-efficient for large arrays.
 // Would normally be in an anonymous namespace to keep it private, but we want
 // to be able to test it externally.
@@ -38,9 +58,14 @@ void ReverseSortInPlace(int* values, int* ids, int size) {
   } while (any_swapped);
 }
 
-GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
-                                         int scratch_buffer_size)
-    : buffer_count_(0), need_to_calculate_offsets_(true) {
+GreedyMemoryPlanner::GreedyMemoryPlanner() {}  // State is set up by Init().
+
+TfLiteStatus GreedyMemoryPlanner::Init(unsigned char* scratch_buffer,
+                                       int scratch_buffer_size) {
+  // Reset internal states
+  buffer_count_ = 0;
+  need_to_calculate_offsets_ = true;
+
   // Allocate the arrays we need within the scratch buffer arena.
   max_buffer_count_ = scratch_buffer_size / per_buffer_size();
 
@@ -58,18 +83,17 @@ GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
   next_free += sizeof(ListEntry) * max_buffer_count_;
 
   buffer_offsets_ = reinterpret_cast<int*>(next_free);
+  return kTfLiteOk;
 }
 
 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
   // We don't own the scratch buffer, so don't deallocate anything.
 }
 
-TfLiteStatus GreedyMemoryPlanner::AddBuffer(
-    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
-    int last_time_used) {
+TfLiteStatus GreedyMemoryPlanner::AddBuffer(int size, int first_time_used,
+                                            int last_time_used) {
   if (buffer_count_ >= max_buffer_count_) {
-    TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
-                         max_buffer_count_);
+    MicroPrintf("Too many buffers (max is %d)", max_buffer_count_);
     return kTfLiteError;
   }
   BufferRequirements* current = &requirements_[buffer_count_];
@@ -82,12 +106,11 @@ TfLiteStatus GreedyMemoryPlanner::AddBuffer(
   return kTfLiteOk;
 }
 
-TfLiteStatus GreedyMemoryPlanner::AddBuffer(
-    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
-    int last_time_used, int offline_offset) {
+TfLiteStatus GreedyMemoryPlanner::AddBuffer(int size, int first_time_used,
+                                            int last_time_used,
+                                            int offline_offset) {
   BufferRequirements* current = &requirements_[buffer_count_];
-  if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
-      kTfLiteOk) {
+  if (AddBuffer(size, first_time_used, last_time_used) != kTfLiteOk) {
     return kTfLiteError;
   }
   current->offline_offset = offline_offset;
@@ -297,8 +320,6 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
   while (entry) {
     BufferRequirements* requirements =
         &requirements_[entry->requirements_index];
-    // TODO(b/148246793): Update all size and offset variables types from
-    //                    int to size_t
     const size_t current_size = entry->offset + requirements->size;
     if (current_size > max_size) {
       max_size = current_size;
@@ -311,17 +332,14 @@ size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
   return max_size;
 }
 
-void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
+void GreedyMemoryPlanner::PrintMemoryPlan() {
   CalculateOffsetsIfNeeded();
 
   for (int i = 0; i < buffer_count_; ++i) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter,
-        "Planner buffer ID: %d, calculated offset: %d, size required: %d, "
-        "first_time_created: %d, "
-        "last_time_used: %d",
-        i, buffer_offsets_[i], requirements_[i].size,
-        requirements_[i].first_time_used, requirements_[i].last_time_used);
+    MicroPrintf("%c (id=%d): size=%d, offset=%d, first_used=%d last_used=%d",
+                GetOrdinalCharacter(i), i, requirements_[i].size,
+                buffer_offsets_[i], requirements_[i].first_time_used,
+                requirements_[i].last_time_used);
   }
 
   constexpr int kLineWidth = 80;
@@ -345,6 +363,7 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
     for (int c = 0; c < kLineWidth; ++c) {
       line[c] = '.';
     }
+    int memory_use = 0;
     for (int i = 0; i < buffer_count_; ++i) {
       BufferRequirements* requirements = &requirements_[i];
       if ((t < requirements->first_time_used) ||
@@ -356,47 +375,39 @@ void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
         continue;
       }
       const int size = requirements->size;
+      memory_use += size;
       const int line_start = (offset * kLineWidth) / max_size;
       const int line_end = ((offset + size) * kLineWidth) / max_size;
       for (int n = line_start; n < line_end; ++n) {
         if (line[n] == '.') {
-          char display;
-          if (i < 10) {
-            display = '0' + i;
-          } else if (i < 36) {
-            display = 'a' + (i - 10);
-          } else if (i < 62) {
-            display = 'A' + (i - 36);
-          } else {
-            display = '*';
-          }
-          line[n] = display;
+          line[n] = GetOrdinalCharacter(i);
         } else {
           line[n] = '!';
         }
       }
     }
     line[kLineWidth] = 0;
-    TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
+
+    MicroPrintf("%s%d: %s (%dk)", t < 10 ? " " : "", t, (const char*)line,
+                (memory_use + 1023) / 1024);
   }
 }
 
 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
 
-TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
-    tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
+TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(int buffer_index,
+                                                     int* offset) {
   CalculateOffsetsIfNeeded();
   if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
-    TF_LITE_REPORT_ERROR(error_reporter,
-                         "buffer index %d is outside range 0 to %d",
-                         buffer_index, buffer_count_);
+    MicroPrintf("buffer index %d is outside range 0 to %d", buffer_index,
+                buffer_count_);
     return kTfLiteError;
   }
   *offset = buffer_offsets_[buffer_index];
   return kTfLiteOk;
 }
 
-bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
+bool GreedyMemoryPlanner::DoAnyBuffersOverlap() {
   CalculateOffsetsIfNeeded();
   bool were_overlaps_found = false;
   for (int i = 0; i < buffer_count_; ++i) {
@@ -425,10 +436,10 @@ bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
         continue;
       }
       were_overlaps_found = true;
-      TF_LITE_REPORT_ERROR(
-          error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
-          i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
-          j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
+      MicroPrintf("Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)", i,
+                  a_first_time_used, a_last_time_used, a_start_offset,
+                  a_end_offset, j, b_first_time_used, b_last_time_used,
+                  b_start_offset, b_end_offset);
     }
   }
   return were_overlaps_found;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
index 42775ba..d77a595 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_GREEDY_MEMORY_PLANNER_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
 
 namespace tflite {
 
@@ -43,29 +43,32 @@ constexpr int kOnlinePlannedBuffer = -1;
 //
 // This is not guaranteed to produce the best placement, since that's an
 // NP-Complete problem, but in practice it should produce one that's decent.
-class GreedyMemoryPlanner : public MemoryPlanner {
+class GreedyMemoryPlanner : public MicroMemoryPlanner {
  public:
-  // You need to pass in an area of memory to be used for planning. This memory
-  // needs to have a lifetime as long as the planner, but isn't owned by this
-  // object, so management should be handled by the client. This is so it can be
-  // stack or globally allocated if necessary on devices without dynamic memory
-  // allocation. How many buffers can be planned for will depend on the size of
-  // this scratch memory, so you should enlarge it if you see an error when
-  // calling AddBuffer(). The memory can be reused once you're done with the
-  // planner, as long as you copy the calculated offsets to another location.
-  // Each buffer requires about 36 bytes of scratch.
-  GreedyMemoryPlanner(unsigned char* scratch_buffer, int scratch_buffer_size);
+  GreedyMemoryPlanner();
   ~GreedyMemoryPlanner() override;
 
+  // You need to pass in an area of memory to be used for planning. The client
+  // should ensure the validity of the memory when it needs to use this object.
+  // This memory isn't owned by this object, so management should be handled by
+  // the client. This is so it can be stack or globally allocated if necessary
+  // on devices without dynamic memory allocation. How many buffers can be
+  // planned for will depend on the size of this scratch memory, so you should
+  // enlarge it if you see an error when calling AddBuffer(). The memory can be
+  // reused once you're done with the planner, as long as you copy the
+  // calculated offsets to another location. Each buffer requires about 36 bytes
+  // of scratch.
+  TfLiteStatus Init(unsigned char* scratch_buffer,
+                    int scratch_buffer_size) override;
+
   // Record details of a buffer we want to place.
-  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used) override;
+  TfLiteStatus AddBuffer(int size, int first_time_used,
+                         int last_time_used) override;
 
   // Record details of an offline planned buffer offset we want to place.
   // offline_offset is the buffer offset from the start of the arena.
-  TfLiteStatus AddBuffer(ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used,
-                         int offline_offset);
+  TfLiteStatus AddBuffer(int size, int first_time_used, int last_time_used,
+                         int offline_offset) override;
 
   // Returns the high-water mark of used memory. This is the minimum size of a
   // memory arena you'd need to allocate to hold these buffers.
@@ -77,15 +80,14 @@ class GreedyMemoryPlanner : public MemoryPlanner {
   // Where a given buffer should be placed in the memory arena.
   // This information is stored in the memory arena itself, so once the arena
   // is used for inference, it will be overwritten.
-  TfLiteStatus GetOffsetForBuffer(ErrorReporter* error_reporter,
-                                  int buffer_index, int* offset) override;
+  TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) override;
 
   // Prints an ascii-art diagram of the buffer layout plan.
-  void PrintMemoryPlan(ErrorReporter* error_reporter);
+  void PrintMemoryPlan() override;
 
   // Debug method to check whether any buffer allocations are overlapping. This
   // is an O(N^2) complexity operation, so only use for testing.
-  bool DoAnyBuffersOverlap(ErrorReporter* error_reporter);
+  bool DoAnyBuffersOverlap();
 
   // Used to store a list of buffers ordered by their offset.
   struct ListEntry {
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc
index 8a4e514..6e21eb6 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.cc
@@ -15,18 +15,21 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h"
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
 namespace tflite {
 
+// Patched by Edge Impulse
+constexpr int LinearMemoryPlanner::kMaxBufferCount;
+
 LinearMemoryPlanner::LinearMemoryPlanner()
     : current_buffer_count_(0), next_free_offset_(0) {}
 LinearMemoryPlanner::~LinearMemoryPlanner() {}
 
-TfLiteStatus LinearMemoryPlanner::AddBuffer(
-    tflite::ErrorReporter* error_reporter, int size, int first_time_used,
-    int last_time_used) {
+TfLiteStatus LinearMemoryPlanner::AddBuffer(int size, int first_time_used,
+                                            int last_time_used) {
   if (current_buffer_count_ >= kMaxBufferCount) {
-    TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
-                         kMaxBufferCount);
+    MicroPrintf("Too many buffers (max is %d)", kMaxBufferCount);
     return kTfLiteError;
   }
   buffer_offsets_[current_buffer_count_] = next_free_offset_;
@@ -39,12 +42,11 @@ size_t LinearMemoryPlanner::GetMaximumMemorySize() { return next_free_offset_; }
 
 int LinearMemoryPlanner::GetBufferCount() { return current_buffer_count_; }
 
-TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer(
-    tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
+TfLiteStatus LinearMemoryPlanner::GetOffsetForBuffer(int buffer_index,
+                                                     int* offset) {
   if ((buffer_index < 0) || (buffer_index >= current_buffer_count_)) {
-    TF_LITE_REPORT_ERROR(error_reporter,
-                         "buffer index %d is outside range 0 to %d",
-                         buffer_index, current_buffer_count_);
+    MicroPrintf("buffer index %d is outside range 0 to %d", buffer_index,
+                current_buffer_count_);
     return kTfLiteError;
   }
   *offset = buffer_offsets_[buffer_index];
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h
index d2712f9..f699f8b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/linear_memory_planner.h
@@ -17,24 +17,23 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_LINEAR_MEMORY_PLANNER_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
 
 namespace tflite {
 
 // The simplest possible memory planner that just lays out all buffers at
 // increasing offsets without trying to reuse memory.
-class LinearMemoryPlanner : public MemoryPlanner {
+class LinearMemoryPlanner : public MicroMemoryPlanner {
  public:
   LinearMemoryPlanner();
   ~LinearMemoryPlanner() override;
 
-  TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter, int size,
-                         int first_time_used, int last_time_used) override;
+  TfLiteStatus AddBuffer(int size, int first_time_used,
+                         int last_time_used) override;
 
   size_t GetMaximumMemorySize() override;
   int GetBufferCount() override;
-  TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
-                                  int buffer_index, int* offset) override;
+  TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) override;
 
  private:
   static constexpr int kMaxBufferCount = 1024;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h
new file mode 100644
index 0000000..5f3b7ef
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h
@@ -0,0 +1,73 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_
+#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+// This is an experimental feature and subjected to change.
+// More description is available at
+// tensorflow/lite/micro/docs/offline_memory_plan.md.
+
+// Describes a buffer's layout inside an arena. This struct should be kept as
+// small as possible for memory footprint sensitive applications and should use
+// only primitive fields, making it easy to adjust offline.
+struct BufferDescriptor {
+  // Starting offset inside an arena for this buffer.
+  // Offset is the minimum information needed for the buffer.  The user knows
+  // the model and the size of each buffer in order to lay out a valid buffer
+  // plan.
+  int32_t offset;
+};
+
+// A structure describing the layout of buffers inside an arena.
+struct BufferPlan {
+  // Number of buffers described in this plan.
+  int32_t buffer_count;
+
+  // Each element describes one buffer.
+  // The buffer index is implied by the order of the AddBuffer() calls.
+  // Specifically, indices of activation tensors are 0 … N-1 where N is the
+  // number of activation tensors.
+  // The rest are based on the order of OP requests.
+  //
+  // This is a flexible array member and should ideally be declared as
+  // buffer_plan_entries[]. However, in order to support a variety
+  // of compilers (and without needing to add ifdef's), we
+  // are implementing the flexible array member with an array of
+  // length 1 as the last member of the struct. When the size of a BufferPlan
+  // is needed, use the provided SizeOfBufferPlan(buffer_count) that
+  // accounts for this implementation caveat.
+  BufferDescriptor buffer_plan_entries[1];
+};
+
+// Returns the size in bytes of a BufferPlan holding buffer_count entries. The
+// result is a compile-time constant if buffer_count is one.
+constexpr size_t SizeOfBufferPlan(int32_t buffer_count) {
+  // Minus 1 because a BufferPlan struct already contains one BufferDescriptor.
+  // Max provides a lower bound for the corner case of buffer_count = 0.
+  return sizeof(BufferPlan) +
+         sizeof(BufferDescriptor) * Max(buffer_count - 1, 0);
+}
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLAN_STRUCT_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h
similarity index 55%
rename from edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h
rename to edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h
index c79060f..0d0d74f 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h
@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
-#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 
 namespace tflite {
 
@@ -28,44 +27,65 @@ namespace tflite {
 // information about the calculated layout. For example:
 //
 // SomeMemoryPlanner planner;
-// planner.AddBuffer(reporter, 100, 0, 1);  // Buffer 0
-// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 1
-// planner.AddBuffer(reporter, 50, 2, 3);   // Buffer 2
+// planner.AddBuffer(100, 0, 1);  // Buffer 0
+// planner.AddBuffer(50, 2, 3);   // Buffer 1
+// planner.AddBuffer(50, 2, 3);   // Buffer 2
 //
 // int offset0;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 0, &offset0));
+// TF_EXPECT_OK(planner.GetOffsetForBuffer(0, &offset0));
 // int offset1;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 1, &offset1));
+// TF_EXPECT_OK(planner.GetOffsetForBuffer(1, &offset1));
 // int offset2;
-// TF_EXPECT_OK(planner.GetOffsetForBuffer(reporter, 2, &offset2));
+// TF_EXPECT_OK(planner.GetOffsetForBuffer(2, &offset2));
 // const int arena_size_needed = planner.GetMaximumMemorySize();
 //
 // The goal is for applications to be able to experiment with different layout
 // strategies without changing their client code, by swapping out classes that
 // implement this interface.=
-class MemoryPlanner {
+class MicroMemoryPlanner {
  public:
-  MemoryPlanner() {}
-  virtual ~MemoryPlanner() {}
+  MicroMemoryPlanner() {}
+  virtual ~MicroMemoryPlanner() {}
 
   // Pass information about a buffer's size and lifetime to the layout
   // algorithm. The order this is called implicitly assigns an index to the
   // result, so the buffer information that's passed into the N-th call of
   // this method will be used as the buffer_index argument to
   // GetOffsetForBuffer().
-  virtual TfLiteStatus AddBuffer(tflite::ErrorReporter* error_reporter,
-                                 int size, int first_time_used,
+  virtual TfLiteStatus AddBuffer(int size, int first_time_used,
                                  int last_time_used) = 0;
 
+  // Record details of an offline planned buffer offset we want to place.
+  // offline_offset is the buffer offset from the start of the arena.
+  // This is to support offline memory planning from the flatbuffer metadata.
+  // By default, it returns an error.
+  virtual TfLiteStatus AddBuffer(int size, int first_time_used,
+                                 int last_time_used, int offline_offset) {
+    return kTfLiteError;
+  }
+
   // The largest contiguous block of memory that's needed to hold the layout.
   virtual size_t GetMaximumMemorySize() = 0;
   // How many buffers have been added to the planner.
   virtual int GetBufferCount() = 0;
   // Calculated layout offset for the N-th buffer added to the planner.
-  virtual TfLiteStatus GetOffsetForBuffer(tflite::ErrorReporter* error_reporter,
-                                          int buffer_index, int* offset) = 0;
+  virtual TfLiteStatus GetOffsetForBuffer(int buffer_index, int* offset) = 0;
+
+  // Provides the scratch buffer in case the memory planner needs it.
+  // The lifetime of the scratch buffer lasts until the static memory plan
+  // is committed.
+  // The default implementation is for memory planners that do not need a
+  // scratch buffer and simply returns ok.
+  virtual TfLiteStatus Init(unsigned char* scratch_buffer,
+                            int scratch_buffer_size) {
+    return kTfLiteOk;
+  }
+
+  virtual void PrintMemoryPlan() {
+    // Default does nothing.
+  }
 };
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_MEMORY_PLANNER_MEMORY_PLANNER_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cc b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cc
new file mode 100644
index 0000000..0c1fd6d
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.cc
@@ -0,0 +1,66 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+NonPersistentMemoryPlannerShim::NonPersistentMemoryPlannerShim(
+    const BufferPlan* buffer_plan)
+    : buffer_plan_(buffer_plan), buffer_request_count_(0) {}
+
+NonPersistentMemoryPlannerShim::~NonPersistentMemoryPlannerShim() {}
+
+TfLiteStatus NonPersistentMemoryPlannerShim::AddBuffer(int size,
+                                                       int first_time_used,
+                                                       int last_time_used) {
+  buffer_request_count_++;
+  if (buffer_request_count_ > buffer_plan_->buffer_count) {
+    MicroPrintf(
+        "Attempting to add buffer %d, but only %d buffers in given buffer "
+        "plan.",
+        buffer_request_count_, buffer_plan_->buffer_count);
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+size_t NonPersistentMemoryPlannerShim::GetMaximumMemorySize() {
+  // Simply return 0 to let the framework accept this memory plan,
+  // because the client ensures the validity of the memory plan.
+  return 0;
+}
+
+// How many buffers are in the given memory plan.
+int NonPersistentMemoryPlannerShim::GetBufferCount() {
+  return buffer_plan_->buffer_count;
+}
+
+TfLiteStatus NonPersistentMemoryPlannerShim::GetOffsetForBuffer(
+    int buffer_request_index, int* offset) {
+  if (buffer_request_index < 0 ||  // also reject negative indices
+      buffer_request_index >= buffer_plan_->buffer_count) {
+    MicroPrintf(
+        "Attempting to get offset for buffer %d, but only %d buffers in given "
+        "buffer plan.", buffer_request_index, buffer_plan_->buffer_count);
+    return kTfLiteError;
+  }
+  *offset = buffer_plan_->buffer_plan_entries[buffer_request_index].offset;
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h
new file mode 100644
index 0000000..291c678
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h
@@ -0,0 +1,129 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__
+#define TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_plan_struct.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
+
+namespace tflite {
+
+/*   This is an experimental feature and subjected to change.
+ *
+The NonPersistentMemoryPlannerShim enables TFLM to work with an external tooling
+that can plan the offset of each non persistent buffer for the Model within the
+TFLM arena.
+
+If the NonPersistentMemoryPlannerShim is used, then the final binary does not
+have any of the symbols associated with the GreedyMemoryPlanner which results in
+a reduced memory footprint.
+
+Additionally, the offline planning of the non-persistent buffers can be used to
+have a more efficient utilization compared to the GreedyMemoryPlanner.
+
+For example, consider the following hypothetical model:
+
+A1(400)                    A2(401)
+──┬─────────┐    ┌───────────
+  │         │    │
+  │         │    │
+  │         ▼    ▼
+  │       ┌────────┐
+  │       │  OP1   │
+  │       └───┬────┘       A4(201)
+  │   A3(10)  │              │
+  │           │              │
+  │           │              │
+  │       ┌───┴────┐         │
+  │       │  OP2   │◄────────┤
+  │       └───┬────┘         │
+  │   A5(11)  │      A6(202) │
+  │           │       │      │
+  │           ▼       │      │
+  │       ┌────────┐  │      │
+  │       │  OP3   │◄─┘      │
+  │       └───┬────┘         │
+  │           │      A8(200) │
+  │   A7(12)  │        │     │
+  │           │        │     │
+  │       ┌───┴────┐◄──┘     │
+  └──────►│  OP4   │         │
+          └───┬────┘◄────────┘
+              │
+      A9(13)  │
+              ▼
+
+The GreedyMemoryPlanner will give the following memory layout that requires 1012
+bytes of scratch arena size:
+
+┌─────────────────────────────────────────┬──────────────────────────┬────────┬───────┐
+│  A2(401)                                │          A1(400)         │ A4(201)│
+A3(10)│
+└─────────────────────────────────────────┴──────────────────────────┴────────┴───────┘
+
+┌───────────┬──────┬──────┐
+│ A6(202)   │A5(11)│A7(12)│
+└───────────┴──────┴──────┘
+
+┌──────────┬───────┐
+│ A8(200)  │A9(13) │
+└──────────┴───────┘
+
+But a more efficient offline memory plan that requires only 826 bytes of scratch
+arena size can be
+
+┌──────────────────────────────────────┬─────────────────────────────┬───────┬──────┐
+│      A1(400)                         │         A2(401)             │
+A3(10)│A5(11)│
+└──────────────────────────────────────┴─────────────────────────────┴───────┴──────┘
+
+                                       ┌────────────────┬────────────┬────────┬───────┐
+                                       │A4(201)         │  A8(200)   │A9(13)
+│A7(12) │ └────────────────┴────────────┴────────┴───────┘
+
+                                                        ┌─────────────┐
+                                                        │  A6(202)    │
+                                                        └─────────────┘
+
+*/
+class NonPersistentMemoryPlannerShim : public MicroMemoryPlanner {
+ public:
+  // Does not take ownership of buffer_plan, which must refer to a valid
+  // BufferPlan that outlives this object.
+  explicit NonPersistentMemoryPlannerShim(const BufferPlan* buffer_plan);
+  ~NonPersistentMemoryPlannerShim() override;
+
+  TfLiteStatus GetOffsetForBuffer(int buffer_request_index,
+                                  int* offset) override;
+
+  TfLiteStatus AddBuffer(int size, int first_time_used,
+                         int last_time_used) override;
+  size_t GetMaximumMemorySize() override;
+  int GetBufferCount() override;
+
+ private:
+  const BufferPlan* buffer_plan_;  // not owned, can't be null
+
+  // The number of buffers requested so far. Used for error checking.
+  int buffer_request_count_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MEMORY_PLANNER_NON_PERSISTENT_MEMORY_PLANNER_SHIM_H__
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cc
new file mode 100644
index 0000000..296a502
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.cc
@@ -0,0 +1,375 @@
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h"
+
+#include <algorithm>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+namespace {
+constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
+constexpr int kUninitializedLifetime = -1;
+}  // namespace
+
+// Mark the given Allocation info as first created at the specified allocation
+// scope count. Only the first creation must be recorded since the allocation
+// scope count monotonically increases throughout the lifetime marking process.
+void AllocationInfoBuilder::UpdateFirstCreated(AllocationInfo* current,
+                                               int allocation_scope_count) {
+  TFLITE_DCHECK(current->first_created <= allocation_scope_count);
+  if (current->first_created == kUninitializedLifetime) {
+    current->first_created = allocation_scope_count;
+  }
+}
+
+// Mark the given AllocationInfo as last used at the specified allocation scope
+// count. Update the last used marker every time, since the allocation scope
+// count monotonically increases through the lifetime marking process.
+void AllocationInfoBuilder::UpdateLastUsed(AllocationInfo* current,
+                                           int allocation_scope_count) {
+  TFLITE_DCHECK(current->last_used <= allocation_scope_count);
+  current->last_used = allocation_scope_count;
+}
+
+TfLiteStatus AllocationInfoBuilder::MarkSubgraphLifetimesIfNecessary(
+    const Operator* op, internal::ScratchBufferRequest* scratch_buffer_requests,
+    ScratchBufferHandle* scratch_buffer_handles,
+    SubgraphAllocations* allocations) {
+  int first_subgraph_index = -1;
+  int second_subgraph_index = -1;
+  const OperatorCode* opcode =
+      model_->operator_codes()->Get(op->opcode_index());
+  switch (opcode->builtin_code()) {
+    case BuiltinOperator_IF: {
+      first_subgraph_index =
+          op->builtin_options_as_IfOptions()->then_subgraph_index();
+      second_subgraph_index =
+          op->builtin_options_as_IfOptions()->else_subgraph_index();
+      break;
+    }
+    case BuiltinOperator_CALL_ONCE: {
+      first_subgraph_index =
+          op->builtin_options_as_CallOnceOptions()->init_subgraph_index();
+      break;
+    }
+    case BuiltinOperator_WHILE: {
+      first_subgraph_index =
+          op->builtin_options_as_WhileOptions()->cond_subgraph_index();
+      second_subgraph_index =
+          op->builtin_options_as_WhileOptions()->body_subgraph_index();
+      break;
+    }
+    default: {
+      break;
+    }
+  }
+  if (first_subgraph_index != -1) {
+    // Enter a new allocation scope for each subgraph.
+    allocation_scope_count_++;
+    TF_LITE_ENSURE_STATUS(
+        MarkAllocationLifetimes(first_subgraph_index, scratch_buffer_requests,
+                                scratch_buffer_handles, allocations));
+  }
+  if (second_subgraph_index != -1) {
+    // Enter a new allocation scope for each subgraph.
+    allocation_scope_count_++;
+    TF_LITE_ENSURE_STATUS(
+        MarkAllocationLifetimes(second_subgraph_index, scratch_buffer_requests,
+                                scratch_buffer_handles, allocations));
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus AllocationInfoBuilder::CreateAllocationInfo(
+    int scratch_buffer_request_count) {
+  size_t subgraph_offsets_length = model_->subgraphs()->size() * sizeof(size_t);
+  info_.subgraph_offsets =
+      reinterpret_cast<size_t*>(non_persistent_allocator_->AllocateTemp(
+          subgraph_offsets_length, alignof(size_t)));
+  if (info_.subgraph_offsets == nullptr) {
+    MicroPrintf(
+        "Failed to allocate memory for memory planning, %d bytes required",
+        static_cast<int>(subgraph_offsets_length));  // %d expects an int
+    return kTfLiteError;
+  }
+  size_t tensor_count = 0;
+  for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size();
+       subgraph_idx++) {
+    // Add all tensors in each subgraph to the AllocationInfo array. Even weight
+    // tensors are added but marked with needs_allocating = false. Including all
+    // tensors in the graph here simplifies logic.
+    info_.subgraph_offsets[subgraph_idx] = tensor_count;
+    tensor_count += model_->subgraphs()->Get(subgraph_idx)->tensors()->size();
+  }
+  info_.tensor_count = tensor_count;
+
+  // Scratch buffer allocations follow tensor allocations, so the scratch offset
+  // is equal to the number of tensor allocations.
+  info_.scratch_offset = tensor_count;
+  info_.allocation_info_count = tensor_count + scratch_buffer_request_count;
+  info_.scratch_buffer_count = scratch_buffer_request_count;
+  size_t bytes = sizeof(AllocationInfo) * info_.allocation_info_count;
+
+  // Allocate an array of AllocationInfo structs from the temp section. This
+  // struct will be used by AllocationInfoBuilder to find buffer usage.
+  info_.allocation_info = reinterpret_cast<AllocationInfo*>(
+      non_persistent_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
+  if (info_.allocation_info == nullptr) {
+    MicroPrintf(
+        "Failed to allocate memory for memory planning, %d bytes required",
+        static_cast<int>(bytes));  // %d expects an int, not a size_t
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus AllocationInfoBuilder::FreeAllocationInfo() {
+  non_persistent_allocator_->DeallocateTemp(
+      reinterpret_cast<uint8_t*>(info_.allocation_info));
+  non_persistent_allocator_->DeallocateTemp(
+      reinterpret_cast<uint8_t*>(info_.subgraph_offsets));
+  return kTfLiteOk;
+}
+
+TfLiteStatus AllocationInfoBuilder::ValidateSubgraph(
+    const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
+  uint32_t operators_size = NumSubgraphOperators(subgraph);
+
+  for (uint32_t i = 0; i < operators_size; i++) {
+    const auto op = subgraph->operators()->Get(i);
+    for (size_t n = 0;
+         op->intermediates() != nullptr && n < op->intermediates()->size();
+         n++) {
+      const int tensor_index = op->intermediates()->Get(n);
+      size_t tensor_size = -1;
+      TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength(
+          &eval_tensors[tensor_index], &tensor_size));
+      if (tensor_size != 0) {
+        MicroPrintf(
+            "Does not support intermediate tensor with non-zero size: %d",
+            static_cast<int>(tensor_size));  // %d expects int, not size_t
+        return kTfLiteError;
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus AllocationInfoBuilder::InitializeAllocationInfo(
+    const int32_t* offline_offsets, SubgraphAllocations* allocations) {
+  AllocationInfo* allocation_info = info_.allocation_info;
+  // Initialize allocation info for every tensor in every subgraph.
+  for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
+    TfLiteEvalTensor* eval_tensors = allocations[subgraph_idx].tensors;
+    AllocationInfo* subgraph_allocation_info =
+        &allocation_info[info_.subgraph_offsets[subgraph_idx]];
+
+    // Ensure constraints are met.
+    TF_LITE_ENSURE_STATUS(ValidateSubgraph(subgraph, eval_tensors));
+
+    for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
+      AllocationInfo* current = &subgraph_allocation_info[i];
+      current->output_ptr = &(eval_tensors[i].data.data);
+
+      TF_LITE_ENSURE_STATUS(
+          TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));
+
+      current->first_created = kUninitializedLifetime;
+      current->last_used = kUninitializedLifetime;
+      current->needs_allocating =
+          (eval_tensors[i].data.data == nullptr) &&
+          (!subgraph->tensors()->Get(i)->is_variable()) &&
+          (current->bytes != 0);
+      if (offline_offsets) {
+        current->offline_offset = offline_offsets[i];
+
+        // Mark offline planned variable tensors so they can get an offline
+        // offset and be handled offline.
+        if (subgraph->tensors()->Get(i)->is_variable() &&
+            current->offline_offset != kOnlinePlannedBuffer) {
+          current->needs_allocating = true;
+        }
+
+      } else {
+        current->offline_offset = kOnlinePlannedBuffer;
+      }
+    }
+  }
+  // Initialize allocation info for every scratch buffer.
+  AllocationInfo* scratch_allocation_info =
+      &allocation_info[info_.scratch_offset];
+  for (size_t i = 0; i < info_.scratch_buffer_count; i++) {
+    AllocationInfo* current = &scratch_allocation_info[i];
+    current->first_created = kUninitializedLifetime;
+    current->last_used = kUninitializedLifetime;
+    current->needs_allocating = true;
+    current->offline_offset = kOnlinePlannedBuffer;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus AllocationInfoBuilder::MarkAllocationLifetimes(
+    int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_requests,
+    ScratchBufferHandle* scratch_buffer_handles,
+    SubgraphAllocations* allocations) {
+  const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
+
+  AllocationInfo* allocation_info = info_.allocation_info;
+  // Each subgraph's tensor allocations are in a contiguous block starting at
+  // subgraph_offsets_[subgraph index] with one entry per tensor.
+  AllocationInfo* subgraph_allocation_info =
+      &allocation_info[info_.subgraph_offsets[subgraph_idx]];
+
+  uint32_t operators_size = NumSubgraphOperators(subgraph);
+  // Mark all inputs as created at the start of the subgraph invocation.
+  for (size_t i = 0;
+       subgraph->inputs() != nullptr && i < subgraph->inputs()->size(); ++i) {
+    const int tensor_index = subgraph->inputs()->Get(i);
+    AllocationInfo* current = &subgraph_allocation_info[tensor_index];
+    UpdateFirstCreated(current, allocation_scope_count_);
+    // This will ensure that the tensors that are inputs to the subgraphs
+    // but not used in any ops also have a reasonable lifetime.
+    UpdateLastUsed(current, allocation_scope_count_);
+  }
+
+  for (uint32_t i = 0; i < operators_size; i++) {
+    // Each operator has a new allocation scope.
+    allocation_scope_count_++;
+    const auto* op = subgraph->operators()->Get(i);
+    // Figure out when the first creation and use of each tensor is.
+    for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size();
+         ++n) {
+      const int tensor_index = op->outputs()->Get(n);
+      AllocationInfo* current = &subgraph_allocation_info[tensor_index];
+      UpdateFirstCreated(current, allocation_scope_count_);
+    }
+
+    // Keep track of scope count before any subgraphs, so that scratch buffers'
+    // lifetime within a control flow op properly overlaps with all subgraphs.
+    int start_allocation_scope_count = allocation_scope_count_;
+
+    // Control flow operators can invoke subgraphs. Plan these subgraphs before
+    // continuing on to the rest of the graph, propagating any failure.
+    TF_LITE_ENSURE_STATUS(MarkSubgraphLifetimesIfNecessary(
+        op, scratch_buffer_requests, scratch_buffer_handles, allocations));
+
+    // Figure out when the last use of each tensor is.
+    for (size_t n = 0; op->inputs() != nullptr && n < op->inputs()->size();
+         ++n) {
+      const int tensor_index = op->inputs()->Get(n);
+      // Optional bias tensors can have an index of -1 when they are omitted.
+      if (tensor_index >= 0) {
+        AllocationInfo* current = &subgraph_allocation_info[tensor_index];
+        // No need to update creation since it is either marked by the subgraph
+        // or producer op, or it is not part of the memory plan (weight, bias
+        // tensor).
+        UpdateLastUsed(current, allocation_scope_count_);
+      }
+    }
+    for (size_t n = 0; op->outputs() != nullptr && n < op->outputs()->size();
+         ++n) {
+      const int tensor_index = op->outputs()->Get(n);
+      AllocationInfo* current = &subgraph_allocation_info[tensor_index];
+      UpdateLastUsed(current, allocation_scope_count_);
+    }
+
+    // Mark the lifetime of scratch buffers belonging to the current node. This
+    // operation is O(N * M) where N is the total number of visited nodes and M
+    // is the total number of scratch buffers.
+    // TODO(b/217794030): Optimize this memory planning code.
+    AllocationInfo* scratch_allocation_info =
+        &allocation_info[info_.scratch_offset];
+    for (size_t scratch_idx = 0; scratch_idx < info_.scratch_buffer_count;
+         scratch_idx++) {
+      internal::ScratchBufferRequest request =
+          scratch_buffer_requests[scratch_idx];
+      AllocationInfo* current = &scratch_allocation_info[scratch_idx];
+      if (request.node_idx == static_cast<int>(i) &&
+          request.subgraph_idx == static_cast<int>(subgraph_idx)) {
+        ScratchBufferHandle* current_handle =
+            &(scratch_buffer_handles[scratch_idx]);
+        current->output_ptr = reinterpret_cast<void**>(&current_handle->data);
+        current->bytes = request.bytes;
+        UpdateFirstCreated(current, start_allocation_scope_count);
+        UpdateLastUsed(current, allocation_scope_count_);
+      }
+    }
+  }
+
+  // Mark all outputs as persistent to the end of the subgraph invocation.
+  for (size_t i = 0;
+       subgraph->outputs() != nullptr && i < subgraph->outputs()->size(); ++i) {
+    const int tensor_index = subgraph->outputs()->Get(i);
+    AllocationInfo* current = &subgraph_allocation_info[tensor_index];
+    // Make sure to assign the First created value of the subgraph output
+    // This will handle the case where the subgraph is empty. This helps
+    // ensure all tensors have valid lifetimes before those are used by the
+    // memory planner.
+    UpdateFirstCreated(current, allocation_scope_count_);
+    UpdateLastUsed(current, allocation_scope_count_);
+  }
+  return kTfLiteOk;
+}
+
+// Get the offline tensor allocation plan from the model metadata, if present.
+// See micro/docs/memory_management.md for more info.
+TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
+    const int32_t** offline_planner_offsets) {
+  if (model_->metadata()) {
+    for (size_t i = 0; i < model_->metadata()->size(); ++i) {
+      auto metadata = model_->metadata()->Get(i);
+
+      if (metadata->name()) {
+        const size_t metadata_name_size = metadata->name()->size();
+
+        if ((strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
+                     std::min(metadata_name_size,
+                              strlen(kOfflineMemAllocMetadata))) == 0) &&
+            metadata_name_size == strlen(kOfflineMemAllocMetadata)) {
+          const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
+              model_->buffers();
+          auto* buffer = (*buffers)[metadata->buffer()];
+          auto* array = buffer->data();
+          const uint32_t* metadata_buffer =
+              reinterpret_cast<const uint32_t*>(array->data());
+          const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
+          *offline_planner_offsets =
+              reinterpret_cast<const int32_t*>(&metadata_buffer[3]);
+
+          if (info_.tensor_count != nbr_tensors) {
+            MicroPrintf(
+                "Nbr of offline buffer offsets (%d) in metadata "
+                "not equal nbr tensors (%d)\n", static_cast<int>(nbr_tensors),
+                static_cast<int>(info_.tensor_count));
+            return kTfLiteError;
+          }
+        }
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h
new file mode 100644
index 0000000..a02503e
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h
@@ -0,0 +1,139 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
+
+namespace tflite {
+
+// Used to hold information used during allocation calculations.
+struct AllocationInfo {
+  size_t bytes;
+  void** output_ptr;
+  int first_created;
+  int last_used;
+  int32_t offline_offset;
+  bool needs_allocating;
+};
+
+// Used to hold the allocation info list and related metadata for the entire
+// graph (including subgraphs). Since all subgraphs are planned together, the
+// allocation info list contains allocations for all subgraphs. Track the offset
+// into this list for each subgraph then reserve space to track all allocations.
+//
+// The AllocationInfo list is a contiguous list of allocations across all
+// subgraphs and scratch buffers. Each element here is marked as
+// s<subgraph index>t<tensor index>. The following is a possible
+// AllocationInfo list:
+// [s0t0, s0t1, s1t0, s1t1, s1t2, s2t0, s2t1, scratch0, scratch1, scratch2]
+//
+// For this example, the subgraph offsets would be [0, 2, 5] and the scratch
+// offset would be 7.
+struct GraphAllocationInfo {
+  AllocationInfo* allocation_info;
+  size_t allocation_info_count;
+  size_t* subgraph_offsets;
+  size_t scratch_offset;
+  size_t tensor_count;
+  size_t scratch_buffer_count;
+};
+
+// A helper class to construct AllocationInfo array. This array contains the
+// lifetime of tensors / scratch_buffer and will be used to calculate the memory
+// plan. Methods need to be called in order from `Create`, `Init`, `Add*`, to
+// `Finish`.
+class AllocationInfoBuilder {
+ public:
+  AllocationInfoBuilder(const Model* model,
+                        INonPersistentBufferAllocator* non_persistent_allocator)
+      : model_(model), non_persistent_allocator_(non_persistent_allocator) {}
+
+  // Check if model contains offline planned buffer offsets.
+  //  - If there's no metadata available, offline_planner_offsets is not set
+  //  - If there's metadata available, offline_planner_offsets will point to the
+  //    first offset in the metadata buffer list.
+  TfLiteStatus GetOfflinePlannedOffsets(
+      const int32_t** offline_planner_offsets);
+
+  // Allocate memory for the allocation info array as well as offsets into that
+  // array for each subgraph.
+  TfLiteStatus CreateAllocationInfo(int scratch_buffer_request_count);
+
+  // Release memory used for the allocation info array.
+  TfLiteStatus FreeAllocationInfo();
+
+  // Initialize AllocationInfo for all tensors and scratch buffers in the graph.
+  TfLiteStatus InitializeAllocationInfo(const int32_t* offline_offsets,
+                                        SubgraphAllocations* allocations);
+
+  // Mark the scope of each tensor and scratch buffer across the graph. Enter
+  // all possible subgraphs invoked by each control flow operator. This method
+  // marks the maximum lifetime of each buffer so that tensors are correctly
+  // planned for all valid invocation flows.
+  TfLiteStatus MarkAllocationLifetimes(
+      int subgraph_idx, internal::ScratchBufferRequest* scratch_buffer_request,
+      ScratchBufferHandle* scratch_buffer_handles,
+      SubgraphAllocations* allocations);
+
+  // Identify control flow operators and recursively mark all subgraphs which
+  // that operator can invoke. The lifetime of all tensors within a subgraph
+  // can only be extended. The order of subgraph invocation does not matter
+  // since subgraphs within the same control flow operator are executed
+  // within their own allocation scope (planned buffers in a subgraph cannot
+  // persist beyond the end of that subgraph's invocation).
+  TfLiteStatus MarkSubgraphLifetimesIfNecessary(
+      const Operator* op,
+      internal::ScratchBufferRequest* scratch_buffer_requests,
+      ScratchBufferHandle* scratch_buffer_handles,
+      SubgraphAllocations* allocations);
+
+  // Returns the number of allocations.
+  int AllocationCount() const { return info_.allocation_info_count; }
+
+  // Returns a pointer to the built AllocationInfo array.
+  AllocationInfo* Finish() const { return info_.allocation_info; }
+
+ private:
+  // Mark the given Allocation info as first created at the specified allocation
+  // scope count. Only the first creation must be recorded since the allocation
+  // scope count monotonically increases throughout the lifetime marking
+  // process.
+  void UpdateFirstCreated(AllocationInfo* current, int allocation_scope_count);
+
+  // Mark the given AllocationInfo as last used at the specified allocation
+  // scope count. Update the last used marker every time, since the
+  // allocation scope count monotonically increases through the lifetime
+  // marking process.
+  void UpdateLastUsed(AllocationInfo* current, int allocation_scope_count);
+
+  // Validate if a subgraph satisfies assumptions.
+  TfLiteStatus ValidateSubgraph(const SubGraph* subgraph,
+                                TfLiteEvalTensor* eval_tensors);
+
+  const tflite::Model* model_ = nullptr;
+  INonPersistentBufferAllocator* non_persistent_allocator_ = nullptr;
+  GraphAllocationInfo info_;
+  int allocation_scope_count_ = 0;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATION_INFO_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cc
index dc02eb2..872cb06 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,17 +20,19 @@ limitations under the License.
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/memory_planner.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocation_info.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
 
@@ -46,296 +48,30 @@ constexpr size_t kMaxScratchBuffersPerOp = 12;
 // needs a node id assignment.
 constexpr int kUnassignedScratchBufferRequestIndex = -1;
 
-// Used to hold information used during allocation calculations.
-struct AllocationInfo {
-  size_t bytes;
-  void** output_ptr;
-  int first_created;
-  int last_used;
-  int32_t offline_offset;
-  bool needs_allocating;
-};
-
-// We align tensor buffers to 16-byte boundaries, since this is a common
-// requirement for SIMD extensions.
-constexpr int kBufferAlignment = 16;
-constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
 const TfLiteIntArray kZeroLengthIntArray = {};
 
-class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
+class MicroBuiltinDataAllocator : public TfLiteBridgeBuiltinDataAllocator {
  public:
-  explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
-      : memory_allocator_(memory_allocator) {}
+  explicit MicroBuiltinDataAllocator(
+      IPersistentBufferAllocator* persistent_allocator)
+      : persistent_allocator_(persistent_allocator) {}
 
   void* Allocate(size_t size, size_t alignment_hint) override {
-    return memory_allocator_->AllocateFromTail(size, alignment_hint);
+    return persistent_allocator_->AllocatePersistentBuffer(size,
+                                                           alignment_hint);
   }
   void Deallocate(void* data) override {
     // Do not deallocate, builtin data needs to be available for the life time
     // of the model.
   }
 
- private:
-  SimpleMemoryAllocator* memory_allocator_;
-
   TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-#if !defined(__clang__)
-// Helper function to check flatbuffer metadata correctness. This function is
-// not called by default. Hence it's not linked in to the final binary code.
-TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
-                                        ErrorReporter* error_reporter) {
-  // Suppress compile warning for unused function
-  (void)CheckOfflinePlannedOffsets;
-
-  if (model->metadata()) {
-    for (size_t i = 0; i < model->metadata()->size(); ++i) {
-      auto metadata = model->metadata()->Get(i);
-      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
-                  strlen(kOfflineMemAllocMetadata)) == 0) {
-        auto* subgraphs = model->subgraphs();
-        const SubGraph* subgraph = (*subgraphs)[0];
-        const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
-            subgraph->tensors();
-        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
-            model->buffers();
-        int nbr_tflite_tensors = tensors->size();
-        auto* buffer = (*buffers)[metadata->buffer()];
-        auto* array = buffer->data();
-        const uint32_t* metadata_buffer = (uint32_t*)array->data();
-        int version = metadata_buffer[0];
-        int subgraph_idx = metadata_buffer[1];
-        const int nbr_offline_offsets = metadata_buffer[2];
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-        int* offline_planner_offsets = (int*)&metadata_buffer[3];
-#endif
-
-        TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
-        TF_LITE_REPORT_ERROR(error_reporter,
-                             "Offline planner metadata found, version %d, "
-                             "subgraph %d, nbr offline offsets %d",
-                             version, subgraph_idx, nbr_offline_offsets);
-        for (int j = 0; j < nbr_offline_offsets; ++j) {
-          TF_LITE_REPORT_ERROR(
-              error_reporter,
-              "Offline planner tensor index %d, offline offset: %d", j,
-              offline_planner_offsets[j]);
-        }
-
-        if (version != 1) {
-          TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
-                               version);
-          return kTfLiteError;
-        }
-        if (subgraph_idx != 0) {
-          TF_LITE_REPORT_ERROR(error_reporter,
-                               "Only 1 subgraph supported! Subgraph idx (%d)\n",
-                               subgraph_idx);
-          return kTfLiteError;
-        }
-        if (nbr_tflite_tensors != nbr_offline_offsets) {
-          TF_LITE_REPORT_ERROR(error_reporter,
-                               "Nbr of offline buffer offsets (%d) in metadata "
-                               "not equal nbr tensors (%d)\n",
-                               nbr_offline_offsets, nbr_tflite_tensors);
-          return kTfLiteError;
-        }
-      }
-    }
-  }
-  return kTfLiteOk;
-}
-#endif
-
-// A helper class to construct AllocationInfo array. This array contains the
-// lifetime of tensors / scratch_buffer and will be used to calculate the memory
-// plan. Methods need to be called in order from `Init`, `Add*`, to `Finish`.
-class AllocationInfoBuilder {
- public:
-  AllocationInfoBuilder(AllocationInfo* info, size_t tensor_count,
-                        size_t scratch_buffer_count, ErrorReporter* reporter)
-      : info_(info),
-        tensor_count_(tensor_count),
-        buffer_count_(scratch_buffer_count),
-        reporter_(reporter) {}
-
-  // Check if model contains offline planned buffer offsets.
-  //  - If there's no metadata available, offline_planner_offsets is not set
-  //  - If there's metadata available, offline_planner_offsets will point to the
-  //    first offset in the metadata buffer list.
-  TfLiteStatus GetOfflinePlannedOffsets(
-      const Model* model, const int32_t** offline_planner_offsets);
-
-  // Add allocaiton information for the tensors.
-  TfLiteStatus AddTensors(const SubGraph* subgraph,
-                          const int32_t* offline_offsets,
-                          TfLiteEvalTensor* eval_tensors);
-
-  // Add allocation information for the scratch buffers.
-  TfLiteStatus AddScratchBuffers(
-      internal::ScratchBufferRequest* scratch_buffer_requests,
-      ScratchBufferHandle* scratch_buffer_handles);
-
-  // Returns a pointer to the built AllocationInfo array.
-  const AllocationInfo* Finish() const { return info_; }
 
  private:
-  AllocationInfo* info_ = nullptr;
-  size_t tensor_count_ = 0;
-  size_t buffer_count_ = 0;
-  ErrorReporter* reporter_ = nullptr;
+  IPersistentBufferAllocator* persistent_allocator_;
 };
 
-TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
-                                               const int32_t* offline_offsets,
-                                               TfLiteEvalTensor* eval_tensors) {
-  TFLITE_DCHECK(eval_tensors != nullptr);
-
-  // Set up allocation info for all tensors.
-  for (size_t i = 0; i < tensor_count_; ++i) {
-    AllocationInfo* current = &info_[i];
-    current->output_ptr = &(eval_tensors[i].data.data);
-
-    TF_LITE_ENSURE_STATUS(
-        TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));
-
-    current->first_created = -1;
-    current->last_used = -1;
-    current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
-                                (!subgraph->tensors()->Get(i)->is_variable());
-    if (offline_offsets) {
-      current->offline_offset = offline_offsets[i];
-    } else {
-      current->offline_offset = kOnlinePlannedBuffer;
-    }
-  }
-
-  for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
-    const int tensor_index = subgraph->inputs()->Get(i);
-    AllocationInfo* current = &info_[tensor_index];
-    current->first_created = 0;
-  }
-
-  // Mark all outputs as persistent to the end of the invocation.
-  for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
-    const int tensor_index = subgraph->outputs()->Get(i);
-    AllocationInfo* current = &info_[tensor_index];
-    current->last_used = subgraph->operators()->size() - 1;
-  }
-
-  // Figure out when the first and last use of each tensor is.
-  for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
-    const auto* op = subgraph->operators()->Get(i);
-    for (size_t n = 0; n < op->inputs()->size(); ++n) {
-      const int tensor_index = op->inputs()->Get(n);
-      AllocationInfo* current = &info_[tensor_index];
-      if (((current->last_used == -1) || (current->last_used < i))) {
-        current->last_used = i;
-      }
-    }
-    for (size_t n = 0; n < op->outputs()->size(); ++n) {
-      const int tensor_index = op->outputs()->Get(n);
-      AllocationInfo* current = &info_[tensor_index];
-      if ((current->first_created == -1) || (current->first_created > i)) {
-        current->first_created = i;
-      }
-    }
-  }
-
-  // Sanity check for valid tensor lifetime.
-  for (size_t i = 0; i < tensor_count_; ++i) {
-    AllocationInfo* current = &info_[i];
-    // Even though tensor appears to be read only it may still need to be
-    // allocated.
-    const bool appears_read_only =
-        (current->first_created == -1) && (current->last_used != -1);
-    const bool has_partial_lifetime =
-        !appears_read_only &&
-        ((current->first_created == -1) || (current->last_used == -1));
-    if (has_partial_lifetime && current->needs_allocating) {
-      TF_LITE_REPORT_ERROR(
-          reporter_,
-          "Logic error in memory planner, tensor %d has an invalid lifetime: "
-          "first_created: %d, last_used: %d",
-          i, current->first_created, current->last_used);
-      return kTfLiteError;
-    }
-  }
-  return kTfLiteOk;
-}
-
-// The tensor offsets will be encoded in the metadata:[Metadata] field of the
-// Model. The following encoding applies:
-//
-// | Metadata component |                 Value                                |
-// |    name:string     | “OfflineMemoryAllocation”                            |
-// |    buffer:unit     | Index of buffer containing memory allocation data    |
-//
-// The buffer contents for the memory allocation is a list of 32-bit integers.
-// The number of tensors, n, must be equal to the number of tensors defined in
-// the model. The following encoding applies:
-//
-// |  Offset |                            Value                                |
-// |    0    | Offline allocation format version – set to 0                    |
-// |    1    | Subgraph index to which this allocation applies                 |
-// |    2    | Number offsets following: n                                     |
-// |    3    | Arena byte offset of tensor #0 or -1 to allocate at runtime     |
-// |    4    | Arena byte offset of tensor #1 or -1 to allocate at runtime     |
-// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
-TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
-    const Model* model, const int32_t** offline_planner_offsets) {
-  if (model->metadata()) {
-    for (size_t i = 0; i < model->metadata()->size(); ++i) {
-      auto metadata = model->metadata()->Get(i);
-      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
-                  strlen(kOfflineMemAllocMetadata)) == 0) {
-        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
-            model->buffers();
-        auto* buffer = (*buffers)[metadata->buffer()];
-        auto* array = buffer->data();
-        const uint32_t* metadata_buffer =
-            reinterpret_cast<const uint32_t*>(array->data());
-        const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
-        *offline_planner_offsets =
-            reinterpret_cast<const int32_t*>(&metadata_buffer[3]);
-
-        if (tensor_count_ != nbr_tensors) {
-          TF_LITE_REPORT_ERROR(reporter_,
-                               "Nbr of offline buffer offsets (%d) in metadata "
-                               "not equal nbr tensors (%d)\n",
-                               nbr_tensors, tensor_count_);
-          return kTfLiteError;
-        }
-      }
-    }
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
-    internal::ScratchBufferRequest* scratch_buffer_requests,
-    ScratchBufferHandle* scratch_buffer_handles) {
-  // Set up allocation info for buffers.
-  for (size_t i = tensor_count_; i < tensor_count_ + buffer_count_; ++i) {
-    internal::ScratchBufferRequest* current_request =
-        &(scratch_buffer_requests[i - tensor_count_]);
-    ScratchBufferHandle* current_handle =
-        &(scratch_buffer_handles[i - tensor_count_]);
-
-    AllocationInfo* current = &info_[i];
-    current->output_ptr = reinterpret_cast<void**>(&current_handle->data);
-    current->bytes = current_request->bytes;
-    current->first_created = current_request->node_idx;
-    current->last_used = current_request->node_idx;
-    current->offline_offset = kOnlinePlannedBuffer;
-    current->needs_allocating = true;
-  }
-  return kTfLiteOk;
-}
-
-TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
-                        GreedyMemoryPlanner* planner,
+TfLiteStatus CreatePlan(MicroMemoryPlanner* planner,
                         const AllocationInfo* allocation_info,
                         size_t allocation_info_size) {
   // Add the tensors to our allocation plan.
@@ -343,23 +79,22 @@ TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
     const AllocationInfo* current = &allocation_info[i];
     if (current->needs_allocating) {
       size_t aligned_bytes_required =
-          AlignSizeUp(current->bytes, kBufferAlignment);
+          AlignSizeUp(current->bytes, MicroArenaBufferAlignment());
       if (current->offline_offset == kOnlinePlannedBuffer) {
-        TF_LITE_ENSURE_STATUS(
-            planner->AddBuffer(error_reporter, aligned_bytes_required,
-                               current->first_created, current->last_used));
+        TF_LITE_ENSURE_STATUS(planner->AddBuffer(aligned_bytes_required,
+                                                 current->first_created,
+                                                 current->last_used));
       } else {
-        TF_LITE_ENSURE_STATUS(planner->AddBuffer(
-            error_reporter, aligned_bytes_required, current->first_created,
-            current->last_used, current->offline_offset));
+        TF_LITE_ENSURE_STATUS(
+            planner->AddBuffer(aligned_bytes_required, current->first_created,
+                               current->last_used, current->offline_offset));
       }
     }
   }
   return kTfLiteOk;
 }
 
-TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
-                        uint8_t* starting_point,
+TfLiteStatus CommitPlan(MicroMemoryPlanner* planner, uint8_t* starting_point,
                         const AllocationInfo* allocation_info,
                         size_t allocation_info_size) {
   // Figure out the actual memory addresses for each buffer, based on the plan.
@@ -369,61 +104,60 @@ TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
     if (current->needs_allocating) {
       int offset = -1;
       TF_LITE_ENSURE_STATUS(
-          planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
+          planner->GetOffsetForBuffer(planner_index, &offset));
       *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
       ++planner_index;
     }
   }
   return kTfLiteOk;
 }
+
+IPersistentBufferAllocator* CreatePersistentArenaAllocator(uint8_t* buffer_head,
+                                                           size_t buffer_size) {
+  // Align the actually used area by the tail because persistent buffer grows
+  // from the bottom to top.
+  uint8_t* aligned_buffer_tail =
+      AlignPointerDown(buffer_head + buffer_size, MicroArenaBufferAlignment());
+  size_t aligned_buffer_size = aligned_buffer_tail - buffer_head;
+  PersistentArenaBufferAllocator tmp =
+      PersistentArenaBufferAllocator(buffer_head, aligned_buffer_size);
+
+  // Allocate enough bytes from the buffer to create a
+  // PersistentArenaBufferAllocator. The new instance will use the current
+  // adjusted tail buffer from the tmp allocator instance.
+  uint8_t* allocator_buffer =
+      tmp.AllocatePersistentBuffer(sizeof(PersistentArenaBufferAllocator),
+                                   alignof(PersistentArenaBufferAllocator));
+  // Use the default copy constructor to populate internal states.
+  return new (allocator_buffer) PersistentArenaBufferAllocator(tmp);
+}
+
+// NonPersistentBufferAllocator instance is created in the persistent buffer
+// because it has to be persistent to keep track of the non-persistent buffer
+// information.
+INonPersistentBufferAllocator* CreateNonPersistentArenaAllocator(
+    uint8_t* buffer_head, size_t buffer_size,
+    IPersistentBufferAllocator* persistent_buffer_allocator) {
+  uint8_t* allocator_buffer =
+      persistent_buffer_allocator->AllocatePersistentBuffer(
+          sizeof(NonPersistentArenaBufferAllocator),
+          alignof(NonPersistentArenaBufferAllocator));
+  // Align the actually used area by the head because the non-persistent
+  // buffer grows from the head to bottom.
+  uint8_t* aligned_buffer_head =
+      AlignPointerUp(buffer_head, MicroArenaBufferAlignment());
+  size_t aligned_buffer_size = buffer_head + buffer_size - aligned_buffer_head;
+
+  INonPersistentBufferAllocator* non_persistent_buffer_allocator =
+      new (allocator_buffer) NonPersistentArenaBufferAllocator(
+          aligned_buffer_head, aligned_buffer_size);
+  return non_persistent_buffer_allocator;
+}
+
 }  // namespace
 
 namespace internal {
 
-// Handles architecture safe mapping of flatbuffer vectors to a TfLite*Array
-// struct. Matching types are required (e.g. float and TfLiteFloatArray).
-// Big-endian systems will always allocate dimension array data in the tail
-// (persistent) section.
-template <typename kFlatBufferVectorType, typename kTfLiteArrayType>
-TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
-    SimpleMemoryAllocator* allocator, ErrorReporter* error_reporter,
-    const flatbuffers::Vector<kFlatBufferVectorType>* flatbuffer_array,
-    kTfLiteArrayType** result) {
-  TFLITE_DCHECK(error_reporter != nullptr);
-  TFLITE_DCHECK(flatbuffer_array != nullptr);
-  // TODO(b/159668691): Consider adding type assertion or breaking this function
-  // into multiple functions for each type. std::is_same is c++11 and has a
-  // special updated constructor in c++17 that requires a string argument.
-  if (FLATBUFFERS_LITTLEENDIAN) {
-    // On little-endian machines, TfLite*Array happens to have the same memory
-    // layout as flatbuffers:Vector<kFlatBufferVectorType>, so we can
-    // reinterpret_cast the flatbuffer vector and avoid a copy and malloc.
-    *result = const_cast<kTfLiteArrayType*>(
-        reinterpret_cast<const kTfLiteArrayType*>(flatbuffer_array));
-  } else {
-    // Big-endian architecture can not use the same memory layout as
-    // flatbuffers::Vector<kFlatBufferVectorType>. Allocate from the tail and
-    // copy values from the flatbuffer into the newly allocated chunk.
-    kTfLiteArrayType* array =
-        reinterpret_cast<kTfLiteArrayType*>(allocator->AllocateFromTail(
-            TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()),
-            alignof(kTfLiteArrayType)));
-    if (array == nullptr) {
-      TF_LITE_REPORT_ERROR(
-          error_reporter,
-          "Failed to allocate %d bytes of memory to copy an array.",
-          TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()));
-      return kTfLiteError;
-    }
-    array->size = flatbuffer_array->Length();
-    for (int i = 0; i < array->size; ++i) {
-      array->data[i] = flatbuffer_array->Get(i);
-    }
-    *result = array;
-  }
-  return kTfLiteOk;
-}
-
 // Returns a pointer to any buffer associated with the flatbuffer tensor. Can
 // return nullptr if no buffer is found.
 void* GetFlatbufferTensorBuffer(
@@ -457,22 +191,26 @@ void* GetFlatbufferTensorBuffer(
 }
 
 TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
-    SimpleMemoryAllocator* allocator, bool allocate_temp,
-    const tflite::Tensor& flatbuffer_tensor,
+    IPersistentBufferAllocator* persistent_buffer_allocator,
+    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
+    bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
-    ErrorReporter* error_reporter, TfLiteTensor* result) {
+    TfLiteTensor* result) {
   TFLITE_DCHECK(result != nullptr);
 
   *result = {};
   // Make sure the serialized type is one we know how to deal with, and convert
   // it from a flatbuffer enum into a constant used by the kernel C API.
-  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
-                                          &result->type, error_reporter));
+  TF_LITE_ENSURE_STATUS(
+      tflite::ConvertTensorType(flatbuffer_tensor.type(), &result->type));
   // Make sure we remember if the serialized tensor is designated as a variable.
   result->is_variable = flatbuffer_tensor.is_variable();
 
   result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);
-
+  // this is useful for debugging
+#if defined(EI_LOG_LEVEL) && (EI_LOG_LEVEL >= 4)
+  result->name = flatbuffer_tensor.name()->c_str();
+#endif
   // TODO(petewarden): Some of these paths aren't getting enough testing
   // coverage, so we should figure out some tests that exercise them.
   if (result->data.data == nullptr) {
@@ -487,20 +225,20 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
 
   // Figure out what the size in bytes of the buffer is and store it.
   size_t type_size;
-  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
-      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
+  TF_LITE_ENSURE_STATUS(
+      BytesRequiredForTensor(flatbuffer_tensor, &result->bytes, &type_size));
 
   if (flatbuffer_tensor.shape() == nullptr) {
     // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
     // tensor.
+    // TODO(b/188459715): figure out why const_cast is required here.
     result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
   } else {
     // TFLM doesn't allow reshaping the tensor which requires dynamic memory
     // allocation so it is safe to drop the const qualifier. In the future, if
     // we really want to update the tensor shape, we can always pass in a new
     // TfLiteIntArray - especially we have to do so if the dimension is
-    TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
-        allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
+    result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape());
   }
 
   // Copy the quantization information from the serialized data.
@@ -523,15 +261,15 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
     TfLiteAffineQuantization* quantization =
         allocate_temp
             ? reinterpret_cast<TfLiteAffineQuantization*>(
-                  allocator->AllocateTemp(sizeof(TfLiteAffineQuantization),
-                                          alignof(TfLiteAffineQuantization)))
+                  non_persistent_buffer_allocator->AllocateTemp(
+                      sizeof(TfLiteAffineQuantization),
+                      alignof(TfLiteAffineQuantization)))
             : reinterpret_cast<TfLiteAffineQuantization*>(
-                  allocator->AllocateFromTail(
+                  persistent_buffer_allocator->AllocatePersistentBuffer(
                       sizeof(TfLiteAffineQuantization),
                       alignof(TfLiteAffineQuantization)));
     if (quantization == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter,
-                           "Unable to allocate TfLiteAffineQuantization.\n");
+      MicroPrintf("Unable to allocate TfLiteAffineQuantization.\n");
       return kTfLiteError;
     }
 
@@ -540,26 +278,31 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
     // zero_point is stored as a int64_t.
     quantization->zero_point =
         allocate_temp
-            ? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateTemp(
-                  TfLiteIntArrayGetSizeInBytes(channels),
-                  alignof(TfLiteIntArray)))
-            : reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
-                  TfLiteIntArrayGetSizeInBytes(channels),
-                  alignof(TfLiteIntArray)));
+            ? reinterpret_cast<TfLiteIntArray*>(
+                  non_persistent_buffer_allocator->AllocateTemp(
+                      TfLiteIntArrayGetSizeInBytes(channels),
+                      alignof(TfLiteIntArray)))
+            : reinterpret_cast<TfLiteIntArray*>(
+                  persistent_buffer_allocator->AllocatePersistentBuffer(
+                      TfLiteIntArrayGetSizeInBytes(channels),
+                      alignof(TfLiteIntArray)));
     if (quantization->zero_point == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter,
-                           "Unable to allocate quantization->zero_point.\n");
+      MicroPrintf("Unable to allocate quantization->zero_point.\n");
       return kTfLiteError;
     }
 
-    TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
-        allocator, error_reporter, src_quantization->scale(),
-        &quantization->scale));
+    quantization->scale =
+        FlatBufferVectorToTfLiteTypeArray(src_quantization->scale());
 
     quantization->zero_point->size = channels;
     int* zero_point_data = quantization->zero_point->data;
     for (int i = 0; i < channels; i++) {
-      zero_point_data[i] = src_quantization->zero_point()->Get(i);
+      // As a space-saving optimization, zero point arrays for weights can be
+      // reduced to a single value, since all zero points for weights are 0.
+      zero_point_data[i] = src_quantization->zero_point()->size() ==
+                                   src_quantization->scale()->size()
+                               ? src_quantization->zero_point()->Get(i)
+                               : src_quantization->zero_point()->Get(0);
     }
     // TODO(rocky): Need to add a micro_allocator test case that fails when
     // this is not copied:
@@ -571,14 +314,14 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
 }
 
 TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
-    SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
+    const tflite::Tensor& flatbuffer_tensor,
     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
-    ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
+    TfLiteEvalTensor* result) {
   *result = {};
   // Make sure the serialized type is one we know how to deal with, and convert
   // it from a flatbuffer enum into a constant used by the kernel C API.
-  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
-                                          &result->type, error_reporter));
+  TF_LITE_ENSURE_STATUS(
+      tflite::ConvertTensorType(flatbuffer_tensor.type(), &result->type));
 
   result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);
 
@@ -587,96 +330,183 @@ TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
     // tensor.
     result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
   } else {
-    TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
-        allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
+    result->dims = FlatBufferVectorToTfLiteTypeArray(flatbuffer_tensor.shape());
   }
   return kTfLiteOk;
 }
 
 }  // namespace internal
 
-MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
-                               ErrorReporter* error_reporter)
-    : memory_allocator_(memory_allocator),
-      error_reporter_(error_reporter),
+size_t MicroAllocator::GetDefaultTailUsage(bool is_memory_planner_given) {
+  size_t total_size = AlignSizeUp<SingleArenaBufferAllocator>() +
+                      AlignSizeUp<MicroAllocator>() +
+                      AlignSizeUp<MicroBuiltinDataAllocator>() +
+                      AlignSizeUp<SubgraphAllocations>();
+  if (!is_memory_planner_given) {
+    total_size += AlignSizeUp<GreedyMemoryPlanner>();
+  }
+  return total_size;
+}
+
+MicroAllocator::MicroAllocator(SingleArenaBufferAllocator* memory_allocator,
+                               MicroMemoryPlanner* memory_planner)
+    : non_persistent_buffer_allocator_(memory_allocator),
+      persistent_buffer_allocator_(memory_allocator),
+      memory_planner_(memory_planner),
+      model_is_allocating_(false) {}
+
+MicroAllocator::MicroAllocator(
+    IPersistentBufferAllocator* persistent_buffer_allocator,
+    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
+    MicroMemoryPlanner* memory_planner)
+    : non_persistent_buffer_allocator_(non_persistent_buffer_allocator),
+      persistent_buffer_allocator_(persistent_buffer_allocator),
+      memory_planner_(memory_planner),
       model_is_allocating_(false) {}
 
 MicroAllocator::~MicroAllocator() {}
 
 MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
-                                       ErrorReporter* error_reporter) {
-  uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
+                                       MicroMemoryPlanner* memory_planner) {
+  uint8_t* aligned_arena =
+      AlignPointerUp(tensor_arena, MicroArenaBufferAlignment());
+  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
+  SingleArenaBufferAllocator* memory_allocator =
+      SingleArenaBufferAllocator::Create(aligned_arena, aligned_arena_size);
+
+  return Create(memory_allocator, memory_planner);
+}
+
+MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena,
+                                       size_t arena_size) {
+  uint8_t* aligned_arena =
+      AlignPointerUp(tensor_arena, MicroArenaBufferAlignment());
   size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
-  return Create(SimpleMemoryAllocator::Create(error_reporter, aligned_arena,
-                                              aligned_arena_size),
-                error_reporter);
+  SingleArenaBufferAllocator* memory_allocator =
+      SingleArenaBufferAllocator::Create(aligned_arena, aligned_arena_size);
+
+  // By default create GreedyMemoryPlanner.
+  // For a different planner, use the Create() overload that accepts one.
+  uint8_t* memory_planner_buffer = memory_allocator->AllocatePersistentBuffer(
+      sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
+  GreedyMemoryPlanner* memory_planner =
+      new (memory_planner_buffer) GreedyMemoryPlanner();
+
+  return Create(memory_allocator, memory_planner);
 }
 
-MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
-                                       ErrorReporter* error_reporter) {
+MicroAllocator* MicroAllocator::Create(
+    SingleArenaBufferAllocator* memory_allocator,
+    MicroMemoryPlanner* memory_planner) {
   TFLITE_DCHECK(memory_allocator != nullptr);
-  TFLITE_DCHECK(error_reporter != nullptr);
+  TFLITE_DCHECK(memory_planner != nullptr);
 
-  uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
+  uint8_t* allocator_buffer = memory_allocator->AllocatePersistentBuffer(
       sizeof(MicroAllocator), alignof(MicroAllocator));
-  MicroAllocator* allocator =
-      new (allocator_buffer) MicroAllocator(memory_allocator, error_reporter);
+  MicroAllocator* allocator = new (allocator_buffer)
+      MicroAllocator(memory_allocator, memory_allocator, memory_planner);
+  return allocator;
+}
+
+MicroAllocator* MicroAllocator::Create(uint8_t* persistent_tensor_arena,
+                                       size_t persistent_arena_size,
+                                       uint8_t* non_persistent_tensor_arena,
+                                       size_t non_persistent_arena_size) {
+  TFLITE_DCHECK(persistent_tensor_arena != nullptr);
+  TFLITE_DCHECK(non_persistent_tensor_arena != nullptr);
+  TFLITE_DCHECK(persistent_tensor_arena != non_persistent_tensor_arena);
+
+  IPersistentBufferAllocator* persistent_buffer_allocator =
+      CreatePersistentArenaAllocator(persistent_tensor_arena,
+                                     persistent_arena_size);
+  INonPersistentBufferAllocator* non_persistent_buffer_allocator =
+      CreateNonPersistentArenaAllocator(non_persistent_tensor_arena,
+                                        non_persistent_arena_size,
+                                        persistent_buffer_allocator);
+
+  uint8_t* memory_planner_buffer =
+      persistent_buffer_allocator->AllocatePersistentBuffer(
+          sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
+  GreedyMemoryPlanner* memory_planner =
+      new (memory_planner_buffer) GreedyMemoryPlanner();
+
+  uint8_t* micro_allocator_buffer =
+      persistent_buffer_allocator->AllocatePersistentBuffer(
+          sizeof(MicroAllocator), alignof(MicroAllocator));
+  MicroAllocator* allocator = new (micro_allocator_buffer)
+      MicroAllocator(persistent_buffer_allocator,
+                     non_persistent_buffer_allocator, memory_planner);
   return allocator;
 }
 
-TfLiteStatus MicroAllocator::StartModelAllocation(
-    const Model* model, const MicroOpResolver& op_resolver,
-    NodeAndRegistration** node_and_registrations,
-    TfLiteEvalTensor** eval_tensors) {
+SubgraphAllocations* MicroAllocator::StartModelAllocation(const Model* model) {
   TFLITE_DCHECK(model != nullptr);
 
   if (model_is_allocating_) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "MicroAllocator: Model allocation started before "
-                         "finishing previously allocated model");
-    return kTfLiteError;
+    MicroPrintf(
+        "MicroAllocator: Model allocation started before "
+        "finishing previously allocated model");
+    return nullptr;
   }
 
   model_is_allocating_ = true;
 
-  TF_LITE_ENSURE_STATUS(InitScratchBufferData());
-  TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
-  TF_LITE_ENSURE_STATUS(
-      AllocateNodeAndRegistrations(model, node_and_registrations));
-  TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
-      model, op_resolver, *node_and_registrations));
+  uint8_t* data_allocator_buffer =
+      persistent_buffer_allocator_->AllocatePersistentBuffer(
+          sizeof(MicroBuiltinDataAllocator),
+          alignof(MicroBuiltinDataAllocator));
+  builtin_data_allocator_ = new (data_allocator_buffer)
+      MicroBuiltinDataAllocator(persistent_buffer_allocator_);
 
-  return kTfLiteOk;
+  if (InitScratchBufferData() != kTfLiteOk) {
+    return nullptr;
+  }
+
+  // Allocate struct to store eval tensors, nodes and registrations.
+  SubgraphAllocations* output = reinterpret_cast<SubgraphAllocations*>(
+      persistent_buffer_allocator_->AllocatePersistentBuffer(
+          sizeof(SubgraphAllocations) * model->subgraphs()->size(),
+          alignof(SubgraphAllocations)));
+  if (output == nullptr) {
+    MicroPrintf("Failed to allocate memory for model metadata.");
+    return nullptr;
+  }
+
+  if (AllocateTfLiteEvalTensors(model, output) != kTfLiteOk ||
+      AllocateNodeAndRegistrations(model, output) != kTfLiteOk) {
+    return nullptr;
+  }
+  return output;
 }
 
 TfLiteStatus MicroAllocator::FinishModelAllocation(
-    const Model* model, TfLiteEvalTensor* eval_tensors,
+    const Model* model, SubgraphAllocations* subgraph_allocations,
     ScratchBufferHandle** scratch_buffer_handles) {
   if (!model_is_allocating_) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "MicroAllocator: Model allocation finished before "
-                         "starting allocating model");
+    MicroPrintf(
+        "MicroAllocator: Model allocation finished before "
+        "starting allocating model");
     return kTfLiteError;
   }
 
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
-  TFLITE_DCHECK(subgraph != nullptr);
-
+  // Allocate scratch buffer metadata.
   TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
       scratch_buffer_handles, scratch_buffer_request_count_));
-  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors,
-                                               *scratch_buffer_handles));
-  TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));
 
+  // Plan all subgraphs and scratch buffers together.
+  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph_allocations,
+                                               *scratch_buffer_handles));
   model_is_allocating_ = false;
   return kTfLiteOk;
 }
 
 void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
-  return memory_allocator_->AllocateFromTail(bytes, kBufferAlignment);
+  return persistent_buffer_allocator_->AllocatePersistentBuffer(
+      bytes, MicroArenaBufferAlignment());
 }
 
 TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
+                                                         int subgraph_idx,
                                                          int* buffer_idx) {
   // All scratch buffer requests are stored in the head section of the arena
   // when a model is in the prepare phase. First align a scratch buffer request
@@ -693,10 +523,8 @@ TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
 
   // First, ensure that the per-kernel request has not exceeded the limit:
   if (current_node_request_count >= kMaxScratchBuffersPerOp) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Scratch buffer request exeeds limit per operator (%d)",
-        kMaxScratchBuffersPerOp);
+    MicroPrintf("Scratch buffer request exeeds limit per operator (%d)",
+                kMaxScratchBuffersPerOp);
     return kTfLiteError;
   }
 
@@ -708,6 +536,7 @@ TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
   // allocating:
   current_request->bytes = bytes;
   current_request->node_idx = kUnassignedScratchBufferRequestIndex;
+  current_request->subgraph_idx = subgraph_idx;
 
   // Assign the current request index to the out-param:
   *buffer_idx = scratch_buffer_request_count_;
@@ -720,7 +549,7 @@ TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
 TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
   // When a node has finished preparing, all temp allocations performed by the
   // kernel should be cleaned up:
-  ResetTempAllocations();
+  TF_LITE_ENSURE_STATUS(ResetTempAllocations());
 
   // Find and update any new scratch buffer requests for the current node:
   internal::ScratchBufferRequest* requests = GetScratchBufferRequests();
@@ -738,7 +567,8 @@ TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
 
   // Ensure that the head is re-adjusted to allow for another at-most
   // kMaxScratchBuffersPerOp scratch buffer requests in the next operator:
-  TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
+  TF_LITE_ENSURE_STATUS(non_persistent_buffer_allocator_->ResizeBuffer(
+      scratch_buffer_head_,
       sizeof(internal::ScratchBufferRequest) *
           (scratch_buffer_request_count_ + kMaxScratchBuffersPerOp),
       alignof(internal::ScratchBufferRequest)));
@@ -747,281 +577,225 @@ TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
 }
 
 size_t MicroAllocator::used_bytes() const {
-  return memory_allocator_->GetUsedBytes();
+  return non_persistent_buffer_allocator_->GetNonPersistentUsedBytes() +
+         persistent_buffer_allocator_->GetPersistentUsedBytes();
 }
 
 TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
-    const Model* model, NodeAndRegistration** node_and_registrations) {
-  TFLITE_DCHECK(node_and_registrations);
-
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
-  TFLITE_DCHECK(subgraph != nullptr);
-
-  NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
-      memory_allocator_->AllocateFromTail(
-          sizeof(NodeAndRegistration) * subgraph->operators()->size(),
-          alignof(NodeAndRegistration)));
-  if (output == nullptr) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Failed to allocate memory for node_and_registrations.");
-    return kTfLiteError;
-  }
-  *node_and_registrations = output;
-  return kTfLiteOk;
-}
-
-TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
-    const Model* model, const MicroOpResolver& op_resolver,
-    NodeAndRegistration* node_and_registrations) {
-  TFLITE_DCHECK(model != nullptr);
-  TFLITE_DCHECK(node_and_registrations != nullptr);
-
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
-  TFLITE_DCHECK(subgraph != nullptr);
-
-  TfLiteStatus status = kTfLiteOk;
-  auto* opcodes = model->operator_codes();
-  MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
-  for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
-    const auto* op = subgraph->operators()->Get(i);
-    const size_t index = op->opcode_index();
-    if (index >= opcodes->size()) {
-      TF_LITE_REPORT_ERROR(error_reporter_,
-                           "Missing registration for opcode_index %d\n", index);
-      return kTfLiteError;
-    }
-    auto* opcode = (*opcodes)[index];
-    status =
-        GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
-                                  &(node_and_registrations[i].registration));
-    if (status != kTfLiteOk) {
-      TF_LITE_REPORT_ERROR(error_reporter_,
-                           "Failed to get registration from op code %s\n ",
-                           EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
-      return status;
-    }
-    const auto* registration = node_and_registrations[i].registration;
-    if (registration == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
-                           index);
+    const Model* model, SubgraphAllocations* subgraph_allocations) {
+  TFLITE_DCHECK(subgraph_allocations != nullptr);
+
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
+    TFLITE_DCHECK(subgraph != nullptr);
+
+    uint32_t operators_size = NumSubgraphOperators(subgraph);
+
+    // Initialize NodeAndRegistrations for the subgraph.
+    NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
+        persistent_buffer_allocator_->AllocatePersistentBuffer(
+            sizeof(NodeAndRegistration) * operators_size,
+            alignof(NodeAndRegistration)));
+    if (output == nullptr) {
+      MicroPrintf("Failed to allocate memory for node_and_registrations.");
       return kTfLiteError;
     }
-    BuiltinOperator op_type =
-        static_cast<BuiltinOperator>(registration->builtin_code);
-
-    const char* custom_data = nullptr;
-    size_t custom_data_size = 0;
-    unsigned char* builtin_data = nullptr;
-
-    if (op_type == BuiltinOperator_CUSTOM) {
-      // Custom Ops may or may not have a non-null custom_options field.
-      if (op->custom_options() != nullptr) {
-        custom_data =
-            reinterpret_cast<const char*>(op->custom_options()->data());
-        custom_data_size = op->custom_options()->size();
-      }
-    } else {
-      if (op->custom_options() != nullptr) {
-        TF_LITE_REPORT_ERROR(
-            error_reporter_,
-            "Unsupported behavior: found builtin operator %s with custom "
-            "options.\n",
-            EnumNameBuiltinOperator(op_type));
-        return kTfLiteError;
-      }
-
-      MicroOpResolver::BuiltinParseFunction parser =
-          op_resolver.GetOpDataParser(op_type);
-      if (parser == nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_, "Did not find a parser for %s",
-                             EnumNameBuiltinOperator(op_type));
-
-        return kTfLiteError;
-      }
-      TF_LITE_ENSURE_STATUS(parser(op, error_reporter_, &builtin_data_allocator,
-                                   (void**)(&builtin_data)));
-    }
-
-    TfLiteIntArray* inputs_array;
-    TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
-        memory_allocator_, error_reporter_, op->inputs(), &inputs_array));
-
-    TfLiteIntArray* outputs_array;
-    TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
-        memory_allocator_, error_reporter_, op->outputs(), &outputs_array));
-
-    TfLiteNode* node = &(node_and_registrations[i].node);
-    *node = {};
-    node->inputs = inputs_array;
-    node->outputs = outputs_array;
-    node->builtin_data = reinterpret_cast<void*>(builtin_data);
-    node->custom_initial_data = custom_data;
-    node->custom_initial_data_size = custom_data_size;
+    subgraph_allocations[subgraph_idx].node_and_registrations = output;
   }
-
   return kTfLiteOk;
 }
 
 TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
-    const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
+    const Model* model, const SubgraphAllocations* subgraph_allocations,
+    int tensor_index, int subgraph_index) {
+  const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
   TFLITE_DCHECK(subgraph != nullptr);
 
   // This value is allocated from persistent arena space. It is guaranteed to be
   // around for the lifetime of the application.
-  TfLiteTensor* tensor =
-      AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);
+  TfLiteTensor* tensor = AllocatePersistentTfLiteTensorInternal();
 
   // Populate any fields from the flatbuffer, since this TfLiteTensor struct is
   // allocated in the persistent section of the arena, ensure that additional
   // allocations also take place in that section of the arena.
-  if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
-                                         /*allocate_temp=*/false) !=
-      kTfLiteOk) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Failed to populate a persistent TfLiteTensor struct "
-                         "from flatbuffer data!");
+  if (PopulateTfLiteTensorFromFlatbuffer(
+          model, tensor, tensor_index, subgraph_index,
+          /*allocate_temp=*/false) != kTfLiteOk) {
+    MicroPrintf(
+        "Failed to populate a persistent TfLiteTensor struct "
+        "from flatbuffer data!");
     return nullptr;
   }
 
-  if (eval_tensors != nullptr) {
+  if (subgraph_allocations != nullptr) {
     // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
     // and not located in the flatbuffer are stored on the pre-allocated list of
     // TfLiteEvalTensors structs. These structs are the source of truth, simply
     // point the corresponding buffer to the new TfLiteTensor data value.
-    tensor->data.data = eval_tensors[tensor_index].data.data;
+    tensor->data.data =
+        subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
+    // TfLiteEvalTensor structs must also be the source of truth for the
+    // TfLiteTensor dims.
+    tensor->dims =
+        subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
   }
   return tensor;
 }
 
+void MicroAllocator::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
+  TFLITE_DCHECK(tensor != nullptr);
+
+  if (tensor->quantization.type == kTfLiteAffineQuantization) {
+    TFLITE_DCHECK(tensor->quantization.params != nullptr);
+    TfLiteAffineQuantization* quantization =
+        reinterpret_cast<TfLiteAffineQuantization*>(
+            tensor->quantization.params);
+
+    non_persistent_buffer_allocator_->DeallocateTemp(
+        reinterpret_cast<uint8_t*>(quantization->zero_point));
+    non_persistent_buffer_allocator_->DeallocateTemp(
+        reinterpret_cast<uint8_t*>(quantization));
+  }
+
+  // Clear the data in case someone still accesses the tensor arena by mistake
+  tensor->quantization.type = kTfLiteNoQuantization;
+  tensor->quantization.params = nullptr;
+  tensor->data.data = nullptr;
+  tensor->dims = nullptr;
+  non_persistent_buffer_allocator_->DeallocateTemp(
+      reinterpret_cast<uint8_t*>(tensor));
+}
+
 TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
-    const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
+    const Model* model, const SubgraphAllocations* subgraph_allocations,
+    int tensor_index, int subgraph_index) {
+  const SubGraph* subgraph = model->subgraphs()->Get(subgraph_index);
   TFLITE_DCHECK(subgraph != nullptr);
 
   // This value is allocated from temporary arena space. It is guaranteed to be
   // around for at least the scope of the calling function. Since this struct
   // allocation takes place in temp space, no need to own or cleanup.
-  TfLiteTensor* tensor =
-      reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp(
-          sizeof(TfLiteTensor), alignof(TfLiteTensor)));
+  TfLiteTensor* tensor = reinterpret_cast<TfLiteTensor*>(
+      non_persistent_buffer_allocator_->AllocateTemp(sizeof(TfLiteTensor),
+                                                     alignof(TfLiteTensor)));
 
   // Populate any fields from the flatbuffer, since this TfLiteTensor struct is
   // allocated in the temp section of the arena, ensure that additional
   // allocations also take place in that section of the arena.
-  if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
+  if (PopulateTfLiteTensorFromFlatbuffer(model, tensor, tensor_index,
+                                         subgraph_index,
                                          /*allocate_temp=*/true) != kTfLiteOk) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
+    MicroPrintf(
         "Failed to populate a temp TfLiteTensor struct from flatbuffer data!");
     return nullptr;
   }
 
-  if (eval_tensors != nullptr) {
+  if (subgraph_allocations != nullptr) {
     // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
     // and not located in the flatbuffer are stored on the pre-allocated list of
     // TfLiteEvalTensors structs. These structs are the source of truth, simply
     // point the corresponding buffer to the new TfLiteTensor data value.
-    tensor->data.data = eval_tensors[tensor_index].data.data;
+    tensor->data.data =
+        subgraph_allocations[subgraph_index].tensors[tensor_index].data.data;
+    // TfLiteEvalTensor structs must also be the source of truth for the
+    // TfLiteTensor dims.
+    tensor->dims =
+        subgraph_allocations[subgraph_index].tensors[tensor_index].dims;
   }
   return tensor;
 }
 
-void MicroAllocator::ResetTempAllocations() {
-  memory_allocator_->ResetTempAllocations();
+TfLiteStatus MicroAllocator::ResetTempAllocations() {
+  return non_persistent_buffer_allocator_->ResetTempAllocations();
 }
 
-TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
-    const Model* model, TfLiteEvalTensor** eval_tensors) {
-  TFLITE_DCHECK(eval_tensors != nullptr);
-
-  const SubGraph* subgraph = GetSubGraphFromModel(model);
-  TFLITE_DCHECK(subgraph != nullptr);
-
-  size_t alloc_count = subgraph->tensors()->size();
-  TfLiteEvalTensor* tensors =
-      reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
-          sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
-  if (tensors == nullptr) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Failed to allocate memory for context->eval_tensors, "
-                         "%d bytes required",
-                         sizeof(TfLiteEvalTensor) * alloc_count);
-    return kTfLiteError;
-  }
+bool MicroAllocator::IsAllTempDeallocated() {
+  return non_persistent_buffer_allocator_->IsAllTempDeallocated();
+}
 
-  for (size_t i = 0; i < alloc_count; ++i) {
-    TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
-        memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
-        error_reporter_, &tensors[i]);
-    if (status != kTfLiteOk) {
-      TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
-                           i);
+TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
+    const Model* model, SubgraphAllocations* subgraph_allocations) {
+  TFLITE_DCHECK(subgraph_allocations != nullptr);
+
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
+    TFLITE_DCHECK(subgraph != nullptr);
+
+    size_t alloc_count = subgraph->tensors()->size();
+    TfLiteEvalTensor* tensors = reinterpret_cast<TfLiteEvalTensor*>(
+        persistent_buffer_allocator_->AllocatePersistentBuffer(
+            sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
+    if (tensors == nullptr) {
+      MicroPrintf(
+          "Failed to allocate memory for context->eval_tensors, "
+          "%d bytes required",
+          sizeof(TfLiteEvalTensor) * alloc_count);
       return kTfLiteError;
     }
+
+    for (size_t i = 0; i < alloc_count; ++i) {
+      TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
+          *subgraph->tensors()->Get(i), model->buffers(), &tensors[i]);
+      if (status != kTfLiteOk) {
+        MicroPrintf("Failed to initialize tensor %d", i);
+        return kTfLiteError;
+      }
+    }
+    subgraph_allocations[subgraph_idx].tensors = tensors;
   }
-  *eval_tensors = tensors;
   return kTfLiteOk;
 }
 
-TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
-                                               TfLiteEvalTensor* eval_tensors) {
+TfLiteStatus MicroAllocator::AllocateVariables(
+    const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+    const int32_t* offline_planner_offsets) {
   for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
     auto* tensor = subgraph->tensors()->Get(i);
     if (tensor->is_variable()) {
-      size_t buffer_size;
-      TF_LITE_ENSURE_STATUS(
-          TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
+      if (offline_planner_offsets == nullptr ||
+          offline_planner_offsets[i] == kOnlinePlannedBuffer) {
+        size_t buffer_size;
+        TF_LITE_ENSURE_STATUS(
+            TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
 
-      eval_tensors[i].data.data =
-          memory_allocator_->AllocateFromTail(buffer_size, kBufferAlignment);
+        eval_tensors[i].data.data =
+            persistent_buffer_allocator_->AllocatePersistentBuffer(
+                buffer_size, MicroArenaBufferAlignment());
 
-      if (eval_tensors[i].data.data == nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Failed to allocate variable tensor of size %d",
-                             buffer_size);
-        return kTfLiteError;
+        if (eval_tensors[i].data.data == nullptr) {
+          MicroPrintf("Failed to allocate variable tensor of size %d",
+                      buffer_size);
+          return kTfLiteError;
+        }
       }
     }
   }
   return kTfLiteOk;
 }
 
-TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
-    const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
-  return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
-      sizeof(TfLiteTensor), alignof(TfLiteTensor)));
+TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal() {
+  return reinterpret_cast<TfLiteTensor*>(
+      persistent_buffer_allocator_->AllocatePersistentBuffer(
+          sizeof(TfLiteTensor), alignof(TfLiteTensor)));
 }
 
 TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
-    const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
-    int tensor_index, bool allocate_temp) {
+    const Model* model, TfLiteTensor* tensor, int tensor_index,
+    int subgraph_idx, bool allocate_temp) {
   // TODO(b/162311891): This method serves as a stub to ensure quantized
   // allocations in the tail can be recorded. Once the interpreter has APIs for
   // accessing buffers on TfLiteEvalTensor this method can be dropped.
   return internal::InitializeTfLiteTensorFromFlatbuffer(
-      memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
-      model->buffers(), error_reporter_, tensor);
-}
-
-ErrorReporter* MicroAllocator::error_reporter() const {
-  return error_reporter_;
-}
-
-const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
-  auto* subgraphs = model->subgraphs();
-  if (subgraphs->size() != 1) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Only 1 subgraph is currently supported.\n");
-    return nullptr;
-  }
-  return (*subgraphs)[0];
+      persistent_buffer_allocator_, non_persistent_buffer_allocator_,
+      allocate_temp,
+      *model->subgraphs()->Get(subgraph_idx)->tensors()->Get(tensor_index),
+      model->buffers(), tensor);
 }
 
 TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
-    const Model* model, const SubGraph* subgraph,
-    TfLiteEvalTensor* eval_tensors,
+    const Model* model, SubgraphAllocations* allocations,
     ScratchBufferHandle* scratch_buffer_handles) {
   size_t head_usage = 0;
   // Create static memory plan
@@ -1034,69 +808,70 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
   // allocated from the temp section and cleaned up at the bottom of this
   // function.
 
-  size_t allocation_info_count =
-      subgraph->tensors()->size() + scratch_buffer_request_count_;
-  size_t bytes = sizeof(AllocationInfo) * allocation_info_count;
-
-  // Allocate an array of AllocationInfo structs from the temp section. This
-  // struct will be used by AllocationInfoBuilder to find buffer usage.
-  AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
-      memory_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
-  if (allocation_info == nullptr) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Failed to allocate memory for allocation_info, %d bytes required",
-        bytes);
-    return kTfLiteError;
-  }
-
   // Use the AllocationInfoBuilder class to help determine where buffers are
   // used in the subgraph.
-  AllocationInfoBuilder builder(allocation_info, subgraph->tensors()->size(),
-                                scratch_buffer_request_count_, error_reporter_);
+  AllocationInfoBuilder builder(model, non_persistent_buffer_allocator_);
+  TF_LITE_ENSURE_STATUS(
+      builder.CreateAllocationInfo(scratch_buffer_request_count_));
 
   const int32_t* offline_planner_offsets = nullptr;
   TF_LITE_ENSURE_STATUS(
-      builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
+      builder.GetOfflinePlannedOffsets(&offline_planner_offsets));
+
+  // We allocate buffers for variable tensors here since the offline planner
+  // offsets are conveniently available here.
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model->subgraphs()->Get(subgraph_idx);
+    TFLITE_DCHECK(subgraph != nullptr);
+    TF_LITE_ENSURE_STATUS(AllocateVariables(
+        subgraph, allocations[subgraph_idx].tensors, offline_planner_offsets));
+  }
+
   TF_LITE_ENSURE_STATUS(
-      builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));
+      builder.InitializeAllocationInfo(offline_planner_offsets, allocations));
 
   internal::ScratchBufferRequest* scratch_buffer_requests =
       GetScratchBufferRequests();
-
-  TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_requests,
-                                                  scratch_buffer_handles));
+  TF_LITE_ENSURE_STATUS(builder.MarkAllocationLifetimes(
+      0, scratch_buffer_requests, scratch_buffer_handles, allocations));
+  int allocation_info_count = builder.AllocationCount();
+  AllocationInfo* allocation_info = builder.Finish();
 
   // Remaining arena size that memory planner can use for calculating offsets.
   size_t remaining_arena_size =
-      memory_allocator_->GetAvailableMemory(kBufferAlignment);
-  uint8_t* planner_arena =
-      memory_allocator_->AllocateTemp(remaining_arena_size, kBufferAlignment);
-  TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
-  GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
-  TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, &planner, allocation_info,
-                                   allocation_info_count));
+      non_persistent_buffer_allocator_->GetAvailableMemory(
+          MicroArenaBufferAlignment());
+  uint8_t* planner_arena = non_persistent_buffer_allocator_->AllocateTemp(
+      remaining_arena_size, MicroArenaBufferAlignment());
 
-  // Reset all temp allocations used above:
-  memory_allocator_->ResetTempAllocations();
-
-  size_t actual_available_arena_size =
-      memory_allocator_->GetAvailableMemory(kBufferAlignment);
-
-  // Make sure we have enough arena size.
-  if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Arena size is too small for all buffers. Needed %u but only "
-        "%u was available.",
-        planner.GetMaximumMemorySize(), actual_available_arena_size);
+  if (planner_arena == nullptr) {
     return kTfLiteError;
   }
+
+  memory_planner_->Init(planner_arena, remaining_arena_size);
+  TF_LITE_ENSURE_STATUS(
+      CreatePlan(memory_planner_, allocation_info, allocation_info_count));
+
   // Commit the plan.
-  TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
-                                   memory_allocator_->GetHeadBuffer(),
-                                   allocation_info, allocation_info_count));
-  head_usage = planner.GetMaximumMemorySize();
+  TF_LITE_ENSURE_STATUS(
+      CommitPlan(memory_planner_,
+                 non_persistent_buffer_allocator_->GetOverlayMemoryAddress(),
+                 allocation_info, allocation_info_count));
+
+  // Reset all temp allocations used above:
+  builder.FreeAllocationInfo();
+  non_persistent_buffer_allocator_->DeallocateTemp(planner_arena);
+  TF_LITE_ENSURE_STATUS(
+      non_persistent_buffer_allocator_->ResetTempAllocations());
+  TF_LITE_ENSURE_STATUS(
+      non_persistent_buffer_allocator_->DeallocateResizableBuffer(
+          scratch_buffer_head_));
+
+#ifdef TF_LITE_SHOW_MEMORY_USE
+  memory_planner_->PrintMemoryPlan();
+#endif
+  head_usage = memory_planner_->GetMaximumMemorySize();
 
   // The head is used to store memory plans for one model at a time during the
   // model preparation stage, and is re-purposed to store scratch buffer handles
@@ -1110,8 +885,9 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
   // The head is used for storing scratch buffer allocations before finalizing a
   // memory plan in this function. Ensure that the head is set to the largest
   // memory plan sent through the allocator:
-  TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
-      max_head_buffer_usage_, kBufferAlignment));
+  TF_LITE_ENSURE_STATUS(
+      non_persistent_buffer_allocator_->ReserveNonPersistentOverlayMemory(
+          max_head_buffer_usage_, MicroArenaBufferAlignment()));
   return kTfLiteOk;
 }
 
@@ -1127,7 +903,7 @@ TfLiteStatus MicroAllocator::AllocateScratchBufferHandles(
   // Allocate a consecutive block of memory store the scratch buffer handles.
   // This alignment ensures quick lookup during inference time for the model:
   *scratch_buffer_handles = reinterpret_cast<ScratchBufferHandle*>(
-      memory_allocator_->AllocateFromTail(
+      persistent_buffer_allocator_->AllocatePersistentBuffer(
           sizeof(ScratchBufferHandle) * handle_count,
           alignof(ScratchBufferHandle)));
 
@@ -1142,17 +918,24 @@ TfLiteStatus MicroAllocator::InitScratchBufferData() {
   // All requests will be stored in the head section. Each kernel is allowed at
   // most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most
   // that many requests to begin:
-  TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
-      sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
-      alignof(internal::ScratchBufferRequest)));
+  scratch_buffer_head_ =
+      non_persistent_buffer_allocator_->AllocateResizableBuffer(
+          sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
+          alignof(internal::ScratchBufferRequest));
+  if (scratch_buffer_head_ == nullptr) {
+    return kTfLiteError;
+  }
 
   return kTfLiteOk;
 }
 
 internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
-  return reinterpret_cast<internal::ScratchBufferRequest*>(
-      AlignPointerUp(memory_allocator_->GetHeadBuffer(),
-                     alignof(internal::ScratchBufferRequest)));
+  return reinterpret_cast<internal::ScratchBufferRequest*>(AlignPointerUp(
+      scratch_buffer_head_, alignof(internal::ScratchBufferRequest)));
+}
+
+TfLiteBridgeBuiltinDataAllocator* MicroAllocator::GetBuiltinDataAllocator() {
+  return builtin_data_allocator_;
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h
index 49294b8..ca2e27e 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,16 +18,18 @@ limitations under the License.
 #include <cstddef>
 #include <cstdint>
 
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/micro_memory_planner.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
 
+// TODO(b/199402574): rename to tflite_internal or just remove internal
+// namespace.
 namespace internal {
 
 // Sets up all of the data structure members for a TfLiteTensor based on the
@@ -35,10 +37,11 @@ namespace internal {
 // TODO(b/162311891): Drop this method when the interpreter has an API for
 // returning buffers on TfLiteEvalTensor.
 TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
-    SimpleMemoryAllocator* allocator, bool allocate_temp,
-    const tflite::Tensor& flatbuffer_tensor,
+    IPersistentBufferAllocator* persistent_buffer_allocator,
+    INonPersistentBufferAllocator* non_persistent_buffer_allocator,
+    bool allocate_temp, const tflite::Tensor& flatbuffer_tensor,
     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
-    ErrorReporter* error_reporter, TfLiteTensor* result);
+    TfLiteTensor* result);
 
 // Holds placeholder information for a scratch buffer request from a kernel.
 // This struct is only used during the model prepare stage. Each request from a
@@ -50,7 +53,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
 // of a sequential, array of ScratchBufferHandle allocations in the tail
 // section. These allocations are indexed by the request API defined in the
 // TfLiteContext struct.
-typedef struct {
+struct ScratchBufferRequest {
   // Number of bytes required by the buffer. The actual allocated size might be
   // greater than `bytes` due to buffer alignment.
   size_t bytes;
@@ -58,22 +61,30 @@ typedef struct {
   // determine the lifetime of the buffer. In AllocationInfo, this buffer will
   // have `before` = node_idx and `after` = node_idx.
   int node_idx;
-} ScratchBufferRequest;
+  int subgraph_idx;
+};
 
 }  // namespace internal
 
-typedef struct {
+struct NodeAndRegistration {
   TfLiteNode node;
   const TfLiteRegistration* registration;
-} NodeAndRegistration;
+};
 
 // Holds a pointer to a buffer for a scratch buffer requested by a kernel during
 // the model prepare stage. This struct is allocated in-place and allows for
 // quick pointer-indexed lookup for speed during model inference.
-typedef struct {
+struct ScratchBufferHandle {
   // Pointer to location of the scratch buffer:
   uint8_t* data;
-} ScratchBufferHandle;
+};
+
+// Stores all per-subgraph allocations. This includes the node and registration
+// array, and tensor list for each subgraph.
+struct SubgraphAllocations {
+  NodeAndRegistration* node_and_registrations;
+  TfLiteEvalTensor* tensors;
+};
 
 // Allocator responsible for allocating memory for all intermediate tensors
 // necessary to invoke a model.
@@ -84,9 +95,9 @@ typedef struct {
 //
 // The MicroAllocator simply plans out additional allocations that are required
 // to standup a model for inference in TF Micro. This class currently relies on
-// an additional allocator - SimpleMemoryAllocator - for all allocations from an
-// arena. These allocations are divided into head (non-persistent) and tail
-// (persistent) regions:
+// an additional allocator - SingleArenaBufferAllocator - for all allocations
+// from an arena. These allocations are divided into head (non-persistent) and
+// tail (persistent) regions:
 //
 // Memory layout to help understand how it works
 // This information could change in the future version.
@@ -101,41 +112,64 @@ typedef struct {
 class MicroAllocator {
  public:
   // Creates a MicroAllocator instance from a given tensor arena. This arena
-  // will be managed by the created instance.
-  // Note: Please use __declspec(align(16)) to make sure tensor_arena is 16
+  // will be managed by the created instance. The GreedyMemoryPlanner will
+  // by default be used and created on the arena.
+  // Note: Please use alignas(16) to make sure tensor_arena is 16
   // bytes aligned, otherwise some head room will be wasted.
   // TODO(b/157615197): Cleanup constructor + factory usage.
-  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
-                                ErrorReporter* error_reporter);
-
-  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
-  // intance. This allocator instance will use the SimpleMemoryAllocator
-  // instance to manage allocations internally.
-  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
-                                ErrorReporter* error_reporter);
+  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size);
 
-  // Begin allocating internal resources required for model inference.
+  // Creates a MicroAllocator instance from a given tensor arena and a given
+  // MemoryPlanner. This arena will be managed by the created instance. Note:
+  // Please use alignas(16) to make sure tensor_arena is 16 bytes
+  // aligned, otherwise some head room will be wasted.
+  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
+                                MicroMemoryPlanner* memory_planner);
+
+  // Creates a MicroAllocator instance using the provided
+  // SingleArenaBufferAllocator instance and the MemoryPlanner. This allocator
+  // instance will use the SingleArenaBufferAllocator instance to manage
+  // allocations internally.
+  static MicroAllocator* Create(SingleArenaBufferAllocator* memory_allocator,
+                                MicroMemoryPlanner* memory_planner);
+
+  // Creates a MicroAllocator instance from two separately provided tensor
+  // arenas: a persistent arena (persistent_tensor_arena) and a non-persistent
+  // arena (non_persistent_tensor_arena), each paired with its own size.
+  // Unlike the overloads above, no MemoryPlanner is passed in.
+  static MicroAllocator* Create(uint8_t* persistent_tensor_arena,
+                                size_t persistent_arena_size,
+                                uint8_t* non_persistent_tensor_arena,
+                                size_t non_persistent_arena_size);
+
+  // Returns the fixed amount of memory overhead of MicroAllocator.
+  static size_t GetDefaultTailUsage(bool is_memory_planner_given);
+
+  // Allocates internal resources required for model inference for each subgraph
+  // from the arena.
+  //
   // This method will run through the flatbuffer data supplied in the model to
   // properly allocate tensor, node, and op registration data. This method is
-  // expected to be followed with a call to FinishModelAllocation() before
-  // resuming allocation with another model. All persistent tensor buffers are
-  // stored in the out-param eval_tensors. This value is allocated from the
-  // persistent memory arena and will be used to host runtime tensor buffers.
-  TfLiteStatus StartModelAllocation(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration** node_and_registrations,
-      TfLiteEvalTensor** eval_tensors);
+  // expected to be followed with a call to FinishModelAllocation(). Returns a
+  // pointer to an array of SubgraphAllocations (also stored in the tail of the
+  // arena) where each index corresponds to a different subgraph in the model.
+  // Return value is nullptr if the allocations failed.
+  SubgraphAllocations* StartModelAllocation(const Model* model);
 
   // Finish allocating internal resources required for model inference.
-  // This method will plan non-persistent buffers and commit a memory plan to
-  // the 'head' section of the memory arena. All variable tensor data will also
-  // be allocated. This method should be called after assigning model resources
-  // in StartModelAllocation(). The eval_tensors pointer should be the value
-  // passed into this class during StartModelAllocation(). Scratch buffer
-  // handles are stored in the out-param `scratch_buffer_handles`. This value
-  // will be used in `GetScratchBuffer` call to retrieve scratch buffers.
+  //
+  // -Plan the memory for activation tensors and scratch buffers.
+  // -Update eval tensors for each subgraph based on planned offsets.
+  // -Allocate scratch buffer handles array and update based on planned offsets.
+  //
+  // This method should be called after assigning model resources
+  // in StartModelAllocation(). The subgraph_allocations pointer should be the
+  // value returned by StartModelAllocation(). Scratch buffer
+  // handles are stored in the out-param `scratch_buffer_handles` array which is
+  // allocated in this method. This value will be used in `GetScratchBuffer`
+  // call to retrieve scratch buffers.
   TfLiteStatus FinishModelAllocation(
-      const Model* model, TfLiteEvalTensor* eval_tensors,
+      const Model* model, SubgraphAllocations* subgraph_allocations,
       ScratchBufferHandle** scratch_buffer_handles);
 
   // Allocates a TfLiteTensor struct and populates the returned value with
@@ -145,22 +179,30 @@ class MicroAllocator {
   // class during StartModelAllocation() and contains the source-of-truth for
   // buffers.
   virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
-      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
+      const Model* model, const SubgraphAllocations* subgraph_allocations,
+      int tensor_index, int subgraph_index);
 
   // Allocates a TfLiteTensor struct and populates the returned value with
   // properties from the model flatbuffer. This struct is allocated from
   // temporary arena memory is only guaranteed until a call is made to
-  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
-  // into this class during StartModelAllocation() and contains the
-  // source-of-truth for buffers.
-  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
-                                                 TfLiteEvalTensor* eval_tensors,
-                                                 int tensor_index);
+  // ResetTempAllocations(). subgraph_allocations contains the array of
+  // TfLiteEvalTensors. If the newly allocated temp at the specified subgraph
+  // and tensor index is already present in the TfLiteEvalTensor array, its
+  // data buffer will be re-used.
+  virtual TfLiteTensor* AllocateTempTfLiteTensor(
+      const Model* model, const SubgraphAllocations* subgraph_allocations,
+      int tensor_index, int subgraph_index);
+
+  virtual void DeallocateTempTfLiteTensor(TfLiteTensor*);
 
   // Resets all temporary allocations. This method should be called after a
   // chain of temp allocations (e.g. chain of TfLiteTensor objects via
   // AllocateTfLiteTensor()).
-  virtual void ResetTempAllocations();
+  virtual TfLiteStatus ResetTempAllocations();
+
+  // Returns true if all temporary buffers including temp TfLiteTensor are
+  // already deallocated.
+  virtual bool IsAllTempDeallocated();
 
   // Allocates persistent buffer which has the same life time as the allocator.
   // The memory is immediately available and is allocated from the tail of the
@@ -171,7 +213,8 @@ class MicroAllocator {
   // This method only requests a buffer with a given size to be used after a
   // model has finished allocation via FinishModelAllocation(). All requested
   // buffers will be accessible by the out-param in that method.
-  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);
+  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int subgraph_idx,
+                                           int* buffer_idx);
 
   // Finish allocating a specific NodeAndRegistration prepare block (kernel
   // entry for a model) with a given node ID. This call ensures that any scratch
@@ -183,53 +226,48 @@ class MicroAllocator {
   // `FinishModelAllocation`. Otherwise, it will return 0.
   size_t used_bytes() const;
 
+  TfLiteBridgeBuiltinDataAllocator* GetBuiltinDataAllocator();
+
  protected:
-  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
-                 ErrorReporter* error_reporter);
+  MicroAllocator(SingleArenaBufferAllocator* memory_allocator,
+                 MicroMemoryPlanner* memory_planner);
+  MicroAllocator(IPersistentBufferAllocator* persistent_buffer_allocator,
+                 INonPersistentBufferAllocator* non_persistent_buffer_allocator,
+                 MicroMemoryPlanner* memory_planner);
   virtual ~MicroAllocator();
 
   // Allocates an array in the arena to hold pointers to the node and
   // registration pointers required to represent the inference graph of the
   // model.
   virtual TfLiteStatus AllocateNodeAndRegistrations(
-      const Model* model, NodeAndRegistration** node_and_registrations);
-
-  // Populates node and registration pointers representing the inference graph
-  // of the model from values inside the flatbuffer (loaded from the TfLiteModel
-  // instance). Persistent data (e.g. operator data) is allocated from the
-  // arena.
-  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration* node_and_registrations);
+      const Model* model, SubgraphAllocations* subgraph_allocations);
 
   // Allocates the list of persistent TfLiteEvalTensors that are used for the
   // "eval" phase of model inference. These structs will be the source of truth
-  // for all tensor buffers. Allocation results are stored in the out-param
-  // eval_tensors.
+  // for all tensor buffers.
   virtual TfLiteStatus AllocateTfLiteEvalTensors(
-      const Model* model, TfLiteEvalTensor** eval_tensors);
+      const Model* model, SubgraphAllocations* subgraph_allocations);
 
   // Allocates persistent tensor buffers for variable tensors in the subgraph.
-  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
-                                         TfLiteEvalTensor* eval_tensors);
+  // Online and offline variable tensors are handled differently hence the
+  // offline_planner_offsets parameter is needed.
+  virtual TfLiteStatus AllocateVariables(
+      const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+      const int32_t* offline_planner_offsets);
 
   // Allocate and return a persistent TfLiteTensor.
   // TODO(b/162311891): Drop this method when the interpreter has an API for
   // accessing TfLiteEvalTensor structs.
-  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
-      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
+  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal();
 
   // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
   // quantization data is allocated from either the tail (persistent) or temp
   // sections of the arena based on the allocation flag.
-  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
-      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
-      int tensor_index, bool allocate_temp);
-
-  ErrorReporter* error_reporter() const;
-
-  // Returns the first subgraph from the model.
-  const SubGraph* GetSubGraphFromModel(const Model* model);
+  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
+                                                          TfLiteTensor* tensor,
+                                                          int tensor_index,
+                                                          int subgraph_idx,
+                                                          bool allocate_temp);
 
  private:
   // Commits a memory plan for all non-persistent buffer allocations in the
@@ -240,8 +278,7 @@ class MicroAllocator {
   // ScratchBufferHandle structs that will point to allocated buffers also in
   // the head section.
   virtual TfLiteStatus CommitStaticMemoryPlan(
-      const Model* model, const SubGraph* subgraph,
-      TfLiteEvalTensor* eval_tensors,
+      const Model* model, SubgraphAllocations* allocations,
       ScratchBufferHandle* scratch_buffer_handles);
 
   // Allocates an array of ScratchBufferHandle structs in the tail section for a
@@ -259,15 +296,24 @@ class MicroAllocator {
   internal::ScratchBufferRequest* GetScratchBufferRequests();
 
   // A simple memory allocator that always allocate from the arena tail or head.
-  SimpleMemoryAllocator* memory_allocator_;
+  INonPersistentBufferAllocator* non_persistent_buffer_allocator_;
+  IPersistentBufferAllocator* persistent_buffer_allocator_;
+
+  // Allocator used to allocate persistent builtin data.
+  TfLiteBridgeBuiltinDataAllocator* builtin_data_allocator_;
+
+  // Activation buffer memory planner.
+  MicroMemoryPlanner* memory_planner_;
 
-  ErrorReporter* error_reporter_;
   bool model_is_allocating_;
 
   // Holds the number of ScratchBufferRequest instances stored in the head
   // section when a model is allocating.
   size_t scratch_buffer_request_count_ = 0;
 
+  // Holds ScratchBufferRequest when a model is allocating
+  uint8_t* scratch_buffer_head_ = nullptr;
+
   // Holds the byte length of the memory plan with the largest head usage. Used
   // to ensure that multi-tenant allocations can share the head for buffers.
   size_t max_head_buffer_usage_ = 0;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h
new file mode 100644
index 0000000..8282817
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h
@@ -0,0 +1,28 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_
+
+namespace tflite {
+
+// The default buffer alignment requirement.
+// We align tensor buffers to 16-byte boundaries, since this is a common
+// requirement for SIMD extensions.
+constexpr int MicroArenaBufferAlignment() { return 16; }
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_ARENA_CONSTANTS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cc
new file mode 100644
index 0000000..b0a4244
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_context.cc
@@ -0,0 +1,129 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+MicroContext::MicroContext(MicroAllocator* allocator, const Model* model,
+                           MicroGraph* graph)
+    : allocator_(*allocator), graph_(*graph), model_(model) {}
+
+MicroContext::~MicroContext() {}
+
+void* MicroContext::AllocatePersistentBuffer(size_t bytes) {
+  return allocator_.AllocatePersistentBuffer(bytes);
+}
+
+TfLiteStatus MicroContext::RequestScratchBufferInArena(size_t bytes,
+                                                       int* buffer_idx) {
+  return allocator_.RequestScratchBufferInArena(
+      bytes, graph_.GetCurrentSubgraphIndex(), buffer_idx);
+}
+
+void* MicroContext::GetScratchBuffer(int buffer_idx) {
+  ScratchBufferHandle* handle = scratch_buffer_handles_ + buffer_idx;
+  return handle->data;
+}
+
+TfLiteTensor* MicroContext::AllocateTempTfLiteTensor(int tensor_idx) {
+  return allocator_.AllocateTempTfLiteTensor(model_, graph_.GetAllocations(),
+                                             tensor_idx,
+                                             graph_.GetCurrentSubgraphIndex());
+}
+
+int MicroContext::GetTensorIndex(int index, int max_size,
+                                 const int* tensor_indices) {
+  if (index >= 0 && index < max_size) {
+    const int tensor_index = tensor_indices[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      return tensor_index;
+    }
+  }
+  return -1;
+}
+
+TfLiteTensor* MicroContext::AllocateTempInputTensor(const TfLiteNode* node,
+                                                    int index) {
+  const int tensor_index =
+      GetTensorIndex(index, node->inputs->size, node->inputs->data);
+  if (tensor_index < 0) {
+    return nullptr;
+  }
+  return AllocateTempTfLiteTensor(tensor_index);
+}
+
+TfLiteTensor* MicroContext::AllocateTempOutputTensor(const TfLiteNode* node,
+                                                     int index) {
+  const int tensor_index =
+      GetTensorIndex(index, node->outputs->size, node->outputs->data);
+  if (tensor_index < 0) {
+    return nullptr;
+  }
+  return AllocateTempTfLiteTensor(tensor_index);
+}
+
+TfLiteTensor* MicroContext::AllocateTempIntermediateTensor(
+    const TfLiteNode* node, int index) {
+  const int tensor_index = GetTensorIndex(index, node->intermediates->size,
+                                          node->intermediates->data);
+  if (tensor_index < 0) {
+    return nullptr;
+  }
+  return AllocateTempTfLiteTensor(tensor_index);
+}
+
+void MicroContext::DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
+  return allocator_.DeallocateTempTfLiteTensor(tensor);
+}
+
+TfLiteEvalTensor* MicroContext::GetEvalTensor(int tensor_idx) {
+  return &graph_.GetAllocations()[graph_.GetCurrentSubgraphIndex()]
+              .tensors[tensor_idx];
+}
+
+void MicroContext::SetScratchBufferHandles(
+    ScratchBufferHandle* scratch_buffer_handles) {
+  scratch_buffer_handles_ = scratch_buffer_handles;
+}
+
+TfLiteStatus MicroContext::set_external_context(
+    void* external_context_payload) {
+  if (external_context_payload == nullptr ||
+      external_context_payload_ != nullptr) {
+    MicroPrintf(
+        "Attempting to set external context to %x but it was %x already",
+        external_context_payload, external_context_payload_);
+    return kTfLiteError;
+  }
+
+  external_context_payload_ = external_context_payload;
+  return kTfLiteOk;
+}
+
+void MicroContextReportOpError(struct TfLiteContext* context,
+                               const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  Log(format, args);
+  va_end(args);
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h
new file mode 100644
index 0000000..65a64b2
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_context.h
@@ -0,0 +1,161 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+
+namespace tflite {
+// MicroContext is eventually going to become the API between TFLM and the
+// kernels, replacing all the functions in TfLiteContext. The end state is for
+// kernels to have code like:
+//
+// MicroContext* micro_context = GetMicroContext(context);
+// micro_context-><TFLM kernel API>
+class MicroContext {
+ public:
+  // Does not take any ownership, and all pointers must refer to valid objects
+  // that outlive the one constructed.
+  explicit MicroContext(MicroAllocator* allocator, const Model* model,
+                        MicroGraph* graph);
+  virtual ~MicroContext();
+
+  // Allocate persistent buffer which has the same life time as the interpreter.
+  // Returns nullptr on failure.
+  // The memory is allocated from the tail.
+  // This method is only available in Init or Prepare stage.
+  // Virtual so that it can be faked for kernel tests.
+  virtual void* AllocatePersistentBuffer(size_t bytes);
+
+  // Request a scratch buffer in the arena through static memory planning.
+  // This method is only available in Prepare stage and the buffer is allocated
+  // by the interpreter between Prepare and Eval stage. In Eval stage,
+  // GetScratchBuffer API can be used to fetch the address.
+  // Virtual so that it can be faked for kernel tests.
+  virtual TfLiteStatus RequestScratchBufferInArena(size_t bytes,
+                                                   int* buffer_idx);
+
+  // Get the scratch buffer pointer.
+  // This method is only available in Eval stage.
+  // Virtual so that it can be faked for kernel tests.
+  virtual void* GetScratchBuffer(int buffer_idx);
+
+  // Returns a temporary TfLiteTensor struct for a given index.
+  // Virtual so that it can be faked for kernel tests.
+  virtual TfLiteTensor* AllocateTempTfLiteTensor(int tensor_idx);
+
+  // Returns a temporary TfLiteTensor struct for the specified input tensor of a
+  // given node. This is the recommended API over the deprecated
+  // GetInput/GetInputSafe to get a temp input tensor. The returned tensor shall
+  // be freed via calling DeallocateTempTfLiteTensor.
+  virtual TfLiteTensor* AllocateTempInputTensor(const TfLiteNode* node,
+                                                int index);
+
+  // Returns a temporary TfLiteTensor struct for the specified output tensor of
+  // a given node. This is the recommended API over the deprecated
+  // GetOutput/GetOutputSafe to get a temp output tensor. The returned tensor
+  // shall be freed via calling DeallocateTempTfLiteTensor.
+  virtual TfLiteTensor* AllocateTempOutputTensor(const TfLiteNode* node,
+                                                 int index);
+
+  // Returns a temporary TfLiteTensor struct for the specified intermediate
+  // tensor of a given node. This is the recommended API over the deprecated
+  // GetIntermediates/GetIntermediatesSafe to get a temp intermediate tensor.
+  // The returned tensor shall be freed via calling DeallocateTempTfLiteTensor.
+  virtual TfLiteTensor* AllocateTempIntermediateTensor(const TfLiteNode* node,
+                                                       int index);
+
+  // Deallocates a temp TfLiteTensor.
+  // Virtual so that it can be faked for kernel tests.
+  virtual void DeallocateTempTfLiteTensor(TfLiteTensor* tensor);
+
+  // Returns a TfLiteEvalTensor struct for a given index.
+  // Virtual so that it can be faked for kernel tests.
+  virtual TfLiteEvalTensor* GetEvalTensor(int tensor_idx);
+
+  // Does not take ownership of the pointer and the pointer must refer to a
+  // valid object that outlives this class instance.
+  // This can only be called once to set one external context.
+  TfLiteStatus set_external_context(void* external_context_payload);
+
+  void* external_context() { return external_context_payload_; }
+
+  MicroGraph& graph() { return graph_; }
+
+  // Sets the pointer to a list of ScratchBufferHandle instances.
+  // Not API between TFLM and kernels. Primarily used by the framework for
+  // housekeeping in MicroContext.
+  void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
+
+ private:
+  // Return the tensor index as tensor_indices[index]. tensor_indices is of
+  // max_size. Return -1 if index is not in the valid range of tensor_indices.
+  int GetTensorIndex(int index, int max_size, const int* tensor_indices);
+
+  MicroAllocator& allocator_;
+  MicroGraph& graph_;
+  const Model* model_;
+
+  ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
+  void* external_context_payload_ = nullptr;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+inline MicroContext* GetMicroContext(const struct TfLiteContext* context) {
+  return reinterpret_cast<MicroContext*>(context->impl_);
+}
+
+// Deprecated API. Prefer using the MicroContext API directly from the
+// kernels.
+// TODO(b/213010668): migrate all existing kernels to use MicroContext, delete
+// these functions, and remove corresponding members from the TfLiteContext
+// struct for TFLM.
+inline void* MicroContextAllocatePersistentBuffer(TfLiteContext* ctx,
+                                                  size_t bytes) {
+  return GetMicroContext(ctx)->AllocatePersistentBuffer(bytes);
+}
+inline TfLiteStatus MicroContextRequestScratchBufferInArena(TfLiteContext* ctx,
+                                                            size_t bytes,
+                                                            int* buffer_idx) {
+  return GetMicroContext(ctx)->RequestScratchBufferInArena(bytes, buffer_idx);
+}
+inline void* MicroContextGetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
+  return GetMicroContext(ctx)->GetScratchBuffer(buffer_idx);
+}
+inline TfLiteTensor* MicroContextGetTensor(const struct TfLiteContext* context,
+                                           int tensor_idx) {
+  return GetMicroContext(context)->AllocateTempTfLiteTensor(tensor_idx);
+}
+inline TfLiteEvalTensor* MicroContextGetEvalTensor(
+    const struct TfLiteContext* context, int tensor_idx) {
+  return GetMicroContext(context)->GetEvalTensor(tensor_idx);
+}
+inline TfLiteExternalContext* MicroContextGetExternalContext(
+    TfLiteContext* context, TfLiteExternalContextType unused) {
+  return reinterpret_cast<TfLiteExternalContext*>(
+      GetMicroContext(context)->external_context());
+}
+
+// Requests that an error be reported with format string msg.
+void MicroContextReportOpError(struct TfLiteContext* context,
+                               const char* format, ...);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_CONTEXT_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cc
index 00a88be..f15cfcc 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.cc
@@ -19,39 +19,14 @@ limitations under the License.
 #include <cstdint>
 #include <new>
 
-#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
-#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h"
-#endif
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace {
 uint8_t micro_error_reporter_buffer[sizeof(tflite::MicroErrorReporter)];
 tflite::MicroErrorReporter* error_reporter_ = nullptr;
 
-void Log(const char* format, va_list args) {
-#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
-  // Only pulling in the implementation of this function for builds where we
-  // expect to make use of it to be extra cautious about not increasing the code
-  // size.
-  static constexpr int kMaxLogLen = 256;
-  char log_buffer[kMaxLogLen];
-  MicroVsnprintf(log_buffer, kMaxLogLen, format, args);
-  DebugLog(log_buffer);
-  DebugLog("\r\n");
-#endif
-}
-
 }  // namespace
 
-#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
-void MicroPrintf(const char* format, ...) {
-  va_list args;
-  va_start(args, format);
-  Log(format, args);
-  va_end(args);
-}
-#endif
-
 namespace tflite {
 ErrorReporter* GetMicroErrorReporter() {
   if (error_reporter_ == nullptr) {
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h
index d2fd174..20a2423 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h
@@ -12,29 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#ifndef TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
-#define TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
+#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_
+#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_
 
 #include <cstdarg>
 
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
 
-#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
-// This function can be used independent of the MicroErrorReporter to get
-// printf-like functionalitys and are common to all target platforms.
-void MicroPrintf(const char* format, ...);
-#else
-// We use a #define to ensure that the strings are completely stripped, to
-// prevent an unnecessary increase in the binary size.
-#define MicroPrintf(format, ...)
-#endif
-
 namespace tflite {
-
 // Get a pointer to a singleton global error reporter.
 ErrorReporter* GetMicroErrorReporter();
-
 class MicroErrorReporter : public ErrorReporter {
  public:
   ~MicroErrorReporter() override {}
@@ -46,4 +34,4 @@ class MicroErrorReporter : public ErrorReporter {
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_MICRO_ERROR_REPORTER_H_
+#endif  // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_MICRO_ERROR_REPORTER_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cc
new file mode 100644
index 0000000..fa43d6c
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.cc
@@ -0,0 +1,258 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace {
+
+// Printable op name: custom_name for CUSTOM ops, schema enum name otherwise.
+const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
+  const bool is_custom = registration->builtin_code == BuiltinOperator_CUSTOM;
+  return is_custom ? registration->custom_name
+                   : EnumNameBuiltinOperator(
+                         BuiltinOperator(registration->builtin_code));
+}
+
+}  // namespace
+
+// Stores the collaborators (pointers only, nothing is copied) and caches the
+// model's subgraph list; a null model yields a null list, never garbage.
+MicroGraph::MicroGraph(TfLiteContext* context, const Model* model,
+                       MicroAllocator* allocator,
+                       MicroResourceVariables* resource_variables)
+    : context_(context),
+      model_(model),
+      allocator_(allocator),
+      current_subgraph_index_(0),
+      resource_variables_(resource_variables),
+      subgraphs_(model != nullptr ? model->subgraphs() : nullptr) {
+}
+
+MicroGraph::~MicroGraph() {}
+
+// Calls the optional init hook of every operator in every subgraph and keeps
+// the hook's return value in the node's user_data. Always returns kTfLiteOk.
+TfLiteStatus MicroGraph::InitSubgraphs() {
+  const int saved_subgraph_idx = current_subgraph_index_;
+
+  for (size_t sg = 0; sg < subgraphs_->size(); sg++) {
+    // Publish the subgraph being walked as the current one.
+    current_subgraph_index_ = sg;
+    const uint32_t op_count = NumSubgraphOperators(model_, sg);
+    for (size_t op = 0; op < op_count; ++op) {
+      auto& entry = subgraph_allocations_[sg].node_and_registrations[op];
+      TfLiteNode* node = &entry.node;
+      const TfLiteRegistration* registration = entry.registration;
+      if (!registration->init) {
+        continue;
+      }
+
+      // CUSTOM ops get custom_initial_data and its size; every other op gets
+      // builtin_data with a size of zero.
+      const bool is_custom =
+          registration->builtin_code == BuiltinOperator_CUSTOM;
+      const void* raw_data =
+          is_custom ? node->custom_initial_data : node->builtin_data;
+      const size_t raw_size = is_custom ? node->custom_initial_data_size : 0;
+      node->user_data = registration->init(
+          context_, reinterpret_cast<const char*>(raw_data), raw_size);
+    }
+  }
+
+  // Put back the index that was active on entry.
+  current_subgraph_index_ = saved_subgraph_idx;
+  return kTfLiteOk;
+}
+
+// Runs every operator's optional prepare hook. Failures are logged; with
+// run_all_prep_ops the rest of that subgraph is still prepared before failing.
+TfLiteStatus MicroGraph::PrepareSubgraphs(bool run_all_prep_ops) {
+  int previous_subgraph_idx = current_subgraph_index_;
+  bool all_prep_ops_ok = true;
+
+  for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
+       subgraph_idx++) {
+    current_subgraph_index_ = subgraph_idx;
+    uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
+    for (size_t i = 0; i < operators_size; ++i) {
+      TfLiteNode* node =
+          &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
+      const TfLiteRegistration* registration =
+          subgraph_allocations_[subgraph_idx]
+              .node_and_registrations[i]
+              .registration;
+      if (registration->prepare != nullptr) {
+        TfLiteStatus prepare_status = registration->prepare(context_, node);
+        if (prepare_status != kTfLiteOk) {
+          // %d expects an int, so the size_t op index is narrowed explicitly.
+          MicroPrintf("Node %s (number %d) failed to prepare with status %d",
+                      OpNameFromRegistration(registration),
+                      static_cast<int>(i), prepare_status);
+          all_prep_ops_ok = false;
+          if (!run_all_prep_ops) {
+            return kTfLiteError;
+          }
+        }
+      }
+      allocator_->FinishPrepareNodeAllocations(/*node_id=*/i);
+    }
+    if (!all_prep_ops_ok) {
+      return kTfLiteError;
+    }
+  }
+  current_subgraph_index_ = previous_subgraph_idx;
+  return kTfLiteOk;
+}
+
+// Invokes the optional free hook of every operator in every subgraph, handing
+// it the node's user_data. Always returns kTfLiteOk.
+TfLiteStatus MicroGraph::FreeSubgraphs() {
+  const int saved_subgraph_idx = current_subgraph_index_;
+
+  for (size_t sg = 0; sg < subgraphs_->size(); sg++) {
+    // Publish the subgraph being walked as the current one.
+    current_subgraph_index_ = sg;
+    const uint32_t op_count = NumSubgraphOperators(model_, sg);
+    for (size_t op = 0; op < op_count; ++op) {
+      auto& entry = subgraph_allocations_[sg].node_and_registrations[op];
+      const TfLiteRegistration* registration = entry.registration;
+      // The registration comes from outside the interpreter, so guard against
+      // a null pointer before looking for the hook.
+      if (registration == nullptr || registration->free == nullptr) {
+        continue;
+      }
+      registration->free(context_, entry.node.user_data);
+    }
+  }
+
+  // Put back the index that was active on entry.
+  current_subgraph_index_ = saved_subgraph_idx;
+  return kTfLiteOk;
+}
+
+// Runs the operators of one subgraph in order, stopping at the first status
+// other than kTfLiteOk. A bad index is rejected before any state changes.
+TfLiteStatus MicroGraph::InvokeSubgraph(int subgraph_idx) {
+  // Validate first so a bad index never becomes the current subgraph index.
+  if (static_cast<size_t>(subgraph_idx) >= subgraphs_->size()) {
+    MicroPrintf("Accessing subgraph %d but only %d subgraphs found",
+                subgraph_idx, subgraphs_->size());
+    return kTfLiteError;
+  }
+  int previous_subgraph_idx = current_subgraph_index_;
+  current_subgraph_index_ = subgraph_idx;
+  uint32_t operators_size = NumSubgraphOperators(model_, subgraph_idx);
+  for (size_t i = 0; i < operators_size; ++i) {
+    TfLiteNode* node =
+        &(subgraph_allocations_[subgraph_idx].node_and_registrations[i].node);
+    const TfLiteRegistration* registration = subgraph_allocations_[subgraph_idx]
+                                                 .node_and_registrations[i]
+                                                 .registration;
+// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
+// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
+// only defined for builds with the error strings.
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+    ScopedMicroProfiler scoped_profiler(
+        OpNameFromRegistration(registration),
+        reinterpret_cast<MicroProfilerInterface*>(context_->profiler));
+#endif
+    TFLITE_DCHECK(registration->invoke);
+    TfLiteStatus invoke_status = registration->invoke(context_, node);
+
+    // All TfLiteTensor structs used in the kernel are allocated from temp
+    // memory in the allocator. This creates a chain of allocations in the
+    // temp section. The call below resets the chain of allocations to
+    // prepare for the next call.
+    allocator_->ResetTempAllocations();
+    if (invoke_status == kTfLiteError) {
+      MicroPrintf("Node %s (number %d) failed to invoke with status %d",
+                  OpNameFromRegistration(registration),
+                  static_cast<int>(i), invoke_status);
+      return kTfLiteError;
+    } else if (invoke_status != kTfLiteOk) {
+      return invoke_status;
+    }
+  }
+  current_subgraph_index_ = previous_subgraph_idx;
+  return kTfLiteOk;
+}
+
+TfLiteStatus MicroGraph::ResetVariableTensors() {
+  for (size_t subgraph_idx = 0; subgraph_idx < subgraphs_->size();
+       subgraph_idx++) {
+    const SubGraph* subgraph = (*subgraphs_)[subgraph_idx];
+    for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
+      auto* tensor = subgraph->tensors()->Get(i);
+      if (tensor->is_variable()) {
+        size_t buffer_size;
+        TF_LITE_ENSURE_STATUS(TfLiteEvalTensorByteLength(
+            &subgraph_allocations_[subgraph_idx].tensors[i], &buffer_size));
+        // Fill value: the first zero point for INT8 tensors, 0 for all others.
+        int value = 0;
+        if (tensor->type() == tflite::TensorType_INT8) {
+          value = tensor->quantization()->zero_point()->Get(0);
+        }
+        memset(subgraph_allocations_[subgraph_idx].tensors[i].data.raw, value,
+               buffer_size);
+      }
+    }
+  }
+  if (resource_variables_ != nullptr) {
+    resource_variables_->ResetAll();
+  }
+
+  return kTfLiteOk;
+}
+
+int MicroGraph::NumSubgraphs() { return model_->subgraphs()->size(); }
+
+void MicroGraph::SetSubgraphAllocations(
+    SubgraphAllocations* subgraph_allocations) {
+  subgraph_allocations_ = subgraph_allocations;
+}
+
+size_t MicroGraph::NumSubgraphInputs(int subgraph_idx) {
+  return model_->subgraphs()->Get(subgraph_idx)->inputs()->size();
+}
+
+TfLiteEvalTensor* MicroGraph::GetSubgraphInput(int subgraph_idx,
+                                               int input_idx) {
+  const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
+  const int tensor_idx = subgraph->inputs()->Get(input_idx);
+  return subgraph_allocations_[subgraph_idx].tensors + tensor_idx;
+}
+
+size_t MicroGraph::NumSubgraphOutputs(int subgraph_idx) {
+  return model_->subgraphs()->Get(subgraph_idx)->outputs()->size();
+}
+
+TfLiteEvalTensor* MicroGraph::GetSubgraphOutput(int subgraph_idx,
+                                                int output_idx) {
+  const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
+  const int tensor_idx = subgraph->outputs()->Get(output_idx);
+  return subgraph_allocations_[subgraph_idx].tensors + tensor_idx;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h
new file mode 100644
index 0000000..0e096c7
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h
@@ -0,0 +1,110 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// Abstracts the details of interacting with the tflite::Model.
+//
+// Provides methods to access, initialize, prepare, invoke and free any
+// subgraph in the tflite::Graph.
+class MicroGraph {
+ public:
+  // The lifetime of the context, model, allocator and resource_variables must
+  // be at least as long as that of the graph object, since this class may
+  // need to access them at any time. If resource_variables is a nullptr,
+  // GetResourceVariables will return a nullptr.
+  MicroGraph(TfLiteContext* context, const Model* model,
+             MicroAllocator* allocator,
+             MicroResourceVariables* resource_variables);
+  virtual ~MicroGraph();
+
+  // Sets up builtin data and calls TfLiteRegistration->Init for every operator
+  // in every subgraph in the model.
+  virtual TfLiteStatus InitSubgraphs();
+
+  // Calls TfLiteRegistration->Prepare for every operator in every subgraph in
+  // the model.
+  virtual TfLiteStatus PrepareSubgraphs(bool run_all_prep_ops);
+
+  // Calls TfLiteRegistration->Free for every operator in every subgraph in the
+  // model.
+  virtual TfLiteStatus FreeSubgraphs();
+
+  // Calls TfLiteRegistration->Invoke for every operator in a single subgraph in
+  // the model.
+  virtual TfLiteStatus InvokeSubgraph(int subgraph_idx);
+
+  // Resets all variable tensors (and resource variables, if any) in the model.
+  virtual TfLiteStatus ResetVariableTensors();
+
+  // Number of tensor inputs to a specified subgraph in the model.
+  virtual size_t NumSubgraphInputs(int subgraph_idx);
+
+  // Get the specified input tensor of a specified subgraph in the model.
+  virtual TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int input_idx);
+
+  // Number of tensor outputs from a specified subgraph in the model.
+  virtual size_t NumSubgraphOutputs(int subgraph_idx);
+
+  // Get the specified output tensor of a specified subgraph in the model.
+  virtual TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx, int output_idx);
+
+  // Number of subgraphs in the model.
+  virtual int NumSubgraphs();
+
+  // Hook to pass in subgraph allocations tracked within the interpreter,
+  // allowing MicroGraph to init / prepare / invoke subgraphs in the model.
+  void SetSubgraphAllocations(SubgraphAllocations* subgraph_allocations);
+
+  // Get the current subgraph index. Within an operator, this is guaranteed
+  // to be the subgraph of that operator.
+  int GetCurrentSubgraphIndex() { return current_subgraph_index_; }
+
+  // Set the current subgraph index.
+  void SetCurrentSubgraphIndex(int subgraph_idx)
+  {
+    current_subgraph_index_ = subgraph_idx;
+  }
+
+  // Gets the list of allocations for each subgraph. This is the source of truth
+  // for all per-subgraph allocation data.
+  SubgraphAllocations* GetAllocations() { return subgraph_allocations_; }
+
+  // Get the resource variables for this TFLM graph.
+  MicroResourceVariables* GetResourceVariables() { return resource_variables_; }
+
+ private:
+  TfLiteContext* context_;
+  const Model* model_;
+  MicroAllocator* allocator_;
+  SubgraphAllocations* subgraph_allocations_ = nullptr;
+  int current_subgraph_index_;
+  MicroResourceVariables* resource_variables_;
+  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_GRAPH_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cc
index b1b8e71..3c734fb 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,132 +19,64 @@ limitations under the License.
 #include <cstdint>
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/tensor_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
 
 namespace tflite {
-namespace {
-
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-const char* OpNameFromRegistration(const TfLiteRegistration* registration) {
-  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
-    return registration->custom_name;
-  } else {
-    return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code));
-  }
-}
-#endif  // !defined(TF_LITE_STRIP_ERROR_STRINGS)
-
-}  // namespace
-
-namespace internal {
-
-ContextHelper::ContextHelper(ErrorReporter* error_reporter,
-                             MicroAllocator* allocator, const Model* model)
-    : allocator_(allocator), error_reporter_(error_reporter), model_(model) {}
-
-void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx,
-                                              size_t bytes) {
-  return reinterpret_cast<ContextHelper*>(ctx->impl_)
-      ->allocator_->AllocatePersistentBuffer(bytes);
-}
-
-TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx,
-                                                        size_t bytes,
-                                                        int* buffer_idx) {
-  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
-  return helper->allocator_->RequestScratchBufferInArena(bytes, buffer_idx);
-}
-
-void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) {
-  ContextHelper* helper = reinterpret_cast<ContextHelper*>(ctx->impl_);
-  ScratchBufferHandle* handle = helper->scratch_buffer_handles_ + buffer_idx;
-  return handle->data;
-}
-
-void ContextHelper::ReportOpError(struct TfLiteContext* context,
-                                  const char* format, ...) {
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
-  va_list args;
-  va_start(args, format);
-  TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args);
-  va_end(args);
-#endif
-}
-
-TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context,
-                                       int tensor_idx) {
-  ContextHelper* helper = static_cast<ContextHelper*>(context->impl_);
-  return helper->allocator_->AllocateTempTfLiteTensor(
-      helper->model_, helper->eval_tensors_, tensor_idx);
-}
-
-TfLiteEvalTensor* ContextHelper::GetEvalTensor(
-    const struct TfLiteContext* context, int tensor_idx) {
-  ContextHelper* helper = reinterpret_cast<ContextHelper*>(context->impl_);
-  return &helper->eval_tensors_[tensor_idx];
-}
-
-void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) {
-  eval_tensors_ = eval_tensors;
-}
-
-void ContextHelper::SetScratchBufferHandles(
-    ScratchBufferHandle* scratch_buffer_handles) {
-  scratch_buffer_handles_ = scratch_buffer_handles;
-}
-
-}  // namespace internal
 
 MicroInterpreter::MicroInterpreter(const Model* model,
                                    const MicroOpResolver& op_resolver,
                                    uint8_t* tensor_arena,
                                    size_t tensor_arena_size,
-                                   ErrorReporter* error_reporter,
-                                   MicroProfiler* profiler)
+                                   MicroResourceVariables* resource_variables,
+                                   MicroProfilerInterface* profiler)
     : model_(model),
       op_resolver_(op_resolver),
-      error_reporter_(error_reporter),
-      allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size,
-                                         error_reporter)),
+      allocator_(*MicroAllocator::Create(tensor_arena, tensor_arena_size)),
+
+      graph_(&context_, model, &allocator_, resource_variables),
       tensors_allocated_(false),
       initialization_status_(kTfLiteError),
-      eval_tensors_(nullptr),
-      context_helper_(error_reporter_, &allocator_, model),
       input_tensors_(nullptr),
-      output_tensors_(nullptr) {
+      output_tensors_(nullptr),
+      micro_context_(&allocator_, model_, &graph_) {
   Init(profiler);
 }
 
 MicroInterpreter::MicroInterpreter(const Model* model,
                                    const MicroOpResolver& op_resolver,
                                    MicroAllocator* allocator,
-                                   ErrorReporter* error_reporter,
-                                   MicroProfiler* profiler)
+                                   MicroResourceVariables* resource_variables,
+                                   MicroProfilerInterface* profiler)
     : model_(model),
       op_resolver_(op_resolver),
-      error_reporter_(error_reporter),
       allocator_(*allocator),
+      graph_(&context_, model, allocator, resource_variables),
       tensors_allocated_(false),
       initialization_status_(kTfLiteError),
-      eval_tensors_(nullptr),
-      context_helper_(error_reporter_, &allocator_, model),
       input_tensors_(nullptr),
-      output_tensors_(nullptr) {
+      output_tensors_(nullptr),
+      micro_context_(&allocator_, model_, &graph_) {
   Init(profiler);
 }
 
 MicroInterpreter::~MicroInterpreter() {
+  if (graph_.GetAllocations() != nullptr) {
+    graph_.FreeSubgraphs();
+  }
+#ifdef EON_COMPILER_RUN
   if (node_and_registrations_ != nullptr) {
-    for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
+    for (size_t i = 0; i < model_->subgraphs()->Get(0)->operators()->size(); ++i) {
       TfLiteNode* node = &(node_and_registrations_[i].node);
       const TfLiteRegistration* registration =
           node_and_registrations_[i].registration;
@@ -155,111 +87,152 @@ MicroInterpreter::~MicroInterpreter() {
       }
     }
   }
+#endif
 }
 
-void MicroInterpreter::Init(MicroProfiler* profiler) {
-  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>>* subgraphs =
-      model_->subgraphs();
-  if (subgraphs->size() != 1) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Only 1 subgraph is currently supported.\n");
-    initialization_status_ = kTfLiteError;
-    return;
-  }
-  subgraph_ = (*subgraphs)[0];
-
-  context_.impl_ = static_cast<void*>(&context_helper_);
-  context_.ReportError = context_helper_.ReportOpError;
-  context_.GetTensor = context_helper_.GetTensor;
-  context_.GetEvalTensor = context_helper_.GetEvalTensor;
-  context_.recommended_num_threads = 1;
+void MicroInterpreter::Init(MicroProfilerInterface* profiler) {
+  context_.impl_ = static_cast<void*>(&micro_context_);
+  context_.ReportError = MicroContextReportOpError;
+  context_.GetTensor = MicroContextGetTensor;
+  context_.GetEvalTensor = MicroContextGetEvalTensor;
   context_.profiler = profiler;
 
   initialization_status_ = kTfLiteOk;
 }
 
+// Populates the registration and TfLiteNode of every operator in every
+// subgraph from the model flatbuffer. Returns kTfLiteOk on success, or an
+// error status as soon as one operator cannot be resolved or parsed.
+TfLiteStatus MicroInterpreter::PrepareNodeAndRegistrationDataFromFlatbuffer() {
+  for (int subgraph_idx = 0; subgraph_idx < graph_.NumSubgraphs();
+       subgraph_idx++) {
+    const SubGraph* subgraph = model_->subgraphs()->Get(subgraph_idx);
+    TFLITE_DCHECK(subgraph != nullptr);
+
+    auto* opcodes = model_->operator_codes();
+    TfLiteBridgeBuiltinDataAllocator* builtin_data_allocator =
+        allocator_.GetBuiltinDataAllocator();
+    uint32_t operators_size = NumSubgraphOperators(subgraph);
+    for (size_t i = 0; i < operators_size; ++i) {
+      const auto* op = subgraph->operators()->Get(i);
+      const size_t index = op->opcode_index();
+      if (index >= opcodes->size()) {
+        MicroPrintf("Missing registration for opcode_index %d\n",
+                    static_cast<int>(index));  // %d consumes an int
+        return kTfLiteError;
+      }
+      const auto* opcode = opcodes->Get(index);
+      auto& node_and_reg =
+          graph_.GetAllocations()[subgraph_idx].node_and_registrations[i];
+      TfLiteStatus status = GetRegistrationFromOpCode(
+          opcode, op_resolver_, &node_and_reg.registration);
+      if (status != kTfLiteOk) {
+        MicroPrintf("Failed to get registration from op code %s\n ",
+                    EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
+        return status;
+      }
+      const auto* registration = node_and_reg.registration;
+      if (registration == nullptr) {
+        MicroPrintf("Skipping op for opcode_index %d\n",
+                    static_cast<int>(index));  // %d consumes an int
+        return kTfLiteError;
+      }
+      BuiltinOperator op_type =
+          static_cast<BuiltinOperator>(registration->builtin_code);
+
+      const char* custom_data = nullptr;
+      size_t custom_data_size = 0;
+      void* builtin_data = nullptr;  // matches the parser's void** out-param
+
+      if (op_type == BuiltinOperator_CUSTOM) {
+        // Custom Ops may or may not have a non-null custom_options field.
+        if (op->custom_options() != nullptr) {
+          custom_data =
+              reinterpret_cast<const char*>(op->custom_options()->data());
+          custom_data_size = op->custom_options()->size();
+        }
+      } else {
+        if (op->custom_options() != nullptr) {
+          MicroPrintf(
+              "Unsupported behavior: found builtin operator %s with custom "
+              "options.\n",
+              EnumNameBuiltinOperator(op_type));
+          return kTfLiteError;
+        }
+
+        TfLiteBridgeBuiltinParseFunction parser =
+            op_resolver_.GetOpDataParser(op_type);
+        if (parser == nullptr) {
+          MicroPrintf("Did not find a parser for %s",
+                      EnumNameBuiltinOperator(op_type));
+          return kTfLiteError;
+        }
+        TF_LITE_ENSURE_STATUS(CallBuiltinParseFunction(
+            parser, op, builtin_data_allocator, &builtin_data));
+      }
+
+      TfLiteIntArray* inputs_array =
+          FlatBufferVectorToTfLiteTypeArray(op->inputs());
+      TfLiteIntArray* outputs_array =
+          FlatBufferVectorToTfLiteTypeArray(op->outputs());
+
+      TfLiteNode* node = &node_and_reg.node;
+      *node = {};
+      node->inputs = inputs_array;
+      node->outputs = outputs_array;
+      node->builtin_data = builtin_data;
+      node->custom_initial_data = custom_data;
+      node->custom_initial_data_size = custom_data_size;
+
+      if (op->intermediates() && (op->intermediates()->size() > 0)) {
+        node->intermediates =
+            FlatBufferVectorToTfLiteTypeArray(op->intermediates());
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
 TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
-  if (allocator_.StartModelAllocation(model_, op_resolver_,
-                                      &node_and_registrations_,
-                                      &eval_tensors_) != kTfLiteOk) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Failed starting model allocation.\n");
+  SubgraphAllocations* allocations = allocator_.StartModelAllocation(model_);
+
+  if (allocations == nullptr) {
+    MicroPrintf("Failed starting model allocation.\n");
     initialization_status_ = kTfLiteError;
     return kTfLiteError;
   }
 
-  // Update the pointer now that TfLiteEvalTensor allocation has completed on
-  // the context helper.
-  // TODO(b/16157777): This call would not be needed if ContextHelper rolled
-  // into the interpreter.
-  context_helper_.SetTfLiteEvalTensors(eval_tensors_);
-  context_.tensors_size = subgraph_->tensors()->size();
+  graph_.SetSubgraphAllocations(allocations);
+
+  TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer());
 
   // Only allow AllocatePersistentBuffer in Init stage.
-  context_.AllocatePersistentBuffer = context_helper_.AllocatePersistentBuffer;
+  context_.AllocatePersistentBuffer = MicroContextAllocatePersistentBuffer;
   context_.RequestScratchBufferInArena = nullptr;
   context_.GetScratchBuffer = nullptr;
-
-  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
-    auto* node = &(node_and_registrations_[i].node);
-    auto* registration = node_and_registrations_[i].registration;
-    size_t init_data_size;
-    const char* init_data;
-    if (registration->builtin_code == BuiltinOperator_CUSTOM) {
-      init_data = reinterpret_cast<const char*>(node->custom_initial_data);
-      init_data_size = node->custom_initial_data_size;
-    } else {
-      init_data = reinterpret_cast<const char*>(node->builtin_data);
-      init_data_size = 0;
-    }
-    if (registration->init) {
-      node->user_data =
-          registration->init(&context_, init_data, init_data_size);
-    }
-  }
-
-  bool all_prep_ops_ok = true;
+  context_.GetExternalContext = nullptr;
+  TF_LITE_ENSURE_STATUS(graph_.InitSubgraphs());
 
   // Both AllocatePersistentBuffer and RequestScratchBufferInArena is
   // available in Prepare stage.
   context_.RequestScratchBufferInArena =
-      context_helper_.RequestScratchBufferInArena;
-  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
-    auto* node = &(node_and_registrations_[i].node);
-    auto* registration = node_and_registrations_[i].registration;
-    if (registration->prepare) {
-      TfLiteStatus prepare_status = registration->prepare(&context_, node);
-      if (prepare_status != kTfLiteOk) {
-        TF_LITE_REPORT_ERROR(
-            error_reporter_,
-            "Node %s (number %df) failed to prepare with status %d",
-            OpNameFromRegistration(registration), i, prepare_status);
-
-        all_prep_ops_ok = false;
-
-        if (!run_all_prep_ops) {
-          return kTfLiteError;
-        }
-      }
-    }
-    allocator_.FinishPrepareNodeAllocations(/*node_id=*/i);
-  }
+      MicroContextRequestScratchBufferInArena;
+  // The external context becomes available in the Prepare stage.
+  context_.GetExternalContext = MicroContextGetExternalContext;
 
-  if (!all_prep_ops_ok) {
-    return kTfLiteError;
-  }
+  TF_LITE_ENSURE_STATUS(graph_.PrepareSubgraphs(run_all_prep_ops));
 
   // Prepare is done, we're ready for Invoke. Memory allocation is no longer
   // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer.
   context_.AllocatePersistentBuffer = nullptr;
   context_.RequestScratchBufferInArena = nullptr;
-  context_.GetScratchBuffer = context_helper_.GetScratchBuffer;
+  context_.GetScratchBuffer = MicroContextGetScratchBuffer;
+
+  TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(
+                                   model_, graph_.GetAllocations(),
+                                   &scratch_buffer_handles_));
 
-  TF_LITE_ENSURE_OK(&context_,
-                    allocator_.FinishModelAllocation(model_, eval_tensors_,
-                                                     &scratch_buffer_handles_));
-  // TODO(b/16157777): Remove this when ContextHelper is rolled into this class.
-  context_helper_.SetScratchBufferHandles(scratch_buffer_handles_);
+  micro_context_.SetScratchBufferHandles(scratch_buffer_handles_);
 
   // TODO(b/162311891): Drop these allocations when the interpreter supports
   // handling buffers from TfLiteEvalTensor.
@@ -267,8 +240,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
       reinterpret_cast<TfLiteTensor**>(allocator_.AllocatePersistentBuffer(
           sizeof(TfLiteTensor*) * inputs_size()));
   if (input_tensors_ == nullptr) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
+    MicroPrintf(
         "Failed to allocate memory for context->input_tensors_, "
         "%d bytes required",
         sizeof(TfLiteTensor*) * inputs_size());
@@ -277,10 +249,9 @@ TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
 
   for (size_t i = 0; i < inputs_size(); ++i) {
     input_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
-        model_, eval_tensors_, inputs().Get(i));
+        model_, graph_.GetAllocations(), inputs().Get(i), 0);
     if (input_tensors_[i] == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter_,
-                           "Failed to initialize input tensor %d", i);
+      MicroPrintf("Failed to initialize input tensor %d", i);
       return kTfLiteError;
     }
   }
@@ -291,8 +262,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
       reinterpret_cast<TfLiteTensor**>(allocator_.AllocatePersistentBuffer(
           sizeof(TfLiteTensor*) * outputs_size()));
   if (output_tensors_ == nullptr) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
+    MicroPrintf(
         "Failed to allocate memory for context->output_tensors_, "
         "%d bytes required",
         sizeof(TfLiteTensor*) * outputs_size());
@@ -301,15 +271,18 @@ TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
 
   for (size_t i = 0; i < outputs_size(); ++i) {
     output_tensors_[i] = allocator_.AllocatePersistentTfLiteTensor(
-        model_, eval_tensors_, outputs().Get(i));
+        model_, graph_.GetAllocations(), outputs().Get(i), 0);
     if (output_tensors_[i] == nullptr) {
-      TF_LITE_REPORT_ERROR(error_reporter_,
-                           "Failed to initialize output tensor %d", i);
+      MicroPrintf("Failed to initialize output tensor %d", i);
       return kTfLiteError;
     }
   }
 
-  TF_LITE_ENSURE_STATUS(ResetVariableTensors());
+  TF_LITE_ENSURE_STATUS(Reset());
+
+#ifdef EON_COMPILER_RUN
+  node_and_registrations_ = allocations->node_and_registrations;
+#endif
 
   tensors_allocated_ = true;
   return kTfLiteOk;
@@ -317,59 +290,22 @@ TfLiteStatus MicroInterpreter::AllocateTensors(bool run_all_prep_ops) {
 
 TfLiteStatus MicroInterpreter::Invoke() {
   if (initialization_status_ != kTfLiteOk) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Invoke() called after initialization failed\n");
+    MicroPrintf("Invoke() called after initialization failed\n");
     return kTfLiteError;
   }
 
   // Ensure tensors are allocated before the interpreter is invoked to avoid
   // difficult to debug segfaults.
   if (!tensors_allocated_) {
-    TF_LITE_ENSURE_OK(&context_, AllocateTensors());
+    TF_LITE_ENSURE_OK(&context_, AllocateTensors(true));
   }
-
-  for (size_t i = 0; i < subgraph_->operators()->size(); ++i) {
-    auto* node = &(node_and_registrations_[i].node);
-    auto* registration = node_and_registrations_[i].registration;
-
-// This ifdef is needed (even though ScopedMicroProfiler itself is a no-op with
-// -DTF_LITE_STRIP_ERROR_STRINGS) because the function OpNameFromRegistration is
-// only defined for builds with the error strings.
-#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
-    ScopedMicroProfiler scoped_profiler(
-        OpNameFromRegistration(registration),
-        reinterpret_cast<MicroProfiler*>(context_.profiler));
-#endif
-
-    TFLITE_DCHECK(registration->invoke);
-    TfLiteStatus invoke_status = registration->invoke(&context_, node);
-
-    // All TfLiteTensor structs used in the kernel are allocated from temp
-    // memory in the allocator. This creates a chain of allocations in the
-    // temp section. The call below resets the chain of allocations to
-    // prepare for the next call.
-    allocator_.ResetTempAllocations();
-
-    if (invoke_status == kTfLiteError) {
-      TF_LITE_REPORT_ERROR(
-          error_reporter_,
-          "Node %s (number %d) failed to invoke with status %d",
-          OpNameFromRegistration(registration), i, invoke_status);
-      return kTfLiteError;
-    } else if (invoke_status != kTfLiteOk) {
-      return invoke_status;
-    }
-  }
-
-  return kTfLiteOk;
+  return graph_.InvokeSubgraph(0);
 }
 
 TfLiteTensor* MicroInterpreter::input(size_t index) {
   const size_t length = inputs_size();
   if (index >= length) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Input index %d out of range (length is %d)", index,
-                         length);
+    MicroPrintf("Input index %d out of range (length is %d)", index, length);
     return nullptr;
   }
   return input_tensors_[index];
@@ -378,43 +314,34 @@ TfLiteTensor* MicroInterpreter::input(size_t index) {
 TfLiteTensor* MicroInterpreter::output(size_t index) {
   const size_t length = outputs_size();
   if (index >= length) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Output index %d out of range (length is %d)", index,
-                         length);
+    MicroPrintf("Output index %d out of range (length is %d)", index, length);
     return nullptr;
   }
   return output_tensors_[index];
 }
 
-TfLiteTensor* MicroInterpreter::tensor(size_t index) {
-  const size_t length = tensors_size();
+TfLiteTensor* MicroInterpreter::tensor(size_t index, size_t subgraph_idx) {
+  const size_t length = tensors_size(subgraph_idx);
   if (index >= length) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Tensor index %d out of range (length is %d)", index,
-                         length);
+    MicroPrintf("Tensor index %d out of range (length is %d)", index, length);
     return nullptr;
   }
-  return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_,
-                                                   index);
+  return allocator_.AllocatePersistentTfLiteTensor(model_, graph_.GetAllocations(), index, subgraph_idx);
 }
 
-TfLiteStatus MicroInterpreter::ResetVariableTensors() {
-  for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) {
-    auto* tensor = subgraph_->tensors()->Get(i);
-    if (tensor->is_variable()) {
-      size_t buffer_size;
-      TF_LITE_ENSURE_STATUS(
-          TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size));
-
-      int value = 0;
-      if (tensor->type() == tflite::TensorType_INT8) {
-        value = tensor->quantization()->zero_point()->Get(0);
-      }
-      memset(eval_tensors_[i].data.raw, value, buffer_size);
-    }
+// Frees the subgraphs, then resets the variable tensors. FreeSubgraphs is
+// reused to reset op state until a reset API exists (b/220940833#comment25).
+TfLiteStatus MicroInterpreter::Reset() {
+  TfLiteStatus status = graph_.FreeSubgraphs();
+  if (status != kTfLiteOk) {
+    return status;
   }
+  return graph_.ResetVariableTensors();
+}
 
-  return kTfLiteOk;
+TfLiteStatus MicroInterpreter::SetMicroExternalContext(
+    void* external_context_payload) {
+  return micro_context_.set_external_context(external_context_payload);
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h
index f33f8a4..5901372 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,74 +19,39 @@ limitations under the License.
 #include <cstdint>
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_context.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h"
 #include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h"
 
-// Copied from tensorflow/lite/version.h to avoid a dependency chain into
+/// Copied from tensorflow/lite/version.h to avoid a dependency chain into
 // tensorflow/core.
 #define TFLITE_SCHEMA_VERSION (3)
 
 namespace tflite {
 
-namespace internal {
-
-// A helper class to encapsulate the implementation of APIs in Context.
-// context->impl_ points to an instance of this class.
-// Check tensorflow/lite/c/common.h for detailed descriptions.
-// TODO(b/16157777): Consider rolling this class into MicroInterpreter.
-class ContextHelper {
- public:
-  explicit ContextHelper(ErrorReporter* error_reporter,
-                         MicroAllocator* allocator, const Model* model);
-
-  // Functions that will be assigned to function pointers on TfLiteContext:
-  static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes);
-  static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx,
-                                                  size_t bytes,
-                                                  int* buffer_idx);
-  static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx);
-  static void ReportOpError(struct TfLiteContext* context, const char* format,
-                            ...);
-  static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
-                                 int tensor_idx);
-  static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
-                                         int tensor_idx);
-
-  // Sets the pointer to a list of TfLiteEvalTensor instances.
-  void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors);
-
-  // Sets the pointer to a list of ScratchBufferHandle instances.
-  void SetScratchBufferHandles(ScratchBufferHandle* scratch_buffer_handles);
-
- private:
-  MicroAllocator* allocator_ = nullptr;
-  ErrorReporter* error_reporter_ = nullptr;
-  const Model* model_ = nullptr;
-  TfLiteEvalTensor* eval_tensors_ = nullptr;
-  ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
-};
-
-}  // namespace internal
-
 class MicroInterpreter {
  public:
-  // The lifetime of the model, op resolver, tensor arena, error reporter and
-  // profiler must be at least as long as that of the interpreter object, since
-  // the interpreter may need to access them at any time. This means that you
-  // should usually create them with the same scope as each other, for example
-  // having them all allocated on the stack as local variables through a
-  // top-level function. The interpreter doesn't do any deallocation of any of
-  // the pointed-to objects, ownership remains with the caller.
+  // The lifetime of the model, op resolver, tensor arena,
+  // resource variables, and profiler must be at least as long as that of the
+  // interpreter object, since the interpreter may need to access them at any
+  // time. This means that you should usually create them with the same scope as
+  // each other, for example having them all allocated on the stack as local
+  // variables through a top-level function. The interpreter doesn't do any
+  // deallocation of any of the pointed-to objects, ownership remains with the
+  // caller.
   MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
                    uint8_t* tensor_arena, size_t tensor_arena_size,
-                   ErrorReporter* error_reporter,
-                   MicroProfiler* profiler = nullptr);
+                   MicroResourceVariables* resource_variables = nullptr,
+                   MicroProfilerInterface* profiler = nullptr);
 
   // Create an interpreter instance using an existing MicroAllocator instance.
   // This constructor should be used when creating an allocator that needs to
@@ -94,22 +59,31 @@ class MicroInterpreter {
   // allocations inside the interpreter. The lifetime of the allocator must be
   // as long as that of the interpreter object.
   MicroInterpreter(const Model* model, const MicroOpResolver& op_resolver,
-                   MicroAllocator* allocator, ErrorReporter* error_reporter,
-                   MicroProfiler* profiler = nullptr);
+                   MicroAllocator* allocator,
+                   MicroResourceVariables* resource_variables = nullptr,
+                   MicroProfilerInterface* profiler = nullptr);
 
   ~MicroInterpreter();
 
   // Runs through the model and allocates all necessary input, output and
   // intermediate tensors.
-  TfLiteStatus AllocateTensors(bool run_all_prep_ops = false);
+  TfLiteStatus AllocateTensors(bool run_all_prep_ops);
 
   // In order to support partial graph runs for strided models, this can return
   // values other than kTfLiteOk and kTfLiteError.
   // TODO(b/149795762): Add this to the TfLiteStatus enum.
   TfLiteStatus Invoke();
 
-  size_t tensors_size() const { return context_.tensors_size; }
-  TfLiteTensor* tensor(size_t tensor_index);
+  // This is the recommended API for an application to pass an external payload
+  // pointer as an external context to kernels. The life time of the payload
+  // pointer should be at least as long as this interpreter. TFLM supports only
+  // one external context.
+  TfLiteStatus SetMicroExternalContext(void* external_context_payload);
+
+  size_t tensors_size(size_t subgraph_idx = 0) const { return model_->subgraphs()->Get(subgraph_idx)->tensors()->size(); }
+
+  TfLiteTensor* tensor(size_t tensor_index, size_t subgraph_idx = 0);
+
   template <class T>
   T* typed_tensor(int tensor_index) {
     if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
@@ -121,9 +95,11 @@ class MicroInterpreter {
   }
 
   TfLiteTensor* input(size_t index);
-  size_t inputs_size() const { return subgraph_->inputs()->Length(); }
+  size_t inputs_size() const {
+    return model_->subgraphs()->Get(0)->inputs()->size();
+  }
   const flatbuffers::Vector<int32_t>& inputs() const {
-    return *subgraph_->inputs();
+    return *model_->subgraphs()->Get(0)->inputs();
   }
   TfLiteTensor* input_tensor(size_t index) { return input(index); }
   template <class T>
@@ -137,9 +113,11 @@ class MicroInterpreter {
   }
 
   TfLiteTensor* output(size_t index);
-  size_t outputs_size() const { return subgraph_->outputs()->Length(); }
+  size_t outputs_size() const {
+    return model_->subgraphs()->Get(0)->outputs()->size();
+  }
   const flatbuffers::Vector<int32_t>& outputs() const {
-    return *subgraph_->outputs();
+    return *model_->subgraphs()->Get(0)->outputs();
   }
   TfLiteTensor* output_tensor(size_t index) { return output(index); }
   template <class T>
@@ -152,17 +130,31 @@ class MicroInterpreter {
     return nullptr;
   }
 
-  // Reset all variable tensors to the default value.
-  TfLiteStatus ResetVariableTensors();
+  // Reset the state to be what you would expect when the interpreter is first
+  // created. i.e. after Init and Prepare is called for the very first time.
+  TfLiteStatus Reset();
 
   TfLiteStatus initialization_status() const { return initialization_status_; }
 
-  size_t operators_size() const { return subgraph_->operators()->size(); }
+#ifdef EON_COMPILER_RUN
+  NodeAndRegistration* node_and_registrations_ = nullptr;
 
-  // For debugging only.
-  const NodeAndRegistration node_and_registration(int node_index) const {
-    return node_and_registrations_[node_index];
+  size_t operators_size(uint32_t subgraph_idx = 0) const
+  {
+    return model_->subgraphs()->Get(subgraph_idx)->operators()->size();
+  }
+
+  const NodeAndRegistration node_and_registration(int node_index, int sg)
+  {
+    return graph_.GetAllocations()[sg].node_and_registrations[node_index];
   }
+#endif
+
+  // Populates node and registration pointers representing the inference graph
+  // of the model from values inside the flatbuffer (loaded from the TfLiteModel
+  // instance). Persistent data (e.g. operator data) is allocated from the
+  // arena.
+  TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer();
 
   // For debugging only.
   // Returns the actual used arena in bytes. This method gives the optimal arena
@@ -179,30 +171,28 @@ class MicroInterpreter {
  private:
   // TODO(b/158263161): Consider switching to Create() function to enable better
   // error reporting during initialization.
-  void Init(MicroProfiler* profiler);
+  void Init(MicroProfilerInterface* profiler);
 
-  NodeAndRegistration* node_and_registrations_ = nullptr;
+  // Gets the current subgraph index used from within context methods.
+  int get_subgraph_index() { return graph_.GetCurrentSubgraphIndex(); }
 
   const Model* model_;
   const MicroOpResolver& op_resolver_;
-  ErrorReporter* error_reporter_;
   TfLiteContext context_ = {};
   MicroAllocator& allocator_;
+  MicroGraph graph_;
   bool tensors_allocated_;
 
   TfLiteStatus initialization_status_;
 
-  const SubGraph* subgraph_ = nullptr;
-  TfLiteEvalTensor* eval_tensors_ = nullptr;
   ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
 
-  // TODO(b/16157777): Drop this reference:
-  internal::ContextHelper context_helper_;
-
   // TODO(b/162311891): Clean these pointers up when this class supports buffers
   // from TfLiteEvalTensor.
   TfLiteTensor** input_tensors_;
   TfLiteTensor** output_tensors_;
+
+  MicroContext micro_context_;
 };
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cc
new file mode 100644
index 0000000..26282ca
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_log.cc
@@ -0,0 +1,47 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+#include <cstdarg>
+#include <cstdint>
+#include <new>
+
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+#include "edge-impulse-sdk/tensorflow/lite/micro/debug_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_string.h"
+#endif
+
+void Log(const char* format, va_list args) {
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+  // Only pulling in the implementation of this function for builds where we
+  // expect to make use of it to be extra cautious about not increasing the code
+  // size.
+  static constexpr int kMaxLogLen = 256;
+  char log_buffer[kMaxLogLen];
+  MicroVsnprintf(log_buffer, kMaxLogLen, format, args);
+  DebugLog(log_buffer);
+  DebugLog("\r\n");
+#endif
+}
+
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+void MicroPrintf(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  Log(format, args);
+  va_end(args);
+}
+#endif
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h
new file mode 100644
index 0000000..22cceb2
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_log.h
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_LOG_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_LOG_H_
+
+#include <cstdarg>
+
+// Strip error strings by default (unless already requested or running EON).
+#if !defined(EON_COMPILER_RUN) && !defined(TF_LITE_STRIP_ERROR_STRINGS)
+#define TF_LITE_STRIP_ERROR_STRINGS
+#endif
+
+// This is a free function used to perform the actual logging.
+// This function will be used by MicroPrintf and MicroErrorReporter::Report()
+void Log(const char* format, va_list args);
+
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+// This function can be used independently of the MicroErrorReporter to get
+// printf-like functionality and is common to all target platforms.
+void MicroPrintf(const char* format, ...);
+#else
+// We use a #define to ensure that the strings are completely stripped, to
+// prevent an unnecessary increase in the binary size.
+#define MicroPrintf(...) tflite::Unused(__VA_ARGS__)
+#endif
+
+namespace tflite {
+
+// From
+// https://stackoverflow.com/questions/23235910/variadic-unused-function-macro
+template <typename... Args>
+void Unused(Args&&... args) {
+  (void)(sizeof...(args));
+}
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_LOG_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h
index 5d46365..798787a 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,28 +19,33 @@ limitations under the License.
 #include <cstring>
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/add.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/conv.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/depthwise_conv.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/ethosu.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/tree_ensemble_classifier.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/fully_connected.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/kernels/micro_ops.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/pooling.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/reduce.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/softmax.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
-// TfLiteRegistration* Register_DETECTION_POSTPROCESS();
+TfLiteRegistration* Register_DETECTION_POSTPROCESS();
 
 template <unsigned int tOpCount>
 class MicroMutableOpResolver : public MicroOpResolver {
  public:
   TF_LITE_REMOVE_VIRTUAL_DELETE
 
-  explicit MicroMutableOpResolver(ErrorReporter* error_reporter = nullptr)
-      : error_reporter_(error_reporter) {}
+  explicit MicroMutableOpResolver() {}
 
   const TfLiteRegistration* FindOp(tflite::BuiltinOperator op) const override {
     if (op == BuiltinOperator_CUSTOM) return nullptr;
@@ -65,7 +70,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
     return nullptr;
   }
 
-  MicroOpResolver::BuiltinParseFunction GetOpDataParser(
+  TfLiteBridgeBuiltinParseFunction GetOpDataParser(
       BuiltinOperator op) const override {
     TFLITE_DCHECK(num_buitin_ops_ <= tOpCount);
     for (unsigned int i = 0; i < num_buitin_ops_; ++i) {
@@ -82,22 +87,16 @@ class MicroMutableOpResolver : public MicroOpResolver {
   // kTfLiteError.
   TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) {
     if (registrations_len_ >= tOpCount) {
-      if (error_reporter_) {
-        TF_LITE_REPORT_ERROR(
-            error_reporter_,
-            "Couldn't register custom op '%s', resolver size is too small (%d)",
-            name, tOpCount);
-      }
+      MicroPrintf(
+          "Couldn't register custom op '%s', resolver size is too "
+          "small (%d)",
+          name, tOpCount);
       return kTfLiteError;
     }
 
     if (FindOp(name) != nullptr) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Calling AddCustom for the same op more than once "
-                             "is not supported (Op: %s).",
-                             name);
-      }
+      MicroPrintf("Calling AddCustom for the same op more than once ");
+      MicroPrintf("is not supported (Op: %s).", name);
       return kTfLiteError;
     }
 
@@ -118,9 +117,8 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseAbs);
   }
 
-  TfLiteStatus AddAdd() {
-    return AddBuiltin(BuiltinOperator_ADD, tflite::ops::micro::Register_ADD(),
-                      ParseAdd);
+  TfLiteStatus AddAdd(const TfLiteRegistration& registration = Register_ADD()) {
+    return AddBuiltin(BuiltinOperator_ADD, registration, ParseAdd);
   }
 
   TfLiteStatus AddAddN() {
@@ -129,24 +127,26 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddArgMax() {
-    return AddBuiltin(BuiltinOperator_ARG_MAX,
-                      tflite::ops::micro::Register_ARG_MAX(), ParseArgMax);
+    return AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX(), ParseArgMax);
   }
 
   TfLiteStatus AddArgMin() {
-    return AddBuiltin(BuiltinOperator_ARG_MIN,
-                      tflite::ops::micro::Register_ARG_MIN(), ParseArgMin);
+    return AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN(), ParseArgMin);
   }
 
-  TfLiteStatus AddAveragePool2D() {
-    return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D,
-                      tflite::ops::micro::Register_AVERAGE_POOL_2D(),
-                      ParsePool);
+  TfLiteStatus AddAssignVariable() {
+    return AddBuiltin(BuiltinOperator_ASSIGN_VARIABLE,
+                      tflite::Register_ASSIGN_VARIABLE(), ParseAssignVariable);
+  }
+
+  TfLiteStatus AddAveragePool2D(
+      const TfLiteRegistration& registration = Register_AVERAGE_POOL_2D()) {
+    return AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, registration, ParsePool);
   }
 
   TfLiteStatus AddBatchMatMul() {
     return AddBuiltin(BuiltinOperator_BATCH_MATMUL,
-                      tflite::Register_BATCH_MATMUL(), ParseBatchMatMul);
+                      Register_BATCH_MATMUL(), ParseBatchMatMul);
   }
 
   TfLiteStatus AddBatchToSpaceNd() {
@@ -154,18 +154,27 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       Register_BATCH_TO_SPACE_ND(), ParseBatchToSpaceNd);
   }
 
+  TfLiteStatus AddBroadcastArgs() {
+    return AddBuiltin(BuiltinOperator_BROADCAST_ARGS, Register_BROADCAST_ARGS(),
+                      ParseBroadcastArgs);
+  }
+
+  TfLiteStatus AddBroadcastTo() {
+    return AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO(),
+                      ParseBroadcastTo);
+  }
+
+  TfLiteStatus AddCallOnce() {
+    return AddBuiltin(BuiltinOperator_CALL_ONCE, Register_CALL_ONCE(),
+                      ParseCallOnce);
+  }
+
   TfLiteStatus AddCast() {
     return AddBuiltin(BuiltinOperator_CAST, Register_CAST(), ParseCast);
   }
 
   TfLiteStatus AddCeil() {
-    return AddBuiltin(BuiltinOperator_CEIL, tflite::ops::micro::Register_CEIL(),
-                      ParseCeil);
-  }
-
-  TfLiteStatus AddCircularBuffer() {
-    return AddCustom("CIRCULAR_BUFFER",
-                     tflite::ops::micro::Register_CIRCULAR_BUFFER());
+    return AddBuiltin(BuiltinOperator_CEIL, Register_CEIL(), ParseCeil);
   }
 
   TfLiteStatus AddComplexAbs() {
@@ -173,14 +182,18 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseComplexAbs);
   }
 
+  TfLiteStatus AddCircularBuffer() {
+    return AddCustom("CIRCULAR_BUFFER", tflite::Register_CIRCULAR_BUFFER());
+  }
+
   TfLiteStatus AddConcatenation() {
-    return AddBuiltin(BuiltinOperator_CONCATENATION,
-                      tflite::ops::micro::Register_CONCATENATION(),
+    return AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION(),
                       ParseConcatenation);
   }
 
-  TfLiteStatus AddConv2D() {
-    return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
+  TfLiteStatus AddConv2D(
+      const TfLiteRegistration& registration = Register_CONV_2D()) {
+    return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D);
   }
 
   TfLiteStatus AddCos() {
@@ -188,21 +201,31 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseCos);
   }
 
-  TfLiteStatus AddDepthwiseConv2D() {
-    return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
-                      Register_DEPTHWISE_CONV_2D(), ParseDepthwiseConv2D);
+  TfLiteStatus AddCumSum() {
+    return AddBuiltin(BuiltinOperator_CUMSUM, tflite::Register_CUMSUM(),
+                      ParseCumsum);
+  }
+
+  TfLiteStatus AddDepthToSpace() {
+    return AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE,
+                      tflite::Register_DEPTH_TO_SPACE(), ParseDepthToSpace);
+  }
+
+  TfLiteStatus AddDepthwiseConv2D(
+      const TfLiteRegistration& registration = Register_DEPTHWISE_CONV_2D()) {
+    return AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, registration,
+                      ParseDepthwiseConv2D);
   }
 
   TfLiteStatus AddDequantize() {
-    return AddBuiltin(BuiltinOperator_DEQUANTIZE,
-                      tflite::ops::micro::Register_DEQUANTIZE(),
+    return AddBuiltin(BuiltinOperator_DEQUANTIZE, tflite::Register_DEQUANTIZE(),
                       ParseDequantize);
   }
 
-  // TfLiteStatus AddDetectionPostprocess() {
-  //   return AddCustom("TFLite_Detection_PostProcess",
-  //                    tflite::Register_DETECTION_POSTPROCESS());
-  // }
+  TfLiteStatus AddDetectionPostprocess() {
+    return AddCustom("TFLite_Detection_PostProcess",
+                     tflite::Register_DETECTION_POSTPROCESS());
+  }
 
   TfLiteStatus AddDiv() {
     return AddBuiltin(BuiltinOperator_DIV, tflite::Register_DIV(), ParseDiv);
@@ -213,8 +236,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddEqual() {
-    return AddBuiltin(BuiltinOperator_EQUAL,
-                      tflite::ops::micro::Register_EQUAL(), ParseEqual);
+    return AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL(), ParseEqual);
   }
 
   TfLiteStatus AddEthosU() {
@@ -226,8 +248,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddExp() {
-    return AddBuiltin(BuiltinOperator_EXP, tflite::ops::micro::Register_EXP(),
-                      ParseExp);
+    return AddBuiltin(BuiltinOperator_EXP, Register_EXP(), ParseExp);
   }
 
   TfLiteStatus AddExpandDims() {
@@ -235,9 +256,22 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseExpandDims);
   }
 
+  TfLiteStatus AddFill() {
+    return AddBuiltin(BuiltinOperator_FILL, tflite::Register_FILL(), ParseFill);
+  }
+
   TfLiteStatus AddFloor() {
-    return AddBuiltin(BuiltinOperator_FLOOR,
-                      tflite::ops::micro::Register_FLOOR(), ParseFloor);
+    return AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR(), ParseFloor);
+  }
+
+  TfLiteStatus AddFloorDiv() {
+    return AddBuiltin(BuiltinOperator_FLOOR_DIV, tflite::Register_FLOOR_DIV(),
+                      ParseFloorDiv);
+  }
+
+  TfLiteStatus AddFloorMod() {
+    return AddBuiltin(BuiltinOperator_FLOOR_MOD, tflite::Register_FLOOR_MOD(),
+                      ParseFloorMod);
   }
 
   TfLiteStatus AddFullyConnected(
@@ -248,25 +282,28 @@ class MicroMutableOpResolver : public MicroOpResolver {
 
 #ifndef TF_LITE_STATIC_MEMORY
   TfLiteStatus AddGather() {
-    return AddBuiltin(BuiltinOperator_GATHER, Register_GATHER(),
+    return AddBuiltin(BuiltinOperator_GATHER, tflite::Register_GATHER(),
                       ParseGather);
   }
 #endif
 
+  TfLiteStatus AddGatherNd() {
+    return AddBuiltin(BuiltinOperator_GATHER_ND, tflite::Register_GATHER_ND(),
+                      ParseGatherNd);
+  }
+
   TfLiteStatus AddGreater() {
-    return AddBuiltin(BuiltinOperator_GREATER,
-                      tflite::ops::micro::Register_GREATER(), ParseGreater);
+    return AddBuiltin(BuiltinOperator_GREATER, Register_GREATER(),
+                      ParseGreater);
   }
 
   TfLiteStatus AddGreaterEqual() {
-    return AddBuiltin(BuiltinOperator_GREATER_EQUAL,
-                      tflite::ops::micro::Register_GREATER_EQUAL(),
+    return AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL(),
                       ParseGreaterEqual);
   }
 
   TfLiteStatus AddHardSwish() {
-    return AddBuiltin(BuiltinOperator_HARD_SWISH,
-                      tflite::ops::micro::Register_HARD_SWISH(),
+    return AddBuiltin(BuiltinOperator_HARD_SWISH, tflite::Register_HARD_SWISH(),
                       ParseHardSwish);
   }
 
@@ -275,6 +312,10 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseImag);
   }
 
+  TfLiteStatus AddIf() {
+    return AddBuiltin(BuiltinOperator_IF, tflite::Register_IF(), ParseIf);
+  }
+
   TfLiteStatus AddL2Normalization() {
     return AddBuiltin(BuiltinOperator_L2_NORMALIZATION,
                       tflite::ops::micro::Register_L2_NORMALIZATION(),
@@ -292,13 +333,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddLess() {
-    return AddBuiltin(BuiltinOperator_LESS, tflite::ops::micro::Register_LESS(),
-                      ParseLess);
+    return AddBuiltin(BuiltinOperator_LESS, Register_LESS(), ParseLess);
   }
 
   TfLiteStatus AddLessEqual() {
-    return AddBuiltin(BuiltinOperator_LESS_EQUAL,
-                      tflite::ops::micro::Register_LESS_EQUAL(),
+    return AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL(),
                       ParseLessEqual);
   }
 
@@ -309,8 +348,7 @@ class MicroMutableOpResolver : public MicroOpResolver {
 
   TfLiteStatus AddLogicalAnd() {
     return AddBuiltin(BuiltinOperator_LOGICAL_AND,
-                      tflite::ops::micro::Register_LOGICAL_AND(),
-                      ParseLogicalAnd);
+                      tflite::Register_LOGICAL_AND(), ParseLogicalAnd);
   }
 
   TfLiteStatus AddLogicalNot() {
@@ -320,74 +358,72 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddLogicalOr() {
-    return AddBuiltin(BuiltinOperator_LOGICAL_OR,
-                      tflite::ops::micro::Register_LOGICAL_OR(),
+    return AddBuiltin(BuiltinOperator_LOGICAL_OR, tflite::Register_LOGICAL_OR(),
                       ParseLogicalOr);
   }
 
   TfLiteStatus AddLogistic() {
-    return AddBuiltin(BuiltinOperator_LOGISTIC,
-                      tflite::ops::micro::Register_LOGISTIC(), ParseLogistic);
+    return AddBuiltin(BuiltinOperator_LOGISTIC, tflite::Register_LOGISTIC(),
+                      ParseLogistic);
   }
 
   TfLiteStatus AddLogSoftmax() {
     return AddBuiltin(BuiltinOperator_LOG_SOFTMAX,
-                      Register_LOG_SOFTMAX(), ParseLogSoftmax);
+                      tflite::Register_LOG_SOFTMAX(), ParseLogSoftmax);
   }
 
   TfLiteStatus AddMaximum() {
-    return AddBuiltin(BuiltinOperator_MAXIMUM,
-                      tflite::ops::micro::Register_MAXIMUM(), ParseMaximum);
+    return AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM(),
+                      ParseMaximum);
+  }
+
+  TfLiteStatus AddMaxPool2D(
+      const TfLiteRegistration& registration = Register_MAX_POOL_2D()) {
+    return AddBuiltin(BuiltinOperator_MAX_POOL_2D, registration, ParsePool);
   }
 
-  TfLiteStatus AddMaxPool2D() {
-    return AddBuiltin(BuiltinOperator_MAX_POOL_2D,
-                      tflite::ops::micro::Register_MAX_POOL_2D(), ParsePool);
+  TfLiteStatus AddMirrorPad() {
+    return AddBuiltin(BuiltinOperator_MIRROR_PAD, tflite::Register_MIRROR_PAD(),
+                      ParseMirrorPad);
   }
 
   TfLiteStatus AddMean() {
-    return AddBuiltin(BuiltinOperator_MEAN, tflite::ops::micro::Register_MEAN(),
-                      ParseReducer);
+    return AddBuiltin(BuiltinOperator_MEAN, Register_MEAN(), ParseReducer);
   }
 
   TfLiteStatus AddMinimum() {
-    return AddBuiltin(BuiltinOperator_MINIMUM,
-                      tflite::ops::micro::Register_MINIMUM(), ParseMinimum);
+    return AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM(),
+                      ParseMinimum);
   }
 
-  TfLiteStatus AddMul() {
-    return AddBuiltin(BuiltinOperator_MUL, tflite::ops::micro::Register_MUL(),
-                      ParseMul);
+  TfLiteStatus AddMul(const TfLiteRegistration& registration = Register_MUL()) {
+    return AddBuiltin(BuiltinOperator_MUL, registration, ParseMul);
   }
 
   TfLiteStatus AddNeg() {
-    return AddBuiltin(BuiltinOperator_NEG, tflite::ops::micro::Register_NEG(),
-                      ParseNeg);
+    return AddBuiltin(BuiltinOperator_NEG, Register_NEG(), ParseNeg);
   }
 
   TfLiteStatus AddNotEqual() {
-    return AddBuiltin(BuiltinOperator_NOT_EQUAL,
-                      tflite::ops::micro::Register_NOT_EQUAL(), ParseNotEqual);
+    return AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(),
+                      ParseNotEqual);
   }
 
   TfLiteStatus AddPack() {
-    return AddBuiltin(BuiltinOperator_PACK, tflite::ops::micro::Register_PACK(),
-                      ParsePack);
+    return AddBuiltin(BuiltinOperator_PACK, Register_PACK(), ParsePack);
   }
 
-  TfLiteStatus AddPad() {
-    return AddBuiltin(BuiltinOperator_PAD, tflite::ops::micro::Register_PAD(),
-                      ParsePad);
+  TfLiteStatus AddPad(const TfLiteRegistration& registration = Register_PAD()) {
+    return AddBuiltin(BuiltinOperator_PAD, registration, ParsePad);
   }
 
   TfLiteStatus AddPadV2() {
-    return AddBuiltin(BuiltinOperator_PADV2,
-                      tflite::ops::micro::Register_PADV2(), ParsePadV2);
+    return AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), ParsePadV2);
   }
 
   TfLiteStatus AddPrelu() {
-    return AddBuiltin(BuiltinOperator_PRELU,
-                      tflite::ops::micro::Register_PRELU(), ParsePrelu);
+    return AddBuiltin(BuiltinOperator_PRELU, tflite::Register_PRELU(),
+                      ParsePrelu);
   }
 
   TfLiteStatus AddQuantize() {
@@ -400,24 +436,28 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseReal);
   }
 
+  TfLiteStatus AddReadVariable() {
+    return AddBuiltin(BuiltinOperator_READ_VARIABLE,
+                      tflite::Register_READ_VARIABLE(), ParseReadVariable);
+  }
+
   TfLiteStatus AddReduceMax() {
-    return AddBuiltin(BuiltinOperator_REDUCE_MAX,
-                      tflite::ops::micro::Register_REDUCE_MAX(), ParseReducer);
+    return AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(),
+                      ParseReducer);
   }
 
   TfLiteStatus AddReduceMin() {
-     return AddBuiltin(BuiltinOperator_REDUCE_MIN,
-                       tflite::ops::micro::Register_REDUCE_MIN(), ParseReducer);
-   }
+     return AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(),
+                       ParseReducer);
+  }
 
   TfLiteStatus AddRelu() {
-    return AddBuiltin(BuiltinOperator_RELU, tflite::ops::micro::Register_RELU(),
-                      ParseRelu);
+    return AddBuiltin(BuiltinOperator_RELU, tflite::Register_RELU(), ParseRelu);
   }
 
   TfLiteStatus AddRelu6() {
-    return AddBuiltin(BuiltinOperator_RELU6,
-                      tflite::ops::micro::Register_RELU6(), ParseRelu6);
+    return AddBuiltin(BuiltinOperator_RELU6, tflite::Register_RELU6(),
+                      ParseRelu6);
   }
 
   TfLiteStatus AddReshape() {
@@ -425,6 +465,11 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       tflite::ops::micro::Register_RESHAPE(), ParseReshape);
   }
 
+  TfLiteStatus AddResizeBilinear() {
+    return AddBuiltin(BuiltinOperator_RESIZE_BILINEAR,
+                      Register_RESIZE_BILINEAR(), ParseResizeBilinear);
+  }
+
   TfLiteStatus AddResizeNearestNeighbor() {
     return AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
                       tflite::ops::micro::Register_RESIZE_NEAREST_NEIGHBOR(),
@@ -468,13 +513,12 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddSlice() {
-    return AddBuiltin(BuiltinOperator_SLICE, Register_SLICE(),
-                      ParseSlice);
+    return AddBuiltin(BuiltinOperator_SLICE, Register_SLICE(), ParseSlice);
   }
 
-  TfLiteStatus AddSoftmax() {
-    return AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX(),
-                      ParseSoftmax);
+  TfLiteStatus AddSoftmax(
+      const TfLiteRegistration& registration = Register_SOFTMAX()) {
+    return AddBuiltin(BuiltinOperator_SOFTMAX, registration, ParseSoftmax);
   }
 
   TfLiteStatus AddSpaceToBatchNd() {
@@ -482,19 +526,18 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       Register_SPACE_TO_BATCH_ND(), ParseSpaceToBatchNd);
   }
 
+  TfLiteStatus AddSpaceToDepth() {
+    return AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH(),
+                      ParseSpaceToDepth);
+  }
+
   TfLiteStatus AddSplit() {
     return AddBuiltin(BuiltinOperator_SPLIT,
                       tflite::ops::micro::Register_SPLIT(), ParseSplit);
   }
 
   TfLiteStatus AddSplitV() {
-    return AddBuiltin(BuiltinOperator_SPLIT_V,
-                      tflite::ops::micro::Register_SPLIT_V(), ParseSplitV);
-  }
-
-  TfLiteStatus AddSquaredDifference() {
-    return AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE, Register_SQUARED_DIFFERENCE(),
-                      ParseSquaredDifference);
+    return AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V(), ParseSplitV);
   }
 
   TfLiteStatus AddSqueeze() {
@@ -512,24 +555,28 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       tflite::ops::micro::Register_SQUARE(), ParseSquare);
   }
 
+  TfLiteStatus AddSquaredDifference() {
+    return AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE,
+                      tflite::Register_SQUARED_DIFFERENCE(),
+                      ParseSquaredDifference);
+  }
+
   TfLiteStatus AddStridedSlice() {
-    return AddBuiltin(BuiltinOperator_STRIDED_SLICE,
-                      tflite::ops::micro::Register_STRIDED_SLICE(),
+    return AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE(),
                       ParseStridedSlice);
   }
 
   TfLiteStatus AddSub() {
-    return AddBuiltin(BuiltinOperator_SUB, tflite::ops::micro::Register_SUB(),
-                      ParseSub);
+    return AddBuiltin(BuiltinOperator_SUB, tflite::Register_SUB(), ParseSub);
   }
 
   TfLiteStatus AddSum() {
-    return AddBuiltin(BuiltinOperator_SUM, tflite::ops::micro::Register_SUM(),
-                      ParseReducer);
+    return AddBuiltin(BuiltinOperator_SUM, Register_SUM(), ParseReducer);
   }
 
-  TfLiteStatus AddSvdf() {
-    return AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(), ParseSvdf);
+  TfLiteStatus AddSvdf(
+      const TfLiteRegistration& registration = Register_SVDF()) {
+    return AddBuiltin(BuiltinOperator_SVDF, registration, ParseSvdf);
   }
 
   TfLiteStatus AddTanh() {
@@ -537,16 +584,16 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseTanh);
   }
 
-  TfLiteStatus AddTranspose() {
-    return AddBuiltin(BuiltinOperator_TRANSPOSE,
-                      tflite::Register_TRANSPOSE(), ParseTranspose);
-  }
-
   TfLiteStatus AddTransposeConv() {
     return AddBuiltin(BuiltinOperator_TRANSPOSE_CONV,
                       tflite::Register_TRANSPOSE_CONV(), ParseTransposeConv);
   }
 
+  TfLiteStatus AddTranspose() {
+    return AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(),
+                      ParseTranspose);
+  }
+
   TfLiteStatus AddTreeEnsembleClassifier() {
     return AddCustom(tflite::GetString_TreeEnsembleClassifier(),
                      tflite::Register_TreeEnsembleClassifier());
@@ -557,6 +604,21 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       tflite::ops::micro::Register_UNPACK(), ParseUnpack);
   }
 
+  TfLiteStatus AddUnidirectionalSequenceLstm() {
+    return AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+                      Register_UNIDIRECTIONAL_SEQUENCE_LSTM(),
+                      ParseUnidirectionalSequenceLSTM);
+  }
+
+  TfLiteStatus AddVarHandle() {
+    return AddBuiltin(BuiltinOperator_VAR_HANDLE, Register_VAR_HANDLE(),
+                      ParseVarHandle);
+  }
+
+  TfLiteStatus AddWhile() {
+    return AddBuiltin(BuiltinOperator_WHILE, Register_WHILE(), ParseWhile);
+  }
+
   TfLiteStatus AddZerosLike() {
     return AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE(),
                       ParseZerosLike);
@@ -567,33 +629,22 @@ class MicroMutableOpResolver : public MicroOpResolver {
  private:
   TfLiteStatus AddBuiltin(tflite::BuiltinOperator op,
                           const TfLiteRegistration& registration,
-                          MicroOpResolver::BuiltinParseFunction parser) {
+                          TfLiteBridgeBuiltinParseFunction parser) {
     if (op == BuiltinOperator_CUSTOM) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Invalid parameter BuiltinOperator_CUSTOM to the "
-                             "AddBuiltin function.");
-      }
+      MicroPrintf("Invalid parameter BuiltinOperator_CUSTOM to the ");
+      MicroPrintf("AddBuiltin function.");
       return kTfLiteError;
     }
 
     if (FindOp(op) != nullptr) {
-      if (error_reporter_ != nullptr) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Calling AddBuiltin with the same op more than "
-                             "once is not supported (Op: #%d).",
-                             op);
-      }
+      MicroPrintf("Calling AddBuiltin with the same op more than ");
+      MicroPrintf("once is not supported (Op: #%d).", op);
       return kTfLiteError;
     }
 
     if (registrations_len_ >= tOpCount) {
-      if (error_reporter_) {
-        TF_LITE_REPORT_ERROR(error_reporter_,
-                             "Couldn't register builtin op #%d, resolver size "
-                             "is too small (%d).",
-                             op, tOpCount);
-      }
+      MicroPrintf("Couldn't register builtin op #%d, resolver size ", op);
+      MicroPrintf("is too small (%d).", tOpCount);
       return kTfLiteError;
     }
 
@@ -616,10 +667,8 @@ class MicroMutableOpResolver : public MicroOpResolver {
   // Arrays (and counter) to store the builtin codes and their corresponding
   // parse functions as these are registered with the Op Resolver.
   BuiltinOperator builtin_codes_[tOpCount];
-  MicroOpResolver::BuiltinParseFunction builtin_parsers_[tOpCount];
+  TfLiteBridgeBuiltinParseFunction builtin_parsers_[tOpCount];
   unsigned int num_buitin_ops_ = 0;
-
-  ErrorReporter* error_reporter_;
 };
 
 };  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h
index af8bb67..1bd3f4b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_op_resolver.h
@@ -16,9 +16,8 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_MICRO_OP_RESOLVER_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/flatbuffer_conversions.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/flatbuffer_conversions_bridge.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 namespace tflite {
@@ -32,13 +31,8 @@ namespace tflite {
 // We need an interface class instead of directly using MicroMutableOpResolver
 // because MicroMutableOpResolver is a class template with the number of
 // registered Ops as the template parameter.
-class MicroOpResolver : public OpResolver {
+class MicroOpResolver : public TfLiteBridgeOpResolver {
  public:
-  typedef TfLiteStatus (*BuiltinParseFunction)(const Operator* op,
-                                               ErrorReporter* error_reporter,
-                                               BuiltinDataAllocator* allocator,
-                                               void** builtin_data);
-
   // Returns the Op registration struct corresponding to the enum code from the
   // flatbuffer schema. Returns nullptr if the op is not found or if op ==
   // BuiltinOperator_CUSTOM.
@@ -63,7 +57,8 @@ class MicroOpResolver : public OpResolver {
 
   // Returns the operator specific parsing function for the OpData for a
   // BuiltinOperator (if registered), else nullptr.
-  virtual BuiltinParseFunction GetOpDataParser(BuiltinOperator op) const = 0;
+  virtual TfLiteBridgeBuiltinParseFunction GetOpDataParser(
+      BuiltinOperator op) const = 0;
 
   ~MicroOpResolver() override {}
 };
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cc
index 1af7a7f..63306ce 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.cc
@@ -14,10 +14,12 @@ limitations under the License.
 ==============================================================================*/
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h"
 
+#include <cinttypes>
 #include <cstdint>
+#include <cstring>
 
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_time.h"
 
 namespace tflite {
@@ -38,7 +40,7 @@ void MicroProfiler::EndEvent(uint32_t event_handle) {
   end_ticks_[event_handle] = GetCurrentTimeTicks();
 }
 
-int32_t MicroProfiler::GetTotalTicks() const {
+uint32_t MicroProfiler::GetTotalTicks() const {
   int32_t ticks = 0;
   for (int i = 0; i < num_events_; ++i) {
     ticks += end_ticks_[i] - start_ticks_[i];
@@ -49,10 +51,65 @@ int32_t MicroProfiler::GetTotalTicks() const {
 void MicroProfiler::Log() const {
 #if !defined(TF_LITE_STRIP_ERROR_STRINGS)
   for (int i = 0; i < num_events_; ++i) {
-    int32_t ticks = end_ticks_[i] - start_ticks_[i];
-    MicroPrintf("%s took %d ticks (%d ms).", tags_[i], ticks, TicksToMs(ticks));
+    uint32_t ticks = end_ticks_[i] - start_ticks_[i];
+    MicroPrintf("%s took %" PRIu32 " ticks (%d ms).", tags_[i], ticks,
+                TicksToMs(ticks));
   }
 #endif
 }
 
+void MicroProfiler::LogCsv() const {  // CSV: header row, then a row per event
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+  MicroPrintf("\"Event\",\"Tag\",\"Ticks\"");
+  for (int event = 0; event < num_events_; ++event) {
+    const uint32_t elapsed = end_ticks_[event] - start_ticks_[event];
+    MicroPrintf("%d,%s,%" PRIu32, event, tags_[event], elapsed);
+  }
+#endif
+}
+
+// Logs a CSV table of total ticks per unique tag, then the overall tick total.
+void MicroProfiler::LogTicksPerTagCsv() {
+#if !defined(TF_LITE_STRIP_ERROR_STRINGS)
+  MicroPrintf(
+      "\"Unique Tag\",\"Total ticks across all events with that tag.\"");
+  uint32_t total_ticks = 0;  // unsigned, like the per-event tick deltas
+  for (int i = 0; i < num_events_; ++i) {
+    uint32_t ticks = end_ticks_[i] - start_ticks_[i];
+    total_ticks += ticks;
+    TFLITE_DCHECK(tags_[i] != nullptr);
+    int position = FindExistingOrNextPosition(tags_[i]);
+    TFLITE_DCHECK(position >= 0);
+    if (position < 0) continue;  // no slot: skip rather than index with -1
+    total_ticks_per_tag[position].tag = tags_[i];
+    total_ticks_per_tag[position].ticks += ticks;
+  }
+  // Print one row per populated entry; the first null tag ends the table.
+  for (int i = 0; i < num_events_; ++i) {
+    TicksPerTag each_tag_entry = total_ticks_per_tag[i];
+    if (each_tag_entry.tag == nullptr) break;
+    MicroPrintf("%s, %" PRIu32, each_tag_entry.tag,
+                static_cast<uint32_t>(each_tag_entry.ticks));  // matches PRIu32
+  }
+  MicroPrintf("total number of ticks, %" PRIu32, total_ticks);
+#endif
+}
+
+// Scans the first num_events_ entries of the total_ticks_per_tag array for
+// tag_name. The scan stops at the first entry that either already carries an
+// equal tag or has a null tag (an unused entry), and that entry's position is
+// returned: the existing position when the tag was seen before, otherwise the
+// next available empty position. Returns -1 when every scanned entry holds a
+// different tag.
+int MicroProfiler::FindExistingOrNextPosition(const char* tag_name) {
+  for (int slot = 0; slot < num_events_; ++slot) {
+    const TicksPerTag& entry = total_ticks_per_tag[slot];
+    const bool is_unused = (entry.tag == nullptr);
+    if (is_unused || strcmp(entry.tag, tag_name) == 0) {
+      return slot;
+    }
+  }
+  // The loop only finishes without returning once slot == num_events_.
+  return -1;
+}
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h
index 3f285b2..d940398 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,9 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
 #define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_
 
-#include <cstdint>
-
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h"
 
 namespace tflite {
 
@@ -26,7 +25,7 @@ namespace tflite {
 // performance. Bottleck operators can be identified along with slow code
 // sections. This can be used in conjunction with running the relevant micro
 // benchmark to evaluate end-to-end performance.
-class MicroProfiler {
+class MicroProfiler : public MicroProfilerInterface {
  public:
   MicroProfiler() = default;
   virtual ~MicroProfiler() = default;
@@ -34,7 +33,7 @@ class MicroProfiler {
   // Marks the start of a new event and returns an event handle that can be used
   // to mark the end of the event via EndEvent. The lifetime of the tag
   // parameter must exceed that of the MicroProfiler.
-  virtual uint32_t BeginEvent(const char* tag);
+  virtual uint32_t BeginEvent(const char* tag) override;
 
   // Marks the end of an event associated with event_handle. It is the
   // responsibility of the caller to ensure than EndEvent is called once and
@@ -43,7 +42,7 @@ class MicroProfiler {
   // If EndEvent is called more than once for the same event_handle, the last
   // call will be used as the end of event marker.If EndEvent is called 0 times
   // for a particular event_handle, the duration of that event will be 0 ticks.
-  virtual void EndEvent(uint32_t event_handle);
+  virtual void EndEvent(uint32_t event_handle) override;
 
   // Clears all the events that have been currently profiled.
   void ClearEvents() { num_events_ = 0; }
@@ -51,33 +50,55 @@ class MicroProfiler {
   // Returns the sum of the ticks taken across all the events. This number
   // is only meaningful if all of the events are disjoint (the end time of
   // event[i] <= start time of event[i+1]).
-  int32_t GetTotalTicks() const;
+  uint32_t GetTotalTicks() const;
 
-  // Prints the profiling information of each of the events.
+  // Prints the profiling information of each of the events in human readable
+  // form.
   void Log() const;
 
+  // Prints the profiling information of each of the events in CSV (Comma
+  // Separated Value) form.
+  void LogCsv() const;
+
+  // Prints the total ticks for each unique tag in CSV format.
+  // Output will have one row for each unique tag along with the
+  // total ticks summed across all events with that particular tag.
+  void LogTicksPerTagCsv();
+
  private:
   // Maximum number of events that this class can keep track of. If we call
   // AddEvent more than kMaxEvents number of times, then the oldest event's
   // profiling information will be overwritten.
-  static constexpr int kMaxEvents = 50;
+  static constexpr int kMaxEvents = 1024;
 
   const char* tags_[kMaxEvents];
-  int32_t start_ticks_[kMaxEvents];
-  int32_t end_ticks_[kMaxEvents];
+  uint32_t start_ticks_[kMaxEvents];
+  uint32_t end_ticks_[kMaxEvents];
   int num_events_ = 0;
 
+  struct TicksPerTag {
+    const char* tag;
+    uint32_t ticks;
+  };
+  // In practice, the number of tags will be much lower than the number of
+  // events. But it is theoretically possible for each event to have a unique
+  // tag, and hence we allow total_ticks_per_tag to have kMaxEvents entries.
+  TicksPerTag total_ticks_per_tag[kMaxEvents] = {};
+
+  int FindExistingOrNextPosition(const char* tag_name);
+
   TF_LITE_REMOVE_VIRTUAL_DELETE;
 };
 
-#if defined(NDEBUG)
+#if defined(TF_LITE_STRIP_ERROR_STRINGS)
 // For release builds, the ScopedMicroProfiler is a noop.
 //
 // This is done because the ScipedProfiler is used as part of the
 // MicroInterpreter and we want to ensure zero overhead for the release builds.
 class ScopedMicroProfiler {
  public:
-  explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler) {}
+  explicit ScopedMicroProfiler(const char* tag,
+                               MicroProfilerInterface* profiler) {}
 };
 
 #else
@@ -94,7 +115,8 @@ class ScopedMicroProfiler {
 // }
 class ScopedMicroProfiler {
  public:
-  explicit ScopedMicroProfiler(const char* tag, MicroProfiler* profiler)
+  explicit ScopedMicroProfiler(const char* tag,
+                               MicroProfilerInterface* profiler)
       : profiler_(profiler) {
     if (profiler_ != nullptr) {
       event_handle_ = profiler_->BeginEvent(tag);
@@ -109,9 +131,9 @@ class ScopedMicroProfiler {
 
  private:
   uint32_t event_handle_ = 0;
-  MicroProfiler* profiler_ = nullptr;
+  MicroProfilerInterface* profiler_ = nullptr;
 };
-#endif  // !defined(NDEBUG)
+#endif  // !defined(TF_LITE_STRIP_ERROR_STRINGS)
 
 }  // namespace tflite
 
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h
new file mode 100644
index 0000000..f839a74
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h
@@ -0,0 +1,38 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_
+#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_
+
+#include <cstdint>
+
+namespace tflite {
+
+// Abstract profiling hooks that the TFLM framework calls into.
+class MicroProfilerInterface {
+ public:
+  virtual ~MicroProfilerInterface() = default;
+
+  // Signals that the event labelled `tag` has started. The returned handle
+  // identifies the event when it is later passed to EndEvent.
+  virtual uint32_t BeginEvent(const char* tag) = 0;
+
+  // Signals that the event identified by event_handle has finished.
+  virtual void EndEvent(uint32_t event_handle) = 0;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MICRO_PROFILER_INTERFACE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cc
new file mode 100644
index 0000000..c07d111
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.cc
@@ -0,0 +1,148 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h"
+
+#include <cstring>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+
+namespace tflite {
+
+namespace {}  // namespace
+
+// Builds the object plus max_num_variables entries in persistent buffers from
+// allocator; returns nullptr when either AllocatePersistentBuffer call does.
+MicroResourceVariables* MicroResourceVariables::Create(
+    MicroAllocator* allocator, int max_num_variables) {
+  TFLITE_DCHECK(allocator != nullptr);
+  void* object_buffer =
+      allocator->AllocatePersistentBuffer(sizeof(MicroResourceVariables));
+  void* variable_array = allocator->AllocatePersistentBuffer(
+      sizeof(MicroResourceVariable) * max_num_variables);
+  if (object_buffer == nullptr || variable_array == nullptr) return nullptr;
+  return new (object_buffer) MicroResourceVariables(
+      static_cast<MicroResourceVariable*>(variable_array), max_num_variables);
+}
+
+int MicroResourceVariables::CreateIdIfNoneFound(const char* container,
+                                                const char* shared_name) {
+  // Reuse the ID of a variable already registered under this name pair.
+  const int existing_id = FindId(container, shared_name);
+  if (existing_id >= 0) return existing_id;
+
+  // Otherwise a new slot is needed; give up if the table is already full.
+  if (!(num_resource_variables_ < max_variable_count_)) {
+    MicroPrintf(
+        "Failed to allocate resource variable. Maximum resource variable "
+        "count (%d) reached.",
+        max_variable_count_);
+    return -1;
+  }
+
+  // Claim the next slot; its buffer stays unset until Allocate is called.
+  const int new_id = num_resource_variables_++;
+  MicroResourceVariable& slot = resource_variables_[new_id];
+  slot.container = container;
+  slot.shared_name = shared_name;
+  slot.resource_buffer = nullptr;
+  slot.bytes = 0;
+  return new_id;
+}
+
+TfLiteStatus MicroResourceVariables::Read(int id,
+                                          const TfLiteEvalTensor* tensor) {
+  if (id < 0 || id >= num_resource_variables_) {
+    MicroPrintf("Attempting to read non-existent resource variable %d", id);
+    return kTfLiteError;
+  }
+  MicroResourceVariable variable = resource_variables_[id];
+  TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes);
+  TFLITE_DCHECK(variable.resource_buffer != nullptr);
+  memcpy(tensor->data.raw, variable.resource_buffer, variable.bytes);
+  return kTfLiteOk;
+}
+
+TfLiteStatus MicroResourceVariables::Allocate(int id, TfLiteContext* context,
+                                              const TfLiteTensor* tensor) {
+  if (id < 0 || id >= num_resource_variables_) {
+    MicroPrintf("Attempting to allocate non-existent resource variable %d",
+                id);
+    return kTfLiteError;
+  }
+  MicroResourceVariable& variable = resource_variables_[id];
+  if (variable.resource_buffer == nullptr) {
+    void* buffer = context->AllocatePersistentBuffer(context, tensor->bytes);
+    if (buffer == nullptr) {
+      MicroPrintf("Failed to allocate resource buffer.");
+      return kTfLiteError;
+    }
+    // Zero out resource buffers by default. Buffers can be initialized to
+    // nonzero values using ASSIGN_VARIABLE.
+    memset(buffer, 0, tensor->bytes);
+    // Record the buffer and its size together, and only after success, so a
+    // failed allocation never leaves a nonzero size paired with a null buffer.
+    variable.resource_buffer = buffer;
+    variable.bytes = tensor->bytes;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus MicroResourceVariables::Assign(int id,
+                                            const TfLiteEvalTensor* tensor) {
+  if (id < 0 || id >= num_resource_variables_) {
+    MicroPrintf("Attempting to assign non-existent resource variable %d", id);
+    return kTfLiteError;
+  }
+  const MicroResourceVariable& variable = resource_variables_[id];
+  // The buffer only exists once Allocate has succeeded for this ID.
+  if (variable.resource_buffer == nullptr) {
+    MicroPrintf(
+        "Attempting to assign from a TfLiteEvalTensor before the resource "
+        "buffer has been allocated. Make sure to call Allocate with a "
+        "TfLiteTensor first.");
+    return kTfLiteError;
+  }
+  TFLITE_DCHECK(EvalTensorBytes(tensor) == variable.bytes);
+  memcpy(variable.resource_buffer, tensor->data.raw, variable.bytes);
+  return kTfLiteOk;
+}
+
+TfLiteStatus MicroResourceVariables::ResetAll() {
+  for (int i = 0; i < num_resource_variables_; i++) {
+    void* buf = resource_variables_[i].resource_buffer;  // Null if unallocated.
+    if (buf != nullptr) memset(buf, 0, resource_variables_[i].bytes);
+  }
+  return kTfLiteOk;
+}
+
+int MicroResourceVariables::FindId(const char* container,
+                                   const char* shared_name) {
+  for (int i = 0; i < num_resource_variables_; i++) {
+    const MicroResourceVariable& var = resource_variables_[i];
+    // A null container (some flatbuffers omit it) matches any container.
+    if ((!container || !var.container || !strcmp(container, var.container)) &&
+        !strcmp(shared_name, var.shared_name)) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h
new file mode 100644
index 0000000..d2ebb35
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_resource_variable.h
@@ -0,0 +1,87 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_
+#define TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_
+
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+
+namespace tflite {
+
+class MicroResourceVariables {
+ public:
+  // Builds an instance from allocator memory with room for num_variables IDs.
+  static MicroResourceVariables* Create(MicroAllocator* allocator,
+                                        int num_variables);
+
+  // Creates a resource variable if none is available for the given container
+  // and shared name pair. Returns the resource ID corresponding to the
+  // container and shared name pair. If allocation fails, the returned resource
+  // ID will be negative. The container and shared_name must outlive this
+  // class.
+  int CreateIdIfNoneFound(const char* container, const char* shared_name);
+
+  // Read the resource buffer associated with the given ID into the given
+  // tensor.
+  TfLiteStatus Read(int id, const TfLiteEvalTensor* tensor);
+
+  // Allocates a zero-filled resource buffer sized to tensor->bytes if none has
+  // been allocated for this ID yet. The tensor's contents are not copied; use
+  // Assign to write values into the buffer.
+  TfLiteStatus Allocate(int id, TfLiteContext* context,
+                        const TfLiteTensor* tensor);
+
+  // Copies input tensor contents to the resource buffer.
+  // Allocate with a TfLiteTensor must have been called first for this ID
+  // in order to allocate the resource buffer.
+  TfLiteStatus Assign(int id, const TfLiteEvalTensor* tensor);
+
+  // Zeros out all resource buffers.
+  TfLiteStatus ResetAll();
+
+ private:
+  int FindId(const char* container, const char* shared_name);
+
+  // Micro resource contains the mapping between resource container/name strings
+  // and resource IDs. Each resource ID corresponds to one buffer pointer.
+  // The resource ID is created while the VAR_HANDLE operator is being prepared.
+  // The resource buffer pointer is created during ASSIGN_VARIABLE preparation
+  // stage based on the size of the TFLiteTensor being assigned.
+  struct MicroResourceVariable {
+    const char* container;
+    const char* shared_name;
+    void* resource_buffer;
+
+    // Size of resource_buffer in bytes; sizes copies and verifies tensors.
+    size_t bytes;
+  };
+
+  MicroResourceVariables(MicroResourceVariable* variables,
+                         int max_variable_count)
+      : resource_variables_(variables),
+        max_variable_count_(max_variable_count),
+        num_resource_variables_(0) {}
+
+  MicroResourceVariable* resource_variables_;
+  int max_variable_count_;
+  int num_resource_variables_;
+};
+
+}  // namespace tflite
+
+#endif  // TFLITE_MICRO_TENSORFLOW_LITE_MICRO_MICRO_RESOURCE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cc
index 30b60a9..39746f9 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_string.cc
@@ -283,6 +283,14 @@ extern "C" int MicroVsnprintf(char* output, int len, const char* format,
         case '%':
           output[output_index++] = *current++;
           break;
+        case 'c':
+          if (usable_length - output_index < 1) {
+            output[output_index++] = '\0';
+            return output_index;
+          }
+          output[output_index++] = va_arg(args, int32_t);
+          current++;
+          break;
         case 's':
           char* string = va_arg(args, char*);
           int string_idx = 0;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cc
index b769851..d418509 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_time.cc
@@ -22,8 +22,7 @@ limitations under the License.
 // To add an equivalent function for your own platform, create your own
 // implementation file, and place it in a subfolder with named after the OS
 // you're targeting. For example, see the Cortex M bare metal version in
-// tensorflow/lite/micro/bluepill/micro_time.cc or the mbed one on
-// tensorflow/lite/micro/mbed/micro_time.cc.
+// tensorflow/lite/micro/bluepill/micro_time.cc
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_time.h"
 
@@ -39,21 +38,21 @@ namespace tflite {
 // for a platform to support Tensorflow Lite for Microcontrollers profiling.
 // This returns 0 by default because timing is an optional feature that builds
 // without errors on platforms that do not need it.
-int32_t ticks_per_second() { return 0; }
+uint32_t ticks_per_second() { return 0; }
 
 // Reference implementation of the GetCurrentTimeTicks() function that's
 // required for a platform to support Tensorflow Lite for Microcontrollers
 // profiling. This returns 0 by default because timing is an optional feature
 // that builds without errors on platforms that do not need it.
-int32_t GetCurrentTimeTicks() { return 0; }
+uint32_t GetCurrentTimeTicks() { return 0; }
 
 #else  // defined(TF_LITE_USE_CTIME)
 
 // For platforms that support ctime, we implment the micro_time interface in
 // this central location.
-int32_t ticks_per_second() { return CLOCKS_PER_SEC; }
+uint32_t ticks_per_second() { return CLOCKS_PER_SEC; }
 
-int32_t GetCurrentTimeTicks() { return clock(); }
+uint32_t GetCurrentTimeTicks() { return clock(); }
 #endif
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h
index fac9069..7a8ab45 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_time.h
@@ -21,14 +21,19 @@ namespace tflite {
 
 // These functions should be implemented by each target platform, and provide an
 // accurate tick count along with how many ticks there are per second.
-int32_t ticks_per_second();
+uint32_t ticks_per_second();
 
 // Return time in ticks.  The meaning of a tick varies per platform.
-int32_t GetCurrentTimeTicks();
+uint32_t GetCurrentTimeTicks();
 
-inline int32_t TicksToMs(int32_t ticks) {
-  return static_cast<int32_t>(1000.0f * static_cast<float>(ticks) /
-                              static_cast<float>(ticks_per_second()));
+// Converts a tick count to milliseconds using ticks_per_second(). If
+// ticks_per_second() reports 0, a rate of 1 tick per second is used instead
+// so that the division is always defined.
+inline uint32_t TicksToMs(int32_t ticks) {
+  const uint32_t reported_rate = ticks_per_second();
+  const uint32_t rate = reported_rate > 0 ? reported_rate : 1;
+  return static_cast<uint32_t>(1000.0f * static_cast<float>(ticks) /
+                               static_cast<float>(rate));
 }
 
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cc b/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cc
index 3d21aaf..4f7eba7 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.cc
@@ -20,7 +20,10 @@ limitations under the License.
 #include <limits>
 
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
@@ -32,17 +35,26 @@ int ElementCount(const TfLiteIntArray& dims) {
   return result;
 }
 
-void SignedSymmetricPerChannelQuantize(const float* values,
-                                       TfLiteIntArray* dims,
-                                       int quantized_dimension,
-                                       int8_t* quantized_values,
-                                       float* scaling_factors) {
+size_t EvalTensorBytes(const TfLiteEvalTensor* tensor) {
+  size_t bytes_per_element;
+  TFLITE_DCHECK(kTfLiteOk ==
+                TfLiteTypeSizeOf(tensor->type, &bytes_per_element));
+  return ElementCount(*tensor->dims) * bytes_per_element;
+}
+
+void SignedSymmetricPerChannelQuantize(
+    const float* values, TfLiteIntArray* dims, int quantized_dimension,
+    int8_t* quantized_values, float* scaling_factors, TfLiteType type) {
   int input_size = ElementCount(*dims);
   int channel_count = dims->data[quantized_dimension];
   int per_channel_size = input_size / channel_count;
 
   int stride;
   int channel_stride;
+
+  int qmin = QMinFromTfLiteType(type);
+  int qmax = QMaxFromTfLiteType(type);
+
   if (quantized_dimension == 0) {
     stride = 1;
     channel_stride = per_channel_size;
@@ -50,7 +62,8 @@ void SignedSymmetricPerChannelQuantize(const float* values,
     stride = channel_count;
     channel_stride = 1;
   } else {
-    TF_LITE_FATAL("quantized dimension must be 0 or 3");
+    MicroPrintf("quantized dimension must be 0 or 3");
+    TFLITE_ABORT;
   }
 
   // Calculate scales for each channel.
@@ -63,16 +76,13 @@ void SignedSymmetricPerChannelQuantize(const float* values,
       min = fminf(min, values[idx]);
       max = fmaxf(max, values[idx]);
     }
-    scaling_factors[channel] =
-        fmaxf(fabs(min), fabs(max)) / std::numeric_limits<int8_t>::max();
+    scaling_factors[channel] = fmaxf(fabs(min), fabs(max)) / qmax;
     for (int i = 0; i < per_channel_size; i++) {
       int idx = channel * channel_stride + i * stride;
       const int32_t quantized_value =
           static_cast<int32_t>(roundf(values[idx] / scaling_factors[channel]));
       // Clamp: just in case some odd numeric offset.
-      quantized_values[idx] =
-          fminf(std::numeric_limits<int8_t>::max(),
-                fmaxf(std::numeric_limits<int8_t>::min() + 1, quantized_value));
+      quantized_values[idx] = fminf(qmax, fmaxf(qmin + 1, quantized_value));
     }
   }
 }
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h b/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h
index 8ff6465..73de1dc 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h
@@ -16,8 +16,10 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
 #define TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
 
+// Patched by Edge Impulse
 // Arduino build defines abs as a macro here. That is invalid C++, and breaks
 // libc++'s <complex> header, undefine it.
+// TODO investigate if this belongs to global patch or Arduino lib one
 #ifdef abs
 #undef abs
 #endif
@@ -35,6 +37,12 @@ namespace tflite {
 
 int ElementCount(const TfLiteIntArray& dims);
 
+size_t EvalTensorBytes(const TfLiteEvalTensor* tensor);
+
+// C++11 does not support constexpr max; hence, use ternary conditional to
+// create our own constexpr Max function.
+constexpr int Max(int a, int b) { return a >= b ? a : b; }
+
 // Converts a float value into a quantized value.  Note that large values (close
 // to max int and min int) may see significant error due to a lack of floating
 // point granularity for large values.
@@ -50,11 +58,13 @@ T FloatToQuantizedType(const float value, const float scale, int zero_point) {
 
 template <typename T>
 T FloatToSymmetricQuantizedType(const float value, const float scale) {
-  int32_t result = round(value / scale);
-  result =
-      std::max(static_cast<int32_t>(std::numeric_limits<T>::min() + 1), result);
-  result =
-      std::min(static_cast<int32_t>(std::numeric_limits<T>::max()), result);
+  // 64-bit values are required since 8x16 conv accumulates to int64, meaning
+  // an int64 bias is required.
+  std::int64_t result = round(value / scale);
+  result = std::max(
+      static_cast<std::int64_t>(std::numeric_limits<T>::min() + 1), result);
+  result = std::min(static_cast<std::int64_t>(std::numeric_limits<T>::max()),
+                    result);
   return result;
 }
 
@@ -102,7 +112,8 @@ void SignedSymmetricPerChannelQuantize(const float* values,
                                        TfLiteIntArray* dims,
                                        int quantized_dimension,
                                        int8_t* quantized_values,
-                                       float* scaling_factor);
+                                       float* scaling_factor,
+                                       TfLiteType type = kTfLiteNoType);
 
 // Quantizes inputs based on the values provided, choosing the smallest range
 // which includes all input values.
@@ -136,6 +147,24 @@ void Dequantize(const T* values, const int size, const float scale,
   }
 }
 
+// Lower bound of the quantized range for `type`: -8 for kTfLiteInt4, and the
+// int8_t minimum for every other type.
+inline int QMinFromTfLiteType(TfLiteType type) {
+  const bool is_int4 = (type == kTfLiteInt4);
+  if (!is_int4) {
+    return std::numeric_limits<int8_t>::min();
+  }
+  return -8;
+}
+
+// Upper bound of the quantized range for `type`: 7 for kTfLiteInt4, and the
+// int8_t maximum for every other type.
+inline int QMaxFromTfLiteType(TfLiteType type) {
+  const bool is_int4 = (type == kTfLiteInt4);
+  if (!is_int4) return std::numeric_limits<int8_t>::max();
+  return 7;
+}
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_MICRO_UTILS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cc b/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cc
new file mode 100644
index 0000000..8ad3864
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.cc
@@ -0,0 +1,66 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h"
+
+namespace tflite {
+
+MockMicroGraph::MockMicroGraph(SingleArenaBufferAllocator* allocator)
+    : MicroGraph(nullptr, nullptr, nullptr, nullptr),
+      allocator_(allocator),
+      init_count_(0),
+      prepare_count_(0),
+      free_count_(0) {
+  memset(invoke_counts_, 0, sizeof(invoke_counts_));
+  mock_tensor_ =
+      reinterpret_cast<TfLiteEvalTensor*>(allocator_->AllocatePersistentBuffer(
+          sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor)));
+  int* dims_array = reinterpret_cast<int*>(
+      allocator_->AllocatePersistentBuffer(3 * sizeof(int), alignof(int)));
+  float* data_array = reinterpret_cast<float*>(
+      allocator_->AllocatePersistentBuffer(2 * sizeof(float), alignof(float)));
+  int dims[] = {2, 1, 2};
+  memcpy(dims_array, dims, 3 * sizeof(int));
+  mock_tensor_->dims = testing::IntArrayFromInts(dims_array);
+  mock_tensor_->data.f = data_array;
+  mock_tensor_->type = kTfLiteFloat32;
+}
+
+TfLiteStatus MockMicroGraph::InvokeSubgraph(int subgraph_idx) {
+  invoke_counts_[subgraph_idx]++;
+  return kTfLiteOk;
+}
+
+TfLiteStatus MockMicroGraph::ResetVariableTensors() { return kTfLiteOk; }
+
+size_t MockMicroGraph::NumSubgraphInputs(int subgraph_idx) { return 1; }
+
+TfLiteEvalTensor* MockMicroGraph::GetSubgraphInput(int subgraph_idx,
+                                                   int tensor_idx) {
+  return mock_tensor_;
+}
+
+size_t MockMicroGraph::NumSubgraphOutputs(int subgraph_idx) { return 1; }
+
+TfLiteEvalTensor* MockMicroGraph::GetSubgraphOutput(int subgraph_idx,
+                                                    int tensor_idx) {
+  return mock_tensor_;
+}
+
+int MockMicroGraph::NumSubgraphs() { return kMaxSubgraphs; }
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h b/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h
new file mode 100644
index 0000000..b1aeb20
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/mock_micro_graph.h
@@ -0,0 +1,60 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
+#define TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_graph.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+
+// MockMicroGraph stubs out all MicroGraph methods used during invoke. A count
+// of the number of calls to invoke for each subgraph is maintained for
+// validation of control flow operators.
+class MockMicroGraph : public MicroGraph {
+ public:
+  explicit MockMicroGraph(SingleArenaBufferAllocator* allocator);
+  TfLiteStatus InvokeSubgraph(int subgraph_idx) override;
+  TfLiteStatus ResetVariableTensors() override;
+  size_t NumSubgraphInputs(int subgraph_idx) override;
+  TfLiteEvalTensor* GetSubgraphInput(int subgraph_idx, int tensor_idx) override;
+  size_t NumSubgraphOutputs(int subgraph_idx) override;
+  TfLiteEvalTensor* GetSubgraphOutput(int subgraph_idx,
+                                      int tensor_idx) override;
+  int NumSubgraphs() override;
+  int get_init_count() const { return init_count_; }
+  int get_prepare_count() const { return prepare_count_; }
+  int get_free_count() const { return free_count_; }
+  int get_invoke_count(int subgraph_idx) const {
+    return invoke_counts_[subgraph_idx];
+  }
+
+ private:
+  static constexpr int kMaxSubgraphs = 10;
+  SingleArenaBufferAllocator* allocator_;
+  TfLiteEvalTensor* mock_tensor_;
+  int init_count_;
+  int prepare_count_;
+  int free_count_;
+  int invoke_counts_[kMaxSubgraphs];
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_MOCK_MICRO_GRAPH_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cc
new file mode 100644
index 0000000..65515ff
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.cc
@@ -0,0 +1,170 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+NonPersistentArenaBufferAllocator::NonPersistentArenaBufferAllocator(
+    uint8_t* buffer, size_t buffer_size)
+    : buffer_head_(buffer),                // Start of the managed arena.
+      buffer_tail_(buffer + buffer_size),  // One past the arena's last byte.
+      head_temp_(buffer),                  // Resizable buffer starts out empty.
+      next_temp_(buffer) {}                // No temp allocations yet.
+// The destructor body is empty; it does not modify or release the arena.
+NonPersistentArenaBufferAllocator::~NonPersistentArenaBufferAllocator() {}
+
+// Allocates a non-resizable temp buffer; returns nullptr if it does not fit.
+uint8_t* NonPersistentArenaBufferAllocator::AllocateTemp(size_t size,
+                                                         size_t alignment) {
+  uint8_t* const aligned_result = AlignPointerUp(next_temp_, alignment);
+  const size_t available_memory =  // Clamped: align-up may overshoot the tail.
+      aligned_result > buffer_tail_ ? 0 : buffer_tail_ - aligned_result;
+  if (aligned_result > buffer_tail_ || available_memory < size) {
+    MicroPrintf("Failed to allocate temp memory. Requested: %u, "
+                "available %u, missing: %u",
+                size, available_memory, size - available_memory);
+    return nullptr;
+  }
+  next_temp_ = aligned_result + size;
+  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(aligned_result);
+  temp_buffer_count_++;
+  return aligned_result;
+}
+
+// Marks `temp_buf` as released by updating the outstanding-buffer bookkeeping.
+void NonPersistentArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
+  --temp_buffer_count_;
+  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(temp_buf);
+}
+
+// True once no temp buffer is outstanding; logs the bookkeeping otherwise.
+bool NonPersistentArenaBufferAllocator::IsAllTempDeallocated() {
+  if (temp_buffer_count_ == 0 && temp_buffer_ptr_check_sum_ == 0) {
+    return true;
+  }
+  MicroPrintf(
+      "Number of allocated temp buffers: %d. Checksum passing status: %d",
+      temp_buffer_count_, !temp_buffer_ptr_check_sum_);
+  return false;
+}
+
+// Signals that all temporary allocations can be reclaimed. TFLM calls this
+// API once it knows that every temporary buffer it requested has been
+// deallocated. The goal of this API is to let implementations of
+// INonPersistentBufferAllocator reuse buffers with reasonable
+// complexity.
+TfLiteStatus NonPersistentArenaBufferAllocator::ResetTempAllocations() {
+  if (IsAllTempDeallocated()) {
+    next_temp_ = head_temp_;
+    return kTfLiteOk;
+  }
+  MicroPrintf(
+      "All temp buffers must be freed before calling ResetTempAllocations()");
+  return kTfLiteError;
+}
+
+// Returns a buffer that is resizable via ResizeBuffer(), or nullptr on error.
+uint8_t* NonPersistentArenaBufferAllocator::AllocateResizableBuffer(
+    size_t size, size_t alignment) {
+  // The single supported resizable buffer always begins at the aligned head.
+  uint8_t* const resizable_start = AlignPointerUp(buffer_head_, alignment);
+
+  if (resizable_buffer_allocated_) {
+    MicroPrintf(
+        "Cannot allocate a new resizable buffer when one is already allocated");
+    return nullptr;
+  }
+
+  if (ResizeBuffer(resizable_start, size, alignment) != kTfLiteOk) {
+    return nullptr;
+  }
+  resizable_buffer_allocated_ = true;
+  return resizable_start;
+}
+
+// Resizes the buffer previously returned by AllocateResizableBuffer().
+// Note that ResizeBuffer(old_resizable_buf, 0, 1) effectively deallocates
+// a previously allocated resizable buffer.
+TfLiteStatus NonPersistentArenaBufferAllocator::ResizeBuffer(
+    uint8_t* resizable_buf, size_t size, size_t alignment) {
+  // The single supported resizable buffer always begins at the aligned head.
+  uint8_t* const resizable_start = AlignPointerUp(buffer_head_, alignment);
+  if (resizable_start != resizable_buf) {
+    MicroPrintf("Internal error: buffer is not resizable");
+    return kTfLiteError;
+  }
+  // Temp allocations made since the last reset sit right after the buffer.
+  if (next_temp_ != head_temp_) {
+    MicroPrintf("ResetTempAllocations() is not called before ResizeBuffer().");
+    return kTfLiteError;
+  }
+  const size_t capacity = buffer_tail_ - resizable_start;
+  if (size > capacity) {
+    MicroPrintf(
+        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
+        size, capacity, size - capacity);
+    return kTfLiteError;
+  }
+
+  // The temp region restarts immediately after the resized buffer.
+  next_temp_ = head_temp_ = resizable_start + size;
+  return kTfLiteOk;
+}
+
+// Releases the resizable buffer by resizing it down to zero bytes.
+TfLiteStatus NonPersistentArenaBufferAllocator::DeallocateResizableBuffer(
+    uint8_t* resizable_buf) {
+  const TfLiteStatus shrink_status = ResizeBuffer(resizable_buf, 0, 1);
+  if (shrink_status != kTfLiteOk) return shrink_status;
+  // The single resizable slot is free again for AllocateResizableBuffer().
+  resizable_buffer_allocated_ = false;
+  return kTfLiteOk;
+}
+
+// Returns the start of the overlay memory used for activation tensors and
+// scratch buffers by kernels at Invoke stage: the raw (unaligned) arena head.
+uint8_t* NonPersistentArenaBufferAllocator::GetOverlayMemoryAddress() const {
+  return buffer_head_;
+}
+
+// Reserves the size of the overlay memory. This overlay is reserved for the
+// kernels at Invoke stage. It is called an overlay because, before the Invoke
+// stage, the same memory can be used for temp buffers. The layout of the
+// memory is planned separately by the memory planner at Invoke stage.
+TfLiteStatus
+NonPersistentArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
+    size_t size, size_t alignment) {
+  // The overlay is sized through the resizable buffer at the aligned head.
+  return ResizeBuffer(AlignPointerUp(buffer_head_, alignment), size, alignment);
+}
+
+// Returns the non-persistent bytes in use, measured from the arena head.
+size_t NonPersistentArenaBufferAllocator::GetNonPersistentUsedBytes() const {
+  return static_cast<size_t>(next_temp_ - buffer_head_);
+}
+
+// Returns the number of bytes available with a given alignment, taking any
+// temporary allocations into account; 0 when no aligned range remains.
+size_t NonPersistentArenaBufferAllocator::GetAvailableMemory(
+    size_t alignment) const {
+  uint8_t* const aligned_temp = AlignPointerUp(next_temp_, alignment);
+  uint8_t* const aligned_tail = AlignPointerDown(buffer_tail_, alignment);
+  return aligned_tail > aligned_temp ? aligned_tail - aligned_temp : 0;
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h
new file mode 100644
index 0000000..2a3d639
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/non_persistent_arena_buffer_allocator.h
@@ -0,0 +1,104 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+
+namespace tflite {
+
+// Implements INonPersistentBufferAllocator on an arena that is dedicated to
+// non-persistent buffers.
+class NonPersistentArenaBufferAllocator : public INonPersistentBufferAllocator {
+ public:
+  NonPersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
+  virtual ~NonPersistentArenaBufferAllocator();
+
+  // Allocates a temporary buffer. This buffer is not resizable.
+  uint8_t* AllocateTemp(size_t size, size_t alignment) override;
+
+  // Signals that a temporary buffer is no longer needed.
+  void DeallocateTemp(uint8_t* buf) override;
+
+  // Returns true if all temporary buffers are already deallocated.
+  bool IsAllTempDeallocated() override;
+
+  // Signals that all temporary allocations can be reclaimed. TFLM calls this
+  // API when it knows that all temporary buffers that it requested have been
+  // deallocated.
+  TfLiteStatus ResetTempAllocations() override;
+
+  // Returns a buffer that is resizable via ResizeBuffer().
+  uint8_t* AllocateResizableBuffer(size_t size, size_t alignment) override;
+
+  // Resizes a buffer that was previously returned by
+  // AllocateResizableBuffer().
+  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                            size_t alignment) override;
+
+  // Frees up the memory occupied by the resizable buffer.
+  TfLiteStatus DeallocateResizableBuffer(uint8_t* resizable_buf) override;
+
+  // Returns a pointer pointing to the start of the overlay memory, which is
+  // used for activation tensors and scratch buffers by kernels at Invoke stage.
+  uint8_t* GetOverlayMemoryAddress() const override;
+
+  // Reserves the size of the overlay memory. This overlay is reserved for the
+  // kernels at Invoke stage. This is referred to as the overlay because before
+  // the Invoke stage, the same memory can be used for temp buffers. The layout
+  // of the memory is planned by the memory planner separately at Invoke stage.
+  TfLiteStatus ReserveNonPersistentOverlayMemory(size_t size,
+                                                 size_t alignment) override;
+
+  // Returns the size of non-persistent buffer in use.
+  size_t GetNonPersistentUsedBytes() const override;
+
+  // Returns the number of bytes available with a given alignment. This number
+  // takes into account any temporary allocations.
+  size_t GetAvailableMemory(size_t alignment) const override;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+
+ private:
+  // The memory arena that this allocator manages.
+  uint8_t* const buffer_head_;
+  uint8_t* const buffer_tail_;  // One past the last byte of the arena.
+
+  // The whole region is split into two parts:
+  // buffer_head_ to head_temp_ - 1 belongs to the only resizable buffer.
+  // head_temp_ to buffer_tail_ can be used for (non-resizable) temp buffers.
+  uint8_t* head_temp_;
+
+  // next_temp_ points to the next available temp buffer allocation address and
+  // its range is between head_temp_ and buffer_tail_.
+  uint8_t* next_temp_;
+
+  // XOR checksum of the addresses of outstanding temp buffers.
+  // If all temp buffers are deallocated OR no temp buffers are allocated,
+  // temp_buffer_ptr_check_sum_ == 0.
+  intptr_t temp_buffer_ptr_check_sum_ = 0;
+  // Count of outstanding temp buffers.
+  int temp_buffer_count_ = 0;
+  bool resizable_buffer_allocated_ = false;  // True while the buffer is held.
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_NON_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cc b/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cc
new file mode 100644
index 0000000..a60b626
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.cc
@@ -0,0 +1,32 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_error_reporter.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_utils.h"
+
+namespace tflite {
+
+TfLiteStatus GetRegistrationFromOpCode(
+    const OperatorCode* opcode, const OpResolver& op_resolver,
+    const TfLiteRegistration** registration) {
+  const auto reporter = tflite::GetMicroErrorReporter();  // For the overload.
+  return GetRegistrationFromOpCode(opcode, op_resolver, reporter, registration);
+}
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h b/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h
new file mode 100644
index 0000000..bf6a2db
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/op_resolver_bridge.h
@@ -0,0 +1,38 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_
+#define TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/api/op_resolver.h"  // needed for the Using declarative
+
+namespace tflite {
+
+// Forward declaration of the classes and structs used here.
+struct OperatorCode;
+
+using TfLiteBridgeOpResolver = OpResolver;  // Bridge-side name for OpResolver.
+
+// Handles the logic for converting between an OperatorCode structure extracted
+// from a flatbuffer (`opcode`) and information about a registered operator
+// implementation (presumably written to `*registration` -- TODO confirm).
+TfLiteStatus GetRegistrationFromOpCode(const OperatorCode* opcode,
+                                       const OpResolver& op_resolver,
+                                       const TfLiteRegistration** registration);
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_TFLITE_BRIDGE_OP_RESOLVER_BRIDGE_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cc
new file mode 100644
index 0000000..9237691
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.cc
@@ -0,0 +1,52 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h"
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+PersistentArenaBufferAllocator::PersistentArenaBufferAllocator(
+    uint8_t* buffer, size_t buffer_size)
+    : buffer_head_(buffer),                // Start of the managed arena.
+      buffer_tail_(buffer + buffer_size),  // One past the arena's last byte.
+      tail_temp_(buffer_tail_) {}  // Nothing allocated yet: starts at the tail.
+// The destructor body is empty; it does not modify or release the arena.
+PersistentArenaBufferAllocator::~PersistentArenaBufferAllocator() {}
+
+uint8_t* PersistentArenaBufferAllocator::AllocatePersistentBuffer(
+    size_t size, size_t alignment) {
+  // Persistent memory is carved downward from tail_temp_ toward buffer_head_.
+  uint8_t* const candidate = AlignPointerDown(tail_temp_ - size, alignment);
+  if (candidate >= buffer_head_) {
+    tail_temp_ = candidate;
+    return candidate;
+  }
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+  const size_t shortfall = buffer_head_ - candidate;
+  MicroPrintf(
+      "Failed to allocate tail memory. Requested: %u, "
+      "available %u, missing: %u",
+      size, size - shortfall, shortfall);
+#endif
+  return nullptr;
+}
+
+size_t PersistentArenaBufferAllocator::GetPersistentUsedBytes() const {
+  return static_cast<size_t>(buffer_tail_ - tail_temp_);  // Allocated tail.
+}
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h
new file mode 100644
index 0000000..911c486
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/persistent_arena_buffer_allocator.h
@@ -0,0 +1,58 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+
+namespace tflite {
+
+// PersistentArenaBufferAllocator is an implementation of the
+// IPersistentBufferAllocator interface on an arena that is dedicated to
+// persistent buffers.
+class PersistentArenaBufferAllocator : public IPersistentBufferAllocator {
+ public:
+  PersistentArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
+  virtual ~PersistentArenaBufferAllocator();
+
+  // Allocates persistent memory. The persistent buffer is never freed.
+  // Returns nullptr if an error occurred.
+  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
+
+  // Returns the size of all persistent allocations in bytes.
+  size_t GetPersistentUsedBytes() const override;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+ private:
+  // The memory arena that this allocator manages.
+  uint8_t* const buffer_head_;
+  uint8_t* const buffer_tail_;  // One past the last byte of the arena.
+
+  // The whole region is split into two parts:
+  // tail_temp_ to buffer_tail_ contains allocated buffers;
+  // buffer_head_ to tail_temp_ - 1 is the space that is still available.
+  // So in essence, the allocated region grows from the bottom and emulates
+  // SingleArenaBufferAllocator's persistent part.
+  uint8_t* tail_temp_;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_PERSISTENT_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cc
index b108d13..11e4d1b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.cc
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,33 +15,49 @@ limitations under the License.
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h"
 
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
 
 namespace tflite {
 
+size_t RecordingMicroAllocator::GetDefaultTailUsage() {
+  // RecordingMicroAllocator inherits from MicroAllocator, so its tail usage is
+  // MicroAllocator's with the sizes of SingleArenaBufferAllocator and
+  // MicroAllocator swapped for those of their recording counterparts.
+  return MicroAllocator::GetDefaultTailUsage(
+             /*is_memory_planner_given=*/false) +
+         AlignSizeUp<RecordingSingleArenaBufferAllocator>() -
+         AlignSizeUp<SingleArenaBufferAllocator>() +
+         AlignSizeUp<RecordingMicroAllocator>() - AlignSizeUp<MicroAllocator>();
+}
+
 RecordingMicroAllocator::RecordingMicroAllocator(
-    RecordingSimpleMemoryAllocator* recording_memory_allocator,
-    ErrorReporter* error_reporter)
-    : MicroAllocator(recording_memory_allocator, error_reporter),
+    RecordingSingleArenaBufferAllocator* recording_memory_allocator,
+    MicroMemoryPlanner* memory_planner)
+    : MicroAllocator(recording_memory_allocator, memory_planner),
       recording_memory_allocator_(recording_memory_allocator) {}
 
-RecordingMicroAllocator* RecordingMicroAllocator::Create(
-    uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) {
-  TFLITE_DCHECK(error_reporter != nullptr);
-
-  RecordingSimpleMemoryAllocator* simple_memory_allocator =
-      RecordingSimpleMemoryAllocator::Create(error_reporter, tensor_arena,
-                                             arena_size);
+RecordingMicroAllocator* RecordingMicroAllocator::Create(uint8_t* tensor_arena,
+                                                         size_t arena_size) {
+  RecordingSingleArenaBufferAllocator* simple_memory_allocator =
+      RecordingSingleArenaBufferAllocator::Create(tensor_arena, arena_size);
   TFLITE_DCHECK(simple_memory_allocator != nullptr);
 
-  uint8_t* allocator_buffer = simple_memory_allocator->AllocateFromTail(
+  uint8_t* memory_planner_buffer =
+      simple_memory_allocator->AllocatePersistentBuffer(
+          sizeof(GreedyMemoryPlanner), alignof(GreedyMemoryPlanner));
+  GreedyMemoryPlanner* memory_planner =
+      new (memory_planner_buffer) GreedyMemoryPlanner();
+
+  uint8_t* allocator_buffer = simple_memory_allocator->AllocatePersistentBuffer(
       sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator));
   RecordingMicroAllocator* allocator = new (allocator_buffer)
-      RecordingMicroAllocator(simple_memory_allocator, error_reporter);
+      RecordingMicroAllocator(simple_memory_allocator, memory_planner);
   return allocator;
 }
 
@@ -63,29 +79,22 @@ RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation(
     case RecordedAllocationType::kOpData:
       return recorded_op_data_;
   }
-  TF_LITE_REPORT_ERROR(error_reporter(), "Invalid allocation type supplied: %d",
-                       allocation_type);
+  MicroPrintf("Invalid allocation type supplied: %d", allocation_type);
   return RecordedAllocation();
 }
 
-const RecordingSimpleMemoryAllocator*
+const RecordingSingleArenaBufferAllocator*
 RecordingMicroAllocator::GetSimpleMemoryAllocator() const {
   return recording_memory_allocator_;
 }
 
 void RecordingMicroAllocator::PrintAllocations() const {
-  TF_LITE_REPORT_ERROR(
-      error_reporter(),
-      "[RecordingMicroAllocator] Arena allocation total %d bytes",
-      recording_memory_allocator_->GetUsedBytes());
-  TF_LITE_REPORT_ERROR(
-      error_reporter(),
-      "[RecordingMicroAllocator] Arena allocation head %d bytes",
-      recording_memory_allocator_->GetHeadUsedBytes());
-  TF_LITE_REPORT_ERROR(
-      error_reporter(),
-      "[RecordingMicroAllocator] Arena allocation tail %d bytes",
-      recording_memory_allocator_->GetTailUsedBytes());
+  MicroPrintf("[RecordingMicroAllocator] Arena allocation total %d bytes",
+              recording_memory_allocator_->GetUsedBytes());
+  MicroPrintf("[RecordingMicroAllocator] Arena allocation head %d bytes",
+              recording_memory_allocator_->GetNonPersistentUsedBytes());
+  MicroPrintf("[RecordingMicroAllocator] Arena allocation tail %d bytes",
+              recording_memory_allocator_->GetPersistentUsedBytes());
   PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData,
                           "TfLiteEvalTensor data", "allocations");
   PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData,
@@ -119,8 +128,7 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
 #ifndef TF_LITE_STRIP_ERROR_STRINGS
   RecordedAllocation allocation = GetRecordedAllocation(allocation_type);
   if (allocation.used_bytes > 0 || allocation.requested_bytes > 0) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter(),
+    MicroPrintf(
         "[RecordingMicroAllocator] '%s' used %d bytes with alignment overhead "
         "(requested %d bytes for %d %s)",
         allocation_name, allocation.used_bytes, allocation.requested_bytes,
@@ -130,91 +138,90 @@ void RecordingMicroAllocator::PrintRecordedAllocation(
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations(
-    const Model* model, NodeAndRegistration** node_and_registrations) {
+    const Model* model, SubgraphAllocations* subgraph_allocations) {
   RecordedAllocation allocations = SnapshotAllocationUsage();
 
-  TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations(
-      model, node_and_registrations);
+  TfLiteStatus status =
+      MicroAllocator::AllocateNodeAndRegistrations(model, subgraph_allocations);
 
   RecordAllocationUsage(allocations,
                         recorded_node_and_registration_array_data_);
-  // The allocation count in SimpleMemoryAllocator will only be 1. To provide
-  // better logging, decrement by 1 and add in the actual number of operators
-  // used in the graph:
-  // The allocation for this recording will always be 1. This is because the
-  // parent class mallocs one large allocation for the number of nodes in the
-  // graph (e.g. sizeof(NodeAndRegistration) * num_nodes).
-  // To prevent extra overhead and potential for fragmentation, manually adjust
-  // the accounting by decrementing by 1 and adding the actual number of nodes
-  // used in the graph:
-  recorded_node_and_registration_array_data_.count +=
-      GetSubGraphFromModel(model)->operators()->size() - 1;
-  return status;
-}
 
-TfLiteStatus
-RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
-    const Model* model, const MicroOpResolver& op_resolver,
-    NodeAndRegistration* node_and_registrations) {
-  RecordedAllocation allocations = SnapshotAllocationUsage();
-
-  TfLiteStatus status =
-      MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
-          model, op_resolver, node_and_registrations);
-
-  RecordAllocationUsage(allocations, recorded_op_data_);
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    // The allocation count in SingleArenaBufferAllocator will only be 1. To
+    // provide better logging, decrement by 1 and add in the actual number of
+    // operators used in the graph: The allocation for this recording will
+    // always be 1. This is because the parent class mallocs one large
+    // allocation for the number of nodes in the graph (e.g.
+    // sizeof(NodeAndRegistration) * num_nodes). To prevent extra overhead and
+    // potential for fragmentation, manually adjust the accounting by
+    // decrementing by 1 and adding the actual number of nodes used in the
+    // graph:
+    if (model->subgraphs()->Get(subgraph_idx)->operators()) {
+      recorded_node_and_registration_array_data_.count +=
+          model->subgraphs()->Get(subgraph_idx)->operators()->size() - 1;
+    } else {
+      recorded_node_and_registration_array_data_.count -= 1;
+    }
+  }
   return status;
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors(
-    const Model* model, TfLiteEvalTensor** eval_tensors) {
+    const Model* model, SubgraphAllocations* subgraph_allocations) {
   RecordedAllocation allocations = SnapshotAllocationUsage();
 
   TfLiteStatus status =
-      MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors);
+      MicroAllocator::AllocateTfLiteEvalTensors(model, subgraph_allocations);
 
   RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_);
-  // The allocation for this recording will always be 1. This is because the
-  // parent class mallocs one large allocation for the number of tensors in the
-  // graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors).
-  // To prevent extra overhead and potential for fragmentation, manually adjust
-  // the accounting by decrementing by 1 and adding the actual number of tensors
-  // used in the graph:
-  recorded_tflite_eval_tensor_data_.count +=
-      GetSubGraphFromModel(model)->tensors()->size() - 1;
+
+  for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs()->size();
+       subgraph_idx++) {
+    // The allocation for this recording will always be 1. This is because the
+    // parent class mallocs one large allocation for the number of tensors in
+    // the graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors). To prevent extra
+    // overhead and potential for fragmentation, manually adjust the accounting
+    // by decrementing by 1 and adding the actual number of tensors used in the
+    // graph:
+    recorded_tflite_eval_tensor_data_.count +=
+        model->subgraphs()->Get(subgraph_idx)->tensors()->size() - 1;
+  }
   return status;
 }
 
 TfLiteStatus RecordingMicroAllocator::AllocateVariables(
-    const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) {
+    const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+    const int32_t* offline_planner_offsets) {
   RecordedAllocation allocations = SnapshotAllocationUsage();
 
-  TfLiteStatus status =
-      MicroAllocator::AllocateVariables(subgraph, eval_tensors);
+  TfLiteStatus status = MicroAllocator::AllocateVariables(
+      subgraph, eval_tensors, offline_planner_offsets);
 
   RecordAllocationUsage(allocations,
                         recorded_tflite_tensor_variable_buffer_data_);
   return status;
 }
 
-TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal(
-    const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
+TfLiteTensor*
+RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal() {
   RecordedAllocation allocations = SnapshotAllocationUsage();
 
-  TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal(
-      model, eval_tensors, tensor_index);
+  TfLiteTensor* result =
+      MicroAllocator::AllocatePersistentTfLiteTensorInternal();
 
   RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_);
   return result;
 }
 
 TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
-    const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
-    int tensor_index, bool allocate_temp) {
+    const Model* model, TfLiteTensor* tensor, int tensor_index,
+    int subgraph_index, bool allocate_temp) {
   RecordedAllocation allocations = SnapshotAllocationUsage();
 
   TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
-      model, subgraph, tensor, tensor_index, allocate_temp);
+      model, tensor, tensor_index, subgraph_index, allocate_temp);
 
   RecordAllocationUsage(allocations,
                         recorded_persistent_tflite_tensor_quantization_data_);
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h
index c8470c1..9d694af 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
 #define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_ALLOCATOR_H_
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_allocator.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h"
 
 namespace tflite {
 
@@ -48,21 +48,23 @@ struct RecordedAllocation {
 // Utility subclass of MicroAllocator that records all allocations
 // inside the arena. A summary of allocations can be logged through the
 // ErrorReporter by invoking LogAllocations(). This special allocator requires
-// an instance of RecordingSimpleMemoryAllocator to capture allocations in the
-// head and tail. Arena allocation recording can be retrieved by type through
-// the GetRecordedAllocation() function. This class should only be used for
-// auditing memory usage or integration testing.
+// an instance of RecordingSingleArenaBufferAllocator to capture allocations in
+// the head and tail. Arena allocation recording can be retrieved by type
+// through the GetRecordedAllocation() function. This class should only be used
+// for auditing memory usage or integration testing.
 class RecordingMicroAllocator : public MicroAllocator {
  public:
   static RecordingMicroAllocator* Create(uint8_t* tensor_arena,
-                                         size_t arena_size,
-                                         ErrorReporter* error_reporter);
+                                         size_t arena_size);
+
+  // Returns the fixed amount of memory overhead of RecordingMicroAllocator.
+  static size_t GetDefaultTailUsage();
 
   // Returns the recorded allocations information for a given allocation type.
   RecordedAllocation GetRecordedAllocation(
       RecordedAllocationType allocation_type) const;
 
-  const RecordingSimpleMemoryAllocator* GetSimpleMemoryAllocator() const;
+  const RecordingSingleArenaBufferAllocator* GetSimpleMemoryAllocator() const;
 
   // Logs out through the ErrorReporter all allocation recordings by type
   // defined in RecordedAllocationType.
@@ -72,32 +74,28 @@ class RecordingMicroAllocator : public MicroAllocator {
 
  protected:
   TfLiteStatus AllocateNodeAndRegistrations(
-      const Model* model,
-      NodeAndRegistration** node_and_registrations) override;
-  TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
-      const Model* model, const MicroOpResolver& op_resolver,
-      NodeAndRegistration* node_and_registrations) override;
+      const Model* model, SubgraphAllocations* subgraph_allocations) override;
   TfLiteStatus AllocateTfLiteEvalTensors(
-      const Model* model, TfLiteEvalTensor** eval_tensors) override;
-  TfLiteStatus AllocateVariables(const SubGraph* subgraph,
-                                 TfLiteEvalTensor* eval_tensors) override;
+      const Model* model, SubgraphAllocations* subgraph_allocations) override;
+  TfLiteStatus AllocateVariables(
+      const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors,
+      const int32_t* offline_planner_offsets) override;
   // TODO(b/162311891): Once all kernels have been updated to the new API drop
   // this method. It is only used to record TfLiteTensor persistent allocations.
-  TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
-      const Model* model, TfLiteEvalTensor* eval_tensors,
-      int tensor_index) override;
+  TfLiteTensor* AllocatePersistentTfLiteTensorInternal() override;
+
   // TODO(b/162311891): Once all kernels have been updated to the new API drop
   // this function since all allocations for quantized data will take place in
   // the temp section.
   TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model,
-                                                  const SubGraph* subgraph,
                                                   TfLiteTensor* tensor,
                                                   int tensor_index,
+                                                  int subgraph_index,
                                                   bool allocate_temp) override;
 
  private:
-  RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator,
-                          ErrorReporter* error_reporter);
+  RecordingMicroAllocator(RecordingSingleArenaBufferAllocator* memory_allocator,
+                          MicroMemoryPlanner* memory_planner);
 
   void PrintRecordedAllocation(RecordedAllocationType allocation_type,
                                const char* allocation_name,
@@ -107,7 +105,7 @@ class RecordingMicroAllocator : public MicroAllocator {
   void RecordAllocationUsage(const RecordedAllocation& snapshotted_allocation,
                              RecordedAllocation& recorded_allocation);
 
-  const RecordingSimpleMemoryAllocator* recording_memory_allocator_;
+  const RecordingSingleArenaBufferAllocator* recording_memory_allocator_;
 
   RecordedAllocation recorded_tflite_eval_tensor_data_ = {};
   RecordedAllocation recorded_persistent_tflite_tensor_data_ = {};
@@ -115,6 +113,8 @@ class RecordingMicroAllocator : public MicroAllocator {
   RecordedAllocation recorded_persistent_buffer_data_ = {};
   RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {};
   RecordedAllocation recorded_node_and_registration_array_data_ = {};
+
+  // TODO(b/187993291): Re-enable OpData allocation tracking.
   RecordedAllocation recorded_op_data_ = {};
 
   TF_LITE_REMOVE_VIRTUAL_DELETE
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h
index 90f27a4..ce44fbd 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/recording_micro_interpreter.h
@@ -1,4 +1,4 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_RECORDING_MICRO_INTERPRETER_H_
 
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_interpreter.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_profiler_interface.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/recording_micro_allocator.h"
 
 namespace tflite {
@@ -37,19 +38,22 @@ class RecordingMicroInterpreter : public MicroInterpreter {
   RecordingMicroInterpreter(const Model* model,
                             const MicroOpResolver& op_resolver,
                             uint8_t* tensor_arena, size_t tensor_arena_size,
-                            ErrorReporter* error_reporter)
-      : MicroInterpreter(model, op_resolver,
-                         RecordingMicroAllocator::Create(
-                             tensor_arena, tensor_arena_size, error_reporter),
-                         error_reporter),
+                            MicroResourceVariables* resource_variable = nullptr,
+                            MicroProfilerInterface* profiler = nullptr)
+      : MicroInterpreter(
+            model, op_resolver,
+            RecordingMicroAllocator::Create(tensor_arena, tensor_arena_size),
+            resource_variable, profiler),
         recording_micro_allocator_(
             static_cast<const RecordingMicroAllocator&>(allocator())) {}
 
   RecordingMicroInterpreter(const Model* model,
                             const MicroOpResolver& op_resolver,
                             RecordingMicroAllocator* allocator,
-                            ErrorReporter* error_reporter)
-      : MicroInterpreter(model, op_resolver, allocator, error_reporter),
+                            MicroResourceVariables* resource_variable = nullptr,
+                            MicroProfilerInterface* profiler = nullptr)
+      : MicroInterpreter(model, op_resolver, allocator, resource_variable,
+                         profiler),
         recording_micro_allocator_(*allocator) {}
 
   const RecordingMicroAllocator& GetMicroAllocator() const {
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cc
similarity index 51%
rename from edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.cc
rename to edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cc
index d8e9910..746561c 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h"
 
 #include <new>
 
@@ -21,47 +21,47 @@ limitations under the License.
 
 namespace tflite {
 
-RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator(
-    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size)
-    : SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size),
+RecordingSingleArenaBufferAllocator::RecordingSingleArenaBufferAllocator(
+    uint8_t* buffer_head, size_t buffer_size)
+    : SingleArenaBufferAllocator(buffer_head, buffer_size),
       requested_head_bytes_(0),
       requested_tail_bytes_(0),
       used_bytes_(0),
       alloc_count_(0) {}
 
-RecordingSimpleMemoryAllocator::~RecordingSimpleMemoryAllocator() {}
+RecordingSingleArenaBufferAllocator::~RecordingSingleArenaBufferAllocator() {}
 
-RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create(
-    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
-  TFLITE_DCHECK(error_reporter != nullptr);
+RecordingSingleArenaBufferAllocator*
+RecordingSingleArenaBufferAllocator::Create(uint8_t* buffer_head,
+                                            size_t buffer_size) {
   TFLITE_DCHECK(buffer_head != nullptr);
-  RecordingSimpleMemoryAllocator tmp =
-      RecordingSimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
+  RecordingSingleArenaBufferAllocator tmp =
+      RecordingSingleArenaBufferAllocator(buffer_head, buffer_size);
 
-  uint8_t* allocator_buffer =
-      tmp.AllocateFromTail(sizeof(RecordingSimpleMemoryAllocator),
-                           alignof(RecordingSimpleMemoryAllocator));
+  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
+      sizeof(RecordingSingleArenaBufferAllocator),
+      alignof(RecordingSingleArenaBufferAllocator));
   // Use the default copy constructor to populate internal states.
-  return new (allocator_buffer) RecordingSimpleMemoryAllocator(tmp);
+  return new (allocator_buffer) RecordingSingleArenaBufferAllocator(tmp);
 }
 
-size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const {
+size_t RecordingSingleArenaBufferAllocator::GetRequestedBytes() const {
   return requested_head_bytes_ + requested_tail_bytes_;
 }
 
-size_t RecordingSimpleMemoryAllocator::GetUsedBytes() const {
+size_t RecordingSingleArenaBufferAllocator::GetUsedBytes() const {
   return used_bytes_;
 }
 
-size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const {
+size_t RecordingSingleArenaBufferAllocator::GetAllocatedCount() const {
   return alloc_count_;
 }
 
-TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
-    size_t size, size_t alignment) {
+TfLiteStatus RecordingSingleArenaBufferAllocator::ResizeBuffer(
+    uint8_t* resizable_buf, size_t size, size_t alignment) {
   const uint8_t* previous_head = head();
   TfLiteStatus status =
-      SimpleMemoryAllocator::SetHeadBufferSize(size, alignment);
+      SingleArenaBufferAllocator::ResizeBuffer(resizable_buf, size, alignment);
   if (status == kTfLiteOk) {
     used_bytes_ += head() - previous_head;
     requested_head_bytes_ = size;
@@ -69,10 +69,11 @@ TfLiteStatus RecordingSimpleMemoryAllocator::SetHeadBufferSize(
   return status;
 }
 
-uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size,
-                                                          size_t alignment) {
+uint8_t* RecordingSingleArenaBufferAllocator::AllocatePersistentBuffer(
+    size_t size, size_t alignment) {
   const uint8_t* previous_tail = tail();
-  uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment);
+  uint8_t* result =
+      SingleArenaBufferAllocator::AllocatePersistentBuffer(size, alignment);
   if (result != nullptr) {
     used_bytes_ += previous_tail - tail();
     requested_tail_bytes_ += size;
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h
similarity index 57%
rename from edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h
rename to edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h
index e8ea581..cb58a8b 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/recording_simple_memory_allocator.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/recording_single_arena_buffer_allocator.h
@@ -13,28 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
-#define TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
 
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h"
 
 namespace tflite {
 
-// Utility class used to log allocations of a SimpleMemoryAllocator. Should only
-// be used in debug/evaluation settings or unit tests to evaluate allocation
-// usage.
-class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
+// Utility class used to log allocations of a SingleArenaBufferAllocator. Should
+// only be used in debug/evaluation settings or unit tests to evaluate
+// allocation usage.
+class RecordingSingleArenaBufferAllocator : public SingleArenaBufferAllocator {
  public:
-  RecordingSimpleMemoryAllocator(ErrorReporter* error_reporter,
-                                 uint8_t* buffer_head, size_t buffer_size);
+  RecordingSingleArenaBufferAllocator(uint8_t* buffer_head, size_t buffer_size);
   // TODO(b/157615197): Cleanup constructors/destructor and use factory
   // functions.
-  ~RecordingSimpleMemoryAllocator() override;
+  ~RecordingSingleArenaBufferAllocator() override;
 
-  static RecordingSimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
-                                                uint8_t* buffer_head,
-                                                size_t buffer_size);
+  static RecordingSingleArenaBufferAllocator* Create(uint8_t* buffer_head,
+                                                     size_t buffer_size);
 
   // Returns the number of bytes requested from the head or tail.
   size_t GetRequestedBytes() const;
@@ -47,8 +45,9 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
   // Returns the number of alloc calls from the head or tail.
   size_t GetAllocatedCount() const;
 
-  TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment) override;
-  uint8_t* AllocateFromTail(size_t size, size_t alignment) override;
+  TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                            size_t alignment) override;
+  uint8_t* AllocatePersistentBuffer(size_t size, size_t alignment) override;
 
  private:
   size_t requested_head_bytes_;
@@ -61,4 +60,4 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator {
 
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_RECORDING_SIMPLE_MEMORY_ALLOCATOR_H_
+#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_RECORDING_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.cc
deleted file mode 100644
index 97ef4f5..0000000
--- a/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <new>
-
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
-
-namespace tflite {
-
-SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
-                                             uint8_t* buffer_head,
-                                             uint8_t* buffer_tail)
-    : error_reporter_(error_reporter),
-      buffer_head_(buffer_head),
-      buffer_tail_(buffer_tail),
-      head_(buffer_head),
-      tail_(buffer_tail),
-      temp_(buffer_head_) {}
-
-SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter,
-                                             uint8_t* buffer,
-                                             size_t buffer_size)
-    : SimpleMemoryAllocator(error_reporter, buffer, buffer + buffer_size) {}
-
-/* static */
-SimpleMemoryAllocator* SimpleMemoryAllocator::Create(
-    ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) {
-  TFLITE_DCHECK(error_reporter != nullptr);
-  TFLITE_DCHECK(buffer_head != nullptr);
-  SimpleMemoryAllocator tmp =
-      SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size);
-
-  // Allocate enough bytes from the buffer to create a SimpleMemoryAllocator.
-  // The new instance will use the current adjusted tail buffer from the tmp
-  // allocator instance.
-  uint8_t* allocator_buffer = tmp.AllocateFromTail(
-      sizeof(SimpleMemoryAllocator), alignof(SimpleMemoryAllocator));
-  // Use the default copy constructor to populate internal states.
-  return new (allocator_buffer) SimpleMemoryAllocator(tmp);
-}
-
-SimpleMemoryAllocator::~SimpleMemoryAllocator() {}
-
-TfLiteStatus SimpleMemoryAllocator::SetHeadBufferSize(size_t size,
-                                                      size_t alignment) {
-  if (head_ != temp_) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Internal error: SetHeadBufferSize() needs to be called "
-        "after ResetTempAllocations().");
-    return kTfLiteError;
-  }
-
-  uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment);
-  const size_t available_memory = tail_ - aligned_result;
-  if (available_memory < size) {
-    TF_LITE_REPORT_ERROR(
-        error_reporter_,
-        "Failed to set head size. Requested: %u, available %u, missing: %u",
-        size, available_memory, size - available_memory);
-    return kTfLiteError;
-  }
-  head_ = aligned_result + size;
-  temp_ = head_;
-
-  return kTfLiteOk;
-}
-
-uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
-                                                 size_t alignment) {
-  uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment);
-  if (aligned_result < head_) {
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-    const size_t missing_memory = head_ - aligned_result;
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Failed to allocate tail memory. Requested: %u, "
-                         "available %u, missing: %u",
-                         size, size - missing_memory, missing_memory);
-#endif
-    return nullptr;
-  }
-  tail_ = aligned_result;
-  return aligned_result;
-}
-
-uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
-  uint8_t* const aligned_result = AlignPointerUp(temp_, alignment);
-  const size_t available_memory = tail_ - aligned_result;
-  if (available_memory < size) {
-    TF_LITE_REPORT_ERROR(error_reporter_,
-                         "Failed to allocate temp memory. Requested: %u, "
-                         "available %u, missing: %u",
-                         size, available_memory, size - available_memory);
-    return nullptr;
-  }
-  temp_ = aligned_result + size;
-  return aligned_result;
-}
-
-void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }
-
-uint8_t* SimpleMemoryAllocator::GetHeadBuffer() const { return buffer_head_; }
-
-size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
-  return head_ - buffer_head_;
-}
-
-size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
-  return buffer_tail_ - tail_;
-}
-
-size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const {
-  uint8_t* const aligned_temp = AlignPointerUp(temp_, alignment);
-  uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment);
-  return aligned_tail - aligned_temp;
-}
-
-size_t SimpleMemoryAllocator::GetUsedBytes() const {
-  return GetBufferSize() - (tail_ - temp_);
-}
-
-size_t SimpleMemoryAllocator::GetBufferSize() const {
-  return buffer_tail_ - buffer_head_;
-}
-
-uint8_t* SimpleMemoryAllocator::head() const { return head_; }
-
-uint8_t* SimpleMemoryAllocator::tail() const { return tail_; }
-
-}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h
deleted file mode 100644
index 3ee2f36..0000000
--- a/edge-impulse-sdk/tensorflow/lite/micro/simple_memory_allocator.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
-#define TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
-
-#include <cstddef>
-#include <cstdint>
-
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
-#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
-
-namespace tflite {
-
-// TODO(petewarden): This allocator never frees up or reuses  any memory, even
-// though we have enough information about lifetimes of the tensors to do so.
-// This makes it pretty wasteful, so we should use a more intelligent method.
-class SimpleMemoryAllocator {
- public:
-  // TODO(b/157615197): Cleanup constructors/destructor and use factory
-  // functions.
-  SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer_head,
-                        uint8_t* buffer_tail);
-  SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer,
-                        size_t buffer_size);
-  virtual ~SimpleMemoryAllocator();
-
-  // Creates a new SimpleMemoryAllocator from a given buffer head and size.
-  static SimpleMemoryAllocator* Create(ErrorReporter* error_reporter,
-                                       uint8_t* buffer_head,
-                                       size_t buffer_size);
-
-  // Adjusts the head (lowest address and moving upwards) memory allocation to a
-  // given size. Calls to this method will also invalidate all temporary
-  // allocation values (it sets the location of temp space at the end of the
-  // head section). This call will fail if a chain of allocations through
-  // AllocateTemp() have not been cleaned up with a call to
-  // ResetTempAllocations().
-  virtual TfLiteStatus SetHeadBufferSize(size_t size, size_t alignment);
-
-  // Allocates memory starting at the tail of the arena (highest address and
-  // moving downwards).
-  virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
-
-  // Allocates a temporary buffer from the head of the arena (lowest address and
-  // moving upwards) but does not update the actual head allocation size or
-  // position. The returned buffer is guaranteed until either
-  // ResetTempAllocations() is called or another call to AllocateFromHead().
-  // Repeat calls to this function will create a chain of temp allocations. All
-  // calls to AllocateTemp() must end with a call to ResetTempAllocations(). If
-  // AllocateFromHead() is called before a call to ResetTempAllocations(), it
-  // will fail with an error message.
-  virtual uint8_t* AllocateTemp(size_t size, size_t alignment);
-
-  // Resets a chain of temporary allocations back to the current head of the
-  // arena (lowest address).
-  virtual void ResetTempAllocations();
-
-  // Returns a pointer to the buffer currently assigned to the head section.
-  // This buffer is set by calling SetHeadSize().
-  uint8_t* GetHeadBuffer() const;
-
-  // Returns the size of the head section in bytes.
-  size_t GetHeadUsedBytes() const;
-
-  // Returns the size of all allocations in the tail section in bytes.
-  size_t GetTailUsedBytes() const;
-
-  // Returns the number of bytes available with a given alignment. This number
-  // takes in account any temporary allocations.
-  size_t GetAvailableMemory(size_t alignment) const;
-
-  // Returns the number of used bytes in the allocator. This number takes in
-  // account any temporary allocations.
-  size_t GetUsedBytes() const;
-
- protected:
-  // Returns a pointer to the current end of the head buffer.
-  uint8_t* head() const;
-
-  // Returns a pointer to the current end of the tail buffer.
-  uint8_t* tail() const;
-
- private:
-  size_t GetBufferSize() const;
-
-  ErrorReporter* error_reporter_;
-  uint8_t* buffer_head_;
-  uint8_t* buffer_tail_;
-  uint8_t* head_;
-  uint8_t* tail_;
-  uint8_t* temp_;
-
-  TF_LITE_REMOVE_VIRTUAL_DELETE
-};
-
-}  // namespace tflite
-
-#endif  // TENSORFLOW_LITE_MICRO_SIMPLE_MEMORY_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cc b/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cc
new file mode 100644
index 0000000..1015b53
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.cc
@@ -0,0 +1,199 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <new>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/c_api_types.h"
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/op_macros.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+// Wraps the byte range [buffer_head, buffer_tail). The head and temp cursors
+// start at the low end and the tail cursor at the high end, so nothing is
+// allocated yet.
+SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer_head,
+                                                       uint8_t* buffer_tail)
+    : buffer_head_(buffer_head), buffer_tail_(buffer_tail),
+      head_(buffer_head), tail_(buffer_tail), temp_(buffer_head) {}
+
+SingleArenaBufferAllocator::SingleArenaBufferAllocator(uint8_t* buffer,
+                                                       size_t buffer_size)
+    : SingleArenaBufferAllocator(buffer, buffer + buffer_size) {}
+
+// Builds an allocator whose own storage is carved from the tail of the arena.
+// Returns nullptr if the arena cannot hold the allocator object itself.
+/* static */
+SingleArenaBufferAllocator* SingleArenaBufferAllocator::Create(
+    uint8_t* buffer_head, size_t buffer_size) {
+  TFLITE_DCHECK(buffer_head != nullptr);
+  SingleArenaBufferAllocator tmp(buffer_head, buffer_size);
+  // Reserve room for the real instance from tmp's tail first, so that the
+  // copy made below already accounts for its own footprint.
+  uint8_t* allocator_buffer = tmp.AllocatePersistentBuffer(
+      sizeof(SingleArenaBufferAllocator), alignof(SingleArenaBufferAllocator));
+  // Constructing into a null buffer would be invalid; fail cleanly instead.
+  if (allocator_buffer == nullptr) return nullptr;
+  return new (allocator_buffer) SingleArenaBufferAllocator(tmp);
+}
+
+SingleArenaBufferAllocator::~SingleArenaBufferAllocator() = default;
+
+// Claims the single resizable region, which always begins at the aligned
+// start of the arena. Returns that address, or nullptr when ResizeBuffer()
+// rejects the request.
+uint8_t* SingleArenaBufferAllocator::AllocateResizableBuffer(size_t size,
+                                                             size_t alignment) {
+  uint8_t* const region_start = AlignPointerUp(buffer_head_, alignment);
+  const TfLiteStatus status = ResizeBuffer(region_start, size, alignment);
+  return (status == kTfLiteOk) ? region_start : nullptr;
+}
+
+TfLiteStatus SingleArenaBufferAllocator::DeallocateResizableBuffer(
+    uint8_t* resizable_buf) {
+  return ResizeBuffer(resizable_buf, /*size=*/0, /*alignment=*/1);
+}
+
+TfLiteStatus SingleArenaBufferAllocator::ReserveNonPersistentOverlayMemory(
+    size_t size, size_t alignment) {
+  // Same region as AllocateResizableBuffer(), but only the status is reported.
+  return ResizeBuffer(AlignPointerUp(buffer_head_, alignment), size, alignment);
+}
+
+// Resizes the single resizable region, which always starts at the aligned
+// head of the arena. Fails with kTfLiteError if `resizable_buf` is not that
+// region, if temp allocations are outstanding, or if `size` does not fit.
+TfLiteStatus SingleArenaBufferAllocator::ResizeBuffer(uint8_t* resizable_buf,
+                                                      size_t size,
+                                                      size_t alignment) {
+  uint8_t* const start = AlignPointerUp(buffer_head_, alignment);
+  if (head_ != temp_ || resizable_buf != start) {
+    MicroPrintf(
+        "Internal error: either buffer is not resizable or "
+        "ResetTempAllocations() is not called before ResizeBuffer().");
+    return kTfLiteError;
+  }
+  // Alignment may push `start` past tail_; clamp so the size cannot wrap.
+  const size_t available_memory = start <= tail_ ? tail_ - start : 0;
+  if (available_memory < size) {
+    MicroPrintf(
+        "Failed to resize buffer. Requested: %u, available %u, missing: %u",
+        size, available_memory, size - available_memory);
+    return kTfLiteError;
+  }
+  head_ = start + size;
+  temp_ = head_;
+  return kTfLiteOk;
+}
+
+uint8_t* SingleArenaBufferAllocator::AllocatePersistentBuffer(
+    size_t size, size_t alignment) {
+  const size_t room = tail_ - head_;  // Checked before forming tail_ - size.
+  uint8_t* const block =
+      size <= room ? AlignPointerDown(tail_ - size, alignment) : nullptr;
+  if (block == nullptr || block < head_) {
+#ifndef TF_LITE_STRIP_ERROR_STRINGS
+    const size_t missing_memory = block ? head_ - block : size - room;
+    MicroPrintf("Failed to allocate tail memory. Requested: %u, "
+                "available %u, missing: %u",
+                size, size - missing_memory, missing_memory);
+#endif
+    return nullptr;
+  }
+  return tail_ = block;
+}
+
+uint8_t* SingleArenaBufferAllocator::AllocateTemp(size_t size,
+                                                  size_t alignment) {
+  uint8_t* const start = AlignPointerUp(temp_, alignment);
+  // Alignment may push `start` past tail_; clamp so the size cannot wrap.
+  const size_t available_memory = start <= tail_ ? tail_ - start : 0;
+  if (available_memory < size) {
+    MicroPrintf("Failed to allocate temp memory. Requested: %u, "
+                "available %u, missing: %u",
+                size, available_memory, size - available_memory);
+    return nullptr;
+  }
+  temp_ = start + size;
+  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(start);
+  ++temp_buffer_count_;  // Audited later by IsAllTempDeallocated().
+  return start;
+}
+
+void SingleArenaBufferAllocator::DeallocateTemp(uint8_t* temp_buf) {
+  --temp_buffer_count_;  // Bookkeeping only; temp_ itself is not moved here.
+  temp_buffer_ptr_check_sum_ ^= reinterpret_cast<intptr_t>(temp_buf);
+}
+
+bool SingleArenaBufferAllocator::IsAllTempDeallocated() {
+  // Clean means no outstanding buffers and a fully cancelled XOR checksum.
+  const bool checksum_ok = (temp_buffer_ptr_check_sum_ == 0);
+  if (temp_buffer_count_ == 0 && checksum_ok) return true;
+  MicroPrintf(
+      "Number of allocated temp buffers: %d. Checksum passing status: %d",
+      temp_buffer_count_, checksum_ok);
+  return false;
+}
+
+TfLiteStatus SingleArenaBufferAllocator::ResetTempAllocations() {
+  // Enforces the pairing asked for by TODO(b/209453859): each AllocateTemp()
+  // needs a DeallocateTemp() before the temp region is rewound to head_.
+  if (IsAllTempDeallocated()) {
+    temp_ = head_;
+    return kTfLiteOk;
+  }
+  MicroPrintf(
+      "All temp buffers must be freed before calling ResetTempAllocations()");
+  return kTfLiteError;
+}
+
+uint8_t* SingleArenaBufferAllocator::GetOverlayMemoryAddress() const {
+  return buffer_head_;  // Raw arena start; no alignment is applied here.
+}
+
+size_t SingleArenaBufferAllocator::GetNonPersistentUsedBytes() const {
+  return (head_ < temp_ ? temp_ : head_) - buffer_head_;  // Furthest cursor.
+}
+
+size_t SingleArenaBufferAllocator::GetPersistentUsedBytes() const {
+  return static_cast<size_t>(buffer_tail_ - tail_);
+}
+
+size_t SingleArenaBufferAllocator::GetAvailableMemory(size_t alignment) const {
+  uint8_t* const lo = AlignPointerUp(temp_, alignment);
+  uint8_t* const hi = AlignPointerDown(tail_, alignment);
+  return lo < hi ? hi - lo : 0;  // Alignment can make the cursors cross.
+}
+
+size_t SingleArenaBufferAllocator::GetUsedBytes() const {
+  return GetNonPersistentUsedBytes() + GetPersistentUsedBytes();
+}
+
+size_t SingleArenaBufferAllocator::GetBufferSize() const {
+  return static_cast<size_t>(buffer_tail_ - buffer_head_);
+}
+
+uint8_t* SingleArenaBufferAllocator::head() const { return head_; }
+// Protected accessors: head() above and tail() below return the raw cursors.
+uint8_t* SingleArenaBufferAllocator::tail() const { return tail_; }
+
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h b/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h
new file mode 100644
index 0000000..730ee73
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/single_arena_buffer_allocator.h
@@ -0,0 +1,144 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
+#define TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/ibuffer_allocator.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/compatibility.h"
+
+namespace tflite {
+
+// TODO(petewarden): This allocator never frees up or reuses any memory, even
+// though we have enough information about lifetimes of the tensors to do so.
+// This makes it pretty wasteful, so we should use a more intelligent method.
+class SingleArenaBufferAllocator : public INonPersistentBufferAllocator,
+                                   public IPersistentBufferAllocator {
+ public:
+  // TODO(b/157615197): Cleanup constructors/destructor and use factory
+  // functions.
+  SingleArenaBufferAllocator(uint8_t* buffer_head, uint8_t* buffer_tail);
+  SingleArenaBufferAllocator(uint8_t* buffer, size_t buffer_size);
+  virtual ~SingleArenaBufferAllocator();
+
+  // Creates a new SingleArenaBufferAllocator from a given buffer head and size.
+  static SingleArenaBufferAllocator* Create(uint8_t* buffer_head,
+                                            size_t buffer_size);
+
+  // Resizes a buffer that was previously returned by
+  // AllocateResizableBuffer(). In the current implementation, it adjusts the
+  // head (lowest address and moving upwards) memory allocation to a given size.
+  // Calls to this method will also invalidate all temporary allocation values
+  // (it sets the location of temp space at the end of the head section). This
+  // call will fail if a chain of allocations through AllocateTemp() has not
+  // been cleaned up with a call to ResetTempAllocations().
+  virtual TfLiteStatus ResizeBuffer(uint8_t* resizable_buf, size_t size,
+                                    size_t alignment) override;
+
+  // Returns a buffer that is resizable via ResizeBuffer(). Only one
+  // resizable buffer is currently supported.
+  virtual uint8_t* AllocateResizableBuffer(size_t size,
+                                           size_t alignment) override;
+
+  // Frees up the memory occupied by the resizable buffer.
+  virtual TfLiteStatus DeallocateResizableBuffer(
+      uint8_t* resizable_buf) override;
+
+  // Reserves the non-persistent memory that is planned by the memory planner.
+  virtual TfLiteStatus ReserveNonPersistentOverlayMemory(
+      size_t size, size_t alignment) override;
+
+  // Allocates persistent memory starting at the tail of the arena (highest
+  // address and moving downwards).
+  virtual uint8_t* AllocatePersistentBuffer(size_t size,
+                                            size_t alignment) override;
+
+  // Allocates a temporary buffer from the head of the arena (lowest address and
+  // moving upwards) but does not update the actual head allocation size or
+  // position. The returned buffer stays valid until ResetTempAllocations() is
+  // called. Repeat calls to this function will create a chain of temp
+  // allocations. Every AllocateTemp() must be paired with DeallocateTemp() and
+  // the chain must end with a call to ResetTempAllocations(). If ResizeBuffer()
+  // is called before a call to ResetTempAllocations(), it will fail with an
+  // error message.
+  virtual uint8_t* AllocateTemp(size_t size, size_t alignment) override;
+
+  // Signals that a temporary buffer is no longer needed. This is currently for
+  // book-keeping purposes only; the memory region is not immediately available
+  // for re-use. Deallocated memory is only reclaimed once
+  // ResetTempAllocations() is called.
+  virtual void DeallocateTemp(uint8_t* buf) override;
+
+  // Returns true if all temporary buffers are already deallocated.
+  virtual bool IsAllTempDeallocated() override;
+
+  // Resets a chain of temporary allocations back to the current head of the
+  // arena (lowest address).
+  virtual TfLiteStatus ResetTempAllocations() override;
+
+  // Returns the start address of the arena, which is where the head
+  // (non-persistent overlay) section begins before any alignment is applied.
+  uint8_t* GetOverlayMemoryAddress() const override;
+
+  // Returns the bytes used at the head, including any temporary allocations.
+  size_t GetNonPersistentUsedBytes() const override;
+
+  // Returns the size of all allocations in the tail section in bytes.
+  size_t GetPersistentUsedBytes() const override;
+
+  // Returns the number of bytes available with a given alignment. This number
+  // takes in account any temporary allocations.
+  size_t GetAvailableMemory(size_t alignment) const override;
+
+  // Returns the number of used bytes in the allocator. This number takes in
+  // account any temporary allocations.
+  size_t GetUsedBytes() const;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
+
+ protected:
+  // Returns a pointer to the current end of the head buffer.
+  uint8_t* head() const;
+
+  // Returns a pointer to the current end of the tail buffer.
+  uint8_t* tail() const;
+
+ private:
+  size_t GetBufferSize() const;
+  uint8_t* buffer_head_;
+  uint8_t* buffer_tail_;
+  uint8_t* head_;
+  uint8_t* tail_;
+  uint8_t* temp_;
+
+  // The combination of the checksum of outstanding temporary buffer pointers
+  // AND the count of outstanding temporary buffers provides a low cost
+  // mechanism to audit temporary buffers' allocation and deallocation.
+  //
+  // XOR checksum for outstanding temp buffers.
+  // If all temp buffers are deallocated OR no temp buffers are allocated,
+  // temp_buffer_ptr_check_sum_ == 0.
+  intptr_t temp_buffer_ptr_check_sum_ = 0;
+  // Count of outstanding temp buffers.
+  int temp_buffer_count_ = 0;
+};
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_ARENA_ALLOCATOR_SINGLE_ARENA_BUFFER_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/tensor_utils_common.cc b/edge-impulse-sdk/tensorflow/lite/micro/tensor_utils_common.cc
deleted file mode 100644
index 8d4f83e..0000000
--- a/edge-impulse-sdk/tensorflow/lite/micro/tensor_utils_common.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_utils_common.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <limits>
-
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/cppmath.h"
-
-namespace tflite {
-
-//
-// The following is copied from TfLite portable_tensor_utils.cc
-//
-// The declarations are located in header file:
-//   tensorflow/lite/kernels/internal/tensor_utils_common.h
-//
-namespace tensor_utils {
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It also outputs the range (min, max) of the floating point buffer, and the
-// scaling factor used to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float* min_value,
-                             float* max_value, float* scaling_factor) {
-  auto minmax = std::minmax_element(values, values + size);
-  *min_value = *minmax.first;
-  *max_value = *minmax.second;
-
-  SymmetricQuantizeFloats(values, size, quantized_values, *min_value,
-                          *max_value, scaling_factor);
-}
-
-// Quantizes a buffer of floating point values using a symmetric quantization
-// (i.e. linear quantization without an offset) to 8-bit signed integers.
-// It uses the range (min, max) provided to the function to calculate the
-// appropriate scaling factor to quantize the values.
-void SymmetricQuantizeFloats(const float* values, const int size,
-                             int8_t* quantized_values, float min_value,
-                             float max_value, float* scaling_factor) {
-  const int32_t kScale = 127;
-  const float range = std::max(std::abs(min_value), std::abs(max_value));
-  if (range == 0) {
-    std::fill_n(quantized_values, size, 0);
-    *scaling_factor = 1;
-    return;
-  }
-  *scaling_factor = range / kScale;
-  const float scaling_factor_inv = kScale / range;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value =
-        static_cast<int32_t>(TfLiteRound(values[i] * scaling_factor_inv));
-    // Clamp: just in case some odd numeric offset.
-    quantized_values[i] = static_cast<int8_t>(
-        std::min(kScale, std::max(-kScale, quantized_value)));
-  }
-}
-
-void AsymmetricQuantizeFloats(const float* values, const int size,
-                              int8_t* quantized_values, float* scaling_factor,
-                              int32_t* offset) {
-  const int32_t kMinScale = -128;
-  const int32_t kMaxScale = 127;
-  const double qmin_double = kMinScale;
-  const double qmax_double = kMaxScale;
-  const auto minmax = std::minmax_element(values, values + size);
-  const double rmin = fmin(0, *minmax.first);
-  const double rmax = fmax(0, *minmax.second);
-  if (rmin == rmax) {
-    std::fill_n(quantized_values, size, 0);
-    *scaling_factor = 1;
-    *offset = 0;
-    return;
-  } else {
-    double scale = (rmax - rmin) / (qmax_double - qmin_double);
-    const double zero_point_from_min = qmin_double - rmin / scale;
-    const double zero_point_from_max = qmax_double - rmax / scale;
-    const double zero_point_from_min_error =
-        std::abs(qmin_double) + std::abs(rmin / scale);
-    const double zero_point_from_max_error =
-        std::abs(qmax_double) + std::abs(rmax / scale);
-    const double zero_point_double =
-        zero_point_from_min_error < zero_point_from_max_error
-            ? zero_point_from_min
-            : zero_point_from_max;
-    int8_t nudged_zero_point = 0;
-    if (zero_point_double <= qmin_double) {
-      nudged_zero_point = kMinScale;
-    } else if (zero_point_double >= qmax_double) {
-      nudged_zero_point = kMaxScale;
-    } else {
-      nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
-    }
-    *scaling_factor = scale;
-    *offset = nudged_zero_point;
-  }
-  const float scaling_factor_inv = 1.0f / *scaling_factor;
-  for (int i = 0; i < size; ++i) {
-    const int32_t quantized_value = static_cast<int32_t>(
-        TfLiteRound(*offset + values[i] * scaling_factor_inv));
-    quantized_values[i] =
-        std::min(kMaxScale, std::max(kMinScale, quantized_value));
-  }
-}
-
-// Reduce-sum on a vector:
-// input_vector: pointer to input vector.
-// output_vector: pointer to vector.
-// output_size: output vector size.
-// reduction_size: number of consecutive elements from input vector which are
-// added to get one element of output.
-void ReductionSumVector(const int8_t* input_vector, int32_t* output_vector,
-                        int output_size, int reduction_size) {
-  for (int o = 0; o < output_size; o++) {
-    int32_t result = 0;
-    for (int r = 0; r < reduction_size; r++) {
-      result += input_vector[r];
-    }
-    output_vector[o] = result;
-    input_vector += reduction_size;
-  }
-}
-
-}  // namespace tensor_utils
-
-}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cc b/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cc
new file mode 100644
index 0000000..fe4c836
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.cc
@@ -0,0 +1,112 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h"
+
+#include <cstdarg>
+#include <cstddef>
+#include <cstdint>
+#include <initializer_list>
+#include <new>
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+// TODO(b/170464050): Use TFLM test only version of schema_utils.
+
+namespace tflite {
+namespace testing {
+
+const TfLiteRegistration* PackerOp::getRegistration() {
+  return GetMutableRegistration();  // Same object, handed out as const.
+}
+
+TfLiteRegistration* PackerOp::GetMutableRegistration() {
+  static TfLiteRegistration registration;  // One shared instance.
+  registration.init = &PackerOp::Init;
+  registration.free = &PackerOp::Free;
+  registration.prepare = &PackerOp::Prepare;
+  registration.invoke = &PackerOp::Invoke;
+  return &registration;
+}
+
+void* PackerOp::Init(TfLiteContext* context, const char* buffer,
+                     size_t length) {
+  // No per-node state is created; only clear the flag that Free() sets.
+  freed_ = false;
+  return nullptr;
+}
+
+void PackerOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; }
+
+TfLiteStatus PackerOp::Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;  // Nothing to validate or allocate for this test op.
+}
+
+// Test op: copies input 0 and then input 1 into the output as int32 values,
+// stopping once the output is full and zero-filling any slots the inputs do
+// not cover. The output length is read from the output's first dimension.
+// NOTE(review): unlike the inputs, the output rank is not checked - confirm.
+TfLiteStatus PackerOp::Invoke(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, 0);
+  TF_LITE_ENSURE(context, input1 != nullptr);
+  TF_LITE_ENSURE_EQ(context, input1->dims->size, 1);
+  const int32_t* const src1 = input1->data.i32;
+  const int32_t src1_len = input1->dims->data[0];
+
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, 1);
+  TF_LITE_ENSURE(context, input2 != nullptr);
+  TF_LITE_ENSURE_EQ(context, input2->dims->size, 1);
+  const int32_t* const src2 = input2->data.i32;
+  const int32_t src2_len = input2->dims->data[0];
+
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
+  int32_t* const dst = output->data.i32;
+  const int32_t dst_len = output->dims->data[0];
+
+  // `filled` is the next free output slot. Each loop below stops as soon as
+  // the output is full, so the phases need no early returns between them.
+  int filled = 0;
+  for (int i = 0; i < src1_len && filled < dst_len; ++i) {
+    dst[filled++] = src1[i];
+  }
+
+  for (int i = 0; i < src2_len && filled < dst_len; ++i) {
+    dst[filled++] = src2[i];
+  }
+
+  // Slots that neither input reached are cleared, so the whole output is
+  // always written.
+  while (filled < dst_len) {
+    dst[filled++] = 0;
+  }
+
+  return kTfLiteOk;
+}
+
+bool PackerOp::freed_ = false;  // Set by Free(), cleared by Init().
+
+}  // namespace testing
+}  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h b/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h
new file mode 100644
index 0000000..cbbbcec
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h
@@ -0,0 +1,50 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_
+#define TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_
+
+#include <cstdint>
+#include <limits>
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h"
+#include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace testing {
+
+class PackerOp {
+ public:
+  static const TfLiteRegistration* getRegistration();  // Const view of below.
+  static TfLiteRegistration* GetMutableRegistration();  // Shared instance.
+  static void* Init(TfLiteContext* context, const char* buffer, size_t length);
+  static void Free(TfLiteContext* context, void* buffer);  // Sets freed_.
+  static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
+  static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
+
+ private:
+  static bool freed_;  // Cleared by Init(), set by Free().
+};
+
+}  // namespace testing
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_MICRO_TEST_HELPER_CUSTOM_OPS_H_
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cc b/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cc
index 6fb3685..d97caca 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cc
+++ b/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.cc
@@ -23,12 +23,15 @@ limitations under the License.
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
-#include "edge-impulse-sdk/tensorflow/lite/core/api/error_reporter.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/kernel_util.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/kernels/kernel_util.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/memory_helpers.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/micro_arena_constants.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
+#include "edge-impulse-sdk/tensorflow/lite/micro/test_helper_custom_ops.h"
 #include "edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h"
 
 // TODO(b/170464050): Use TFLM test only version of schema_utils.
@@ -39,7 +42,9 @@ namespace {
 
 class StackAllocator : public flatbuffers::Allocator {
  public:
-  StackAllocator() : data_(data_backing_), data_size_(0) {}
+  StackAllocator(size_t alignment) : data_size_(0) {
+    data_ = AlignPointerUp(data_backing_, alignment);
+  }
 
   uint8_t* allocate(size_t size) override {
     TFLITE_DCHECK((data_size_ + size) <= kStackAllocatorSize);
@@ -51,10 +56,10 @@ class StackAllocator : public flatbuffers::Allocator {
 
   void deallocate(uint8_t* p, size_t) override {}
 
-  static StackAllocator& instance() {
+  static StackAllocator& instance(size_t alignment = 1) {
     // Avoid using true dynamic memory allocation to be portable to bare metal.
     static char inst_memory[sizeof(StackAllocator)];
-    static StackAllocator* inst = new (inst_memory) StackAllocator;
+    static StackAllocator* inst = new (inst_memory) StackAllocator(alignment);
     return *inst;
   }
 
@@ -64,13 +69,16 @@ class StackAllocator : public flatbuffers::Allocator {
   uint8_t data_backing_[kStackAllocatorSize];
   uint8_t* data_;
   int data_size_;
+
+  TF_LITE_REMOVE_VIRTUAL_DELETE
 };
 
 flatbuffers::FlatBufferBuilder* BuilderInstance() {
   static char inst_memory[sizeof(flatbuffers::FlatBufferBuilder)];
   static flatbuffers::FlatBufferBuilder* inst =
       new (inst_memory) flatbuffers::FlatBufferBuilder(
-          StackAllocator::kStackAllocatorSize, &StackAllocator::instance());
+          StackAllocator::kStackAllocatorSize,
+          &StackAllocator::instance(MicroArenaBufferAlignment()));
   return inst;
 }
 
@@ -101,7 +109,9 @@ class ModelBuilder {
 
   // Adds a node to the model with given input and output Tensors.
   Node AddNode(Operator op, std::initializer_list<Tensor> inputs,
-               std::initializer_list<Tensor> outputs);
+               std::initializer_list<Tensor> outputs,
+               std::initializer_list<Tensor> intermediates =
+                   std::initializer_list<Tensor>{});
 
   void AddMetadata(const char* description_string,
                    const int32_t* metadata_buffer_data, size_t num_elements);
@@ -156,12 +166,17 @@ ModelBuilder::Operator ModelBuilder::RegisterOp(BuiltinOperator op,
 ModelBuilder::Node ModelBuilder::AddNode(
     ModelBuilder::Operator op,
     std::initializer_list<ModelBuilder::Tensor> inputs,
-    std::initializer_list<ModelBuilder::Tensor> outputs) {
+    std::initializer_list<ModelBuilder::Tensor> outputs,
+    std::initializer_list<ModelBuilder::Tensor> intermediates) {
   TFLITE_DCHECK(next_operator_id_ <= kMaxOperators);
   operators_[next_operator_id_] = tflite::CreateOperator(
       *builder_, op, builder_->CreateVector(inputs.begin(), inputs.size()),
       builder_->CreateVector(outputs.begin(), outputs.size()),
-      BuiltinOptions_NONE);
+      BuiltinOptions_NONE,
+      /*builtin_options=*/0,
+      /*custom_options=*/0, tflite::CustomOptionsFormat_FLEXBUFFERS,
+      /*mutating_variable_inputs =*/0,
+      builder_->CreateVector(intermediates.begin(), intermediates.size()));
   next_operator_id_++;
   return next_operator_id_ - 1;
 }
@@ -195,7 +210,7 @@ const Model* ModelBuilder::BuildModel(
     buffers[i] = metadata_buffers_[i - 1];
   }
 
-  // TFLM only supports single subgraph.
+  // Default to single subgraph model.
   constexpr size_t subgraphs_size = 1;
 
   // Find out number of subgraph inputs.
@@ -261,13 +276,16 @@ const Model* BuildSimpleStatefulModel() {
 
   const int op_id =
       model_builder.RegisterOp(BuiltinOperator_CUSTOM, "simple_stateful_op");
-  const int input_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
-  const int median_tensor = model_builder.AddTensor(TensorType_UINT8, {3});
+  const int input_tensor = model_builder.AddTensor(TensorType_INT8, {3});
+  const int median_tensor = model_builder.AddTensor(TensorType_INT8, {3});
   const int invoke_count_tensor =
       model_builder.AddTensor(TensorType_INT32, {1});
+  const int intermediate_tensor =
+      model_builder.AddTensor(TensorType_FLOAT32, {0});
 
   model_builder.AddNode(op_id, {input_tensor},
-                        {median_tensor, invoke_count_tensor});
+                        {median_tensor, invoke_count_tensor},
+                        {intermediate_tensor});
   return model_builder.BuildModel({input_tensor},
                                   {median_tensor, invoke_count_tensor});
 }
@@ -341,6 +359,149 @@ const Model* BuildModelWithOfflinePlanning(int number_of_tensors,
       node_conn[0].input, node_conn[num_conns - 1].output, num_subgraph_inputs);
 }
 
+const Model* BuildModelWithUnusedInputs() {  // One-operator model in which subgraph input tensor 1 is read by no operator.
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {CreateBuffer(*builder)};
+  constexpr size_t tensor_shape_size = 2;
+  const int32_t tensor_shape[tensor_shape_size] = {1, 64};  // shape shared by all four tensors
+  constexpr size_t tensors_size = 4;
+  const Offset<Tensor> tensors[tensors_size] = {
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT8, 0,
+                   builder->CreateString("test_input_tensor"), 0, false),
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT8, 0,
+                   builder->CreateString("test_unused_input_tensor"), 0, false),
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT8, 0,
+                   builder->CreateString("test_output_tensor"), 0, false),
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT8, 0,
+                   builder->CreateString("test_unused_tensor"), 0, false),  // tensor 3: appears in no input/output list below
+  };
+  constexpr size_t inputs_size = 2;
+  const int32_t inputs[inputs_size] = {0, 1};  // subgraph inputs: tensors 0 and 1
+  constexpr size_t outputs_size = 1;
+  const int32_t outputs[outputs_size] = {2};  // subgraph output: tensor 2
+  constexpr size_t operator_inputs_size = 1;
+  const int32_t operator_inputs[operator_inputs_size] = {0};  // the operator reads tensor 0 only
+  constexpr size_t operator_outputs_size = 1;
+  const int32_t operator_outputs[operator_outputs_size] = {2};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> operators[operators_size] = {
+      CreateOperator(
+          *builder, 0,
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, operator_outputs_size),
+          BuiltinOptions_NONE),
+  };
+  constexpr size_t subgraphs_size = 1;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
+                     builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(outputs, outputs_size),
+                     builder->CreateVector(operators, operators_size),
+                     builder->CreateString("test_subgraph"))};
+  constexpr size_t operator_codes_size = 1;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "mock_custom",
+                               /*version=*/0, BuiltinOperator_CUSTOM)};  // opcode index 0, used by the single operator
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();  // the returned Model is a view into the builder's buffer
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+const Model* BuildModelWithUnusedOperatorOutputs() {  // One-operator model: the operator writes tensors 0 and 1, but only tensor 0 is a subgraph output.
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {CreateBuffer(*builder)};
+  constexpr size_t tensor_shape_size = 2;
+  const int32_t tensor_shape[tensor_shape_size] = {1, 64};  // shape shared by both tensors
+  constexpr size_t tensors_size = 2;
+  const Offset<Tensor> tensors[tensors_size] = {
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT8, 0,
+                   builder->CreateString("test_input_tensor"), 0, false),
+      CreateTensor(
+          *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
+          TensorType_INT8, 0,
+          builder->CreateString("test_unused_output_tensor"), 0, false)};
+  constexpr size_t inputs_size = 1;
+  const int32_t inputs[inputs_size] = {};  // one element, zero-initialized: the subgraph input list is {0}
+  constexpr size_t outputs_size = 1;
+  const int32_t outputs[outputs_size] = {0};  // tensor 0 is therefore listed as both subgraph input and output
+  constexpr size_t operator_inputs_size = 1;
+  const int32_t operator_inputs[operator_inputs_size] = {};  // one element, zero-initialized: the operator input list is {0}
+  constexpr size_t operator_outputs_size = 2;
+  const int32_t operator_outputs[operator_outputs_size] = {0, 1};  // tensor 1 is written but is not a subgraph output
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> operators[operators_size] = {
+      CreateOperator(
+          *builder, 0,
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, operator_outputs_size),
+          BuiltinOptions_NONE),
+  };
+  constexpr size_t subgraphs_size = 1;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size),
+                     builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(outputs, outputs_size),
+                     builder->CreateVector(operators, operators_size),
+                     builder->CreateString("test_subgraph"))};
+  constexpr size_t operator_codes_size = 1;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "mock_custom",
+                               /*version=*/0, BuiltinOperator_CUSTOM)};  // opcode index 0, used by the single operator
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();  // the returned Model is a view into the builder's buffer
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+const Model* BuildModelWith256x256Tensor() {  // Single "mock_custom" node: two 256x256 INT8 inputs, one 256x256 INT8 output.
+  using flatbuffers::Offset;  // NOTE(review): Offset is not referenced anywhere in this function
+  flatbuffers::FlatBufferBuilder* fb_builder = BuilderInstance();
+
+  ModelBuilder model_builder(fb_builder);
+
+  const int op_id =
+      model_builder.RegisterOp(BuiltinOperator_CUSTOM, "mock_custom");
+  const int input1_tensor =
+      model_builder.AddTensor(TensorType_INT8, {256, 256});
+  const int input2_tensor =
+      model_builder.AddTensor(TensorType_INT8, {256, 256});
+  const int output_tensor =
+      model_builder.AddTensor(TensorType_INT8, {256, 256});
+
+  model_builder.AddNode(op_id, {input1_tensor, input2_tensor}, {output_tensor});  // no intermediates passed: the defaulted argument is used
+  return model_builder.BuildModel({input1_tensor, input2_tensor},
+                                  {output_tensor});  // the node's inputs/outputs double as the model's inputs/outputs
+}
+
 const Model* BuildSimpleMockModel() {
   using flatbuffers::Offset;
   flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
@@ -362,7 +523,7 @@ const Model* BuildSimpleMockModel() {
                    builder->CreateString("test_input_tensor"), 0, false),
       CreateTensor(*builder,
                    builder->CreateVector(tensor_shape, tensor_shape_size),
-                   TensorType_UINT8, 1,
+                   TensorType_INT8, 1,
                    builder->CreateString("test_weight_tensor"), 0, false),
       CreateTensor(*builder,
                    builder->CreateVector(tensor_shape, tensor_shape_size),
@@ -458,7 +619,7 @@ const Model* BuildComplexMockModel() {
           0, true /* is_variable */),
       CreateTensor(
           *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
-          TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_1"), 0,
+          TensorType_INT8, 2, builder->CreateString("test_weight_tensor_1"), 0,
           false /* is_variable */),
       // Op 1 output / Op 2 input:
       CreateTensor(
@@ -472,7 +633,7 @@ const Model* BuildComplexMockModel() {
           0, true /* is_variable */),
       CreateTensor(
           *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
-          TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_2"), 0,
+          TensorType_INT8, 2, builder->CreateString("test_weight_tensor_2"), 0,
           false /* is_variable */),
       // Op 2 output / Op 3 input:
       CreateTensor(
@@ -486,7 +647,7 @@ const Model* BuildComplexMockModel() {
           0, true /* is_variable */),
       CreateTensor(
           *builder, builder->CreateVector(tensor_shape, tensor_shape_size),
-          TensorType_UINT8, 2, builder->CreateString("test_weight_tensor_3"), 0,
+          TensorType_INT8, 2, builder->CreateString("test_weight_tensor_3"), 0,
           false /* is_variable */),
       // Op 3 output:
       CreateTensor(
@@ -638,6 +799,636 @@ const Model* BuildSimpleMultipleInputsModel() {
   return model;
 }
 
+const Model* BuildSimpleModelWithSubgraphsAndIf() {  // Three-subgraph model: subgraph 0 holds an IF op whose options name subgraphs 1 and 2.
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  const int32_t condition_tensor_shape[] = {1};
+  const int32_t data_tensor_shape[] = {1, 2};
+  constexpr size_t tensors_size = 4;  // array size reused for all three tensor arrays below
+  const Offset<Tensor> subgraph1_tensors[tensors_size] = {
+      CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1),
+                   TensorType_BOOL, 0,
+                   builder->CreateString("condition tensor"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+  const Offset<Tensor> subgraph2_tensors[tensors_size] = {  // 4 slots declared, 3 initialized; only the first 3 are serialized below
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+  const Offset<Tensor> subgraph3_tensors[tensors_size] = {  // 4 slots declared, 3 initialized; only the first 3 are serialized below
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+
+  constexpr size_t if_inputs_size = 3;
+  const int32_t if_inputs[if_inputs_size] = {0, 1, 2};  // condition tensor followed by the two data inputs
+  constexpr size_t outputs_size = 1;
+  const int32_t if_outputs[outputs_size] = {3};
+  constexpr size_t operator_inputs_size = 2;
+  const int32_t operator_inputs[operator_inputs_size] = {0, 1};  // shared by subgraphs 1 and 2
+  const int32_t operator_outputs[outputs_size] = {2};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> subgraph1_operators[operators_size] = {
+      CreateOperator(
+          *builder, 0, builder->CreateVector(if_inputs, if_inputs_size),
+          builder->CreateVector(if_outputs, outputs_size),
+          BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()),  // IfOptions reference subgraph indices 1 and 2
+  };
+  const Offset<Operator> subgraph2_operators[operators_size] = {
+      CreateOperator(
+          *builder, 1,  // opcode index 1: the ADD entry of operator_codes
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          BuiltinOptions_NONE),
+  };
+  const Offset<Operator> subgraph3_operators[operators_size] = {
+      CreateOperator(
+          *builder, 2,  // opcode index 2: the MUL entry of operator_codes
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          BuiltinOptions_NONE),
+  };
+  constexpr size_t subgraphs_size = 3;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4),
+                     builder->CreateVector(if_inputs, if_inputs_size),
+                     builder->CreateVector(if_outputs, outputs_size),
+                     builder->CreateVector(subgraph1_operators, operators_size),
+                     builder->CreateString("if_subgraph")),
+      CreateSubGraph(
+          *builder, builder->CreateVector(subgraph2_tensors, 3),
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          builder->CreateVector(subgraph2_operators, operators_size),
+          builder->CreateString("then_subgraph")),
+      CreateSubGraph(
+          *builder, builder->CreateVector(subgraph3_tensors, 3),
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          builder->CreateVector(subgraph3_operators, operators_size),
+          builder->CreateString("else_subgraph")),
+  };
+  constexpr size_t operator_codes_size = 3;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {  // all three entries carry the same custom-code string
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_IF),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_ADD),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_MUL),
+  };
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();  // the returned Model is a view into the builder's buffer
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+const Model* BuildSimpleModelWithIfAndEmptySubgraph() {  // IF model whose third subgraph ("else_subgraph") is created without an operators vector.
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  const int32_t condition_tensor_shape[] = {1};
+  const int32_t data_tensor_shape[] = {1, 2};
+  constexpr size_t tensors_size = 4;  // array size reused for all three tensor arrays below
+  const Offset<Tensor> subgraph1_tensors[tensors_size] = {
+      CreateTensor(*builder, builder->CreateVector(condition_tensor_shape, 1),
+                   TensorType_BOOL, 0,
+                   builder->CreateString("condition tensor"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+  const Offset<Tensor> subgraph2_tensors[tensors_size] = {  // 4 slots declared, 3 initialized; only the first 3 are serialized below
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+  const Offset<Tensor> subgraph3_tensors[tensors_size] = {  // 4 slots declared, 3 initialized; only the first 3 are serialized below
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 2),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor"), 0, false),
+  };
+
+  constexpr size_t if_inputs_size = 3;
+  const int32_t if_inputs[if_inputs_size] = {0, 1, 2};  // condition tensor followed by the two data inputs
+  constexpr size_t outputs_size = 1;
+  const int32_t if_outputs[outputs_size] = {3};
+  constexpr size_t operator_inputs_size = 2;
+  const int32_t operator_inputs[operator_inputs_size] = {0, 1};  // shared by subgraphs 1 and 2
+  const int32_t operator_outputs[outputs_size] = {2};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> subgraph1_operators[operators_size] = {
+      CreateOperator(
+          *builder, 0, builder->CreateVector(if_inputs, if_inputs_size),
+          builder->CreateVector(if_outputs, outputs_size),
+          BuiltinOptions_IfOptions, CreateIfOptions(*builder, 1, 2).Union()),  // IfOptions reference subgraph indices 1 and 2
+  };
+  const Offset<Operator> subgraph2_operators[operators_size] = {
+      CreateOperator(
+          *builder, 1,  // opcode index 1: the ADD entry of operator_codes
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          BuiltinOptions_NONE),
+  };
+  constexpr size_t subgraphs_size = 3;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 4),
+                     builder->CreateVector(if_inputs, if_inputs_size),
+                     builder->CreateVector(if_outputs, outputs_size),
+                     builder->CreateVector(subgraph1_operators, operators_size),
+                     builder->CreateString("if_subgraph")),
+      CreateSubGraph(
+          *builder, builder->CreateVector(subgraph2_tensors, 3),
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size),
+          builder->CreateVector(subgraph2_operators, operators_size),
+          builder->CreateString("then_subgraph")),
+      CreateSubGraph(
+          *builder, builder->CreateVector(subgraph3_tensors, 3),
+          builder->CreateVector(operator_inputs, operator_inputs_size),
+          builder->CreateVector(operator_outputs, outputs_size), 0,  // 0 stands in for the operators vector: this subgraph has none
+          builder->CreateString("else_subgraph")),
+  };
+  constexpr size_t operator_codes_size = 3;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {  // all three entries carry the same custom-code string
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_IF),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_ADD),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_MUL),  // opcode index 2: no operator built above uses it
+  };
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();  // the returned Model is a view into the builder's buffer
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+const Model* BuildSimpleModelWithSubgraphsAndWhile() {  // Three-subgraph model: subgraph 0 holds a WHILE op whose options name subgraphs 1 and 2.
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  const int32_t data_tensor_shape[] = {1, 1};  // only the first element is serialized (every CreateVector below passes length 1)
+  constexpr size_t while_tensors_size = 4;
+  constexpr size_t op_tensors_size = 3;
+  const Offset<Tensor> subgraph0_tensors[while_tensors_size] = {
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor0"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor0"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor1"), 0, false),
+  };
+  const Offset<Tensor> subgraph1_tensors[op_tensors_size] = {
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_BOOL, 0,  // the only non-float tensor: subgraph 1's output
+                   builder->CreateString("condition_tensor"), 0, false),
+  };
+  const Offset<Tensor> subgraph2_tensors[op_tensors_size] = {
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor0"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(data_tensor_shape, 1),
+                   TensorType_FLOAT32, 0,
+                   builder->CreateString("output_tensor0"), 0, false),
+  };
+
+  constexpr size_t inputs_size = 2;
+  const int32_t inputs[inputs_size] = {0, 1};  // reused as the input list of every subgraph and every operator
+  constexpr size_t while_outputs_size = 2;
+  const int32_t while_outputs[while_outputs_size] = {2, 3};
+  constexpr size_t cond_outputs_size = 1;
+  const int32_t cond_outputs[cond_outputs_size] = {2};
+  constexpr size_t add_outputs_size = 1;
+  const int32_t add_outputs[add_outputs_size] = {2};
+  constexpr size_t add_subgraph_outputs_size = 2;
+  const int32_t add_subgraph_outputs[add_subgraph_outputs_size] = {2, 1};  // the op's output (tensor 2) followed by input tensor 1
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> subgraph0_operators[operators_size] = {
+      CreateOperator(*builder, 0, builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(while_outputs, while_outputs_size),
+                     BuiltinOptions_WhileOptions,
+                     CreateWhileOptions(*builder, 1, 2).Union()),  // WhileOptions reference subgraph indices 1 and 2
+  };
+  const Offset<Operator> subgraph1_operators[operators_size] = {
+      CreateOperator(*builder, 1, builder->CreateVector(inputs, inputs_size),  // opcode index 1: the LESS entry of operator_codes
+                     builder->CreateVector(cond_outputs, cond_outputs_size),
+                     BuiltinOptions_NONE),
+  };
+  const Offset<Operator> subgraph2_operators[operators_size] = {
+      CreateOperator(*builder, 2, builder->CreateVector(inputs, inputs_size),  // opcode index 2: the ADD entry of operator_codes
+                     builder->CreateVector(add_outputs, add_outputs_size),
+                     BuiltinOptions_NONE),
+  };
+  constexpr size_t subgraphs_size = 3;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(*builder, builder->CreateVector(subgraph0_tensors, 4),
+                     builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(while_outputs, while_outputs_size),
+                     builder->CreateVector(subgraph0_operators, operators_size),
+                     builder->CreateString("while_subgraph")),
+      CreateSubGraph(*builder, builder->CreateVector(subgraph1_tensors, 3),
+                     builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(cond_outputs, cond_outputs_size),
+                     builder->CreateVector(subgraph1_operators, operators_size),
+                     builder->CreateString("cond_subgraph")),
+      CreateSubGraph(*builder, builder->CreateVector(subgraph2_tensors, 3),
+                     builder->CreateVector(inputs, inputs_size),
+                     builder->CreateVector(add_subgraph_outputs,
+                                           add_subgraph_outputs_size),
+                     builder->CreateVector(subgraph2_operators, operators_size),
+                     builder->CreateString("body_subgraph")),
+  };
+  constexpr size_t operator_codes_size = 3;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {  // all three entries carry the same custom-code string
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_WHILE),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_LESS),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "multiple_inputs_op",
+                               /*version=*/0, BuiltinOperator_ADD),
+  };
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();  // the returned Model is a view into the builder's buffer
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+// Build a model with If and two subgraphs: two data tensors A1 of size 2, A2 of
+// size 4 are first concatenated, then cut to a new tensor A3 of size 3; the new
+// tensor A3 of size 3 is then concatenated with A2 tensor of size 4 to produce
+// a final output tensor A4. This model is specially crafted to capture the
+// corner case outlined in go/avoid-memory-corruption-in-if-operator.
+//
+//                Subgraph0
+//            A0(1) A2_0(4)  A1_0(2)
+//             |      |      | ---+
+//             v      v      v    |
+//            +--------------+    |
+//            |     IF       |    |
+//            +------+-------+    |
+//                   | A3_0(3)      |
+//                   v            |
+//            +--------------+    |
+//            |    CUSTOM    |<---+
+//            +------+-------+
+//                   |
+//                   v
+//                    A4_0(8)
+//
+//                Subgraph1/2
+//              A1_1(2)      A2_1(4)
+//                 |         |
+//                 v         v
+//             +---------------+
+//             |   CUSTOM      |
+//             +-------+-------+
+//                     |
+//                     v A3_1(3)
+//
+// This leads to the memory plan shown below.
+//
+//                  Subgraph0 Layout
+//
+//
+//   <------------A4_0        -------------> <----- A2_0-------> <----A3_0 --->
+//  +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+//  |    |    |    |    |    |    |    |    | 3  | 4  | 5  |  6 |    |    |    |
+//  +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
+//
+//  +----+----+----+
+//  | 1  |  2 | A0 |
+//  +----+----+----+
+//  <---A1_0-->
+//
+//                 Subgraph 1 Layout
+//
+//  +----+----+----+----+----+----+----+----+----+
+//  |    |    |    |    |    |    |    |    |    |
+//  +----+----+----+----+----+----+----+----+----+
+//
+//
+//  <------A2_1 -------><----A3_1  ---><--A1_1--->
+//
+//
+// A1_1 of subgraph 1 will overlap with A2_0 of subgraph 0.
+// In a buggy implementation of IF, two overwrites may happen:
+// 1. Copying input from A1_0 to A1_1 overwrites A2_0 before A2_0 is copied to
+// A2_1; thus subgraph 1 produces incorrect output.
+// 2. Copying output from A3_1 to A4_0 overwrites A1_0, which should remain
+// intact so that it can be used by the OP after the IF operator in subgraph 0.
+//
+
+const Model* BuildModelWithIfAndSubgraphInputTensorOverlap() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr TensorType kTensorType = TensorType_INT32;
+  constexpr int kBlockSize =
+      tflite::MicroArenaBufferAlignment() / sizeof(int32_t);
+  constexpr size_t kBuffersCount = 1;
+  const Offset<Buffer> buffers[kBuffersCount] = {
+      CreateBuffer(*builder),
+  };
+  const int32_t kConditionTensorShape[] = {1};
+  const int32_t kIfInput1TensorShape[] = {2 * kBlockSize};
+  const int32_t kIfInput2TensorShape[] = {4 * kBlockSize};
+  const int32_t kIfOutputTensorShape[] = {3 * kBlockSize};
+  const int32_t kFinalOutputTensorShape[] = {8 * kBlockSize};
+  constexpr size_t kSubgraph0TensorsCount = 5;
+  const Offset<Tensor> kSubgraph0Tensors[kSubgraph0TensorsCount] = {
+      CreateTensor(*builder, builder->CreateVector(kConditionTensorShape, 1),
+                   TensorType_BOOL, 0,
+                   builder->CreateString("condition tensor"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_input_tensor1"), 0,
+                   false),
+      CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_input_tensor2"), 0,
+                   false),
+      CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_output_tensor"), 0,
+                   false),
+      CreateTensor(*builder, builder->CreateVector(kFinalOutputTensorShape, 1),
+                   kTensorType, 0, builder->CreateString("final_output_tensor"),
+                   0, false),
+  };
+
+  // Subgraph 1 is the chosen path if condition tensor in IF is true.
+  constexpr size_t kSubgraph1TensorsCount = 3;
+  const Offset<Tensor> kSubgraph1Tensors[kSubgraph1TensorsCount] = {
+      CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1),
+                   kTensorType, 0,
+                   builder->CreateString("subgraph1_input_tensor1"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1),
+                   kTensorType, 0,
+                   builder->CreateString("subgraph1_input_tensor2"), 0, false),
+      CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1),
+                   kTensorType, 0,
+                   builder->CreateString("subgraph1_output_tensor"), 0, false),
+  };
+
+  // Subgraph 2 is the chosen path if condition tensor in IF is false
+  constexpr size_t kSubgraph2TensorsCount = 3;
+  const Offset<Tensor> kSubgraph2Tensors[kSubgraph2TensorsCount] = {
+      CreateTensor(*builder, builder->CreateVector(kIfInput1TensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_input_tensor1"), 0,
+                   false),
+      CreateTensor(*builder, builder->CreateVector(kIfInput2TensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_input_tensor2"), 0,
+                   false),
+      CreateTensor(*builder, builder->CreateVector(kIfOutputTensorShape, 1),
+                   kTensorType, 0, builder->CreateString("if_output_tensor"), 0,
+                   false),
+  };
+
+  constexpr int kIfOpCodeIndex = 0;
+  constexpr int kCustomOpCodeIndex = 1;
+
+  constexpr size_t kIfInputsCount = 3;
+  const int32_t kIfInputs[kIfInputsCount] = {0, 1, 2};
+  constexpr size_t kOutputsCount = 1;
+  const int32_t kIfOutputs[kOutputsCount] = {3};
+  constexpr size_t kOpAfterIfInputsCount = 2;
+  const int32_t kOpAfterIfInputs[kOpAfterIfInputsCount] = {3, 2};
+  const int32_t kOpAfterIfOutputs[kOutputsCount] = {4};
+  constexpr size_t kOperatorsCount = 2;
+  const Offset<Operator> kSubgraph0Operators[kOperatorsCount] = {
+      CreateOperator(*builder, kIfOpCodeIndex,
+                     builder->CreateVector(kIfInputs, kIfInputsCount),
+                     builder->CreateVector(kIfOutputs, kOutputsCount),
+                     BuiltinOptions_IfOptions,
+                     CreateIfOptions(*builder, 1, 2).Union()),
+      CreateOperator(
+          *builder, kCustomOpCodeIndex,
+          builder->CreateVector(kOpAfterIfInputs, kOpAfterIfInputsCount),
+          builder->CreateVector(kOpAfterIfOutputs, kOutputsCount)),
+  };
+
+  constexpr size_t kSubgraph1InputsCount = 2;
+  const int32_t kSubgraph1Inputs[kSubgraph1InputsCount] = {0, 1};
+  constexpr size_t kSubgraph1OutputsCount = 1;
+  const int32_t kSubgraph1Outputs[kSubgraph1OutputsCount] = {2};
+  constexpr size_t kSubgraph1OperatorsCount = 1;
+  const Offset<Operator> kSubgraph1Operators[kSubgraph1OperatorsCount] = {
+      CreateOperator(
+          *builder, kCustomOpCodeIndex,
+          builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount),
+          builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount),
+          BuiltinOptions_NONE),
+  };
+
+  constexpr size_t kSubgraph2InputsCount = 2;
+  const int32_t kSubgraph2Inputs[kSubgraph2InputsCount] = {0, 1};
+  constexpr size_t kSubgraph2OutputsCount = 1;
+  const int32_t kSubgraph2Outputs[kSubgraph2OutputsCount] = {2};
+  constexpr size_t kSubgraph2OperatorsCount = 1;
+  const Offset<Operator> kSubgraph2Operators[kSubgraph2OperatorsCount] = {
+      CreateOperator(
+          *builder, kCustomOpCodeIndex,
+          builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount),
+          builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount),
+          BuiltinOptions_NONE),
+  };
+
+  constexpr size_t kSubgraphsCount = 3;
+  const Offset<SubGraph> kSubgraphs[kSubgraphsCount] = {
+      CreateSubGraph(
+          *builder,
+          builder->CreateVector(kSubgraph0Tensors, kSubgraph0TensorsCount),
+          builder->CreateVector(kIfInputs, kIfInputsCount),
+          builder->CreateVector(kOpAfterIfOutputs, kOutputsCount),
+          builder->CreateVector(kSubgraph0Operators, kOperatorsCount),
+          builder->CreateString("if_subgraph")),
+      CreateSubGraph(
+          *builder,
+          builder->CreateVector(kSubgraph1Tensors, kSubgraph1TensorsCount),
+          builder->CreateVector(kSubgraph1Inputs, kSubgraph1InputsCount),
+          builder->CreateVector(kSubgraph1Outputs, kSubgraph1OutputsCount),
+          builder->CreateVector(kSubgraph1Operators, kSubgraph1OperatorsCount),
+          builder->CreateString("then_subgraph")),
+      CreateSubGraph(
+          *builder,
+          builder->CreateVector(kSubgraph2Tensors, kSubgraph2TensorsCount),
+          builder->CreateVector(kSubgraph2Inputs, kSubgraph2InputsCount),
+          builder->CreateVector(kSubgraph2Outputs, kSubgraph2OutputsCount),
+          builder->CreateVector(kSubgraph2Operators, kSubgraph2OperatorsCount),
+          builder->CreateString("else_subgraph")),
+  };
+
+  constexpr size_t kOperatorCodesCount = 2;
+  const Offset<OperatorCode> kOperatorCodes[kOperatorCodesCount] = {
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "if",
+                               /*version=*/0, BuiltinOperator_IF),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "custom_packer_op",
+                               /*version=*/0, BuiltinOperator_CUSTOM),
+  };
+  const Offset<Model> kModelOffset = CreateModel(
+      *builder, 0, builder->CreateVector(kOperatorCodes, kOperatorCodesCount),
+      builder->CreateVector(kSubgraphs, kSubgraphsCount),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, kBuffersCount));
+  FinishModelBuffer(*builder, kModelOffset);
+  void* model_pointer = builder->GetBufferPointer();
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
+// Mock model with one main subgraph containing a single CALL_ONCE op (with null
+// inputs and outputs) which invokes a second subgraph which has null inputs and
+// outputs.
+const Model* BuildSimpleMockModelWithNullInputsOutputs() {
+  using flatbuffers::Offset;
+  flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+
+  constexpr size_t buffers_size = 1;
+  const Offset<Buffer> buffers[buffers_size] = {
+      CreateBuffer(*builder),
+  };
+  constexpr size_t tensor_shape_size = 1;
+  const int32_t tensor_shape[tensor_shape_size] = {0};
+  constexpr size_t tensors_size = 1;
+  const Offset<Tensor> tensors[tensors_size] = {
+      CreateTensor(*builder,
+                   builder->CreateVector(tensor_shape, tensor_shape_size),
+                   TensorType_INT32, 0,
+                   builder->CreateString("test_input_tensor1"), 0, false),
+  };
+  constexpr size_t subgraph0_inputs_size = 1;
+  const int32_t subgraph0_inputs[subgraph0_inputs_size] = {0};
+  constexpr size_t subgraph0_outputs_size = 1;
+  const int32_t subgraph0_outputs[subgraph0_outputs_size] = {0};
+  constexpr size_t operators_size = 1;
+  const Offset<Operator> subgraph0_operators[operators_size] = {
+      CreateOperator(*builder, 0, {}, {}, BuiltinOptions_CallOnceOptions,
+                     CreateCallOnceOptions(*builder, 1).Union()),
+  };
+  const Offset<Operator> subgraph1_operators[operators_size] = {
+      CreateOperator(*builder, 1, {}, {}, BuiltinOptions_NONE)};
+  constexpr size_t subgraphs_size = 2;
+  const Offset<SubGraph> subgraphs[subgraphs_size] = {
+      CreateSubGraph(
+          *builder, builder->CreateVector(tensors, tensors_size),
+          builder->CreateVector(subgraph0_inputs, subgraph0_inputs_size),
+          builder->CreateVector(subgraph0_outputs, subgraph0_outputs_size),
+          builder->CreateVector(subgraph0_operators, operators_size),
+          builder->CreateString("main_subgraph")),
+      CreateSubGraph(*builder, builder->CreateVector(tensors, tensors_size), {},
+                     {},
+                     builder->CreateVector(subgraph1_operators, operators_size),
+                     builder->CreateString("secondary subgraph")),
+  };
+  constexpr size_t operator_codes_size = 2;
+  const Offset<OperatorCode> operator_codes[operator_codes_size] = {
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0,
+                               "call_once_op",
+                               /*version=*/0, BuiltinOperator_CALL_ONCE),
+      CreateOperatorCodeDirect(*builder, /*deprecated_builtin_code=*/0, "no_op",
+                               /*version=*/0, BuiltinOperator_CUSTOM)};
+  const Offset<Model> model_offset = CreateModel(
+      *builder, 0, builder->CreateVector(operator_codes, operator_codes_size),
+      builder->CreateVector(subgraphs, subgraphs_size),
+      builder->CreateString("test_model"),
+      builder->CreateVector(buffers, buffers_size));
+  FinishModelBuffer(*builder, model_offset);
+  void* model_pointer = builder->GetBufferPointer();
+  const Model* model = flatbuffers::GetRoot<Model>(model_pointer);
+  return model;
+}
+
 }  // namespace
 
 const TfLiteRegistration* SimpleStatefulOp::getRegistration() {
@@ -669,9 +1460,12 @@ TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context,
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
 
   // Make sure that the input is in uint8_t with at least 1 data entry.
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  if (input->type != kTfLiteUInt8) return kTfLiteError;
+  MicroContext* micro_context = GetMicroContext(context);
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+
+  if (input->type != kTfLiteInt8) return kTfLiteError;
   if (NumElements(input->dims) == 0) return kTfLiteError;
 
   // Allocate a temporary buffer with the same size of input for sorting.
@@ -683,6 +1477,7 @@ TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context,
       context->AllocatePersistentBuffer(context, sizeof(int)));
   *data->invoke_count = 0;
 
+  micro_context->DeallocateTempTfLiteTensor(input);
   return kTfLiteOk;
 }
 
@@ -691,9 +1486,10 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
   *data->invoke_count += 1;
 
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
-  const uint8_t* input_data = GetTensorData<uint8_t>(input);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
+  const uint8_t* input_data = input->data.uint8;
   int size = NumElements(input->dims);
 
   uint8_t* sorting_buffer = reinterpret_cast<uint8_t*>(
@@ -711,14 +1507,14 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
     }
   }
 
-  TfLiteTensor* median;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kMedianTensor, &median));
-  uint8_t* median_data = GetTensorData<uint8_t>(median);
-  TfLiteTensor* invoke_count;
-  TF_LITE_ENSURE_OK(context,
-                    GetOutputSafe(context, node, kInvokeCount, &invoke_count));
-  int32_t* invoke_count_data = GetTensorData<int32_t>(invoke_count);
+  TfLiteEvalTensor* median =
+      tflite::micro::GetEvalOutput(context, node, kMedianTensor);
+  TF_LITE_ENSURE(context, median != nullptr);
+  uint8_t* median_data = median->data.uint8;
+  TfLiteEvalTensor* invoke_count =
+      tflite::micro::GetEvalOutput(context, node, kInvokeCount);
+  TF_LITE_ENSURE(context, invoke_count != nullptr);
+  int32_t* invoke_count_data = invoke_count->data.i32;
 
   median_data[0] = sorting_buffer[size / 2];
   invoke_count_data[0] = *data->invoke_count;
@@ -755,14 +1551,15 @@ TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   const int32_t* input_data = input->data.i32;
-  const TfLiteTensor* weight;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &weight));
+  const TfLiteEvalTensor* weight =
+      tflite::micro::GetEvalInput(context, node, 1);
+  TF_LITE_ENSURE(context, weight != nullptr);
   const uint8_t* weight_data = weight->data.uint8;
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
   int32_t* output_data = output->data.i32;
   output_data[0] =
       0;  // Catch output tensor sharing memory with an input tensor
@@ -804,18 +1601,20 @@ TfLiteStatus MultipleInputs::Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus MultipleInputs::Invoke(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
+  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   const int32_t* input_data = input->data.i32;
-  const TfLiteTensor* input1;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input1));
+  const TfLiteEvalTensor* input1 =
+      tflite::micro::GetEvalInput(context, node, 1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
   const int32_t* input_data1 = input1->data.i32;
-  const TfLiteTensor* input2;
-  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 2, &input2));
+  const TfLiteEvalTensor* input2 =
+      tflite::micro::GetEvalInput(context, node, 2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
   const int32_t* input_data2 = input2->data.i32;
 
-  TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
+  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
   int32_t* output_data = output->data.i32;
   output_data[0] =
       0;  // Catch output tensor sharing memory with an input tensor
@@ -825,6 +1624,40 @@ TfLiteStatus MultipleInputs::Invoke(TfLiteContext* context, TfLiteNode* node) {
 
 bool MultipleInputs::freed_ = false;
 
+const TfLiteRegistration* NoOp::getRegistration() {
+  return GetMutableRegistration();
+}
+
+TfLiteRegistration* NoOp::GetMutableRegistration() {
+  static TfLiteRegistration r;
+  r.init = Init;
+  r.prepare = Prepare;
+  r.invoke = Invoke;
+  r.free = Free;
+  return &r;
+}
+
+void* NoOp::Init(TfLiteContext* context, const char* buffer, size_t length) {
+  // We don't support delegates in TFL micro. This is a weak check to test if
+  // the context struct is zero-initialized.
+  TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr);
+  freed_ = false;
+  // No per-op state is allocated, so there is no user data to return.
+  return nullptr;
+}
+
+void NoOp::Free(TfLiteContext* context, void* buffer) { freed_ = true; }
+
+TfLiteStatus NoOp::Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus NoOp::Invoke(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+bool NoOp::freed_ = false;
+
 AllOpsResolver GetOpResolver() {
   AllOpsResolver op_resolver;
   op_resolver.AddCustom("mock_custom", MockCustom::GetMutableRegistration());
@@ -832,9 +1665,32 @@ AllOpsResolver GetOpResolver() {
                         SimpleStatefulOp::GetMutableRegistration());
   op_resolver.AddCustom("multiple_inputs_op",
                         MultipleInputs::GetMutableRegistration());
+  op_resolver.AddCustom("no_op", NoOp::GetMutableRegistration());
+  op_resolver.AddCustom("custom_packer_op", PackerOp::GetMutableRegistration());
   return op_resolver;
 }
 
+const Model* GetModelWithUnusedInputs() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildModelWithUnusedInputs());
+  }
+  return model;
+}
+
+const Model* GetModelWithUnusedOperatorOutputs() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildModelWithUnusedOperatorOutputs());
+  }
+  return model;
+}
+
+const Model* GetModelWith256x256Tensor() {
+  static const Model* model = BuildModelWith256x256Tensor();
+  return model;
+}
+
 const Model* GetSimpleMockModel() {
   static Model* model = nullptr;
   if (!model) {
@@ -851,6 +1707,46 @@ const Model* GetSimpleMultipleInputsModel() {
   return model;
 }
 
+const Model* GetSimpleModelWithSubgraphsAndIf() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildSimpleModelWithSubgraphsAndIf());
+  }
+  return model;
+}
+
+const Model* GetSimpleModelWithIfAndEmptySubgraph() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildSimpleModelWithIfAndEmptySubgraph());
+  }
+  return model;
+}
+
+const Model* GetSimpleModelWithSubgraphsAndWhile() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildSimpleModelWithSubgraphsAndWhile());
+  }
+  return model;
+}
+
+const Model* GetModelWithIfAndSubgraphInputTensorOverlap() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildModelWithIfAndSubgraphInputTensorOverlap());
+  }
+  return model;
+}
+
+const Model* GetSimpleModelWithNullInputsAndOutputs() {
+  static Model* model = nullptr;
+  if (!model) {
+    model = const_cast<Model*>(BuildSimpleMockModelWithNullInputsOutputs());
+  }
+  return model;
+}
+
 const Model* GetComplexMockModel() {
   static Model* model = nullptr;
   if (!model) {
@@ -903,13 +1799,21 @@ const Tensor* Create1dFlatbufferTensor(int size, bool is_variable) {
 const Tensor* CreateQuantizedFlatbufferTensor(int size) {
   using flatbuffers::Offset;
   flatbuffers::FlatBufferBuilder* builder = BuilderInstance();
+  constexpr size_t quant_params_size = 1;
+  const float min_array[quant_params_size] = {0.1f};
+  const float max_array[quant_params_size] = {0.2f};
+  const float scale_array[quant_params_size] = {0.3f};
+  const int64_t zero_point_array[quant_params_size] = {100ll};
+
   const Offset<QuantizationParameters> quant_params =
       CreateQuantizationParameters(
           *builder,
-          /*min=*/builder->CreateVector<float>({0.1f}),
-          /*max=*/builder->CreateVector<float>({0.2f}),
-          /*scale=*/builder->CreateVector<float>({0.3f}),
-          /*zero_point=*/builder->CreateVector<int64_t>({100ll}));
+          /*min=*/builder->CreateVector<float>(min_array, quant_params_size),
+          /*max=*/builder->CreateVector<float>(max_array, quant_params_size),
+          /*scale=*/
+          builder->CreateVector<float>(scale_array, quant_params_size),
+          /*zero_point=*/
+          builder->CreateVector<int64_t>(zero_point_array, quant_params_size));
 
   constexpr size_t tensor_shape_size = 1;
   const int32_t tensor_shape[tensor_shape_size] = {size};
@@ -971,22 +1875,10 @@ int TestStrcmp(const char* a, const char* b) {
          *reinterpret_cast<const unsigned char*>(b);
 }
 
-// Wrapper to forward kernel errors to the interpreter's error reporter.
-void ReportOpError(struct TfLiteContext* context, const char* format, ...) {
-#ifndef TF_LITE_STRIP_ERROR_STRINGS
-  ErrorReporter* error_reporter = static_cast<ErrorReporter*>(context->impl_);
-  va_list args;
-  va_start(args, format);
-  TF_LITE_REPORT_ERROR(error_reporter, format, args);
-  va_end(args);
-#endif
-}
-
 // Create a TfLiteIntArray from an array of ints.  The first element in the
 // supplied array must be the size of the array expressed as an int.
-TfLiteIntArray* IntArrayFromInts(const int* int_array) {
-  return const_cast<TfLiteIntArray*>(
-      reinterpret_cast<const TfLiteIntArray*>(int_array));
+TfLiteIntArray* IntArrayFromInts(int* int_array) {
+  return reinterpret_cast<TfLiteIntArray*>(int_array);
 }
 
 // Create a TfLiteFloatArray from an array of floats.  The first element in the
@@ -999,6 +1891,20 @@ TfLiteFloatArray* FloatArrayFromFloats(const float* floats) {
   return reinterpret_cast<TfLiteFloatArray*>(const_cast<float*>(floats));
 }
 
+TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
+                                       TfLiteIntArray* dims, float input_scale,
+                                       float weights_scale, bool is_variable) {
+  float bias_scale = input_scale * weights_scale;
+  tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
+
+  // Quantized int16_t tensors always have a zero point of 0, since the range of
+  // int16_t values is large, and because zero point costs extra cycles during
+  // processing.
+  TfLiteTensor result =
+      CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
+  return result;
+}
+
 TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
                                        TfLiteIntArray* dims, float input_scale,
                                        float weights_scale, bool is_variable) {
@@ -1013,11 +1919,27 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
   return result;
 }
 
+TfLiteTensor CreateQuantizedBiasTensor(const float* data,
+                                       std::int64_t* quantized,
+                                       TfLiteIntArray* dims, float input_scale,
+                                       float weights_scale, bool is_variable) {
+  float bias_scale = input_scale * weights_scale;
+  tflite::SymmetricQuantize(data, quantized, ElementCount(*dims), bias_scale);
+
+  // Quantized int64_t tensors always have a zero point of 0, since the range of
+  // int64_t values is large, and because zero point costs extra cycles during
+  // processing.
+  TfLiteTensor result =
+      CreateQuantizedTensor(quantized, dims, bias_scale, 0, is_variable);
+  return result;
+}
+
 // Quantizes int32_t bias tensor with per-channel weights determined by input
 // scale multiplied by weight scale for each channel.
+template <typename T>
 TfLiteTensor CreatePerChannelQuantizedBiasTensor(
-    const float* input, int32_t* quantized, TfLiteIntArray* dims,
-    float input_scale, float* weight_scales, float* scales, int* zero_points,
+    const float* input, T* quantized, TfLiteIntArray* dims, float input_scale,
+    float* weight_scales, float* scales, int* zero_points,
     TfLiteAffineQuantization* affine_quant, int quantized_dimension,
     bool is_variable) {
   int input_size = ElementCount(*dims);
@@ -1031,8 +1953,8 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
     zero_points[i + 1] = 0;
   }
 
-  SymmetricPerChannelQuantize<int32_t>(input, quantized, input_size,
-                                       num_channels, scales_array);
+  SymmetricPerChannelQuantize<T>(input, quantized, input_size, num_channels,
+                                 scales_array);
 
   affine_quant->scale = FloatArrayFromFloats(scales);
   affine_quant->zero_point = IntArrayFromInts(zero_points);
@@ -1043,16 +1965,37 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
   return result;
 }
 
+TfLiteTensor CreatePerChannelQuantizedBiasTensor(
+    const float* input, int32_t* quantized, TfLiteIntArray* dims,
+    float input_scale, float* weight_scales, float* scales, int* zero_points,
+    TfLiteAffineQuantization* affine_quant, int quantized_dimension,
+    bool is_variable) {
+  return CreatePerChannelQuantizedBiasTensor<int32_t>(
+      input, quantized, dims, input_scale, weight_scales, scales, zero_points,
+      affine_quant, quantized_dimension, is_variable);
+}
+
+TfLiteTensor CreatePerChannelQuantizedBiasTensor(
+    const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
+    float input_scale, float* weight_scales, float* scales, int* zero_points,
+    TfLiteAffineQuantization* affine_quant, int quantized_dimension,
+    bool is_variable) {
+  return CreatePerChannelQuantizedBiasTensor<std::int64_t>(
+      input, quantized, dims, input_scale, weight_scales, scales, zero_points,
+      affine_quant, quantized_dimension, is_variable);
+}
+
 TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
     const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
     int* zero_points, TfLiteAffineQuantization* affine_quant,
-    int quantized_dimension, bool is_variable) {
+    int quantized_dimension, bool is_variable, TfLiteType tensor_weight_type) {
   int channel_count = dims->data[quantized_dimension];
+
   scales[0] = static_cast<float>(channel_count);
   zero_points[0] = channel_count;
 
   SignedSymmetricPerChannelQuantize(input, dims, quantized_dimension, quantized,
-                                    &scales[1]);
+                                    &scales[1], tensor_weight_type);
 
   for (int i = 0; i < channel_count; i++) {
     zero_points[i + 1] = 0;
@@ -1061,8 +2004,8 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
   affine_quant->scale = FloatArrayFromFloats(scales);
   affine_quant->zero_point = IntArrayFromInts(zero_points);
   affine_quant->quantized_dimension = quantized_dimension;
-
-  TfLiteTensor result = CreateTensor(quantized, dims, is_variable);
+  TfLiteTensor result =
+      CreateTensor(quantized, dims, is_variable, tensor_weight_type);
   result.quantization = {kTfLiteAffineQuantization, affine_quant};
   return result;
 }
@@ -1075,5 +2018,18 @@ size_t GetModelTensorCount(const Model* model) {
   return 0;
 }
 
+void PackInt4ValuesDenselyInPlace(uint8_t* src_buffer, int buffer_size) {
+  for (int i = 0; i < buffer_size; ++i) {
+    if (i % 2 == 0) {
+      src_buffer[i / 2] = src_buffer[i] & 0x0F;
+    } else {
+      src_buffer[i / 2] |= src_buffer[i] << 4;
+    }
+  }
+  // the rest of the buffer should be empty since half of it is packed with the
+  // values
+  memset(src_buffer + (buffer_size + 1) / 2, 0, buffer_size / 2);
+}
+
 }  // namespace testing
 }  // namespace tflite
diff --git a/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h b/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h
index 03c0872..544181d 100644
--- a/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h
+++ b/edge-impulse-sdk/tensorflow/lite/micro/test_helpers.h
@@ -16,15 +16,13 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
 #define TENSORFLOW_LITE_MICRO_TEST_HELPERS_H_
 
-// Useful functions for writing tests.
-
 #include <cstdint>
 #include <limits>
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"  // from @flatbuffers
-#include "edge-impulse-sdk/tensorflow/lite//kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/c/common.h"
 #include "edge-impulse-sdk/tensorflow/lite/kernels/internal/compatibility.h"
+#include "edge-impulse-sdk/tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/all_ops_resolver.h"
 #include "edge-impulse-sdk/tensorflow/lite/micro/micro_utils.h"
 #include "edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h"
@@ -90,6 +88,19 @@ class MultipleInputs {
   static bool freed_;
 };
 
+// A simple no-op operator.
+class NoOp {
+ public:
+  static const TfLiteRegistration* getRegistration();
+  static TfLiteRegistration* GetMutableRegistration();
+  static void* Init(TfLiteContext* context, const char* buffer, size_t length);
+  static void Free(TfLiteContext* context, void* buffer);
+  static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node);
+  static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node);
+
+  static bool freed_;
+};
+
 // Returns an Op Resolver that can be used in the testing code.
 AllOpsResolver GetOpResolver();
 
@@ -101,6 +112,12 @@ const Model* GetSimpleMockModel();
 // tensors, and operators.
 const Model* GetComplexMockModel();
 
+// Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input,
+// 1 layer of weights, 1 output Tensor, and 1 operator.
+// The size of all three tensors is 256 x 256, which is larger than what other
+// models provide from this test helper.
+const Model* GetModelWith256x256Tensor();
+
 // Returns a simple flatbuffer model with two branches.
 const Model* GetSimpleModelWithBranch();
 
@@ -126,9 +143,33 @@ const Model* GetModelWithOfflinePlanning(int num_tensors,
                                          int num_conns,
                                          int num_subgraph_inputs = 0);
 
+// Returns a flatbuffer with a single operator, two inputs (one unused) and one
+// output.
+const Model* GetModelWithUnusedInputs();
+
+// Returns a flatbuffer with a single operator, zero inputs and two outputs
+// (one unused).
+const Model* GetModelWithUnusedOperatorOutputs();
+
 // Returns a flatbuffer model with `simple_stateful_op`
 const Model* GetSimpleStatefulModel();
 
+// Returns a flatbuffer model with "if" and two subgraphs.
+const Model* GetSimpleModelWithSubgraphsAndIf();
+
+// Returns a flatbuffer model with "if" and two subgraphs one of which is empty.
+const Model* GetSimpleModelWithIfAndEmptySubgraph();
+
+// Returns a flatbuffer model with "while" and three subgraphs.
+const Model* GetSimpleModelWithSubgraphsAndWhile();
+
+// Returns a flatbuffer model with "if" and two subgraphs, where input tensor 1
+// of the "if" subgraph overlaps with input tensor 2 of subgraph 1.
+const Model* GetModelWithIfAndSubgraphInputTensorOverlap();
+
+// Returns a flatbuffer model with null subgraph/operator inputs and outputs.
+const Model* GetSimpleModelWithNullInputsAndOutputs();
+
 // Builds a one-dimensional flatbuffer tensor of the given size.
 const Tensor* Create1dFlatbufferTensor(int size, bool is_variable = false);
 
@@ -146,45 +187,60 @@ CreateFlatbufferBuffers();
 // Performs a simple string comparison without requiring standard C library.
 int TestStrcmp(const char* a, const char* b);
 
-// Wrapper to forward kernel errors to the interpreter's error reporter.
-void ReportOpError(struct TfLiteContext* context, const char* format, ...);
-
 void PopulateContext(TfLiteTensor* tensors, int tensors_size,
                      TfLiteContext* context);
 
 // Create a TfLiteIntArray from an array of ints.  The first element in the
 // supplied array must be the size of the array expressed as an int.
-TfLiteIntArray* IntArrayFromInts(const int* int_array);
+TfLiteIntArray* IntArrayFromInts(int* int_array);
 
 // Create a TfLiteFloatArray from an array of floats.  The first element in the
 // supplied array must be the size of the array expressed as a float.
 TfLiteFloatArray* FloatArrayFromFloats(const float* floats);
 
+// Assumes that `src_buffer` is a buffer where each element is a 4-bit value
+// stored in 8-bit.
+// Repacks the buffer in place so that each byte holds two 4-bit values.
+// The packing format is low-bits-first, i.e. the lower nibble of a byte is
+// filled first, followed by the upper nibble.
+void PackInt4ValuesDenselyInPlace(uint8_t* src_buffer, int buffer_size);
+
 template <typename T>
 TfLiteTensor CreateTensor(const T* data, TfLiteIntArray* dims,
-                          const bool is_variable = false) {
+                          const bool is_variable = false,
+                          TfLiteType type = kTfLiteNoType) {
   TfLiteTensor result;
   result.dims = dims;
   result.params = {};
   result.quantization = {kTfLiteNoQuantization, nullptr};
   result.is_variable = is_variable;
   result.allocation_type = kTfLiteMemNone;
-  result.type = typeToTfLiteType<T>();
-  // Const cast is used to allow passing in const and non-const arrays within a
-  // single CreateTensor method. A Const array should be used for immutable
-  // input tensors and non-const array should be used for mutable and output
-  // tensors.
   result.data.data = const_cast<T*>(data);
   result.quantization = {kTfLiteAffineQuantization, nullptr};
   result.bytes = ElementCount(*dims) * sizeof(T);
+  result.data.data = const_cast<T*>(data);
+
+  if (type == kTfLiteInt4) {
+    result.type = kTfLiteInt4;
+    PackInt4ValuesDenselyInPlace(tflite::GetTensorData<uint8_t>(&result),
+                                 ElementCount(*dims));
+    result.bytes = ((ElementCount(*dims) + 1) / 2);
+  } else {
+    // For every type other than kTfLiteInt4, the tensor type is derived from
+    // T. The const_cast above allows passing const and non-const arrays to a
+    // single CreateTensor method: use a const array for immutable input
+    // tensors and a non-const array for mutable and output tensors.
+    result.type = typeToTfLiteType<T>();
+  }
   return result;
 }
 
 template <typename T>
 TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims,
                                    const float scale, const int zero_point = 0,
-                                   const bool is_variable = false) {
-  TfLiteTensor result = CreateTensor(data, dims, is_variable);
+                                   const bool is_variable = false,
+                                   TfLiteType type = kTfLiteNoType) {
+  TfLiteTensor result = CreateTensor(data, dims, is_variable, type);
   result.params = {scale, zero_point};
   result.quantization = {kTfLiteAffineQuantization, nullptr};
   return result;
@@ -193,17 +249,30 @@ TfLiteTensor CreateQuantizedTensor(const T* data, TfLiteIntArray* dims,
 template <typename T>
 TfLiteTensor CreateQuantizedTensor(const float* input, T* quantized,
                                    TfLiteIntArray* dims, float scale,
-                                   int zero_point, bool is_variable = false) {
+                                   int zero_point, bool is_variable = false,
+                                   TfLiteType type = kTfLiteNoType) {
   int input_size = ElementCount(*dims);
   tflite::Quantize(input, quantized, input_size, scale, zero_point);
-  return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable);
+  return CreateQuantizedTensor(quantized, dims, scale, zero_point, is_variable,
+                               type);
 }
 
+TfLiteTensor CreateQuantizedBiasTensor(const float* data, int16_t* quantized,
+                                       TfLiteIntArray* dims, float input_scale,
+                                       float weights_scale,
+                                       bool is_variable = false);
+
 TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized,
                                        TfLiteIntArray* dims, float input_scale,
                                        float weights_scale,
                                        bool is_variable = false);
 
+TfLiteTensor CreateQuantizedBiasTensor(const float* data,
+                                       std::int64_t* quantized,
+                                       TfLiteIntArray* dims, float input_scale,
+                                       float weights_scale,
+                                       bool is_variable = false);
+
 // Quantizes int32_t bias tensor with per-channel weights determined by input
 // scale multiplied by weight scale for each channel.
 TfLiteTensor CreatePerChannelQuantizedBiasTensor(
@@ -212,10 +281,19 @@ TfLiteTensor CreatePerChannelQuantizedBiasTensor(
     TfLiteAffineQuantization* affine_quant, int quantized_dimension,
     bool is_variable = false);
 
+// Quantizes int64_t bias tensor with per-channel weights determined by input
+// scale multiplied by weight scale for each channel.
+TfLiteTensor CreatePerChannelQuantizedBiasTensor(
+    const float* input, std::int64_t* quantized, TfLiteIntArray* dims,
+    float input_scale, float* weight_scales, float* scales, int* zero_points,
+    TfLiteAffineQuantization* affine_quant, int quantized_dimension,
+    bool is_variable = false);
+
 TfLiteTensor CreateSymmetricPerChannelQuantizedTensor(
     const float* input, int8_t* quantized, TfLiteIntArray* dims, float* scales,
     int* zero_points, TfLiteAffineQuantization* affine_quant,
-    int quantized_dimension, bool is_variable = false);
+    int quantized_dimension, bool is_variable = false,
+    TfLiteType tensor_weight_type = kTfLiteNoType);
 
 // Returns the number of tensors in the default subgraph for a tflite::Model.
 size_t GetModelTensorCount(const Model* model);
diff --git a/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h b/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h
index 82cf1c0..28d2bf8 100644
--- a/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h
+++ b/edge-impulse-sdk/tensorflow/lite/portable_type_to_tflitetype.h
@@ -27,7 +27,7 @@ limitations under the License.
 
 #include <stdint.h>
 
-#include "edge-impulse-sdk/tensorflow/lite/c/common.h"
+#include "edge-impulse-sdk/tensorflow/lite/core/c/common.h"
 
 namespace tflite {
 
@@ -61,6 +61,7 @@ struct TfLiteTypeToType {};  // Specializations below
 MATCH_TYPE_AND_TFLITE_TYPE(int32_t, kTfLiteInt32);
 MATCH_TYPE_AND_TFLITE_TYPE(uint32_t, kTfLiteUInt32);
 MATCH_TYPE_AND_TFLITE_TYPE(int16_t, kTfLiteInt16);
+MATCH_TYPE_AND_TFLITE_TYPE(uint16_t, kTfLiteUInt16);
 MATCH_TYPE_AND_TFLITE_TYPE(int64_t, kTfLiteInt64);
 MATCH_TYPE_AND_TFLITE_TYPE(float, kTfLiteFloat32);
 MATCH_TYPE_AND_TFLITE_TYPE(unsigned char, kTfLiteUInt8);
diff --git a/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h b/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h
index f46e84d..416029f 100755
--- a/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h
+++ b/edge-impulse-sdk/tensorflow/lite/schema/schema_generated.h
@@ -1,406 +1,578 @@
-/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
-#define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+#ifndef FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_
+#define FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_
 
 #include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"
 
+// Ensure the included flatbuffers.h is the same version as when this file was
+// generated, otherwise it may not be compatible.
+static_assert(FLATBUFFERS_VERSION_MAJOR == 2 &&
+              FLATBUFFERS_VERSION_MINOR == 0 &&
+              FLATBUFFERS_VERSION_REVISION == 6,
+             "Non-compatible flatbuffers version included");
+
 namespace tflite {
 
 struct CustomQuantization;
+struct CustomQuantizationBuilder;
 struct CustomQuantizationT;
 
 struct QuantizationParameters;
+struct QuantizationParametersBuilder;
 struct QuantizationParametersT;
 
 struct Int32Vector;
+struct Int32VectorBuilder;
 struct Int32VectorT;
 
 struct Uint16Vector;
+struct Uint16VectorBuilder;
 struct Uint16VectorT;
 
 struct Uint8Vector;
+struct Uint8VectorBuilder;
 struct Uint8VectorT;
 
 struct DimensionMetadata;
+struct DimensionMetadataBuilder;
 struct DimensionMetadataT;
 
 struct SparsityParameters;
+struct SparsityParametersBuilder;
 struct SparsityParametersT;
 
+struct VariantSubType;
+struct VariantSubTypeBuilder;
+struct VariantSubTypeT;
+
 struct Tensor;
+struct TensorBuilder;
 struct TensorT;
 
 struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
 struct Conv2DOptionsT;
 
 struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
 struct Conv3DOptionsT;
 
 struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
 struct Pool2DOptionsT;
 
 struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
 struct DepthwiseConv2DOptionsT;
 
 struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
 struct ConcatEmbeddingsOptionsT;
 
 struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
 struct LSHProjectionOptionsT;
 
 struct SVDFOptions;
+struct SVDFOptionsBuilder;
 struct SVDFOptionsT;
 
 struct RNNOptions;
+struct RNNOptionsBuilder;
 struct RNNOptionsT;
 
 struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
 struct SequenceRNNOptionsT;
 
 struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
 struct BidirectionalSequenceRNNOptionsT;
 
 struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
 struct FullyConnectedOptionsT;
 
 struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
 struct SoftmaxOptionsT;
 
 struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
 struct ConcatenationOptionsT;
 
 struct AddOptions;
+struct AddOptionsBuilder;
 struct AddOptionsT;
 
 struct MulOptions;
+struct MulOptionsBuilder;
 struct MulOptionsT;
 
 struct L2NormOptions;
+struct L2NormOptionsBuilder;
 struct L2NormOptionsT;
 
 struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
 struct LocalResponseNormalizationOptionsT;
 
 struct LSTMOptions;
+struct LSTMOptionsBuilder;
 struct LSTMOptionsT;
 
 struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
 struct UnidirectionalSequenceLSTMOptionsT;
 
 struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
 struct BidirectionalSequenceLSTMOptionsT;
 
 struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
 struct ResizeBilinearOptionsT;
 
 struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
 struct ResizeNearestNeighborOptionsT;
 
 struct CallOptions;
+struct CallOptionsBuilder;
 struct CallOptionsT;
 
 struct PadOptions;
+struct PadOptionsBuilder;
 struct PadOptionsT;
 
 struct PadV2Options;
+struct PadV2OptionsBuilder;
 struct PadV2OptionsT;
 
 struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
 struct ReshapeOptionsT;
 
 struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
 struct SpaceToBatchNDOptionsT;
 
 struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
 struct BatchToSpaceNDOptionsT;
 
 struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
 struct SkipGramOptionsT;
 
 struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
 struct SpaceToDepthOptionsT;
 
 struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
 struct DepthToSpaceOptionsT;
 
 struct SubOptions;
+struct SubOptionsBuilder;
 struct SubOptionsT;
 
 struct DivOptions;
+struct DivOptionsBuilder;
 struct DivOptionsT;
 
 struct TopKV2Options;
+struct TopKV2OptionsBuilder;
 struct TopKV2OptionsT;
 
 struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
 struct EmbeddingLookupSparseOptionsT;
 
 struct GatherOptions;
+struct GatherOptionsBuilder;
 struct GatherOptionsT;
 
 struct TransposeOptions;
+struct TransposeOptionsBuilder;
 struct TransposeOptionsT;
 
 struct ExpOptions;
+struct ExpOptionsBuilder;
 struct ExpOptionsT;
 
 struct CosOptions;
+struct CosOptionsBuilder;
 struct CosOptionsT;
 
 struct ReducerOptions;
+struct ReducerOptionsBuilder;
 struct ReducerOptionsT;
 
 struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
 struct SqueezeOptionsT;
 
 struct SplitOptions;
+struct SplitOptionsBuilder;
 struct SplitOptionsT;
 
 struct SplitVOptions;
+struct SplitVOptionsBuilder;
 struct SplitVOptionsT;
 
 struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
 struct StridedSliceOptionsT;
 
 struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
 struct LogSoftmaxOptionsT;
 
 struct CastOptions;
+struct CastOptionsBuilder;
 struct CastOptionsT;
 
 struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
 struct DequantizeOptionsT;
 
 struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
 struct MaximumMinimumOptionsT;
 
 struct TileOptions;
+struct TileOptionsBuilder;
 struct TileOptionsT;
 
 struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
 struct ArgMaxOptionsT;
 
 struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
 struct ArgMinOptionsT;
 
 struct GreaterOptions;
+struct GreaterOptionsBuilder;
 struct GreaterOptionsT;
 
 struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
 struct GreaterEqualOptionsT;
 
 struct LessOptions;
+struct LessOptionsBuilder;
 struct LessOptionsT;
 
 struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
 struct LessEqualOptionsT;
 
 struct NegOptions;
+struct NegOptionsBuilder;
 struct NegOptionsT;
 
 struct SelectOptions;
+struct SelectOptionsBuilder;
 struct SelectOptionsT;
 
 struct SliceOptions;
+struct SliceOptionsBuilder;
 struct SliceOptionsT;
 
 struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
 struct TransposeConvOptionsT;
 
 struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
 struct ExpandDimsOptionsT;
 
 struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
 struct SparseToDenseOptionsT;
 
 struct EqualOptions;
+struct EqualOptionsBuilder;
 struct EqualOptionsT;
 
 struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
 struct NotEqualOptionsT;
 
 struct ShapeOptions;
+struct ShapeOptionsBuilder;
 struct ShapeOptionsT;
 
 struct RankOptions;
+struct RankOptionsBuilder;
 struct RankOptionsT;
 
 struct PowOptions;
+struct PowOptionsBuilder;
 struct PowOptionsT;
 
 struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
 struct FakeQuantOptionsT;
 
 struct PackOptions;
+struct PackOptionsBuilder;
 struct PackOptionsT;
 
 struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
 struct LogicalOrOptionsT;
 
 struct OneHotOptions;
+struct OneHotOptionsBuilder;
 struct OneHotOptionsT;
 
 struct AbsOptions;
+struct AbsOptionsBuilder;
 struct AbsOptionsT;
 
 struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
 struct HardSwishOptionsT;
 
 struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
 struct LogicalAndOptionsT;
 
 struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
 struct LogicalNotOptionsT;
 
 struct UnpackOptions;
+struct UnpackOptionsBuilder;
 struct UnpackOptionsT;
 
 struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
 struct FloorDivOptionsT;
 
 struct SquareOptions;
+struct SquareOptionsBuilder;
 struct SquareOptionsT;
 
 struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
 struct ZerosLikeOptionsT;
 
 struct FillOptions;
+struct FillOptionsBuilder;
 struct FillOptionsT;
 
 struct FloorModOptions;
+struct FloorModOptionsBuilder;
 struct FloorModOptionsT;
 
 struct RangeOptions;
+struct RangeOptionsBuilder;
 struct RangeOptionsT;
 
 struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
 struct LeakyReluOptionsT;
 
 struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
 struct SquaredDifferenceOptionsT;
 
 struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
 struct MirrorPadOptionsT;
 
 struct UniqueOptions;
+struct UniqueOptionsBuilder;
 struct UniqueOptionsT;
 
 struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
 struct ReverseV2OptionsT;
 
 struct AddNOptions;
+struct AddNOptionsBuilder;
 struct AddNOptionsT;
 
 struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
 struct GatherNdOptionsT;
 
 struct WhereOptions;
+struct WhereOptionsBuilder;
 struct WhereOptionsT;
 
 struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
 struct ReverseSequenceOptionsT;
 
 struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
 struct MatrixDiagOptionsT;
 
 struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
 struct QuantizeOptionsT;
 
 struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
 struct MatrixSetDiagOptionsT;
 
 struct IfOptions;
+struct IfOptionsBuilder;
 struct IfOptionsT;
 
 struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
 struct CallOnceOptionsT;
 
 struct WhileOptions;
+struct WhileOptionsBuilder;
 struct WhileOptionsT;
 
 struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
 struct NonMaxSuppressionV4OptionsT;
 
 struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
 struct NonMaxSuppressionV5OptionsT;
 
 struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
 struct ScatterNdOptionsT;
 
 struct SelectV2Options;
+struct SelectV2OptionsBuilder;
 struct SelectV2OptionsT;
 
 struct DensifyOptions;
+struct DensifyOptionsBuilder;
 struct DensifyOptionsT;
 
 struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
 struct SegmentSumOptionsT;
 
 struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
 struct BatchMatMulOptionsT;
 
 struct CumsumOptions;
+struct CumsumOptionsBuilder;
 struct CumsumOptionsT;
 
 struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
 struct BroadcastToOptionsT;
 
 struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
 struct Rfft2dOptionsT;
 
 struct HashtableOptions;
+struct HashtableOptionsBuilder;
 struct HashtableOptionsT;
 
 struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
 struct HashtableFindOptionsT;
 
 struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
 struct HashtableImportOptionsT;
 
 struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
 struct HashtableSizeOptionsT;
 
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+struct VarHandleOptionsT;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+struct ReadVariableOptionsT;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+struct AssignVariableOptionsT;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+struct RandomOptionsT;
+
+struct BucketizeOptions;
+struct BucketizeOptionsBuilder;
+struct BucketizeOptionsT;
+
+struct GeluOptions;
+struct GeluOptionsBuilder;
+struct GeluOptionsT;
+
+struct DynamicUpdateSliceOptions;
+struct DynamicUpdateSliceOptionsBuilder;
+struct DynamicUpdateSliceOptionsT;
+
+struct UnsortedSegmentProdOptions;
+struct UnsortedSegmentProdOptionsBuilder;
+struct UnsortedSegmentProdOptionsT;
+
+struct UnsortedSegmentMaxOptions;
+struct UnsortedSegmentMaxOptionsBuilder;
+struct UnsortedSegmentMaxOptionsT;
+
+struct UnsortedSegmentSumOptions;
+struct UnsortedSegmentSumOptionsBuilder;
+struct UnsortedSegmentSumOptionsT;
+
+struct ATan2Options;
+struct ATan2OptionsBuilder;
+struct ATan2OptionsT;
+
+struct UnsortedSegmentMinOptions;
+struct UnsortedSegmentMinOptionsBuilder;
+struct UnsortedSegmentMinOptionsT;
+
+struct SignOptions;
+struct SignOptionsBuilder;
+struct SignOptionsT;
+
 struct OperatorCode;
+struct OperatorCodeBuilder;
 struct OperatorCodeT;
 
 struct Operator;
+struct OperatorBuilder;
 struct OperatorT;
 
 struct SubGraph;
+struct SubGraphBuilder;
 struct SubGraphT;
 
 struct Buffer;
+struct BufferBuilder;
 struct BufferT;
 
 struct Metadata;
+struct MetadataBuilder;
 struct MetadataT;
 
 struct TensorMap;
+struct TensorMapBuilder;
 struct TensorMapT;
 
 struct SignatureDef;
+struct SignatureDefBuilder;
 struct SignatureDefT;
 
 struct Model;
+struct ModelBuilder;
 struct ModelT;
 
-enum TensorType {
+enum TensorType : int8_t {
   TensorType_FLOAT32 = 0,
   TensorType_FLOAT16 = 1,
   TensorType_INT32 = 2,
@@ -417,11 +589,13 @@ enum TensorType {
   TensorType_RESOURCE = 13,
   TensorType_VARIANT = 14,
   TensorType_UINT32 = 15,
+  TensorType_UINT16 = 16,
+  TensorType_INT4 = 17,
   TensorType_MIN = TensorType_FLOAT32,
-  TensorType_MAX = TensorType_UINT32
+  TensorType_MAX = TensorType_INT4
 };
 
-inline const TensorType (&EnumValuesTensorType())[16] {
+inline const TensorType (&EnumValuesTensorType())[18] {
   static const TensorType values[] = {
     TensorType_FLOAT32,
     TensorType_FLOAT16,
@@ -438,13 +612,15 @@ inline const TensorType (&EnumValuesTensorType())[16] {
     TensorType_UINT64,
     TensorType_RESOURCE,
     TensorType_VARIANT,
-    TensorType_UINT32
+    TensorType_UINT32,
+    TensorType_UINT16,
+    TensorType_INT4
   };
   return values;
 }
 
 inline const char * const *EnumNamesTensorType() {
-  static const char * const names[17] = {
+  static const char * const names[19] = {
     "FLOAT32",
     "FLOAT16",
     "INT32",
@@ -461,244 +637,21 @@ inline const char * const *EnumNamesTensorType() {
     "RESOURCE",
     "VARIANT",
     "UINT32",
+    "UINT16",
+    "INT4",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e) {
-  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32)) return "";
+  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT4)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorType()[index];
 }
 
-enum QuantizationDetails {
-  QuantizationDetails_NONE = 0,
-  QuantizationDetails_CustomQuantization = 1,
-  QuantizationDetails_MIN = QuantizationDetails_NONE,
-  QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
-};
-
-inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] {
-  static const QuantizationDetails values[] = {
-    QuantizationDetails_NONE,
-    QuantizationDetails_CustomQuantization
-  };
-  return values;
-}
-
-inline const char * const *EnumNamesQuantizationDetails() {
-  static const char * const names[3] = {
-    "NONE",
-    "CustomQuantization",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameQuantizationDetails(QuantizationDetails e) {
-  if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesQuantizationDetails()[index];
-}
-
-template<typename T> struct QuantizationDetailsTraits {
-  static const QuantizationDetails enum_value = QuantizationDetails_NONE;
-};
-
-template<> struct QuantizationDetailsTraits<tflite::CustomQuantization> {
-  static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
-};
-
-struct QuantizationDetailsUnion {
-  QuantizationDetails type;
-  void *value;
-
-  QuantizationDetailsUnion() : type(QuantizationDetails_NONE), value(nullptr) {}
-  QuantizationDetailsUnion(QuantizationDetailsUnion&& u) FLATBUFFERS_NOEXCEPT :
-    type(QuantizationDetails_NONE), value(nullptr)
-    { std::swap(type, u.type); std::swap(value, u.value); }
-  QuantizationDetailsUnion(const QuantizationDetailsUnion &) FLATBUFFERS_NOEXCEPT;
-  QuantizationDetailsUnion &operator=(const QuantizationDetailsUnion &u) FLATBUFFERS_NOEXCEPT
-    { QuantizationDetailsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
-  QuantizationDetailsUnion &operator=(QuantizationDetailsUnion &&u) FLATBUFFERS_NOEXCEPT
-    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
-  ~QuantizationDetailsUnion() { Reset(); }
-
-  void Reset();
-
-#ifndef FLATBUFFERS_CPP98_STL
-  template <typename T>
-  void Set(T&& val) {
-    using RT = typename std::remove_reference<T>::type;
-    Reset();
-    type = QuantizationDetailsTraits<typename RT::TableType>::enum_value;
-    if (type != QuantizationDetails_NONE) {
-      value = new RT(std::forward<T>(val));
-    }
-  }
-#endif  // FLATBUFFERS_CPP98_STL
-
-  static void *UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver);
-  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
-
-  tflite::CustomQuantizationT *AsCustomQuantization() {
-    return type == QuantizationDetails_CustomQuantization ?
-      reinterpret_cast<tflite::CustomQuantizationT *>(value) : nullptr;
-  }
-  const tflite::CustomQuantizationT *AsCustomQuantization() const {
-    return type == QuantizationDetails_CustomQuantization ?
-      reinterpret_cast<const tflite::CustomQuantizationT *>(value) : nullptr;
-  }
-};
-
-bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type);
-bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-enum DimensionType {
-  DimensionType_DENSE = 0,
-  DimensionType_SPARSE_CSR = 1,
-  DimensionType_MIN = DimensionType_DENSE,
-  DimensionType_MAX = DimensionType_SPARSE_CSR
-};
-
-inline const DimensionType (&EnumValuesDimensionType())[2] {
-  static const DimensionType values[] = {
-    DimensionType_DENSE,
-    DimensionType_SPARSE_CSR
-  };
-  return values;
-}
-
-inline const char * const *EnumNamesDimensionType() {
-  static const char * const names[3] = {
-    "DENSE",
-    "SPARSE_CSR",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameDimensionType(DimensionType e) {
-  if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesDimensionType()[index];
-}
-
-enum SparseIndexVector {
-  SparseIndexVector_NONE = 0,
-  SparseIndexVector_Int32Vector = 1,
-  SparseIndexVector_Uint16Vector = 2,
-  SparseIndexVector_Uint8Vector = 3,
-  SparseIndexVector_MIN = SparseIndexVector_NONE,
-  SparseIndexVector_MAX = SparseIndexVector_Uint8Vector
-};
-
-inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4] {
-  static const SparseIndexVector values[] = {
-    SparseIndexVector_NONE,
-    SparseIndexVector_Int32Vector,
-    SparseIndexVector_Uint16Vector,
-    SparseIndexVector_Uint8Vector
-  };
-  return values;
-}
-
-inline const char * const *EnumNamesSparseIndexVector() {
-  static const char * const names[5] = {
-    "NONE",
-    "Int32Vector",
-    "Uint16Vector",
-    "Uint8Vector",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameSparseIndexVector(SparseIndexVector e) {
-  if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesSparseIndexVector()[index];
-}
-
-template<typename T> struct SparseIndexVectorTraits {
-  static const SparseIndexVector enum_value = SparseIndexVector_NONE;
-};
-
-template<> struct SparseIndexVectorTraits<tflite::Int32Vector> {
-  static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
-};
-
-template<> struct SparseIndexVectorTraits<tflite::Uint16Vector> {
-  static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
-};
-
-template<> struct SparseIndexVectorTraits<tflite::Uint8Vector> {
-  static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
-};
-
-struct SparseIndexVectorUnion {
-  SparseIndexVector type;
-  void *value;
-
-  SparseIndexVectorUnion() : type(SparseIndexVector_NONE), value(nullptr) {}
-  SparseIndexVectorUnion(SparseIndexVectorUnion&& u) FLATBUFFERS_NOEXCEPT :
-    type(SparseIndexVector_NONE), value(nullptr)
-    { std::swap(type, u.type); std::swap(value, u.value); }
-  SparseIndexVectorUnion(const SparseIndexVectorUnion &) FLATBUFFERS_NOEXCEPT;
-  SparseIndexVectorUnion &operator=(const SparseIndexVectorUnion &u) FLATBUFFERS_NOEXCEPT
-    { SparseIndexVectorUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
-  SparseIndexVectorUnion &operator=(SparseIndexVectorUnion &&u) FLATBUFFERS_NOEXCEPT
-    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
-  ~SparseIndexVectorUnion() { Reset(); }
-
-  void Reset();
-
-#ifndef FLATBUFFERS_CPP98_STL
-  template <typename T>
-  void Set(T&& val) {
-    using RT = typename std::remove_reference<T>::type;
-    Reset();
-    type = SparseIndexVectorTraits<typename RT::TableType>::enum_value;
-    if (type != SparseIndexVector_NONE) {
-      value = new RT(std::forward<T>(val));
-    }
-  }
-#endif  // FLATBUFFERS_CPP98_STL
-
-  static void *UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver);
-  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
-
-  tflite::Int32VectorT *AsInt32Vector() {
-    return type == SparseIndexVector_Int32Vector ?
-      reinterpret_cast<tflite::Int32VectorT *>(value) : nullptr;
-  }
-  const tflite::Int32VectorT *AsInt32Vector() const {
-    return type == SparseIndexVector_Int32Vector ?
-      reinterpret_cast<const tflite::Int32VectorT *>(value) : nullptr;
-  }
-  tflite::Uint16VectorT *AsUint16Vector() {
-    return type == SparseIndexVector_Uint16Vector ?
-      reinterpret_cast<tflite::Uint16VectorT *>(value) : nullptr;
-  }
-  const tflite::Uint16VectorT *AsUint16Vector() const {
-    return type == SparseIndexVector_Uint16Vector ?
-      reinterpret_cast<const tflite::Uint16VectorT *>(value) : nullptr;
-  }
-  tflite::Uint8VectorT *AsUint8Vector() {
-    return type == SparseIndexVector_Uint8Vector ?
-      reinterpret_cast<tflite::Uint8VectorT *>(value) : nullptr;
-  }
-  const tflite::Uint8VectorT *AsUint8Vector() const {
-    return type == SparseIndexVector_Uint8Vector ?
-      reinterpret_cast<const tflite::Uint8VectorT *>(value) : nullptr;
-  }
-};
 
-bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type);
-bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-enum BuiltinOperator {
+enum BuiltinOperator : int32_t {
   BuiltinOperator_ADD = 0,
   BuiltinOperator_AVERAGE_POOL_2D = 1,
   BuiltinOperator_CONCATENATION = 2,
@@ -839,11 +792,30 @@ enum BuiltinOperator {
   BuiltinOperator_HASHTABLE_FIND = 137,
   BuiltinOperator_HASHTABLE_IMPORT = 138,
   BuiltinOperator_HASHTABLE_SIZE = 139,
+  BuiltinOperator_REDUCE_ALL = 140,
+  BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+  BuiltinOperator_VAR_HANDLE = 142,
+  BuiltinOperator_READ_VARIABLE = 143,
+  BuiltinOperator_ASSIGN_VARIABLE = 144,
+  BuiltinOperator_BROADCAST_ARGS = 145,
+  BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+  BuiltinOperator_BUCKETIZE = 147,
+  BuiltinOperator_RANDOM_UNIFORM = 148,
+  BuiltinOperator_MULTINOMIAL = 149,
+  BuiltinOperator_GELU = 150,
+  BuiltinOperator_DYNAMIC_UPDATE_SLICE = 151,
+  BuiltinOperator_RELU_0_TO_1 = 152,
+  BuiltinOperator_UNSORTED_SEGMENT_PROD = 153,
+  BuiltinOperator_UNSORTED_SEGMENT_MAX = 154,
+  BuiltinOperator_UNSORTED_SEGMENT_SUM = 155,
+  BuiltinOperator_ATAN2 = 156,
+  BuiltinOperator_UNSORTED_SEGMENT_MIN = 157,
+  BuiltinOperator_SIGN = 158,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_HASHTABLE_SIZE
+  BuiltinOperator_MAX = BuiltinOperator_SIGN
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[140] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[159] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -984,13 +956,32 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[140] {
     BuiltinOperator_HASHTABLE,
     BuiltinOperator_HASHTABLE_FIND,
     BuiltinOperator_HASHTABLE_IMPORT,
-    BuiltinOperator_HASHTABLE_SIZE
+    BuiltinOperator_HASHTABLE_SIZE,
+    BuiltinOperator_REDUCE_ALL,
+    BuiltinOperator_CONV_3D_TRANSPOSE,
+    BuiltinOperator_VAR_HANDLE,
+    BuiltinOperator_READ_VARIABLE,
+    BuiltinOperator_ASSIGN_VARIABLE,
+    BuiltinOperator_BROADCAST_ARGS,
+    BuiltinOperator_RANDOM_STANDARD_NORMAL,
+    BuiltinOperator_BUCKETIZE,
+    BuiltinOperator_RANDOM_UNIFORM,
+    BuiltinOperator_MULTINOMIAL,
+    BuiltinOperator_GELU,
+    BuiltinOperator_DYNAMIC_UPDATE_SLICE,
+    BuiltinOperator_RELU_0_TO_1,
+    BuiltinOperator_UNSORTED_SEGMENT_PROD,
+    BuiltinOperator_UNSORTED_SEGMENT_MAX,
+    BuiltinOperator_UNSORTED_SEGMENT_SUM,
+    BuiltinOperator_ATAN2,
+    BuiltinOperator_UNSORTED_SEGMENT_MIN,
+    BuiltinOperator_SIGN
   };
   return values;
 }
 
 inline const char * const *EnumNamesBuiltinOperator() {
-  static const char * const names[141] = {
+  static const char * const names[160] = {
     "ADD",
     "AVERAGE_POOL_2D",
     "CONCATENATION",
@@ -1131,18 +1122,37 @@ inline const char * const *EnumNamesBuiltinOperator() {
     "HASHTABLE_FIND",
     "HASHTABLE_IMPORT",
     "HASHTABLE_SIZE",
+    "REDUCE_ALL",
+    "CONV_3D_TRANSPOSE",
+    "VAR_HANDLE",
+    "READ_VARIABLE",
+    "ASSIGN_VARIABLE",
+    "BROADCAST_ARGS",
+    "RANDOM_STANDARD_NORMAL",
+    "BUCKETIZE",
+    "RANDOM_UNIFORM",
+    "MULTINOMIAL",
+    "GELU",
+    "DYNAMIC_UPDATE_SLICE",
+    "RELU_0_TO_1",
+    "UNSORTED_SEGMENT_PROD",
+    "UNSORTED_SEGMENT_MAX",
+    "UNSORTED_SEGMENT_SUM",
+    "ATAN2",
+    "UNSORTED_SEGMENT_MIN",
+    "SIGN",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameBuiltinOperator(BuiltinOperator e) {
-  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_HASHTABLE_SIZE)) return "";
+  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_SIGN)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOperator()[index];
 }
 
-enum BuiltinOptions {
+enum BuiltinOptions : uint8_t {
   BuiltinOptions_NONE = 0,
   BuiltinOptions_Conv2DOptions = 1,
   BuiltinOptions_DepthwiseConv2DOptions = 2,
@@ -1254,11 +1264,24 @@ enum BuiltinOptions {
   BuiltinOptions_HashtableFindOptions = 108,
   BuiltinOptions_HashtableImportOptions = 109,
   BuiltinOptions_HashtableSizeOptions = 110,
+  BuiltinOptions_VarHandleOptions = 111,
+  BuiltinOptions_ReadVariableOptions = 112,
+  BuiltinOptions_AssignVariableOptions = 113,
+  BuiltinOptions_RandomOptions = 114,
+  BuiltinOptions_BucketizeOptions = 115,
+  BuiltinOptions_GeluOptions = 116,
+  BuiltinOptions_DynamicUpdateSliceOptions = 117,
+  BuiltinOptions_UnsortedSegmentProdOptions = 118,
+  BuiltinOptions_UnsortedSegmentMaxOptions = 119,
+  BuiltinOptions_UnsortedSegmentMinOptions = 120,
+  BuiltinOptions_UnsortedSegmentSumOptions = 121,
+  BuiltinOptions_ATan2Options = 122,
+  BuiltinOptions_SignOptions = 123,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_HashtableSizeOptions
+  BuiltinOptions_MAX = BuiltinOptions_SignOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[111] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[124] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -1370,13 +1393,26 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[111] {
     BuiltinOptions_HashtableOptions,
     BuiltinOptions_HashtableFindOptions,
     BuiltinOptions_HashtableImportOptions,
-    BuiltinOptions_HashtableSizeOptions
+    BuiltinOptions_HashtableSizeOptions,
+    BuiltinOptions_VarHandleOptions,
+    BuiltinOptions_ReadVariableOptions,
+    BuiltinOptions_AssignVariableOptions,
+    BuiltinOptions_RandomOptions,
+    BuiltinOptions_BucketizeOptions,
+    BuiltinOptions_GeluOptions,
+    BuiltinOptions_DynamicUpdateSliceOptions,
+    BuiltinOptions_UnsortedSegmentProdOptions,
+    BuiltinOptions_UnsortedSegmentMaxOptions,
+    BuiltinOptions_UnsortedSegmentMinOptions,
+    BuiltinOptions_UnsortedSegmentSumOptions,
+    BuiltinOptions_ATan2Options,
+    BuiltinOptions_SignOptions
   };
   return values;
 }
 
 inline const char * const *EnumNamesBuiltinOptions() {
-  static const char * const names[112] = {
+  static const char * const names[125] = {
     "NONE",
     "Conv2DOptions",
     "DepthwiseConv2DOptions",
@@ -1488,13 +1524,26 @@ inline const char * const *EnumNamesBuiltinOptions() {
     "HashtableFindOptions",
     "HashtableImportOptions",
     "HashtableSizeOptions",
+    "VarHandleOptions",
+    "ReadVariableOptions",
+    "AssignVariableOptions",
+    "RandomOptions",
+    "BucketizeOptions",
+    "GeluOptions",
+    "DynamicUpdateSliceOptions",
+    "UnsortedSegmentProdOptions",
+    "UnsortedSegmentMaxOptions",
+    "UnsortedSegmentMinOptions",
+    "UnsortedSegmentSumOptions",
+    "ATan2Options",
+    "SignOptions",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameBuiltinOptions(BuiltinOptions e) {
-  if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_HashtableSizeOptions)) return "";
+  if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_SignOptions)) return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOptions()[index];
 }
@@ -1943,15946 +1992,596 @@ template<> struct BuiltinOptionsTraits<tflite::HashtableSizeOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
 };
 
-struct BuiltinOptionsUnion {
-  BuiltinOptions type;
-  void *value;
-
-  BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {}
-  BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT :
-    type(BuiltinOptions_NONE), value(nullptr)
-    { std::swap(type, u.type); std::swap(value, u.value); }
-  BuiltinOptionsUnion(const BuiltinOptionsUnion &) FLATBUFFERS_NOEXCEPT;
-  BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT
-    { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
-  BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
-    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
-  ~BuiltinOptionsUnion() { Reset(); }
-
-  void Reset();
-
-#ifndef FLATBUFFERS_CPP98_STL
-  template <typename T>
-  void Set(T&& val) {
-    using RT = typename std::remove_reference<T>::type;
-    Reset();
-    type = BuiltinOptionsTraits<typename RT::TableType>::enum_value;
-    if (type != BuiltinOptions_NONE) {
-      value = new RT(std::forward<T>(val));
-    }
-  }
-#endif  // FLATBUFFERS_CPP98_STL
+template<> struct BuiltinOptionsTraits<tflite::VarHandleOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
 
-  static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver);
-  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+template<> struct BuiltinOptionsTraits<tflite::ReadVariableOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
 
-  tflite::Conv2DOptionsT *AsConv2DOptions() {
-    return type == BuiltinOptions_Conv2DOptions ?
-      reinterpret_cast<tflite::Conv2DOptionsT *>(value) : nullptr;
-  }
-  const tflite::Conv2DOptionsT *AsConv2DOptions() const {
-    return type == BuiltinOptions_Conv2DOptions ?
-      reinterpret_cast<const tflite::Conv2DOptionsT *>(value) : nullptr;
-  }
-  tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() {
-    return type == BuiltinOptions_DepthwiseConv2DOptions ?
-      reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(value) : nullptr;
-  }
-  const tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const {
-    return type == BuiltinOptions_DepthwiseConv2DOptions ?
-      reinterpret_cast<const tflite::DepthwiseConv2DOptionsT *>(value) : nullptr;
-  }
-  tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() {
-    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
-      reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(value) : nullptr;
-  }
-  const tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const {
-    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
-      reinterpret_cast<const tflite::ConcatEmbeddingsOptionsT *>(value) : nullptr;
-  }
-  tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() {
-    return type == BuiltinOptions_LSHProjectionOptions ?
-      reinterpret_cast<tflite::LSHProjectionOptionsT *>(value) : nullptr;
-  }
-  const tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() const {
-    return type == BuiltinOptions_LSHProjectionOptions ?
-      reinterpret_cast<const tflite::LSHProjectionOptionsT *>(value) : nullptr;
-  }
-  tflite::Pool2DOptionsT *AsPool2DOptions() {
-    return type == BuiltinOptions_Pool2DOptions ?
-      reinterpret_cast<tflite::Pool2DOptionsT *>(value) : nullptr;
-  }
-  const tflite::Pool2DOptionsT *AsPool2DOptions() const {
-    return type == BuiltinOptions_Pool2DOptions ?
-      reinterpret_cast<const tflite::Pool2DOptionsT *>(value) : nullptr;
-  }
-  tflite::SVDFOptionsT *AsSVDFOptions() {
-    return type == BuiltinOptions_SVDFOptions ?
-      reinterpret_cast<tflite::SVDFOptionsT *>(value) : nullptr;
-  }
-  const tflite::SVDFOptionsT *AsSVDFOptions() const {
-    return type == BuiltinOptions_SVDFOptions ?
-      reinterpret_cast<const tflite::SVDFOptionsT *>(value) : nullptr;
-  }
-  tflite::RNNOptionsT *AsRNNOptions() {
-    return type == BuiltinOptions_RNNOptions ?
-      reinterpret_cast<tflite::RNNOptionsT *>(value) : nullptr;
-  }
-  const tflite::RNNOptionsT *AsRNNOptions() const {
-    return type == BuiltinOptions_RNNOptions ?
-      reinterpret_cast<const tflite::RNNOptionsT *>(value) : nullptr;
-  }
-  tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() {
-    return type == BuiltinOptions_FullyConnectedOptions ?
-      reinterpret_cast<tflite::FullyConnectedOptionsT *>(value) : nullptr;
-  }
-  const tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() const {
-    return type == BuiltinOptions_FullyConnectedOptions ?
-      reinterpret_cast<const tflite::FullyConnectedOptionsT *>(value) : nullptr;
-  }
-  tflite::SoftmaxOptionsT *AsSoftmaxOptions() {
-    return type == BuiltinOptions_SoftmaxOptions ?
-      reinterpret_cast<tflite::SoftmaxOptionsT *>(value) : nullptr;
-  }
-  const tflite::SoftmaxOptionsT *AsSoftmaxOptions() const {
-    return type == BuiltinOptions_SoftmaxOptions ?
-      reinterpret_cast<const tflite::SoftmaxOptionsT *>(value) : nullptr;
-  }
-  tflite::ConcatenationOptionsT *AsConcatenationOptions() {
-    return type == BuiltinOptions_ConcatenationOptions ?
-      reinterpret_cast<tflite::ConcatenationOptionsT *>(value) : nullptr;
-  }
-  const tflite::ConcatenationOptionsT *AsConcatenationOptions() const {
-    return type == BuiltinOptions_ConcatenationOptions ?
-      reinterpret_cast<const tflite::ConcatenationOptionsT *>(value) : nullptr;
-  }
-  tflite::AddOptionsT *AsAddOptions() {
-    return type == BuiltinOptions_AddOptions ?
-      reinterpret_cast<tflite::AddOptionsT *>(value) : nullptr;
-  }
-  const tflite::AddOptionsT *AsAddOptions() const {
-    return type == BuiltinOptions_AddOptions ?
-      reinterpret_cast<const tflite::AddOptionsT *>(value) : nullptr;
-  }
-  tflite::L2NormOptionsT *AsL2NormOptions() {
-    return type == BuiltinOptions_L2NormOptions ?
-      reinterpret_cast<tflite::L2NormOptionsT *>(value) : nullptr;
-  }
-  const tflite::L2NormOptionsT *AsL2NormOptions() const {
-    return type == BuiltinOptions_L2NormOptions ?
-      reinterpret_cast<const tflite::L2NormOptionsT *>(value) : nullptr;
-  }
-  tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() {
-    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
-      reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(value) : nullptr;
-  }
-  const tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const {
-    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
-      reinterpret_cast<const tflite::LocalResponseNormalizationOptionsT *>(value) : nullptr;
-  }
-  tflite::LSTMOptionsT *AsLSTMOptions() {
-    return type == BuiltinOptions_LSTMOptions ?
-      reinterpret_cast<tflite::LSTMOptionsT *>(value) : nullptr;
-  }
-  const tflite::LSTMOptionsT *AsLSTMOptions() const {
-    return type == BuiltinOptions_LSTMOptions ?
-      reinterpret_cast<const tflite::LSTMOptionsT *>(value) : nullptr;
-  }
-  tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() {
-    return type == BuiltinOptions_ResizeBilinearOptions ?
-      reinterpret_cast<tflite::ResizeBilinearOptionsT *>(value) : nullptr;
-  }
-  const tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() const {
-    return type == BuiltinOptions_ResizeBilinearOptions ?
-      reinterpret_cast<const tflite::ResizeBilinearOptionsT *>(value) : nullptr;
-  }
-  tflite::CallOptionsT *AsCallOptions() {
-    return type == BuiltinOptions_CallOptions ?
-      reinterpret_cast<tflite::CallOptionsT *>(value) : nullptr;
-  }
-  const tflite::CallOptionsT *AsCallOptions() const {
-    return type == BuiltinOptions_CallOptions ?
-      reinterpret_cast<const tflite::CallOptionsT *>(value) : nullptr;
-  }
-  tflite::ReshapeOptionsT *AsReshapeOptions() {
-    return type == BuiltinOptions_ReshapeOptions ?
-      reinterpret_cast<tflite::ReshapeOptionsT *>(value) : nullptr;
-  }
-  const tflite::ReshapeOptionsT *AsReshapeOptions() const {
-    return type == BuiltinOptions_ReshapeOptions ?
-      reinterpret_cast<const tflite::ReshapeOptionsT *>(value) : nullptr;
-  }
-  tflite::SkipGramOptionsT *AsSkipGramOptions() {
-    return type == BuiltinOptions_SkipGramOptions ?
-      reinterpret_cast<tflite::SkipGramOptionsT *>(value) : nullptr;
-  }
-  const tflite::SkipGramOptionsT *AsSkipGramOptions() const {
-    return type == BuiltinOptions_SkipGramOptions ?
-      reinterpret_cast<const tflite::SkipGramOptionsT *>(value) : nullptr;
-  }
-  tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() {
-    return type == BuiltinOptions_SpaceToDepthOptions ?
-      reinterpret_cast<tflite::SpaceToDepthOptionsT *>(value) : nullptr;
-  }
-  const tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() const {
-    return type == BuiltinOptions_SpaceToDepthOptions ?
-      reinterpret_cast<const tflite::SpaceToDepthOptionsT *>(value) : nullptr;
-  }
-  tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() {
-    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
-      reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(value) : nullptr;
-  }
-  const tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const {
-    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
-      reinterpret_cast<const tflite::EmbeddingLookupSparseOptionsT *>(value) : nullptr;
-  }
-  tflite::MulOptionsT *AsMulOptions() {
-    return type == BuiltinOptions_MulOptions ?
-      reinterpret_cast<tflite::MulOptionsT *>(value) : nullptr;
-  }
-  const tflite::MulOptionsT *AsMulOptions() const {
-    return type == BuiltinOptions_MulOptions ?
-      reinterpret_cast<const tflite::MulOptionsT *>(value) : nullptr;
-  }
-  tflite::PadOptionsT *AsPadOptions() {
-    return type == BuiltinOptions_PadOptions ?
-      reinterpret_cast<tflite::PadOptionsT *>(value) : nullptr;
-  }
-  const tflite::PadOptionsT *AsPadOptions() const {
-    return type == BuiltinOptions_PadOptions ?
-      reinterpret_cast<const tflite::PadOptionsT *>(value) : nullptr;
-  }
-  tflite::GatherOptionsT *AsGatherOptions() {
-    return type == BuiltinOptions_GatherOptions ?
-      reinterpret_cast<tflite::GatherOptionsT *>(value) : nullptr;
-  }
-  const tflite::GatherOptionsT *AsGatherOptions() const {
-    return type == BuiltinOptions_GatherOptions ?
-      reinterpret_cast<const tflite::GatherOptionsT *>(value) : nullptr;
-  }
-  tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() {
-    return type == BuiltinOptions_BatchToSpaceNDOptions ?
-      reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(value) : nullptr;
-  }
-  const tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const {
-    return type == BuiltinOptions_BatchToSpaceNDOptions ?
-      reinterpret_cast<const tflite::BatchToSpaceNDOptionsT *>(value) : nullptr;
-  }
-  tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() {
-    return type == BuiltinOptions_SpaceToBatchNDOptions ?
-      reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(value) : nullptr;
-  }
-  const tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const {
-    return type == BuiltinOptions_SpaceToBatchNDOptions ?
-      reinterpret_cast<const tflite::SpaceToBatchNDOptionsT *>(value) : nullptr;
-  }
-  tflite::TransposeOptionsT *AsTransposeOptions() {
-    return type == BuiltinOptions_TransposeOptions ?
-      reinterpret_cast<tflite::TransposeOptionsT *>(value) : nullptr;
-  }
-  const tflite::TransposeOptionsT *AsTransposeOptions() const {
-    return type == BuiltinOptions_TransposeOptions ?
-      reinterpret_cast<const tflite::TransposeOptionsT *>(value) : nullptr;
-  }
-  tflite::ReducerOptionsT *AsReducerOptions() {
-    return type == BuiltinOptions_ReducerOptions ?
-      reinterpret_cast<tflite::ReducerOptionsT *>(value) : nullptr;
-  }
-  const tflite::ReducerOptionsT *AsReducerOptions() const {
-    return type == BuiltinOptions_ReducerOptions ?
-      reinterpret_cast<const tflite::ReducerOptionsT *>(value) : nullptr;
-  }
-  tflite::SubOptionsT *AsSubOptions() {
-    return type == BuiltinOptions_SubOptions ?
-      reinterpret_cast<tflite::SubOptionsT *>(value) : nullptr;
-  }
-  const tflite::SubOptionsT *AsSubOptions() const {
-    return type == BuiltinOptions_SubOptions ?
-      reinterpret_cast<const tflite::SubOptionsT *>(value) : nullptr;
-  }
-  tflite::DivOptionsT *AsDivOptions() {
-    return type == BuiltinOptions_DivOptions ?
-      reinterpret_cast<tflite::DivOptionsT *>(value) : nullptr;
-  }
-  const tflite::DivOptionsT *AsDivOptions() const {
-    return type == BuiltinOptions_DivOptions ?
-      reinterpret_cast<const tflite::DivOptionsT *>(value) : nullptr;
-  }
-  tflite::SqueezeOptionsT *AsSqueezeOptions() {
-    return type == BuiltinOptions_SqueezeOptions ?
-      reinterpret_cast<tflite::SqueezeOptionsT *>(value) : nullptr;
-  }
-  const tflite::SqueezeOptionsT *AsSqueezeOptions() const {
-    return type == BuiltinOptions_SqueezeOptions ?
-      reinterpret_cast<const tflite::SqueezeOptionsT *>(value) : nullptr;
-  }
-  tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() {
-    return type == BuiltinOptions_SequenceRNNOptions ?
-      reinterpret_cast<tflite::SequenceRNNOptionsT *>(value) : nullptr;
-  }
-  const tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() const {
-    return type == BuiltinOptions_SequenceRNNOptions ?
-      reinterpret_cast<const tflite::SequenceRNNOptionsT *>(value) : nullptr;
-  }
-  tflite::StridedSliceOptionsT *AsStridedSliceOptions() {
-    return type == BuiltinOptions_StridedSliceOptions ?
-      reinterpret_cast<tflite::StridedSliceOptionsT *>(value) : nullptr;
-  }
-  const tflite::StridedSliceOptionsT *AsStridedSliceOptions() const {
-    return type == BuiltinOptions_StridedSliceOptions ?
-      reinterpret_cast<const tflite::StridedSliceOptionsT *>(value) : nullptr;
-  }
-  tflite::ExpOptionsT *AsExpOptions() {
-    return type == BuiltinOptions_ExpOptions ?
-      reinterpret_cast<tflite::ExpOptionsT *>(value) : nullptr;
-  }
-  const tflite::ExpOptionsT *AsExpOptions() const {
-    return type == BuiltinOptions_ExpOptions ?
-      reinterpret_cast<const tflite::ExpOptionsT *>(value) : nullptr;
-  }
-  tflite::TopKV2OptionsT *AsTopKV2Options() {
-    return type == BuiltinOptions_TopKV2Options ?
-      reinterpret_cast<tflite::TopKV2OptionsT *>(value) : nullptr;
-  }
-  const tflite::TopKV2OptionsT *AsTopKV2Options() const {
-    return type == BuiltinOptions_TopKV2Options ?
-      reinterpret_cast<const tflite::TopKV2OptionsT *>(value) : nullptr;
-  }
-  tflite::SplitOptionsT *AsSplitOptions() {
-    return type == BuiltinOptions_SplitOptions ?
-      reinterpret_cast<tflite::SplitOptionsT *>(value) : nullptr;
-  }
-  const tflite::SplitOptionsT *AsSplitOptions() const {
-    return type == BuiltinOptions_SplitOptions ?
-      reinterpret_cast<const tflite::SplitOptionsT *>(value) : nullptr;
-  }
-  tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() {
-    return type == BuiltinOptions_LogSoftmaxOptions ?
-      reinterpret_cast<tflite::LogSoftmaxOptionsT *>(value) : nullptr;
-  }
-  const tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() const {
-    return type == BuiltinOptions_LogSoftmaxOptions ?
-      reinterpret_cast<const tflite::LogSoftmaxOptionsT *>(value) : nullptr;
-  }
-  tflite::CastOptionsT *AsCastOptions() {
-    return type == BuiltinOptions_CastOptions ?
-      reinterpret_cast<tflite::CastOptionsT *>(value) : nullptr;
-  }
-  const tflite::CastOptionsT *AsCastOptions() const {
-    return type == BuiltinOptions_CastOptions ?
-      reinterpret_cast<const tflite::CastOptionsT *>(value) : nullptr;
-  }
-  tflite::DequantizeOptionsT *AsDequantizeOptions() {
-    return type == BuiltinOptions_DequantizeOptions ?
-      reinterpret_cast<tflite::DequantizeOptionsT *>(value) : nullptr;
-  }
-  const tflite::DequantizeOptionsT *AsDequantizeOptions() const {
-    return type == BuiltinOptions_DequantizeOptions ?
-      reinterpret_cast<const tflite::DequantizeOptionsT *>(value) : nullptr;
-  }
-  tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() {
-    return type == BuiltinOptions_MaximumMinimumOptions ?
-      reinterpret_cast<tflite::MaximumMinimumOptionsT *>(value) : nullptr;
-  }
-  const tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() const {
-    return type == BuiltinOptions_MaximumMinimumOptions ?
-      reinterpret_cast<const tflite::MaximumMinimumOptionsT *>(value) : nullptr;
-  }
-  tflite::ArgMaxOptionsT *AsArgMaxOptions() {
-    return type == BuiltinOptions_ArgMaxOptions ?
-      reinterpret_cast<tflite::ArgMaxOptionsT *>(value) : nullptr;
-  }
-  const tflite::ArgMaxOptionsT *AsArgMaxOptions() const {
-    return type == BuiltinOptions_ArgMaxOptions ?
-      reinterpret_cast<const tflite::ArgMaxOptionsT *>(value) : nullptr;
-  }
-  tflite::LessOptionsT *AsLessOptions() {
-    return type == BuiltinOptions_LessOptions ?
-      reinterpret_cast<tflite::LessOptionsT *>(value) : nullptr;
-  }
-  const tflite::LessOptionsT *AsLessOptions() const {
-    return type == BuiltinOptions_LessOptions ?
-      reinterpret_cast<const tflite::LessOptionsT *>(value) : nullptr;
-  }
-  tflite::NegOptionsT *AsNegOptions() {
-    return type == BuiltinOptions_NegOptions ?
-      reinterpret_cast<tflite::NegOptionsT *>(value) : nullptr;
-  }
-  const tflite::NegOptionsT *AsNegOptions() const {
-    return type == BuiltinOptions_NegOptions ?
-      reinterpret_cast<const tflite::NegOptionsT *>(value) : nullptr;
-  }
-  tflite::PadV2OptionsT *AsPadV2Options() {
-    return type == BuiltinOptions_PadV2Options ?
-      reinterpret_cast<tflite::PadV2OptionsT *>(value) : nullptr;
-  }
-  const tflite::PadV2OptionsT *AsPadV2Options() const {
-    return type == BuiltinOptions_PadV2Options ?
-      reinterpret_cast<const tflite::PadV2OptionsT *>(value) : nullptr;
-  }
-  tflite::GreaterOptionsT *AsGreaterOptions() {
-    return type == BuiltinOptions_GreaterOptions ?
-      reinterpret_cast<tflite::GreaterOptionsT *>(value) : nullptr;
-  }
-  const tflite::GreaterOptionsT *AsGreaterOptions() const {
-    return type == BuiltinOptions_GreaterOptions ?
-      reinterpret_cast<const tflite::GreaterOptionsT *>(value) : nullptr;
-  }
-  tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() {
-    return type == BuiltinOptions_GreaterEqualOptions ?
-      reinterpret_cast<tflite::GreaterEqualOptionsT *>(value) : nullptr;
-  }
-  const tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() const {
-    return type == BuiltinOptions_GreaterEqualOptions ?
-      reinterpret_cast<const tflite::GreaterEqualOptionsT *>(value) : nullptr;
-  }
-  tflite::LessEqualOptionsT *AsLessEqualOptions() {
-    return type == BuiltinOptions_LessEqualOptions ?
-      reinterpret_cast<tflite::LessEqualOptionsT *>(value) : nullptr;
-  }
-  const tflite::LessEqualOptionsT *AsLessEqualOptions() const {
-    return type == BuiltinOptions_LessEqualOptions ?
-      reinterpret_cast<const tflite::LessEqualOptionsT *>(value) : nullptr;
-  }
-  tflite::SelectOptionsT *AsSelectOptions() {
-    return type == BuiltinOptions_SelectOptions ?
-      reinterpret_cast<tflite::SelectOptionsT *>(value) : nullptr;
-  }
-  const tflite::SelectOptionsT *AsSelectOptions() const {
-    return type == BuiltinOptions_SelectOptions ?
-      reinterpret_cast<const tflite::SelectOptionsT *>(value) : nullptr;
-  }
-  tflite::SliceOptionsT *AsSliceOptions() {
-    return type == BuiltinOptions_SliceOptions ?
-      reinterpret_cast<tflite::SliceOptionsT *>(value) : nullptr;
-  }
-  const tflite::SliceOptionsT *AsSliceOptions() const {
-    return type == BuiltinOptions_SliceOptions ?
-      reinterpret_cast<const tflite::SliceOptionsT *>(value) : nullptr;
-  }
-  tflite::TransposeConvOptionsT *AsTransposeConvOptions() {
-    return type == BuiltinOptions_TransposeConvOptions ?
-      reinterpret_cast<tflite::TransposeConvOptionsT *>(value) : nullptr;
-  }
-  const tflite::TransposeConvOptionsT *AsTransposeConvOptions() const {
-    return type == BuiltinOptions_TransposeConvOptions ?
-      reinterpret_cast<const tflite::TransposeConvOptionsT *>(value) : nullptr;
-  }
-  tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() {
-    return type == BuiltinOptions_SparseToDenseOptions ?
-      reinterpret_cast<tflite::SparseToDenseOptionsT *>(value) : nullptr;
-  }
-  const tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() const {
-    return type == BuiltinOptions_SparseToDenseOptions ?
-      reinterpret_cast<const tflite::SparseToDenseOptionsT *>(value) : nullptr;
-  }
-  tflite::TileOptionsT *AsTileOptions() {
-    return type == BuiltinOptions_TileOptions ?
-      reinterpret_cast<tflite::TileOptionsT *>(value) : nullptr;
-  }
-  const tflite::TileOptionsT *AsTileOptions() const {
-    return type == BuiltinOptions_TileOptions ?
-      reinterpret_cast<const tflite::TileOptionsT *>(value) : nullptr;
-  }
-  tflite::ExpandDimsOptionsT *AsExpandDimsOptions() {
-    return type == BuiltinOptions_ExpandDimsOptions ?
-      reinterpret_cast<tflite::ExpandDimsOptionsT *>(value) : nullptr;
-  }
-  const tflite::ExpandDimsOptionsT *AsExpandDimsOptions() const {
-    return type == BuiltinOptions_ExpandDimsOptions ?
-      reinterpret_cast<const tflite::ExpandDimsOptionsT *>(value) : nullptr;
-  }
-  tflite::EqualOptionsT *AsEqualOptions() {
-    return type == BuiltinOptions_EqualOptions ?
-      reinterpret_cast<tflite::EqualOptionsT *>(value) : nullptr;
-  }
-  const tflite::EqualOptionsT *AsEqualOptions() const {
-    return type == BuiltinOptions_EqualOptions ?
-      reinterpret_cast<const tflite::EqualOptionsT *>(value) : nullptr;
-  }
-  tflite::NotEqualOptionsT *AsNotEqualOptions() {
-    return type == BuiltinOptions_NotEqualOptions ?
-      reinterpret_cast<tflite::NotEqualOptionsT *>(value) : nullptr;
-  }
-  const tflite::NotEqualOptionsT *AsNotEqualOptions() const {
-    return type == BuiltinOptions_NotEqualOptions ?
-      reinterpret_cast<const tflite::NotEqualOptionsT *>(value) : nullptr;
-  }
-  tflite::ShapeOptionsT *AsShapeOptions() {
-    return type == BuiltinOptions_ShapeOptions ?
-      reinterpret_cast<tflite::ShapeOptionsT *>(value) : nullptr;
-  }
-  const tflite::ShapeOptionsT *AsShapeOptions() const {
-    return type == BuiltinOptions_ShapeOptions ?
-      reinterpret_cast<const tflite::ShapeOptionsT *>(value) : nullptr;
-  }
-  tflite::PowOptionsT *AsPowOptions() {
-    return type == BuiltinOptions_PowOptions ?
-      reinterpret_cast<tflite::PowOptionsT *>(value) : nullptr;
-  }
-  const tflite::PowOptionsT *AsPowOptions() const {
-    return type == BuiltinOptions_PowOptions ?
-      reinterpret_cast<const tflite::PowOptionsT *>(value) : nullptr;
-  }
-  tflite::ArgMinOptionsT *AsArgMinOptions() {
-    return type == BuiltinOptions_ArgMinOptions ?
-      reinterpret_cast<tflite::ArgMinOptionsT *>(value) : nullptr;
-  }
-  const tflite::ArgMinOptionsT *AsArgMinOptions() const {
-    return type == BuiltinOptions_ArgMinOptions ?
-      reinterpret_cast<const tflite::ArgMinOptionsT *>(value) : nullptr;
-  }
-  tflite::FakeQuantOptionsT *AsFakeQuantOptions() {
-    return type == BuiltinOptions_FakeQuantOptions ?
-      reinterpret_cast<tflite::FakeQuantOptionsT *>(value) : nullptr;
-  }
-  const tflite::FakeQuantOptionsT *AsFakeQuantOptions() const {
-    return type == BuiltinOptions_FakeQuantOptions ?
-      reinterpret_cast<const tflite::FakeQuantOptionsT *>(value) : nullptr;
-  }
-  tflite::PackOptionsT *AsPackOptions() {
-    return type == BuiltinOptions_PackOptions ?
-      reinterpret_cast<tflite::PackOptionsT *>(value) : nullptr;
-  }
-  const tflite::PackOptionsT *AsPackOptions() const {
-    return type == BuiltinOptions_PackOptions ?
-      reinterpret_cast<const tflite::PackOptionsT *>(value) : nullptr;
-  }
-  tflite::LogicalOrOptionsT *AsLogicalOrOptions() {
-    return type == BuiltinOptions_LogicalOrOptions ?
-      reinterpret_cast<tflite::LogicalOrOptionsT *>(value) : nullptr;
-  }
-  const tflite::LogicalOrOptionsT *AsLogicalOrOptions() const {
-    return type == BuiltinOptions_LogicalOrOptions ?
-      reinterpret_cast<const tflite::LogicalOrOptionsT *>(value) : nullptr;
-  }
-  tflite::OneHotOptionsT *AsOneHotOptions() {
-    return type == BuiltinOptions_OneHotOptions ?
-      reinterpret_cast<tflite::OneHotOptionsT *>(value) : nullptr;
-  }
-  const tflite::OneHotOptionsT *AsOneHotOptions() const {
-    return type == BuiltinOptions_OneHotOptions ?
-      reinterpret_cast<const tflite::OneHotOptionsT *>(value) : nullptr;
-  }
-  tflite::LogicalAndOptionsT *AsLogicalAndOptions() {
-    return type == BuiltinOptions_LogicalAndOptions ?
-      reinterpret_cast<tflite::LogicalAndOptionsT *>(value) : nullptr;
-  }
-  const tflite::LogicalAndOptionsT *AsLogicalAndOptions() const {
-    return type == BuiltinOptions_LogicalAndOptions ?
-      reinterpret_cast<const tflite::LogicalAndOptionsT *>(value) : nullptr;
-  }
-  tflite::LogicalNotOptionsT *AsLogicalNotOptions() {
-    return type == BuiltinOptions_LogicalNotOptions ?
-      reinterpret_cast<tflite::LogicalNotOptionsT *>(value) : nullptr;
-  }
-  const tflite::LogicalNotOptionsT *AsLogicalNotOptions() const {
-    return type == BuiltinOptions_LogicalNotOptions ?
-      reinterpret_cast<const tflite::LogicalNotOptionsT *>(value) : nullptr;
-  }
-  tflite::UnpackOptionsT *AsUnpackOptions() {
-    return type == BuiltinOptions_UnpackOptions ?
-      reinterpret_cast<tflite::UnpackOptionsT *>(value) : nullptr;
-  }
-  const tflite::UnpackOptionsT *AsUnpackOptions() const {
-    return type == BuiltinOptions_UnpackOptions ?
-      reinterpret_cast<const tflite::UnpackOptionsT *>(value) : nullptr;
-  }
-  tflite::FloorDivOptionsT *AsFloorDivOptions() {
-    return type == BuiltinOptions_FloorDivOptions ?
-      reinterpret_cast<tflite::FloorDivOptionsT *>(value) : nullptr;
-  }
-  const tflite::FloorDivOptionsT *AsFloorDivOptions() const {
-    return type == BuiltinOptions_FloorDivOptions ?
-      reinterpret_cast<const tflite::FloorDivOptionsT *>(value) : nullptr;
-  }
-  tflite::SquareOptionsT *AsSquareOptions() {
-    return type == BuiltinOptions_SquareOptions ?
-      reinterpret_cast<tflite::SquareOptionsT *>(value) : nullptr;
-  }
-  const tflite::SquareOptionsT *AsSquareOptions() const {
-    return type == BuiltinOptions_SquareOptions ?
-      reinterpret_cast<const tflite::SquareOptionsT *>(value) : nullptr;
-  }
-  tflite::ZerosLikeOptionsT *AsZerosLikeOptions() {
-    return type == BuiltinOptions_ZerosLikeOptions ?
-      reinterpret_cast<tflite::ZerosLikeOptionsT *>(value) : nullptr;
-  }
-  const tflite::ZerosLikeOptionsT *AsZerosLikeOptions() const {
-    return type == BuiltinOptions_ZerosLikeOptions ?
-      reinterpret_cast<const tflite::ZerosLikeOptionsT *>(value) : nullptr;
-  }
-  tflite::FillOptionsT *AsFillOptions() {
-    return type == BuiltinOptions_FillOptions ?
-      reinterpret_cast<tflite::FillOptionsT *>(value) : nullptr;
-  }
-  const tflite::FillOptionsT *AsFillOptions() const {
-    return type == BuiltinOptions_FillOptions ?
-      reinterpret_cast<const tflite::FillOptionsT *>(value) : nullptr;
-  }
-  tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() {
-    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
-      reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
-  }
-  const tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const {
-    return type == BuiltinOptions_BidirectionalSequenceLSTMOptions ?
-      reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
-  }
-  tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() {
-    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
-      reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
-  }
-  const tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const {
-    return type == BuiltinOptions_BidirectionalSequenceRNNOptions ?
-      reinterpret_cast<const tflite::BidirectionalSequenceRNNOptionsT *>(value) : nullptr;
-  }
-  tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() {
-    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
-      reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
-  }
-  const tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const {
-    return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ?
-      reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptionsT *>(value) : nullptr;
-  }
-  tflite::FloorModOptionsT *AsFloorModOptions() {
-    return type == BuiltinOptions_FloorModOptions ?
-      reinterpret_cast<tflite::FloorModOptionsT *>(value) : nullptr;
-  }
-  const tflite::FloorModOptionsT *AsFloorModOptions() const {
-    return type == BuiltinOptions_FloorModOptions ?
-      reinterpret_cast<const tflite::FloorModOptionsT *>(value) : nullptr;
-  }
-  tflite::RangeOptionsT *AsRangeOptions() {
-    return type == BuiltinOptions_RangeOptions ?
-      reinterpret_cast<tflite::RangeOptionsT *>(value) : nullptr;
-  }
-  const tflite::RangeOptionsT *AsRangeOptions() const {
-    return type == BuiltinOptions_RangeOptions ?
-      reinterpret_cast<const tflite::RangeOptionsT *>(value) : nullptr;
-  }
-  tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() {
-    return type == BuiltinOptions_ResizeNearestNeighborOptions ?
-      reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(value) : nullptr;
-  }
-  const tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() const {
-    return type == BuiltinOptions_ResizeNearestNeighborOptions ?
-      reinterpret_cast<const tflite::ResizeNearestNeighborOptionsT *>(value) : nullptr;
-  }
-  tflite::LeakyReluOptionsT *AsLeakyReluOptions() {
-    return type == BuiltinOptions_LeakyReluOptions ?
-      reinterpret_cast<tflite::LeakyReluOptionsT *>(value) : nullptr;
-  }
-  const tflite::LeakyReluOptionsT *AsLeakyReluOptions() const {
-    return type == BuiltinOptions_LeakyReluOptions ?
-      reinterpret_cast<const tflite::LeakyReluOptionsT *>(value) : nullptr;
-  }
-  tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() {
-    return type == BuiltinOptions_SquaredDifferenceOptions ?
-      reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(value) : nullptr;
-  }
-  const tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() const {
-    return type == BuiltinOptions_SquaredDifferenceOptions ?
-      reinterpret_cast<const tflite::SquaredDifferenceOptionsT *>(value) : nullptr;
-  }
-  tflite::MirrorPadOptionsT *AsMirrorPadOptions() {
-    return type == BuiltinOptions_MirrorPadOptions ?
-      reinterpret_cast<tflite::MirrorPadOptionsT *>(value) : nullptr;
-  }
-  const tflite::MirrorPadOptionsT *AsMirrorPadOptions() const {
-    return type == BuiltinOptions_MirrorPadOptions ?
-      reinterpret_cast<const tflite::MirrorPadOptionsT *>(value) : nullptr;
-  }
-  tflite::AbsOptionsT *AsAbsOptions() {
-    return type == BuiltinOptions_AbsOptions ?
-      reinterpret_cast<tflite::AbsOptionsT *>(value) : nullptr;
-  }
-  const tflite::AbsOptionsT *AsAbsOptions() const {
-    return type == BuiltinOptions_AbsOptions ?
-      reinterpret_cast<const tflite::AbsOptionsT *>(value) : nullptr;
-  }
-  tflite::SplitVOptionsT *AsSplitVOptions() {
-    return type == BuiltinOptions_SplitVOptions ?
-      reinterpret_cast<tflite::SplitVOptionsT *>(value) : nullptr;
-  }
-  const tflite::SplitVOptionsT *AsSplitVOptions() const {
-    return type == BuiltinOptions_SplitVOptions ?
-      reinterpret_cast<const tflite::SplitVOptionsT *>(value) : nullptr;
-  }
-  tflite::UniqueOptionsT *AsUniqueOptions() {
-    return type == BuiltinOptions_UniqueOptions ?
-      reinterpret_cast<tflite::UniqueOptionsT *>(value) : nullptr;
-  }
-  const tflite::UniqueOptionsT *AsUniqueOptions() const {
-    return type == BuiltinOptions_UniqueOptions ?
-      reinterpret_cast<const tflite::UniqueOptionsT *>(value) : nullptr;
-  }
-  tflite::ReverseV2OptionsT *AsReverseV2Options() {
-    return type == BuiltinOptions_ReverseV2Options ?
-      reinterpret_cast<tflite::ReverseV2OptionsT *>(value) : nullptr;
-  }
-  const tflite::ReverseV2OptionsT *AsReverseV2Options() const {
-    return type == BuiltinOptions_ReverseV2Options ?
-      reinterpret_cast<const tflite::ReverseV2OptionsT *>(value) : nullptr;
-  }
-  tflite::AddNOptionsT *AsAddNOptions() {
-    return type == BuiltinOptions_AddNOptions ?
-      reinterpret_cast<tflite::AddNOptionsT *>(value) : nullptr;
-  }
-  const tflite::AddNOptionsT *AsAddNOptions() const {
-    return type == BuiltinOptions_AddNOptions ?
-      reinterpret_cast<const tflite::AddNOptionsT *>(value) : nullptr;
-  }
-  tflite::GatherNdOptionsT *AsGatherNdOptions() {
-    return type == BuiltinOptions_GatherNdOptions ?
-      reinterpret_cast<tflite::GatherNdOptionsT *>(value) : nullptr;
-  }
-  const tflite::GatherNdOptionsT *AsGatherNdOptions() const {
-    return type == BuiltinOptions_GatherNdOptions ?
-      reinterpret_cast<const tflite::GatherNdOptionsT *>(value) : nullptr;
-  }
-  tflite::CosOptionsT *AsCosOptions() {
-    return type == BuiltinOptions_CosOptions ?
-      reinterpret_cast<tflite::CosOptionsT *>(value) : nullptr;
-  }
-  const tflite::CosOptionsT *AsCosOptions() const {
-    return type == BuiltinOptions_CosOptions ?
-      reinterpret_cast<const tflite::CosOptionsT *>(value) : nullptr;
-  }
-  tflite::WhereOptionsT *AsWhereOptions() {
-    return type == BuiltinOptions_WhereOptions ?
-      reinterpret_cast<tflite::WhereOptionsT *>(value) : nullptr;
-  }
-  const tflite::WhereOptionsT *AsWhereOptions() const {
-    return type == BuiltinOptions_WhereOptions ?
-      reinterpret_cast<const tflite::WhereOptionsT *>(value) : nullptr;
-  }
-  tflite::RankOptionsT *AsRankOptions() {
-    return type == BuiltinOptions_RankOptions ?
-      reinterpret_cast<tflite::RankOptionsT *>(value) : nullptr;
-  }
-  const tflite::RankOptionsT *AsRankOptions() const {
-    return type == BuiltinOptions_RankOptions ?
-      reinterpret_cast<const tflite::RankOptionsT *>(value) : nullptr;
-  }
-  tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() {
-    return type == BuiltinOptions_ReverseSequenceOptions ?
-      reinterpret_cast<tflite::ReverseSequenceOptionsT *>(value) : nullptr;
-  }
-  const tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() const {
-    return type == BuiltinOptions_ReverseSequenceOptions ?
-      reinterpret_cast<const tflite::ReverseSequenceOptionsT *>(value) : nullptr;
-  }
-  tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() {
-    return type == BuiltinOptions_MatrixDiagOptions ?
-      reinterpret_cast<tflite::MatrixDiagOptionsT *>(value) : nullptr;
-  }
-  const tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() const {
-    return type == BuiltinOptions_MatrixDiagOptions ?
-      reinterpret_cast<const tflite::MatrixDiagOptionsT *>(value) : nullptr;
-  }
-  tflite::QuantizeOptionsT *AsQuantizeOptions() {
-    return type == BuiltinOptions_QuantizeOptions ?
-      reinterpret_cast<tflite::QuantizeOptionsT *>(value) : nullptr;
-  }
-  const tflite::QuantizeOptionsT *AsQuantizeOptions() const {
-    return type == BuiltinOptions_QuantizeOptions ?
-      reinterpret_cast<const tflite::QuantizeOptionsT *>(value) : nullptr;
-  }
-  tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() {
-    return type == BuiltinOptions_MatrixSetDiagOptions ?
-      reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(value) : nullptr;
-  }
-  const tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() const {
-    return type == BuiltinOptions_MatrixSetDiagOptions ?
-      reinterpret_cast<const tflite::MatrixSetDiagOptionsT *>(value) : nullptr;
-  }
-  tflite::HardSwishOptionsT *AsHardSwishOptions() {
-    return type == BuiltinOptions_HardSwishOptions ?
-      reinterpret_cast<tflite::HardSwishOptionsT *>(value) : nullptr;
-  }
-  const tflite::HardSwishOptionsT *AsHardSwishOptions() const {
-    return type == BuiltinOptions_HardSwishOptions ?
-      reinterpret_cast<const tflite::HardSwishOptionsT *>(value) : nullptr;
-  }
-  tflite::IfOptionsT *AsIfOptions() {
-    return type == BuiltinOptions_IfOptions ?
-      reinterpret_cast<tflite::IfOptionsT *>(value) : nullptr;
-  }
-  const tflite::IfOptionsT *AsIfOptions() const {
-    return type == BuiltinOptions_IfOptions ?
-      reinterpret_cast<const tflite::IfOptionsT *>(value) : nullptr;
-  }
-  tflite::WhileOptionsT *AsWhileOptions() {
-    return type == BuiltinOptions_WhileOptions ?
-      reinterpret_cast<tflite::WhileOptionsT *>(value) : nullptr;
-  }
-  const tflite::WhileOptionsT *AsWhileOptions() const {
-    return type == BuiltinOptions_WhileOptions ?
-      reinterpret_cast<const tflite::WhileOptionsT *>(value) : nullptr;
-  }
-  tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() {
-    return type == BuiltinOptions_DepthToSpaceOptions ?
-      reinterpret_cast<tflite::DepthToSpaceOptionsT *>(value) : nullptr;
-  }
-  const tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() const {
-    return type == BuiltinOptions_DepthToSpaceOptions ?
-      reinterpret_cast<const tflite::DepthToSpaceOptionsT *>(value) : nullptr;
-  }
-  tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() {
-    return type == BuiltinOptions_NonMaxSuppressionV4Options ?
-      reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(value) : nullptr;
-  }
-  const tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() const {
-    return type == BuiltinOptions_NonMaxSuppressionV4Options ?
-      reinterpret_cast<const tflite::NonMaxSuppressionV4OptionsT *>(value) : nullptr;
-  }
-  tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() {
-    return type == BuiltinOptions_NonMaxSuppressionV5Options ?
-      reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(value) : nullptr;
-  }
-  const tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() const {
-    return type == BuiltinOptions_NonMaxSuppressionV5Options ?
-      reinterpret_cast<const tflite::NonMaxSuppressionV5OptionsT *>(value) : nullptr;
-  }
-  tflite::ScatterNdOptionsT *AsScatterNdOptions() {
-    return type == BuiltinOptions_ScatterNdOptions ?
-      reinterpret_cast<tflite::ScatterNdOptionsT *>(value) : nullptr;
-  }
-  const tflite::ScatterNdOptionsT *AsScatterNdOptions() const {
-    return type == BuiltinOptions_ScatterNdOptions ?
-      reinterpret_cast<const tflite::ScatterNdOptionsT *>(value) : nullptr;
-  }
-  tflite::SelectV2OptionsT *AsSelectV2Options() {
-    return type == BuiltinOptions_SelectV2Options ?
-      reinterpret_cast<tflite::SelectV2OptionsT *>(value) : nullptr;
-  }
-  const tflite::SelectV2OptionsT *AsSelectV2Options() const {
-    return type == BuiltinOptions_SelectV2Options ?
-      reinterpret_cast<const tflite::SelectV2OptionsT *>(value) : nullptr;
-  }
-  tflite::DensifyOptionsT *AsDensifyOptions() {
-    return type == BuiltinOptions_DensifyOptions ?
-      reinterpret_cast<tflite::DensifyOptionsT *>(value) : nullptr;
-  }
-  const tflite::DensifyOptionsT *AsDensifyOptions() const {
-    return type == BuiltinOptions_DensifyOptions ?
-      reinterpret_cast<const tflite::DensifyOptionsT *>(value) : nullptr;
-  }
-  tflite::SegmentSumOptionsT *AsSegmentSumOptions() {
-    return type == BuiltinOptions_SegmentSumOptions ?
-      reinterpret_cast<tflite::SegmentSumOptionsT *>(value) : nullptr;
-  }
-  const tflite::SegmentSumOptionsT *AsSegmentSumOptions() const {
-    return type == BuiltinOptions_SegmentSumOptions ?
-      reinterpret_cast<const tflite::SegmentSumOptionsT *>(value) : nullptr;
-  }
-  tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() {
-    return type == BuiltinOptions_BatchMatMulOptions ?
-      reinterpret_cast<tflite::BatchMatMulOptionsT *>(value) : nullptr;
-  }
-  const tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() const {
-    return type == BuiltinOptions_BatchMatMulOptions ?
-      reinterpret_cast<const tflite::BatchMatMulOptionsT *>(value) : nullptr;
-  }
-  tflite::CumsumOptionsT *AsCumsumOptions() {
-    return type == BuiltinOptions_CumsumOptions ?
-      reinterpret_cast<tflite::CumsumOptionsT *>(value) : nullptr;
-  }
-  const tflite::CumsumOptionsT *AsCumsumOptions() const {
-    return type == BuiltinOptions_CumsumOptions ?
-      reinterpret_cast<const tflite::CumsumOptionsT *>(value) : nullptr;
-  }
-  tflite::CallOnceOptionsT *AsCallOnceOptions() {
-    return type == BuiltinOptions_CallOnceOptions ?
-      reinterpret_cast<tflite::CallOnceOptionsT *>(value) : nullptr;
-  }
-  const tflite::CallOnceOptionsT *AsCallOnceOptions() const {
-    return type == BuiltinOptions_CallOnceOptions ?
-      reinterpret_cast<const tflite::CallOnceOptionsT *>(value) : nullptr;
-  }
-  tflite::BroadcastToOptionsT *AsBroadcastToOptions() {
-    return type == BuiltinOptions_BroadcastToOptions ?
-      reinterpret_cast<tflite::BroadcastToOptionsT *>(value) : nullptr;
-  }
-  const tflite::BroadcastToOptionsT *AsBroadcastToOptions() const {
-    return type == BuiltinOptions_BroadcastToOptions ?
-      reinterpret_cast<const tflite::BroadcastToOptionsT *>(value) : nullptr;
-  }
-  tflite::Rfft2dOptionsT *AsRfft2dOptions() {
-    return type == BuiltinOptions_Rfft2dOptions ?
-      reinterpret_cast<tflite::Rfft2dOptionsT *>(value) : nullptr;
-  }
-  const tflite::Rfft2dOptionsT *AsRfft2dOptions() const {
-    return type == BuiltinOptions_Rfft2dOptions ?
-      reinterpret_cast<const tflite::Rfft2dOptionsT *>(value) : nullptr;
-  }
-  tflite::Conv3DOptionsT *AsConv3DOptions() {
-    return type == BuiltinOptions_Conv3DOptions ?
-      reinterpret_cast<tflite::Conv3DOptionsT *>(value) : nullptr;
-  }
-  const tflite::Conv3DOptionsT *AsConv3DOptions() const {
-    return type == BuiltinOptions_Conv3DOptions ?
-      reinterpret_cast<const tflite::Conv3DOptionsT *>(value) : nullptr;
-  }
-  tflite::HashtableOptionsT *AsHashtableOptions() {
-    return type == BuiltinOptions_HashtableOptions ?
-      reinterpret_cast<tflite::HashtableOptionsT *>(value) : nullptr;
-  }
-  const tflite::HashtableOptionsT *AsHashtableOptions() const {
-    return type == BuiltinOptions_HashtableOptions ?
-      reinterpret_cast<const tflite::HashtableOptionsT *>(value) : nullptr;
-  }
-  tflite::HashtableFindOptionsT *AsHashtableFindOptions() {
-    return type == BuiltinOptions_HashtableFindOptions ?
-      reinterpret_cast<tflite::HashtableFindOptionsT *>(value) : nullptr;
-  }
-  const tflite::HashtableFindOptionsT *AsHashtableFindOptions() const {
-    return type == BuiltinOptions_HashtableFindOptions ?
-      reinterpret_cast<const tflite::HashtableFindOptionsT *>(value) : nullptr;
-  }
-  tflite::HashtableImportOptionsT *AsHashtableImportOptions() {
-    return type == BuiltinOptions_HashtableImportOptions ?
-      reinterpret_cast<tflite::HashtableImportOptionsT *>(value) : nullptr;
-  }
-  const tflite::HashtableImportOptionsT *AsHashtableImportOptions() const {
-    return type == BuiltinOptions_HashtableImportOptions ?
-      reinterpret_cast<const tflite::HashtableImportOptionsT *>(value) : nullptr;
-  }
-  tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() {
-    return type == BuiltinOptions_HashtableSizeOptions ?
-      reinterpret_cast<tflite::HashtableSizeOptionsT *>(value) : nullptr;
-  }
-  const tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() const {
-    return type == BuiltinOptions_HashtableSizeOptions ?
-      reinterpret_cast<const tflite::HashtableSizeOptionsT *>(value) : nullptr;
-  }
+template<> struct BuiltinOptionsTraits<tflite::AssignVariableOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
 };
 
-bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
-bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);
-
-enum Padding {
-  Padding_SAME = 0,
-  Padding_VALID = 1,
-  Padding_MIN = Padding_SAME,
-  Padding_MAX = Padding_VALID
+template<> struct BuiltinOptionsTraits<tflite::RandomOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
 };
 
-inline const Padding (&EnumValuesPadding())[2] {
-  static const Padding values[] = {
-    Padding_SAME,
-    Padding_VALID
-  };
-  return values;
-}
+template<> struct BuiltinOptionsTraits<tflite::BucketizeOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions;
+};
 
-inline const char * const *EnumNamesPadding() {
-  static const char * const names[3] = {
-    "SAME",
-    "VALID",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsTraits<tflite::GeluOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions;
+};
 
-inline const char *EnumNamePadding(Padding e) {
-  if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesPadding()[index];
-}
+template<> struct BuiltinOptionsTraits<tflite::DynamicUpdateSliceOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions;
+};
 
-enum ActivationFunctionType {
-  ActivationFunctionType_NONE = 0,
-  ActivationFunctionType_RELU = 1,
-  ActivationFunctionType_RELU_N1_TO_1 = 2,
-  ActivationFunctionType_RELU6 = 3,
-  ActivationFunctionType_TANH = 4,
-  ActivationFunctionType_SIGN_BIT = 5,
-  ActivationFunctionType_MIN = ActivationFunctionType_NONE,
-  ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT
+template<> struct BuiltinOptionsTraits<tflite::UnsortedSegmentProdOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions;
 };
 
-inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {
-  static const ActivationFunctionType values[] = {
-    ActivationFunctionType_NONE,
-    ActivationFunctionType_RELU,
-    ActivationFunctionType_RELU_N1_TO_1,
-    ActivationFunctionType_RELU6,
-    ActivationFunctionType_TANH,
-    ActivationFunctionType_SIGN_BIT
-  };
-  return values;
-}
+template<> struct BuiltinOptionsTraits<tflite::UnsortedSegmentMaxOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions;
+};
 
-inline const char * const *EnumNamesActivationFunctionType() {
-  static const char * const names[7] = {
-    "NONE",
-    "RELU",
-    "RELU_N1_TO_1",
-    "RELU6",
-    "TANH",
-    "SIGN_BIT",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsTraits<tflite::UnsortedSegmentMinOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions;
+};
 
-inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) {
-  if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesActivationFunctionType()[index];
-}
+template<> struct BuiltinOptionsTraits<tflite::UnsortedSegmentSumOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions;
+};
 
-enum LSHProjectionType {
-  LSHProjectionType_UNKNOWN = 0,
-  LSHProjectionType_SPARSE = 1,
-  LSHProjectionType_DENSE = 2,
-  LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,
-  LSHProjectionType_MAX = LSHProjectionType_DENSE
+template<> struct BuiltinOptionsTraits<tflite::ATan2Options> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options;
 };
 
-inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] {
-  static const LSHProjectionType values[] = {
-    LSHProjectionType_UNKNOWN,
-    LSHProjectionType_SPARSE,
-    LSHProjectionType_DENSE
-  };
-  return values;
-}
+template<> struct BuiltinOptionsTraits<tflite::SignOptions> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SignOptions;
+};
 
-inline const char * const *EnumNamesLSHProjectionType() {
-  static const char * const names[4] = {
-    "UNKNOWN",
-    "SPARSE",
-    "DENSE",
-    nullptr
-  };
-  return names;
-}
+template<typename T> struct BuiltinOptionsUnionTraits {
+  static const BuiltinOptions enum_value = BuiltinOptions_NONE;
+};
 
-inline const char *EnumNameLSHProjectionType(LSHProjectionType e) {
-  if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesLSHProjectionType()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::Conv2DOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
+};
 
-enum FullyConnectedOptionsWeightsFormat {
-  FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
-  FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
-  FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,
-  FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
+template<> struct BuiltinOptionsUnionTraits<tflite::DepthwiseConv2DOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
 };
 
-inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] {
-  static const FullyConnectedOptionsWeightsFormat values[] = {
-    FullyConnectedOptionsWeightsFormat_DEFAULT,
-    FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
-  };
-  return values;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ConcatEmbeddingsOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
+};
 
-inline const char * const *EnumNamesFullyConnectedOptionsWeightsFormat() {
-  static const char * const names[3] = {
-    "DEFAULT",
-    "SHUFFLED4x16INT8",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LSHProjectionOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
+};
 
-inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) {
-  if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::Pool2DOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
+};
 
-enum LSTMKernelType {
-  LSTMKernelType_FULL = 0,
-  LSTMKernelType_BASIC = 1,
-  LSTMKernelType_MIN = LSTMKernelType_FULL,
-  LSTMKernelType_MAX = LSTMKernelType_BASIC
+template<> struct BuiltinOptionsUnionTraits<tflite::SVDFOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
 };
 
-inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] {
-  static const LSTMKernelType values[] = {
-    LSTMKernelType_FULL,
-    LSTMKernelType_BASIC
-  };
-  return values;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::RNNOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
+};
 
-inline const char * const *EnumNamesLSTMKernelType() {
-  static const char * const names[3] = {
-    "FULL",
-    "BASIC",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::FullyConnectedOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
+};
 
-inline const char *EnumNameLSTMKernelType(LSTMKernelType e) {
-  if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesLSTMKernelType()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SoftmaxOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
+};
 
-enum CombinerType {
-  CombinerType_SUM = 0,
-  CombinerType_MEAN = 1,
-  CombinerType_SQRTN = 2,
-  CombinerType_MIN = CombinerType_SUM,
-  CombinerType_MAX = CombinerType_SQRTN
+template<> struct BuiltinOptionsUnionTraits<tflite::ConcatenationOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
 };
 
-inline const CombinerType (&EnumValuesCombinerType())[3] {
-  static const CombinerType values[] = {
-    CombinerType_SUM,
-    CombinerType_MEAN,
-    CombinerType_SQRTN
-  };
-  return values;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::AddOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
+};
 
-inline const char * const *EnumNamesCombinerType() {
-  static const char * const names[4] = {
-    "SUM",
-    "MEAN",
-    "SQRTN",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::L2NormOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
+};
 
-inline const char *EnumNameCombinerType(CombinerType e) {
-  if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesCombinerType()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LocalResponseNormalizationOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
+};
 
-enum MirrorPadMode {
-  MirrorPadMode_REFLECT = 0,
-  MirrorPadMode_SYMMETRIC = 1,
-  MirrorPadMode_MIN = MirrorPadMode_REFLECT,
-  MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC
+template<> struct BuiltinOptionsUnionTraits<tflite::LSTMOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
 };
 
-inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] {
-  static const MirrorPadMode values[] = {
-    MirrorPadMode_REFLECT,
-    MirrorPadMode_SYMMETRIC
-  };
-  return values;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ResizeBilinearOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
+};
 
-inline const char * const *EnumNamesMirrorPadMode() {
-  static const char * const names[3] = {
-    "REFLECT",
-    "SYMMETRIC",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::CallOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
+};
 
-inline const char *EnumNameMirrorPadMode(MirrorPadMode e) {
-  if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesMirrorPadMode()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ReshapeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
+};
 
-enum CustomOptionsFormat {
-  CustomOptionsFormat_FLEXBUFFERS = 0,
-  CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
-  CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS
+template<> struct BuiltinOptionsUnionTraits<tflite::SkipGramOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
 };
 
-inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {
-  static const CustomOptionsFormat values[] = {
-    CustomOptionsFormat_FLEXBUFFERS
-  };
-  return values;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SpaceToDepthOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
+};
 
-inline const char * const *EnumNamesCustomOptionsFormat() {
-  static const char * const names[2] = {
-    "FLEXBUFFERS",
-    nullptr
-  };
-  return names;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::EmbeddingLookupSparseOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
+};
 
-inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) {
-  if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS)) return "";
-  const size_t index = static_cast<size_t>(e);
-  return EnumNamesCustomOptionsFormat()[index];
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::MulOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
+};
 
-struct CustomQuantizationT : public flatbuffers::NativeTable {
-  typedef CustomQuantization TableType;
-  std::vector<uint8_t> custom;
-  CustomQuantizationT() {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::PadOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
 };
 
-struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CustomQuantizationT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_CUSTOM = 4
-  };
-  const flatbuffers::Vector<uint8_t> *custom() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_CUSTOM) &&
-           verifier.VerifyVector(custom()) &&
-           verifier.EndTable();
-  }
-  CustomQuantizationT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CustomQuantization> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::GatherOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
 };
 
-struct CustomQuantizationBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) {
-    fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
-  }
-  explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
-  flatbuffers::Offset<CustomQuantization> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CustomQuantization>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::BatchToSpaceNDOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
 };
 
-inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) {
-  CustomQuantizationBuilder builder_(_fbb);
-  builder_.add_custom(custom);
-  return builder_.Finish();
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SpaceToBatchNDOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
+};
 
-inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantizationDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<uint8_t> *custom = nullptr) {
-  if (custom) { _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16); }
-  auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
-  return tflite::CreateCustomQuantization(
-      _fbb,
-      custom__);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::TransposeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
+};
 
-flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct QuantizationParametersT : public flatbuffers::NativeTable {
-  typedef QuantizationParameters TableType;
-  std::vector<float> min;
-  std::vector<float> max;
-  std::vector<float> scale;
-  std::vector<int64_t> zero_point;
-  tflite::QuantizationDetailsUnion details;
-  int32_t quantized_dimension;
-  QuantizationParametersT()
-      : quantized_dimension(0) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::ReducerOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
 };
 
-struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef QuantizationParametersT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_MIN = 4,
-    VT_MAX = 6,
-    VT_SCALE = 8,
-    VT_ZERO_POINT = 10,
-    VT_DETAILS_TYPE = 12,
-    VT_DETAILS = 14,
-    VT_QUANTIZED_DIMENSION = 16
-  };
-  const flatbuffers::Vector<float> *min() const {
-    return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
-  }
-  const flatbuffers::Vector<float> *max() const {
-    return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
-  }
-  const flatbuffers::Vector<float> *scale() const {
-    return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
-  }
-  const flatbuffers::Vector<int64_t> *zero_point() const {
-    return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
-  }
-  tflite::QuantizationDetails details_type() const {
-    return static_cast<tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
-  }
-  const void *details() const {
-    return GetPointer<const void *>(VT_DETAILS);
-  }
-  template<typename T> const T *details_as() const;
-  const tflite::CustomQuantization *details_as_CustomQuantization() const {
-    return details_type() == tflite::QuantizationDetails_CustomQuantization ? static_cast<const tflite::CustomQuantization *>(details()) : nullptr;
-  }
-  int32_t quantized_dimension() const {
-    return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_MIN) &&
-           verifier.VerifyVector(min()) &&
-           VerifyOffset(verifier, VT_MAX) &&
-           verifier.VerifyVector(max()) &&
-           VerifyOffset(verifier, VT_SCALE) &&
-           verifier.VerifyVector(scale()) &&
-           VerifyOffset(verifier, VT_ZERO_POINT) &&
-           verifier.VerifyVector(zero_point()) &&
-           VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE) &&
-           VerifyOffset(verifier, VT_DETAILS) &&
-           VerifyQuantizationDetails(verifier, details(), details_type()) &&
-           VerifyField<int32_t>(verifier, VT_QUANTIZED_DIMENSION) &&
-           verifier.EndTable();
-  }
-  QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<QuantizationParameters> Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::SubOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
 };
 
-template<> inline const tflite::CustomQuantization *QuantizationParameters::details_as<tflite::CustomQuantization>() const {
-  return details_as_CustomQuantization();
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::DivOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
+};
 
-struct QuantizationParametersBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) {
-    fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
-  }
-  void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max) {
-    fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
-  }
-  void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) {
-    fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
-  }
-  void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) {
-    fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
-  }
-  void add_details_type(tflite::QuantizationDetails details_type) {
-    fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE, static_cast<uint8_t>(details_type), 0);
-  }
-  void add_details(flatbuffers::Offset<void> details) {
-    fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
-  }
-  void add_quantized_dimension(int32_t quantized_dimension) {
-    fbb_.AddElement<int32_t>(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension, 0);
-  }
-  explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
-  flatbuffers::Offset<QuantizationParameters> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<QuantizationParameters>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::SqueezeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
 };
 
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
-    flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
-    flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
-    tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE,
-    flatbuffers::Offset<void> details = 0,
-    int32_t quantized_dimension = 0) {
-  QuantizationParametersBuilder builder_(_fbb);
-  builder_.add_quantized_dimension(quantized_dimension);
-  builder_.add_details(details);
-  builder_.add_zero_point(zero_point);
-  builder_.add_scale(scale);
-  builder_.add_max(max);
-  builder_.add_min(min);
-  builder_.add_details_type(details_type);
-  return builder_.Finish();
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SequenceRNNOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
+};
 
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<float> *min = nullptr,
-    const std::vector<float> *max = nullptr,
-    const std::vector<float> *scale = nullptr,
-    const std::vector<int64_t> *zero_point = nullptr,
-    tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE,
-    flatbuffers::Offset<void> details = 0,
-    int32_t quantized_dimension = 0) {
-  auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
-  auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
-  auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
-  auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
-  return tflite::CreateQuantizationParameters(
-      _fbb,
-      min__,
-      max__,
-      scale__,
-      zero_point__,
-      details_type,
-      details,
-      quantized_dimension);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::StridedSliceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
+};
 
-flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::ExpOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
+};
 
-struct Int32VectorT : public flatbuffers::NativeTable {
-  typedef Int32Vector TableType;
-  std::vector<int32_t> values;
-  Int32VectorT() {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::TopKV2OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
 };
 
-struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Int32VectorT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VALUES = 4
-  };
-  const flatbuffers::Vector<int32_t> *values() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_VALUES) &&
-           verifier.VerifyVector(values()) &&
-           verifier.EndTable();
-  }
-  Int32VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Int32Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::SplitOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
 };
 
-struct Int32VectorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values) {
-    fbb_.AddOffset(Int32Vector::VT_VALUES, values);
-  }
-  explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Int32VectorBuilder &operator=(const Int32VectorBuilder &);
-  flatbuffers::Offset<Int32Vector> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Int32Vector>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Int32Vector> CreateInt32Vector(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0) {
-  Int32VectorBuilder builder_(_fbb);
-  builder_.add_values(values);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Int32Vector> CreateInt32VectorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int32_t> *values = nullptr) {
-  auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
-  return tflite::CreateInt32Vector(
-      _fbb,
-      values__);
-}
-
-flatbuffers::Offset<Int32Vector> CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Uint16VectorT : public flatbuffers::NativeTable {
-  typedef Uint16Vector TableType;
-  std::vector<uint16_t> values;
-  Uint16VectorT() {
-  }
-};
-
-struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Uint16VectorT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VALUES = 4
-  };
-  const flatbuffers::Vector<uint16_t> *values() const {
-    return GetPointer<const flatbuffers::Vector<uint16_t> *>(VT_VALUES);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_VALUES) &&
-           verifier.VerifyVector(values()) &&
-           verifier.EndTable();
-  }
-  Uint16VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Uint16Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct Uint16VectorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values) {
-    fbb_.AddOffset(Uint16Vector::VT_VALUES, values);
-  }
-  explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
-  flatbuffers::Offset<Uint16Vector> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Uint16Vector>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Uint16Vector> CreateUint16Vector(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values = 0) {
-  Uint16VectorBuilder builder_(_fbb);
-  builder_.add_values(values);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Uint16Vector> CreateUint16VectorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<uint16_t> *values = nullptr) {
-  if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4); }
-  auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
-  return tflite::CreateUint16Vector(
-      _fbb,
-      values__);
-}
-
-flatbuffers::Offset<Uint16Vector> CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Uint8VectorT : public flatbuffers::NativeTable {
-  typedef Uint8Vector TableType;
-  std::vector<uint8_t> values;
-  Uint8VectorT() {
-  }
-};
-
-struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Uint8VectorT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VALUES = 4
-  };
-  const flatbuffers::Vector<uint8_t> *values() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_VALUES);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_VALUES) &&
-           verifier.VerifyVector(values()) &&
-           verifier.EndTable();
-  }
-  Uint8VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Uint8Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct Uint8VectorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values) {
-    fbb_.AddOffset(Uint8Vector::VT_VALUES, values);
-  }
-  explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
-  flatbuffers::Offset<Uint8Vector> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Uint8Vector>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Uint8Vector> CreateUint8Vector(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values = 0) {
-  Uint8VectorBuilder builder_(_fbb);
-  builder_.add_values(values);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Uint8Vector> CreateUint8VectorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<uint8_t> *values = nullptr) {
-  if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4); }
-  auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
-  return tflite::CreateUint8Vector(
-      _fbb,
-      values__);
-}
-
-flatbuffers::Offset<Uint8Vector> CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DimensionMetadataT : public flatbuffers::NativeTable {
-  typedef DimensionMetadata TableType;
-  tflite::DimensionType format;
-  int32_t dense_size;
-  tflite::SparseIndexVectorUnion array_segments;
-  tflite::SparseIndexVectorUnion array_indices;
-  DimensionMetadataT()
-      : format(tflite::DimensionType_DENSE),
-        dense_size(0) {
-  }
-};
-
-struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DimensionMetadataT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FORMAT = 4,
-    VT_DENSE_SIZE = 6,
-    VT_ARRAY_SEGMENTS_TYPE = 8,
-    VT_ARRAY_SEGMENTS = 10,
-    VT_ARRAY_INDICES_TYPE = 12,
-    VT_ARRAY_INDICES = 14
-  };
-  tflite::DimensionType format() const {
-    return static_cast<tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
-  }
-  int32_t dense_size() const {
-    return GetField<int32_t>(VT_DENSE_SIZE, 0);
-  }
-  tflite::SparseIndexVector array_segments_type() const {
-    return static_cast<tflite::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
-  }
-  const void *array_segments() const {
-    return GetPointer<const void *>(VT_ARRAY_SEGMENTS);
-  }
-  template<typename T> const T *array_segments_as() const;
-  const tflite::Int32Vector *array_segments_as_Int32Vector() const {
-    return array_segments_type() == tflite::SparseIndexVector_Int32Vector ? static_cast<const tflite::Int32Vector *>(array_segments()) : nullptr;
-  }
-  const tflite::Uint16Vector *array_segments_as_Uint16Vector() const {
-    return array_segments_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast<const tflite::Uint16Vector *>(array_segments()) : nullptr;
-  }
-  const tflite::Uint8Vector *array_segments_as_Uint8Vector() const {
-    return array_segments_type() == tflite::SparseIndexVector_Uint8Vector ? static_cast<const tflite::Uint8Vector *>(array_segments()) : nullptr;
-  }
-  tflite::SparseIndexVector array_indices_type() const {
-    return static_cast<tflite::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
-  }
-  const void *array_indices() const {
-    return GetPointer<const void *>(VT_ARRAY_INDICES);
-  }
-  template<typename T> const T *array_indices_as() const;
-  const tflite::Int32Vector *array_indices_as_Int32Vector() const {
-    return array_indices_type() == tflite::SparseIndexVector_Int32Vector ? static_cast<const tflite::Int32Vector *>(array_indices()) : nullptr;
-  }
-  const tflite::Uint16Vector *array_indices_as_Uint16Vector() const {
-    return array_indices_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast<const tflite::Uint16Vector *>(array_indices()) : nullptr;
-  }
-  const tflite::Uint8Vector *array_indices_as_Uint8Vector() const {
-    return array_indices_type() == tflite::SparseIndexVector_Uint8Vector ? static_cast<const tflite::Uint8Vector *>(array_indices()) : nullptr;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FORMAT) &&
-           VerifyField<int32_t>(verifier, VT_DENSE_SIZE) &&
-           VerifyField<uint8_t>(verifier, VT_ARRAY_SEGMENTS_TYPE) &&
-           VerifyOffset(verifier, VT_ARRAY_SEGMENTS) &&
-           VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) &&
-           VerifyField<uint8_t>(verifier, VT_ARRAY_INDICES_TYPE) &&
-           VerifyOffset(verifier, VT_ARRAY_INDICES) &&
-           VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) &&
-           verifier.EndTable();
-  }
-  DimensionMetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DimensionMetadata> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-template<> inline const tflite::Int32Vector *DimensionMetadata::array_segments_as<tflite::Int32Vector>() const {
-  return array_segments_as_Int32Vector();
-}
-
-template<> inline const tflite::Uint16Vector *DimensionMetadata::array_segments_as<tflite::Uint16Vector>() const {
-  return array_segments_as_Uint16Vector();
-}
-
-template<> inline const tflite::Uint8Vector *DimensionMetadata::array_segments_as<tflite::Uint8Vector>() const {
-  return array_segments_as_Uint8Vector();
-}
-
-template<> inline const tflite::Int32Vector *DimensionMetadata::array_indices_as<tflite::Int32Vector>() const {
-  return array_indices_as_Int32Vector();
-}
-
-template<> inline const tflite::Uint16Vector *DimensionMetadata::array_indices_as<tflite::Uint16Vector>() const {
-  return array_indices_as_Uint16Vector();
-}
-
-template<> inline const tflite::Uint8Vector *DimensionMetadata::array_indices_as<tflite::Uint8Vector>() const {
-  return array_indices_as_Uint8Vector();
-}
-
-struct DimensionMetadataBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_format(tflite::DimensionType format) {
-    fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
-  }
-  void add_dense_size(int32_t dense_size) {
-    fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
-  }
-  void add_array_segments_type(tflite::SparseIndexVector array_segments_type) {
-    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE, static_cast<uint8_t>(array_segments_type), 0);
-  }
-  void add_array_segments(flatbuffers::Offset<void> array_segments) {
-    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
-  }
-  void add_array_indices_type(tflite::SparseIndexVector array_indices_type) {
-    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE, static_cast<uint8_t>(array_indices_type), 0);
-  }
-  void add_array_indices(flatbuffers::Offset<void> array_indices) {
-    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices);
-  }
-  explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
-  flatbuffers::Offset<DimensionMetadata> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DimensionMetadata>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::DimensionType format = tflite::DimensionType_DENSE,
-    int32_t dense_size = 0,
-    tflite::SparseIndexVector array_segments_type = tflite::SparseIndexVector_NONE,
-    flatbuffers::Offset<void> array_segments = 0,
-    tflite::SparseIndexVector array_indices_type = tflite::SparseIndexVector_NONE,
-    flatbuffers::Offset<void> array_indices = 0) {
-  DimensionMetadataBuilder builder_(_fbb);
-  builder_.add_array_indices(array_indices);
-  builder_.add_array_segments(array_segments);
-  builder_.add_dense_size(dense_size);
-  builder_.add_array_indices_type(array_indices_type);
-  builder_.add_array_segments_type(array_segments_type);
-  builder_.add_format(format);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SparsityParametersT : public flatbuffers::NativeTable {
-  typedef SparsityParameters TableType;
-  std::vector<int32_t> traversal_order;
-  std::vector<int32_t> block_map;
-  std::vector<std::unique_ptr<tflite::DimensionMetadataT>> dim_metadata;
-  SparsityParametersT() {
-  }
-};
-
-struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SparsityParametersT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TRAVERSAL_ORDER = 4,
-    VT_BLOCK_MAP = 6,
-    VT_DIM_METADATA = 8
-  };
-  const flatbuffers::Vector<int32_t> *traversal_order() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TRAVERSAL_ORDER);
-  }
-  const flatbuffers::Vector<int32_t> *block_map() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>> *dim_metadata() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>> *>(VT_DIM_METADATA);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_TRAVERSAL_ORDER) &&
-           verifier.VerifyVector(traversal_order()) &&
-           VerifyOffset(verifier, VT_BLOCK_MAP) &&
-           verifier.VerifyVector(block_map()) &&
-           VerifyOffset(verifier, VT_DIM_METADATA) &&
-           verifier.VerifyVector(dim_metadata()) &&
-           verifier.VerifyVectorOfTables(dim_metadata()) &&
-           verifier.EndTable();
-  }
-  SparsityParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SparsityParameters> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::LogSoftmaxOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
 };
 
-struct SparsityParametersBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order) {
-    fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order);
-  }
-  void add_block_map(flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map) {
-    fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
-  }
-  void add_dim_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>>> dim_metadata) {
-    fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
-  }
-  explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
-  flatbuffers::Offset<SparsityParameters> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SparsityParameters>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::CastOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
 };
 
-inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>>> dim_metadata = 0) {
-  SparsityParametersBuilder builder_(_fbb);
-  builder_.add_dim_metadata(dim_metadata);
-  builder_.add_block_map(block_map);
-  builder_.add_traversal_order(traversal_order);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int32_t> *traversal_order = nullptr,
-    const std::vector<int32_t> *block_map = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> *dim_metadata = nullptr) {
-  auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
-  auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
-  auto dim_metadata__ = dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<tflite::DimensionMetadata>>(*dim_metadata) : 0;
-  return tflite::CreateSparsityParameters(
-      _fbb,
-      traversal_order__,
-      block_map__,
-      dim_metadata__);
-}
-
-flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TensorT : public flatbuffers::NativeTable {
-  typedef Tensor TableType;
-  std::vector<int32_t> shape;
-  tflite::TensorType type;
-  uint32_t buffer;
-  std::string name;
-  std::unique_ptr<tflite::QuantizationParametersT> quantization;
-  bool is_variable;
-  std::unique_ptr<tflite::SparsityParametersT> sparsity;
-  std::vector<int32_t> shape_signature;
-  TensorT()
-      : type(tflite::TensorType_FLOAT32),
-        buffer(0),
-        is_variable(false) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::DequantizeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
 };
 
-struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TensorT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_SHAPE = 4,
-    VT_TYPE = 6,
-    VT_BUFFER = 8,
-    VT_NAME = 10,
-    VT_QUANTIZATION = 12,
-    VT_IS_VARIABLE = 14,
-    VT_SPARSITY = 16,
-    VT_SHAPE_SIGNATURE = 18
-  };
-  const flatbuffers::Vector<int32_t> *shape() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
-  }
-  tflite::TensorType type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
-  }
-  uint32_t buffer() const {
-    return GetField<uint32_t>(VT_BUFFER, 0);
-  }
-  const flatbuffers::String *name() const {
-    return GetPointer<const flatbuffers::String *>(VT_NAME);
-  }
-  const tflite::QuantizationParameters *quantization() const {
-    return GetPointer<const tflite::QuantizationParameters *>(VT_QUANTIZATION);
-  }
-  bool is_variable() const {
-    return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0;
-  }
-  const tflite::SparsityParameters *sparsity() const {
-    return GetPointer<const tflite::SparsityParameters *>(VT_SPARSITY);
-  }
-  const flatbuffers::Vector<int32_t> *shape_signature() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_SHAPE) &&
-           verifier.VerifyVector(shape()) &&
-           VerifyField<int8_t>(verifier, VT_TYPE) &&
-           VerifyField<uint32_t>(verifier, VT_BUFFER) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.VerifyString(name()) &&
-           VerifyOffset(verifier, VT_QUANTIZATION) &&
-           verifier.VerifyTable(quantization()) &&
-           VerifyField<uint8_t>(verifier, VT_IS_VARIABLE) &&
-           VerifyOffset(verifier, VT_SPARSITY) &&
-           verifier.VerifyTable(sparsity()) &&
-           VerifyOffset(verifier, VT_SHAPE_SIGNATURE) &&
-           verifier.VerifyVector(shape_signature()) &&
-           verifier.EndTable();
-  }
-  TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Tensor> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::MaximumMinimumOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
 };
 
-struct TensorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) {
-    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
-  }
-  void add_type(tflite::TensorType type) {
-    fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
-  }
-  void add_buffer(uint32_t buffer) {
-    fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0);
-  }
-  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
-    fbb_.AddOffset(Tensor::VT_NAME, name);
-  }
-  void add_quantization(flatbuffers::Offset<tflite::QuantizationParameters> quantization) {
-    fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
-  }
-  void add_is_variable(bool is_variable) {
-    fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
-  }
-  void add_sparsity(flatbuffers::Offset<tflite::SparsityParameters> sparsity) {
-    fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
-  }
-  void add_shape_signature(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature) {
-    fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
-  }
-  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TensorBuilder &operator=(const TensorBuilder &);
-  flatbuffers::Offset<Tensor> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Tensor>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::ArgMaxOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
 };
 
-inline flatbuffers::Offset<Tensor> CreateTensor(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
-    tflite::TensorType type = tflite::TensorType_FLOAT32,
-    uint32_t buffer = 0,
-    flatbuffers::Offset<flatbuffers::String> name = 0,
-    flatbuffers::Offset<tflite::QuantizationParameters> quantization = 0,
-    bool is_variable = false,
-    flatbuffers::Offset<tflite::SparsityParameters> sparsity = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0) {
-  TensorBuilder builder_(_fbb);
-  builder_.add_shape_signature(shape_signature);
-  builder_.add_sparsity(sparsity);
-  builder_.add_quantization(quantization);
-  builder_.add_name(name);
-  builder_.add_buffer(buffer);
-  builder_.add_shape(shape);
-  builder_.add_is_variable(is_variable);
-  builder_.add_type(type);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Tensor> CreateTensorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int32_t> *shape = nullptr,
-    tflite::TensorType type = tflite::TensorType_FLOAT32,
-    uint32_t buffer = 0,
-    const char *name = nullptr,
-    flatbuffers::Offset<tflite::QuantizationParameters> quantization = 0,
-    bool is_variable = false,
-    flatbuffers::Offset<tflite::SparsityParameters> sparsity = 0,
-    const std::vector<int32_t> *shape_signature = nullptr) {
-  auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
-  auto name__ = name ? _fbb.CreateString(name) : 0;
-  auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
-  return tflite::CreateTensor(
-      _fbb,
-      shape__,
-      type,
-      buffer,
-      name__,
-      quantization,
-      is_variable,
-      sparsity,
-      shape_signature__);
-}
-
-flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Conv2DOptionsT : public flatbuffers::NativeTable {
-  typedef Conv2DOptions TableType;
-  tflite::Padding padding;
-  int32_t stride_w;
-  int32_t stride_h;
-  tflite::ActivationFunctionType fused_activation_function;
-  int32_t dilation_w_factor;
-  int32_t dilation_h_factor;
-  Conv2DOptionsT()
-      : padding(tflite::Padding_SAME),
-        stride_w(0),
-        stride_h(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        dilation_w_factor(1),
-        dilation_h_factor(1) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::LessOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
 };
 
-struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Conv2DOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_PADDING = 4,
-    VT_STRIDE_W = 6,
-    VT_STRIDE_H = 8,
-    VT_FUSED_ACTIVATION_FUNCTION = 10,
-    VT_DILATION_W_FACTOR = 12,
-    VT_DILATION_H_FACTOR = 14
-  };
-  tflite::Padding padding() const {
-    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
-  }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  int32_t dilation_w_factor() const {
-    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
-  }
-  int32_t dilation_h_factor() const {
-    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_PADDING) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) &&
-           verifier.EndTable();
-  }
-  Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Conv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::NegOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
 };
 
-struct Conv2DOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_padding(tflite::Padding padding) {
-    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
-  }
-  void add_stride_w(int32_t stride_w) {
-    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
-  }
-  void add_stride_h(int32_t stride_h) {
-    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_dilation_w_factor(int32_t dilation_w_factor) {
-    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
-  }
-  void add_dilation_h_factor(int32_t dilation_h_factor) {
-    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
-  }
-  explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
-  flatbuffers::Offset<Conv2DOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Conv2DOptions>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::PadV2OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
 };
 
-inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::Padding padding = tflite::Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    int32_t dilation_w_factor = 1,
-    int32_t dilation_h_factor = 1) {
-  Conv2DOptionsBuilder builder_(_fbb);
-  builder_.add_dilation_h_factor(dilation_h_factor);
-  builder_.add_dilation_w_factor(dilation_w_factor);
-  builder_.add_stride_h(stride_h);
-  builder_.add_stride_w(stride_w);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_padding(padding);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Conv3DOptionsT : public flatbuffers::NativeTable {
-  typedef Conv3DOptions TableType;
-  tflite::Padding padding;
-  int32_t stride_d;
-  int32_t stride_w;
-  int32_t stride_h;
-  tflite::ActivationFunctionType fused_activation_function;
-  int32_t dilation_d_factor;
-  int32_t dilation_w_factor;
-  int32_t dilation_h_factor;
-  Conv3DOptionsT()
-      : padding(tflite::Padding_SAME),
-        stride_d(0),
-        stride_w(0),
-        stride_h(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        dilation_d_factor(1),
-        dilation_w_factor(1),
-        dilation_h_factor(1) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::GreaterOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
 };
 
-struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Conv3DOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_PADDING = 4,
-    VT_STRIDE_D = 6,
-    VT_STRIDE_W = 8,
-    VT_STRIDE_H = 10,
-    VT_FUSED_ACTIVATION_FUNCTION = 12,
-    VT_DILATION_D_FACTOR = 14,
-    VT_DILATION_W_FACTOR = 16,
-    VT_DILATION_H_FACTOR = 18
-  };
-  tflite::Padding padding() const {
-    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
-  }
-  int32_t stride_d() const {
-    return GetField<int32_t>(VT_STRIDE_D, 0);
-  }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  int32_t dilation_d_factor() const {
-    return GetField<int32_t>(VT_DILATION_D_FACTOR, 1);
-  }
-  int32_t dilation_w_factor() const {
-    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
-  }
-  int32_t dilation_h_factor() const {
-    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_PADDING) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) &&
-           verifier.EndTable();
-  }
-  Conv3DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Conv3DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::GreaterEqualOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
 };
 
-struct Conv3DOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_padding(tflite::Padding padding) {
-    fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
-  }
-  void add_stride_d(int32_t stride_d) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
-  }
-  void add_stride_w(int32_t stride_w) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
-  }
-  void add_stride_h(int32_t stride_h) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_dilation_d_factor(int32_t dilation_d_factor) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
-  }
-  void add_dilation_w_factor(int32_t dilation_w_factor) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
-  }
-  void add_dilation_h_factor(int32_t dilation_h_factor) {
-    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
-  }
-  explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Conv3DOptionsBuilder &operator=(const Conv3DOptionsBuilder &);
-  flatbuffers::Offset<Conv3DOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Conv3DOptions>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::LessEqualOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
 };
 
-inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::Padding padding = tflite::Padding_SAME,
-    int32_t stride_d = 0,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    int32_t dilation_d_factor = 1,
-    int32_t dilation_w_factor = 1,
-    int32_t dilation_h_factor = 1) {
-  Conv3DOptionsBuilder builder_(_fbb);
-  builder_.add_dilation_h_factor(dilation_h_factor);
-  builder_.add_dilation_w_factor(dilation_w_factor);
-  builder_.add_dilation_d_factor(dilation_d_factor);
-  builder_.add_stride_h(stride_h);
-  builder_.add_stride_w(stride_w);
-  builder_.add_stride_d(stride_d);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_padding(padding);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Pool2DOptionsT : public flatbuffers::NativeTable {
-  typedef Pool2DOptions TableType;
-  tflite::Padding padding;
-  int32_t stride_w;
-  int32_t stride_h;
-  int32_t filter_width;
-  int32_t filter_height;
-  tflite::ActivationFunctionType fused_activation_function;
-  Pool2DOptionsT()
-      : padding(tflite::Padding_SAME),
-        stride_w(0),
-        stride_h(0),
-        filter_width(0),
-        filter_height(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::SelectOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
 };
 
-struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Pool2DOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_PADDING = 4,
-    VT_STRIDE_W = 6,
-    VT_STRIDE_H = 8,
-    VT_FILTER_WIDTH = 10,
-    VT_FILTER_HEIGHT = 12,
-    VT_FUSED_ACTIVATION_FUNCTION = 14
-  };
-  tflite::Padding padding() const {
-    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
-  }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  int32_t filter_width() const {
-    return GetField<int32_t>(VT_FILTER_WIDTH, 0);
-  }
-  int32_t filter_height() const {
-    return GetField<int32_t>(VT_FILTER_HEIGHT, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_PADDING) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
-           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           verifier.EndTable();
-  }
-  Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Pool2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::SliceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
 };
 
-struct Pool2DOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_padding(tflite::Padding padding) {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
-  }
-  void add_stride_w(int32_t stride_w) {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
-  }
-  void add_stride_h(int32_t stride_h) {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
-  }
-  void add_filter_width(int32_t filter_width) {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
-  }
-  void add_filter_height(int32_t filter_height) {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
-  flatbuffers::Offset<Pool2DOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Pool2DOptions>(end);
-    return o;
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::TransposeConvOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
 };
 
-inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::Padding padding = tflite::Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    int32_t filter_width = 0,
-    int32_t filter_height = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
-  Pool2DOptionsBuilder builder_(_fbb);
-  builder_.add_filter_height(filter_height);
-  builder_.add_filter_width(filter_width);
-  builder_.add_stride_h(stride_h);
-  builder_.add_stride_w(stride_w);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_padding(padding);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {
-  typedef DepthwiseConv2DOptions TableType;
-  tflite::Padding padding;
-  int32_t stride_w;
-  int32_t stride_h;
-  int32_t depth_multiplier;
-  tflite::ActivationFunctionType fused_activation_function;
-  int32_t dilation_w_factor;
-  int32_t dilation_h_factor;
-  DepthwiseConv2DOptionsT()
-      : padding(tflite::Padding_SAME),
-        stride_w(0),
-        stride_h(0),
-        depth_multiplier(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        dilation_w_factor(1),
-        dilation_h_factor(1) {
-  }
+template<> struct BuiltinOptionsUnionTraits<tflite::SparseToDenseOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
 };
 
-struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DepthwiseConv2DOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_PADDING = 4,
-    VT_STRIDE_W = 6,
-    VT_STRIDE_H = 8,
-    VT_DEPTH_MULTIPLIER = 10,
-    VT_FUSED_ACTIVATION_FUNCTION = 12,
-    VT_DILATION_W_FACTOR = 14,
-    VT_DILATION_H_FACTOR = 16
-  };
-  tflite::Padding padding() const {
-    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
-  }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  int32_t depth_multiplier() const {
-    return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  int32_t dilation_w_factor() const {
-    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
-  }
-  int32_t dilation_h_factor() const {
-    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_PADDING) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
-           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) &&
-           verifier.EndTable();
-  }
-  DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DepthwiseConv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+template<> struct BuiltinOptionsUnionTraits<tflite::TileOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
 };
 
-struct DepthwiseConv2DOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_padding(tflite::Padding padding) {
-    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
-  }
-  void add_stride_w(int32_t stride_w) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
-  }
-  void add_stride_h(int32_t stride_h) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
-  }
-  void add_depth_multiplier(int32_t depth_multiplier) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_dilation_w_factor(int32_t dilation_w_factor) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
-  }
-  void add_dilation_h_factor(int32_t dilation_h_factor) {
-    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
-  }
-  explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
-  flatbuffers::Offset<DepthwiseConv2DOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::Padding padding = tflite::Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0,
-    int32_t depth_multiplier = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    int32_t dilation_w_factor = 1,
-    int32_t dilation_h_factor = 1) {
-  DepthwiseConv2DOptionsBuilder builder_(_fbb);
-  builder_.add_dilation_h_factor(dilation_h_factor);
-  builder_.add_dilation_w_factor(dilation_w_factor);
-  builder_.add_depth_multiplier(depth_multiplier);
-  builder_.add_stride_h(stride_h);
-  builder_.add_stride_w(stride_w);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_padding(padding);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable {
-  typedef ConcatEmbeddingsOptions TableType;
-  int32_t num_channels;
-  std::vector<int32_t> num_columns_per_channel;
-  std::vector<int32_t> embedding_dim_per_channel;
-  ConcatEmbeddingsOptionsT()
-      : num_channels(0) {
-  }
-};
-
-struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ConcatEmbeddingsOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NUM_CHANNELS = 4,
-    VT_NUM_COLUMNS_PER_CHANNEL = 6,
-    VT_EMBEDDING_DIM_PER_CHANNEL = 8
-  };
-  int32_t num_channels() const {
-    return GetField<int32_t>(VT_NUM_CHANNELS, 0);
-  }
-  const flatbuffers::Vector<int32_t> *num_columns_per_channel() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
-  }
-  const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NUM_CHANNELS) &&
-           VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
-           verifier.VerifyVector(num_columns_per_channel()) &&
-           VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
-           verifier.VerifyVector(embedding_dim_per_channel()) &&
-           verifier.EndTable();
-  }
-  ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ConcatEmbeddingsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ConcatEmbeddingsOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_num_channels(int32_t num_channels) {
-    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);
-  }
-  void add_num_columns_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) {
-    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
-  }
-  void add_embedding_dim_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) {
-    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel);
-  }
-  explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
-  flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_channels = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) {
-  ConcatEmbeddingsOptionsBuilder builder_(_fbb);
-  builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);
-  builder_.add_num_columns_per_channel(num_columns_per_channel);
-  builder_.add_num_channels(num_channels);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_channels = 0,
-    const std::vector<int32_t> *num_columns_per_channel = nullptr,
-    const std::vector<int32_t> *embedding_dim_per_channel = nullptr) {
-  auto num_columns_per_channel__ = num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
-  auto embedding_dim_per_channel__ = embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
-  return tflite::CreateConcatEmbeddingsOptions(
-      _fbb,
-      num_channels,
-      num_columns_per_channel__,
-      embedding_dim_per_channel__);
-}
-
-flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LSHProjectionOptionsT : public flatbuffers::NativeTable {
-  typedef LSHProjectionOptions TableType;
-  tflite::LSHProjectionType type;
-  LSHProjectionOptionsT()
-      : type(tflite::LSHProjectionType_UNKNOWN) {
-  }
-};
-
-struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LSHProjectionOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TYPE = 4
-  };
-  tflite::LSHProjectionType type() const {
-    return static_cast<tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_TYPE) &&
-           verifier.EndTable();
-  }
-  LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LSHProjectionOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LSHProjectionOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_type(tflite::LSHProjectionType type) {
-    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
-  }
-  explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
-  flatbuffers::Offset<LSHProjectionOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::LSHProjectionType type = tflite::LSHProjectionType_UNKNOWN) {
-  LSHProjectionOptionsBuilder builder_(_fbb);
-  builder_.add_type(type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SVDFOptionsT : public flatbuffers::NativeTable {
-  typedef SVDFOptions TableType;
-  int32_t rank;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool asymmetric_quantize_inputs;
-  SVDFOptionsT()
-      : rank(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SVDFOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_RANK = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
-  };
-  int32_t rank() const {
-    return GetField<int32_t>(VT_RANK, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_RANK) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SVDFOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SVDFOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_rank(int32_t rank) {
-    fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
-  flatbuffers::Offset<SVDFOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SVDFOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t rank = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool asymmetric_quantize_inputs = false) {
-  SVDFOptionsBuilder builder_(_fbb);
-  builder_.add_rank(rank);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct RNNOptionsT : public flatbuffers::NativeTable {
-  typedef RNNOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool asymmetric_quantize_inputs;
-  RNNOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef RNNOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<RNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct RNNOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
-  flatbuffers::Offset<RNNOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<RNNOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool asymmetric_quantize_inputs = false) {
-  RNNOptionsBuilder builder_(_fbb);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SequenceRNNOptionsT : public flatbuffers::NativeTable {
-  typedef SequenceRNNOptions TableType;
-  bool time_major;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool asymmetric_quantize_inputs;
-  SequenceRNNOptionsT()
-      : time_major(false),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SequenceRNNOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TIME_MAJOR = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
-  };
-  bool time_major() const {
-    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  SequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SequenceRNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SequenceRNNOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_time_major(bool time_major) {
-    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
-  flatbuffers::Offset<SequenceRNNOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool time_major = false,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool asymmetric_quantize_inputs = false) {
-  SequenceRNNOptionsBuilder builder_(_fbb);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_time_major(time_major);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable {
-  typedef BidirectionalSequenceRNNOptions TableType;
-  bool time_major;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool merge_outputs;
-  bool asymmetric_quantize_inputs;
-  BidirectionalSequenceRNNOptionsT()
-      : time_major(false),
-        fused_activation_function(tflite::ActivationFunctionType_NONE),
-        merge_outputs(false),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BidirectionalSequenceRNNOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TIME_MAJOR = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6,
-    VT_MERGE_OUTPUTS = 8,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
-  };
-  bool time_major() const {
-    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool merge_outputs() const {
-    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BidirectionalSequenceRNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BidirectionalSequenceRNNOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_time_major(bool time_major) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_merge_outputs(bool merge_outputs) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
-  flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool time_major = false,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool merge_outputs = false,
-    bool asymmetric_quantize_inputs = false) {
-  BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_merge_outputs(merge_outputs);
-  builder_.add_fused_activation_function(fused_activation_function);
-  builder_.add_time_major(time_major);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct FullyConnectedOptionsT : public flatbuffers::NativeTable {
-  typedef FullyConnectedOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  tflite::FullyConnectedOptionsWeightsFormat weights_format;
-  bool keep_num_dims;
-  bool asymmetric_quantize_inputs;
-  FullyConnectedOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        weights_format(tflite::FullyConnectedOptionsWeightsFormat_DEFAULT),
-        keep_num_dims(false),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FullyConnectedOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_WEIGHTS_FORMAT = 6,
-    VT_KEEP_NUM_DIMS = 8,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  tflite::FullyConnectedOptionsWeightsFormat weights_format() const {
-    return static_cast<tflite::FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
-  }
-  bool keep_num_dims() const {
-    return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0;
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT) &&
-           VerifyField<uint8_t>(verifier, VT_KEEP_NUM_DIMS) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FullyConnectedOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FullyConnectedOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_weights_format(tflite::FullyConnectedOptionsWeightsFormat weights_format) {
-    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT, static_cast<int8_t>(weights_format), 0);
-  }
-  void add_keep_num_dims(bool keep_num_dims) {
-    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_KEEP_NUM_DIMS, static_cast<uint8_t>(keep_num_dims), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
-  flatbuffers::Offset<FullyConnectedOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    tflite::FullyConnectedOptionsWeightsFormat weights_format = tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
-    bool keep_num_dims = false,
-    bool asymmetric_quantize_inputs = false) {
-  FullyConnectedOptionsBuilder builder_(_fbb);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_keep_num_dims(keep_num_dims);
-  builder_.add_weights_format(weights_format);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SoftmaxOptionsT : public flatbuffers::NativeTable {
-  typedef SoftmaxOptions TableType;
-  float beta;
-  SoftmaxOptionsT()
-      : beta(0.0f) {
-  }
-};
-
-struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SoftmaxOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_BETA = 4
-  };
-  float beta() const {
-    return GetField<float>(VT_BETA, 0.0f);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<float>(verifier, VT_BETA) &&
-           verifier.EndTable();
-  }
-  SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SoftmaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SoftmaxOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_beta(float beta) {
-    fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f);
-  }
-  explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
-  flatbuffers::Offset<SoftmaxOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SoftmaxOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    float beta = 0.0f) {
-  SoftmaxOptionsBuilder builder_(_fbb);
-  builder_.add_beta(beta);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ConcatenationOptionsT : public flatbuffers::NativeTable {
-  typedef ConcatenationOptions TableType;
-  int32_t axis;
-  tflite::ActivationFunctionType fused_activation_function;
-  ConcatenationOptionsT()
-      : axis(0),
-        fused_activation_function(tflite::ActivationFunctionType_NONE) {
-  }
-};
-
-struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ConcatenationOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_AXIS = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
-  };
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           verifier.EndTable();
-  }
-  ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ConcatenationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ConcatenationOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_axis(int32_t axis) {
-    fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0);
-  }
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
-  flatbuffers::Offset<ConcatenationOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ConcatenationOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t axis = 0,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
-  ConcatenationOptionsBuilder builder_(_fbb);
-  builder_.add_axis(axis);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct AddOptionsT : public flatbuffers::NativeTable {
-  typedef AddOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool pot_scale_int16;
-  AddOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        pot_scale_int16(true) {
-  }
-};
-
-struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef AddOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_POT_SCALE_INT16 = 6
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool pot_scale_int16() const {
-    return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) &&
-           verifier.EndTable();
-  }
-  AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<AddOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct AddOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_pot_scale_int16(bool pot_scale_int16) {
-    fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), 1);
-  }
-  explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  AddOptionsBuilder &operator=(const AddOptionsBuilder &);
-  flatbuffers::Offset<AddOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<AddOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<AddOptions> CreateAddOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool pot_scale_int16 = true) {
-  AddOptionsBuilder builder_(_fbb);
-  builder_.add_pot_scale_int16(pot_scale_int16);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MulOptionsT : public flatbuffers::NativeTable {
-  typedef MulOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  MulOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE) {
-  }
-};
-
-struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MulOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           verifier.EndTable();
-  }
-  MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MulOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MulOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MulOptionsBuilder &operator=(const MulOptionsBuilder &);
-  flatbuffers::Offset<MulOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<MulOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<MulOptions> CreateMulOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
-  MulOptionsBuilder builder_(_fbb);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct L2NormOptionsT : public flatbuffers::NativeTable {
-  typedef L2NormOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  L2NormOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE) {
-  }
-};
-
-struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef L2NormOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           verifier.EndTable();
-  }
-  L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<L2NormOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct L2NormOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
-  flatbuffers::Offset<L2NormOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<L2NormOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
-  L2NormOptionsBuilder builder_(_fbb);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable {
-  typedef LocalResponseNormalizationOptions TableType;
-  int32_t radius;
-  float bias;
-  float alpha;
-  float beta;
-  LocalResponseNormalizationOptionsT()
-      : radius(0),
-        bias(0.0f),
-        alpha(0.0f),
-        beta(0.0f) {
-  }
-};
-
-struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LocalResponseNormalizationOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_RADIUS = 4,
-    VT_BIAS = 6,
-    VT_ALPHA = 8,
-    VT_BETA = 10
-  };
-  int32_t radius() const {
-    return GetField<int32_t>(VT_RADIUS, 0);
-  }
-  float bias() const {
-    return GetField<float>(VT_BIAS, 0.0f);
-  }
-  float alpha() const {
-    return GetField<float>(VT_ALPHA, 0.0f);
-  }
-  float beta() const {
-    return GetField<float>(VT_BETA, 0.0f);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_RADIUS) &&
-           VerifyField<float>(verifier, VT_BIAS) &&
-           VerifyField<float>(verifier, VT_ALPHA) &&
-           VerifyField<float>(verifier, VT_BETA) &&
-           verifier.EndTable();
-  }
-  LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LocalResponseNormalizationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LocalResponseNormalizationOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_radius(int32_t radius) {
-    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
-  }
-  void add_bias(float bias) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
-  }
-  void add_alpha(float alpha) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
-  }
-  void add_beta(float beta) {
-    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
-  }
-  explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LocalResponseNormalizationOptionsBuilder &operator=(const LocalResponseNormalizationOptionsBuilder &);
-  flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t radius = 0,
-    float bias = 0.0f,
-    float alpha = 0.0f,
-    float beta = 0.0f) {
-  LocalResponseNormalizationOptionsBuilder builder_(_fbb);
-  builder_.add_beta(beta);
-  builder_.add_alpha(alpha);
-  builder_.add_bias(bias);
-  builder_.add_radius(radius);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LSTMOptionsT : public flatbuffers::NativeTable {
-  typedef LSTMOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  float cell_clip;
-  float proj_clip;
-  tflite::LSTMKernelType kernel_type;
-  bool asymmetric_quantize_inputs;
-  LSTMOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        cell_clip(0.0f),
-        proj_clip(0.0f),
-        kernel_type(tflite::LSTMKernelType_FULL),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LSTMOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_CELL_CLIP = 6,
-    VT_PROJ_CLIP = 8,
-    VT_KERNEL_TYPE = 10,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  float cell_clip() const {
-    return GetField<float>(VT_CELL_CLIP, 0.0f);
-  }
-  float proj_clip() const {
-    return GetField<float>(VT_PROJ_CLIP, 0.0f);
-  }
-  tflite::LSTMKernelType kernel_type() const {
-    return static_cast<tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<float>(verifier, VT_CELL_CLIP) &&
-           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
-           VerifyField<int8_t>(verifier, VT_KERNEL_TYPE) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LSTMOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_cell_clip(float cell_clip) {
-    fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
-  }
-  void add_proj_clip(float proj_clip) {
-    fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
-  }
-  void add_kernel_type(tflite::LSTMKernelType kernel_type) {
-    fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
-  flatbuffers::Offset<LSTMOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LSTMOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    float cell_clip = 0.0f,
-    float proj_clip = 0.0f,
-    tflite::LSTMKernelType kernel_type = tflite::LSTMKernelType_FULL,
-    bool asymmetric_quantize_inputs = false) {
-  LSTMOptionsBuilder builder_(_fbb);
-  builder_.add_proj_clip(proj_clip);
-  builder_.add_cell_clip(cell_clip);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_kernel_type(kernel_type);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
-  typedef UnidirectionalSequenceLSTMOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  float cell_clip;
-  float proj_clip;
-  bool time_major;
-  bool asymmetric_quantize_inputs;
-  UnidirectionalSequenceLSTMOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        cell_clip(0.0f),
-        proj_clip(0.0f),
-        time_major(false),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef UnidirectionalSequenceLSTMOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_CELL_CLIP = 6,
-    VT_PROJ_CLIP = 8,
-    VT_TIME_MAJOR = 10,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  float cell_clip() const {
-    return GetField<float>(VT_CELL_CLIP, 0.0f);
-  }
-  float proj_clip() const {
-    return GetField<float>(VT_PROJ_CLIP, 0.0f);
-  }
-  bool time_major() const {
-    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<float>(verifier, VT_CELL_CLIP) &&
-           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
-           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct UnidirectionalSequenceLSTMOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_cell_clip(float cell_clip) {
-    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
-  }
-  void add_proj_clip(float proj_clip) {
-    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
-  }
-  void add_time_major(bool time_major) {
-    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  UnidirectionalSequenceLSTMOptionsBuilder &operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
-  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    float cell_clip = 0.0f,
-    float proj_clip = 0.0f,
-    bool time_major = false,
-    bool asymmetric_quantize_inputs = false) {
-  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
-  builder_.add_proj_clip(proj_clip);
-  builder_.add_cell_clip(cell_clip);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_time_major(time_major);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
-  typedef BidirectionalSequenceLSTMOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  float cell_clip;
-  float proj_clip;
-  bool merge_outputs;
-  bool time_major;
-  bool asymmetric_quantize_inputs;
-  BidirectionalSequenceLSTMOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        cell_clip(0.0f),
-        proj_clip(0.0f),
-        merge_outputs(false),
-        time_major(true),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BidirectionalSequenceLSTMOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_CELL_CLIP = 6,
-    VT_PROJ_CLIP = 8,
-    VT_MERGE_OUTPUTS = 10,
-    VT_TIME_MAJOR = 12,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  float cell_clip() const {
-    return GetField<float>(VT_CELL_CLIP, 0.0f);
-  }
-  float proj_clip() const {
-    return GetField<float>(VT_PROJ_CLIP, 0.0f);
-  }
-  bool merge_outputs() const {
-    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
-  }
-  bool time_major() const {
-    return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0;
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<float>(verifier, VT_CELL_CLIP) &&
-           VerifyField<float>(verifier, VT_PROJ_CLIP) &&
-           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
-           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BidirectionalSequenceLSTMOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_cell_clip(float cell_clip) {
-    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
-  }
-  void add_proj_clip(float proj_clip) {
-    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
-  }
-  void add_merge_outputs(bool merge_outputs) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
-  }
-  void add_time_major(bool time_major) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 1);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BidirectionalSequenceLSTMOptionsBuilder &operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
-  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    float cell_clip = 0.0f,
-    float proj_clip = 0.0f,
-    bool merge_outputs = false,
-    bool time_major = true,
-    bool asymmetric_quantize_inputs = false) {
-  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
-  builder_.add_proj_clip(proj_clip);
-  builder_.add_cell_clip(cell_clip);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_time_major(time_major);
-  builder_.add_merge_outputs(merge_outputs);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {
-  typedef ResizeBilinearOptions TableType;
-  bool align_corners;
-  bool half_pixel_centers;
-  ResizeBilinearOptionsT()
-      : align_corners(false),
-        half_pixel_centers(false) {
-  }
-};
-
-struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ResizeBilinearOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_ALIGN_CORNERS = 8,
-    VT_HALF_PIXEL_CENTERS = 10
-  };
-  bool align_corners() const {
-    return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0;
-  }
-  bool half_pixel_centers() const {
-    return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
-           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) &&
-           verifier.EndTable();
-  }
-  ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ResizeBilinearOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ResizeBilinearOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_align_corners(bool align_corners) {
-    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0);
-  }
-  void add_half_pixel_centers(bool half_pixel_centers) {
-    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS, static_cast<uint8_t>(half_pixel_centers), 0);
-  }
-  explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
-  flatbuffers::Offset<ResizeBilinearOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool align_corners = false,
-    bool half_pixel_centers = false) {
-  ResizeBilinearOptionsBuilder builder_(_fbb);
-  builder_.add_half_pixel_centers(half_pixel_centers);
-  builder_.add_align_corners(align_corners);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ResizeNearestNeighborOptionsT : public flatbuffers::NativeTable {
-  typedef ResizeNearestNeighborOptions TableType;
-  bool align_corners;
-  bool half_pixel_centers;
-  ResizeNearestNeighborOptionsT()
-      : align_corners(false),
-        half_pixel_centers(false) {
-  }
-};
-
-struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ResizeNearestNeighborOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_ALIGN_CORNERS = 4,
-    VT_HALF_PIXEL_CENTERS = 6
-  };
-  bool align_corners() const {
-    return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0;
-  }
-  bool half_pixel_centers() const {
-    return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
-           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) &&
-           verifier.EndTable();
-  }
-  ResizeNearestNeighborOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ResizeNearestNeighborOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ResizeNearestNeighborOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_align_corners(bool align_corners) {
-    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0);
-  }
-  void add_half_pixel_centers(bool half_pixel_centers) {
-    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, static_cast<uint8_t>(half_pixel_centers), 0);
-  }
-  explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
-  flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool align_corners = false,
-    bool half_pixel_centers = false) {
-  ResizeNearestNeighborOptionsBuilder builder_(_fbb);
-  builder_.add_half_pixel_centers(half_pixel_centers);
-  builder_.add_align_corners(align_corners);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct CallOptionsT : public flatbuffers::NativeTable {
-  typedef CallOptions TableType;
-  uint32_t subgraph;
-  CallOptionsT()
-      : subgraph(0) {
-  }
-};
-
-struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CallOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_SUBGRAPH = 4
-  };
-  uint32_t subgraph() const {
-    return GetField<uint32_t>(VT_SUBGRAPH, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint32_t>(verifier, VT_SUBGRAPH) &&
-           verifier.EndTable();
-  }
-  CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CallOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CallOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_subgraph(uint32_t subgraph) {
-    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
-  }
-  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
-  flatbuffers::Offset<CallOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CallOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CallOptions> CreateCallOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t subgraph = 0) {
-  CallOptionsBuilder builder_(_fbb);
-  builder_.add_subgraph(subgraph);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct PadOptionsT : public flatbuffers::NativeTable {
-  typedef PadOptions TableType;
-  PadOptionsT() {
-  }
-};
-
-struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef PadOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  PadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<PadOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct PadOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
-  flatbuffers::Offset<PadOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<PadOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<PadOptions> CreatePadOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  PadOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct PadV2OptionsT : public flatbuffers::NativeTable {
-  typedef PadV2Options TableType;
-  PadV2OptionsT() {
-  }
-};
-
-struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef PadV2OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  PadV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<PadV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct PadV2OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
-  flatbuffers::Offset<PadV2Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<PadV2Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  PadV2OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ReshapeOptionsT : public flatbuffers::NativeTable {
-  typedef ReshapeOptions TableType;
-  std::vector<int32_t> new_shape;
-  ReshapeOptionsT() {
-  }
-};
-
-struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ReshapeOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NEW_SHAPE = 4
-  };
-  const flatbuffers::Vector<int32_t> *new_shape() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_NEW_SHAPE) &&
-           verifier.VerifyVector(new_shape()) &&
-           verifier.EndTable();
-  }
-  ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ReshapeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ReshapeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) {
-    fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
-  }
-  explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
-  flatbuffers::Offset<ReshapeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ReshapeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0) {
-  ReshapeOptionsBuilder builder_(_fbb);
-  builder_.add_new_shape(new_shape);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int32_t> *new_shape = nullptr) {
-  auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
-  return tflite::CreateReshapeOptions(
-      _fbb,
-      new_shape__);
-}
-
-flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable {
-  typedef SpaceToBatchNDOptions TableType;
-  SpaceToBatchNDOptionsT() {
-  }
-};
-
-struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SpaceToBatchNDOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SpaceToBatchNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SpaceToBatchNDOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SpaceToBatchNDOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
-  flatbuffers::Offset<SpaceToBatchNDOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SpaceToBatchNDOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable {
-  typedef BatchToSpaceNDOptions TableType;
-  BatchToSpaceNDOptionsT() {
-  }
-};
-
-struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BatchToSpaceNDOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  BatchToSpaceNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BatchToSpaceNDOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BatchToSpaceNDOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
-  flatbuffers::Offset<BatchToSpaceNDOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  BatchToSpaceNDOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SkipGramOptionsT : public flatbuffers::NativeTable {
-  typedef SkipGramOptions TableType;
-  int32_t ngram_size;
-  int32_t max_skip_size;
-  bool include_all_ngrams;
-  SkipGramOptionsT()
-      : ngram_size(0),
-        max_skip_size(0),
-        include_all_ngrams(false) {
-  }
-};
-
-struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SkipGramOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NGRAM_SIZE = 4,
-    VT_MAX_SKIP_SIZE = 6,
-    VT_INCLUDE_ALL_NGRAMS = 8
-  };
-  int32_t ngram_size() const {
-    return GetField<int32_t>(VT_NGRAM_SIZE, 0);
-  }
-  int32_t max_skip_size() const {
-    return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0);
-  }
-  bool include_all_ngrams() const {
-    return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NGRAM_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE) &&
-           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS) &&
-           verifier.EndTable();
-  }
-  SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SkipGramOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SkipGramOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_ngram_size(int32_t ngram_size) {
-    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);
-  }
-  void add_max_skip_size(int32_t max_skip_size) {
-    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);
-  }
-  void add_include_all_ngrams(bool include_all_ngrams) {
-    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast<uint8_t>(include_all_ngrams), 0);
-  }
-  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
-  flatbuffers::Offset<SkipGramOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SkipGramOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t ngram_size = 0,
-    int32_t max_skip_size = 0,
-    bool include_all_ngrams = false) {
-  SkipGramOptionsBuilder builder_(_fbb);
-  builder_.add_max_skip_size(max_skip_size);
-  builder_.add_ngram_size(ngram_size);
-  builder_.add_include_all_ngrams(include_all_ngrams);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SpaceToDepthOptionsT : public flatbuffers::NativeTable {
-  typedef SpaceToDepthOptions TableType;
-  int32_t block_size;
-  SpaceToDepthOptionsT()
-      : block_size(0) {
-  }
-};
-
-struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SpaceToDepthOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_BLOCK_SIZE = 4
-  };
-  int32_t block_size() const {
-    return GetField<int32_t>(VT_BLOCK_SIZE, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
-           verifier.EndTable();
-  }
-  SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SpaceToDepthOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SpaceToDepthOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_block_size(int32_t block_size) {
-    fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);
-  }
-  explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
-  flatbuffers::Offset<SpaceToDepthOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t block_size = 0) {
-  SpaceToDepthOptionsBuilder builder_(_fbb);
-  builder_.add_block_size(block_size);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DepthToSpaceOptionsT : public flatbuffers::NativeTable {
-  typedef DepthToSpaceOptions TableType;
-  int32_t block_size;
-  DepthToSpaceOptionsT()
-      : block_size(0) {
-  }
-};
-
-struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DepthToSpaceOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_BLOCK_SIZE = 4
-  };
-  int32_t block_size() const {
-    return GetField<int32_t>(VT_BLOCK_SIZE, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE) &&
-           verifier.EndTable();
-  }
-  DepthToSpaceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DepthToSpaceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DepthToSpaceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_block_size(int32_t block_size) {
-    fbb_.AddElement<int32_t>(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0);
-  }
-  explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
-  flatbuffers::Offset<DepthToSpaceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DepthToSpaceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t block_size = 0) {
-  DepthToSpaceOptionsBuilder builder_(_fbb);
-  builder_.add_block_size(block_size);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SubOptionsT : public flatbuffers::NativeTable {
-  typedef SubOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  bool pot_scale_int16;
-  SubOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE),
-        pot_scale_int16(true) {
-  }
-};
-
-struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SubOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4,
-    VT_POT_SCALE_INT16 = 6
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool pot_scale_int16() const {
-    return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) &&
-           verifier.EndTable();
-  }
-  SubOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SubOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SubOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  void add_pot_scale_int16(bool pot_scale_int16) {
-    fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), 1);
-  }
-  explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SubOptionsBuilder &operator=(const SubOptionsBuilder &);
-  flatbuffers::Offset<SubOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SubOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SubOptions> CreateSubOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
-    bool pot_scale_int16 = true) {
-  SubOptionsBuilder builder_(_fbb);
-  builder_.add_pot_scale_int16(pot_scale_int16);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DivOptionsT : public flatbuffers::NativeTable {
-  typedef DivOptions TableType;
-  tflite::ActivationFunctionType fused_activation_function;
-  DivOptionsT()
-      : fused_activation_function(tflite::ActivationFunctionType_NONE) {
-  }
-};
-
-struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DivOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
-  };
-  tflite::ActivationFunctionType fused_activation_function() const {
-    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
-           verifier.EndTable();
-  }
-  DivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DivOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DivOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
-    fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
-  }
-  explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DivOptionsBuilder &operator=(const DivOptionsBuilder &);
-  flatbuffers::Offset<DivOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DivOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DivOptions> CreateDivOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
-  DivOptionsBuilder builder_(_fbb);
-  builder_.add_fused_activation_function(fused_activation_function);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DivOptions> CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TopKV2OptionsT : public flatbuffers::NativeTable {
-  typedef TopKV2Options TableType;
-  TopKV2OptionsT() {
-  }
-};
-
-struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TopKV2OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  TopKV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TopKV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TopKV2OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
-  flatbuffers::Offset<TopKV2Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TopKV2Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  TopKV2OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable {
-  typedef EmbeddingLookupSparseOptions TableType;
-  tflite::CombinerType combiner;
-  EmbeddingLookupSparseOptionsT()
-      : combiner(tflite::CombinerType_SUM) {
-  }
-};
-
-struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef EmbeddingLookupSparseOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_COMBINER = 4
-  };
-  tflite::CombinerType combiner() const {
-    return static_cast<tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_COMBINER) &&
-           verifier.EndTable();
-  }
-  EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<EmbeddingLookupSparseOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct EmbeddingLookupSparseOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_combiner(tflite::CombinerType combiner) {
-    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast<int8_t>(combiner), 0);
-  }
-  explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
-  flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::CombinerType combiner = tflite::CombinerType_SUM) {
-  EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
-  builder_.add_combiner(combiner);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct GatherOptionsT : public flatbuffers::NativeTable {
-  typedef GatherOptions TableType;
-  int32_t axis;
-  int32_t batch_dims;
-  GatherOptionsT()
-      : axis(0),
-        batch_dims(0) {
-  }
-};
-
-struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef GatherOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_AXIS = 4,
-    VT_BATCH_DIMS = 6
-  };
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
-  int32_t batch_dims() const {
-    return GetField<int32_t>(VT_BATCH_DIMS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) &&
-           VerifyField<int32_t>(verifier, VT_BATCH_DIMS) &&
-           verifier.EndTable();
-  }
-  GatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<GatherOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct GatherOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_axis(int32_t axis) {
-    fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0);
-  }
-  void add_batch_dims(int32_t batch_dims) {
-    fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
-  }
-  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
-  flatbuffers::Offset<GatherOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<GatherOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t axis = 0,
-    int32_t batch_dims = 0) {
-  GatherOptionsBuilder builder_(_fbb);
-  builder_.add_batch_dims(batch_dims);
-  builder_.add_axis(axis);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TransposeOptionsT : public flatbuffers::NativeTable {
-  typedef TransposeOptions TableType;
-  TransposeOptionsT() {
-  }
-};
-
-struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TransposeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TransposeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TransposeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
-  flatbuffers::Offset<TransposeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TransposeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  TransposeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ExpOptionsT : public flatbuffers::NativeTable {
-  typedef ExpOptions TableType;
-  ExpOptionsT() {
-  }
-};
-
-struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ExpOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  ExpOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ExpOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ExpOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
-  flatbuffers::Offset<ExpOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ExpOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ExpOptions> CreateExpOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  ExpOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct CosOptionsT : public flatbuffers::NativeTable {
-  typedef CosOptions TableType;
-  CosOptionsT() {
-  }
-};
-
-struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CosOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  CosOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CosOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CosOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
-  flatbuffers::Offset<CosOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CosOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CosOptions> CreateCosOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  CosOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ReducerOptionsT : public flatbuffers::NativeTable {
-  typedef ReducerOptions TableType;
-  bool keep_dims;
-  ReducerOptionsT()
-      : keep_dims(false) {
-  }
-};
-
-struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ReducerOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_KEEP_DIMS = 4
-  };
-  bool keep_dims() const {
-    return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_KEEP_DIMS) &&
-           verifier.EndTable();
-  }
-  ReducerOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ReducerOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ReducerOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_keep_dims(bool keep_dims) {
-    fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, static_cast<uint8_t>(keep_dims), 0);
-  }
-  explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
-  flatbuffers::Offset<ReducerOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ReducerOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ReducerOptions> CreateReducerOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool keep_dims = false) {
-  ReducerOptionsBuilder builder_(_fbb);
-  builder_.add_keep_dims(keep_dims);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ReducerOptions> CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SqueezeOptionsT : public flatbuffers::NativeTable {
-  typedef SqueezeOptions TableType;
-  std::vector<int32_t> squeeze_dims;
-  SqueezeOptionsT() {
-  }
-};
-
-struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SqueezeOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_SQUEEZE_DIMS = 4
-  };
-  const flatbuffers::Vector<int32_t> *squeeze_dims() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_SQUEEZE_DIMS) &&
-           verifier.VerifyVector(squeeze_dims()) &&
-           verifier.EndTable();
-  }
-  SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SqueezeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SqueezeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) {
-    fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
-  }
-  explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
-  flatbuffers::Offset<SqueezeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SqueezeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0) {
-  SqueezeOptionsBuilder builder_(_fbb);
-  builder_.add_squeeze_dims(squeeze_dims);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int32_t> *squeeze_dims = nullptr) {
-  auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
-  return tflite::CreateSqueezeOptions(
-      _fbb,
-      squeeze_dims__);
-}
-
-flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SplitOptionsT : public flatbuffers::NativeTable {
-  typedef SplitOptions TableType;
-  int32_t num_splits;
-  SplitOptionsT()
-      : num_splits(0) {
-  }
-};
-
-struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SplitOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NUM_SPLITS = 4
-  };
-  int32_t num_splits() const {
-    return GetField<int32_t>(VT_NUM_SPLITS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
-           verifier.EndTable();
-  }
-  SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SplitOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SplitOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_num_splits(int32_t num_splits) {
-    fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
-  }
-  explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
-  flatbuffers::Offset<SplitOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SplitOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_splits = 0) {
-  SplitOptionsBuilder builder_(_fbb);
-  builder_.add_num_splits(num_splits);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SplitVOptionsT : public flatbuffers::NativeTable {
-  typedef SplitVOptions TableType;
-  int32_t num_splits;
-  SplitVOptionsT()
-      : num_splits(0) {
-  }
-};
-
-struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SplitVOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NUM_SPLITS = 4
-  };
-  int32_t num_splits() const {
-    return GetField<int32_t>(VT_NUM_SPLITS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NUM_SPLITS) &&
-           verifier.EndTable();
-  }
-  SplitVOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SplitVOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SplitVOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_num_splits(int32_t num_splits) {
-    fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
-  }
-  explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
-  flatbuffers::Offset<SplitVOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SplitVOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_splits = 0) {
-  SplitVOptionsBuilder builder_(_fbb);
-  builder_.add_num_splits(num_splits);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct StridedSliceOptionsT : public flatbuffers::NativeTable {
-  typedef StridedSliceOptions TableType;
-  int32_t begin_mask;
-  int32_t end_mask;
-  int32_t ellipsis_mask;
-  int32_t new_axis_mask;
-  int32_t shrink_axis_mask;
-  StridedSliceOptionsT()
-      : begin_mask(0),
-        end_mask(0),
-        ellipsis_mask(0),
-        new_axis_mask(0),
-        shrink_axis_mask(0) {
-  }
-};
-
-struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef StridedSliceOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_BEGIN_MASK = 4,
-    VT_END_MASK = 6,
-    VT_ELLIPSIS_MASK = 8,
-    VT_NEW_AXIS_MASK = 10,
-    VT_SHRINK_AXIS_MASK = 12
-  };
-  int32_t begin_mask() const {
-    return GetField<int32_t>(VT_BEGIN_MASK, 0);
-  }
-  int32_t end_mask() const {
-    return GetField<int32_t>(VT_END_MASK, 0);
-  }
-  int32_t ellipsis_mask() const {
-    return GetField<int32_t>(VT_ELLIPSIS_MASK, 0);
-  }
-  int32_t new_axis_mask() const {
-    return GetField<int32_t>(VT_NEW_AXIS_MASK, 0);
-  }
-  int32_t shrink_axis_mask() const {
-    return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_BEGIN_MASK) &&
-           VerifyField<int32_t>(verifier, VT_END_MASK) &&
-           VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK) &&
-           VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK) &&
-           VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK) &&
-           verifier.EndTable();
-  }
-  StridedSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<StridedSliceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct StridedSliceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_begin_mask(int32_t begin_mask) {
-    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
-  }
-  void add_end_mask(int32_t end_mask) {
-    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
-  }
-  void add_ellipsis_mask(int32_t ellipsis_mask) {
-    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
-  }
-  void add_new_axis_mask(int32_t new_axis_mask) {
-    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
-  }
-  void add_shrink_axis_mask(int32_t shrink_axis_mask) {
-    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
-  }
-  explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
-  flatbuffers::Offset<StridedSliceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<StridedSliceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t begin_mask = 0,
-    int32_t end_mask = 0,
-    int32_t ellipsis_mask = 0,
-    int32_t new_axis_mask = 0,
-    int32_t shrink_axis_mask = 0) {
-  StridedSliceOptionsBuilder builder_(_fbb);
-  builder_.add_shrink_axis_mask(shrink_axis_mask);
-  builder_.add_new_axis_mask(new_axis_mask);
-  builder_.add_ellipsis_mask(ellipsis_mask);
-  builder_.add_end_mask(end_mask);
-  builder_.add_begin_mask(begin_mask);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LogSoftmaxOptionsT : public flatbuffers::NativeTable {
-  typedef LogSoftmaxOptions TableType;
-  LogSoftmaxOptionsT() {
-  }
-};
-
-struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LogSoftmaxOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LogSoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LogSoftmaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LogSoftmaxOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
-  flatbuffers::Offset<LogSoftmaxOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LogSoftmaxOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LogSoftmaxOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct CastOptionsT : public flatbuffers::NativeTable {
-  typedef CastOptions TableType;
-  tflite::TensorType in_data_type;
-  tflite::TensorType out_data_type;
-  CastOptionsT()
-      : in_data_type(tflite::TensorType_FLOAT32),
-        out_data_type(tflite::TensorType_FLOAT32) {
-  }
-};
-
-struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CastOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_IN_DATA_TYPE = 4,
-    VT_OUT_DATA_TYPE = 6
-  };
-  tflite::TensorType in_data_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
-  }
-  tflite::TensorType out_data_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE) &&
-           VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE) &&
-           verifier.EndTable();
-  }
-  CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CastOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CastOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_in_data_type(tflite::TensorType in_data_type) {
-    fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
-  }
-  void add_out_data_type(tflite::TensorType out_data_type) {
-    fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
-  }
-  explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CastOptionsBuilder &operator=(const CastOptionsBuilder &);
-  flatbuffers::Offset<CastOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CastOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CastOptions> CreateCastOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::TensorType in_data_type = tflite::TensorType_FLOAT32,
-    tflite::TensorType out_data_type = tflite::TensorType_FLOAT32) {
-  CastOptionsBuilder builder_(_fbb);
-  builder_.add_out_data_type(out_data_type);
-  builder_.add_in_data_type(in_data_type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CastOptions> CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DequantizeOptionsT : public flatbuffers::NativeTable {
-  typedef DequantizeOptions TableType;
-  DequantizeOptionsT() {
-  }
-};
-
-struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DequantizeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  DequantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DequantizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DequantizeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
-  flatbuffers::Offset<DequantizeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DequantizeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  DequantizeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MaximumMinimumOptionsT : public flatbuffers::NativeTable {
-  typedef MaximumMinimumOptions TableType;
-  MaximumMinimumOptionsT() {
-  }
-};
-
-struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MaximumMinimumOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  MaximumMinimumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MaximumMinimumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MaximumMinimumOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
-  flatbuffers::Offset<MaximumMinimumOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<MaximumMinimumOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  MaximumMinimumOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TileOptionsT : public flatbuffers::NativeTable {
-  typedef TileOptions TableType;
-  TileOptionsT() {
-  }
-};
-
-struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TileOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  TileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TileOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TileOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TileOptionsBuilder &operator=(const TileOptionsBuilder &);
-  flatbuffers::Offset<TileOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TileOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TileOptions> CreateTileOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  TileOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ArgMaxOptionsT : public flatbuffers::NativeTable {
-  typedef ArgMaxOptions TableType;
-  tflite::TensorType output_type;
-  ArgMaxOptionsT()
-      : output_type(tflite::TensorType_FLOAT32) {
-  }
-};
-
-struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ArgMaxOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_OUTPUT_TYPE = 4
-  };
-  tflite::TensorType output_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
-           verifier.EndTable();
-  }
-  ArgMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ArgMaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ArgMaxOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_output_type(tflite::TensorType output_type) {
-    fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
-  }
-  explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
-  flatbuffers::Offset<ArgMaxOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ArgMaxOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::TensorType output_type = tflite::TensorType_FLOAT32) {
-  ArgMaxOptionsBuilder builder_(_fbb);
-  builder_.add_output_type(output_type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ArgMinOptionsT : public flatbuffers::NativeTable {
-  typedef ArgMinOptions TableType;
-  tflite::TensorType output_type;
-  ArgMinOptionsT()
-      : output_type(tflite::TensorType_FLOAT32) {
-  }
-};
-
-struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ArgMinOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_OUTPUT_TYPE = 4
-  };
-  tflite::TensorType output_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE) &&
-           verifier.EndTable();
-  }
-  ArgMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ArgMinOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ArgMinOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_output_type(tflite::TensorType output_type) {
-    fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
-  }
-  explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
-  flatbuffers::Offset<ArgMinOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ArgMinOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::TensorType output_type = tflite::TensorType_FLOAT32) {
-  ArgMinOptionsBuilder builder_(_fbb);
-  builder_.add_output_type(output_type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct GreaterOptionsT : public flatbuffers::NativeTable {
-  typedef GreaterOptions TableType;
-  GreaterOptionsT() {
-  }
-};
-
-struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef GreaterOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  GreaterOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(GreaterOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<GreaterOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct GreaterOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
-  flatbuffers::Offset<GreaterOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<GreaterOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  GreaterOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct GreaterEqualOptionsT : public flatbuffers::NativeTable {
-  typedef GreaterEqualOptions TableType;
-  GreaterEqualOptionsT() {
-  }
-};
-
-struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef GreaterEqualOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  GreaterEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<GreaterEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct GreaterEqualOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
-  flatbuffers::Offset<GreaterEqualOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<GreaterEqualOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  GreaterEqualOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LessOptionsT : public flatbuffers::NativeTable {
-  typedef LessOptions TableType;
-  LessOptionsT() {
-  }
-};
-
-struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LessOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LessOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LessOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LessOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LessOptionsBuilder &operator=(const LessOptionsBuilder &);
-  flatbuffers::Offset<LessOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LessOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LessOptions> CreateLessOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LessOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LessEqualOptionsT : public flatbuffers::NativeTable {
-  typedef LessEqualOptions TableType;
-  LessEqualOptionsT() {
-  }
-};
-
-struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LessEqualOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LessEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LessEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LessEqualOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
-  flatbuffers::Offset<LessEqualOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LessEqualOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LessEqualOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct NegOptionsT : public flatbuffers::NativeTable {
-  typedef NegOptions TableType;
-  NegOptionsT() {
-  }
-};
-
-struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef NegOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  NegOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<NegOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct NegOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  NegOptionsBuilder &operator=(const NegOptionsBuilder &);
-  flatbuffers::Offset<NegOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<NegOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<NegOptions> CreateNegOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  NegOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SelectOptionsT : public flatbuffers::NativeTable {
-  typedef SelectOptions TableType;
-  SelectOptionsT() {
-  }
-};
-
-struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SelectOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SelectOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SelectOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SelectOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
-  flatbuffers::Offset<SelectOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SelectOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SelectOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SliceOptionsT : public flatbuffers::NativeTable {
-  typedef SliceOptions TableType;
-  SliceOptionsT() {
-  }
-};
-
-struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SliceOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SliceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SliceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
-  flatbuffers::Offset<SliceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SliceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SliceOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TransposeConvOptionsT : public flatbuffers::NativeTable {
-  typedef TransposeConvOptions TableType;
-  tflite::Padding padding;
-  int32_t stride_w;
-  int32_t stride_h;
-  TransposeConvOptionsT()
-      : padding(tflite::Padding_SAME),
-        stride_w(0),
-        stride_h(0) {
-  }
-};
-
-struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TransposeConvOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_PADDING = 4,
-    VT_STRIDE_W = 6,
-    VT_STRIDE_H = 8
-  };
-  tflite::Padding padding() const {
-    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
-  }
-  int32_t stride_w() const {
-    return GetField<int32_t>(VT_STRIDE_W, 0);
-  }
-  int32_t stride_h() const {
-    return GetField<int32_t>(VT_STRIDE_H, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_PADDING) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
-           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           verifier.EndTable();
-  }
-  TransposeConvOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TransposeConvOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TransposeConvOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_padding(tflite::Padding padding) {
-    fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
-  }
-  void add_stride_w(int32_t stride_w) {
-    fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
-  }
-  void add_stride_h(int32_t stride_h) {
-    fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
-  }
-  explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
-  flatbuffers::Offset<TransposeConvOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TransposeConvOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::Padding padding = tflite::Padding_SAME,
-    int32_t stride_w = 0,
-    int32_t stride_h = 0) {
-  TransposeConvOptionsBuilder builder_(_fbb);
-  builder_.add_stride_h(stride_h);
-  builder_.add_stride_w(stride_w);
-  builder_.add_padding(padding);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ExpandDimsOptionsT : public flatbuffers::NativeTable {
-  typedef ExpandDimsOptions TableType;
-  ExpandDimsOptionsT() {
-  }
-};
-
-struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ExpandDimsOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  ExpandDimsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ExpandDimsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ExpandDimsOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
-  flatbuffers::Offset<ExpandDimsOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  ExpandDimsOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SparseToDenseOptionsT : public flatbuffers::NativeTable {
-  typedef SparseToDenseOptions TableType;
-  bool validate_indices;
-  SparseToDenseOptionsT()
-      : validate_indices(false) {
-  }
-};
-
-struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SparseToDenseOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VALIDATE_INDICES = 4
-  };
-  bool validate_indices() const {
-    return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES) &&
-           verifier.EndTable();
-  }
-  SparseToDenseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SparseToDenseOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SparseToDenseOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_validate_indices(bool validate_indices) {
-    fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES, static_cast<uint8_t>(validate_indices), 0);
-  }
-  explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
-  flatbuffers::Offset<SparseToDenseOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool validate_indices = false) {
-  SparseToDenseOptionsBuilder builder_(_fbb);
-  builder_.add_validate_indices(validate_indices);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct EqualOptionsT : public flatbuffers::NativeTable {
-  typedef EqualOptions TableType;
-  EqualOptionsT() {
-  }
-};
-
-struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef EqualOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  EqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<EqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct EqualOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
-  flatbuffers::Offset<EqualOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<EqualOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  EqualOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct NotEqualOptionsT : public flatbuffers::NativeTable {
-  typedef NotEqualOptions TableType;
-  NotEqualOptionsT() {
-  }
-};
-
-struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef NotEqualOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  NotEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<NotEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct NotEqualOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
-  flatbuffers::Offset<NotEqualOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<NotEqualOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  NotEqualOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ShapeOptionsT : public flatbuffers::NativeTable {
-  typedef ShapeOptions TableType;
-  tflite::TensorType out_type;
-  ShapeOptionsT()
-      : out_type(tflite::TensorType_FLOAT32) {
-  }
-};
-
-struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ShapeOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_OUT_TYPE = 4
-  };
-  tflite::TensorType out_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
-           verifier.EndTable();
-  }
-  ShapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ShapeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ShapeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_out_type(tflite::TensorType out_type) {
-    fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
-  }
-  explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
-  flatbuffers::Offset<ShapeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ShapeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ShapeOptions> CreateShapeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::TensorType out_type = tflite::TensorType_FLOAT32) {
-  ShapeOptionsBuilder builder_(_fbb);
-  builder_.add_out_type(out_type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ShapeOptions> CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct RankOptionsT : public flatbuffers::NativeTable {
-  typedef RankOptions TableType;
-  RankOptionsT() {
-  }
-};
-
-struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef RankOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<RankOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct RankOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  RankOptionsBuilder &operator=(const RankOptionsBuilder &);
-  flatbuffers::Offset<RankOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<RankOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<RankOptions> CreateRankOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  RankOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct PowOptionsT : public flatbuffers::NativeTable {
-  typedef PowOptions TableType;
-  PowOptionsT() {
-  }
-};
-
-struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef PowOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  PowOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<PowOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct PowOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  PowOptionsBuilder &operator=(const PowOptionsBuilder &);
-  flatbuffers::Offset<PowOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<PowOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<PowOptions> CreatePowOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  PowOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct FakeQuantOptionsT : public flatbuffers::NativeTable {
-  typedef FakeQuantOptions TableType;
-  float min;
-  float max;
-  int32_t num_bits;
-  bool narrow_range;
-  FakeQuantOptionsT()
-      : min(0.0f),
-        max(0.0f),
-        num_bits(0),
-        narrow_range(false) {
-  }
-};
-
-struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FakeQuantOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_MIN = 4,
-    VT_MAX = 6,
-    VT_NUM_BITS = 8,
-    VT_NARROW_RANGE = 10
-  };
-  float min() const {
-    return GetField<float>(VT_MIN, 0.0f);
-  }
-  float max() const {
-    return GetField<float>(VT_MAX, 0.0f);
-  }
-  int32_t num_bits() const {
-    return GetField<int32_t>(VT_NUM_BITS, 0);
-  }
-  bool narrow_range() const {
-    return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<float>(verifier, VT_MIN) &&
-           VerifyField<float>(verifier, VT_MAX) &&
-           VerifyField<int32_t>(verifier, VT_NUM_BITS) &&
-           VerifyField<uint8_t>(verifier, VT_NARROW_RANGE) &&
-           verifier.EndTable();
-  }
-  FakeQuantOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FakeQuantOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FakeQuantOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_min(float min) {
-    fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f);
-  }
-  void add_max(float max) {
-    fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f);
-  }
-  void add_num_bits(int32_t num_bits) {
-    fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
-  }
-  void add_narrow_range(bool narrow_range) {
-    fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range), 0);
-  }
-  explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
-  flatbuffers::Offset<FakeQuantOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FakeQuantOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    float min = 0.0f,
-    float max = 0.0f,
-    int32_t num_bits = 0,
-    bool narrow_range = false) {
-  FakeQuantOptionsBuilder builder_(_fbb);
-  builder_.add_num_bits(num_bits);
-  builder_.add_max(max);
-  builder_.add_min(min);
-  builder_.add_narrow_range(narrow_range);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct PackOptionsT : public flatbuffers::NativeTable {
-  typedef PackOptions TableType;
-  int32_t values_count;
-  int32_t axis;
-  PackOptionsT()
-      : values_count(0),
-        axis(0) {
-  }
-};
-
-struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef PackOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VALUES_COUNT = 4,
-    VT_AXIS = 6
-  };
-  int32_t values_count() const {
-    return GetField<int32_t>(VT_VALUES_COUNT, 0);
-  }
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_VALUES_COUNT) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) &&
-           verifier.EndTable();
-  }
-  PackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<PackOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct PackOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_values_count(int32_t values_count) {
-    fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);
-  }
-  void add_axis(int32_t axis) {
-    fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0);
-  }
-  explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  PackOptionsBuilder &operator=(const PackOptionsBuilder &);
-  flatbuffers::Offset<PackOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<PackOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<PackOptions> CreatePackOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t values_count = 0,
-    int32_t axis = 0) {
-  PackOptionsBuilder builder_(_fbb);
-  builder_.add_axis(axis);
-  builder_.add_values_count(values_count);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<PackOptions> CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LogicalOrOptionsT : public flatbuffers::NativeTable {
-  typedef LogicalOrOptions TableType;
-  LogicalOrOptionsT() {
-  }
-};
-
-struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LogicalOrOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LogicalOrOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LogicalOrOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LogicalOrOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
-  flatbuffers::Offset<LogicalOrOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LogicalOrOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LogicalOrOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct OneHotOptionsT : public flatbuffers::NativeTable {
-  typedef OneHotOptions TableType;
-  int32_t axis;
-  OneHotOptionsT()
-      : axis(0) {
-  }
-};
-
-struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef OneHotOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_AXIS = 4
-  };
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) &&
-           verifier.EndTable();
-  }
-  OneHotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<OneHotOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct OneHotOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_axis(int32_t axis) {
-    fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0);
-  }
-  explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
-  flatbuffers::Offset<OneHotOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<OneHotOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t axis = 0) {
-  OneHotOptionsBuilder builder_(_fbb);
-  builder_.add_axis(axis);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct AbsOptionsT : public flatbuffers::NativeTable {
-  typedef AbsOptions TableType;
-  AbsOptionsT() {
-  }
-};
-
-struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef AbsOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  AbsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<AbsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct AbsOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
-  flatbuffers::Offset<AbsOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<AbsOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  AbsOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct HardSwishOptionsT : public flatbuffers::NativeTable {
-  typedef HardSwishOptions TableType;
-  HardSwishOptionsT() {
-  }
-};
-
-struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef HardSwishOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  HardSwishOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(HardSwishOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<HardSwishOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct HardSwishOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
-  flatbuffers::Offset<HardSwishOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<HardSwishOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  HardSwishOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LogicalAndOptionsT : public flatbuffers::NativeTable {
-  typedef LogicalAndOptions TableType;
-  LogicalAndOptionsT() {
-  }
-};
-
-struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LogicalAndOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LogicalAndOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LogicalAndOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LogicalAndOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
-  flatbuffers::Offset<LogicalAndOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LogicalAndOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LogicalAndOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LogicalNotOptionsT : public flatbuffers::NativeTable {
-  typedef LogicalNotOptions TableType;
-  LogicalNotOptionsT() {
-  }
-};
-
-struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LogicalNotOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  LogicalNotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LogicalNotOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LogicalNotOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
-  flatbuffers::Offset<LogicalNotOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LogicalNotOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  LogicalNotOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct UnpackOptionsT : public flatbuffers::NativeTable {
-  typedef UnpackOptions TableType;
-  int32_t num;
-  int32_t axis;
-  UnpackOptionsT()
-      : num(0),
-        axis(0) {
-  }
-};
-
-struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef UnpackOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NUM = 4,
-    VT_AXIS = 6
-  };
-  int32_t num() const {
-    return GetField<int32_t>(VT_NUM, 0);
-  }
-  int32_t axis() const {
-    return GetField<int32_t>(VT_AXIS, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NUM) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) &&
-           verifier.EndTable();
-  }
-  UnpackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<UnpackOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct UnpackOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_num(int32_t num) {
-    fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0);
-  }
-  void add_axis(int32_t axis) {
-    fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0);
-  }
-  explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
-  flatbuffers::Offset<UnpackOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<UnpackOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num = 0,
-    int32_t axis = 0) {
-  UnpackOptionsBuilder builder_(_fbb);
-  builder_.add_axis(axis);
-  builder_.add_num(num);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct FloorDivOptionsT : public flatbuffers::NativeTable {
-  typedef FloorDivOptions TableType;
-  FloorDivOptionsT() {
-  }
-};
-
-struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FloorDivOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  FloorDivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FloorDivOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FloorDivOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
-  flatbuffers::Offset<FloorDivOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FloorDivOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  FloorDivOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SquareOptionsT : public flatbuffers::NativeTable {
-  typedef SquareOptions TableType;
-  SquareOptionsT() {
-  }
-};
-
-struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SquareOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SquareOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SquareOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SquareOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
-  flatbuffers::Offset<SquareOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SquareOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SquareOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ZerosLikeOptionsT : public flatbuffers::NativeTable {
-  typedef ZerosLikeOptions TableType;
-  ZerosLikeOptionsT() {
-  }
-};
-
-struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ZerosLikeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ZerosLikeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ZerosLikeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
-  flatbuffers::Offset<ZerosLikeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  ZerosLikeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct FillOptionsT : public flatbuffers::NativeTable {
-  typedef FillOptions TableType;
-  FillOptionsT() {
-  }
-};
-
-struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FillOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FillOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FillOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FillOptionsBuilder &operator=(const FillOptionsBuilder &);
-  flatbuffers::Offset<FillOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FillOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FillOptions> CreateFillOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  FillOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct FloorModOptionsT : public flatbuffers::NativeTable {
-  typedef FloorModOptions TableType;
-  FloorModOptionsT() {
-  }
-};
-
-struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FloorModOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FloorModOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FloorModOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
-  flatbuffers::Offset<FloorModOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FloorModOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  FloorModOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct RangeOptionsT : public flatbuffers::NativeTable {
-  typedef RangeOptions TableType;
-  RangeOptionsT() {
-  }
-};
-
-struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef RangeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<RangeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct RangeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
-  flatbuffers::Offset<RangeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<RangeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  RangeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct LeakyReluOptionsT : public flatbuffers::NativeTable {
-  typedef LeakyReluOptions TableType;
-  float alpha;
-  LeakyReluOptionsT()
-      : alpha(0.0f) {
-  }
-};
-
-struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef LeakyReluOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_ALPHA = 4
-  };
-  float alpha() const {
-    return GetField<float>(VT_ALPHA, 0.0f);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<float>(verifier, VT_ALPHA) &&
-           verifier.EndTable();
-  }
-  LeakyReluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<LeakyReluOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct LeakyReluOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_alpha(float alpha) {
-    fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f);
-  }
-  explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
-  flatbuffers::Offset<LeakyReluOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<LeakyReluOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    float alpha = 0.0f) {
-  LeakyReluOptionsBuilder builder_(_fbb);
-  builder_.add_alpha(alpha);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SquaredDifferenceOptionsT : public flatbuffers::NativeTable {
-  typedef SquaredDifferenceOptions TableType;
-  SquaredDifferenceOptionsT() {
-  }
-};
-
-struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SquaredDifferenceOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SquaredDifferenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SquaredDifferenceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SquaredDifferenceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
-  flatbuffers::Offset<SquaredDifferenceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SquaredDifferenceOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MirrorPadOptionsT : public flatbuffers::NativeTable {
-  typedef MirrorPadOptions TableType;
-  tflite::MirrorPadMode mode;
-  MirrorPadOptionsT()
-      : mode(tflite::MirrorPadMode_REFLECT) {
-  }
-};
-
-struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MirrorPadOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_MODE = 4
-  };
-  tflite::MirrorPadMode mode() const {
-    return static_cast<tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_MODE) &&
-           verifier.EndTable();
-  }
-  MirrorPadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MirrorPadOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MirrorPadOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_mode(tflite::MirrorPadMode mode) {
-    fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
-  }
-  explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
-  flatbuffers::Offset<MirrorPadOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<MirrorPadOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::MirrorPadMode mode = tflite::MirrorPadMode_REFLECT) {
-  MirrorPadOptionsBuilder builder_(_fbb);
-  builder_.add_mode(mode);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct UniqueOptionsT : public flatbuffers::NativeTable {
-  typedef UniqueOptions TableType;
-  tflite::TensorType idx_out_type;
-  UniqueOptionsT()
-      : idx_out_type(tflite::TensorType_INT32) {
-  }
-};
-
-struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef UniqueOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_IDX_OUT_TYPE = 4
-  };
-  tflite::TensorType idx_out_type() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_IDX_OUT_TYPE) &&
-           verifier.EndTable();
-  }
-  UniqueOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<UniqueOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct UniqueOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_idx_out_type(tflite::TensorType idx_out_type) {
-    fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
-  }
-  explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
-  flatbuffers::Offset<UniqueOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<UniqueOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    tflite::TensorType idx_out_type = tflite::TensorType_INT32) {
-  UniqueOptionsBuilder builder_(_fbb);
-  builder_.add_idx_out_type(idx_out_type);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ReverseV2OptionsT : public flatbuffers::NativeTable {
-  typedef ReverseV2Options TableType;
-  ReverseV2OptionsT() {
-  }
-};
-
-struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ReverseV2OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  ReverseV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ReverseV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ReverseV2OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
-  flatbuffers::Offset<ReverseV2Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ReverseV2Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  ReverseV2OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct AddNOptionsT : public flatbuffers::NativeTable {
-  typedef AddNOptions TableType;
-  AddNOptionsT() {
-  }
-};
-
-struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef AddNOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  AddNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<AddNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct AddNOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
-  flatbuffers::Offset<AddNOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<AddNOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  AddNOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct GatherNdOptionsT : public flatbuffers::NativeTable {
-  typedef GatherNdOptions TableType;
-  GatherNdOptionsT() {
-  }
-};
-
-struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef GatherNdOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  GatherNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<GatherNdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct GatherNdOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
-  flatbuffers::Offset<GatherNdOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<GatherNdOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  GatherNdOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct WhereOptionsT : public flatbuffers::NativeTable {
-  typedef WhereOptions TableType;
-  WhereOptionsT() {
-  }
-};
-
-struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef WhereOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<WhereOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct WhereOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
-  flatbuffers::Offset<WhereOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<WhereOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  WhereOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ReverseSequenceOptionsT : public flatbuffers::NativeTable {
-  typedef ReverseSequenceOptions TableType;
-  int32_t seq_dim;
-  int32_t batch_dim;
-  ReverseSequenceOptionsT()
-      : seq_dim(0),
-        batch_dim(0) {
-  }
-};
-
-struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ReverseSequenceOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_SEQ_DIM = 4,
-    VT_BATCH_DIM = 6
-  };
-  int32_t seq_dim() const {
-    return GetField<int32_t>(VT_SEQ_DIM, 0);
-  }
-  int32_t batch_dim() const {
-    return GetField<int32_t>(VT_BATCH_DIM, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_SEQ_DIM) &&
-           VerifyField<int32_t>(verifier, VT_BATCH_DIM) &&
-           verifier.EndTable();
-  }
-  ReverseSequenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ReverseSequenceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ReverseSequenceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_seq_dim(int32_t seq_dim) {
-    fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0);
-  }
-  void add_batch_dim(int32_t batch_dim) {
-    fbb_.AddElement<int32_t>(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0);
-  }
-  explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
-  flatbuffers::Offset<ReverseSequenceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ReverseSequenceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t seq_dim = 0,
-    int32_t batch_dim = 0) {
-  ReverseSequenceOptionsBuilder builder_(_fbb);
-  builder_.add_batch_dim(batch_dim);
-  builder_.add_seq_dim(seq_dim);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MatrixDiagOptionsT : public flatbuffers::NativeTable {
-  typedef MatrixDiagOptions TableType;
-  MatrixDiagOptionsT() {
-  }
-};
-
-struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MatrixDiagOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  MatrixDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MatrixDiagOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MatrixDiagOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
-  flatbuffers::Offset<MatrixDiagOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<MatrixDiagOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  MatrixDiagOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct QuantizeOptionsT : public flatbuffers::NativeTable {
-  typedef QuantizeOptions TableType;
-  QuantizeOptionsT() {
-  }
-};
-
-struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef QuantizeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  QuantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<QuantizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct QuantizeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
-  flatbuffers::Offset<QuantizeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<QuantizeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  QuantizeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MatrixSetDiagOptionsT : public flatbuffers::NativeTable {
-  typedef MatrixSetDiagOptions TableType;
-  MatrixSetDiagOptionsT() {
-  }
-};
-
-struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MatrixSetDiagOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  MatrixSetDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<MatrixSetDiagOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MatrixSetDiagOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
-  flatbuffers::Offset<MatrixSetDiagOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<MatrixSetDiagOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  MatrixSetDiagOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct IfOptionsT : public flatbuffers::NativeTable {
-  typedef IfOptions TableType;
-  int32_t then_subgraph_index;
-  int32_t else_subgraph_index;
-  IfOptionsT()
-      : then_subgraph_index(0),
-        else_subgraph_index(0) {
-  }
-};
-
-struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef IfOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_THEN_SUBGRAPH_INDEX = 4,
-    VT_ELSE_SUBGRAPH_INDEX = 6
-  };
-  int32_t then_subgraph_index() const {
-    return GetField<int32_t>(VT_THEN_SUBGRAPH_INDEX, 0);
-  }
-  int32_t else_subgraph_index() const {
-    return GetField<int32_t>(VT_ELSE_SUBGRAPH_INDEX, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_THEN_SUBGRAPH_INDEX) &&
-           VerifyField<int32_t>(verifier, VT_ELSE_SUBGRAPH_INDEX) &&
-           verifier.EndTable();
-  }
-  IfOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<IfOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct IfOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_then_subgraph_index(int32_t then_subgraph_index) {
-    fbb_.AddElement<int32_t>(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0);
-  }
-  void add_else_subgraph_index(int32_t else_subgraph_index) {
-    fbb_.AddElement<int32_t>(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0);
-  }
-  explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  IfOptionsBuilder &operator=(const IfOptionsBuilder &);
-  flatbuffers::Offset<IfOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<IfOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<IfOptions> CreateIfOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t then_subgraph_index = 0,
-    int32_t else_subgraph_index = 0) {
-  IfOptionsBuilder builder_(_fbb);
-  builder_.add_else_subgraph_index(else_subgraph_index);
-  builder_.add_then_subgraph_index(then_subgraph_index);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct CallOnceOptionsT : public flatbuffers::NativeTable {
-  typedef CallOnceOptions TableType;
-  int32_t init_subgraph_index;
-  CallOnceOptionsT()
-      : init_subgraph_index(0) {
-  }
-};
-
-struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CallOnceOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_INIT_SUBGRAPH_INDEX = 4
-  };
-  int32_t init_subgraph_index() const {
-    return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
-           verifier.EndTable();
-  }
-  CallOnceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CallOnceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CallOnceOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_init_subgraph_index(int32_t init_subgraph_index) {
-    fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
-  }
-  explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CallOnceOptionsBuilder &operator=(const CallOnceOptionsBuilder &);
-  flatbuffers::Offset<CallOnceOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CallOnceOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t init_subgraph_index = 0) {
-  CallOnceOptionsBuilder builder_(_fbb);
-  builder_.add_init_subgraph_index(init_subgraph_index);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct WhileOptionsT : public flatbuffers::NativeTable {
-  typedef WhileOptions TableType;
-  int32_t cond_subgraph_index;
-  int32_t body_subgraph_index;
-  WhileOptionsT()
-      : cond_subgraph_index(0),
-        body_subgraph_index(0) {
-  }
-};
-
-struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef WhileOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_COND_SUBGRAPH_INDEX = 4,
-    VT_BODY_SUBGRAPH_INDEX = 6
-  };
-  int32_t cond_subgraph_index() const {
-    return GetField<int32_t>(VT_COND_SUBGRAPH_INDEX, 0);
-  }
-  int32_t body_subgraph_index() const {
-    return GetField<int32_t>(VT_BODY_SUBGRAPH_INDEX, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_COND_SUBGRAPH_INDEX) &&
-           VerifyField<int32_t>(verifier, VT_BODY_SUBGRAPH_INDEX) &&
-           verifier.EndTable();
-  }
-  WhileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<WhileOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct WhileOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_cond_subgraph_index(int32_t cond_subgraph_index) {
-    fbb_.AddElement<int32_t>(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0);
-  }
-  void add_body_subgraph_index(int32_t body_subgraph_index) {
-    fbb_.AddElement<int32_t>(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0);
-  }
-  explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
-  flatbuffers::Offset<WhileOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<WhileOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t cond_subgraph_index = 0,
-    int32_t body_subgraph_index = 0) {
-  WhileOptionsBuilder builder_(_fbb);
-  builder_.add_body_subgraph_index(body_subgraph_index);
-  builder_.add_cond_subgraph_index(cond_subgraph_index);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct NonMaxSuppressionV4OptionsT : public flatbuffers::NativeTable {
-  typedef NonMaxSuppressionV4Options TableType;
-  NonMaxSuppressionV4OptionsT() {
-  }
-};
-
-struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef NonMaxSuppressionV4OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  NonMaxSuppressionV4OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<NonMaxSuppressionV4Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct NonMaxSuppressionV4OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
-  flatbuffers::Offset<NonMaxSuppressionV4Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<NonMaxSuppressionV4Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  NonMaxSuppressionV4OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct NonMaxSuppressionV5OptionsT : public flatbuffers::NativeTable {
-  typedef NonMaxSuppressionV5Options TableType;
-  NonMaxSuppressionV5OptionsT() {
-  }
-};
-
-struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef NonMaxSuppressionV5OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  NonMaxSuppressionV5OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<NonMaxSuppressionV5Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct NonMaxSuppressionV5OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
-  flatbuffers::Offset<NonMaxSuppressionV5Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<NonMaxSuppressionV5Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  NonMaxSuppressionV5OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ScatterNdOptionsT : public flatbuffers::NativeTable {
-  typedef ScatterNdOptions TableType;
-  ScatterNdOptionsT() {
-  }
-};
-
-struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ScatterNdOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  ScatterNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ScatterNdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ScatterNdOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
-  flatbuffers::Offset<ScatterNdOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ScatterNdOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  ScatterNdOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SelectV2OptionsT : public flatbuffers::NativeTable {
-  typedef SelectV2Options TableType;
-  SelectV2OptionsT() {
-  }
-};
-
-struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SelectV2OptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SelectV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SelectV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SelectV2OptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
-  flatbuffers::Offset<SelectV2Options> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SelectV2Options>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SelectV2OptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DensifyOptionsT : public flatbuffers::NativeTable {
-  typedef DensifyOptions TableType;
-  DensifyOptionsT() {
-  }
-};
-
-struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DensifyOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  DensifyOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DensifyOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DensifyOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
-  flatbuffers::Offset<DensifyOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DensifyOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  DensifyOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SegmentSumOptionsT : public flatbuffers::NativeTable {
-  typedef SegmentSumOptions TableType;
-  SegmentSumOptionsT() {
-  }
-};
-
-struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SegmentSumOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  SegmentSumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SegmentSumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SegmentSumOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
-  flatbuffers::Offset<SegmentSumOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SegmentSumOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  SegmentSumOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BatchMatMulOptionsT : public flatbuffers::NativeTable {
-  typedef BatchMatMulOptions TableType;
-  bool adj_x;
-  bool adj_y;
-  bool asymmetric_quantize_inputs;
-  BatchMatMulOptionsT()
-      : adj_x(false),
-        adj_y(false),
-        asymmetric_quantize_inputs(false) {
-  }
-};
-
-struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BatchMatMulOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_ADJ_X = 4,
-    VT_ADJ_Y = 6,
-    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
-  };
-  bool adj_x() const {
-    return GetField<uint8_t>(VT_ADJ_X, 0) != 0;
-  }
-  bool adj_y() const {
-    return GetField<uint8_t>(VT_ADJ_Y, 0) != 0;
-  }
-  bool asymmetric_quantize_inputs() const {
-    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_ADJ_X) &&
-           VerifyField<uint8_t>(verifier, VT_ADJ_Y) &&
-           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) &&
-           verifier.EndTable();
-  }
-  BatchMatMulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BatchMatMulOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BatchMatMulOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_adj_x(bool adj_x) {
-    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0);
-  }
-  void add_adj_y(bool adj_y) {
-    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0);
-  }
-  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
-    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
-  }
-  explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
-  flatbuffers::Offset<BatchMatMulOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BatchMatMulOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool adj_x = false,
-    bool adj_y = false,
-    bool asymmetric_quantize_inputs = false) {
-  BatchMatMulOptionsBuilder builder_(_fbb);
-  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
-  builder_.add_adj_y(adj_y);
-  builder_.add_adj_x(adj_x);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct CumsumOptionsT : public flatbuffers::NativeTable {
-  typedef CumsumOptions TableType;
-  bool exclusive;
-  bool reverse;
-  CumsumOptionsT()
-      : exclusive(false),
-        reverse(false) {
-  }
-};
-
-struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CumsumOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_EXCLUSIVE = 4,
-    VT_REVERSE = 6
-  };
-  bool exclusive() const {
-    return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0;
-  }
-  bool reverse() const {
-    return GetField<uint8_t>(VT_REVERSE, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
-           VerifyField<uint8_t>(verifier, VT_REVERSE) &&
-           verifier.EndTable();
-  }
-  CumsumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CumsumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CumsumOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_exclusive(bool exclusive) {
-    fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
-  }
-  void add_reverse(bool reverse) {
-    fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
-  }
-  explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CumsumOptionsBuilder &operator=(const CumsumOptionsBuilder &);
-  flatbuffers::Offset<CumsumOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CumsumOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool exclusive = false,
-    bool reverse = false) {
-  CumsumOptionsBuilder builder_(_fbb);
-  builder_.add_reverse(reverse);
-  builder_.add_exclusive(exclusive);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BroadcastToOptionsT : public flatbuffers::NativeTable {
-  typedef BroadcastToOptions TableType;
-  BroadcastToOptionsT() {
-  }
-};
-
-struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BroadcastToOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BroadcastToOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BroadcastToOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BroadcastToOptionsBuilder &operator=(const BroadcastToOptionsBuilder &);
-  flatbuffers::Offset<BroadcastToOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BroadcastToOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  BroadcastToOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct Rfft2dOptionsT : public flatbuffers::NativeTable {
-  typedef Rfft2dOptions TableType;
-  Rfft2dOptionsT() {
-  }
-};
-
-struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef Rfft2dOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  Rfft2dOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Rfft2dOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct Rfft2dOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  Rfft2dOptionsBuilder &operator=(const Rfft2dOptionsBuilder &);
-  flatbuffers::Offset<Rfft2dOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Rfft2dOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  Rfft2dOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct HashtableOptionsT : public flatbuffers::NativeTable {
-  typedef HashtableOptions TableType;
-  int32_t table_id;
-  tflite::TensorType key_dtype;
-  tflite::TensorType value_dtype;
-  HashtableOptionsT()
-      : table_id(0),
-        key_dtype(tflite::TensorType_FLOAT32),
-        value_dtype(tflite::TensorType_FLOAT32) {
-  }
-};
-
-struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef HashtableOptionsT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TABLE_ID = 4,
-    VT_KEY_DTYPE = 6,
-    VT_VALUE_DTYPE = 8
-  };
-  int32_t table_id() const {
-    return GetField<int32_t>(VT_TABLE_ID, 0);
-  }
-  tflite::TensorType key_dtype() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
-  }
-  tflite::TensorType value_dtype() const {
-    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
-           VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
-           VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) &&
-           verifier.EndTable();
-  }
-  HashtableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<HashtableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct HashtableOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_table_id(int32_t table_id) {
-    fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
-  }
-  void add_key_dtype(tflite::TensorType key_dtype) {
-    fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
-  }
-  void add_value_dtype(tflite::TensorType value_dtype) {
-    fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
-  }
-  explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  HashtableOptionsBuilder &operator=(const HashtableOptionsBuilder &);
-  flatbuffers::Offset<HashtableOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<HashtableOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t table_id = 0,
-    tflite::TensorType key_dtype = tflite::TensorType_FLOAT32,
-    tflite::TensorType value_dtype = tflite::TensorType_FLOAT32) {
-  HashtableOptionsBuilder builder_(_fbb);
-  builder_.add_table_id(table_id);
-  builder_.add_value_dtype(value_dtype);
-  builder_.add_key_dtype(key_dtype);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct HashtableFindOptionsT : public flatbuffers::NativeTable {
-  typedef HashtableFindOptions TableType;
-  HashtableFindOptionsT() {
-  }
-};
-
-struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef HashtableFindOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  HashtableFindOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<HashtableFindOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct HashtableFindOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  HashtableFindOptionsBuilder &operator=(const HashtableFindOptionsBuilder &);
-  flatbuffers::Offset<HashtableFindOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<HashtableFindOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  HashtableFindOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct HashtableImportOptionsT : public flatbuffers::NativeTable {
-  typedef HashtableImportOptions TableType;
-  HashtableImportOptionsT() {
-  }
-};
-
-struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef HashtableImportOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  HashtableImportOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<HashtableImportOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct HashtableImportOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  HashtableImportOptionsBuilder &operator=(const HashtableImportOptionsBuilder &);
-  flatbuffers::Offset<HashtableImportOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<HashtableImportOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  HashtableImportOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct HashtableSizeOptionsT : public flatbuffers::NativeTable {
-  typedef HashtableSizeOptions TableType;
-  HashtableSizeOptionsT() {
-  }
-};
-
-struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef HashtableSizeOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  HashtableSizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<HashtableSizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct HashtableSizeOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  HashtableSizeOptionsBuilder &operator=(const HashtableSizeOptionsBuilder &);
-  flatbuffers::Offset<HashtableSizeOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  HashtableSizeOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct OperatorCodeT : public flatbuffers::NativeTable {
-  typedef OperatorCode TableType;
-  int8_t deprecated_builtin_code;
-  std::string custom_code;
-  int32_t version;
-  tflite::BuiltinOperator builtin_code;
-  OperatorCodeT()
-      : deprecated_builtin_code(0),
-        version(1),
-        builtin_code(tflite::BuiltinOperator_ADD) {
-  }
-};
-
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef OperatorCodeT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_DEPRECATED_BUILTIN_CODE = 4,
-    VT_CUSTOM_CODE = 6,
-    VT_VERSION = 8,
-    VT_BUILTIN_CODE = 10
-  };
-  int8_t deprecated_builtin_code() const {
-    return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0);
-  }
-  const flatbuffers::String *custom_code() const {
-    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
-  }
-  int32_t version() const {
-    return GetField<int32_t>(VT_VERSION, 1);
-  }
-  tflite::BuiltinOperator builtin_code() const {
-    return static_cast<tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
-           VerifyOffset(verifier, VT_CUSTOM_CODE) &&
-           verifier.VerifyString(custom_code()) &&
-           VerifyField<int32_t>(verifier, VT_VERSION) &&
-           VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) &&
-           verifier.EndTable();
-  }
-  OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<OperatorCode> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct OperatorCodeBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_deprecated_builtin_code(int8_t deprecated_builtin_code) {
-    fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
-  }
-  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) {
-    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
-  }
-  void add_version(int32_t version) {
-    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
-  }
-  void add_builtin_code(tflite::BuiltinOperator builtin_code) {
-    fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
-  }
-  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
-  flatbuffers::Offset<OperatorCode> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<OperatorCode>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int8_t deprecated_builtin_code = 0,
-    flatbuffers::Offset<flatbuffers::String> custom_code = 0,
-    int32_t version = 1,
-    tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) {
-  OperatorCodeBuilder builder_(_fbb);
-  builder_.add_builtin_code(builtin_code);
-  builder_.add_version(version);
-  builder_.add_custom_code(custom_code);
-  builder_.add_deprecated_builtin_code(deprecated_builtin_code);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int8_t deprecated_builtin_code = 0,
-    const char *custom_code = nullptr,
-    int32_t version = 1,
-    tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) {
-  auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
-  return tflite::CreateOperatorCode(
-      _fbb,
-      deprecated_builtin_code,
-      custom_code__,
-      version,
-      builtin_code);
-}
-
-flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct OperatorT : public flatbuffers::NativeTable {
-  typedef Operator TableType;
-  uint32_t opcode_index;
-  std::vector<int32_t> inputs;
-  std::vector<int32_t> outputs;
-  tflite::BuiltinOptionsUnion builtin_options;
-  std::vector<uint8_t> custom_options;
-  tflite::CustomOptionsFormat custom_options_format;
-  std::vector<bool> mutating_variable_inputs;
-  std::vector<int32_t> intermediates;
-  OperatorT()
-      : opcode_index(0),
-        custom_options_format(tflite::CustomOptionsFormat_FLEXBUFFERS) {
-  }
-};
-
-struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef OperatorT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_OPCODE_INDEX = 4,
-    VT_INPUTS = 6,
-    VT_OUTPUTS = 8,
-    VT_BUILTIN_OPTIONS_TYPE = 10,
-    VT_BUILTIN_OPTIONS = 12,
-    VT_CUSTOM_OPTIONS = 14,
-    VT_CUSTOM_OPTIONS_FORMAT = 16,
-    VT_MUTATING_VARIABLE_INPUTS = 18,
-    VT_INTERMEDIATES = 20
-  };
-  uint32_t opcode_index() const {
-    return GetField<uint32_t>(VT_OPCODE_INDEX, 0);
-  }
-  const flatbuffers::Vector<int32_t> *inputs() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
-  }
-  const flatbuffers::Vector<int32_t> *outputs() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
-  }
-  tflite::BuiltinOptions builtin_options_type() const {
-    return static_cast<tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
-  }
-  const void *builtin_options() const {
-    return GetPointer<const void *>(VT_BUILTIN_OPTIONS);
-  }
-  template<typename T> const T *builtin_options_as() const;
-  const tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_Conv2DOptions ? static_cast<const tflite::Conv2DOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_DepthwiseConv2DOptions ? static_cast<const tflite::DepthwiseConv2DOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ConcatEmbeddingsOptions ? static_cast<const tflite::ConcatEmbeddingsOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LSHProjectionOptions ? static_cast<const tflite::LSHProjectionOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_Pool2DOptions ? static_cast<const tflite::Pool2DOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SVDFOptions *builtin_options_as_SVDFOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SVDFOptions ? static_cast<const tflite::SVDFOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::RNNOptions *builtin_options_as_RNNOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_RNNOptions ? static_cast<const tflite::RNNOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_FullyConnectedOptions ? static_cast<const tflite::FullyConnectedOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SoftmaxOptions ? static_cast<const tflite::SoftmaxOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ConcatenationOptions ? static_cast<const tflite::ConcatenationOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::AddOptions *builtin_options_as_AddOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_AddOptions ? static_cast<const tflite::AddOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::L2NormOptions *builtin_options_as_L2NormOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_L2NormOptions ? static_cast<const tflite::L2NormOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LocalResponseNormalizationOptions ? static_cast<const tflite::LocalResponseNormalizationOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LSTMOptions *builtin_options_as_LSTMOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LSTMOptions ? static_cast<const tflite::LSTMOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ResizeBilinearOptions ? static_cast<const tflite::ResizeBilinearOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::CallOptions *builtin_options_as_CallOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_CallOptions ? static_cast<const tflite::CallOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ReshapeOptions ? static_cast<const tflite::ReshapeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SkipGramOptions ? static_cast<const tflite::SkipGramOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SpaceToDepthOptions ? static_cast<const tflite::SpaceToDepthOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast<const tflite::EmbeddingLookupSparseOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::MulOptions *builtin_options_as_MulOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_MulOptions ? static_cast<const tflite::MulOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::PadOptions *builtin_options_as_PadOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_PadOptions ? static_cast<const tflite::PadOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::GatherOptions *builtin_options_as_GatherOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_GatherOptions ? static_cast<const tflite::GatherOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_BatchToSpaceNDOptions ? static_cast<const tflite::BatchToSpaceNDOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SpaceToBatchNDOptions ? static_cast<const tflite::SpaceToBatchNDOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::TransposeOptions *builtin_options_as_TransposeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_TransposeOptions ? static_cast<const tflite::TransposeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ReducerOptions *builtin_options_as_ReducerOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ReducerOptions ? static_cast<const tflite::ReducerOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SubOptions *builtin_options_as_SubOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SubOptions ? static_cast<const tflite::SubOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::DivOptions *builtin_options_as_DivOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_DivOptions ? static_cast<const tflite::DivOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SqueezeOptions ? static_cast<const tflite::SqueezeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SequenceRNNOptions ? static_cast<const tflite::SequenceRNNOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_StridedSliceOptions ? static_cast<const tflite::StridedSliceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ExpOptions *builtin_options_as_ExpOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ExpOptions ? static_cast<const tflite::ExpOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::TopKV2Options *builtin_options_as_TopKV2Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_TopKV2Options ? static_cast<const tflite::TopKV2Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::SplitOptions *builtin_options_as_SplitOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SplitOptions ? static_cast<const tflite::SplitOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LogSoftmaxOptions ? static_cast<const tflite::LogSoftmaxOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::CastOptions *builtin_options_as_CastOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_CastOptions ? static_cast<const tflite::CastOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_DequantizeOptions ? static_cast<const tflite::DequantizeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_MaximumMinimumOptions ? static_cast<const tflite::MaximumMinimumOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ArgMaxOptions ? static_cast<const tflite::ArgMaxOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LessOptions *builtin_options_as_LessOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LessOptions ? static_cast<const tflite::LessOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::NegOptions *builtin_options_as_NegOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_NegOptions ? static_cast<const tflite::NegOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::PadV2Options *builtin_options_as_PadV2Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_PadV2Options ? static_cast<const tflite::PadV2Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::GreaterOptions *builtin_options_as_GreaterOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_GreaterOptions ? static_cast<const tflite::GreaterOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_GreaterEqualOptions ? static_cast<const tflite::GreaterEqualOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LessEqualOptions ? static_cast<const tflite::LessEqualOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SelectOptions *builtin_options_as_SelectOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SelectOptions ? static_cast<const tflite::SelectOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SliceOptions *builtin_options_as_SliceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SliceOptions ? static_cast<const tflite::SliceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_TransposeConvOptions ? static_cast<const tflite::TransposeConvOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SparseToDenseOptions ? static_cast<const tflite::SparseToDenseOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::TileOptions *builtin_options_as_TileOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_TileOptions ? static_cast<const tflite::TileOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ExpandDimsOptions ? static_cast<const tflite::ExpandDimsOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::EqualOptions *builtin_options_as_EqualOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_EqualOptions ? static_cast<const tflite::EqualOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_NotEqualOptions ? static_cast<const tflite::NotEqualOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ShapeOptions *builtin_options_as_ShapeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ShapeOptions ? static_cast<const tflite::ShapeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::PowOptions *builtin_options_as_PowOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_PowOptions ? static_cast<const tflite::PowOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ArgMinOptions ? static_cast<const tflite::ArgMinOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_FakeQuantOptions ? static_cast<const tflite::FakeQuantOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::PackOptions *builtin_options_as_PackOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_PackOptions ? static_cast<const tflite::PackOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LogicalOrOptions ? static_cast<const tflite::LogicalOrOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::OneHotOptions *builtin_options_as_OneHotOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_OneHotOptions ? static_cast<const tflite::OneHotOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LogicalAndOptions ? static_cast<const tflite::LogicalAndOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LogicalNotOptions ? static_cast<const tflite::LogicalNotOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::UnpackOptions *builtin_options_as_UnpackOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_UnpackOptions ? static_cast<const tflite::UnpackOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_FloorDivOptions ? static_cast<const tflite::FloorDivOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SquareOptions *builtin_options_as_SquareOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SquareOptions ? static_cast<const tflite::SquareOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ZerosLikeOptions ? static_cast<const tflite::ZerosLikeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::FillOptions *builtin_options_as_FillOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_FillOptions ? static_cast<const tflite::FillOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast<const tflite::BidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const tflite::BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::FloorModOptions *builtin_options_as_FloorModOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_FloorModOptions ? static_cast<const tflite::FloorModOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::RangeOptions *builtin_options_as_RangeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_RangeOptions ? static_cast<const tflite::RangeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ResizeNearestNeighborOptions ? static_cast<const tflite::ResizeNearestNeighborOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_LeakyReluOptions ? static_cast<const tflite::LeakyReluOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SquaredDifferenceOptions ? static_cast<const tflite::SquaredDifferenceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_MirrorPadOptions ? static_cast<const tflite::MirrorPadOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::AbsOptions *builtin_options_as_AbsOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_AbsOptions ? static_cast<const tflite::AbsOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SplitVOptions *builtin_options_as_SplitVOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SplitVOptions ? static_cast<const tflite::SplitVOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::UniqueOptions *builtin_options_as_UniqueOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_UniqueOptions ? static_cast<const tflite::UniqueOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ReverseV2Options ? static_cast<const tflite::ReverseV2Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::AddNOptions *builtin_options_as_AddNOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_AddNOptions ? static_cast<const tflite::AddNOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_GatherNdOptions ? static_cast<const tflite::GatherNdOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::CosOptions *builtin_options_as_CosOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_CosOptions ? static_cast<const tflite::CosOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::WhereOptions *builtin_options_as_WhereOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_WhereOptions ? static_cast<const tflite::WhereOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::RankOptions *builtin_options_as_RankOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_RankOptions ? static_cast<const tflite::RankOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ReverseSequenceOptions ? static_cast<const tflite::ReverseSequenceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_MatrixDiagOptions ? static_cast<const tflite::MatrixDiagOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_QuantizeOptions ? static_cast<const tflite::QuantizeOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_MatrixSetDiagOptions ? static_cast<const tflite::MatrixSetDiagOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_HardSwishOptions ? static_cast<const tflite::HardSwishOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::IfOptions *builtin_options_as_IfOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_IfOptions ? static_cast<const tflite::IfOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::WhileOptions *builtin_options_as_WhileOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_WhileOptions ? static_cast<const tflite::WhileOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_DepthToSpaceOptions ? static_cast<const tflite::DepthToSpaceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV4Options ? static_cast<const tflite::NonMaxSuppressionV4Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV5Options ? static_cast<const tflite::NonMaxSuppressionV5Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_ScatterNdOptions ? static_cast<const tflite::ScatterNdOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SelectV2Options *builtin_options_as_SelectV2Options() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SelectV2Options ? static_cast<const tflite::SelectV2Options *>(builtin_options()) : nullptr;
-  }
-  const tflite::DensifyOptions *builtin_options_as_DensifyOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_DensifyOptions ? static_cast<const tflite::DensifyOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_SegmentSumOptions ? static_cast<const tflite::SegmentSumOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_BatchMatMulOptions ? static_cast<const tflite::BatchMatMulOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::CumsumOptions *builtin_options_as_CumsumOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_CumsumOptions ? static_cast<const tflite::CumsumOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_CallOnceOptions ? static_cast<const tflite::CallOnceOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_BroadcastToOptions ? static_cast<const tflite::BroadcastToOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_Rfft2dOptions ? static_cast<const tflite::Rfft2dOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_Conv3DOptions ? static_cast<const tflite::Conv3DOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::HashtableOptions *builtin_options_as_HashtableOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_HashtableOptions ? static_cast<const tflite::HashtableOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_HashtableFindOptions ? static_cast<const tflite::HashtableFindOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_HashtableImportOptions ? static_cast<const tflite::HashtableImportOptions *>(builtin_options()) : nullptr;
-  }
-  const tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_HashtableSizeOptions ? static_cast<const tflite::HashtableSizeOptions *>(builtin_options()) : nullptr;
-  }
-  const flatbuffers::Vector<uint8_t> *custom_options() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
-  }
-  tflite::CustomOptionsFormat custom_options_format() const {
-    return static_cast<tflite::CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
-  }
-  const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);
-  }
-  const flatbuffers::Vector<int32_t> *intermediates() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INTERMEDIATES);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX) &&
-           VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.VerifyVector(inputs()) &&
-           VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.VerifyVector(outputs()) &&
-           VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE) &&
-           VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
-           VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&
-           VerifyOffset(verifier, VT_CUSTOM_OPTIONS) &&
-           verifier.VerifyVector(custom_options()) &&
-           VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT) &&
-           VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
-           verifier.VerifyVector(mutating_variable_inputs()) &&
-           VerifyOffset(verifier, VT_INTERMEDIATES) &&
-           verifier.VerifyVector(intermediates()) &&
-           verifier.EndTable();
-  }
-  OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Operator> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-template<> inline const tflite::Conv2DOptions *Operator::builtin_options_as<tflite::Conv2DOptions>() const {
-  return builtin_options_as_Conv2DOptions();
-}
-
-template<> inline const tflite::DepthwiseConv2DOptions *Operator::builtin_options_as<tflite::DepthwiseConv2DOptions>() const {
-  return builtin_options_as_DepthwiseConv2DOptions();
-}
-
-template<> inline const tflite::ConcatEmbeddingsOptions *Operator::builtin_options_as<tflite::ConcatEmbeddingsOptions>() const {
-  return builtin_options_as_ConcatEmbeddingsOptions();
-}
-
-template<> inline const tflite::LSHProjectionOptions *Operator::builtin_options_as<tflite::LSHProjectionOptions>() const {
-  return builtin_options_as_LSHProjectionOptions();
-}
-
-template<> inline const tflite::Pool2DOptions *Operator::builtin_options_as<tflite::Pool2DOptions>() const {
-  return builtin_options_as_Pool2DOptions();
-}
-
-template<> inline const tflite::SVDFOptions *Operator::builtin_options_as<tflite::SVDFOptions>() const {
-  return builtin_options_as_SVDFOptions();
-}
-
-template<> inline const tflite::RNNOptions *Operator::builtin_options_as<tflite::RNNOptions>() const {
-  return builtin_options_as_RNNOptions();
-}
-
-template<> inline const tflite::FullyConnectedOptions *Operator::builtin_options_as<tflite::FullyConnectedOptions>() const {
-  return builtin_options_as_FullyConnectedOptions();
-}
-
-template<> inline const tflite::SoftmaxOptions *Operator::builtin_options_as<tflite::SoftmaxOptions>() const {
-  return builtin_options_as_SoftmaxOptions();
-}
-
-template<> inline const tflite::ConcatenationOptions *Operator::builtin_options_as<tflite::ConcatenationOptions>() const {
-  return builtin_options_as_ConcatenationOptions();
-}
-
-template<> inline const tflite::AddOptions *Operator::builtin_options_as<tflite::AddOptions>() const {
-  return builtin_options_as_AddOptions();
-}
-
-template<> inline const tflite::L2NormOptions *Operator::builtin_options_as<tflite::L2NormOptions>() const {
-  return builtin_options_as_L2NormOptions();
-}
-
-template<> inline const tflite::LocalResponseNormalizationOptions *Operator::builtin_options_as<tflite::LocalResponseNormalizationOptions>() const {
-  return builtin_options_as_LocalResponseNormalizationOptions();
-}
-
-template<> inline const tflite::LSTMOptions *Operator::builtin_options_as<tflite::LSTMOptions>() const {
-  return builtin_options_as_LSTMOptions();
-}
-
-template<> inline const tflite::ResizeBilinearOptions *Operator::builtin_options_as<tflite::ResizeBilinearOptions>() const {
-  return builtin_options_as_ResizeBilinearOptions();
-}
-
-template<> inline const tflite::CallOptions *Operator::builtin_options_as<tflite::CallOptions>() const {
-  return builtin_options_as_CallOptions();
-}
-
-template<> inline const tflite::ReshapeOptions *Operator::builtin_options_as<tflite::ReshapeOptions>() const {
-  return builtin_options_as_ReshapeOptions();
-}
-
-template<> inline const tflite::SkipGramOptions *Operator::builtin_options_as<tflite::SkipGramOptions>() const {
-  return builtin_options_as_SkipGramOptions();
-}
-
-template<> inline const tflite::SpaceToDepthOptions *Operator::builtin_options_as<tflite::SpaceToDepthOptions>() const {
-  return builtin_options_as_SpaceToDepthOptions();
-}
-
-template<> inline const tflite::EmbeddingLookupSparseOptions *Operator::builtin_options_as<tflite::EmbeddingLookupSparseOptions>() const {
-  return builtin_options_as_EmbeddingLookupSparseOptions();
-}
-
-template<> inline const tflite::MulOptions *Operator::builtin_options_as<tflite::MulOptions>() const {
-  return builtin_options_as_MulOptions();
-}
-
-template<> inline const tflite::PadOptions *Operator::builtin_options_as<tflite::PadOptions>() const {
-  return builtin_options_as_PadOptions();
-}
-
-template<> inline const tflite::GatherOptions *Operator::builtin_options_as<tflite::GatherOptions>() const {
-  return builtin_options_as_GatherOptions();
-}
-
-template<> inline const tflite::BatchToSpaceNDOptions *Operator::builtin_options_as<tflite::BatchToSpaceNDOptions>() const {
-  return builtin_options_as_BatchToSpaceNDOptions();
-}
-
-template<> inline const tflite::SpaceToBatchNDOptions *Operator::builtin_options_as<tflite::SpaceToBatchNDOptions>() const {
-  return builtin_options_as_SpaceToBatchNDOptions();
-}
-
-template<> inline const tflite::TransposeOptions *Operator::builtin_options_as<tflite::TransposeOptions>() const {
-  return builtin_options_as_TransposeOptions();
-}
-
-template<> inline const tflite::ReducerOptions *Operator::builtin_options_as<tflite::ReducerOptions>() const {
-  return builtin_options_as_ReducerOptions();
-}
-
-template<> inline const tflite::SubOptions *Operator::builtin_options_as<tflite::SubOptions>() const {
-  return builtin_options_as_SubOptions();
-}
-
-template<> inline const tflite::DivOptions *Operator::builtin_options_as<tflite::DivOptions>() const {
-  return builtin_options_as_DivOptions();
-}
-
-template<> inline const tflite::SqueezeOptions *Operator::builtin_options_as<tflite::SqueezeOptions>() const {
-  return builtin_options_as_SqueezeOptions();
-}
-
-template<> inline const tflite::SequenceRNNOptions *Operator::builtin_options_as<tflite::SequenceRNNOptions>() const {
-  return builtin_options_as_SequenceRNNOptions();
-}
-
-template<> inline const tflite::StridedSliceOptions *Operator::builtin_options_as<tflite::StridedSliceOptions>() const {
-  return builtin_options_as_StridedSliceOptions();
-}
-
-template<> inline const tflite::ExpOptions *Operator::builtin_options_as<tflite::ExpOptions>() const {
-  return builtin_options_as_ExpOptions();
-}
-
-template<> inline const tflite::TopKV2Options *Operator::builtin_options_as<tflite::TopKV2Options>() const {
-  return builtin_options_as_TopKV2Options();
-}
-
-template<> inline const tflite::SplitOptions *Operator::builtin_options_as<tflite::SplitOptions>() const {
-  return builtin_options_as_SplitOptions();
-}
-
-template<> inline const tflite::LogSoftmaxOptions *Operator::builtin_options_as<tflite::LogSoftmaxOptions>() const {
-  return builtin_options_as_LogSoftmaxOptions();
-}
-
-template<> inline const tflite::CastOptions *Operator::builtin_options_as<tflite::CastOptions>() const {
-  return builtin_options_as_CastOptions();
-}
-
-template<> inline const tflite::DequantizeOptions *Operator::builtin_options_as<tflite::DequantizeOptions>() const {
-  return builtin_options_as_DequantizeOptions();
-}
-
-template<> inline const tflite::MaximumMinimumOptions *Operator::builtin_options_as<tflite::MaximumMinimumOptions>() const {
-  return builtin_options_as_MaximumMinimumOptions();
-}
-
-template<> inline const tflite::ArgMaxOptions *Operator::builtin_options_as<tflite::ArgMaxOptions>() const {
-  return builtin_options_as_ArgMaxOptions();
-}
-
-template<> inline const tflite::LessOptions *Operator::builtin_options_as<tflite::LessOptions>() const {
-  return builtin_options_as_LessOptions();
-}
-
-template<> inline const tflite::NegOptions *Operator::builtin_options_as<tflite::NegOptions>() const {
-  return builtin_options_as_NegOptions();
-}
-
-template<> inline const tflite::PadV2Options *Operator::builtin_options_as<tflite::PadV2Options>() const {
-  return builtin_options_as_PadV2Options();
-}
-
-template<> inline const tflite::GreaterOptions *Operator::builtin_options_as<tflite::GreaterOptions>() const {
-  return builtin_options_as_GreaterOptions();
-}
-
-template<> inline const tflite::GreaterEqualOptions *Operator::builtin_options_as<tflite::GreaterEqualOptions>() const {
-  return builtin_options_as_GreaterEqualOptions();
-}
-
-template<> inline const tflite::LessEqualOptions *Operator::builtin_options_as<tflite::LessEqualOptions>() const {
-  return builtin_options_as_LessEqualOptions();
-}
-
-template<> inline const tflite::SelectOptions *Operator::builtin_options_as<tflite::SelectOptions>() const {
-  return builtin_options_as_SelectOptions();
-}
-
-template<> inline const tflite::SliceOptions *Operator::builtin_options_as<tflite::SliceOptions>() const {
-  return builtin_options_as_SliceOptions();
-}
-
-template<> inline const tflite::TransposeConvOptions *Operator::builtin_options_as<tflite::TransposeConvOptions>() const {
-  return builtin_options_as_TransposeConvOptions();
-}
-
-template<> inline const tflite::SparseToDenseOptions *Operator::builtin_options_as<tflite::SparseToDenseOptions>() const {
-  return builtin_options_as_SparseToDenseOptions();
-}
-
-template<> inline const tflite::TileOptions *Operator::builtin_options_as<tflite::TileOptions>() const {
-  return builtin_options_as_TileOptions();
-}
-
-template<> inline const tflite::ExpandDimsOptions *Operator::builtin_options_as<tflite::ExpandDimsOptions>() const {
-  return builtin_options_as_ExpandDimsOptions();
-}
-
-template<> inline const tflite::EqualOptions *Operator::builtin_options_as<tflite::EqualOptions>() const {
-  return builtin_options_as_EqualOptions();
-}
-
-template<> inline const tflite::NotEqualOptions *Operator::builtin_options_as<tflite::NotEqualOptions>() const {
-  return builtin_options_as_NotEqualOptions();
-}
-
-template<> inline const tflite::ShapeOptions *Operator::builtin_options_as<tflite::ShapeOptions>() const {
-  return builtin_options_as_ShapeOptions();
-}
-
-template<> inline const tflite::PowOptions *Operator::builtin_options_as<tflite::PowOptions>() const {
-  return builtin_options_as_PowOptions();
-}
-
-template<> inline const tflite::ArgMinOptions *Operator::builtin_options_as<tflite::ArgMinOptions>() const {
-  return builtin_options_as_ArgMinOptions();
-}
-
-template<> inline const tflite::FakeQuantOptions *Operator::builtin_options_as<tflite::FakeQuantOptions>() const {
-  return builtin_options_as_FakeQuantOptions();
-}
-
-template<> inline const tflite::PackOptions *Operator::builtin_options_as<tflite::PackOptions>() const {
-  return builtin_options_as_PackOptions();
-}
-
-template<> inline const tflite::LogicalOrOptions *Operator::builtin_options_as<tflite::LogicalOrOptions>() const {
-  return builtin_options_as_LogicalOrOptions();
-}
-
-template<> inline const tflite::OneHotOptions *Operator::builtin_options_as<tflite::OneHotOptions>() const {
-  return builtin_options_as_OneHotOptions();
-}
-
-template<> inline const tflite::LogicalAndOptions *Operator::builtin_options_as<tflite::LogicalAndOptions>() const {
-  return builtin_options_as_LogicalAndOptions();
-}
-
-template<> inline const tflite::LogicalNotOptions *Operator::builtin_options_as<tflite::LogicalNotOptions>() const {
-  return builtin_options_as_LogicalNotOptions();
-}
-
-template<> inline const tflite::UnpackOptions *Operator::builtin_options_as<tflite::UnpackOptions>() const {
-  return builtin_options_as_UnpackOptions();
-}
-
-template<> inline const tflite::FloorDivOptions *Operator::builtin_options_as<tflite::FloorDivOptions>() const {
-  return builtin_options_as_FloorDivOptions();
-}
-
-template<> inline const tflite::SquareOptions *Operator::builtin_options_as<tflite::SquareOptions>() const {
-  return builtin_options_as_SquareOptions();
-}
-
-template<> inline const tflite::ZerosLikeOptions *Operator::builtin_options_as<tflite::ZerosLikeOptions>() const {
-  return builtin_options_as_ZerosLikeOptions();
-}
-
-template<> inline const tflite::FillOptions *Operator::builtin_options_as<tflite::FillOptions>() const {
-  return builtin_options_as_FillOptions();
-}
-
-template<> inline const tflite::BidirectionalSequenceLSTMOptions *Operator::builtin_options_as<tflite::BidirectionalSequenceLSTMOptions>() const {
-  return builtin_options_as_BidirectionalSequenceLSTMOptions();
-}
-
-template<> inline const tflite::BidirectionalSequenceRNNOptions *Operator::builtin_options_as<tflite::BidirectionalSequenceRNNOptions>() const {
-  return builtin_options_as_BidirectionalSequenceRNNOptions();
-}
-
-template<> inline const tflite::UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as<tflite::UnidirectionalSequenceLSTMOptions>() const {
-  return builtin_options_as_UnidirectionalSequenceLSTMOptions();
-}
-
-template<> inline const tflite::FloorModOptions *Operator::builtin_options_as<tflite::FloorModOptions>() const {
-  return builtin_options_as_FloorModOptions();
-}
-
-template<> inline const tflite::RangeOptions *Operator::builtin_options_as<tflite::RangeOptions>() const {
-  return builtin_options_as_RangeOptions();
-}
-
-template<> inline const tflite::ResizeNearestNeighborOptions *Operator::builtin_options_as<tflite::ResizeNearestNeighborOptions>() const {
-  return builtin_options_as_ResizeNearestNeighborOptions();
-}
-
-template<> inline const tflite::LeakyReluOptions *Operator::builtin_options_as<tflite::LeakyReluOptions>() const {
-  return builtin_options_as_LeakyReluOptions();
-}
-
-template<> inline const tflite::SquaredDifferenceOptions *Operator::builtin_options_as<tflite::SquaredDifferenceOptions>() const {
-  return builtin_options_as_SquaredDifferenceOptions();
-}
-
-template<> inline const tflite::MirrorPadOptions *Operator::builtin_options_as<tflite::MirrorPadOptions>() const {
-  return builtin_options_as_MirrorPadOptions();
-}
-
-template<> inline const tflite::AbsOptions *Operator::builtin_options_as<tflite::AbsOptions>() const {
-  return builtin_options_as_AbsOptions();
-}
-
-template<> inline const tflite::SplitVOptions *Operator::builtin_options_as<tflite::SplitVOptions>() const {
-  return builtin_options_as_SplitVOptions();
-}
-
-template<> inline const tflite::UniqueOptions *Operator::builtin_options_as<tflite::UniqueOptions>() const {
-  return builtin_options_as_UniqueOptions();
-}
-
-template<> inline const tflite::ReverseV2Options *Operator::builtin_options_as<tflite::ReverseV2Options>() const {
-  return builtin_options_as_ReverseV2Options();
-}
-
-template<> inline const tflite::AddNOptions *Operator::builtin_options_as<tflite::AddNOptions>() const {
-  return builtin_options_as_AddNOptions();
-}
-
-template<> inline const tflite::GatherNdOptions *Operator::builtin_options_as<tflite::GatherNdOptions>() const {
-  return builtin_options_as_GatherNdOptions();
-}
-
-template<> inline const tflite::CosOptions *Operator::builtin_options_as<tflite::CosOptions>() const {
-  return builtin_options_as_CosOptions();
-}
-
-template<> inline const tflite::WhereOptions *Operator::builtin_options_as<tflite::WhereOptions>() const {
-  return builtin_options_as_WhereOptions();
-}
-
-template<> inline const tflite::RankOptions *Operator::builtin_options_as<tflite::RankOptions>() const {
-  return builtin_options_as_RankOptions();
-}
-
-template<> inline const tflite::ReverseSequenceOptions *Operator::builtin_options_as<tflite::ReverseSequenceOptions>() const {
-  return builtin_options_as_ReverseSequenceOptions();
-}
-
-template<> inline const tflite::MatrixDiagOptions *Operator::builtin_options_as<tflite::MatrixDiagOptions>() const {
-  return builtin_options_as_MatrixDiagOptions();
-}
-
-template<> inline const tflite::QuantizeOptions *Operator::builtin_options_as<tflite::QuantizeOptions>() const {
-  return builtin_options_as_QuantizeOptions();
-}
-
-template<> inline const tflite::MatrixSetDiagOptions *Operator::builtin_options_as<tflite::MatrixSetDiagOptions>() const {
-  return builtin_options_as_MatrixSetDiagOptions();
-}
-
-template<> inline const tflite::HardSwishOptions *Operator::builtin_options_as<tflite::HardSwishOptions>() const {
-  return builtin_options_as_HardSwishOptions();
-}
-
-template<> inline const tflite::IfOptions *Operator::builtin_options_as<tflite::IfOptions>() const {
-  return builtin_options_as_IfOptions();
-}
-
-template<> inline const tflite::WhileOptions *Operator::builtin_options_as<tflite::WhileOptions>() const {
-  return builtin_options_as_WhileOptions();
-}
-
-template<> inline const tflite::DepthToSpaceOptions *Operator::builtin_options_as<tflite::DepthToSpaceOptions>() const {
-  return builtin_options_as_DepthToSpaceOptions();
-}
-
-template<> inline const tflite::NonMaxSuppressionV4Options *Operator::builtin_options_as<tflite::NonMaxSuppressionV4Options>() const {
-  return builtin_options_as_NonMaxSuppressionV4Options();
-}
-
-template<> inline const tflite::NonMaxSuppressionV5Options *Operator::builtin_options_as<tflite::NonMaxSuppressionV5Options>() const {
-  return builtin_options_as_NonMaxSuppressionV5Options();
-}
-
-template<> inline const tflite::ScatterNdOptions *Operator::builtin_options_as<tflite::ScatterNdOptions>() const {
-  return builtin_options_as_ScatterNdOptions();
-}
-
-template<> inline const tflite::SelectV2Options *Operator::builtin_options_as<tflite::SelectV2Options>() const {
-  return builtin_options_as_SelectV2Options();
-}
-
-template<> inline const tflite::DensifyOptions *Operator::builtin_options_as<tflite::DensifyOptions>() const {
-  return builtin_options_as_DensifyOptions();
-}
-
-template<> inline const tflite::SegmentSumOptions *Operator::builtin_options_as<tflite::SegmentSumOptions>() const {
-  return builtin_options_as_SegmentSumOptions();
-}
-
-template<> inline const tflite::BatchMatMulOptions *Operator::builtin_options_as<tflite::BatchMatMulOptions>() const {
-  return builtin_options_as_BatchMatMulOptions();
-}
-
-template<> inline const tflite::CumsumOptions *Operator::builtin_options_as<tflite::CumsumOptions>() const {
-  return builtin_options_as_CumsumOptions();
-}
-
-template<> inline const tflite::CallOnceOptions *Operator::builtin_options_as<tflite::CallOnceOptions>() const {
-  return builtin_options_as_CallOnceOptions();
-}
-
-template<> inline const tflite::BroadcastToOptions *Operator::builtin_options_as<tflite::BroadcastToOptions>() const {
-  return builtin_options_as_BroadcastToOptions();
-}
-
-template<> inline const tflite::Rfft2dOptions *Operator::builtin_options_as<tflite::Rfft2dOptions>() const {
-  return builtin_options_as_Rfft2dOptions();
-}
-
-template<> inline const tflite::Conv3DOptions *Operator::builtin_options_as<tflite::Conv3DOptions>() const {
-  return builtin_options_as_Conv3DOptions();
-}
-
-template<> inline const tflite::HashtableOptions *Operator::builtin_options_as<tflite::HashtableOptions>() const {
-  return builtin_options_as_HashtableOptions();
-}
-
-template<> inline const tflite::HashtableFindOptions *Operator::builtin_options_as<tflite::HashtableFindOptions>() const {
-  return builtin_options_as_HashtableFindOptions();
-}
-
-template<> inline const tflite::HashtableImportOptions *Operator::builtin_options_as<tflite::HashtableImportOptions>() const {
-  return builtin_options_as_HashtableImportOptions();
-}
-
-template<> inline const tflite::HashtableSizeOptions *Operator::builtin_options_as<tflite::HashtableSizeOptions>() const {
-  return builtin_options_as_HashtableSizeOptions();
-}
-
-struct OperatorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_opcode_index(uint32_t opcode_index) {
-    fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);
-  }
-  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
-    fbb_.AddOffset(Operator::VT_INPUTS, inputs);
-  }
-  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
-    fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
-  }
-  void add_builtin_options_type(tflite::BuiltinOptions builtin_options_type) {
-    fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast<uint8_t>(builtin_options_type), 0);
-  }
-  void add_builtin_options(flatbuffers::Offset<void> builtin_options) {
-    fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
-  }
-  void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) {
-    fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
-  }
-  void add_custom_options_format(tflite::CustomOptionsFormat custom_options_format) {
-    fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast<int8_t>(custom_options_format), 0);
-  }
-  void add_mutating_variable_inputs(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) {
-    fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
-  }
-  void add_intermediates(flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates) {
-    fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates);
-  }
-  explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  OperatorBuilder &operator=(const OperatorBuilder &);
-  flatbuffers::Offset<Operator> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Operator>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Operator> CreateOperator(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t opcode_index = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
-    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
-    flatbuffers::Offset<void> builtin_options = 0,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
-    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) {
-  OperatorBuilder builder_(_fbb);
-  builder_.add_intermediates(intermediates);
-  builder_.add_mutating_variable_inputs(mutating_variable_inputs);
-  builder_.add_custom_options(custom_options);
-  builder_.add_builtin_options(builtin_options);
-  builder_.add_outputs(outputs);
-  builder_.add_inputs(inputs);
-  builder_.add_opcode_index(opcode_index);
-  builder_.add_custom_options_format(custom_options_format);
-  builder_.add_builtin_options_type(builtin_options_type);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Operator> CreateOperatorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t opcode_index = 0,
-    const std::vector<int32_t> *inputs = nullptr,
-    const std::vector<int32_t> *outputs = nullptr,
-    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
-    flatbuffers::Offset<void> builtin_options = 0,
-    const std::vector<uint8_t> *custom_options = nullptr,
-    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
-    const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
-    const std::vector<int32_t> *intermediates = nullptr) {
-  auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
-  auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
-  auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
-  auto mutating_variable_inputs__ = mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
-  auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
-  return tflite::CreateOperator(
-      _fbb,
-      opcode_index,
-      inputs__,
-      outputs__,
-      builtin_options_type,
-      builtin_options,
-      custom_options__,
-      custom_options_format,
-      mutating_variable_inputs__,
-      intermediates__);
-}
-
-flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SubGraphT : public flatbuffers::NativeTable {
-  typedef SubGraph TableType;
-  std::vector<std::unique_ptr<tflite::TensorT>> tensors;
-  std::vector<int32_t> inputs;
-  std::vector<int32_t> outputs;
-  std::vector<std::unique_ptr<tflite::OperatorT>> operators;
-  std::string name;
-  SubGraphT() {
-  }
-};
-
-struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SubGraphT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_TENSORS = 4,
-    VT_INPUTS = 6,
-    VT_OUTPUTS = 8,
-    VT_OPERATORS = 10,
-    VT_NAME = 12
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>> *tensors() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>> *>(VT_TENSORS);
-  }
-  const flatbuffers::Vector<int32_t> *inputs() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
-  }
-  const flatbuffers::Vector<int32_t> *outputs() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>> *operators() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>> *>(VT_OPERATORS);
-  }
-  const flatbuffers::String *name() const {
-    return GetPointer<const flatbuffers::String *>(VT_NAME);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_TENSORS) &&
-           verifier.VerifyVector(tensors()) &&
-           verifier.VerifyVectorOfTables(tensors()) &&
-           VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.VerifyVector(inputs()) &&
-           VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.VerifyVector(outputs()) &&
-           VerifyOffset(verifier, VT_OPERATORS) &&
-           verifier.VerifyVector(operators()) &&
-           verifier.VerifyVectorOfTables(operators()) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.VerifyString(name()) &&
-           verifier.EndTable();
-  }
-  SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SubGraph> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SubGraphBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>> tensors) {
-    fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
-  }
-  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
-    fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
-  }
-  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
-    fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
-  }
-  void add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>> operators) {
-    fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
-  }
-  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
-    fbb_.AddOffset(SubGraph::VT_NAME, name);
-  }
-  explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SubGraphBuilder &operator=(const SubGraphBuilder &);
-  flatbuffers::Offset<SubGraph> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SubGraph>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SubGraph> CreateSubGraph(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>> tensors = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>> operators = 0,
-    flatbuffers::Offset<flatbuffers::String> name = 0) {
-  SubGraphBuilder builder_(_fbb);
-  builder_.add_name(name);
-  builder_.add_operators(operators);
-  builder_.add_outputs(outputs);
-  builder_.add_inputs(inputs);
-  builder_.add_tensors(tensors);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<tflite::Tensor>> *tensors = nullptr,
-    const std::vector<int32_t> *inputs = nullptr,
-    const std::vector<int32_t> *outputs = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::Operator>> *operators = nullptr,
-    const char *name = nullptr) {
-  auto tensors__ = tensors ? _fbb.CreateVector<flatbuffers::Offset<tflite::Tensor>>(*tensors) : 0;
-  auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
-  auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
-  auto operators__ = operators ? _fbb.CreateVector<flatbuffers::Offset<tflite::Operator>>(*operators) : 0;
-  auto name__ = name ? _fbb.CreateString(name) : 0;
-  return tflite::CreateSubGraph(
-      _fbb,
-      tensors__,
-      inputs__,
-      outputs__,
-      operators__,
-      name__);
-}
-
-flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BufferT : public flatbuffers::NativeTable {
-  typedef Buffer TableType;
-  std::vector<uint8_t> data;
-  BufferT() {
-  }
-};
-
-struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BufferT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_DATA = 4
-  };
-  const flatbuffers::Vector<uint8_t> *data() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_DATA) &&
-           verifier.VerifyVector(data()) &&
-           verifier.EndTable();
-  }
-  BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Buffer> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BufferBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) {
-    fbb_.AddOffset(Buffer::VT_DATA, data);
-  }
-  explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BufferBuilder &operator=(const BufferBuilder &);
-  flatbuffers::Offset<Buffer> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Buffer>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Buffer> CreateBuffer(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0) {
-  BufferBuilder builder_(_fbb);
-  builder_.add_data(data);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Buffer> CreateBufferDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<uint8_t> *data = nullptr) {
-  if (data) { _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); }
-  auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
-  return tflite::CreateBuffer(
-      _fbb,
-      data__);
-}
-
-flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct MetadataT : public flatbuffers::NativeTable {
-  typedef Metadata TableType;
-  std::string name;
-  uint32_t buffer;
-  MetadataT()
-      : buffer(0) {
-  }
-};
-
-struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef MetadataT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NAME = 4,
-    VT_BUFFER = 6
-  };
-  const flatbuffers::String *name() const {
-    return GetPointer<const flatbuffers::String *>(VT_NAME);
-  }
-  uint32_t buffer() const {
-    return GetField<uint32_t>(VT_BUFFER, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.VerifyString(name()) &&
-           VerifyField<uint32_t>(verifier, VT_BUFFER) &&
-           verifier.EndTable();
-  }
-  MetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Metadata> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct MetadataBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
-    fbb_.AddOffset(Metadata::VT_NAME, name);
-  }
-  void add_buffer(uint32_t buffer) {
-    fbb_.AddElement<uint32_t>(Metadata::VT_BUFFER, buffer, 0);
-  }
-  explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  MetadataBuilder &operator=(const MetadataBuilder &);
-  flatbuffers::Offset<Metadata> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Metadata>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Metadata> CreateMetadata(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> name = 0,
-    uint32_t buffer = 0) {
-  MetadataBuilder builder_(_fbb);
-  builder_.add_buffer(buffer);
-  builder_.add_name(name);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Metadata> CreateMetadataDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *name = nullptr,
-    uint32_t buffer = 0) {
-  auto name__ = name ? _fbb.CreateString(name) : 0;
-  return tflite::CreateMetadata(
-      _fbb,
-      name__,
-      buffer);
-}
-
-flatbuffers::Offset<Metadata> CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TensorMapT : public flatbuffers::NativeTable {
-  typedef TensorMap TableType;
-  std::string name;
-  uint32_t tensor_index;
-  TensorMapT()
-      : tensor_index(0) {
-  }
-};
-
-struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TensorMapT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_NAME = 4,
-    VT_TENSOR_INDEX = 6
-  };
-  const flatbuffers::String *name() const {
-    return GetPointer<const flatbuffers::String *>(VT_NAME);
-  }
-  uint32_t tensor_index() const {
-    return GetField<uint32_t>(VT_TENSOR_INDEX, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.VerifyString(name()) &&
-           VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
-           verifier.EndTable();
-  }
-  TensorMapT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TensorMap> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TensorMapBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
-    fbb_.AddOffset(TensorMap::VT_NAME, name);
-  }
-  void add_tensor_index(uint32_t tensor_index) {
-    fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
-  }
-  explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TensorMapBuilder &operator=(const TensorMapBuilder &);
-  flatbuffers::Offset<TensorMap> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TensorMap>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TensorMap> CreateTensorMap(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> name = 0,
-    uint32_t tensor_index = 0) {
-  TensorMapBuilder builder_(_fbb);
-  builder_.add_tensor_index(tensor_index);
-  builder_.add_name(name);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *name = nullptr,
-    uint32_t tensor_index = 0) {
-  auto name__ = name ? _fbb.CreateString(name) : 0;
-  return tflite::CreateTensorMap(
-      _fbb,
-      name__,
-      tensor_index);
-}
-
-flatbuffers::Offset<TensorMap> CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SignatureDefT : public flatbuffers::NativeTable {
-  typedef SignatureDef TableType;
-  std::vector<std::unique_ptr<tflite::TensorMapT>> inputs;
-  std::vector<std::unique_ptr<tflite::TensorMapT>> outputs;
-  std::string method_name;
-  std::string key;
-  SignatureDefT() {
-  }
-};
-
-struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SignatureDefT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_INPUTS = 4,
-    VT_OUTPUTS = 6,
-    VT_METHOD_NAME = 8,
-    VT_KEY = 10
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *inputs() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *>(VT_INPUTS);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *outputs() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *>(VT_OUTPUTS);
-  }
-  const flatbuffers::String *method_name() const {
-    return GetPointer<const flatbuffers::String *>(VT_METHOD_NAME);
-  }
-  const flatbuffers::String *key() const {
-    return GetPointer<const flatbuffers::String *>(VT_KEY);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_INPUTS) &&
-           verifier.VerifyVector(inputs()) &&
-           verifier.VerifyVectorOfTables(inputs()) &&
-           VerifyOffset(verifier, VT_OUTPUTS) &&
-           verifier.VerifyVector(outputs()) &&
-           verifier.VerifyVectorOfTables(outputs()) &&
-           VerifyOffset(verifier, VT_METHOD_NAME) &&
-           verifier.VerifyString(method_name()) &&
-           VerifyOffset(verifier, VT_KEY) &&
-           verifier.VerifyString(key()) &&
-           verifier.EndTable();
-  }
-  SignatureDefT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SignatureDef> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SignatureDefBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> inputs) {
-    fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
-  }
-  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> outputs) {
-    fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
-  }
-  void add_method_name(flatbuffers::Offset<flatbuffers::String> method_name) {
-    fbb_.AddOffset(SignatureDef::VT_METHOD_NAME, method_name);
-  }
-  void add_key(flatbuffers::Offset<flatbuffers::String> key) {
-    fbb_.AddOffset(SignatureDef::VT_KEY, key);
-  }
-  explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SignatureDefBuilder &operator=(const SignatureDefBuilder &);
-  flatbuffers::Offset<SignatureDef> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SignatureDef>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> inputs = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> outputs = 0,
-    flatbuffers::Offset<flatbuffers::String> method_name = 0,
-    flatbuffers::Offset<flatbuffers::String> key = 0) {
-  SignatureDefBuilder builder_(_fbb);
-  builder_.add_key(key);
-  builder_.add_method_name(method_name);
-  builder_.add_outputs(outputs);
-  builder_.add_inputs(inputs);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<tflite::TensorMap>> *inputs = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::TensorMap>> *outputs = nullptr,
-    const char *method_name = nullptr,
-    const char *key = nullptr) {
-  auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(*inputs) : 0;
-  auto outputs__ = outputs ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(*outputs) : 0;
-  auto method_name__ = method_name ? _fbb.CreateString(method_name) : 0;
-  auto key__ = key ? _fbb.CreateString(key) : 0;
-  return tflite::CreateSignatureDef(
-      _fbb,
-      inputs__,
-      outputs__,
-      method_name__,
-      key__);
-}
-
-flatbuffers::Offset<SignatureDef> CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ModelT : public flatbuffers::NativeTable {
-  typedef Model TableType;
-  uint32_t version;
-  std::vector<std::unique_ptr<tflite::OperatorCodeT>> operator_codes;
-  std::vector<std::unique_ptr<tflite::SubGraphT>> subgraphs;
-  std::string description;
-  std::vector<std::unique_ptr<tflite::BufferT>> buffers;
-  std::vector<int32_t> metadata_buffer;
-  std::vector<std::unique_ptr<tflite::MetadataT>> metadata;
-  std::vector<std::unique_ptr<tflite::SignatureDefT>> signature_defs;
-  ModelT()
-      : version(0) {
-  }
-};
-
-struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ModelT NativeTableType;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
-    VT_VERSION = 4,
-    VT_OPERATOR_CODES = 6,
-    VT_SUBGRAPHS = 8,
-    VT_DESCRIPTION = 10,
-    VT_BUFFERS = 12,
-    VT_METADATA_BUFFER = 14,
-    VT_METADATA = 16,
-    VT_SIGNATURE_DEFS = 18
-  };
-  uint32_t version() const {
-    return GetField<uint32_t>(VT_VERSION, 0);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *operator_codes() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *>(VT_OPERATOR_CODES);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>> *subgraphs() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>> *>(VT_SUBGRAPHS);
-  }
-  const flatbuffers::String *description() const {
-    return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>> *buffers() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>> *>(VT_BUFFERS);
-  }
-  const flatbuffers::Vector<int32_t> *metadata_buffer() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>> *metadata() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>> *>(VT_METADATA);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>> *signature_defs() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>> *>(VT_SIGNATURE_DEFS);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint32_t>(verifier, VT_VERSION) &&
-           VerifyOffset(verifier, VT_OPERATOR_CODES) &&
-           verifier.VerifyVector(operator_codes()) &&
-           verifier.VerifyVectorOfTables(operator_codes()) &&
-           VerifyOffset(verifier, VT_SUBGRAPHS) &&
-           verifier.VerifyVector(subgraphs()) &&
-           verifier.VerifyVectorOfTables(subgraphs()) &&
-           VerifyOffset(verifier, VT_DESCRIPTION) &&
-           verifier.VerifyString(description()) &&
-           VerifyOffset(verifier, VT_BUFFERS) &&
-           verifier.VerifyVector(buffers()) &&
-           verifier.VerifyVectorOfTables(buffers()) &&
-           VerifyOffset(verifier, VT_METADATA_BUFFER) &&
-           verifier.VerifyVector(metadata_buffer()) &&
-           VerifyOffset(verifier, VT_METADATA) &&
-           verifier.VerifyVector(metadata()) &&
-           verifier.VerifyVectorOfTables(metadata()) &&
-           VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
-           verifier.VerifyVector(signature_defs()) &&
-           verifier.VerifyVectorOfTables(signature_defs()) &&
-           verifier.EndTable();
-  }
-  ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Model> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ModelBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_version(uint32_t version) {
-    fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0);
-  }
-  void add_operator_codes(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>>> operator_codes) {
-    fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
-  }
-  void add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>> subgraphs) {
-    fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
-  }
-  void add_description(flatbuffers::Offset<flatbuffers::String> description) {
-    fbb_.AddOffset(Model::VT_DESCRIPTION, description);
-  }
-  void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>> buffers) {
-    fbb_.AddOffset(Model::VT_BUFFERS, buffers);
-  }
-  void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer) {
-    fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
-  }
-  void add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>> metadata) {
-    fbb_.AddOffset(Model::VT_METADATA, metadata);
-  }
-  void add_signature_defs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>> signature_defs) {
-    fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
-  }
-  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ModelBuilder &operator=(const ModelBuilder &);
-  flatbuffers::Offset<Model> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Model>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Model> CreateModel(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t version = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>>> operator_codes = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>> subgraphs = 0,
-    flatbuffers::Offset<flatbuffers::String> description = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>> buffers = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>> metadata = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>> signature_defs = 0) {
-  ModelBuilder builder_(_fbb);
-  builder_.add_signature_defs(signature_defs);
-  builder_.add_metadata(metadata);
-  builder_.add_metadata_buffer(metadata_buffer);
-  builder_.add_buffers(buffers);
-  builder_.add_description(description);
-  builder_.add_subgraphs(subgraphs);
-  builder_.add_operator_codes(operator_codes);
-  builder_.add_version(version);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Model> CreateModelDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    uint32_t version = 0,
-    const std::vector<flatbuffers::Offset<tflite::OperatorCode>> *operator_codes = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::SubGraph>> *subgraphs = nullptr,
-    const char *description = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::Buffer>> *buffers = nullptr,
-    const std::vector<int32_t> *metadata_buffer = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::Metadata>> *metadata = nullptr,
-    const std::vector<flatbuffers::Offset<tflite::SignatureDef>> *signature_defs = nullptr) {
-  auto operator_codes__ = operator_codes ? _fbb.CreateVector<flatbuffers::Offset<tflite::OperatorCode>>(*operator_codes) : 0;
-  auto subgraphs__ = subgraphs ? _fbb.CreateVector<flatbuffers::Offset<tflite::SubGraph>>(*subgraphs) : 0;
-  auto description__ = description ? _fbb.CreateString(description) : 0;
-  auto buffers__ = buffers ? _fbb.CreateVector<flatbuffers::Offset<tflite::Buffer>>(*buffers) : 0;
-  auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
-  auto metadata__ = metadata ? _fbb.CreateVector<flatbuffers::Offset<tflite::Metadata>>(*metadata) : 0;
-  auto signature_defs__ = signature_defs ? _fbb.CreateVector<flatbuffers::Offset<tflite::SignatureDef>>(*signature_defs) : 0;
-  return tflite::CreateModel(
-      _fbb,
-      version,
-      operator_codes__,
-      subgraphs__,
-      description__,
-      buffers__,
-      metadata_buffer__,
-      metadata__,
-      signature_defs__);
-}
-
-flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-inline CustomQuantizationT *CustomQuantization::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CustomQuantizationT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CustomQuantization::UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = custom(); if (_e) { _o->custom.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->custom.begin()); } }
-}
-
-inline flatbuffers::Offset<CustomQuantization> CustomQuantization::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCustomQuantization(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CustomQuantizationT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  _fbb.ForceVectorAlignment(_o->custom.size(), sizeof(uint8_t), 16);
-  auto _custom = _o->custom.size() ? _fbb.CreateVector(_o->custom) : 0;
-  return tflite::CreateCustomQuantization(
-      _fbb,
-      _custom);
-}
-
-inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new QuantizationParametersT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = min(); if (_e) { _o->min.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->min[_i] = _e->Get(_i); } } }
-  { auto _e = max(); if (_e) { _o->max.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->max[_i] = _e->Get(_i); } } }
-  { auto _e = scale(); if (_e) { _o->scale.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->scale[_i] = _e->Get(_i); } } }
-  { auto _e = zero_point(); if (_e) { _o->zero_point.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->zero_point[_i] = _e->Get(_i); } } }
-  { auto _e = details_type(); _o->details.type = _e; }
-  { auto _e = details(); if (_e) _o->details.value = tflite::QuantizationDetailsUnion::UnPack(_e, details_type(), _resolver); }
-  { auto _e = quantized_dimension(); _o->quantized_dimension = _e; }
-}
-
-inline flatbuffers::Offset<QuantizationParameters> QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateQuantizationParameters(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizationParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0;
-  auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0;
-  auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0;
-  auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0;
-  auto _details_type = _o->details.type;
-  auto _details = _o->details.Pack(_fbb);
-  auto _quantized_dimension = _o->quantized_dimension;
-  return tflite::CreateQuantizationParameters(
-      _fbb,
-      _min,
-      _max,
-      _scale,
-      _zero_point,
-      _details_type,
-      _details,
-      _quantized_dimension);
-}
-
-inline Int32VectorT *Int32Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Int32VectorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Int32Vector::UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = values(); if (_e) { _o->values.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->values[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<Int32Vector> Int32Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateInt32Vector(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Int32Vector> CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Int32VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
-  return tflite::CreateInt32Vector(
-      _fbb,
-      _values);
-}
-
-inline Uint16VectorT *Uint16Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Uint16VectorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Uint16Vector::UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = values(); if (_e) { _o->values.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->values[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<Uint16Vector> Uint16Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateUint16Vector(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Uint16Vector> CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Uint16VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint16_t), 4);
-  auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
-  return tflite::CreateUint16Vector(
-      _fbb,
-      _values);
-}
-
-inline Uint8VectorT *Uint8Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Uint8VectorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Uint8Vector::UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = values(); if (_e) { _o->values.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->values.begin()); } }
-}
-
-inline flatbuffers::Offset<Uint8Vector> Uint8Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateUint8Vector(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Uint8Vector> CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Uint8VectorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint8_t), 4);
-  auto _values = _o->values.size() ? _fbb.CreateVector(_o->values) : 0;
-  return tflite::CreateUint8Vector(
-      _fbb,
-      _values);
-}
-
-inline DimensionMetadataT *DimensionMetadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DimensionMetadataT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DimensionMetadata::UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = format(); _o->format = _e; }
-  { auto _e = dense_size(); _o->dense_size = _e; }
-  { auto _e = array_segments_type(); _o->array_segments.type = _e; }
-  { auto _e = array_segments(); if (_e) _o->array_segments.value = tflite::SparseIndexVectorUnion::UnPack(_e, array_segments_type(), _resolver); }
-  { auto _e = array_indices_type(); _o->array_indices.type = _e; }
-  { auto _e = array_indices(); if (_e) _o->array_indices.value = tflite::SparseIndexVectorUnion::UnPack(_e, array_indices_type(), _resolver); }
-}
-
-inline flatbuffers::Offset<DimensionMetadata> DimensionMetadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDimensionMetadata(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DimensionMetadataT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _format = _o->format;
-  auto _dense_size = _o->dense_size;
-  auto _array_segments_type = _o->array_segments.type;
-  auto _array_segments = _o->array_segments.Pack(_fbb);
-  auto _array_indices_type = _o->array_indices.type;
-  auto _array_indices = _o->array_indices.Pack(_fbb);
-  return tflite::CreateDimensionMetadata(
-      _fbb,
-      _format,
-      _dense_size,
-      _array_segments_type,
-      _array_segments,
-      _array_indices_type,
-      _array_indices);
-}
-
-inline SparsityParametersT *SparsityParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SparsityParametersT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SparsityParameters::UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = traversal_order(); if (_e) { _o->traversal_order.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->traversal_order[_i] = _e->Get(_i); } } }
-  { auto _e = block_map(); if (_e) { _o->block_map.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->block_map[_i] = _e->Get(_i); } } }
-  { auto _e = dim_metadata(); if (_e) { _o->dim_metadata.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->dim_metadata[_i] = std::unique_ptr<tflite::DimensionMetadataT>(_e->Get(_i)->UnPack(_resolver)); } } }
-}
-
-inline flatbuffers::Offset<SparsityParameters> SparsityParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSparsityParameters(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SparsityParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _traversal_order = _o->traversal_order.size() ? _fbb.CreateVector(_o->traversal_order) : 0;
-  auto _block_map = _o->block_map.size() ? _fbb.CreateVector(_o->block_map) : 0;
-  auto _dim_metadata = _o->dim_metadata.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::DimensionMetadata>> (_o->dim_metadata.size(), [](size_t i, _VectorArgs *__va) { return CreateDimensionMetadata(*__va->__fbb, __va->__o->dim_metadata[i].get(), __va->__rehasher); }, &_va ) : 0;
-  return tflite::CreateSparsityParameters(
-      _fbb,
-      _traversal_order,
-      _block_map,
-      _dim_metadata);
-}
-
-inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TensorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } }
-  { auto _e = type(); _o->type = _e; }
-  { auto _e = buffer(); _o->buffer = _e; }
-  { auto _e = name(); if (_e) _o->name = _e->str(); }
-  { auto _e = quantization(); if (_e) _o->quantization = std::unique_ptr<tflite::QuantizationParametersT>(_e->UnPack(_resolver)); }
-  { auto _e = is_variable(); _o->is_variable = _e; }
-  { auto _e = sparsity(); if (_e) _o->sparsity = std::unique_ptr<tflite::SparsityParametersT>(_e->UnPack(_resolver)); }
-  { auto _e = shape_signature(); if (_e) { _o->shape_signature.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape_signature[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<Tensor> Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTensor(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0;
-  auto _type = _o->type;
-  auto _buffer = _o->buffer;
-  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  auto _quantization = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0;
-  auto _is_variable = _o->is_variable;
-  auto _sparsity = _o->sparsity ? CreateSparsityParameters(_fbb, _o->sparsity.get(), _rehasher) : 0;
-  auto _shape_signature = _o->shape_signature.size() ? _fbb.CreateVector(_o->shape_signature) : 0;
-  return tflite::CreateTensor(
-      _fbb,
-      _shape,
-      _type,
-      _buffer,
-      _name,
-      _quantization,
-      _is_variable,
-      _sparsity,
-      _shape_signature);
-}
-
-inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Conv2DOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; }
-  { auto _e = stride_w(); _o->stride_w = _e; }
-  { auto _e = stride_h(); _o->stride_h = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }
-  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }
-}
-
-inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateConv2DOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _padding = _o->padding;
-  auto _stride_w = _o->stride_w;
-  auto _stride_h = _o->stride_h;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _dilation_w_factor = _o->dilation_w_factor;
-  auto _dilation_h_factor = _o->dilation_h_factor;
-  return tflite::CreateConv2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _fused_activation_function,
-      _dilation_w_factor,
-      _dilation_h_factor);
-}
-
-inline Conv3DOptionsT *Conv3DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Conv3DOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Conv3DOptions::UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; }
-  { auto _e = stride_d(); _o->stride_d = _e; }
-  { auto _e = stride_w(); _o->stride_w = _e; }
-  { auto _e = stride_h(); _o->stride_h = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = dilation_d_factor(); _o->dilation_d_factor = _e; }
-  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }
-  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }
-}
-
-inline flatbuffers::Offset<Conv3DOptions> Conv3DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateConv3DOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv3DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _padding = _o->padding;
-  auto _stride_d = _o->stride_d;
-  auto _stride_w = _o->stride_w;
-  auto _stride_h = _o->stride_h;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _dilation_d_factor = _o->dilation_d_factor;
-  auto _dilation_w_factor = _o->dilation_w_factor;
-  auto _dilation_h_factor = _o->dilation_h_factor;
-  return tflite::CreateConv3DOptions(
-      _fbb,
-      _padding,
-      _stride_d,
-      _stride_w,
-      _stride_h,
-      _fused_activation_function,
-      _dilation_d_factor,
-      _dilation_w_factor,
-      _dilation_h_factor);
-}
-
-inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Pool2DOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; }
-  { auto _e = stride_w(); _o->stride_w = _e; }
-  { auto _e = stride_h(); _o->stride_h = _e; }
-  { auto _e = filter_width(); _o->filter_width = _e; }
-  { auto _e = filter_height(); _o->filter_height = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-}
-
-inline flatbuffers::Offset<Pool2DOptions> Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePool2DOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Pool2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _padding = _o->padding;
-  auto _stride_w = _o->stride_w;
-  auto _stride_h = _o->stride_h;
-  auto _filter_width = _o->filter_width;
-  auto _filter_height = _o->filter_height;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreatePool2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _filter_width,
-      _filter_height,
-      _fused_activation_function);
-}
-
-inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DepthwiseConv2DOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; }
-  { auto _e = stride_w(); _o->stride_w = _e; }
-  { auto _e = stride_h(); _o->stride_h = _e; }
-  { auto _e = depth_multiplier(); _o->depth_multiplier = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = dilation_w_factor(); _o->dilation_w_factor = _e; }
-  { auto _e = dilation_h_factor(); _o->dilation_h_factor = _e; }
-}
-
-inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthwiseConv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _padding = _o->padding;
-  auto _stride_w = _o->stride_w;
-  auto _stride_h = _o->stride_h;
-  auto _depth_multiplier = _o->depth_multiplier;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _dilation_w_factor = _o->dilation_w_factor;
-  auto _dilation_h_factor = _o->dilation_h_factor;
-  return tflite::CreateDepthwiseConv2DOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h,
-      _depth_multiplier,
-      _fused_activation_function,
-      _dilation_w_factor,
-      _dilation_h_factor);
-}
-
-inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ConcatEmbeddingsOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = num_channels(); _o->num_channels = _e; }
-  { auto _e = num_columns_per_channel(); if (_e) { _o->num_columns_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->num_columns_per_channel[_i] = _e->Get(_i); } } }
-  { auto _e = embedding_dim_per_channel(); if (_e) { _o->embedding_dim_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_dim_per_channel[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatEmbeddingsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _num_channels = _o->num_channels;
-  auto _num_columns_per_channel = _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0;
-  auto _embedding_dim_per_channel = _o->embedding_dim_per_channel.size() ? _fbb.CreateVector(_o->embedding_dim_per_channel) : 0;
-  return tflite::CreateConcatEmbeddingsOptions(
-      _fbb,
-      _num_channels,
-      _num_columns_per_channel,
-      _embedding_dim_per_channel);
-}
-
-inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LSHProjectionOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = type(); _o->type = _e; }
-}
-
-inline flatbuffers::Offset<LSHProjectionOptions> LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLSHProjectionOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSHProjectionOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _type = _o->type;
-  return tflite::CreateLSHProjectionOptions(
-      _fbb,
-      _type);
-}
-
-inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SVDFOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = rank(); _o->rank = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<SVDFOptions> SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSVDFOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SVDFOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _rank = _o->rank;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateSVDFOptions(
-      _fbb,
-      _rank,
-      _fused_activation_function,
-      _asymmetric_quantize_inputs);
-}
-
-inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new RNNOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<RNNOptions> RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRNNOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateRNNOptions(
-      _fbb,
-      _fused_activation_function,
-      _asymmetric_quantize_inputs);
-}
-
-inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SequenceRNNOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SequenceRNNOptions::UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = time_major(); _o->time_major = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<SequenceRNNOptions> SequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSequenceRNNOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _time_major = _o->time_major;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateSequenceRNNOptions(
-      _fbb,
-      _time_major,
-      _fused_activation_function,
-      _asymmetric_quantize_inputs);
-}
-
-inline BidirectionalSequenceRNNOptionsT *BidirectionalSequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BidirectionalSequenceRNNOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = time_major(); _o->time_major = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = merge_outputs(); _o->merge_outputs = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBidirectionalSequenceRNNOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _time_major = _o->time_major;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _merge_outputs = _o->merge_outputs;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateBidirectionalSequenceRNNOptions(
-      _fbb,
-      _time_major,
-      _fused_activation_function,
-      _merge_outputs,
-      _asymmetric_quantize_inputs);
-}
-
-inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FullyConnectedOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = weights_format(); _o->weights_format = _e; }
-  { auto _e = keep_num_dims(); _o->keep_num_dims = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<FullyConnectedOptions> FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFullyConnectedOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _weights_format = _o->weights_format;
-  auto _keep_num_dims = _o->keep_num_dims;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateFullyConnectedOptions(
-      _fbb,
-      _fused_activation_function,
-      _weights_format,
-      _keep_num_dims,
-      _asymmetric_quantize_inputs);
-}
-
-inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SoftmaxOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = beta(); _o->beta = _e; }
-}
-
-inline flatbuffers::Offset<SoftmaxOptions> SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSoftmaxOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _beta = _o->beta;
-  return tflite::CreateSoftmaxOptions(
-      _fbb,
-      _beta);
-}
-
-inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ConcatenationOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = axis(); _o->axis = _e; }
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-}
-
-inline flatbuffers::Offset<ConcatenationOptions> ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateConcatenationOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _axis = _o->axis;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateConcatenationOptions(
-      _fbb,
-      _axis,
-      _fused_activation_function);
-}
-
-inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new AddOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = pot_scale_int16(); _o->pot_scale_int16 = _e; }
-}
-
-inline flatbuffers::Offset<AddOptions> AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateAddOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _pot_scale_int16 = _o->pot_scale_int16;
-  return tflite::CreateAddOptions(
-      _fbb,
-      _fused_activation_function,
-      _pot_scale_int16);
-}
-
-inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MulOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-}
-
-inline flatbuffers::Offset<MulOptions> MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMulOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateMulOptions(
-      _fbb,
-      _fused_activation_function);
-}
-
-inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new L2NormOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-}
-
-inline flatbuffers::Offset<L2NormOptions> L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateL2NormOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateL2NormOptions(
-      _fbb,
-      _fused_activation_function);
-}
-
-inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LocalResponseNormalizationOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = radius(); _o->radius = _e; }
-  { auto _e = bias(); _o->bias = _e; }
-  { auto _e = alpha(); _o->alpha = _e; }
-  { auto _e = beta(); _o->beta = _e; }
-}
-
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _radius = _o->radius;
-  auto _bias = _o->bias;
-  auto _alpha = _o->alpha;
-  auto _beta = _o->beta;
-  return tflite::CreateLocalResponseNormalizationOptions(
-      _fbb,
-      _radius,
-      _bias,
-      _alpha,
-      _beta);
-}
-
-inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LSTMOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = cell_clip(); _o->cell_clip = _e; }
-  { auto _e = proj_clip(); _o->proj_clip = _e; }
-  { auto _e = kernel_type(); _o->kernel_type = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<LSTMOptions> LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLSTMOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _cell_clip = _o->cell_clip;
-  auto _proj_clip = _o->proj_clip;
-  auto _kernel_type = _o->kernel_type;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateLSTMOptions(
-      _fbb,
-      _fused_activation_function,
-      _cell_clip,
-      _proj_clip,
-      _kernel_type,
-      _asymmetric_quantize_inputs);
-}
-
-inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new UnidirectionalSequenceLSTMOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = cell_clip(); _o->cell_clip = _e; }
-  { auto _e = proj_clip(); _o->proj_clip = _e; }
-  { auto _e = time_major(); _o->time_major = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _cell_clip = _o->cell_clip;
-  auto _proj_clip = _o->proj_clip;
-  auto _time_major = _o->time_major;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateUnidirectionalSequenceLSTMOptions(
-      _fbb,
-      _fused_activation_function,
-      _cell_clip,
-      _proj_clip,
-      _time_major,
-      _asymmetric_quantize_inputs);
-}
-
-inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BidirectionalSequenceLSTMOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = cell_clip(); _o->cell_clip = _e; }
-  { auto _e = proj_clip(); _o->proj_clip = _e; }
-  { auto _e = merge_outputs(); _o->merge_outputs = _e; }
-  { auto _e = time_major(); _o->time_major = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _cell_clip = _o->cell_clip;
-  auto _proj_clip = _o->proj_clip;
-  auto _merge_outputs = _o->merge_outputs;
-  auto _time_major = _o->time_major;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateBidirectionalSequenceLSTMOptions(
-      _fbb,
-      _fused_activation_function,
-      _cell_clip,
-      _proj_clip,
-      _merge_outputs,
-      _time_major,
-      _asymmetric_quantize_inputs);
-}
-
-inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ResizeBilinearOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = align_corners(); _o->align_corners = _e; }
-  { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; }
-}
-
-inline flatbuffers::Offset<ResizeBilinearOptions> ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _align_corners = _o->align_corners;
-  auto _half_pixel_centers = _o->half_pixel_centers;
-  return tflite::CreateResizeBilinearOptions(
-      _fbb,
-      _align_corners,
-      _half_pixel_centers);
-}
-
-inline ResizeNearestNeighborOptionsT *ResizeNearestNeighborOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ResizeNearestNeighborOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ResizeNearestNeighborOptions::UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = align_corners(); _o->align_corners = _e; }
-  { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; }
-}
-
-inline flatbuffers::Offset<ResizeNearestNeighborOptions> ResizeNearestNeighborOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateResizeNearestNeighborOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeNearestNeighborOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _align_corners = _o->align_corners;
-  auto _half_pixel_centers = _o->half_pixel_centers;
-  return tflite::CreateResizeNearestNeighborOptions(
-      _fbb,
-      _align_corners,
-      _half_pixel_centers);
-}
-
-inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CallOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = subgraph(); _o->subgraph = _e; }
-}
-
-inline flatbuffers::Offset<CallOptions> CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCallOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _subgraph = _o->subgraph;
-  return tflite::CreateCallOptions(
-      _fbb,
-      _subgraph);
-}
-
-inline PadOptionsT *PadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new PadOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void PadOptions::UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<PadOptions> PadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePadOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreatePadOptions(
-      _fbb);
-}
-
-inline PadV2OptionsT *PadV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new PadV2OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void PadV2Options::UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<PadV2Options> PadV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePadV2Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreatePadV2Options(
-      _fbb);
-}
-
-inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ReshapeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<ReshapeOptions> ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateReshapeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;
-  return tflite::CreateReshapeOptions(
-      _fbb,
-      _new_shape);
-}
-
-inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SpaceToBatchNDOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SpaceToBatchNDOptions::UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SpaceToBatchNDOptions> SpaceToBatchNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToBatchNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSpaceToBatchNDOptions(
-      _fbb);
-}
-
-inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BatchToSpaceNDOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BatchToSpaceNDOptions::UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<BatchToSpaceNDOptions> BatchToSpaceNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchToSpaceNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateBatchToSpaceNDOptions(
-      _fbb);
-}
-
-inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SkipGramOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = ngram_size(); _o->ngram_size = _e; }
-  { auto _e = max_skip_size(); _o->max_skip_size = _e; }
-  { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; }
-}
-
-inline flatbuffers::Offset<SkipGramOptions> SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSkipGramOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _ngram_size = _o->ngram_size;
-  auto _max_skip_size = _o->max_skip_size;
-  auto _include_all_ngrams = _o->include_all_ngrams;
-  return tflite::CreateSkipGramOptions(
-      _fbb,
-      _ngram_size,
-      _max_skip_size,
-      _include_all_ngrams);
-}
-
-inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SpaceToDepthOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = block_size(); _o->block_size = _e; }
-}
-
-inline flatbuffers::Offset<SpaceToDepthOptions> SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSpaceToDepthOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToDepthOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _block_size = _o->block_size;
-  return tflite::CreateSpaceToDepthOptions(
-      _fbb,
-      _block_size);
-}
-
-inline DepthToSpaceOptionsT *DepthToSpaceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DepthToSpaceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DepthToSpaceOptions::UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = block_size(); _o->block_size = _e; }
-}
-
-inline flatbuffers::Offset<DepthToSpaceOptions> DepthToSpaceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDepthToSpaceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthToSpaceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _block_size = _o->block_size;
-  return tflite::CreateDepthToSpaceOptions(
-      _fbb,
-      _block_size);
-}
-
-inline SubOptionsT *SubOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SubOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SubOptions::UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-  { auto _e = pot_scale_int16(); _o->pot_scale_int16 = _e; }
-}
-
-inline flatbuffers::Offset<SubOptions> SubOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSubOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  auto _pot_scale_int16 = _o->pot_scale_int16;
-  return tflite::CreateSubOptions(
-      _fbb,
-      _fused_activation_function,
-      _pot_scale_int16);
-}
-
-inline DivOptionsT *DivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DivOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DivOptions::UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
-}
-
-inline flatbuffers::Offset<DivOptions> DivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDivOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DivOptions> CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DivOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _fused_activation_function = _o->fused_activation_function;
-  return tflite::CreateDivOptions(
-      _fbb,
-      _fused_activation_function);
-}
-
-inline TopKV2OptionsT *TopKV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TopKV2OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TopKV2Options::UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<TopKV2Options> TopKV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTopKV2Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TopKV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateTopKV2Options(
-      _fbb);
-}
-
-inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new EmbeddingLookupSparseOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = combiner(); _o->combiner = _e; }
-}
-
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EmbeddingLookupSparseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _combiner = _o->combiner;
-  return tflite::CreateEmbeddingLookupSparseOptions(
-      _fbb,
-      _combiner);
-}
-
-inline GatherOptionsT *GatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new GatherOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void GatherOptions::UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = axis(); _o->axis = _e; }
-  { auto _e = batch_dims(); _o->batch_dims = _e; }
-}
-
-inline flatbuffers::Offset<GatherOptions> GatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateGatherOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _axis = _o->axis;
-  auto _batch_dims = _o->batch_dims;
-  return tflite::CreateGatherOptions(
-      _fbb,
-      _axis,
-      _batch_dims);
-}
-
-inline TransposeOptionsT *TransposeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TransposeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TransposeOptions::UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<TransposeOptions> TransposeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTransposeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TransposeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateTransposeOptions(
-      _fbb);
-}
-
-inline ExpOptionsT *ExpOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ExpOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ExpOptions::UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<ExpOptions> ExpOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateExpOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateExpOptions(
-      _fbb);
-}
-
-inline CosOptionsT *CosOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CosOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CosOptions::UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<CosOptions> CosOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCosOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CosOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateCosOptions(
-      _fbb);
-}
-
-inline ReducerOptionsT *ReducerOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ReducerOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ReducerOptions::UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = keep_dims(); _o->keep_dims = _e; }
-}
-
-inline flatbuffers::Offset<ReducerOptions> ReducerOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateReducerOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ReducerOptions> CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReducerOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _keep_dims = _o->keep_dims;
-  return tflite::CreateReducerOptions(
-      _fbb,
-      _keep_dims);
-}
-
-inline SqueezeOptionsT *SqueezeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SqueezeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SqueezeOptions::UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = squeeze_dims(); if (_e) { _o->squeeze_dims.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->squeeze_dims[_i] = _e->Get(_i); } } }
-}
-
-inline flatbuffers::Offset<SqueezeOptions> SqueezeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSqueezeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SqueezeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _squeeze_dims = _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0;
-  return tflite::CreateSqueezeOptions(
-      _fbb,
-      _squeeze_dims);
-}
-
-inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SplitOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SplitOptions::UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = num_splits(); _o->num_splits = _e; }
-}
-
-inline flatbuffers::Offset<SplitOptions> SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSplitOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SplitOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _num_splits = _o->num_splits;
-  return tflite::CreateSplitOptions(
-      _fbb,
-      _num_splits);
-}
-
-inline SplitVOptionsT *SplitVOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SplitVOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SplitVOptions::UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = num_splits(); _o->num_splits = _e; }
-}
-
-inline flatbuffers::Offset<SplitVOptions> SplitVOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSplitVOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SplitVOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _num_splits = _o->num_splits;
-  return tflite::CreateSplitVOptions(
-      _fbb,
-      _num_splits);
-}
-
-inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new StridedSliceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void StridedSliceOptions::UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = begin_mask(); _o->begin_mask = _e; }
-  { auto _e = end_mask(); _o->end_mask = _e; }
-  { auto _e = ellipsis_mask(); _o->ellipsis_mask = _e; }
-  { auto _e = new_axis_mask(); _o->new_axis_mask = _e; }
-  { auto _e = shrink_axis_mask(); _o->shrink_axis_mask = _e; }
-}
-
-inline flatbuffers::Offset<StridedSliceOptions> StridedSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateStridedSliceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const StridedSliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _begin_mask = _o->begin_mask;
-  auto _end_mask = _o->end_mask;
-  auto _ellipsis_mask = _o->ellipsis_mask;
-  auto _new_axis_mask = _o->new_axis_mask;
-  auto _shrink_axis_mask = _o->shrink_axis_mask;
-  return tflite::CreateStridedSliceOptions(
-      _fbb,
-      _begin_mask,
-      _end_mask,
-      _ellipsis_mask,
-      _new_axis_mask,
-      _shrink_axis_mask);
-}
-
-inline LogSoftmaxOptionsT *LogSoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LogSoftmaxOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LogSoftmaxOptions::UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LogSoftmaxOptions> LogSoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLogSoftmaxOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogSoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLogSoftmaxOptions(
-      _fbb);
-}
-
-inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CastOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CastOptions::UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = in_data_type(); _o->in_data_type = _e; }
-  { auto _e = out_data_type(); _o->out_data_type = _e; }
-}
-
-inline flatbuffers::Offset<CastOptions> CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCastOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CastOptions> CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CastOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _in_data_type = _o->in_data_type;
-  auto _out_data_type = _o->out_data_type;
-  return tflite::CreateCastOptions(
-      _fbb,
-      _in_data_type,
-      _out_data_type);
-}
-
-inline DequantizeOptionsT *DequantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DequantizeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DequantizeOptions::UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<DequantizeOptions> DequantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDequantizeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DequantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateDequantizeOptions(
-      _fbb);
-}
-
-inline MaximumMinimumOptionsT *MaximumMinimumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MaximumMinimumOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void MaximumMinimumOptions::UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<MaximumMinimumOptions> MaximumMinimumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMaximumMinimumOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MaximumMinimumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateMaximumMinimumOptions(
-      _fbb);
-}
-
-inline TileOptionsT *TileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TileOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TileOptions::UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<TileOptions> TileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTileOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateTileOptions(
-      _fbb);
-}
-
-inline ArgMaxOptionsT *ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ArgMaxOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ArgMaxOptions::UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = output_type(); _o->output_type = _e; }
-}
-
-inline flatbuffers::Offset<ArgMaxOptions> ArgMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateArgMaxOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ArgMaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _output_type = _o->output_type;
-  return tflite::CreateArgMaxOptions(
-      _fbb,
-      _output_type);
-}
-
-inline ArgMinOptionsT *ArgMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ArgMinOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ArgMinOptions::UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = output_type(); _o->output_type = _e; }
-}
-
-inline flatbuffers::Offset<ArgMinOptions> ArgMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateArgMinOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ArgMinOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _output_type = _o->output_type;
-  return tflite::CreateArgMinOptions(
-      _fbb,
-      _output_type);
-}
-
-inline GreaterOptionsT *GreaterOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new GreaterOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void GreaterOptions::UnPackTo(GreaterOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<GreaterOptions> GreaterOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateGreaterOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GreaterOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateGreaterOptions(
-      _fbb);
-}
-
-inline GreaterEqualOptionsT *GreaterEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new GreaterEqualOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void GreaterEqualOptions::UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<GreaterEqualOptions> GreaterEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateGreaterEqualOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GreaterEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateGreaterEqualOptions(
-      _fbb);
-}
-
-inline LessOptionsT *LessOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LessOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LessOptions::UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LessOptions> LessOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLessOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LessOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLessOptions(
-      _fbb);
-}
-
-inline LessEqualOptionsT *LessEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LessEqualOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LessEqualOptions::UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LessEqualOptions> LessEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLessEqualOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LessEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLessEqualOptions(
-      _fbb);
-}
-
-inline NegOptionsT *NegOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new NegOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void NegOptions::UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<NegOptions> NegOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateNegOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NegOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateNegOptions(
-      _fbb);
-}
-
-inline SelectOptionsT *SelectOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SelectOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SelectOptions::UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SelectOptions> SelectOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSelectOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSelectOptions(
-      _fbb);
-}
-
-inline SliceOptionsT *SliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SliceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SliceOptions::UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SliceOptions> SliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSliceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSliceOptions(
-      _fbb);
-}
-
-inline TransposeConvOptionsT *TransposeConvOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TransposeConvOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TransposeConvOptions::UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = padding(); _o->padding = _e; }
-  { auto _e = stride_w(); _o->stride_w = _e; }
-  { auto _e = stride_h(); _o->stride_h = _e; }
-}
-
-inline flatbuffers::Offset<TransposeConvOptions> TransposeConvOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTransposeConvOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TransposeConvOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _padding = _o->padding;
-  auto _stride_w = _o->stride_w;
-  auto _stride_h = _o->stride_h;
-  return tflite::CreateTransposeConvOptions(
-      _fbb,
-      _padding,
-      _stride_w,
-      _stride_h);
-}
-
-inline ExpandDimsOptionsT *ExpandDimsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ExpandDimsOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ExpandDimsOptions::UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<ExpandDimsOptions> ExpandDimsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateExpandDimsOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpandDimsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateExpandDimsOptions(
-      _fbb);
-}
-
-inline SparseToDenseOptionsT *SparseToDenseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SparseToDenseOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SparseToDenseOptions::UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = validate_indices(); _o->validate_indices = _e; }
-}
-
-inline flatbuffers::Offset<SparseToDenseOptions> SparseToDenseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSparseToDenseOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SparseToDenseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _validate_indices = _o->validate_indices;
-  return tflite::CreateSparseToDenseOptions(
-      _fbb,
-      _validate_indices);
-}
-
-inline EqualOptionsT *EqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new EqualOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void EqualOptions::UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<EqualOptions> EqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateEqualOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateEqualOptions(
-      _fbb);
-}
-
-inline NotEqualOptionsT *NotEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new NotEqualOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void NotEqualOptions::UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<NotEqualOptions> NotEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateNotEqualOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NotEqualOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateNotEqualOptions(
-      _fbb);
-}
-
-inline ShapeOptionsT *ShapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ShapeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ShapeOptions::UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = out_type(); _o->out_type = _e; }
-}
-
-inline flatbuffers::Offset<ShapeOptions> ShapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateShapeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ShapeOptions> CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ShapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _out_type = _o->out_type;
-  return tflite::CreateShapeOptions(
-      _fbb,
-      _out_type);
-}
-
-inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new RankOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void RankOptions::UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<RankOptions> RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRankOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RankOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateRankOptions(
-      _fbb);
-}
-
-inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new PowOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void PowOptions::UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<PowOptions> PowOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePowOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PowOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreatePowOptions(
-      _fbb);
-}
-
-inline FakeQuantOptionsT *FakeQuantOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FakeQuantOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FakeQuantOptions::UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = min(); _o->min = _e; }
-  { auto _e = max(); _o->max = _e; }
-  { auto _e = num_bits(); _o->num_bits = _e; }
-  { auto _e = narrow_range(); _o->narrow_range = _e; }
-}
-
-inline flatbuffers::Offset<FakeQuantOptions> FakeQuantOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFakeQuantOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FakeQuantOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _min = _o->min;
-  auto _max = _o->max;
-  auto _num_bits = _o->num_bits;
-  auto _narrow_range = _o->narrow_range;
-  return tflite::CreateFakeQuantOptions(
-      _fbb,
-      _min,
-      _max,
-      _num_bits,
-      _narrow_range);
-}
-
-inline PackOptionsT *PackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new PackOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void PackOptions::UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = values_count(); _o->values_count = _e; }
-  { auto _e = axis(); _o->axis = _e; }
-}
-
-inline flatbuffers::Offset<PackOptions> PackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePackOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<PackOptions> CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PackOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _values_count = _o->values_count;
-  auto _axis = _o->axis;
-  return tflite::CreatePackOptions(
-      _fbb,
-      _values_count,
-      _axis);
-}
-
-inline LogicalOrOptionsT *LogicalOrOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LogicalOrOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LogicalOrOptions::UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LogicalOrOptions> LogicalOrOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLogicalOrOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalOrOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLogicalOrOptions(
-      _fbb);
-}
-
-inline OneHotOptionsT *OneHotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new OneHotOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void OneHotOptions::UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = axis(); _o->axis = _e; }
-}
-
-inline flatbuffers::Offset<OneHotOptions> OneHotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateOneHotOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OneHotOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _axis = _o->axis;
-  return tflite::CreateOneHotOptions(
-      _fbb,
-      _axis);
-}
-
-inline AbsOptionsT *AbsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new AbsOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void AbsOptions::UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<AbsOptions> AbsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateAbsOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AbsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateAbsOptions(
-      _fbb);
-}
-
-inline HardSwishOptionsT *HardSwishOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new HardSwishOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void HardSwishOptions::UnPackTo(HardSwishOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<HardSwishOptions> HardSwishOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateHardSwishOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HardSwishOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateHardSwishOptions(
-      _fbb);
-}
-
-inline LogicalAndOptionsT *LogicalAndOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LogicalAndOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LogicalAndOptions::UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LogicalAndOptions> LogicalAndOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLogicalAndOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalAndOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLogicalAndOptions(
-      _fbb);
-}
-
-inline LogicalNotOptionsT *LogicalNotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LogicalNotOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LogicalNotOptions::UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<LogicalNotOptions> LogicalNotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLogicalNotOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LogicalNotOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateLogicalNotOptions(
-      _fbb);
-}
-
-inline UnpackOptionsT *UnpackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new UnpackOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void UnpackOptions::UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = num(); _o->num = _e; }
-  { auto _e = axis(); _o->axis = _e; }
-}
-
-inline flatbuffers::Offset<UnpackOptions> UnpackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateUnpackOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnpackOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _num = _o->num;
-  auto _axis = _o->axis;
-  return tflite::CreateUnpackOptions(
-      _fbb,
-      _num,
-      _axis);
-}
-
-inline FloorDivOptionsT *FloorDivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FloorDivOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FloorDivOptions::UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<FloorDivOptions> FloorDivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFloorDivOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorDivOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateFloorDivOptions(
-      _fbb);
-}
-
-inline SquareOptionsT *SquareOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SquareOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SquareOptions::UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SquareOptions> SquareOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSquareOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SquareOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSquareOptions(
-      _fbb);
-}
-
-inline ZerosLikeOptionsT *ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ZerosLikeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<ZerosLikeOptions> ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateZerosLikeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ZerosLikeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateZerosLikeOptions(
-      _fbb);
-}
-
-inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FillOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FillOptions::UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<FillOptions> FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFillOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FillOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateFillOptions(
-      _fbb);
-}
-
-inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FloorModOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<FloorModOptions> FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFloorModOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateFloorModOptions(
-      _fbb);
-}
-
-inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new RangeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<RangeOptions> RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRangeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RangeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateRangeOptions(
-      _fbb);
-}
-
-inline LeakyReluOptionsT *LeakyReluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new LeakyReluOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void LeakyReluOptions::UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = alpha(); _o->alpha = _e; }
-}
-
-inline flatbuffers::Offset<LeakyReluOptions> LeakyReluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateLeakyReluOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LeakyReluOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _alpha = _o->alpha;
-  return tflite::CreateLeakyReluOptions(
-      _fbb,
-      _alpha);
-}
-
-inline SquaredDifferenceOptionsT *SquaredDifferenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SquaredDifferenceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SquaredDifferenceOptions::UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SquaredDifferenceOptions> SquaredDifferenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSquaredDifferenceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SquaredDifferenceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSquaredDifferenceOptions(
-      _fbb);
-}
-
-inline MirrorPadOptionsT *MirrorPadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MirrorPadOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void MirrorPadOptions::UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = mode(); _o->mode = _e; }
-}
-
-inline flatbuffers::Offset<MirrorPadOptions> MirrorPadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMirrorPadOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MirrorPadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _mode = _o->mode;
-  return tflite::CreateMirrorPadOptions(
-      _fbb,
-      _mode);
-}
-
-inline UniqueOptionsT *UniqueOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new UniqueOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void UniqueOptions::UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = idx_out_type(); _o->idx_out_type = _e; }
-}
-
-inline flatbuffers::Offset<UniqueOptions> UniqueOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateUniqueOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UniqueOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _idx_out_type = _o->idx_out_type;
-  return tflite::CreateUniqueOptions(
-      _fbb,
-      _idx_out_type);
-}
-
-inline ReverseV2OptionsT *ReverseV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ReverseV2OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ReverseV2Options::UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<ReverseV2Options> ReverseV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateReverseV2Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateReverseV2Options(
-      _fbb);
-}
-
-inline AddNOptionsT *AddNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new AddNOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void AddNOptions::UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<AddNOptions> AddNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateAddNOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateAddNOptions(
-      _fbb);
-}
-
-inline GatherNdOptionsT *GatherNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new GatherNdOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void GatherNdOptions::UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<GatherNdOptions> GatherNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateGatherNdOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateGatherNdOptions(
-      _fbb);
-}
-
-inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new WhereOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void WhereOptions::UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<WhereOptions> WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateWhereOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhereOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateWhereOptions(
-      _fbb);
-}
-
-inline ReverseSequenceOptionsT *ReverseSequenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ReverseSequenceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ReverseSequenceOptions::UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = seq_dim(); _o->seq_dim = _e; }
-  { auto _e = batch_dim(); _o->batch_dim = _e; }
-}
-
-inline flatbuffers::Offset<ReverseSequenceOptions> ReverseSequenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateReverseSequenceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseSequenceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _seq_dim = _o->seq_dim;
-  auto _batch_dim = _o->batch_dim;
-  return tflite::CreateReverseSequenceOptions(
-      _fbb,
-      _seq_dim,
-      _batch_dim);
-}
-
-inline MatrixDiagOptionsT *MatrixDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MatrixDiagOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void MatrixDiagOptions::UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<MatrixDiagOptions> MatrixDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMatrixDiagOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateMatrixDiagOptions(
-      _fbb);
-}
-
-inline QuantizeOptionsT *QuantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new QuantizeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void QuantizeOptions::UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<QuantizeOptions> QuantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateQuantizeOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateQuantizeOptions(
-      _fbb);
-}
-
-inline MatrixSetDiagOptionsT *MatrixSetDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MatrixSetDiagOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void MatrixSetDiagOptions::UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<MatrixSetDiagOptions> MatrixSetDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMatrixSetDiagOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixSetDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateMatrixSetDiagOptions(
-      _fbb);
-}
-
-inline IfOptionsT *IfOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new IfOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void IfOptions::UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = then_subgraph_index(); _o->then_subgraph_index = _e; }
-  { auto _e = else_subgraph_index(); _o->else_subgraph_index = _e; }
-}
-
-inline flatbuffers::Offset<IfOptions> IfOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateIfOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const IfOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _then_subgraph_index = _o->then_subgraph_index;
-  auto _else_subgraph_index = _o->else_subgraph_index;
-  return tflite::CreateIfOptions(
-      _fbb,
-      _then_subgraph_index,
-      _else_subgraph_index);
-}
-
-inline CallOnceOptionsT *CallOnceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CallOnceOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CallOnceOptions::UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = init_subgraph_index(); _o->init_subgraph_index = _e; }
-}
-
-inline flatbuffers::Offset<CallOnceOptions> CallOnceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCallOnceOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOnceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _init_subgraph_index = _o->init_subgraph_index;
-  return tflite::CreateCallOnceOptions(
-      _fbb,
-      _init_subgraph_index);
-}
-
-inline WhileOptionsT *WhileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new WhileOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void WhileOptions::UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = cond_subgraph_index(); _o->cond_subgraph_index = _e; }
-  { auto _e = body_subgraph_index(); _o->body_subgraph_index = _e; }
-}
-
-inline flatbuffers::Offset<WhileOptions> WhileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateWhileOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _cond_subgraph_index = _o->cond_subgraph_index;
-  auto _body_subgraph_index = _o->body_subgraph_index;
-  return tflite::CreateWhileOptions(
-      _fbb,
-      _cond_subgraph_index,
-      _body_subgraph_index);
-}
-
-inline NonMaxSuppressionV4OptionsT *NonMaxSuppressionV4Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new NonMaxSuppressionV4OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void NonMaxSuppressionV4Options::UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<NonMaxSuppressionV4Options> NonMaxSuppressionV4Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateNonMaxSuppressionV4Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV4OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateNonMaxSuppressionV4Options(
-      _fbb);
-}
-
-inline NonMaxSuppressionV5OptionsT *NonMaxSuppressionV5Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new NonMaxSuppressionV5OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void NonMaxSuppressionV5Options::UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<NonMaxSuppressionV5Options> NonMaxSuppressionV5Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateNonMaxSuppressionV5Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV5OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateNonMaxSuppressionV5Options(
-      _fbb);
-}
-
-inline ScatterNdOptionsT *ScatterNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ScatterNdOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ScatterNdOptions::UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<ScatterNdOptions> ScatterNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateScatterNdOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ScatterNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateScatterNdOptions(
-      _fbb);
-}
-
-inline SelectV2OptionsT *SelectV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SelectV2OptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SelectV2Options::UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SelectV2Options> SelectV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSelectV2Options(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSelectV2Options(
-      _fbb);
-}
-
-inline DensifyOptionsT *DensifyOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DensifyOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DensifyOptions::UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<DensifyOptions> DensifyOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDensifyOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DensifyOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateDensifyOptions(
-      _fbb);
-}
-
-inline SegmentSumOptionsT *SegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SegmentSumOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SegmentSumOptions::UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<SegmentSumOptions> SegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSegmentSumOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SegmentSumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateSegmentSumOptions(
-      _fbb);
-}
-
-inline BatchMatMulOptionsT *BatchMatMulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BatchMatMulOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BatchMatMulOptions::UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = adj_x(); _o->adj_x = _e; }
-  { auto _e = adj_y(); _o->adj_y = _e; }
-  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
-}
-
-inline flatbuffers::Offset<BatchMatMulOptions> BatchMatMulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBatchMatMulOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchMatMulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _adj_x = _o->adj_x;
-  auto _adj_y = _o->adj_y;
-  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
-  return tflite::CreateBatchMatMulOptions(
-      _fbb,
-      _adj_x,
-      _adj_y,
-      _asymmetric_quantize_inputs);
-}
-
-inline CumsumOptionsT *CumsumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CumsumOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CumsumOptions::UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = exclusive(); _o->exclusive = _e; }
-  { auto _e = reverse(); _o->reverse = _e; }
-}
-
-inline flatbuffers::Offset<CumsumOptions> CumsumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCumsumOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CumsumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _exclusive = _o->exclusive;
-  auto _reverse = _o->reverse;
-  return tflite::CreateCumsumOptions(
-      _fbb,
-      _exclusive,
-      _reverse);
-}
-
-inline BroadcastToOptionsT *BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BroadcastToOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<BroadcastToOptions> BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBroadcastToOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BroadcastToOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateBroadcastToOptions(
-      _fbb);
-}
-
-inline Rfft2dOptionsT *Rfft2dOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new Rfft2dOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Rfft2dOptions::UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<Rfft2dOptions> Rfft2dOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRfft2dOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Rfft2dOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateRfft2dOptions(
-      _fbb);
-}
-
-inline HashtableOptionsT *HashtableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new HashtableOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void HashtableOptions::UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = table_id(); _o->table_id = _e; }
-  { auto _e = key_dtype(); _o->key_dtype = _e; }
-  { auto _e = value_dtype(); _o->value_dtype = _e; }
-}
-
-inline flatbuffers::Offset<HashtableOptions> HashtableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateHashtableOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _table_id = _o->table_id;
-  auto _key_dtype = _o->key_dtype;
-  auto _value_dtype = _o->value_dtype;
-  return tflite::CreateHashtableOptions(
-      _fbb,
-      _table_id,
-      _key_dtype,
-      _value_dtype);
-}
-
-inline HashtableFindOptionsT *HashtableFindOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new HashtableFindOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ExpandDimsOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
+};
 
-inline void HashtableFindOptions::UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::EqualOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
+};
 
-inline flatbuffers::Offset<HashtableFindOptions> HashtableFindOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateHashtableFindOptions(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::NotEqualOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
+};
 
-inline flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableFindOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateHashtableFindOptions(
-      _fbb);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ShapeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
+};
 
-inline HashtableImportOptionsT *HashtableImportOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new HashtableImportOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::PowOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
+};
 
-inline void HashtableImportOptions::UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ArgMinOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
+};
 
-inline flatbuffers::Offset<HashtableImportOptions> HashtableImportOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateHashtableImportOptions(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::FakeQuantOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
+};
 
-inline flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableImportOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateHashtableImportOptions(
-      _fbb);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::PackOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
+};
 
-inline HashtableSizeOptionsT *HashtableSizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new HashtableSizeOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LogicalOrOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
+};
 
-inline void HashtableSizeOptions::UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::OneHotOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
+};
 
-inline flatbuffers::Offset<HashtableSizeOptions> HashtableSizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateHashtableSizeOptions(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LogicalAndOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
+};
 
-inline flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableSizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateHashtableSizeOptions(
-      _fbb);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LogicalNotOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
+};
 
-inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new OperatorCodeT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnpackOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
+};
 
-inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = deprecated_builtin_code(); _o->deprecated_builtin_code = _e; }
-  { auto _e = custom_code(); if (_e) _o->custom_code = _e->str(); }
-  { auto _e = version(); _o->version = _e; }
-  { auto _e = builtin_code(); _o->builtin_code = _e; }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::FloorDivOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
+};
 
-inline flatbuffers::Offset<OperatorCode> OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateOperatorCode(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SquareOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
+};
 
-inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorCodeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _deprecated_builtin_code = _o->deprecated_builtin_code;
-  auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
-  auto _version = _o->version;
-  auto _builtin_code = _o->builtin_code;
-  return tflite::CreateOperatorCode(
-      _fbb,
-      _deprecated_builtin_code,
-      _custom_code,
-      _version,
-      _builtin_code);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ZerosLikeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
+};
 
-inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new OperatorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::FillOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
+};
 
-inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = opcode_index(); _o->opcode_index = _e; }
-  { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }
-  { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }
-  { auto _e = builtin_options_type(); _o->builtin_options.type = _e; }
-  { auto _e = builtin_options(); if (_e) _o->builtin_options.value = tflite::BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); }
-  { auto _e = custom_options(); if (_e) { _o->custom_options.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->custom_options.begin()); } }
-  { auto _e = custom_options_format(); _o->custom_options_format = _e; }
-  { auto _e = mutating_variable_inputs(); if (_e) { _o->mutating_variable_inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->mutating_variable_inputs[_i] = _e->Get(_i) != 0; } } }
-  { auto _e = intermediates(); if (_e) { _o->intermediates.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->intermediates[_i] = _e->Get(_i); } } }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::BidirectionalSequenceLSTMOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
+};
 
-inline flatbuffers::Offset<Operator> Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateOperator(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::BidirectionalSequenceRNNOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
+};
 
-inline flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _opcode_index = _o->opcode_index;
-  auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
-  auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
-  auto _builtin_options_type = _o->builtin_options.type;
-  auto _builtin_options = _o->builtin_options.Pack(_fbb);
-  auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
-  auto _custom_options_format = _o->custom_options_format;
-  auto _mutating_variable_inputs = _o->mutating_variable_inputs.size() ? _fbb.CreateVector(_o->mutating_variable_inputs) : 0;
-  auto _intermediates = _o->intermediates.size() ? _fbb.CreateVector(_o->intermediates) : 0;
-  return tflite::CreateOperator(
-      _fbb,
-      _opcode_index,
-      _inputs,
-      _outputs,
-      _builtin_options_type,
-      _builtin_options,
-      _custom_options,
-      _custom_options_format,
-      _mutating_variable_inputs,
-      _intermediates);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnidirectionalSequenceLSTMOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
+};
 
-inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SubGraphT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::FloorModOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
+};
 
-inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = tensors(); if (_e) { _o->tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->tensors[_i] = std::unique_ptr<tflite::TensorT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }
-  { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }
-  { auto _e = operators(); if (_e) { _o->operators.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operators[_i] = std::unique_ptr<tflite::OperatorT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = name(); if (_e) _o->name = _e->str(); }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::RangeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
+};
 
-inline flatbuffers::Offset<SubGraph> SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSubGraph(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ResizeNearestNeighborOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
+};
 
-inline flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubGraphT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _tensors = _o->tensors.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Tensor>> (_o->tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
-  auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
-  auto _operators = _o->operators.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Operator>> (_o->operators.size(), [](size_t i, _VectorArgs *__va) { return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  return tflite::CreateSubGraph(
-      _fbb,
-      _tensors,
-      _inputs,
-      _outputs,
-      _operators,
-      _name);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::LeakyReluOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
+};
 
-inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BufferT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SquaredDifferenceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
+};
 
-inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = data(); if (_e) { _o->data.resize(_e->size()); std::copy(_e->begin(), _e->end(), _o->data.begin()); } }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::MirrorPadOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
+};
 
-inline flatbuffers::Offset<Buffer> Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBuffer(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::AbsOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
+};
 
-inline flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  _fbb.ForceVectorAlignment(_o->data.size(), sizeof(uint8_t), 16);
-  auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0;
-  return tflite::CreateBuffer(
-      _fbb,
-      _data);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SplitVOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
+};
 
-inline MetadataT *Metadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new MetadataT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UniqueOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
+};
 
-inline void Metadata::UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = name(); if (_e) _o->name = _e->str(); }
-  { auto _e = buffer(); _o->buffer = _e; }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ReverseV2OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
+};
 
-inline flatbuffers::Offset<Metadata> Metadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateMetadata(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::AddNOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
+};
 
-inline flatbuffers::Offset<Metadata> CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MetadataT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  auto _buffer = _o->buffer;
-  return tflite::CreateMetadata(
-      _fbb,
-      _name,
-      _buffer);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::GatherNdOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
+};
 
-inline TensorMapT *TensorMap::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TensorMapT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::CosOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
+};
 
-inline void TensorMap::UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = name(); if (_e) _o->name = _e->str(); }
-  { auto _e = tensor_index(); _o->tensor_index = _e; }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::WhereOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
+};
 
-inline flatbuffers::Offset<TensorMap> TensorMap::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTensorMap(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::RankOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
+};
 
-inline flatbuffers::Offset<TensorMap> CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorMapT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  auto _tensor_index = _o->tensor_index;
-  return tflite::CreateTensorMap(
-      _fbb,
-      _name,
-      _tensor_index);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ReverseSequenceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
+};
 
-inline SignatureDefT *SignatureDef::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SignatureDefT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::MatrixDiagOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
+};
 
-inline void SignatureDef::UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = std::unique_ptr<tflite::TensorMapT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = std::unique_ptr<tflite::TensorMapT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = method_name(); if (_e) _o->method_name = _e->str(); }
-  { auto _e = key(); if (_e) _o->key = _e->str(); }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::QuantizeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
+};
 
-inline flatbuffers::Offset<SignatureDef> SignatureDef::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSignatureDef(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::MatrixSetDiagOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
+};
 
-inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SignatureDefT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _inputs = _o->inputs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>> (_o->inputs.size(), [](size_t i, _VectorArgs *__va) { return CreateTensorMap(*__va->__fbb, __va->__o->inputs[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _outputs = _o->outputs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>> (_o->outputs.size(), [](size_t i, _VectorArgs *__va) { return CreateTensorMap(*__va->__fbb, __va->__o->outputs[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _method_name = _o->method_name.empty() ? 0 : _fbb.CreateString(_o->method_name);
-  auto _key = _o->key.empty() ? 0 : _fbb.CreateString(_o->key);
-  return tflite::CreateSignatureDef(
-      _fbb,
-      _inputs,
-      _outputs,
-      _method_name,
-      _key);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::HardSwishOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
+};
 
-inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ModelT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::IfOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
+};
 
-inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = version(); _o->version = _e; }
-  { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operator_codes[_i] = std::unique_ptr<tflite::OperatorCodeT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->subgraphs[_i] = std::unique_ptr<tflite::SubGraphT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = description(); if (_e) _o->description = _e->str(); }
-  { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->buffers[_i] = std::unique_ptr<tflite::BufferT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = metadata_buffer(); if (_e) { _o->metadata_buffer.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->metadata_buffer[_i] = _e->Get(_i); } } }
-  { auto _e = metadata(); if (_e) { _o->metadata.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->metadata[_i] = std::unique_ptr<tflite::MetadataT>(_e->Get(_i)->UnPack(_resolver)); } } }
-  { auto _e = signature_defs(); if (_e) { _o->signature_defs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->signature_defs[_i] = std::unique_ptr<tflite::SignatureDefT>(_e->Get(_i)->UnPack(_resolver)); } } }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::WhileOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
+};
 
-inline flatbuffers::Offset<Model> Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateModel(_fbb, _o, _rehasher);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::DepthToSpaceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
+};
 
-inline flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _version = _o->version;
-  auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::OperatorCode>> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _subgraphs = _o->subgraphs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::SubGraph>> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description);
-  auto _buffers = _o->buffers.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Buffer>> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _metadata_buffer = _o->metadata_buffer.size() ? _fbb.CreateVector(_o->metadata_buffer) : 0;
-  auto _metadata = _o->metadata.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Metadata>> (_o->metadata.size(), [](size_t i, _VectorArgs *__va) { return CreateMetadata(*__va->__fbb, __va->__o->metadata[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _signature_defs = _o->signature_defs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::SignatureDef>> (_o->signature_defs.size(), [](size_t i, _VectorArgs *__va) { return CreateSignatureDef(*__va->__fbb, __va->__o->signature_defs[i].get(), __va->__rehasher); }, &_va ) : 0;
-  return tflite::CreateModel(
-      _fbb,
-      _version,
-      _operator_codes,
-      _subgraphs,
-      _description,
-      _buffers,
-      _metadata_buffer,
-      _metadata,
-      _signature_defs);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::NonMaxSuppressionV4OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
+};
 
-inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type) {
-  switch (type) {
-    case QuantizationDetails_NONE: {
-      return true;
-    }
-    case QuantizationDetails_CustomQuantization: {
-      auto ptr = reinterpret_cast<const tflite::CustomQuantization *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    default: return true;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::NonMaxSuppressionV5OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
+};
 
-inline bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
-  if (!values || !types) return !values && !types;
-  if (values->size() != types->size()) return false;
-  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
-    if (!VerifyQuantizationDetails(
-        verifier,  values->Get(i), types->GetEnum<QuantizationDetails>(i))) {
-      return false;
-    }
-  }
-  return true;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ScatterNdOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
+};
 
-inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver) {
-  switch (type) {
-    case QuantizationDetails_CustomQuantization: {
-      auto ptr = reinterpret_cast<const tflite::CustomQuantization *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    default: return nullptr;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SelectV2OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
+};
 
-inline flatbuffers::Offset<void> QuantizationDetailsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
-  switch (type) {
-    case QuantizationDetails_CustomQuantization: {
-      auto ptr = reinterpret_cast<const tflite::CustomQuantizationT *>(value);
-      return CreateCustomQuantization(_fbb, ptr, _rehasher).Union();
-    }
-    default: return 0;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::DensifyOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
+};
 
-inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDetailsUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) {
-  switch (type) {
-    case QuantizationDetails_CustomQuantization: {
-      value = new tflite::CustomQuantizationT(*reinterpret_cast<tflite::CustomQuantizationT *>(u.value));
-      break;
-    }
-    default:
-      break;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SegmentSumOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
+};
 
-inline void QuantizationDetailsUnion::Reset() {
-  switch (type) {
-    case QuantizationDetails_CustomQuantization: {
-      auto ptr = reinterpret_cast<tflite::CustomQuantizationT *>(value);
-      delete ptr;
-      break;
-    }
-    default: break;
-  }
-  value = nullptr;
-  type = QuantizationDetails_NONE;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::BatchMatMulOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
+};
 
-inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type) {
-  switch (type) {
-    case SparseIndexVector_NONE: {
-      return true;
-    }
-    case SparseIndexVector_Int32Vector: {
-      auto ptr = reinterpret_cast<const tflite::Int32Vector *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case SparseIndexVector_Uint16Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case SparseIndexVector_Uint8Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    default: return true;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::CumsumOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
 
-inline bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
-  if (!values || !types) return !values && !types;
-  if (values->size() != types->size()) return false;
-  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
-    if (!VerifySparseIndexVector(
-        verifier,  values->Get(i), types->GetEnum<SparseIndexVector>(i))) {
-      return false;
-    }
-  }
-  return true;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::CallOnceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
 
-inline void *SparseIndexVectorUnion::UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver) {
-  switch (type) {
-    case SparseIndexVector_Int32Vector: {
-      auto ptr = reinterpret_cast<const tflite::Int32Vector *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case SparseIndexVector_Uint16Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case SparseIndexVector_Uint8Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    default: return nullptr;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::BroadcastToOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
 
-inline flatbuffers::Offset<void> SparseIndexVectorUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
-  switch (type) {
-    case SparseIndexVector_Int32Vector: {
-      auto ptr = reinterpret_cast<const tflite::Int32VectorT *>(value);
-      return CreateInt32Vector(_fbb, ptr, _rehasher).Union();
-    }
-    case SparseIndexVector_Uint16Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint16VectorT *>(value);
-      return CreateUint16Vector(_fbb, ptr, _rehasher).Union();
-    }
-    case SparseIndexVector_Uint8Vector: {
-      auto ptr = reinterpret_cast<const tflite::Uint8VectorT *>(value);
-      return CreateUint8Vector(_fbb, ptr, _rehasher).Union();
-    }
-    default: return 0;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::Rfft2dOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
 
-inline SparseIndexVectorUnion::SparseIndexVectorUnion(const SparseIndexVectorUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) {
-  switch (type) {
-    case SparseIndexVector_Int32Vector: {
-      value = new tflite::Int32VectorT(*reinterpret_cast<tflite::Int32VectorT *>(u.value));
-      break;
-    }
-    case SparseIndexVector_Uint16Vector: {
-      value = new tflite::Uint16VectorT(*reinterpret_cast<tflite::Uint16VectorT *>(u.value));
-      break;
-    }
-    case SparseIndexVector_Uint8Vector: {
-      value = new tflite::Uint8VectorT(*reinterpret_cast<tflite::Uint8VectorT *>(u.value));
-      break;
-    }
-    default:
-      break;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::Conv3DOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
 
-inline void SparseIndexVectorUnion::Reset() {
-  switch (type) {
-    case SparseIndexVector_Int32Vector: {
-      auto ptr = reinterpret_cast<tflite::Int32VectorT *>(value);
-      delete ptr;
-      break;
-    }
-    case SparseIndexVector_Uint16Vector: {
-      auto ptr = reinterpret_cast<tflite::Uint16VectorT *>(value);
-      delete ptr;
-      break;
-    }
-    case SparseIndexVector_Uint8Vector: {
-      auto ptr = reinterpret_cast<tflite::Uint8VectorT *>(value);
-      delete ptr;
-      break;
-    }
-    default: break;
-  }
-  value = nullptr;
-  type = SparseIndexVector_NONE;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::HashtableOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
 
-inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) {
-  switch (type) {
-    case BuiltinOptions_NONE: {
-      return true;
-    }
-    case BuiltinOptions_Conv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv2DOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_DepthwiseConv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ConcatEmbeddingsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LSHProjectionOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_Pool2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Pool2DOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SVDFOptions: {
-      auto ptr = reinterpret_cast<const tflite::SVDFOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_RNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::RNNOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_FullyConnectedOptions: {
-      auto ptr = reinterpret_cast<const tflite::FullyConnectedOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::SoftmaxOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ConcatenationOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatenationOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_AddOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_L2NormOptions: {
-      auto ptr = reinterpret_cast<const tflite::L2NormOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const tflite::LocalResponseNormalizationOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSTMOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ResizeBilinearOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeBilinearOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_CallOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ReshapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReshapeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SkipGramOptions: {
-      auto ptr = reinterpret_cast<const tflite::SkipGramOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SpaceToDepthOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToDepthOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      auto ptr = reinterpret_cast<const tflite::EmbeddingLookupSparseOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_MulOptions: {
-      auto ptr = reinterpret_cast<const tflite::MulOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_PadOptions: {
-      auto ptr = reinterpret_cast<const tflite::PadOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_GatherOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_BatchToSpaceNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchToSpaceNDOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SpaceToBatchNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToBatchNDOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_TransposeOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ReducerOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReducerOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SubOptions: {
-      auto ptr = reinterpret_cast<const tflite::SubOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_DivOptions: {
-      auto ptr = reinterpret_cast<const tflite::DivOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SqueezeOptions: {
-      auto ptr = reinterpret_cast<const tflite::SqueezeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::SequenceRNNOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_StridedSliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::StridedSliceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ExpOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_TopKV2Options: {
-      auto ptr = reinterpret_cast<const tflite::TopKV2Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SplitOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LogSoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogSoftmaxOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_CastOptions: {
-      auto ptr = reinterpret_cast<const tflite::CastOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_DequantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::DequantizeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_MaximumMinimumOptions: {
-      auto ptr = reinterpret_cast<const tflite::MaximumMinimumOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ArgMaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMaxOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LessOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_NegOptions: {
-      auto ptr = reinterpret_cast<const tflite::NegOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_PadV2Options: {
-      auto ptr = reinterpret_cast<const tflite::PadV2Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_GreaterOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_GreaterEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterEqualOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LessEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessEqualOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SelectOptions: {
-      auto ptr = reinterpret_cast<const tflite::SelectOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SliceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_TransposeConvOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeConvOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SparseToDenseOptions: {
-      auto ptr = reinterpret_cast<const tflite::SparseToDenseOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_TileOptions: {
-      auto ptr = reinterpret_cast<const tflite::TileOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ExpandDimsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpandDimsOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_EqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::EqualOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_NotEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::NotEqualOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ShapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ShapeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_PowOptions: {
-      auto ptr = reinterpret_cast<const tflite::PowOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ArgMinOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMinOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_FakeQuantOptions: {
-      auto ptr = reinterpret_cast<const tflite::FakeQuantOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_PackOptions: {
-      auto ptr = reinterpret_cast<const tflite::PackOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LogicalOrOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalOrOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_OneHotOptions: {
-      auto ptr = reinterpret_cast<const tflite::OneHotOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LogicalAndOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalAndOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LogicalNotOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalNotOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_UnpackOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnpackOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_FloorDivOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorDivOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SquareOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquareOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ZerosLikeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ZerosLikeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_FillOptions: {
-      auto ptr = reinterpret_cast<const tflite::FillOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceRNNOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_FloorModOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorModOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_RangeOptions: {
-      auto ptr = reinterpret_cast<const tflite::RangeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ResizeNearestNeighborOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeNearestNeighborOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_LeakyReluOptions: {
-      auto ptr = reinterpret_cast<const tflite::LeakyReluOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SquaredDifferenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquaredDifferenceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_MirrorPadOptions: {
-      auto ptr = reinterpret_cast<const tflite::MirrorPadOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_AbsOptions: {
-      auto ptr = reinterpret_cast<const tflite::AbsOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SplitVOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitVOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_UniqueOptions: {
-      auto ptr = reinterpret_cast<const tflite::UniqueOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ReverseV2Options: {
-      auto ptr = reinterpret_cast<const tflite::ReverseV2Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_AddNOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddNOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_GatherNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherNdOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_CosOptions: {
-      auto ptr = reinterpret_cast<const tflite::CosOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_WhereOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhereOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_RankOptions: {
-      auto ptr = reinterpret_cast<const tflite::RankOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ReverseSequenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReverseSequenceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_MatrixDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixDiagOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_QuantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::QuantizeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_MatrixSetDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixSetDiagOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_HardSwishOptions: {
-      auto ptr = reinterpret_cast<const tflite::HardSwishOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_IfOptions: {
-      auto ptr = reinterpret_cast<const tflite::IfOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_WhileOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhileOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_DepthToSpaceOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthToSpaceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_NonMaxSuppressionV4Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV4Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_NonMaxSuppressionV5Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV5Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_ScatterNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::ScatterNdOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SelectV2Options: {
-      auto ptr = reinterpret_cast<const tflite::SelectV2Options *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_DensifyOptions: {
-      auto ptr = reinterpret_cast<const tflite::DensifyOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_SegmentSumOptions: {
-      auto ptr = reinterpret_cast<const tflite::SegmentSumOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_BatchMatMulOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchMatMulOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_CumsumOptions: {
-      auto ptr = reinterpret_cast<const tflite::CumsumOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_CallOnceOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOnceOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_BroadcastToOptions: {
-      auto ptr = reinterpret_cast<const tflite::BroadcastToOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_Rfft2dOptions: {
-      auto ptr = reinterpret_cast<const tflite::Rfft2dOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_Conv3DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv3DOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_HashtableOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_HashtableFindOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableFindOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_HashtableImportOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableImportOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    case BuiltinOptions_HashtableSizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableSizeOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
-    default: return true;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::HashtableFindOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
 
-inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
-  if (!values || !types) return !values && !types;
-  if (values->size() != types->size()) return false;
-  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
-    if (!VerifyBuiltinOptions(
-        verifier,  values->Get(i), types->GetEnum<BuiltinOptions>(i))) {
-      return false;
-    }
-  }
-  return true;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::HashtableImportOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
 
-inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) {
-  switch (type) {
-    case BuiltinOptions_Conv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv2DOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_DepthwiseConv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ConcatEmbeddingsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LSHProjectionOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_Pool2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Pool2DOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SVDFOptions: {
-      auto ptr = reinterpret_cast<const tflite::SVDFOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_RNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::RNNOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_FullyConnectedOptions: {
-      auto ptr = reinterpret_cast<const tflite::FullyConnectedOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::SoftmaxOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ConcatenationOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatenationOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_AddOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_L2NormOptions: {
-      auto ptr = reinterpret_cast<const tflite::L2NormOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const tflite::LocalResponseNormalizationOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSTMOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ResizeBilinearOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeBilinearOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_CallOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ReshapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReshapeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SkipGramOptions: {
-      auto ptr = reinterpret_cast<const tflite::SkipGramOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SpaceToDepthOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToDepthOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      auto ptr = reinterpret_cast<const tflite::EmbeddingLookupSparseOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_MulOptions: {
-      auto ptr = reinterpret_cast<const tflite::MulOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_PadOptions: {
-      auto ptr = reinterpret_cast<const tflite::PadOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_GatherOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_BatchToSpaceNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchToSpaceNDOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SpaceToBatchNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToBatchNDOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_TransposeOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ReducerOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReducerOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SubOptions: {
-      auto ptr = reinterpret_cast<const tflite::SubOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_DivOptions: {
-      auto ptr = reinterpret_cast<const tflite::DivOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SqueezeOptions: {
-      auto ptr = reinterpret_cast<const tflite::SqueezeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::SequenceRNNOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_StridedSliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::StridedSliceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ExpOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_TopKV2Options: {
-      auto ptr = reinterpret_cast<const tflite::TopKV2Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SplitOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LogSoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogSoftmaxOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_CastOptions: {
-      auto ptr = reinterpret_cast<const tflite::CastOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_DequantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::DequantizeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_MaximumMinimumOptions: {
-      auto ptr = reinterpret_cast<const tflite::MaximumMinimumOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ArgMaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMaxOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LessOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_NegOptions: {
-      auto ptr = reinterpret_cast<const tflite::NegOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_PadV2Options: {
-      auto ptr = reinterpret_cast<const tflite::PadV2Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_GreaterOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_GreaterEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterEqualOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LessEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessEqualOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SelectOptions: {
-      auto ptr = reinterpret_cast<const tflite::SelectOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SliceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_TransposeConvOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeConvOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SparseToDenseOptions: {
-      auto ptr = reinterpret_cast<const tflite::SparseToDenseOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_TileOptions: {
-      auto ptr = reinterpret_cast<const tflite::TileOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ExpandDimsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpandDimsOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_EqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::EqualOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_NotEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::NotEqualOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ShapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ShapeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_PowOptions: {
-      auto ptr = reinterpret_cast<const tflite::PowOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ArgMinOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMinOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_FakeQuantOptions: {
-      auto ptr = reinterpret_cast<const tflite::FakeQuantOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_PackOptions: {
-      auto ptr = reinterpret_cast<const tflite::PackOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LogicalOrOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalOrOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_OneHotOptions: {
-      auto ptr = reinterpret_cast<const tflite::OneHotOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LogicalAndOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalAndOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LogicalNotOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalNotOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_UnpackOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnpackOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_FloorDivOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorDivOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SquareOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquareOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ZerosLikeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ZerosLikeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_FillOptions: {
-      auto ptr = reinterpret_cast<const tflite::FillOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceRNNOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_FloorModOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorModOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_RangeOptions: {
-      auto ptr = reinterpret_cast<const tflite::RangeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ResizeNearestNeighborOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeNearestNeighborOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_LeakyReluOptions: {
-      auto ptr = reinterpret_cast<const tflite::LeakyReluOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SquaredDifferenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquaredDifferenceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_MirrorPadOptions: {
-      auto ptr = reinterpret_cast<const tflite::MirrorPadOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_AbsOptions: {
-      auto ptr = reinterpret_cast<const tflite::AbsOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SplitVOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitVOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_UniqueOptions: {
-      auto ptr = reinterpret_cast<const tflite::UniqueOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ReverseV2Options: {
-      auto ptr = reinterpret_cast<const tflite::ReverseV2Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_AddNOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddNOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_GatherNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherNdOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_CosOptions: {
-      auto ptr = reinterpret_cast<const tflite::CosOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_WhereOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhereOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_RankOptions: {
-      auto ptr = reinterpret_cast<const tflite::RankOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ReverseSequenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReverseSequenceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_MatrixDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixDiagOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_QuantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::QuantizeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_MatrixSetDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixSetDiagOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_HardSwishOptions: {
-      auto ptr = reinterpret_cast<const tflite::HardSwishOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_IfOptions: {
-      auto ptr = reinterpret_cast<const tflite::IfOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_WhileOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhileOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_DepthToSpaceOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthToSpaceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_NonMaxSuppressionV4Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV4Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_NonMaxSuppressionV5Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV5Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_ScatterNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::ScatterNdOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SelectV2Options: {
-      auto ptr = reinterpret_cast<const tflite::SelectV2Options *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_DensifyOptions: {
-      auto ptr = reinterpret_cast<const tflite::DensifyOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_SegmentSumOptions: {
-      auto ptr = reinterpret_cast<const tflite::SegmentSumOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_BatchMatMulOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchMatMulOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_CumsumOptions: {
-      auto ptr = reinterpret_cast<const tflite::CumsumOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_CallOnceOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOnceOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_BroadcastToOptions: {
-      auto ptr = reinterpret_cast<const tflite::BroadcastToOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_Rfft2dOptions: {
-      auto ptr = reinterpret_cast<const tflite::Rfft2dOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_Conv3DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv3DOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_HashtableOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_HashtableFindOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableFindOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_HashtableImportOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableImportOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    case BuiltinOptions_HashtableSizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableSizeOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
-    default: return nullptr;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::HashtableSizeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
 
-inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
-  switch (type) {
-    case BuiltinOptions_Conv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv2DOptionsT *>(value);
-      return CreateConv2DOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_DepthwiseConv2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptionsT *>(value);
-      return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ConcatEmbeddingsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptionsT *>(value);
-      return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LSHProjectionOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptionsT *>(value);
-      return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_Pool2DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Pool2DOptionsT *>(value);
-      return CreatePool2DOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SVDFOptions: {
-      auto ptr = reinterpret_cast<const tflite::SVDFOptionsT *>(value);
-      return CreateSVDFOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_RNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::RNNOptionsT *>(value);
-      return CreateRNNOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_FullyConnectedOptions: {
-      auto ptr = reinterpret_cast<const tflite::FullyConnectedOptionsT *>(value);
-      return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::SoftmaxOptionsT *>(value);
-      return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ConcatenationOptions: {
-      auto ptr = reinterpret_cast<const tflite::ConcatenationOptionsT *>(value);
-      return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_AddOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddOptionsT *>(value);
-      return CreateAddOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_L2NormOptions: {
-      auto ptr = reinterpret_cast<const tflite::L2NormOptionsT *>(value);
-      return CreateL2NormOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<const tflite::LocalResponseNormalizationOptionsT *>(value);
-      return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::LSTMOptionsT *>(value);
-      return CreateLSTMOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ResizeBilinearOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeBilinearOptionsT *>(value);
-      return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_CallOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOptionsT *>(value);
-      return CreateCallOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ReshapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReshapeOptionsT *>(value);
-      return CreateReshapeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SkipGramOptions: {
-      auto ptr = reinterpret_cast<const tflite::SkipGramOptionsT *>(value);
-      return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SpaceToDepthOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToDepthOptionsT *>(value);
-      return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      auto ptr = reinterpret_cast<const tflite::EmbeddingLookupSparseOptionsT *>(value);
-      return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_MulOptions: {
-      auto ptr = reinterpret_cast<const tflite::MulOptionsT *>(value);
-      return CreateMulOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_PadOptions: {
-      auto ptr = reinterpret_cast<const tflite::PadOptionsT *>(value);
-      return CreatePadOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_GatherOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherOptionsT *>(value);
-      return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_BatchToSpaceNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchToSpaceNDOptionsT *>(value);
-      return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SpaceToBatchNDOptions: {
-      auto ptr = reinterpret_cast<const tflite::SpaceToBatchNDOptionsT *>(value);
-      return CreateSpaceToBatchNDOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_TransposeOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeOptionsT *>(value);
-      return CreateTransposeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ReducerOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReducerOptionsT *>(value);
-      return CreateReducerOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SubOptions: {
-      auto ptr = reinterpret_cast<const tflite::SubOptionsT *>(value);
-      return CreateSubOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_DivOptions: {
-      auto ptr = reinterpret_cast<const tflite::DivOptionsT *>(value);
-      return CreateDivOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SqueezeOptions: {
-      auto ptr = reinterpret_cast<const tflite::SqueezeOptionsT *>(value);
-      return CreateSqueezeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::SequenceRNNOptionsT *>(value);
-      return CreateSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_StridedSliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::StridedSliceOptionsT *>(value);
-      return CreateStridedSliceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ExpOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpOptionsT *>(value);
-      return CreateExpOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_TopKV2Options: {
-      auto ptr = reinterpret_cast<const tflite::TopKV2OptionsT *>(value);
-      return CreateTopKV2Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SplitOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitOptionsT *>(value);
-      return CreateSplitOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LogSoftmaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogSoftmaxOptionsT *>(value);
-      return CreateLogSoftmaxOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_CastOptions: {
-      auto ptr = reinterpret_cast<const tflite::CastOptionsT *>(value);
-      return CreateCastOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_DequantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::DequantizeOptionsT *>(value);
-      return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_MaximumMinimumOptions: {
-      auto ptr = reinterpret_cast<const tflite::MaximumMinimumOptionsT *>(value);
-      return CreateMaximumMinimumOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ArgMaxOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMaxOptionsT *>(value);
-      return CreateArgMaxOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LessOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessOptionsT *>(value);
-      return CreateLessOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_NegOptions: {
-      auto ptr = reinterpret_cast<const tflite::NegOptionsT *>(value);
-      return CreateNegOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_PadV2Options: {
-      auto ptr = reinterpret_cast<const tflite::PadV2OptionsT *>(value);
-      return CreatePadV2Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_GreaterOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterOptionsT *>(value);
-      return CreateGreaterOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_GreaterEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::GreaterEqualOptionsT *>(value);
-      return CreateGreaterEqualOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LessEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::LessEqualOptionsT *>(value);
-      return CreateLessEqualOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SelectOptions: {
-      auto ptr = reinterpret_cast<const tflite::SelectOptionsT *>(value);
-      return CreateSelectOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SliceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SliceOptionsT *>(value);
-      return CreateSliceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_TransposeConvOptions: {
-      auto ptr = reinterpret_cast<const tflite::TransposeConvOptionsT *>(value);
-      return CreateTransposeConvOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SparseToDenseOptions: {
-      auto ptr = reinterpret_cast<const tflite::SparseToDenseOptionsT *>(value);
-      return CreateSparseToDenseOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_TileOptions: {
-      auto ptr = reinterpret_cast<const tflite::TileOptionsT *>(value);
-      return CreateTileOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ExpandDimsOptions: {
-      auto ptr = reinterpret_cast<const tflite::ExpandDimsOptionsT *>(value);
-      return CreateExpandDimsOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_EqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::EqualOptionsT *>(value);
-      return CreateEqualOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_NotEqualOptions: {
-      auto ptr = reinterpret_cast<const tflite::NotEqualOptionsT *>(value);
-      return CreateNotEqualOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ShapeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ShapeOptionsT *>(value);
-      return CreateShapeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_PowOptions: {
-      auto ptr = reinterpret_cast<const tflite::PowOptionsT *>(value);
-      return CreatePowOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ArgMinOptions: {
-      auto ptr = reinterpret_cast<const tflite::ArgMinOptionsT *>(value);
-      return CreateArgMinOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_FakeQuantOptions: {
-      auto ptr = reinterpret_cast<const tflite::FakeQuantOptionsT *>(value);
-      return CreateFakeQuantOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_PackOptions: {
-      auto ptr = reinterpret_cast<const tflite::PackOptionsT *>(value);
-      return CreatePackOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LogicalOrOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalOrOptionsT *>(value);
-      return CreateLogicalOrOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_OneHotOptions: {
-      auto ptr = reinterpret_cast<const tflite::OneHotOptionsT *>(value);
-      return CreateOneHotOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LogicalAndOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalAndOptionsT *>(value);
-      return CreateLogicalAndOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LogicalNotOptions: {
-      auto ptr = reinterpret_cast<const tflite::LogicalNotOptionsT *>(value);
-      return CreateLogicalNotOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_UnpackOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnpackOptionsT *>(value);
-      return CreateUnpackOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_FloorDivOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorDivOptionsT *>(value);
-      return CreateFloorDivOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SquareOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquareOptionsT *>(value);
-      return CreateSquareOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ZerosLikeOptions: {
-      auto ptr = reinterpret_cast<const tflite::ZerosLikeOptionsT *>(value);
-      return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_FillOptions: {
-      auto ptr = reinterpret_cast<const tflite::FillOptionsT *>(value);
-      return CreateFillOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptionsT *>(value);
-      return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
-      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceRNNOptionsT *>(value);
-      return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
-      return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_FloorModOptions: {
-      auto ptr = reinterpret_cast<const tflite::FloorModOptionsT *>(value);
-      return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_RangeOptions: {
-      auto ptr = reinterpret_cast<const tflite::RangeOptionsT *>(value);
-      return CreateRangeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ResizeNearestNeighborOptions: {
-      auto ptr = reinterpret_cast<const tflite::ResizeNearestNeighborOptionsT *>(value);
-      return CreateResizeNearestNeighborOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_LeakyReluOptions: {
-      auto ptr = reinterpret_cast<const tflite::LeakyReluOptionsT *>(value);
-      return CreateLeakyReluOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SquaredDifferenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::SquaredDifferenceOptionsT *>(value);
-      return CreateSquaredDifferenceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_MirrorPadOptions: {
-      auto ptr = reinterpret_cast<const tflite::MirrorPadOptionsT *>(value);
-      return CreateMirrorPadOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_AbsOptions: {
-      auto ptr = reinterpret_cast<const tflite::AbsOptionsT *>(value);
-      return CreateAbsOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SplitVOptions: {
-      auto ptr = reinterpret_cast<const tflite::SplitVOptionsT *>(value);
-      return CreateSplitVOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_UniqueOptions: {
-      auto ptr = reinterpret_cast<const tflite::UniqueOptionsT *>(value);
-      return CreateUniqueOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ReverseV2Options: {
-      auto ptr = reinterpret_cast<const tflite::ReverseV2OptionsT *>(value);
-      return CreateReverseV2Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_AddNOptions: {
-      auto ptr = reinterpret_cast<const tflite::AddNOptionsT *>(value);
-      return CreateAddNOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_GatherNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::GatherNdOptionsT *>(value);
-      return CreateGatherNdOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_CosOptions: {
-      auto ptr = reinterpret_cast<const tflite::CosOptionsT *>(value);
-      return CreateCosOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_WhereOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhereOptionsT *>(value);
-      return CreateWhereOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_RankOptions: {
-      auto ptr = reinterpret_cast<const tflite::RankOptionsT *>(value);
-      return CreateRankOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ReverseSequenceOptions: {
-      auto ptr = reinterpret_cast<const tflite::ReverseSequenceOptionsT *>(value);
-      return CreateReverseSequenceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_MatrixDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixDiagOptionsT *>(value);
-      return CreateMatrixDiagOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_QuantizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::QuantizeOptionsT *>(value);
-      return CreateQuantizeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_MatrixSetDiagOptions: {
-      auto ptr = reinterpret_cast<const tflite::MatrixSetDiagOptionsT *>(value);
-      return CreateMatrixSetDiagOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_HardSwishOptions: {
-      auto ptr = reinterpret_cast<const tflite::HardSwishOptionsT *>(value);
-      return CreateHardSwishOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_IfOptions: {
-      auto ptr = reinterpret_cast<const tflite::IfOptionsT *>(value);
-      return CreateIfOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_WhileOptions: {
-      auto ptr = reinterpret_cast<const tflite::WhileOptionsT *>(value);
-      return CreateWhileOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_DepthToSpaceOptions: {
-      auto ptr = reinterpret_cast<const tflite::DepthToSpaceOptionsT *>(value);
-      return CreateDepthToSpaceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_NonMaxSuppressionV4Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV4OptionsT *>(value);
-      return CreateNonMaxSuppressionV4Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_NonMaxSuppressionV5Options: {
-      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV5OptionsT *>(value);
-      return CreateNonMaxSuppressionV5Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_ScatterNdOptions: {
-      auto ptr = reinterpret_cast<const tflite::ScatterNdOptionsT *>(value);
-      return CreateScatterNdOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SelectV2Options: {
-      auto ptr = reinterpret_cast<const tflite::SelectV2OptionsT *>(value);
-      return CreateSelectV2Options(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_DensifyOptions: {
-      auto ptr = reinterpret_cast<const tflite::DensifyOptionsT *>(value);
-      return CreateDensifyOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_SegmentSumOptions: {
-      auto ptr = reinterpret_cast<const tflite::SegmentSumOptionsT *>(value);
-      return CreateSegmentSumOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_BatchMatMulOptions: {
-      auto ptr = reinterpret_cast<const tflite::BatchMatMulOptionsT *>(value);
-      return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_CumsumOptions: {
-      auto ptr = reinterpret_cast<const tflite::CumsumOptionsT *>(value);
-      return CreateCumsumOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_CallOnceOptions: {
-      auto ptr = reinterpret_cast<const tflite::CallOnceOptionsT *>(value);
-      return CreateCallOnceOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_BroadcastToOptions: {
-      auto ptr = reinterpret_cast<const tflite::BroadcastToOptionsT *>(value);
-      return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_Rfft2dOptions: {
-      auto ptr = reinterpret_cast<const tflite::Rfft2dOptionsT *>(value);
-      return CreateRfft2dOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_Conv3DOptions: {
-      auto ptr = reinterpret_cast<const tflite::Conv3DOptionsT *>(value);
-      return CreateConv3DOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_HashtableOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableOptionsT *>(value);
-      return CreateHashtableOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_HashtableFindOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableFindOptionsT *>(value);
-      return CreateHashtableFindOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_HashtableImportOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableImportOptionsT *>(value);
-      return CreateHashtableImportOptions(_fbb, ptr, _rehasher).Union();
-    }
-    case BuiltinOptions_HashtableSizeOptions: {
-      auto ptr = reinterpret_cast<const tflite::HashtableSizeOptionsT *>(value);
-      return CreateHashtableSizeOptions(_fbb, ptr, _rehasher).Union();
-    }
-    default: return 0;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::VarHandleOptionsT> {  // Trait specialization for the VarHandleOptionsT object type.
+  static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;  // BuiltinOptions tag associated with VarHandleOptionsT.
+};
 
-inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) {
-  switch (type) {
-    case BuiltinOptions_Conv2DOptions: {
-      value = new tflite::Conv2DOptionsT(*reinterpret_cast<tflite::Conv2DOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_DepthwiseConv2DOptions: {
-      value = new tflite::DepthwiseConv2DOptionsT(*reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ConcatEmbeddingsOptions: {
-      value = new tflite::ConcatEmbeddingsOptionsT(*reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LSHProjectionOptions: {
-      value = new tflite::LSHProjectionOptionsT(*reinterpret_cast<tflite::LSHProjectionOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_Pool2DOptions: {
-      value = new tflite::Pool2DOptionsT(*reinterpret_cast<tflite::Pool2DOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SVDFOptions: {
-      value = new tflite::SVDFOptionsT(*reinterpret_cast<tflite::SVDFOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_RNNOptions: {
-      value = new tflite::RNNOptionsT(*reinterpret_cast<tflite::RNNOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_FullyConnectedOptions: {
-      value = new tflite::FullyConnectedOptionsT(*reinterpret_cast<tflite::FullyConnectedOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SoftmaxOptions: {
-      value = new tflite::SoftmaxOptionsT(*reinterpret_cast<tflite::SoftmaxOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ConcatenationOptions: {
-      value = new tflite::ConcatenationOptionsT(*reinterpret_cast<tflite::ConcatenationOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_AddOptions: {
-      value = new tflite::AddOptionsT(*reinterpret_cast<tflite::AddOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_L2NormOptions: {
-      value = new tflite::L2NormOptionsT(*reinterpret_cast<tflite::L2NormOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LocalResponseNormalizationOptions: {
-      value = new tflite::LocalResponseNormalizationOptionsT(*reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LSTMOptions: {
-      value = new tflite::LSTMOptionsT(*reinterpret_cast<tflite::LSTMOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ResizeBilinearOptions: {
-      value = new tflite::ResizeBilinearOptionsT(*reinterpret_cast<tflite::ResizeBilinearOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_CallOptions: {
-      value = new tflite::CallOptionsT(*reinterpret_cast<tflite::CallOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ReshapeOptions: {
-      value = new tflite::ReshapeOptionsT(*reinterpret_cast<tflite::ReshapeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SkipGramOptions: {
-      value = new tflite::SkipGramOptionsT(*reinterpret_cast<tflite::SkipGramOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SpaceToDepthOptions: {
-      value = new tflite::SpaceToDepthOptionsT(*reinterpret_cast<tflite::SpaceToDepthOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      value = new tflite::EmbeddingLookupSparseOptionsT(*reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_MulOptions: {
-      value = new tflite::MulOptionsT(*reinterpret_cast<tflite::MulOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_PadOptions: {
-      value = new tflite::PadOptionsT(*reinterpret_cast<tflite::PadOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_GatherOptions: {
-      value = new tflite::GatherOptionsT(*reinterpret_cast<tflite::GatherOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_BatchToSpaceNDOptions: {
-      value = new tflite::BatchToSpaceNDOptionsT(*reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SpaceToBatchNDOptions: {
-      value = new tflite::SpaceToBatchNDOptionsT(*reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_TransposeOptions: {
-      value = new tflite::TransposeOptionsT(*reinterpret_cast<tflite::TransposeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ReducerOptions: {
-      value = new tflite::ReducerOptionsT(*reinterpret_cast<tflite::ReducerOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SubOptions: {
-      value = new tflite::SubOptionsT(*reinterpret_cast<tflite::SubOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_DivOptions: {
-      value = new tflite::DivOptionsT(*reinterpret_cast<tflite::DivOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SqueezeOptions: {
-      value = new tflite::SqueezeOptionsT(*reinterpret_cast<tflite::SqueezeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SequenceRNNOptions: {
-      value = new tflite::SequenceRNNOptionsT(*reinterpret_cast<tflite::SequenceRNNOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_StridedSliceOptions: {
-      value = new tflite::StridedSliceOptionsT(*reinterpret_cast<tflite::StridedSliceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ExpOptions: {
-      value = new tflite::ExpOptionsT(*reinterpret_cast<tflite::ExpOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_TopKV2Options: {
-      value = new tflite::TopKV2OptionsT(*reinterpret_cast<tflite::TopKV2OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SplitOptions: {
-      value = new tflite::SplitOptionsT(*reinterpret_cast<tflite::SplitOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LogSoftmaxOptions: {
-      value = new tflite::LogSoftmaxOptionsT(*reinterpret_cast<tflite::LogSoftmaxOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_CastOptions: {
-      value = new tflite::CastOptionsT(*reinterpret_cast<tflite::CastOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_DequantizeOptions: {
-      value = new tflite::DequantizeOptionsT(*reinterpret_cast<tflite::DequantizeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_MaximumMinimumOptions: {
-      value = new tflite::MaximumMinimumOptionsT(*reinterpret_cast<tflite::MaximumMinimumOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ArgMaxOptions: {
-      value = new tflite::ArgMaxOptionsT(*reinterpret_cast<tflite::ArgMaxOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LessOptions: {
-      value = new tflite::LessOptionsT(*reinterpret_cast<tflite::LessOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_NegOptions: {
-      value = new tflite::NegOptionsT(*reinterpret_cast<tflite::NegOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_PadV2Options: {
-      value = new tflite::PadV2OptionsT(*reinterpret_cast<tflite::PadV2OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_GreaterOptions: {
-      value = new tflite::GreaterOptionsT(*reinterpret_cast<tflite::GreaterOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_GreaterEqualOptions: {
-      value = new tflite::GreaterEqualOptionsT(*reinterpret_cast<tflite::GreaterEqualOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LessEqualOptions: {
-      value = new tflite::LessEqualOptionsT(*reinterpret_cast<tflite::LessEqualOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SelectOptions: {
-      value = new tflite::SelectOptionsT(*reinterpret_cast<tflite::SelectOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SliceOptions: {
-      value = new tflite::SliceOptionsT(*reinterpret_cast<tflite::SliceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_TransposeConvOptions: {
-      value = new tflite::TransposeConvOptionsT(*reinterpret_cast<tflite::TransposeConvOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SparseToDenseOptions: {
-      value = new tflite::SparseToDenseOptionsT(*reinterpret_cast<tflite::SparseToDenseOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_TileOptions: {
-      value = new tflite::TileOptionsT(*reinterpret_cast<tflite::TileOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ExpandDimsOptions: {
-      value = new tflite::ExpandDimsOptionsT(*reinterpret_cast<tflite::ExpandDimsOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_EqualOptions: {
-      value = new tflite::EqualOptionsT(*reinterpret_cast<tflite::EqualOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_NotEqualOptions: {
-      value = new tflite::NotEqualOptionsT(*reinterpret_cast<tflite::NotEqualOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ShapeOptions: {
-      value = new tflite::ShapeOptionsT(*reinterpret_cast<tflite::ShapeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_PowOptions: {
-      value = new tflite::PowOptionsT(*reinterpret_cast<tflite::PowOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ArgMinOptions: {
-      value = new tflite::ArgMinOptionsT(*reinterpret_cast<tflite::ArgMinOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_FakeQuantOptions: {
-      value = new tflite::FakeQuantOptionsT(*reinterpret_cast<tflite::FakeQuantOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_PackOptions: {
-      value = new tflite::PackOptionsT(*reinterpret_cast<tflite::PackOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LogicalOrOptions: {
-      value = new tflite::LogicalOrOptionsT(*reinterpret_cast<tflite::LogicalOrOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_OneHotOptions: {
-      value = new tflite::OneHotOptionsT(*reinterpret_cast<tflite::OneHotOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LogicalAndOptions: {
-      value = new tflite::LogicalAndOptionsT(*reinterpret_cast<tflite::LogicalAndOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LogicalNotOptions: {
-      value = new tflite::LogicalNotOptionsT(*reinterpret_cast<tflite::LogicalNotOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_UnpackOptions: {
-      value = new tflite::UnpackOptionsT(*reinterpret_cast<tflite::UnpackOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_FloorDivOptions: {
-      value = new tflite::FloorDivOptionsT(*reinterpret_cast<tflite::FloorDivOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SquareOptions: {
-      value = new tflite::SquareOptionsT(*reinterpret_cast<tflite::SquareOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ZerosLikeOptions: {
-      value = new tflite::ZerosLikeOptionsT(*reinterpret_cast<tflite::ZerosLikeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_FillOptions: {
-      value = new tflite::FillOptionsT(*reinterpret_cast<tflite::FillOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
-      value = new tflite::BidirectionalSequenceLSTMOptionsT(*reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
-      value = new tflite::BidirectionalSequenceRNNOptionsT(*reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
-      value = new tflite::UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_FloorModOptions: {
-      value = new tflite::FloorModOptionsT(*reinterpret_cast<tflite::FloorModOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_RangeOptions: {
-      value = new tflite::RangeOptionsT(*reinterpret_cast<tflite::RangeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ResizeNearestNeighborOptions: {
-      value = new tflite::ResizeNearestNeighborOptionsT(*reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_LeakyReluOptions: {
-      value = new tflite::LeakyReluOptionsT(*reinterpret_cast<tflite::LeakyReluOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SquaredDifferenceOptions: {
-      value = new tflite::SquaredDifferenceOptionsT(*reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_MirrorPadOptions: {
-      value = new tflite::MirrorPadOptionsT(*reinterpret_cast<tflite::MirrorPadOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_AbsOptions: {
-      value = new tflite::AbsOptionsT(*reinterpret_cast<tflite::AbsOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SplitVOptions: {
-      value = new tflite::SplitVOptionsT(*reinterpret_cast<tflite::SplitVOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_UniqueOptions: {
-      value = new tflite::UniqueOptionsT(*reinterpret_cast<tflite::UniqueOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ReverseV2Options: {
-      value = new tflite::ReverseV2OptionsT(*reinterpret_cast<tflite::ReverseV2OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_AddNOptions: {
-      value = new tflite::AddNOptionsT(*reinterpret_cast<tflite::AddNOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_GatherNdOptions: {
-      value = new tflite::GatherNdOptionsT(*reinterpret_cast<tflite::GatherNdOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_CosOptions: {
-      value = new tflite::CosOptionsT(*reinterpret_cast<tflite::CosOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_WhereOptions: {
-      value = new tflite::WhereOptionsT(*reinterpret_cast<tflite::WhereOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_RankOptions: {
-      value = new tflite::RankOptionsT(*reinterpret_cast<tflite::RankOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ReverseSequenceOptions: {
-      value = new tflite::ReverseSequenceOptionsT(*reinterpret_cast<tflite::ReverseSequenceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_MatrixDiagOptions: {
-      value = new tflite::MatrixDiagOptionsT(*reinterpret_cast<tflite::MatrixDiagOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_QuantizeOptions: {
-      value = new tflite::QuantizeOptionsT(*reinterpret_cast<tflite::QuantizeOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_MatrixSetDiagOptions: {
-      value = new tflite::MatrixSetDiagOptionsT(*reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_HardSwishOptions: {
-      value = new tflite::HardSwishOptionsT(*reinterpret_cast<tflite::HardSwishOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_IfOptions: {
-      value = new tflite::IfOptionsT(*reinterpret_cast<tflite::IfOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_WhileOptions: {
-      value = new tflite::WhileOptionsT(*reinterpret_cast<tflite::WhileOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_DepthToSpaceOptions: {
-      value = new tflite::DepthToSpaceOptionsT(*reinterpret_cast<tflite::DepthToSpaceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_NonMaxSuppressionV4Options: {
-      value = new tflite::NonMaxSuppressionV4OptionsT(*reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_NonMaxSuppressionV5Options: {
-      value = new tflite::NonMaxSuppressionV5OptionsT(*reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_ScatterNdOptions: {
-      value = new tflite::ScatterNdOptionsT(*reinterpret_cast<tflite::ScatterNdOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SelectV2Options: {
-      value = new tflite::SelectV2OptionsT(*reinterpret_cast<tflite::SelectV2OptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_DensifyOptions: {
-      value = new tflite::DensifyOptionsT(*reinterpret_cast<tflite::DensifyOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_SegmentSumOptions: {
-      value = new tflite::SegmentSumOptionsT(*reinterpret_cast<tflite::SegmentSumOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_BatchMatMulOptions: {
-      value = new tflite::BatchMatMulOptionsT(*reinterpret_cast<tflite::BatchMatMulOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_CumsumOptions: {
-      value = new tflite::CumsumOptionsT(*reinterpret_cast<tflite::CumsumOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_CallOnceOptions: {
-      value = new tflite::CallOnceOptionsT(*reinterpret_cast<tflite::CallOnceOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_BroadcastToOptions: {
-      value = new tflite::BroadcastToOptionsT(*reinterpret_cast<tflite::BroadcastToOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_Rfft2dOptions: {
-      value = new tflite::Rfft2dOptionsT(*reinterpret_cast<tflite::Rfft2dOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_Conv3DOptions: {
-      value = new tflite::Conv3DOptionsT(*reinterpret_cast<tflite::Conv3DOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_HashtableOptions: {
-      value = new tflite::HashtableOptionsT(*reinterpret_cast<tflite::HashtableOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_HashtableFindOptions: {
-      value = new tflite::HashtableFindOptionsT(*reinterpret_cast<tflite::HashtableFindOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_HashtableImportOptions: {
-      value = new tflite::HashtableImportOptionsT(*reinterpret_cast<tflite::HashtableImportOptionsT *>(u.value));
-      break;
-    }
-    case BuiltinOptions_HashtableSizeOptions: {
-      value = new tflite::HashtableSizeOptionsT(*reinterpret_cast<tflite::HashtableSizeOptionsT *>(u.value));
-      break;
-    }
-    default:
-      break;
-  }
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ReadVariableOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
 
-inline void BuiltinOptionsUnion::Reset() {
-  switch (type) {
-    case BuiltinOptions_Conv2DOptions: {
-      auto ptr = reinterpret_cast<tflite::Conv2DOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_DepthwiseConv2DOptions: {
-      auto ptr = reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ConcatEmbeddingsOptions: {
-      auto ptr = reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LSHProjectionOptions: {
-      auto ptr = reinterpret_cast<tflite::LSHProjectionOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_Pool2DOptions: {
-      auto ptr = reinterpret_cast<tflite::Pool2DOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SVDFOptions: {
-      auto ptr = reinterpret_cast<tflite::SVDFOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_RNNOptions: {
-      auto ptr = reinterpret_cast<tflite::RNNOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_FullyConnectedOptions: {
-      auto ptr = reinterpret_cast<tflite::FullyConnectedOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SoftmaxOptions: {
-      auto ptr = reinterpret_cast<tflite::SoftmaxOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ConcatenationOptions: {
-      auto ptr = reinterpret_cast<tflite::ConcatenationOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_AddOptions: {
-      auto ptr = reinterpret_cast<tflite::AddOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_L2NormOptions: {
-      auto ptr = reinterpret_cast<tflite::L2NormOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LocalResponseNormalizationOptions: {
-      auto ptr = reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LSTMOptions: {
-      auto ptr = reinterpret_cast<tflite::LSTMOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ResizeBilinearOptions: {
-      auto ptr = reinterpret_cast<tflite::ResizeBilinearOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_CallOptions: {
-      auto ptr = reinterpret_cast<tflite::CallOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ReshapeOptions: {
-      auto ptr = reinterpret_cast<tflite::ReshapeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SkipGramOptions: {
-      auto ptr = reinterpret_cast<tflite::SkipGramOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SpaceToDepthOptions: {
-      auto ptr = reinterpret_cast<tflite::SpaceToDepthOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_EmbeddingLookupSparseOptions: {
-      auto ptr = reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_MulOptions: {
-      auto ptr = reinterpret_cast<tflite::MulOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_PadOptions: {
-      auto ptr = reinterpret_cast<tflite::PadOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_GatherOptions: {
-      auto ptr = reinterpret_cast<tflite::GatherOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_BatchToSpaceNDOptions: {
-      auto ptr = reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SpaceToBatchNDOptions: {
-      auto ptr = reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_TransposeOptions: {
-      auto ptr = reinterpret_cast<tflite::TransposeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ReducerOptions: {
-      auto ptr = reinterpret_cast<tflite::ReducerOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SubOptions: {
-      auto ptr = reinterpret_cast<tflite::SubOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_DivOptions: {
-      auto ptr = reinterpret_cast<tflite::DivOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SqueezeOptions: {
-      auto ptr = reinterpret_cast<tflite::SqueezeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SequenceRNNOptions: {
-      auto ptr = reinterpret_cast<tflite::SequenceRNNOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_StridedSliceOptions: {
-      auto ptr = reinterpret_cast<tflite::StridedSliceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ExpOptions: {
-      auto ptr = reinterpret_cast<tflite::ExpOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_TopKV2Options: {
-      auto ptr = reinterpret_cast<tflite::TopKV2OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SplitOptions: {
-      auto ptr = reinterpret_cast<tflite::SplitOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LogSoftmaxOptions: {
-      auto ptr = reinterpret_cast<tflite::LogSoftmaxOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_CastOptions: {
-      auto ptr = reinterpret_cast<tflite::CastOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_DequantizeOptions: {
-      auto ptr = reinterpret_cast<tflite::DequantizeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_MaximumMinimumOptions: {
-      auto ptr = reinterpret_cast<tflite::MaximumMinimumOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ArgMaxOptions: {
-      auto ptr = reinterpret_cast<tflite::ArgMaxOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LessOptions: {
-      auto ptr = reinterpret_cast<tflite::LessOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_NegOptions: {
-      auto ptr = reinterpret_cast<tflite::NegOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_PadV2Options: {
-      auto ptr = reinterpret_cast<tflite::PadV2OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_GreaterOptions: {
-      auto ptr = reinterpret_cast<tflite::GreaterOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_GreaterEqualOptions: {
-      auto ptr = reinterpret_cast<tflite::GreaterEqualOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LessEqualOptions: {
-      auto ptr = reinterpret_cast<tflite::LessEqualOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SelectOptions: {
-      auto ptr = reinterpret_cast<tflite::SelectOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SliceOptions: {
-      auto ptr = reinterpret_cast<tflite::SliceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_TransposeConvOptions: {
-      auto ptr = reinterpret_cast<tflite::TransposeConvOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SparseToDenseOptions: {
-      auto ptr = reinterpret_cast<tflite::SparseToDenseOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_TileOptions: {
-      auto ptr = reinterpret_cast<tflite::TileOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ExpandDimsOptions: {
-      auto ptr = reinterpret_cast<tflite::ExpandDimsOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_EqualOptions: {
-      auto ptr = reinterpret_cast<tflite::EqualOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_NotEqualOptions: {
-      auto ptr = reinterpret_cast<tflite::NotEqualOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ShapeOptions: {
-      auto ptr = reinterpret_cast<tflite::ShapeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_PowOptions: {
-      auto ptr = reinterpret_cast<tflite::PowOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ArgMinOptions: {
-      auto ptr = reinterpret_cast<tflite::ArgMinOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_FakeQuantOptions: {
-      auto ptr = reinterpret_cast<tflite::FakeQuantOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_PackOptions: {
-      auto ptr = reinterpret_cast<tflite::PackOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LogicalOrOptions: {
-      auto ptr = reinterpret_cast<tflite::LogicalOrOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_OneHotOptions: {
-      auto ptr = reinterpret_cast<tflite::OneHotOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LogicalAndOptions: {
-      auto ptr = reinterpret_cast<tflite::LogicalAndOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LogicalNotOptions: {
-      auto ptr = reinterpret_cast<tflite::LogicalNotOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_UnpackOptions: {
-      auto ptr = reinterpret_cast<tflite::UnpackOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_FloorDivOptions: {
-      auto ptr = reinterpret_cast<tflite::FloorDivOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SquareOptions: {
-      auto ptr = reinterpret_cast<tflite::SquareOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ZerosLikeOptions: {
-      auto ptr = reinterpret_cast<tflite::ZerosLikeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_FillOptions: {
-      auto ptr = reinterpret_cast<tflite::FillOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
-      auto ptr = reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
-      auto ptr = reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_FloorModOptions: {
-      auto ptr = reinterpret_cast<tflite::FloorModOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_RangeOptions: {
-      auto ptr = reinterpret_cast<tflite::RangeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ResizeNearestNeighborOptions: {
-      auto ptr = reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_LeakyReluOptions: {
-      auto ptr = reinterpret_cast<tflite::LeakyReluOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SquaredDifferenceOptions: {
-      auto ptr = reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_MirrorPadOptions: {
-      auto ptr = reinterpret_cast<tflite::MirrorPadOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_AbsOptions: {
-      auto ptr = reinterpret_cast<tflite::AbsOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SplitVOptions: {
-      auto ptr = reinterpret_cast<tflite::SplitVOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_UniqueOptions: {
-      auto ptr = reinterpret_cast<tflite::UniqueOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ReverseV2Options: {
-      auto ptr = reinterpret_cast<tflite::ReverseV2OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_AddNOptions: {
-      auto ptr = reinterpret_cast<tflite::AddNOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_GatherNdOptions: {
-      auto ptr = reinterpret_cast<tflite::GatherNdOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_CosOptions: {
-      auto ptr = reinterpret_cast<tflite::CosOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_WhereOptions: {
-      auto ptr = reinterpret_cast<tflite::WhereOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_RankOptions: {
-      auto ptr = reinterpret_cast<tflite::RankOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ReverseSequenceOptions: {
-      auto ptr = reinterpret_cast<tflite::ReverseSequenceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_MatrixDiagOptions: {
-      auto ptr = reinterpret_cast<tflite::MatrixDiagOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_QuantizeOptions: {
-      auto ptr = reinterpret_cast<tflite::QuantizeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_MatrixSetDiagOptions: {
-      auto ptr = reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_HardSwishOptions: {
-      auto ptr = reinterpret_cast<tflite::HardSwishOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_IfOptions: {
-      auto ptr = reinterpret_cast<tflite::IfOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_WhileOptions: {
-      auto ptr = reinterpret_cast<tflite::WhileOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_DepthToSpaceOptions: {
-      auto ptr = reinterpret_cast<tflite::DepthToSpaceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_NonMaxSuppressionV4Options: {
-      auto ptr = reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_NonMaxSuppressionV5Options: {
-      auto ptr = reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_ScatterNdOptions: {
-      auto ptr = reinterpret_cast<tflite::ScatterNdOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SelectV2Options: {
-      auto ptr = reinterpret_cast<tflite::SelectV2OptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_DensifyOptions: {
-      auto ptr = reinterpret_cast<tflite::DensifyOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_SegmentSumOptions: {
-      auto ptr = reinterpret_cast<tflite::SegmentSumOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_BatchMatMulOptions: {
-      auto ptr = reinterpret_cast<tflite::BatchMatMulOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_CumsumOptions: {
-      auto ptr = reinterpret_cast<tflite::CumsumOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_CallOnceOptions: {
-      auto ptr = reinterpret_cast<tflite::CallOnceOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_BroadcastToOptions: {
-      auto ptr = reinterpret_cast<tflite::BroadcastToOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_Rfft2dOptions: {
-      auto ptr = reinterpret_cast<tflite::Rfft2dOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_Conv3DOptions: {
-      auto ptr = reinterpret_cast<tflite::Conv3DOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_HashtableOptions: {
-      auto ptr = reinterpret_cast<tflite::HashtableOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_HashtableFindOptions: {
-      auto ptr = reinterpret_cast<tflite::HashtableFindOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_HashtableImportOptions: {
-      auto ptr = reinterpret_cast<tflite::HashtableImportOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    case BuiltinOptions_HashtableSizeOptions: {
-      auto ptr = reinterpret_cast<tflite::HashtableSizeOptionsT *>(value);
-      delete ptr;
-      break;
-    }
-    default: break;
-  }
-  value = nullptr;
-  type = BuiltinOptions_NONE;
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::AssignVariableOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
 
-inline const tflite::Model *GetModel(const void *buf) {
-  return flatbuffers::GetRoot<tflite::Model>(buf);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::RandomOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
 
-inline const tflite::Model *GetSizePrefixedModel(const void *buf) {
-  return flatbuffers::GetSizePrefixedRoot<tflite::Model>(buf);
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::BucketizeOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_BucketizeOptions;
+};
 
-inline const char *ModelIdentifier() {
-  return "TFL3";
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::GeluOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions;
+};
 
-inline bool ModelBufferHasIdentifier(const void *buf) {
-  return flatbuffers::BufferHasIdentifier(
-      buf, ModelIdentifier());
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::DynamicUpdateSliceOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions;
+};
 
-inline bool VerifyModelBuffer(
-    flatbuffers::Verifier &verifier) {
-  return verifier.VerifyBuffer<tflite::Model>(ModelIdentifier());
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentProdOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions;
+};
 
-inline bool VerifySizePrefixedModelBuffer(
-    flatbuffers::Verifier &verifier) {
-  return verifier.VerifySizePrefixedBuffer<tflite::Model>(ModelIdentifier());
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentMaxOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions;
+};
 
-inline const char *ModelExtension() {
-  return "tflite";
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentMinOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions;
+};
 
-inline void FinishModelBuffer(
-    flatbuffers::FlatBufferBuilder &fbb,
-    flatbuffers::Offset<tflite::Model> root) {
-  fbb.Finish(root, ModelIdentifier());
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::UnsortedSegmentSumOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions;
+};
 
-inline void FinishSizePrefixedModelBuffer(
-    flatbuffers::FlatBufferBuilder &fbb,
-    flatbuffers::Offset<tflite::Model> root) {
-  fbb.FinishSizePrefixed(root, ModelIdentifier());
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::ATan2OptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options;
+};
 
-inline std::unique_ptr<tflite::ModelT> UnPackModel(
-    const void *buf,
-    const flatbuffers::resolver_function_t *res = nullptr) {
-  return std::unique_ptr<tflite::ModelT>(GetModel(buf)->UnPack(res));
-}
+template<> struct BuiltinOptionsUnionTraits<tflite::SignOptionsT> {
+  static const BuiltinOptions enum_value = BuiltinOptions_SignOptions;
+};
 
-inline std::unique_ptr<tflite::ModelT> UnPackSizePrefixedModel(
-    const void *buf,
-    const flatbuffers::resolver_function_t *res = nullptr) {
-  return std::unique_ptr<tflite::ModelT>(GetSizePrefixedModel(buf)->UnPack(res));
-}
+struct OperatorCodeT : public flatbuffers::NativeTable {
+  typedef OperatorCode TableType;
+  int8_t deprecated_builtin_code = 0;
+  std::string custom_code{};
+  int32_t version = 1;
+  tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD;
+};
 
-}  // namespace tflite
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef OperatorCodeT NativeTableType;
+  typedef OperatorCodeBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_DEPRECATED_BUILTIN_CODE = 4,
+    VT_CUSTOM_CODE = 6,
+    VT_VERSION = 8,
+    VT_BUILTIN_CODE = 10
+  };
+  int8_t deprecated_builtin_code() const {
+    return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0);
+  }
+  const flatbuffers::String *custom_code() const {
+    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+  }
+  int32_t version() const {
+    return GetField<int32_t>(VT_VERSION, 1);
+  }
+  tflite::BuiltinOperator builtin_code() const {
+    return static_cast<tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE, 1) &&
+           VerifyOffset(verifier, VT_CUSTOM_CODE) &&
+           verifier.VerifyString(custom_code()) &&
+           VerifyField<int32_t>(verifier, VT_VERSION, 4) &&
+           VerifyField<int32_t>(verifier, VT_BUILTIN_CODE, 4) &&
+           verifier.EndTable();
+  }
+  OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<OperatorCode> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
 
-#endif  // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+}
+#endif  // FLATBUFFERS_GENERATED_SCHEMA_SUPPL_TFLITE_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h b/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h
new file mode 100644
index 0000000..aaa2252
--- /dev/null
+++ b/edge-impulse-sdk/tensorflow/lite/schema/schema_generated_full.h
@@ -0,0 +1,17601 @@
+// automatically generated by the FlatBuffers compiler, do not modify
+
+
+#ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+#define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h"
+
+// Ensure the included flatbuffers.h is the same version as when this file was
+// generated, otherwise it may not be compatible.
+static_assert(FLATBUFFERS_VERSION_MAJOR == 2 &&  // Build stops here unless the included flatbuffers headers report exactly 2.0.6.
+              FLATBUFFERS_VERSION_MINOR == 0 &&
+              FLATBUFFERS_VERSION_REVISION == 6,
+             "Non-compatible flatbuffers version included");
+
+namespace tflite {
+
+enum QuantizationDetails : uint8_t {  // Tag values for QuantizationDetailsUnion::type; underlying type is uint8_t.
+  QuantizationDetails_NONE = 0,
+  QuantizationDetails_CustomQuantization = 1,
+  QuantizationDetails_MIN = QuantizationDetails_NONE,  // Alias of the smallest enumerator.
+  QuantizationDetails_MAX = QuantizationDetails_CustomQuantization  // Alias of the largest enumerator.
+};
+
+inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] {  // Returns a reference to a 2-element array listing the non-alias enumerators.
+  static const QuantizationDetails values[] = {  // Function-local static, so the returned reference stays valid after the call.
+    QuantizationDetails_NONE,
+    QuantizationDetails_CustomQuantization
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesQuantizationDetails() {  // Returns the name table, positioned so that index == enumerator value.
+  static const char * const names[3] = {  // Two names plus a trailing nullptr entry.
+    "NONE",
+    "CustomQuantization",
+    nullptr
+  };
+  return names;
+}
+
+inline const char *EnumNameQuantizationDetails(QuantizationDetails e) {  // Maps an enumerator to its entry in EnumNamesQuantizationDetails().
+  if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization)) return "";  // Empty string when IsOutRange reports true, so the table is never indexed in that case.
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesQuantizationDetails()[index];
+}
+
+template<typename T> struct QuantizationDetailsTraits {  // Primary template: any type without a specialization maps to NONE.
+  static const QuantizationDetails enum_value = QuantizationDetails_NONE;
+};
+
+template<> struct QuantizationDetailsTraits<tflite::CustomQuantization> {  // Specialization: tflite::CustomQuantization maps to its own tag.
+  static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
+};
+
+template<typename T> struct QuantizationDetailsUnionTraits {  // Primary template: any type without a specialization maps to NONE.
+  static const QuantizationDetails enum_value = QuantizationDetails_NONE;
+};
+
+template<> struct QuantizationDetailsUnionTraits<tflite::CustomQuantizationT> {  // Specialization for the ...T type; consulted by QuantizationDetailsUnion::Set.
+  static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
+};
+
+struct QuantizationDetailsUnion {  // Tagged holder: `type` says how the untyped `value` pointer is to be interpreted.
+  QuantizationDetails type;
+  void *value;  // nullptr in the default state; Set() stores a `new`-allocated object here.
+
+  QuantizationDetailsUnion() : type(QuantizationDetails_NONE), value(nullptr) {}
+  QuantizationDetailsUnion(QuantizationDetailsUnion&& u) FLATBUFFERS_NOEXCEPT :  // Move: initialise empty, then swap contents with u (u is left as NONE/nullptr).
+    type(QuantizationDetails_NONE), value(nullptr)
+    { std::swap(type, u.type); std::swap(value, u.value); }
+  QuantizationDetailsUnion(const QuantizationDetailsUnion &);  // Copy constructor: declared only; defined outside this struct.
+  QuantizationDetailsUnion &operator=(const QuantizationDetailsUnion &u)  // Copy-assign: copy u into a temporary, swap with it; the temporary's destructor then runs on the old contents.
+    { QuantizationDetailsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
+  QuantizationDetailsUnion &operator=(QuantizationDetailsUnion &&u) FLATBUFFERS_NOEXCEPT  // Move-assign: plain swap, so u ends up holding this object's previous contents.
+    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
+  ~QuantizationDetailsUnion() { Reset(); }
+
+  void Reset();  // Declared only; invoked by the destructor and at the start of Set().
+
+  template <typename T>
+  void Set(T&& val) {  // Replaces the held object with a heap copy/move of val, tagged via QuantizationDetailsUnionTraits.
+    typedef typename std::remove_reference<T>::type RT;  // T with the reference removed (cv-qualifiers are kept).
+    Reset();
+    type = QuantizationDetailsUnionTraits<RT>::enum_value;
+    if (type != QuantizationDetails_NONE) {  // Types that resolve to NONE are not stored; value is not assigned in that case.
+      value = new RT(std::forward<T>(val));
+    }
+  }
+
+  static void *UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver);  // Declared only.
+  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;  // Declared only.
+
+  tflite::CustomQuantizationT *AsCustomQuantization() {  // Typed view of value when the tag matches, otherwise nullptr.
+    return type == QuantizationDetails_CustomQuantization ?
+      reinterpret_cast<tflite::CustomQuantizationT *>(value) : nullptr;
+  }
+  const tflite::CustomQuantizationT *AsCustomQuantization() const {  // Const overload of the accessor above.
+    return type == QuantizationDetails_CustomQuantization ?
+      reinterpret_cast<const tflite::CustomQuantizationT *>(value) : nullptr;
+  }
+};
+
+bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type);  // Prototype only: takes one untyped object plus its tag.
+bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);  // Prototype only: takes a vector of values and a vector of uint8 tags.
+
+enum DimensionType : int8_t {  // Two-valued enum with int8_t as its underlying type.
+  DimensionType_DENSE = 0,
+  DimensionType_SPARSE_CSR = 1,
+  DimensionType_MIN = DimensionType_DENSE,  // Alias of the smallest enumerator.
+  DimensionType_MAX = DimensionType_SPARSE_CSR  // Alias of the largest enumerator.
+};
+
+inline const DimensionType (&EnumValuesDimensionType())[2] {  // Returns a reference to a 2-element array listing the non-alias enumerators.
+  static const DimensionType values[] = {  // Function-local static, so the returned reference stays valid after the call.
+    DimensionType_DENSE,
+    DimensionType_SPARSE_CSR
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesDimensionType() {  // Returns the name table, positioned so that index == enumerator value.
+  static const char * const names[3] = {  // Two names plus a trailing nullptr entry.
+    "DENSE",
+    "SPARSE_CSR",
+    nullptr
+  };
+  return names;
+}
+
+inline const char *EnumNameDimensionType(DimensionType e) {  // Maps an enumerator to its entry in EnumNamesDimensionType().
+  if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR)) return "";  // Empty string when IsOutRange reports true, so the table is never indexed in that case.
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesDimensionType()[index];
+}
+
+enum SparseIndexVector : uint8_t {  // Tag values for SparseIndexVectorUnion::type; underlying type is uint8_t.
+  SparseIndexVector_NONE = 0,
+  SparseIndexVector_Int32Vector = 1,
+  SparseIndexVector_Uint16Vector = 2,
+  SparseIndexVector_Uint8Vector = 3,
+  SparseIndexVector_MIN = SparseIndexVector_NONE,  // Alias of the smallest enumerator.
+  SparseIndexVector_MAX = SparseIndexVector_Uint8Vector  // Alias of the largest enumerator.
+};
+
+inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4] {  // Returns a reference to a 4-element array listing the non-alias enumerators.
+  static const SparseIndexVector values[] = {  // Function-local static, so the returned reference stays valid after the call.
+    SparseIndexVector_NONE,
+    SparseIndexVector_Int32Vector,
+    SparseIndexVector_Uint16Vector,
+    SparseIndexVector_Uint8Vector
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesSparseIndexVector() {  // Returns the name table, positioned so that index == enumerator value.
+  static const char * const names[5] = {  // Four names plus a trailing nullptr entry.
+    "NONE",
+    "Int32Vector",
+    "Uint16Vector",
+    "Uint8Vector",
+    nullptr
+  };
+  return names;
+}
+
+inline const char *EnumNameSparseIndexVector(SparseIndexVector e) {  // Maps an enumerator to its entry in EnumNamesSparseIndexVector().
+  if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector)) return "";  // Empty string when IsOutRange reports true, so the table is never indexed in that case.
+  const size_t index = static_cast<size_t>(e);
+  return EnumNamesSparseIndexVector()[index];
+}
+
+template<typename T> struct SparseIndexVectorTraits {  // Primary template: any type without a specialization maps to NONE.
+  static const SparseIndexVector enum_value = SparseIndexVector_NONE;
+};
+
+template<> struct SparseIndexVectorTraits<tflite::Int32Vector> {  // tflite::Int32Vector -> SparseIndexVector_Int32Vector.
+  static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
+};
+
+template<> struct SparseIndexVectorTraits<tflite::Uint16Vector> {  // tflite::Uint16Vector -> SparseIndexVector_Uint16Vector.
+  static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
+};
+
+template<> struct SparseIndexVectorTraits<tflite::Uint8Vector> {  // tflite::Uint8Vector -> SparseIndexVector_Uint8Vector.
+  static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
+};
+
+template<typename T> struct SparseIndexVectorUnionTraits {  // Primary template: any type without a specialization maps to NONE.
+  static const SparseIndexVector enum_value = SparseIndexVector_NONE;
+};
+
+template<> struct SparseIndexVectorUnionTraits<tflite::Int32VectorT> {  // tflite::Int32VectorT -> SparseIndexVector_Int32Vector; consulted by SparseIndexVectorUnion::Set.
+  static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
+};
+
+template<> struct SparseIndexVectorUnionTraits<tflite::Uint16VectorT> {  // tflite::Uint16VectorT -> SparseIndexVector_Uint16Vector.
+  static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
+};
+
+template<> struct SparseIndexVectorUnionTraits<tflite::Uint8VectorT> {  // tflite::Uint8VectorT -> SparseIndexVector_Uint8Vector.
+  static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
+};
+
+struct SparseIndexVectorUnion {  // Tagged holder: `type` says how the untyped `value` pointer is to be interpreted.
+  SparseIndexVector type;
+  void *value;  // nullptr in the default state; Set() stores a `new`-allocated object here.
+
+  SparseIndexVectorUnion() : type(SparseIndexVector_NONE), value(nullptr) {}
+  SparseIndexVectorUnion(SparseIndexVectorUnion&& u) FLATBUFFERS_NOEXCEPT :  // Move: initialise empty, then swap contents with u (u is left as NONE/nullptr).
+    type(SparseIndexVector_NONE), value(nullptr)
+    { std::swap(type, u.type); std::swap(value, u.value); }
+  SparseIndexVectorUnion(const SparseIndexVectorUnion &);  // Copy constructor: declared only; defined outside this struct.
+  SparseIndexVectorUnion &operator=(const SparseIndexVectorUnion &u)  // Copy-assign: copy u into a temporary, swap with it; the temporary's destructor then runs on the old contents.
+    { SparseIndexVectorUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
+  SparseIndexVectorUnion &operator=(SparseIndexVectorUnion &&u) FLATBUFFERS_NOEXCEPT  // Move-assign: plain swap, so u ends up holding this object's previous contents.
+    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
+  ~SparseIndexVectorUnion() { Reset(); }
+
+  void Reset();  // Declared only; invoked by the destructor and at the start of Set().
+
+  template <typename T>
+  void Set(T&& val) {  // Replaces the held object with a heap copy/move of val, tagged via SparseIndexVectorUnionTraits.
+    typedef typename std::remove_reference<T>::type RT;  // T with the reference removed (cv-qualifiers are kept).
+    Reset();
+    type = SparseIndexVectorUnionTraits<RT>::enum_value;
+    if (type != SparseIndexVector_NONE) {  // Types that resolve to NONE are not stored; value is not assigned in that case.
+      value = new RT(std::forward<T>(val));
+    }
+  }
+
+  static void *UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver);  // Declared only.
+  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;  // Declared only.
+
+  tflite::Int32VectorT *AsInt32Vector() {  // Each As*() accessor: typed view of value when the tag matches, otherwise nullptr.
+    return type == SparseIndexVector_Int32Vector ?
+      reinterpret_cast<tflite::Int32VectorT *>(value) : nullptr;
+  }
+  const tflite::Int32VectorT *AsInt32Vector() const {  // Const overload.
+    return type == SparseIndexVector_Int32Vector ?
+      reinterpret_cast<const tflite::Int32VectorT *>(value) : nullptr;
+  }
+  tflite::Uint16VectorT *AsUint16Vector() {  // nullptr unless type == SparseIndexVector_Uint16Vector.
+    return type == SparseIndexVector_Uint16Vector ?
+      reinterpret_cast<tflite::Uint16VectorT *>(value) : nullptr;
+  }
+  const tflite::Uint16VectorT *AsUint16Vector() const {  // Const overload.
+    return type == SparseIndexVector_Uint16Vector ?
+      reinterpret_cast<const tflite::Uint16VectorT *>(value) : nullptr;
+  }
+  tflite::Uint8VectorT *AsUint8Vector() {  // nullptr unless type == SparseIndexVector_Uint8Vector.
+    return type == SparseIndexVector_Uint8Vector ?
+      reinterpret_cast<tflite::Uint8VectorT *>(value) : nullptr;
+  }
+  const tflite::Uint8VectorT *AsUint8Vector() const {  // Const overload.
+    return type == SparseIndexVector_Uint8Vector ?
+      reinterpret_cast<const tflite::Uint8VectorT *>(value) : nullptr;
+  }
+};
+
+bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type);  // Prototype only: takes one untyped object plus its tag.
+bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);  // Prototype only: takes a vector of values and a vector of uint8 tags.
+
+struct BuiltinOptionsUnion {
+  BuiltinOptions type;
+  void *value;
+
+  BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {}
+  BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT :
+    type(BuiltinOptions_NONE), value(nullptr)
+    { std::swap(type, u.type); std::swap(value, u.value); }
+  BuiltinOptionsUnion(const BuiltinOptionsUnion &);
+  BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u)
+    { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; }
+  BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT
+    { std::swap(type, u.type); std::swap(value, u.value); return *this; }
+  ~BuiltinOptionsUnion() { Reset(); }
+
+  void Reset();
+
+  template <typename T>
+  void Set(T&& val) {
+    typedef typename std::remove_reference<T>::type RT;
+    Reset();
+    type = BuiltinOptionsUnionTraits<RT>::enum_value;
+    if (type != BuiltinOptions_NONE) {
+      value = new RT(std::forward<T>(val));
+    }
+  }
+
+  static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver);
+  flatbuffers::Offset<void> Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const;
+
+  tflite::Conv2DOptionsT *AsConv2DOptions() {
+    return type == BuiltinOptions_Conv2DOptions ?
+      reinterpret_cast<tflite::Conv2DOptionsT *>(value) : nullptr;
+  }
+  const tflite::Conv2DOptionsT *AsConv2DOptions() const {
+    return type == BuiltinOptions_Conv2DOptions ?
+      reinterpret_cast<const tflite::Conv2DOptionsT *>(value) : nullptr;
+  }
+  tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() {
+    return type == BuiltinOptions_DepthwiseConv2DOptions ?
+      reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(value) : nullptr;
+  }
+  const tflite::DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const {
+    return type == BuiltinOptions_DepthwiseConv2DOptions ?
+      reinterpret_cast<const tflite::DepthwiseConv2DOptionsT *>(value) : nullptr;
+  }
+  tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() {
+    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
+      reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(value) : nullptr;
+  }
+  const tflite::ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const {
+    return type == BuiltinOptions_ConcatEmbeddingsOptions ?
+      reinterpret_cast<const tflite::ConcatEmbeddingsOptionsT *>(value) : nullptr;
+  }
+  tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() {
+    return type == BuiltinOptions_LSHProjectionOptions ?
+      reinterpret_cast<tflite::LSHProjectionOptionsT *>(value) : nullptr;
+  }
+  const tflite::LSHProjectionOptionsT *AsLSHProjectionOptions() const {
+    return type == BuiltinOptions_LSHProjectionOptions ?
+      reinterpret_cast<const tflite::LSHProjectionOptionsT *>(value) : nullptr;
+  }
+  tflite::Pool2DOptionsT *AsPool2DOptions() {
+    return type == BuiltinOptions_Pool2DOptions ?
+      reinterpret_cast<tflite::Pool2DOptionsT *>(value) : nullptr;
+  }
+  const tflite::Pool2DOptionsT *AsPool2DOptions() const {
+    return type == BuiltinOptions_Pool2DOptions ?
+      reinterpret_cast<const tflite::Pool2DOptionsT *>(value) : nullptr;
+  }
+  tflite::SVDFOptionsT *AsSVDFOptions() {
+    return type == BuiltinOptions_SVDFOptions ?
+      reinterpret_cast<tflite::SVDFOptionsT *>(value) : nullptr;
+  }
+  const tflite::SVDFOptionsT *AsSVDFOptions() const {
+    return type == BuiltinOptions_SVDFOptions ?
+      reinterpret_cast<const tflite::SVDFOptionsT *>(value) : nullptr;
+  }
+  tflite::RNNOptionsT *AsRNNOptions() {
+    return type == BuiltinOptions_RNNOptions ?
+      reinterpret_cast<tflite::RNNOptionsT *>(value) : nullptr;
+  }
+  const tflite::RNNOptionsT *AsRNNOptions() const {
+    return type == BuiltinOptions_RNNOptions ?
+      reinterpret_cast<const tflite::RNNOptionsT *>(value) : nullptr;
+  }
+  tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() {
+    return type == BuiltinOptions_FullyConnectedOptions ?
+      reinterpret_cast<tflite::FullyConnectedOptionsT *>(value) : nullptr;
+  }
+  const tflite::FullyConnectedOptionsT *AsFullyConnectedOptions() const {
+    return type == BuiltinOptions_FullyConnectedOptions ?
+      reinterpret_cast<const tflite::FullyConnectedOptionsT *>(value) : nullptr;
+  }
+  tflite::SoftmaxOptionsT *AsSoftmaxOptions() {
+    return type == BuiltinOptions_SoftmaxOptions ?
+      reinterpret_cast<tflite::SoftmaxOptionsT *>(value) : nullptr;
+  }
+  const tflite::SoftmaxOptionsT *AsSoftmaxOptions() const {
+    return type == BuiltinOptions_SoftmaxOptions ?
+      reinterpret_cast<const tflite::SoftmaxOptionsT *>(value) : nullptr;
+  }
+  tflite::ConcatenationOptionsT *AsConcatenationOptions() {
+    return type == BuiltinOptions_ConcatenationOptions ?
+      reinterpret_cast<tflite::ConcatenationOptionsT *>(value) : nullptr;
+  }
+  const tflite::ConcatenationOptionsT *AsConcatenationOptions() const {
+    return type == BuiltinOptions_ConcatenationOptions ?
+      reinterpret_cast<const tflite::ConcatenationOptionsT *>(value) : nullptr;
+  }
+  tflite::AddOptionsT *AsAddOptions() {
+    return type == BuiltinOptions_AddOptions ?
+      reinterpret_cast<tflite::AddOptionsT *>(value) : nullptr;
+  }
+  const tflite::AddOptionsT *AsAddOptions() const {
+    return type == BuiltinOptions_AddOptions ?
+      reinterpret_cast<const tflite::AddOptionsT *>(value) : nullptr;
+  }
+  tflite::L2NormOptionsT *AsL2NormOptions() {
+    return type == BuiltinOptions_L2NormOptions ?
+      reinterpret_cast<tflite::L2NormOptionsT *>(value) : nullptr;
+  }
+  const tflite::L2NormOptionsT *AsL2NormOptions() const {
+    return type == BuiltinOptions_L2NormOptions ?
+      reinterpret_cast<const tflite::L2NormOptionsT *>(value) : nullptr;
+  }
+  tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() {
+    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
+      reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(value) : nullptr;
+  }
+  const tflite::LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const {
+    return type == BuiltinOptions_LocalResponseNormalizationOptions ?
+      reinterpret_cast<const tflite::LocalResponseNormalizationOptionsT *>(value) : nullptr;
+  }
+  tflite::LSTMOptionsT *AsLSTMOptions() {
+    return type == BuiltinOptions_LSTMOptions ?
+      reinterpret_cast<tflite::LSTMOptionsT *>(value) : nullptr;
+  }
+  const tflite::LSTMOptionsT *AsLSTMOptions() const {
+    return type == BuiltinOptions_LSTMOptions ?
+      reinterpret_cast<const tflite::LSTMOptionsT *>(value) : nullptr;
+  }
+  tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() {
+    return type == BuiltinOptions_ResizeBilinearOptions ?
+      reinterpret_cast<tflite::ResizeBilinearOptionsT *>(value) : nullptr;
+  }
+  const tflite::ResizeBilinearOptionsT *AsResizeBilinearOptions() const {
+    return type == BuiltinOptions_ResizeBilinearOptions ?
+      reinterpret_cast<const tflite::ResizeBilinearOptionsT *>(value) : nullptr;
+  }
+  tflite::CallOptionsT *AsCallOptions() {
+    return type == BuiltinOptions_CallOptions ?
+      reinterpret_cast<tflite::CallOptionsT *>(value) : nullptr;
+  }
+  const tflite::CallOptionsT *AsCallOptions() const {
+    return type == BuiltinOptions_CallOptions ?
+      reinterpret_cast<const tflite::CallOptionsT *>(value) : nullptr;
+  }
+  tflite::ReshapeOptionsT *AsReshapeOptions() {
+    return type == BuiltinOptions_ReshapeOptions ?
+      reinterpret_cast<tflite::ReshapeOptionsT *>(value) : nullptr;
+  }
+  const tflite::ReshapeOptionsT *AsReshapeOptions() const {
+    return type == BuiltinOptions_ReshapeOptions ?
+      reinterpret_cast<const tflite::ReshapeOptionsT *>(value) : nullptr;
+  }
+  tflite::SkipGramOptionsT *AsSkipGramOptions() {
+    return type == BuiltinOptions_SkipGramOptions ?
+      reinterpret_cast<tflite::SkipGramOptionsT *>(value) : nullptr;
+  }
+  const tflite::SkipGramOptionsT *AsSkipGramOptions() const {
+    return type == BuiltinOptions_SkipGramOptions ?
+      reinterpret_cast<const tflite::SkipGramOptionsT *>(value) : nullptr;
+  }
+  tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() {
+    return type == BuiltinOptions_SpaceToDepthOptions ?
+      reinterpret_cast<tflite::SpaceToDepthOptionsT *>(value) : nullptr;
+  }
+  const tflite::SpaceToDepthOptionsT *AsSpaceToDepthOptions() const {
+    return type == BuiltinOptions_SpaceToDepthOptions ?
+      reinterpret_cast<const tflite::SpaceToDepthOptionsT *>(value) : nullptr;
+  }
+  tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() {
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
+      reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(value) : nullptr;
+  }
+  const tflite::EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const {
+    return type == BuiltinOptions_EmbeddingLookupSparseOptions ?
+      reinterpret_cast<const tflite::EmbeddingLookupSparseOptionsT *>(value) : nullptr;
+  }
+  tflite::MulOptionsT *AsMulOptions() {
+    return type == BuiltinOptions_MulOptions ?
+      reinterpret_cast<tflite::MulOptionsT *>(value) : nullptr;
+  }
+  const tflite::MulOptionsT *AsMulOptions() const {
+    return type == BuiltinOptions_MulOptions ?
+      reinterpret_cast<const tflite::MulOptionsT *>(value) : nullptr;
+  }
+  tflite::PadOptionsT *AsPadOptions() {
+    return type == BuiltinOptions_PadOptions ?
+      reinterpret_cast<tflite::PadOptionsT *>(value) : nullptr;
+  }
+  const tflite::PadOptionsT *AsPadOptions() const {
+    return type == BuiltinOptions_PadOptions ?
+      reinterpret_cast<const tflite::PadOptionsT *>(value) : nullptr;
+  }
+  tflite::GatherOptionsT *AsGatherOptions() {
+    return type == BuiltinOptions_GatherOptions ?
+      reinterpret_cast<tflite::GatherOptionsT *>(value) : nullptr;
+  }
+  const tflite::GatherOptionsT *AsGatherOptions() const {
+    return type == BuiltinOptions_GatherOptions ?
+      reinterpret_cast<const tflite::GatherOptionsT *>(value) : nullptr;
+  }
+  tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() {
+    return type == BuiltinOptions_BatchToSpaceNDOptions ?
+      reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(value) : nullptr;
+  }
+  const tflite::BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const {
+    return type == BuiltinOptions_BatchToSpaceNDOptions ?
+      reinterpret_cast<const tflite::BatchToSpaceNDOptionsT *>(value) : nullptr;
+  }
+  tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() {
+    return type == BuiltinOptions_SpaceToBatchNDOptions ?
+      reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(value) : nullptr;
+  }
+  const tflite::SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const {
+    return type == BuiltinOptions_SpaceToBatchNDOptions ?
+      reinterpret_cast<const tflite::SpaceToBatchNDOptionsT *>(value) : nullptr;
+  }
+  tflite::TransposeOptionsT *AsTransposeOptions() {
+    return type == BuiltinOptions_TransposeOptions ?
+      reinterpret_cast<tflite::TransposeOptionsT *>(value) : nullptr;
+  }
+  const tflite::TransposeOptionsT *AsTransposeOptions() const {
+    return type == BuiltinOptions_TransposeOptions ?
+      reinterpret_cast<const tflite::TransposeOptionsT *>(value) : nullptr;
+  }
+  tflite::ReducerOptionsT *AsReducerOptions() {
+    return type == BuiltinOptions_ReducerOptions ?
+      reinterpret_cast<tflite::ReducerOptionsT *>(value) : nullptr;
+  }
+  const tflite::ReducerOptionsT *AsReducerOptions() const {
+    return type == BuiltinOptions_ReducerOptions ?
+      reinterpret_cast<const tflite::ReducerOptionsT *>(value) : nullptr;
+  }
+  tflite::SubOptionsT *AsSubOptions() {
+    return type == BuiltinOptions_SubOptions ?
+      reinterpret_cast<tflite::SubOptionsT *>(value) : nullptr;
+  }
+  const tflite::SubOptionsT *AsSubOptions() const {
+    return type == BuiltinOptions_SubOptions ?
+      reinterpret_cast<const tflite::SubOptionsT *>(value) : nullptr;
+  }
+  tflite::DivOptionsT *AsDivOptions() {
+    return type == BuiltinOptions_DivOptions ?
+      reinterpret_cast<tflite::DivOptionsT *>(value) : nullptr;
+  }
+  const tflite::DivOptionsT *AsDivOptions() const {
+    return type == BuiltinOptions_DivOptions ?
+      reinterpret_cast<const tflite::DivOptionsT *>(value) : nullptr;
+  }
+  tflite::SqueezeOptionsT *AsSqueezeOptions() {
+    return type == BuiltinOptions_SqueezeOptions ?
+      reinterpret_cast<tflite::SqueezeOptionsT *>(value) : nullptr;
+  }
+  const tflite::SqueezeOptionsT *AsSqueezeOptions() const {
+    return type == BuiltinOptions_SqueezeOptions ?
+      reinterpret_cast<const tflite::SqueezeOptionsT *>(value) : nullptr;
+  }
+  tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() {
+    return type == BuiltinOptions_SequenceRNNOptions ?
+      reinterpret_cast<tflite::SequenceRNNOptionsT *>(value) : nullptr;
+  }
+  const tflite::SequenceRNNOptionsT *AsSequenceRNNOptions() const {
+    return type == BuiltinOptions_SequenceRNNOptions ?
+      reinterpret_cast<const tflite::SequenceRNNOptionsT *>(value) : nullptr;
+  }
+  tflite::StridedSliceOptionsT *AsStridedSliceOptions() {
+    return type == BuiltinOptions_StridedSliceOptions ?
+      reinterpret_cast<tflite::StridedSliceOptionsT *>(value) : nullptr;
+  }
+  const tflite::StridedSliceOptionsT *AsStridedSliceOptions() const {
+    return type == BuiltinOptions_StridedSliceOptions ?
+      reinterpret_cast<const tflite::StridedSliceOptionsT *>(value) : nullptr;
+  }
+  tflite::ExpOptionsT *AsExpOptions() {
+    return type == BuiltinOptions_ExpOptions ?
+      reinterpret_cast<tflite::ExpOptionsT *>(value) : nullptr;
+  }
+  const tflite::ExpOptionsT *AsExpOptions() const {
+    return type == BuiltinOptions_ExpOptions ?
+      reinterpret_cast<const tflite::ExpOptionsT *>(value) : nullptr;
+  }
+  tflite::TopKV2OptionsT *AsTopKV2Options() {
+    return type == BuiltinOptions_TopKV2Options ?
+      reinterpret_cast<tflite::TopKV2OptionsT *>(value) : nullptr;
+  }
+  const tflite::TopKV2OptionsT *AsTopKV2Options() const {
+    return type == BuiltinOptions_TopKV2Options ?
+      reinterpret_cast<const tflite::TopKV2OptionsT *>(value) : nullptr;
+  }
+  tflite::SplitOptionsT *AsSplitOptions() {
+    return type == BuiltinOptions_SplitOptions ?
+      reinterpret_cast<tflite::SplitOptionsT *>(value) : nullptr;
+  }
+  const tflite::SplitOptionsT *AsSplitOptions() const {
+    return type == BuiltinOptions_SplitOptions ?
+      reinterpret_cast<const tflite::SplitOptionsT *>(value) : nullptr;
+  }
+  tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() {
+    return type == BuiltinOptions_LogSoftmaxOptions ?
+      reinterpret_cast<tflite::LogSoftmaxOptionsT *>(value) : nullptr;
+  }
+  const tflite::LogSoftmaxOptionsT *AsLogSoftmaxOptions() const {
+    return type == BuiltinOptions_LogSoftmaxOptions ?
+      reinterpret_cast<const tflite::LogSoftmaxOptionsT *>(value) : nullptr;
+  }
+  tflite::CastOptionsT *AsCastOptions() {
+    return type == BuiltinOptions_CastOptions ?
+      reinterpret_cast<tflite::CastOptionsT *>(value) : nullptr;
+  }
+  const tflite::CastOptionsT *AsCastOptions() const {
+    return type == BuiltinOptions_CastOptions ?
+      reinterpret_cast<const tflite::CastOptionsT *>(value) : nullptr;
+  }
+  tflite::DequantizeOptionsT *AsDequantizeOptions() {
+    return type == BuiltinOptions_DequantizeOptions ?
+      reinterpret_cast<tflite::DequantizeOptionsT *>(value) : nullptr;
+  }
+  const tflite::DequantizeOptionsT *AsDequantizeOptions() const {
+    return type == BuiltinOptions_DequantizeOptions ?
+      reinterpret_cast<const tflite::DequantizeOptionsT *>(value) : nullptr;
+  }
+  tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() {
+    return type == BuiltinOptions_MaximumMinimumOptions ?
+      reinterpret_cast<tflite::MaximumMinimumOptionsT *>(value) : nullptr;
+  }
+  const tflite::MaximumMinimumOptionsT *AsMaximumMinimumOptions() const {
+    return type == BuiltinOptions_MaximumMinimumOptions ?
+      reinterpret_cast<const tflite::MaximumMinimumOptionsT *>(value) : nullptr;
+  }
+  tflite::ArgMaxOptionsT *AsArgMaxOptions() {
+    return type == BuiltinOptions_ArgMaxOptions ?
+      reinterpret_cast<tflite::ArgMaxOptionsT *>(value) : nullptr;
+  }
+  const tflite::ArgMaxOptionsT *AsArgMaxOptions() const {
+    return type == BuiltinOptions_ArgMaxOptions ?
+      reinterpret_cast<const tflite::ArgMaxOptionsT *>(value) : nullptr;
+  }
+  tflite::LessOptionsT *AsLessOptions() {
+    return type == BuiltinOptions_LessOptions ?
+      reinterpret_cast<tflite::LessOptionsT *>(value) : nullptr;
+  }
+  const tflite::LessOptionsT *AsLessOptions() const {
+    return type == BuiltinOptions_LessOptions ?
+      reinterpret_cast<const tflite::LessOptionsT *>(value) : nullptr;
+  }
+  tflite::NegOptionsT *AsNegOptions() {
+    return type == BuiltinOptions_NegOptions ?
+      reinterpret_cast<tflite::NegOptionsT *>(value) : nullptr;
+  }
+  const tflite::NegOptionsT *AsNegOptions() const {
+    return type == BuiltinOptions_NegOptions ?
+      reinterpret_cast<const tflite::NegOptionsT *>(value) : nullptr;
+  }
+  tflite::PadV2OptionsT *AsPadV2Options() {
+    return type == BuiltinOptions_PadV2Options ?
+      reinterpret_cast<tflite::PadV2OptionsT *>(value) : nullptr;
+  }
+  const tflite::PadV2OptionsT *AsPadV2Options() const {
+    return type == BuiltinOptions_PadV2Options ?
+      reinterpret_cast<const tflite::PadV2OptionsT *>(value) : nullptr;
+  }
+  tflite::GreaterOptionsT *AsGreaterOptions() {
+    return type == BuiltinOptions_GreaterOptions ?
+      reinterpret_cast<tflite::GreaterOptionsT *>(value) : nullptr;
+  }
+  const tflite::GreaterOptionsT *AsGreaterOptions() const {
+    return type == BuiltinOptions_GreaterOptions ?
+      reinterpret_cast<const tflite::GreaterOptionsT *>(value) : nullptr;
+  }
+  tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() {
+    return type == BuiltinOptions_GreaterEqualOptions ?
+      reinterpret_cast<tflite::GreaterEqualOptionsT *>(value) : nullptr;
+  }
+  const tflite::GreaterEqualOptionsT *AsGreaterEqualOptions() const {
+    return type == BuiltinOptions_GreaterEqualOptions ?
+      reinterpret_cast<const tflite::GreaterEqualOptionsT *>(value) : nullptr;
+  }
+  tflite::LessEqualOptionsT *AsLessEqualOptions() {
+    return type == BuiltinOptions_LessEqualOptions ?
+      reinterpret_cast<tflite::LessEqualOptionsT *>(value) : nullptr;
+  }
+  const tflite::LessEqualOptionsT *AsLessEqualOptions() const {
+    return type == BuiltinOptions_LessEqualOptions ?
+      reinterpret_cast<const tflite::LessEqualOptionsT *>(value) : nullptr;
+  }
+  tflite::SelectOptionsT *AsSelectOptions() {
+    return type == BuiltinOptions_SelectOptions ?
+      reinterpret_cast<tflite::SelectOptionsT *>(value) : nullptr;
+  }
+  const tflite::SelectOptionsT *AsSelectOptions() const {
+    return type == BuiltinOptions_SelectOptions ?
+      reinterpret_cast<const tflite::SelectOptionsT *>(value) : nullptr;
+  }
+  tflite::SliceOptionsT *AsSliceOptions() {
+    return type == BuiltinOptions_SliceOptions ?
+      reinterpret_cast<tflite::SliceOptionsT *>(value) : nullptr;
+  }
+  const tflite::SliceOptionsT *AsSliceOptions() const {
+    return type == BuiltinOptions_SliceOptions ?
+      reinterpret_cast<const tflite::SliceOptionsT *>(value) : nullptr;
+  }
+  tflite::TransposeConvOptionsT *AsTransposeConvOptions() {
+    return type == BuiltinOptions_TransposeConvOptions ?
+      reinterpret_cast<tflite::TransposeConvOptionsT *>(value) : nullptr;
+  }
+  const tflite::TransposeConvOptionsT *AsTransposeConvOptions() const {
+    return type == BuiltinOptions_TransposeConvOptions ?
+      reinterpret_cast<const tflite::TransposeConvOptionsT *>(value) : nullptr;
+  }
+  tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() {
+    return type == BuiltinOptions_SparseToDenseOptions ?
+      reinterpret_cast<tflite::SparseToDenseOptionsT *>(value) : nullptr;
+  }
+  const tflite::SparseToDenseOptionsT *AsSparseToDenseOptions() const {
+    return type == BuiltinOptions_SparseToDenseOptions ?
+      reinterpret_cast<const tflite::SparseToDenseOptionsT *>(value) : nullptr;
+  }
+  tflite::TileOptionsT *AsTileOptions() {
+    return type == BuiltinOptions_TileOptions ?
+      reinterpret_cast<tflite::TileOptionsT *>(value) : nullptr;
+  }
+  const tflite::TileOptionsT *AsTileOptions() const {
+    return type == BuiltinOptions_TileOptions ?
+      reinterpret_cast<const tflite::TileOptionsT *>(value) : nullptr;
+  }
+  tflite::ExpandDimsOptionsT *AsExpandDimsOptions() {
+    return type == BuiltinOptions_ExpandDimsOptions ?
+      reinterpret_cast<tflite::ExpandDimsOptionsT *>(value) : nullptr;
+  }
+  const tflite::ExpandDimsOptionsT *AsExpandDimsOptions() const {
+    return type == BuiltinOptions_ExpandDimsOptions ?
+      reinterpret_cast<const tflite::ExpandDimsOptionsT *>(value) : nullptr;
+  }
+  tflite::EqualOptionsT *AsEqualOptions() {
+    return type == BuiltinOptions_EqualOptions ?
+      reinterpret_cast<tflite::EqualOptionsT *>(value) : nullptr;
+  }
+  const tflite::EqualOptionsT *AsEqualOptions() const {
+    return type == BuiltinOptions_EqualOptions ?
+      reinterpret_cast<const tflite::EqualOptionsT *>(value) : nullptr;
+  }
+  tflite::NotEqualOptionsT *AsNotEqualOptions() {
+    return type == BuiltinOptions_NotEqualOptions ?
+      reinterpret_cast<tflite::NotEqualOptionsT *>(value) : nullptr;
+  }
+  const tflite::NotEqualOptionsT *AsNotEqualOptions() const {
+    return type == BuiltinOptions_NotEqualOptions ?
+      reinterpret_cast<const tflite::NotEqualOptionsT *>(value) : nullptr;
+  }
+  tflite::ShapeOptionsT *AsShapeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ShapeOptions) return nullptr;
+    return reinterpret_cast<tflite::ShapeOptionsT *>(value);
+  }
+  const tflite::ShapeOptionsT *AsShapeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ShapeOptions) return nullptr;
+    return reinterpret_cast<const tflite::ShapeOptionsT *>(value);
+  }
+  tflite::PowOptionsT *AsPowOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_PowOptions) return nullptr;
+    return reinterpret_cast<tflite::PowOptionsT *>(value);
+  }
+  const tflite::PowOptionsT *AsPowOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_PowOptions) return nullptr;
+    return reinterpret_cast<const tflite::PowOptionsT *>(value);
+  }
+  tflite::ArgMinOptionsT *AsArgMinOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ArgMinOptions) return nullptr;
+    return reinterpret_cast<tflite::ArgMinOptionsT *>(value);
+  }
+  const tflite::ArgMinOptionsT *AsArgMinOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ArgMinOptions) return nullptr;
+    return reinterpret_cast<const tflite::ArgMinOptionsT *>(value);
+  }
+  tflite::FakeQuantOptionsT *AsFakeQuantOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FakeQuantOptions) return nullptr;
+    return reinterpret_cast<tflite::FakeQuantOptionsT *>(value);
+  }
+  const tflite::FakeQuantOptionsT *AsFakeQuantOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FakeQuantOptions) return nullptr;
+    return reinterpret_cast<const tflite::FakeQuantOptionsT *>(value);
+  }
+  tflite::PackOptionsT *AsPackOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_PackOptions) return nullptr;
+    return reinterpret_cast<tflite::PackOptionsT *>(value);
+  }
+  const tflite::PackOptionsT *AsPackOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_PackOptions) return nullptr;
+    return reinterpret_cast<const tflite::PackOptionsT *>(value);
+  }
+  tflite::LogicalOrOptionsT *AsLogicalOrOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalOrOptions) return nullptr;
+    return reinterpret_cast<tflite::LogicalOrOptionsT *>(value);
+  }
+  const tflite::LogicalOrOptionsT *AsLogicalOrOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalOrOptions) return nullptr;
+    return reinterpret_cast<const tflite::LogicalOrOptionsT *>(value);
+  }
+  tflite::OneHotOptionsT *AsOneHotOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_OneHotOptions) return nullptr;
+    return reinterpret_cast<tflite::OneHotOptionsT *>(value);
+  }
+  const tflite::OneHotOptionsT *AsOneHotOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_OneHotOptions) return nullptr;
+    return reinterpret_cast<const tflite::OneHotOptionsT *>(value);
+  }
+  tflite::LogicalAndOptionsT *AsLogicalAndOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalAndOptions) return nullptr;
+    return reinterpret_cast<tflite::LogicalAndOptionsT *>(value);
+  }
+  const tflite::LogicalAndOptionsT *AsLogicalAndOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalAndOptions) return nullptr;
+    return reinterpret_cast<const tflite::LogicalAndOptionsT *>(value);
+  }
+  tflite::LogicalNotOptionsT *AsLogicalNotOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalNotOptions) return nullptr;
+    return reinterpret_cast<tflite::LogicalNotOptionsT *>(value);
+  }
+  const tflite::LogicalNotOptionsT *AsLogicalNotOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LogicalNotOptions) return nullptr;
+    return reinterpret_cast<const tflite::LogicalNotOptionsT *>(value);
+  }
+  tflite::UnpackOptionsT *AsUnpackOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnpackOptions) return nullptr;
+    return reinterpret_cast<tflite::UnpackOptionsT *>(value);
+  }
+  const tflite::UnpackOptionsT *AsUnpackOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnpackOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnpackOptionsT *>(value);
+  }
+  tflite::FloorDivOptionsT *AsFloorDivOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FloorDivOptions) return nullptr;
+    return reinterpret_cast<tflite::FloorDivOptionsT *>(value);
+  }
+  const tflite::FloorDivOptionsT *AsFloorDivOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FloorDivOptions) return nullptr;
+    return reinterpret_cast<const tflite::FloorDivOptionsT *>(value);
+  }
+  tflite::SquareOptionsT *AsSquareOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SquareOptions) return nullptr;
+    return reinterpret_cast<tflite::SquareOptionsT *>(value);
+  }
+  const tflite::SquareOptionsT *AsSquareOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SquareOptions) return nullptr;
+    return reinterpret_cast<const tflite::SquareOptionsT *>(value);
+  }
+  tflite::ZerosLikeOptionsT *AsZerosLikeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ZerosLikeOptions) return nullptr;
+    return reinterpret_cast<tflite::ZerosLikeOptionsT *>(value);
+  }
+  const tflite::ZerosLikeOptionsT *AsZerosLikeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ZerosLikeOptions) return nullptr;
+    return reinterpret_cast<const tflite::ZerosLikeOptionsT *>(value);
+  }
+  tflite::FillOptionsT *AsFillOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FillOptions) return nullptr;
+    return reinterpret_cast<tflite::FillOptionsT *>(value);
+  }
+  const tflite::FillOptionsT *AsFillOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FillOptions) return nullptr;
+    return reinterpret_cast<const tflite::FillOptionsT *>(value);
+  }
+  tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BidirectionalSequenceLSTMOptions) return nullptr;
+    return reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(value);
+  }
+  const tflite::BidirectionalSequenceLSTMOptionsT *AsBidirectionalSequenceLSTMOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BidirectionalSequenceLSTMOptions) return nullptr;
+    return reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptionsT *>(value);
+  }
+  tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BidirectionalSequenceRNNOptions) return nullptr;
+    return reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(value);
+  }
+  const tflite::BidirectionalSequenceRNNOptionsT *AsBidirectionalSequenceRNNOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BidirectionalSequenceRNNOptions) return nullptr;
+    return reinterpret_cast<const tflite::BidirectionalSequenceRNNOptionsT *>(value);
+  }
+  tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnidirectionalSequenceLSTMOptions) return nullptr;
+    return reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
+  }
+  const tflite::UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnidirectionalSequenceLSTMOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
+  }
+  tflite::FloorModOptionsT *AsFloorModOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FloorModOptions) return nullptr;
+    return reinterpret_cast<tflite::FloorModOptionsT *>(value);
+  }
+  const tflite::FloorModOptionsT *AsFloorModOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_FloorModOptions) return nullptr;
+    return reinterpret_cast<const tflite::FloorModOptionsT *>(value);
+  }
+  tflite::RangeOptionsT *AsRangeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RangeOptions) return nullptr;
+    return reinterpret_cast<tflite::RangeOptionsT *>(value);
+  }
+  const tflite::RangeOptionsT *AsRangeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RangeOptions) return nullptr;
+    return reinterpret_cast<const tflite::RangeOptionsT *>(value);
+  }
+  tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ResizeNearestNeighborOptions) return nullptr;
+    return reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(value);
+  }
+  const tflite::ResizeNearestNeighborOptionsT *AsResizeNearestNeighborOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ResizeNearestNeighborOptions) return nullptr;
+    return reinterpret_cast<const tflite::ResizeNearestNeighborOptionsT *>(value);
+  }
+  tflite::LeakyReluOptionsT *AsLeakyReluOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LeakyReluOptions) return nullptr;
+    return reinterpret_cast<tflite::LeakyReluOptionsT *>(value);
+  }
+  const tflite::LeakyReluOptionsT *AsLeakyReluOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_LeakyReluOptions) return nullptr;
+    return reinterpret_cast<const tflite::LeakyReluOptionsT *>(value);
+  }
+  tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SquaredDifferenceOptions) return nullptr;
+    return reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(value);
+  }
+  const tflite::SquaredDifferenceOptionsT *AsSquaredDifferenceOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SquaredDifferenceOptions) return nullptr;
+    return reinterpret_cast<const tflite::SquaredDifferenceOptionsT *>(value);
+  }
+  tflite::MirrorPadOptionsT *AsMirrorPadOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MirrorPadOptions) return nullptr;
+    return reinterpret_cast<tflite::MirrorPadOptionsT *>(value);
+  }
+  const tflite::MirrorPadOptionsT *AsMirrorPadOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MirrorPadOptions) return nullptr;
+    return reinterpret_cast<const tflite::MirrorPadOptionsT *>(value);
+  }
+  tflite::AbsOptionsT *AsAbsOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AbsOptions) return nullptr;
+    return reinterpret_cast<tflite::AbsOptionsT *>(value);
+  }
+  const tflite::AbsOptionsT *AsAbsOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AbsOptions) return nullptr;
+    return reinterpret_cast<const tflite::AbsOptionsT *>(value);
+  }
+  tflite::SplitVOptionsT *AsSplitVOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SplitVOptions) return nullptr;
+    return reinterpret_cast<tflite::SplitVOptionsT *>(value);
+  }
+  const tflite::SplitVOptionsT *AsSplitVOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SplitVOptions) return nullptr;
+    return reinterpret_cast<const tflite::SplitVOptionsT *>(value);
+  }
+  tflite::UniqueOptionsT *AsUniqueOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UniqueOptions) return nullptr;
+    return reinterpret_cast<tflite::UniqueOptionsT *>(value);
+  }
+  const tflite::UniqueOptionsT *AsUniqueOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UniqueOptions) return nullptr;
+    return reinterpret_cast<const tflite::UniqueOptionsT *>(value);
+  }
+  tflite::ReverseV2OptionsT *AsReverseV2Options() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReverseV2Options) return nullptr;
+    return reinterpret_cast<tflite::ReverseV2OptionsT *>(value);
+  }
+  const tflite::ReverseV2OptionsT *AsReverseV2Options() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReverseV2Options) return nullptr;
+    return reinterpret_cast<const tflite::ReverseV2OptionsT *>(value);
+  }
+  tflite::AddNOptionsT *AsAddNOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AddNOptions) return nullptr;
+    return reinterpret_cast<tflite::AddNOptionsT *>(value);
+  }
+  const tflite::AddNOptionsT *AsAddNOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AddNOptions) return nullptr;
+    return reinterpret_cast<const tflite::AddNOptionsT *>(value);
+  }
+  tflite::GatherNdOptionsT *AsGatherNdOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_GatherNdOptions) return nullptr;
+    return reinterpret_cast<tflite::GatherNdOptionsT *>(value);
+  }
+  const tflite::GatherNdOptionsT *AsGatherNdOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_GatherNdOptions) return nullptr;
+    return reinterpret_cast<const tflite::GatherNdOptionsT *>(value);
+  }
+  tflite::CosOptionsT *AsCosOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CosOptions) return nullptr;
+    return reinterpret_cast<tflite::CosOptionsT *>(value);
+  }
+  const tflite::CosOptionsT *AsCosOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CosOptions) return nullptr;
+    return reinterpret_cast<const tflite::CosOptionsT *>(value);
+  }
+  tflite::WhereOptionsT *AsWhereOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_WhereOptions) return nullptr;
+    return reinterpret_cast<tflite::WhereOptionsT *>(value);
+  }
+  const tflite::WhereOptionsT *AsWhereOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_WhereOptions) return nullptr;
+    return reinterpret_cast<const tflite::WhereOptionsT *>(value);
+  }
+  tflite::RankOptionsT *AsRankOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RankOptions) return nullptr;
+    return reinterpret_cast<tflite::RankOptionsT *>(value);
+  }
+  const tflite::RankOptionsT *AsRankOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RankOptions) return nullptr;
+    return reinterpret_cast<const tflite::RankOptionsT *>(value);
+  }
+  tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReverseSequenceOptions) return nullptr;
+    return reinterpret_cast<tflite::ReverseSequenceOptionsT *>(value);
+  }
+  const tflite::ReverseSequenceOptionsT *AsReverseSequenceOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReverseSequenceOptions) return nullptr;
+    return reinterpret_cast<const tflite::ReverseSequenceOptionsT *>(value);
+  }
+  tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MatrixDiagOptions) return nullptr;
+    return reinterpret_cast<tflite::MatrixDiagOptionsT *>(value);
+  }
+  const tflite::MatrixDiagOptionsT *AsMatrixDiagOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MatrixDiagOptions) return nullptr;
+    return reinterpret_cast<const tflite::MatrixDiagOptionsT *>(value);
+  }
+  tflite::QuantizeOptionsT *AsQuantizeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_QuantizeOptions) return nullptr;
+    return reinterpret_cast<tflite::QuantizeOptionsT *>(value);
+  }
+  const tflite::QuantizeOptionsT *AsQuantizeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_QuantizeOptions) return nullptr;
+    return reinterpret_cast<const tflite::QuantizeOptionsT *>(value);
+  }
+  tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MatrixSetDiagOptions) return nullptr;
+    return reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(value);
+  }
+  const tflite::MatrixSetDiagOptionsT *AsMatrixSetDiagOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_MatrixSetDiagOptions) return nullptr;
+    return reinterpret_cast<const tflite::MatrixSetDiagOptionsT *>(value);
+  }
+  tflite::HardSwishOptionsT *AsHardSwishOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HardSwishOptions) return nullptr;
+    return reinterpret_cast<tflite::HardSwishOptionsT *>(value);
+  }
+  const tflite::HardSwishOptionsT *AsHardSwishOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HardSwishOptions) return nullptr;
+    return reinterpret_cast<const tflite::HardSwishOptionsT *>(value);
+  }
+  tflite::IfOptionsT *AsIfOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_IfOptions) return nullptr;
+    return reinterpret_cast<tflite::IfOptionsT *>(value);
+  }
+  const tflite::IfOptionsT *AsIfOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_IfOptions) return nullptr;
+    return reinterpret_cast<const tflite::IfOptionsT *>(value);
+  }
+  tflite::WhileOptionsT *AsWhileOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_WhileOptions) return nullptr;
+    return reinterpret_cast<tflite::WhileOptionsT *>(value);
+  }
+  const tflite::WhileOptionsT *AsWhileOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_WhileOptions) return nullptr;
+    return reinterpret_cast<const tflite::WhileOptionsT *>(value);
+  }
+  tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DepthToSpaceOptions) return nullptr;
+    return reinterpret_cast<tflite::DepthToSpaceOptionsT *>(value);
+  }
+  const tflite::DepthToSpaceOptionsT *AsDepthToSpaceOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DepthToSpaceOptions) return nullptr;
+    return reinterpret_cast<const tflite::DepthToSpaceOptionsT *>(value);
+  }
+  tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_NonMaxSuppressionV4Options) return nullptr;
+    return reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(value);
+  }
+  const tflite::NonMaxSuppressionV4OptionsT *AsNonMaxSuppressionV4Options() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_NonMaxSuppressionV4Options) return nullptr;
+    return reinterpret_cast<const tflite::NonMaxSuppressionV4OptionsT *>(value);
+  }
+  tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_NonMaxSuppressionV5Options) return nullptr;
+    return reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(value);
+  }
+  const tflite::NonMaxSuppressionV5OptionsT *AsNonMaxSuppressionV5Options() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_NonMaxSuppressionV5Options) return nullptr;
+    return reinterpret_cast<const tflite::NonMaxSuppressionV5OptionsT *>(value);
+  }
+  tflite::ScatterNdOptionsT *AsScatterNdOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ScatterNdOptions) return nullptr;
+    return reinterpret_cast<tflite::ScatterNdOptionsT *>(value);
+  }
+  const tflite::ScatterNdOptionsT *AsScatterNdOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ScatterNdOptions) return nullptr;
+    return reinterpret_cast<const tflite::ScatterNdOptionsT *>(value);
+  }
+  tflite::SelectV2OptionsT *AsSelectV2Options() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SelectV2Options) return nullptr;
+    return reinterpret_cast<tflite::SelectV2OptionsT *>(value);
+  }
+  const tflite::SelectV2OptionsT *AsSelectV2Options() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SelectV2Options) return nullptr;
+    return reinterpret_cast<const tflite::SelectV2OptionsT *>(value);
+  }
+  tflite::DensifyOptionsT *AsDensifyOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DensifyOptions) return nullptr;
+    return reinterpret_cast<tflite::DensifyOptionsT *>(value);
+  }
+  const tflite::DensifyOptionsT *AsDensifyOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DensifyOptions) return nullptr;
+    return reinterpret_cast<const tflite::DensifyOptionsT *>(value);
+  }
+  tflite::SegmentSumOptionsT *AsSegmentSumOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SegmentSumOptions) return nullptr;
+    return reinterpret_cast<tflite::SegmentSumOptionsT *>(value);
+  }
+  const tflite::SegmentSumOptionsT *AsSegmentSumOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SegmentSumOptions) return nullptr;
+    return reinterpret_cast<const tflite::SegmentSumOptionsT *>(value);
+  }
+  tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BatchMatMulOptions) return nullptr;
+    return reinterpret_cast<tflite::BatchMatMulOptionsT *>(value);
+  }
+  const tflite::BatchMatMulOptionsT *AsBatchMatMulOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BatchMatMulOptions) return nullptr;
+    return reinterpret_cast<const tflite::BatchMatMulOptionsT *>(value);
+  }
+  tflite::CumsumOptionsT *AsCumsumOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CumsumOptions) return nullptr;
+    return reinterpret_cast<tflite::CumsumOptionsT *>(value);
+  }
+  const tflite::CumsumOptionsT *AsCumsumOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CumsumOptions) return nullptr;
+    return reinterpret_cast<const tflite::CumsumOptionsT *>(value);
+  }
+  tflite::CallOnceOptionsT *AsCallOnceOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CallOnceOptions) return nullptr;
+    return reinterpret_cast<tflite::CallOnceOptionsT *>(value);
+  }
+  const tflite::CallOnceOptionsT *AsCallOnceOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_CallOnceOptions) return nullptr;
+    return reinterpret_cast<const tflite::CallOnceOptionsT *>(value);
+  }
+  tflite::BroadcastToOptionsT *AsBroadcastToOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BroadcastToOptions) return nullptr;
+    return reinterpret_cast<tflite::BroadcastToOptionsT *>(value);
+  }
+  const tflite::BroadcastToOptionsT *AsBroadcastToOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BroadcastToOptions) return nullptr;
+    return reinterpret_cast<const tflite::BroadcastToOptionsT *>(value);
+  }
+  tflite::Rfft2dOptionsT *AsRfft2dOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_Rfft2dOptions) return nullptr;
+    return reinterpret_cast<tflite::Rfft2dOptionsT *>(value);
+  }
+  const tflite::Rfft2dOptionsT *AsRfft2dOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_Rfft2dOptions) return nullptr;
+    return reinterpret_cast<const tflite::Rfft2dOptionsT *>(value);
+  }
+  tflite::Conv3DOptionsT *AsConv3DOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_Conv3DOptions) return nullptr;
+    return reinterpret_cast<tflite::Conv3DOptionsT *>(value);
+  }
+  const tflite::Conv3DOptionsT *AsConv3DOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_Conv3DOptions) return nullptr;
+    return reinterpret_cast<const tflite::Conv3DOptionsT *>(value);
+  }
+  tflite::HashtableOptionsT *AsHashtableOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableOptions) return nullptr;
+    return reinterpret_cast<tflite::HashtableOptionsT *>(value);
+  }
+  const tflite::HashtableOptionsT *AsHashtableOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableOptions) return nullptr;
+    return reinterpret_cast<const tflite::HashtableOptionsT *>(value);
+  }
+  tflite::HashtableFindOptionsT *AsHashtableFindOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableFindOptions) return nullptr;
+    return reinterpret_cast<tflite::HashtableFindOptionsT *>(value);
+  }
+  const tflite::HashtableFindOptionsT *AsHashtableFindOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableFindOptions) return nullptr;
+    return reinterpret_cast<const tflite::HashtableFindOptionsT *>(value);
+  }
+  tflite::HashtableImportOptionsT *AsHashtableImportOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableImportOptions) return nullptr;
+    return reinterpret_cast<tflite::HashtableImportOptionsT *>(value);
+  }
+  const tflite::HashtableImportOptionsT *AsHashtableImportOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableImportOptions) return nullptr;
+    return reinterpret_cast<const tflite::HashtableImportOptionsT *>(value);
+  }
+  tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableSizeOptions) return nullptr;
+    return reinterpret_cast<tflite::HashtableSizeOptionsT *>(value);
+  }
+  const tflite::HashtableSizeOptionsT *AsHashtableSizeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_HashtableSizeOptions) return nullptr;
+    return reinterpret_cast<const tflite::HashtableSizeOptionsT *>(value);
+  }
+  tflite::VarHandleOptionsT *AsVarHandleOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_VarHandleOptions) return nullptr;
+    return reinterpret_cast<tflite::VarHandleOptionsT *>(value);
+  }
+  const tflite::VarHandleOptionsT *AsVarHandleOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_VarHandleOptions) return nullptr;
+    return reinterpret_cast<const tflite::VarHandleOptionsT *>(value);
+  }
+  tflite::ReadVariableOptionsT *AsReadVariableOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReadVariableOptions) return nullptr;
+    return reinterpret_cast<tflite::ReadVariableOptionsT *>(value);
+  }
+  const tflite::ReadVariableOptionsT *AsReadVariableOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ReadVariableOptions) return nullptr;
+    return reinterpret_cast<const tflite::ReadVariableOptionsT *>(value);
+  }
+  tflite::AssignVariableOptionsT *AsAssignVariableOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AssignVariableOptions) return nullptr;
+    return reinterpret_cast<tflite::AssignVariableOptionsT *>(value);
+  }
+  const tflite::AssignVariableOptionsT *AsAssignVariableOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_AssignVariableOptions) return nullptr;
+    return reinterpret_cast<const tflite::AssignVariableOptionsT *>(value);
+  }
+  tflite::RandomOptionsT *AsRandomOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RandomOptions) return nullptr;
+    return reinterpret_cast<tflite::RandomOptionsT *>(value);
+  }
+  const tflite::RandomOptionsT *AsRandomOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_RandomOptions) return nullptr;
+    return reinterpret_cast<const tflite::RandomOptionsT *>(value);
+  }
+  tflite::BucketizeOptionsT *AsBucketizeOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BucketizeOptions) return nullptr;
+    return reinterpret_cast<tflite::BucketizeOptionsT *>(value);
+  }
+  const tflite::BucketizeOptionsT *AsBucketizeOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_BucketizeOptions) return nullptr;
+    return reinterpret_cast<const tflite::BucketizeOptionsT *>(value);
+  }
+  tflite::GeluOptionsT *AsGeluOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_GeluOptions) return nullptr;
+    return reinterpret_cast<tflite::GeluOptionsT *>(value);
+  }
+  const tflite::GeluOptionsT *AsGeluOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_GeluOptions) return nullptr;
+    return reinterpret_cast<const tflite::GeluOptionsT *>(value);
+  }
+  tflite::DynamicUpdateSliceOptionsT *AsDynamicUpdateSliceOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DynamicUpdateSliceOptions) return nullptr;
+    return reinterpret_cast<tflite::DynamicUpdateSliceOptionsT *>(value);
+  }
+  const tflite::DynamicUpdateSliceOptionsT *AsDynamicUpdateSliceOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_DynamicUpdateSliceOptions) return nullptr;
+    return reinterpret_cast<const tflite::DynamicUpdateSliceOptionsT *>(value);
+  }
+  tflite::UnsortedSegmentProdOptionsT *AsUnsortedSegmentProdOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentProdOptions) return nullptr;
+    return reinterpret_cast<tflite::UnsortedSegmentProdOptionsT *>(value);
+  }
+  const tflite::UnsortedSegmentProdOptionsT *AsUnsortedSegmentProdOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentProdOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnsortedSegmentProdOptionsT *>(value);
+  }
+  tflite::UnsortedSegmentMaxOptionsT *AsUnsortedSegmentMaxOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentMaxOptions) return nullptr;
+    return reinterpret_cast<tflite::UnsortedSegmentMaxOptionsT *>(value);
+  }
+  const tflite::UnsortedSegmentMaxOptionsT *AsUnsortedSegmentMaxOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentMaxOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnsortedSegmentMaxOptionsT *>(value);
+  }
+  tflite::UnsortedSegmentMinOptionsT *AsUnsortedSegmentMinOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentMinOptions) return nullptr;
+    return reinterpret_cast<tflite::UnsortedSegmentMinOptionsT *>(value);
+  }
+  const tflite::UnsortedSegmentMinOptionsT *AsUnsortedSegmentMinOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentMinOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnsortedSegmentMinOptionsT *>(value);
+  }
+  tflite::UnsortedSegmentSumOptionsT *AsUnsortedSegmentSumOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentSumOptions) return nullptr;
+    return reinterpret_cast<tflite::UnsortedSegmentSumOptionsT *>(value);
+  }
+  const tflite::UnsortedSegmentSumOptionsT *AsUnsortedSegmentSumOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_UnsortedSegmentSumOptions) return nullptr;
+    return reinterpret_cast<const tflite::UnsortedSegmentSumOptionsT *>(value);
+  }
+  tflite::ATan2OptionsT *AsATan2Options() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ATan2Options) return nullptr;
+    return reinterpret_cast<tflite::ATan2OptionsT *>(value);
+  }
+  const tflite::ATan2OptionsT *AsATan2Options() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_ATan2Options) return nullptr;
+    return reinterpret_cast<const tflite::ATan2OptionsT *>(value);
+  }
+  tflite::SignOptionsT *AsSignOptions() {  // non-const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SignOptions) return nullptr;
+    return reinterpret_cast<tflite::SignOptionsT *>(value);
+  }
+  const tflite::SignOptionsT *AsSignOptions() const {  // const view; nullptr on type mismatch
+    if (type != BuiltinOptions_SignOptions) return nullptr;
+    return reinterpret_cast<const tflite::SignOptionsT *>(value);
+  }
+};
+
+bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);  // declaration only; definition not visible here. NOTE(review): presumably checks `obj` as the table selected by `type` -- confirm
+bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types);  // declaration only; NOTE(review): presumably the per-element form over parallel `values`/`types` vectors -- confirm
+
+enum Padding : int8_t {  // int8_t-backed; values are assigned explicitly
+  Padding_SAME = 0,
+  Padding_VALID = 1,
+  Padding_MIN = Padding_SAME,  // alias of the smallest enumerator
+  Padding_MAX = Padding_VALID  // alias of the largest enumerator
+};
+
+inline const Padding (&EnumValuesPadding())[2] {  // reference to a static table of both enumerators
+  static const Padding kPaddingValues[2] = {
+    Padding_SAME,
+    Padding_VALID
+  };
+  return kPaddingValues;
+}
+
+inline const char * const *EnumNamesPadding() {  // names in enumerator order, followed by nullptr
+  static const char * const kPaddingNames[] = {
+    "SAME",
+    "VALID",
+    nullptr
+  };
+  return kPaddingNames;
+}
+
+inline const char *EnumNamePadding(Padding e) {  // "" when IsOutRange rejects e, else its table entry
+  const bool out_of_range = flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID);
+  if (out_of_range) return "";
+  return EnumNamesPadding()[static_cast<size_t>(e)];
+}
+
+enum ActivationFunctionType : int8_t {  // int8_t-backed; values are assigned explicitly
+  ActivationFunctionType_NONE = 0,
+  ActivationFunctionType_RELU = 1,
+  ActivationFunctionType_RELU_N1_TO_1 = 2,
+  ActivationFunctionType_RELU6 = 3,
+  ActivationFunctionType_TANH = 4,
+  ActivationFunctionType_SIGN_BIT = 5,
+  ActivationFunctionType_MIN = ActivationFunctionType_NONE,  // alias of the smallest enumerator
+  ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT  // alias of the largest enumerator
+};
+
+inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] {  // reference to a static table of all six enumerators
+  static const ActivationFunctionType kActivationValues[6] = {
+    ActivationFunctionType_NONE,
+    ActivationFunctionType_RELU,
+    ActivationFunctionType_RELU_N1_TO_1,
+    ActivationFunctionType_RELU6,
+    ActivationFunctionType_TANH,
+    ActivationFunctionType_SIGN_BIT
+  };
+  return kActivationValues;
+}
+
+inline const char * const *EnumNamesActivationFunctionType() {  // names in enumerator order, followed by nullptr
+  static const char * const kActivationNames[] = {
+    "NONE",
+    "RELU",
+    "RELU_N1_TO_1",
+    "RELU6",
+    "TANH",
+    "SIGN_BIT",
+    nullptr
+  };
+  return kActivationNames;
+}
+
+inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) {  // "" when IsOutRange rejects e, else its table entry
+  const bool out_of_range = flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT);
+  if (out_of_range) return "";
+  return EnumNamesActivationFunctionType()[static_cast<size_t>(e)];
+}
+
+enum LSHProjectionType : int8_t {  // int8_t-backed; values are assigned explicitly
+  LSHProjectionType_UNKNOWN = 0,
+  LSHProjectionType_SPARSE = 1,
+  LSHProjectionType_DENSE = 2,
+  LSHProjectionType_MIN = LSHProjectionType_UNKNOWN,  // alias of the smallest enumerator
+  LSHProjectionType_MAX = LSHProjectionType_DENSE  // alias of the largest enumerator
+};
+
+inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] {  // reference to a static table of all three enumerators
+  static const LSHProjectionType kLshValues[3] = {
+    LSHProjectionType_UNKNOWN,
+    LSHProjectionType_SPARSE,
+    LSHProjectionType_DENSE
+  };
+  return kLshValues;
+}
+
+inline const char * const *EnumNamesLSHProjectionType() {  // names in enumerator order, followed by nullptr
+  static const char * const kLshNames[] = {
+    "UNKNOWN",
+    "SPARSE",
+    "DENSE",
+    nullptr
+  };
+  return kLshNames;
+}
+
+inline const char *EnumNameLSHProjectionType(LSHProjectionType e) {  // "" when IsOutRange rejects e, else its table entry
+  const bool out_of_range = flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE);
+  if (out_of_range) return "";
+  return EnumNamesLSHProjectionType()[static_cast<size_t>(e)];
+}
+
+enum FullyConnectedOptionsWeightsFormat : int8_t {  // int8_t-backed; values are assigned explicitly
+  FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
+  FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
+  FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT,  // alias of the smallest enumerator
+  FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8  // alias of the largest enumerator
+};
+
+inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] {  // reference to a static table of both enumerators
+  static const FullyConnectedOptionsWeightsFormat kWeightsFormatValues[2] = {
+    FullyConnectedOptionsWeightsFormat_DEFAULT,
+    FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8
+  };
+  return kWeightsFormatValues;
+}
+
+inline const char * const *EnumNamesFullyConnectedOptionsWeightsFormat() {  // names in enumerator order, followed by nullptr
+  static const char * const kWeightsFormatNames[] = {
+    "DEFAULT",
+    "SHUFFLED4x16INT8",
+    nullptr
+  };
+  return kWeightsFormatNames;
+}
+
+inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) {  // "" when IsOutRange rejects e, else its table entry
+  const bool out_of_range = flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+  if (out_of_range) return "";
+  return EnumNamesFullyConnectedOptionsWeightsFormat()[static_cast<size_t>(e)];
+}
+
+enum LSTMKernelType : int8_t {  // int8_t-backed; values are assigned explicitly
+  LSTMKernelType_FULL = 0,
+  LSTMKernelType_BASIC = 1,
+  LSTMKernelType_MIN = LSTMKernelType_FULL,  // alias of the smallest enumerator
+  LSTMKernelType_MAX = LSTMKernelType_BASIC  // alias of the largest enumerator
+};
+
+inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] {  // reference to a static table of both enumerators
+  static const LSTMKernelType kLstmKernelValues[2] = {
+    LSTMKernelType_FULL,
+    LSTMKernelType_BASIC
+  };
+  return kLstmKernelValues;
+}
+
+inline const char * const *EnumNamesLSTMKernelType() {  // names in enumerator order, followed by nullptr
+  static const char * const kLstmKernelNames[] = {
+    "FULL",
+    "BASIC",
+    nullptr
+  };
+  return kLstmKernelNames;
+}
+
+inline const char *EnumNameLSTMKernelType(LSTMKernelType e) {  // "" when IsOutRange rejects e, else its table entry
+  const bool out_of_range = flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC);
+  if (out_of_range) return "";
+  return EnumNamesLSTMKernelType()[static_cast<size_t>(e)];
+}
+
+enum CombinerType : int8_t {  // int8_t-backed; values are assigned explicitly
+  CombinerType_SUM = 0,
+  CombinerType_MEAN = 1,
+  CombinerType_SQRTN = 2,
+  CombinerType_MIN = CombinerType_SUM,  // alias of the smallest enumerator
+  CombinerType_MAX = CombinerType_SQRTN  // alias of the largest enumerator
+};
+
+inline const CombinerType (&EnumValuesCombinerType())[3] {  // Returns a reference to a static 3-element array of the declared values.
+  static const CombinerType values[] = {  // the _MIN/_MAX aliases are not listed
+    CombinerType_SUM,
+    CombinerType_MEAN,
+    CombinerType_SQRTN
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesCombinerType() {  // Returns the static name table; entry i names the enumerator whose value is i.
+  static const char * const names[4] = {
+    "SUM",
+    "MEAN",
+    "SQRTN",
+    nullptr  // terminator after the last name
+  };
+  return names;
+}
+
+inline const char *EnumNameCombinerType(CombinerType e) {
+  // Values outside [SUM, SQRTN] have no table entry and map to "".
+  const bool unknown = flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN);
+  return unknown ? "" : EnumNamesCombinerType()[static_cast<size_t>(e)];
+}
+
+enum MirrorPadMode : int8_t {  // Mirror-pad mode selector; underlying type is int8_t.
+  MirrorPadMode_REFLECT = 0,
+  MirrorPadMode_SYMMETRIC = 1,
+  MirrorPadMode_MIN = MirrorPadMode_REFLECT,  // alias of the smallest declared value
+  MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC  // alias of the largest declared value
+};
+
+inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] {  // Returns a reference to a static 2-element array of the declared values.
+  static const MirrorPadMode values[] = {  // the _MIN/_MAX aliases are not listed
+    MirrorPadMode_REFLECT,
+    MirrorPadMode_SYMMETRIC
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesMirrorPadMode() {  // Returns the static name table; entry i names the enumerator whose value is i.
+  static const char * const names[3] = {
+    "REFLECT",
+    "SYMMETRIC",
+    nullptr  // terminator after the last name
+  };
+  return names;
+}
+
+inline const char *EnumNameMirrorPadMode(MirrorPadMode e) {
+  // Values outside [REFLECT, SYMMETRIC] have no table entry and map to "".
+  const bool unknown = flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC);
+  return unknown ? "" : EnumNamesMirrorPadMode()[static_cast<size_t>(e)];
+}
+
+enum CustomOptionsFormat : int8_t {  // Custom-options encoding tag; underlying type is int8_t.
+  CustomOptionsFormat_FLEXBUFFERS = 0,  // the only declared value
+  CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,  // alias of the smallest declared value
+  CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS  // alias of the largest declared value
+};
+
+inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] {  // Returns a reference to a static 1-element array holding the single declared value.
+  static const CustomOptionsFormat values[] = {  // the _MIN/_MAX aliases are not listed
+    CustomOptionsFormat_FLEXBUFFERS
+  };
+  return values;
+}
+
+inline const char * const *EnumNamesCustomOptionsFormat() {  // Returns the static name table; entry i names the enumerator whose value is i.
+  static const char * const names[2] = {
+    "FLEXBUFFERS",
+    nullptr  // terminator after the last name
+  };
+  return names;
+}
+
+inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) {
+  // FLEXBUFFERS is the only value with a table entry; anything else maps to "".
+  const bool unknown = flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS);
+  return unknown ? "" : EnumNamesCustomOptionsFormat()[static_cast<size_t>(e)];
+}
+
+struct CustomQuantizationT : public flatbuffers::NativeTable {  // Owning, std::vector-based counterpart of the CustomQuantization table.
+  typedef CustomQuantization TableType;  // the table type this struct corresponds to
+  std::vector<uint8_t> custom{};  // value-initialized, so empty by default
+};
+
+struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view of a table with one byte-vector field, `custom`.
+  typedef CustomQuantizationT NativeTableType;
+  typedef CustomQuantizationBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/VerifyOffset
+    VT_CUSTOM = 4
+  };
+  const flatbuffers::Vector<uint8_t> *custom() const {  // pointer to the `custom` vector, looked up through VT_CUSTOM
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_CUSTOM) &&
+           verifier.VerifyVector(custom()) &&
+           verifier.EndTable();
+  }
+  CustomQuantizationT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<CustomQuantization> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+struct CustomQuantizationBuilder {  // Field-by-field writer for one CustomQuantization table on a FlatBufferBuilder.
+  typedef CustomQuantization Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) {  // records the vector offset under VT_CUSTOM
+    fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
+  }
+  explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CustomQuantization> Finish() {  // ends the table and returns its offset, typed as CustomQuantization
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CustomQuantization>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) {
+  CustomQuantizationBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_custom(custom);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantizationDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint8_t> *custom = nullptr) {
+  const bool has_custom = (custom != nullptr);  // a null pointer yields offset 0 below
+  if (has_custom) { _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16); }  // request 16-byte alignment before writing
+  const auto custom_offset = has_custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+  // The offset-based overload assembles the table itself.
+  return tflite::CreateCustomQuantization(_fbb, custom_offset);
+}
+
+flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct QuantizationParametersT : public flatbuffers::NativeTable {  // Owning, std::vector-based counterpart of the QuantizationParameters table.
+  typedef QuantizationParameters TableType;  // the table type this struct corresponds to
+  std::vector<float> min{};  // all four vectors are value-initialized, so empty by default
+  std::vector<float> max{};
+  std::vector<float> scale{};
+  std::vector<int64_t> zero_point{};
+  tflite::QuantizationDetailsUnion details{};  // union holder; the type is declared outside this span
+  int32_t quantized_dimension = 0;
+};
+
+struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view: four vectors, a tagged `details` union and a scalar.
+  typedef QuantizationParametersT NativeTableType;
+  typedef QuantizationParametersBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/GetField and the Verify* helpers
+    VT_MIN = 4,
+    VT_MAX = 6,
+    VT_SCALE = 8,
+    VT_ZERO_POINT = 10,
+    VT_DETAILS_TYPE = 12,
+    VT_DETAILS = 14,
+    VT_QUANTIZED_DIMENSION = 16
+  };
+  const flatbuffers::Vector<float> *min() const {  // pointer to the `min` vector
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_MIN);
+  }
+  const flatbuffers::Vector<float> *max() const {  // pointer to the `max` vector
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_MAX);
+  }
+  const flatbuffers::Vector<float> *scale() const {  // pointer to the `scale` vector
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_SCALE);
+  }
+  const flatbuffers::Vector<int64_t> *zero_point() const {  // pointer to the `zero_point` vector
+    return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
+  }
+  tflite::QuantizationDetails details_type() const {  // union tag, read as uint8_t with default 0
+    return static_cast<tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+  }
+  const void *details() const {  // untyped union payload; interpret it according to details_type()
+    return GetPointer<const void *>(VT_DETAILS);
+  }
+  template<typename T> const T *details_as() const;  // typed access; specialized per union member after the struct
+  const tflite::CustomQuantization *details_as_CustomQuantization() const {  // payload if the tag is CustomQuantization, otherwise nullptr
+    return details_type() == tflite::QuantizationDetails_CustomQuantization ? static_cast<const tflite::CustomQuantization *>(details()) : nullptr;
+  }
+  int32_t quantized_dimension() const {  // scalar field, default 0
+    return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_MIN) &&
+           verifier.VerifyVector(min()) &&
+           VerifyOffset(verifier, VT_MAX) &&
+           verifier.VerifyVector(max()) &&
+           VerifyOffset(verifier, VT_SCALE) &&
+           verifier.VerifyVector(scale()) &&
+           VerifyOffset(verifier, VT_ZERO_POINT) &&
+           verifier.VerifyVector(zero_point()) &&
+           VerifyField<uint8_t>(verifier, VT_DETAILS_TYPE, 1) &&
+           VerifyOffset(verifier, VT_DETAILS) &&
+           VerifyQuantizationDetails(verifier, details(), details_type()) &&  // payload is checked against its tag
+           VerifyField<int32_t>(verifier, VT_QUANTIZED_DIMENSION, 4) &&
+           verifier.EndTable();
+  }
+  QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<QuantizationParameters> Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+template<> inline const tflite::CustomQuantization *QuantizationParameters::details_as<tflite::CustomQuantization>() const {  // Specialization that forwards to the named accessor.
+  return details_as_CustomQuantization();
+}
+
+struct QuantizationParametersBuilder {  // Field-by-field writer for one QuantizationParameters table on a FlatBufferBuilder.
+  typedef QuantizationParameters Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min) {  // records the vector offset under VT_MIN
+    fbb_.AddOffset(QuantizationParameters::VT_MIN, min);
+  }
+  void add_max(flatbuffers::Offset<flatbuffers::Vector<float>> max) {  // records the vector offset under VT_MAX
+    fbb_.AddOffset(QuantizationParameters::VT_MAX, max);
+  }
+  void add_scale(flatbuffers::Offset<flatbuffers::Vector<float>> scale) {  // records the vector offset under VT_SCALE
+    fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale);
+  }
+  void add_zero_point(flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point) {  // records the vector offset under VT_ZERO_POINT
+    fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
+  }
+  void add_details_type(tflite::QuantizationDetails details_type) {  // union tag, written as uint8_t with default 0
+    fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE, static_cast<uint8_t>(details_type), 0);
+  }
+  void add_details(flatbuffers::Offset<void> details) {  // untyped union payload offset
+    fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details);
+  }
+  void add_quantized_dimension(int32_t quantized_dimension) {  // scalar, written with default 0
+    fbb_.AddElement<int32_t>(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension, 0);
+  }
+  explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<QuantizationParameters> Finish() {  // ends the table and returns its offset, typed as QuantizationParameters
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<QuantizationParameters>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+    flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+    flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+    tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE,
+    flatbuffers::Offset<void> details = 0,
+    int32_t quantized_dimension = 0) {
+  QuantizationParametersBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_quantized_dimension(quantized_dimension);  // call order below is kept exactly as generated
+  table.add_details(details);
+  table.add_zero_point(zero_point);
+  table.add_scale(scale);
+  table.add_max(max);
+  table.add_min(min);
+  table.add_details_type(details_type);  // the one-byte union tag goes last
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<float> *min = nullptr,
+    const std::vector<float> *max = nullptr,
+    const std::vector<float> *scale = nullptr,
+    const std::vector<int64_t> *zero_point = nullptr,
+    tflite::QuantizationDetails details_type = tflite::QuantizationDetails_NONE,
+    flatbuffers::Offset<void> details = 0,
+    int32_t quantized_dimension = 0) {
+  // Serialize each supplied std::vector first; a null pointer yields offset 0.
+  const auto min_offset = min ? _fbb.CreateVector<float>(*min) : 0;
+  const auto max_offset = max ? _fbb.CreateVector<float>(*max) : 0;
+  const auto scale_offset = scale ? _fbb.CreateVector<float>(*scale) : 0;
+  const auto zero_point_offset = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+  // The union tag, union payload and scalar pass through unchanged to the
+  // offset-based overload, which assembles the table.
+  return tflite::CreateQuantizationParameters(
+      _fbb,
+      min_offset, max_offset, scale_offset, zero_point_offset,
+      details_type,
+      details,
+      quantized_dimension);
+}
+
+flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct Int32VectorT : public flatbuffers::NativeTable {  // Owning, std::vector-based counterpart of the Int32Vector table.
+  typedef Int32Vector TableType;  // the table type this struct corresponds to
+  std::vector<int32_t> values{};  // value-initialized, so empty by default
+};
+
+struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view of a table with one int32 vector field, `values`.
+  typedef Int32VectorT NativeTableType;
+  typedef Int32VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/VerifyOffset
+    VT_VALUES = 4
+  };
+  const flatbuffers::Vector<int32_t> *values() const {  // pointer to the `values` vector, looked up through VT_VALUES
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_VALUES);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_VALUES) &&
+           verifier.VerifyVector(values()) &&
+           verifier.EndTable();
+  }
+  Int32VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<Int32Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+struct Int32VectorBuilder {  // Field-by-field writer for one Int32Vector table on a FlatBufferBuilder.
+  typedef Int32Vector Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values) {  // records the vector offset under VT_VALUES
+    fbb_.AddOffset(Int32Vector::VT_VALUES, values);
+  }
+  explicit Int32VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Int32Vector> Finish() {  // ends the table and returns its offset, typed as Int32Vector
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Int32Vector>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Int32Vector> CreateInt32Vector(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> values = 0) {
+  Int32VectorBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_values(values);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<Int32Vector> CreateInt32VectorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *values = nullptr) {
+  // A null pointer yields offset 0; otherwise the vector is serialized first.
+  const auto values_offset = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+  // The offset-based overload assembles the table itself.
+  return tflite::CreateInt32Vector(_fbb, values_offset);
+}
+
+flatbuffers::Offset<Int32Vector> CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct Uint16VectorT : public flatbuffers::NativeTable {  // Owning, std::vector-based counterpart of the Uint16Vector table.
+  typedef Uint16Vector TableType;  // the table type this struct corresponds to
+  std::vector<uint16_t> values{};  // value-initialized, so empty by default
+};
+
+struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view of a table with one uint16 vector field, `values`.
+  typedef Uint16VectorT NativeTableType;
+  typedef Uint16VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/VerifyOffset
+    VT_VALUES = 4
+  };
+  const flatbuffers::Vector<uint16_t> *values() const {  // pointer to the `values` vector, looked up through VT_VALUES
+    return GetPointer<const flatbuffers::Vector<uint16_t> *>(VT_VALUES);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_VALUES) &&
+           verifier.VerifyVector(values()) &&
+           verifier.EndTable();
+  }
+  Uint16VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<Uint16Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+struct Uint16VectorBuilder {  // Field-by-field writer for one Uint16Vector table on a FlatBufferBuilder.
+  typedef Uint16Vector Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values) {  // records the vector offset under VT_VALUES
+    fbb_.AddOffset(Uint16Vector::VT_VALUES, values);
+  }
+  explicit Uint16VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Uint16Vector> Finish() {  // ends the table and returns its offset, typed as Uint16Vector
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Uint16Vector>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Uint16Vector> CreateUint16Vector(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values = 0) {
+  Uint16VectorBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_values(values);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<Uint16Vector> CreateUint16VectorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint16_t> *values = nullptr) {
+  const bool has_values = (values != nullptr);  // a null pointer yields offset 0 below
+  if (has_values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4); }  // request 4-byte alignment before writing
+  const auto values_offset = has_values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+  // The offset-based overload assembles the table itself.
+  return tflite::CreateUint16Vector(_fbb, values_offset);
+}
+
+flatbuffers::Offset<Uint16Vector> CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct Uint8VectorT : public flatbuffers::NativeTable {  // Owning, std::vector-based counterpart of the Uint8Vector table.
+  typedef Uint8Vector TableType;  // the table type this struct corresponds to
+  std::vector<uint8_t> values{};  // value-initialized, so empty by default
+};
+
+struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view of a table with one uint8 vector field, `values`.
+  typedef Uint8VectorT NativeTableType;
+  typedef Uint8VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/VerifyOffset
+    VT_VALUES = 4
+  };
+  const flatbuffers::Vector<uint8_t> *values() const {  // pointer to the `values` vector, looked up through VT_VALUES
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_VALUES);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_VALUES) &&
+           verifier.VerifyVector(values()) &&
+           verifier.EndTable();
+  }
+  Uint8VectorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<Uint8Vector> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+struct Uint8VectorBuilder {  // Field-by-field writer for one Uint8Vector table on a FlatBufferBuilder.
+  typedef Uint8Vector Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values) {  // records the vector offset under VT_VALUES
+    fbb_.AddOffset(Uint8Vector::VT_VALUES, values);
+  }
+  explicit Uint8VectorBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Uint8Vector> Finish() {  // ends the table and returns its offset, typed as Uint8Vector
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Uint8Vector>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Uint8Vector> CreateUint8Vector(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values = 0) {
+  Uint8VectorBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_values(values);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<Uint8Vector> CreateUint8VectorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint8_t> *values = nullptr) {
+  const bool has_values = (values != nullptr);  // a null pointer yields offset 0 below
+  if (has_values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4); }  // request 4-byte alignment before writing
+  const auto values_offset = has_values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+  // The offset-based overload assembles the table itself.
+  return tflite::CreateUint8Vector(_fbb, values_offset);
+}
+
+flatbuffers::Offset<Uint8Vector> CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct DimensionMetadataT : public flatbuffers::NativeTable {  // Owning counterpart of the DimensionMetadata table.
+  typedef DimensionMetadata TableType;  // the table type this struct corresponds to
+  tflite::DimensionType format = tflite::DimensionType_DENSE;  // defaults to DENSE
+  int32_t dense_size = 0;
+  tflite::SparseIndexVectorUnion array_segments{};  // union holder; the type is declared outside this span
+  tflite::SparseIndexVectorUnion array_indices{};  // same union type as array_segments
+};
+
+struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view: two scalars plus two tagged SparseIndexVector unions.
+  typedef DimensionMetadataT NativeTableType;
+  typedef DimensionMetadataBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets; each union uses a *_TYPE tag slot plus a payload slot
+    VT_FORMAT = 4,
+    VT_DENSE_SIZE = 6,
+    VT_ARRAY_SEGMENTS_TYPE = 8,
+    VT_ARRAY_SEGMENTS = 10,
+    VT_ARRAY_INDICES_TYPE = 12,
+    VT_ARRAY_INDICES = 14
+  };
+  tflite::DimensionType format() const {  // read as int8_t with default 0
+    return static_cast<tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+  }
+  int32_t dense_size() const {  // scalar field, default 0
+    return GetField<int32_t>(VT_DENSE_SIZE, 0);
+  }
+  tflite::SparseIndexVector array_segments_type() const {  // union tag for array_segments, read as uint8_t with default 0
+    return static_cast<tflite::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+  }
+  const void *array_segments() const {  // untyped payload; interpret it according to array_segments_type()
+    return GetPointer<const void *>(VT_ARRAY_SEGMENTS);
+  }
+  template<typename T> const T *array_segments_as() const;  // typed access; specialized per union member after the struct
+  const tflite::Int32Vector *array_segments_as_Int32Vector() const {  // payload if the tag is Int32Vector, otherwise nullptr
+    return array_segments_type() == tflite::SparseIndexVector_Int32Vector ? static_cast<const tflite::Int32Vector *>(array_segments()) : nullptr;
+  }
+  const tflite::Uint16Vector *array_segments_as_Uint16Vector() const {  // payload if the tag is Uint16Vector, otherwise nullptr
+    return array_segments_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast<const tflite::Uint16Vector *>(array_segments()) : nullptr;
+  }
+  const tflite::Uint8Vector *array_segments_as_Uint8Vector() const {  // payload if the tag is Uint8Vector, otherwise nullptr
+    return array_segments_type() == tflite::SparseIndexVector_Uint8Vector ? static_cast<const tflite::Uint8Vector *>(array_segments()) : nullptr;
+  }
+  tflite::SparseIndexVector array_indices_type() const {  // union tag for array_indices, read as uint8_t with default 0
+    return static_cast<tflite::SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+  }
+  const void *array_indices() const {  // untyped payload; interpret it according to array_indices_type()
+    return GetPointer<const void *>(VT_ARRAY_INDICES);
+  }
+  template<typename T> const T *array_indices_as() const;  // typed access; specialized per union member after the struct
+  const tflite::Int32Vector *array_indices_as_Int32Vector() const {  // payload if the tag is Int32Vector, otherwise nullptr
+    return array_indices_type() == tflite::SparseIndexVector_Int32Vector ? static_cast<const tflite::Int32Vector *>(array_indices()) : nullptr;
+  }
+  const tflite::Uint16Vector *array_indices_as_Uint16Vector() const {  // payload if the tag is Uint16Vector, otherwise nullptr
+    return array_indices_type() == tflite::SparseIndexVector_Uint16Vector ? static_cast<const tflite::Uint16Vector *>(array_indices()) : nullptr;
+  }
+  const tflite::Uint8Vector *array_indices_as_Uint8Vector() const {  // payload if the tag is Uint8Vector, otherwise nullptr
+    return array_indices_type() == tflite::SparseIndexVector_Uint8Vector ? static_cast<const tflite::Uint8Vector *>(array_indices()) : nullptr;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FORMAT, 1) &&
+           VerifyField<int32_t>(verifier, VT_DENSE_SIZE, 4) &&
+           VerifyField<uint8_t>(verifier, VT_ARRAY_SEGMENTS_TYPE, 1) &&
+           VerifyOffset(verifier, VT_ARRAY_SEGMENTS) &&
+           VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) &&  // payload is checked against its tag
+           VerifyField<uint8_t>(verifier, VT_ARRAY_INDICES_TYPE, 1) &&
+           VerifyOffset(verifier, VT_ARRAY_INDICES) &&
+           VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) &&  // payload is checked against its tag
+           verifier.EndTable();
+  }
+  DimensionMetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<DimensionMetadata> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+template<> inline const tflite::Int32Vector *DimensionMetadata::array_segments_as<tflite::Int32Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_segments_as_Int32Vector();
+}
+
+template<> inline const tflite::Uint16Vector *DimensionMetadata::array_segments_as<tflite::Uint16Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_segments_as_Uint16Vector();
+}
+
+template<> inline const tflite::Uint8Vector *DimensionMetadata::array_segments_as<tflite::Uint8Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_segments_as_Uint8Vector();
+}
+
+template<> inline const tflite::Int32Vector *DimensionMetadata::array_indices_as<tflite::Int32Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_indices_as_Int32Vector();
+}
+
+template<> inline const tflite::Uint16Vector *DimensionMetadata::array_indices_as<tflite::Uint16Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_indices_as_Uint16Vector();
+}
+
+template<> inline const tflite::Uint8Vector *DimensionMetadata::array_indices_as<tflite::Uint8Vector>() const {  // Specialization that forwards to the named accessor.
+  return array_indices_as_Uint8Vector();
+}
+
+struct DimensionMetadataBuilder {  // Field-by-field writer for one DimensionMetadata table on a FlatBufferBuilder.
+  typedef DimensionMetadata Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_format(tflite::DimensionType format) {  // written as int8_t with default 0
+    fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
+  }
+  void add_dense_size(int32_t dense_size) {  // scalar, written with default 0
+    fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
+  }
+  void add_array_segments_type(tflite::SparseIndexVector array_segments_type) {  // union tag, written as uint8_t with default 0
+    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE, static_cast<uint8_t>(array_segments_type), 0);
+  }
+  void add_array_segments(flatbuffers::Offset<void> array_segments) {  // untyped union payload offset
+    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
+  }
+  void add_array_indices_type(tflite::SparseIndexVector array_indices_type) {  // union tag, written as uint8_t with default 0
+    fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE, static_cast<uint8_t>(array_indices_type), 0);
+  }
+  void add_array_indices(flatbuffers::Offset<void> array_indices) {  // untyped union payload offset
+    fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices);
+  }
+  explicit DimensionMetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<DimensionMetadata> Finish() {  // ends the table and returns its offset, typed as DimensionMetadata
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DimensionMetadata>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::DimensionType format = tflite::DimensionType_DENSE,
+    int32_t dense_size = 0,
+    tflite::SparseIndexVector array_segments_type = tflite::SparseIndexVector_NONE,
+    flatbuffers::Offset<void> array_segments = 0,
+    tflite::SparseIndexVector array_indices_type = tflite::SparseIndexVector_NONE,
+    flatbuffers::Offset<void> array_indices = 0) {
+  DimensionMetadataBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_array_indices(array_indices);  // call order below is kept exactly as generated
+  table.add_array_segments(array_segments);
+  table.add_dense_size(dense_size);
+  table.add_array_indices_type(array_indices_type);  // the one-byte tags and format go last
+  table.add_array_segments_type(array_segments_type);
+  table.add_format(format);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct SparsityParametersT : public flatbuffers::NativeTable {  // Owning counterpart of the SparsityParameters table.
+  typedef SparsityParameters TableType;  // the table type this struct corresponds to
+  std::vector<int32_t> traversal_order{};  // value-initialized, so empty by default
+  std::vector<int32_t> block_map{};
+  std::vector<std::unique_ptr<tflite::DimensionMetadataT>> dim_metadata{};  // move-only elements, hence the hand-declared copy operations below
+  SparsityParametersT() = default;
+  SparsityParametersT(const SparsityParametersT &o);  // declared only; body is outside this span
+  SparsityParametersT(SparsityParametersT&&) FLATBUFFERS_NOEXCEPT = default;
+  SparsityParametersT &operator=(SparsityParametersT o) FLATBUFFERS_NOEXCEPT;  // takes its argument by value; declared only
+};
+
+struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view: two int32 vectors and a vector of DimensionMetadata tables.
+  typedef SparsityParametersT NativeTableType;
+  typedef SparsityParametersBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field offsets passed to GetPointer/VerifyOffset
+    VT_TRAVERSAL_ORDER = 4,
+    VT_BLOCK_MAP = 6,
+    VT_DIM_METADATA = 8
+  };
+  const flatbuffers::Vector<int32_t> *traversal_order() const {  // pointer to the `traversal_order` vector
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_TRAVERSAL_ORDER);
+  }
+  const flatbuffers::Vector<int32_t> *block_map() const {  // pointer to the `block_map` vector
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>> *dim_metadata() const {  // pointer to the vector of table offsets
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>> *>(VT_DIM_METADATA);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_TRAVERSAL_ORDER) &&
+           verifier.VerifyVector(traversal_order()) &&
+           VerifyOffset(verifier, VT_BLOCK_MAP) &&
+           verifier.VerifyVector(block_map()) &&
+           VerifyOffset(verifier, VT_DIM_METADATA) &&
+           verifier.VerifyVector(dim_metadata()) &&
+           verifier.VerifyVectorOfTables(dim_metadata()) &&  // the vector itself is checked first, then the tables it refers to
+           verifier.EndTable();
+  }
+  SparsityParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; body is outside this span
+  void UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only
+  static flatbuffers::Offset<SparsityParameters> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only
+};
+
+struct SparsityParametersBuilder {  // Field-by-field writer for one SparsityParameters table on a FlatBufferBuilder.
+  typedef SparsityParameters Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // referenced, not owned
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(); handed back to EndTable() in Finish()
+  void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order) {  // records the vector offset under VT_TRAVERSAL_ORDER
+    fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order);
+  }
+  void add_block_map(flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map) {  // records the vector offset under VT_BLOCK_MAP
+    fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
+  }
+  void add_dim_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>>> dim_metadata) {  // records the vector offset under VT_DIM_METADATA
+    fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
+  }
+  explicit SparsityParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SparsityParameters> Finish() {  // ends the table and returns its offset, typed as SparsityParameters
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SparsityParameters>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::DimensionMetadata>>> dim_metadata = 0) {
+  SparsityParametersBuilder table(_fbb);  // the constructor starts the table on _fbb
+  table.add_dim_metadata(dim_metadata);  // call order below is kept exactly as generated
+  table.add_block_map(block_map);
+  table.add_traversal_order(traversal_order);
+  return table.Finish();  // ends the table and yields its offset
+}
+
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *traversal_order = nullptr,
+    const std::vector<int32_t> *block_map = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> *dim_metadata = nullptr) {
+  // Serialize each supplied std::vector first; a null pointer yields offset 0.
+  const auto traversal_order_offset = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+  const auto block_map_offset = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+  const auto dim_metadata_offset = dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<tflite::DimensionMetadata>>(*dim_metadata) : 0;
+  // The offset-based overload assembles the table from the three offsets.
+  return tflite::CreateSparsityParameters(_fbb,
+                                          traversal_order_offset,
+                                          block_map_offset, dim_metadata_offset);
+}
+
+flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking the native struct; declaration only, body is outside this span.
+
+struct VariantSubTypeT : public flatbuffers::NativeTable {  // Owning counterpart of the VariantSubType table.
+  typedef VariantSubType TableType;  // the table type this struct corresponds to
+  std::vector<int32_t> shape{};  // value-initialized, so empty by default
+  tflite::TensorType type = tflite::TensorType_FLOAT32;  // defaults to FLOAT32
+  bool has_rank = false;
+};
+
+struct VariantSubType FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a VariantSubType table; all field getters are const.
+  typedef VariantSubTypeT NativeTableType;
+  typedef VariantSubTypeBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_SHAPE = 4,
+    VT_TYPE = 6,
+    VT_HAS_RANK = 8
+  };
+  const flatbuffers::Vector<int32_t> *shape() const {  // pointer to the shape vector, obtained via GetPointer
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+  }
+  tflite::TensorType type() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  bool has_rank() const {  // read as uint8_t with fallback value 0; any non-zero byte counts as true
+    return GetField<uint8_t>(VT_HAS_RANK, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_SHAPE) &&
+           verifier.VerifyVector(shape()) &&
+           VerifyField<int8_t>(verifier, VT_TYPE, 1) &&
+           VerifyField<uint8_t>(verifier, VT_HAS_RANK, 1) &&
+           verifier.EndTable();
+  }
+  VariantSubTypeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(VariantSubTypeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<VariantSubType> Pack(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct VariantSubTypeBuilder {  // Incremental writer for one VariantSubType table: construct, call add_* as needed, then Finish().
+  typedef VariantSubType Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) {
+    fbb_.AddOffset(VariantSubType::VT_SHAPE, shape);
+  }
+  void add_type(tflite::TensorType type) {  // enum is written as int8_t; the trailing 0 matches the fallback used by VariantSubType::type()
+    fbb_.AddElement<int8_t>(VariantSubType::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  void add_has_rank(bool has_rank) {  // bool is written as uint8_t; the trailing 0 matches the fallback used by VariantSubType::has_rank()
+    fbb_.AddElement<uint8_t>(VariantSubType::VT_HAS_RANK, static_cast<uint8_t>(has_rank), 0);
+  }
+  explicit VariantSubTypeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<VariantSubType> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<VariantSubType>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<VariantSubType> CreateVariantSubType(  // Writes one VariantSubType table into _fbb in a single call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+    tflite::TensorType type = tflite::TensorType_FLOAT32,
+    bool has_rank = false) {
+  VariantSubTypeBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_shape(shape);  // the offset field is added first, then the two 1-byte fields
+  builder_.add_has_rank(has_rank);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VariantSubType> CreateVariantSubTypeDirect(  // Convenience wrapper: serializes `shape` into _fbb, then forwards to CreateVariantSubType.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *shape = nullptr,
+    tflite::TensorType type = tflite::TensorType_FLOAT32,
+    bool has_rank = false) {
+  auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;  // a null pointer yields offset 0 instead of a vector
+  return tflite::CreateVariantSubType(
+      _fbb,
+      shape__,
+      type,
+      has_rank);
+}
+
+flatbuffers::Offset<VariantSubType> CreateVariantSubType(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native VariantSubTypeT; the definition is not in this section.
+
+struct TensorT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the Tensor table; owns its data.
+  typedef Tensor TableType;
+  std::vector<int32_t> shape{};
+  tflite::TensorType type = tflite::TensorType_FLOAT32;
+  uint32_t buffer = 0;
+  std::string name{};
+  std::unique_ptr<tflite::QuantizationParametersT> quantization{};  // null by default
+  bool is_variable = false;
+  std::unique_ptr<tflite::SparsityParametersT> sparsity{};  // null by default
+  std::vector<int32_t> shape_signature{};
+  bool has_rank = false;
+  std::vector<std::unique_ptr<tflite::VariantSubTypeT>> variant_tensors{};
+  TensorT() = default;
+  TensorT(const TensorT &o);  // user-declared copy constructor; defined outside this section
+  TensorT(TensorT&&) FLATBUFFERS_NOEXCEPT = default;
+  TensorT &operator=(TensorT o) FLATBUFFERS_NOEXCEPT;  // takes its argument by value; defined outside this section
+};
+
+struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a Tensor table; all field getters are const.
+  typedef TensorT NativeTableType;
+  typedef TensorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and TensorBuilder
+    VT_SHAPE = 4,
+    VT_TYPE = 6,
+    VT_BUFFER = 8,
+    VT_NAME = 10,
+    VT_QUANTIZATION = 12,
+    VT_IS_VARIABLE = 14,
+    VT_SPARSITY = 16,
+    VT_SHAPE_SIGNATURE = 18,
+    VT_HAS_RANK = 20,
+    VT_VARIANT_TENSORS = 22
+  };
+  const flatbuffers::Vector<int32_t> *shape() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
+  }
+  tflite::TensorType type() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+  }
+  uint32_t buffer() const {  // fallback value 0
+    return GetField<uint32_t>(VT_BUFFER, 0);
+  }
+  const flatbuffers::String *name() const {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  const tflite::QuantizationParameters *quantization() const {  // pointer to the nested QuantizationParameters table
+    return GetPointer<const tflite::QuantizationParameters *>(VT_QUANTIZATION);
+  }
+  bool is_variable() const {  // read as uint8_t with fallback value 0; any non-zero byte counts as true
+    return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0;
+  }
+  const tflite::SparsityParameters *sparsity() const {  // pointer to the nested SparsityParameters table
+    return GetPointer<const tflite::SparsityParameters *>(VT_SPARSITY);
+  }
+  const flatbuffers::Vector<int32_t> *shape_signature() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE_SIGNATURE);
+  }
+  bool has_rank() const {  // read as uint8_t with fallback value 0; any non-zero byte counts as true
+    return GetField<uint8_t>(VT_HAS_RANK, 0) != 0;
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::VariantSubType>> *variant_tensors() const {  // vector whose elements are offsets of VariantSubType tables
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::VariantSubType>> *>(VT_VARIANT_TENSORS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_SHAPE) &&
+           verifier.VerifyVector(shape()) &&
+           VerifyField<int8_t>(verifier, VT_TYPE, 1) &&
+           VerifyField<uint32_t>(verifier, VT_BUFFER, 4) &&
+           VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) &&
+           VerifyOffset(verifier, VT_QUANTIZATION) &&
+           verifier.VerifyTable(quantization()) &&
+           VerifyField<uint8_t>(verifier, VT_IS_VARIABLE, 1) &&
+           VerifyOffset(verifier, VT_SPARSITY) &&
+           verifier.VerifyTable(sparsity()) &&
+           VerifyOffset(verifier, VT_SHAPE_SIGNATURE) &&
+           verifier.VerifyVector(shape_signature()) &&
+           VerifyField<uint8_t>(verifier, VT_HAS_RANK, 1) &&
+           VerifyOffset(verifier, VT_VARIANT_TENSORS) &&
+           verifier.VerifyVector(variant_tensors()) &&
+           verifier.VerifyVectorOfTables(variant_tensors()) &&
+           verifier.EndTable();
+  }
+  TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<Tensor> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct TensorBuilder {  // Incremental writer for one Tensor table: construct, call add_* as needed, then Finish().
+  typedef Tensor Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape) {
+    fbb_.AddOffset(Tensor::VT_SHAPE, shape);
+  }
+  void add_type(tflite::TensorType type) {  // enum is written as int8_t; the trailing 0 matches the fallback used by Tensor::type()
+    fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
+  }
+  void add_buffer(uint32_t buffer) {
+    fbb_.AddElement<uint32_t>(Tensor::VT_BUFFER, buffer, 0);
+  }
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(Tensor::VT_NAME, name);
+  }
+  void add_quantization(flatbuffers::Offset<tflite::QuantizationParameters> quantization) {
+    fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
+  }
+  void add_is_variable(bool is_variable) {  // bool is written as uint8_t
+    fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
+  }
+  void add_sparsity(flatbuffers::Offset<tflite::SparsityParameters> sparsity) {
+    fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
+  }
+  void add_shape_signature(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature) {
+    fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature);
+  }
+  void add_has_rank(bool has_rank) {  // bool is written as uint8_t
+    fbb_.AddElement<uint8_t>(Tensor::VT_HAS_RANK, static_cast<uint8_t>(has_rank), 0);
+  }
+  void add_variant_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::VariantSubType>>> variant_tensors) {
+    fbb_.AddOffset(Tensor::VT_VARIANT_TENSORS, variant_tensors);
+  }
+  explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<Tensor> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Tensor>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Tensor> CreateTensor(  // Writes one Tensor table into _fbb in a single call; every field has a default.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+    tflite::TensorType type = tflite::TensorType_FLOAT32,
+    uint32_t buffer = 0,
+    flatbuffers::Offset<flatbuffers::String> name = 0,
+    flatbuffers::Offset<tflite::QuantizationParameters> quantization = 0,
+    bool is_variable = false,
+    flatbuffers::Offset<tflite::SparsityParameters> sparsity = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0,
+    bool has_rank = false,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::VariantSubType>>> variant_tensors = 0) {
+  TensorBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_variant_tensors(variant_tensors);  // offset fields and the uint32_t buffer are added first...
+  builder_.add_shape_signature(shape_signature);
+  builder_.add_sparsity(sparsity);
+  builder_.add_quantization(quantization);
+  builder_.add_name(name);
+  builder_.add_buffer(buffer);
+  builder_.add_shape(shape);
+  builder_.add_has_rank(has_rank);  // ...then the three 1-byte fields
+  builder_.add_is_variable(is_variable);
+  builder_.add_type(type);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Tensor> CreateTensorDirect(  // Convenience wrapper: serializes vector/string inputs into _fbb, then forwards to CreateTensor.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *shape = nullptr,
+    tflite::TensorType type = tflite::TensorType_FLOAT32,
+    uint32_t buffer = 0,
+    const char *name = nullptr,
+    flatbuffers::Offset<tflite::QuantizationParameters> quantization = 0,  // nested tables are still passed as offsets
+    bool is_variable = false,
+    flatbuffers::Offset<tflite::SparsityParameters> sparsity = 0,
+    const std::vector<int32_t> *shape_signature = nullptr,
+    bool has_rank = false,
+    const std::vector<flatbuffers::Offset<tflite::VariantSubType>> *variant_tensors = nullptr) {
+  auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;  // a null pointer yields offset 0 instead of a vector
+  auto name__ = name ? _fbb.CreateString(name) : 0;  // a null pointer yields offset 0 instead of a string
+  auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+  auto variant_tensors__ = variant_tensors ? _fbb.CreateVector<flatbuffers::Offset<tflite::VariantSubType>>(*variant_tensors) : 0;
+  return tflite::CreateTensor(  // arguments are passed in CreateTensor's parameter order
+      _fbb,
+      shape__,
+      type,
+      buffer,
+      name__,
+      quantization,
+      is_variable,
+      sparsity,
+      shape_signature__,
+      has_rank,
+      variant_tensors__);
+}
+
+flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native TensorT; the definition is not in this section.
+
+struct Conv2DOptionsT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the Conv2DOptions table.
+  typedef Conv2DOptions TableType;
+  tflite::Padding padding = tflite::Padding_SAME;
+  int32_t stride_w = 0;
+  int32_t stride_h = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+  int32_t dilation_w_factor = 1;  // dilation factors default to 1, unlike the strides (0)
+  int32_t dilation_h_factor = 1;
+};
+
+struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a Conv2DOptions table; all field getters are const.
+  typedef Conv2DOptionsT NativeTableType;
+  typedef Conv2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FUSED_ACTIVATION_FUNCTION = 10,
+    VT_DILATION_W_FACTOR = 12,
+    VT_DILATION_H_FACTOR = 14
+  };
+  tflite::Padding padding() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const {
+    return GetField<int32_t>(VT_STRIDE_W, 0);
+  }
+  int32_t stride_h() const {
+    return GetField<int32_t>(VT_STRIDE_H, 0);
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  int32_t dilation_w_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
+  }
+  int32_t dilation_h_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING, 1) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR, 4) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR, 4) &&
+           verifier.EndTable();
+  }
+  Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<Conv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct Conv2DOptionsBuilder {  // Incremental writer for one Conv2DOptions table: construct, call add_* as needed, then Finish().
+  typedef Conv2DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_padding(tflite::Padding padding) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor) {  // trailing 1 matches the fallback used by Conv2DOptions::dilation_w_factor()
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor) {  // trailing 1 matches the fallback used by Conv2DOptions::dilation_h_factor()
+    fbb_.AddElement<int32_t>(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<Conv2DOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Conv2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(  // Writes one Conv2DOptions table into _fbb in a single call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::Padding padding = tflite::Padding_SAME,
+    int32_t stride_w = 0,
+    int32_t stride_h = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    int32_t dilation_w_factor = 1,
+    int32_t dilation_h_factor = 1) {
+  Conv2DOptionsBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_dilation_h_factor(dilation_h_factor);  // the int32_t fields are added first...
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);  // ...then the two int8_t enum fields
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native Conv2DOptionsT; the definition is not in this section.
+
+struct Conv3DOptionsT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the Conv3DOptions table.
+  typedef Conv3DOptions TableType;
+  tflite::Padding padding = tflite::Padding_SAME;
+  int32_t stride_d = 0;
+  int32_t stride_w = 0;
+  int32_t stride_h = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+  int32_t dilation_d_factor = 1;  // dilation factors default to 1, unlike the strides (0)
+  int32_t dilation_w_factor = 1;
+  int32_t dilation_h_factor = 1;
+};
+
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a Conv3DOptions table; all field getters are const.
+  typedef Conv3DOptionsT NativeTableType;
+  typedef Conv3DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_PADDING = 4,
+    VT_STRIDE_D = 6,
+    VT_STRIDE_W = 8,
+    VT_STRIDE_H = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_D_FACTOR = 14,
+    VT_DILATION_W_FACTOR = 16,
+    VT_DILATION_H_FACTOR = 18
+  };
+  tflite::Padding padding() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_d() const {
+    return GetField<int32_t>(VT_STRIDE_D, 0);
+  }
+  int32_t stride_w() const {
+    return GetField<int32_t>(VT_STRIDE_W, 0);
+  }
+  int32_t stride_h() const {
+    return GetField<int32_t>(VT_STRIDE_H, 0);
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  int32_t dilation_d_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_D_FACTOR, 1);
+  }
+  int32_t dilation_w_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
+  }
+  int32_t dilation_h_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING, 1) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_D, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR, 4) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR, 4) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR, 4) &&
+           verifier.EndTable();
+  }
+  Conv3DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<Conv3DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct Conv3DOptionsBuilder {  // Incremental writer for one Conv3DOptions table: construct, call add_* as needed, then Finish().
+  typedef Conv3DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_padding(tflite::Padding padding) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_d(int32_t stride_d) {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_d_factor(int32_t dilation_d_factor) {  // trailing 1 matches the fallback used by Conv3DOptions::dilation_d_factor()
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor) {  // trailing 1 matches the fallback used by Conv3DOptions::dilation_w_factor()
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor) {  // trailing 1 matches the fallback used by Conv3DOptions::dilation_h_factor()
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<Conv3DOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Conv3DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(  // Writes one Conv3DOptions table into _fbb in a single call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::Padding padding = tflite::Padding_SAME,
+    int32_t stride_d = 0,
+    int32_t stride_w = 0,
+    int32_t stride_h = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    int32_t dilation_d_factor = 1,
+    int32_t dilation_w_factor = 1,
+    int32_t dilation_h_factor = 1) {
+  Conv3DOptionsBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_dilation_h_factor(dilation_h_factor);  // the int32_t fields are added first...
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_dilation_d_factor(dilation_d_factor);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_stride_d(stride_d);
+  builder_.add_fused_activation_function(fused_activation_function);  // ...then the two int8_t enum fields
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native Conv3DOptionsT; the definition is not in this section.
+
+struct Pool2DOptionsT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the Pool2DOptions table.
+  typedef Pool2DOptions TableType;
+  tflite::Padding padding = tflite::Padding_SAME;
+  int32_t stride_w = 0;  // every integer field of this struct defaults to 0
+  int32_t stride_h = 0;
+  int32_t filter_width = 0;
+  int32_t filter_height = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+};
+
+struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a Pool2DOptions table; all field getters are const.
+  typedef Pool2DOptionsT NativeTableType;
+  typedef Pool2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FILTER_WIDTH = 10,
+    VT_FILTER_HEIGHT = 12,
+    VT_FUSED_ACTIVATION_FUNCTION = 14
+  };
+  tflite::Padding padding() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const {
+    return GetField<int32_t>(VT_STRIDE_W, 0);
+  }
+  int32_t stride_h() const {
+    return GetField<int32_t>(VT_STRIDE_H, 0);
+  }
+  int32_t filter_width() const {
+    return GetField<int32_t>(VT_FILTER_WIDTH, 0);
+  }
+  int32_t filter_height() const {
+    return GetField<int32_t>(VT_FILTER_HEIGHT, 0);
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING, 1) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H, 4) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH, 4) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           verifier.EndTable();
+  }
+  Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<Pool2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct Pool2DOptionsBuilder {  // Incremental writer for one Pool2DOptions table: construct, call add_* as needed, then Finish().
+  typedef Pool2DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_padding(tflite::Padding padding) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_filter_width(int32_t filter_width) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+  }
+  void add_filter_height(int32_t filter_height) {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<Pool2DOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Pool2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(  // Writes one Pool2DOptions table into _fbb in a single call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::Padding padding = tflite::Padding_SAME,
+    int32_t stride_w = 0,
+    int32_t stride_h = 0,
+    int32_t filter_width = 0,
+    int32_t filter_height = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  Pool2DOptionsBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_filter_height(filter_height);  // the int32_t fields are added first...
+  builder_.add_filter_width(filter_width);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);  // ...then the two int8_t enum fields
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native Pool2DOptionsT; the definition is not in this section.
+
+struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the DepthwiseConv2DOptions table.
+  typedef DepthwiseConv2DOptions TableType;
+  tflite::Padding padding = tflite::Padding_SAME;
+  int32_t stride_w = 0;
+  int32_t stride_h = 0;
+  int32_t depth_multiplier = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+  int32_t dilation_w_factor = 1;  // dilation factors default to 1, unlike the other integer fields (0)
+  int32_t dilation_h_factor = 1;
+};
+
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a DepthwiseConv2DOptions table; all field getters are const.
+  typedef DepthwiseConv2DOptionsT NativeTableType;
+  typedef DepthwiseConv2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_DEPTH_MULTIPLIER = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_W_FACTOR = 14,
+    VT_DILATION_H_FACTOR = 16
+  };
+  tflite::Padding padding() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const {
+    return GetField<int32_t>(VT_STRIDE_W, 0);
+  }
+  int32_t stride_h() const {
+    return GetField<int32_t>(VT_STRIDE_H, 0);
+  }
+  int32_t depth_multiplier() const {
+    return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0);
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {  // read as int8_t with fallback value 0, then cast to the enum
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  int32_t dilation_w_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_W_FACTOR, 1);
+  }
+  int32_t dilation_h_factor() const {  // fallback value is 1, not 0
+    return GetField<int32_t>(VT_DILATION_H_FACTOR, 1);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING, 1) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H, 4) &&
+           VerifyField<int32_t>(verifier, VT_DEPTH_MULTIPLIER, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR, 4) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR, 4) &&
+           verifier.EndTable();
+  }
+  DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<DepthwiseConv2DOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct DepthwiseConv2DOptionsBuilder {  // Incremental writer for one DepthwiseConv2DOptions table: construct, call add_* as needed, then Finish().
+  typedef DepthwiseConv2DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // reference only; the FlatBufferBuilder is owned by the caller
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable() in Finish()
+  void add_padding(tflite::Padding padding) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_depth_multiplier(int32_t depth_multiplier) {
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // enum is written as int8_t
+    fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor) {  // trailing 1 matches the fallback used by DepthwiseConv2DOptions::dilation_w_factor()
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor) {  // trailing 1 matches the fallback used by DepthwiseConv2DOptions::dilation_h_factor()
+    fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<DepthwiseConv2DOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DepthwiseConv2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(  // Writes one DepthwiseConv2DOptions table into _fbb in a single call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::Padding padding = tflite::Padding_SAME,
+    int32_t stride_w = 0,
+    int32_t stride_h = 0,
+    int32_t depth_multiplier = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    int32_t dilation_w_factor = 1,
+    int32_t dilation_h_factor = 1) {
+  DepthwiseConv2DOptionsBuilder builder_(_fbb);  // the builder's constructor calls _fbb.StartTable()
+  builder_.add_dilation_h_factor(dilation_h_factor);  // the int32_t fields are added first...
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_depth_multiplier(depth_multiplier);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);  // ...then the two int8_t enum fields
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking the native DepthwiseConv2DOptionsT; the definition is not in this section.
+
+struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable {  // Native (flatbuffers::NativeTable) counterpart of the ConcatEmbeddingsOptions table.
+  typedef ConcatEmbeddingsOptions TableType;
+  int32_t num_channels = 0;
+  std::vector<int32_t> num_columns_per_channel{};  // empty by default
+  std::vector<int32_t> embedding_dim_per_channel{};  // empty by default
+};
+
+struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor view over a ConcatEmbeddingsOptions table; all field getters are const.
+  typedef ConcatEmbeddingsOptionsT NativeTableType;
+  typedef ConcatEmbeddingsOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field identifiers shared by the getters, Verify() and the builder
+    VT_NUM_CHANNELS = 4,
+    VT_NUM_COLUMNS_PER_CHANNEL = 6,
+    VT_EMBEDDING_DIM_PER_CHANNEL = 8
+  };
+  int32_t num_channels() const {  // fallback value 0
+    return GetField<int32_t>(VT_NUM_CHANNELS, 0);
+  }
+  const flatbuffers::Vector<int32_t> *num_columns_per_channel() const {  // pointer to the vector, obtained via GetPointer
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NUM_COLUMNS_PER_CHANNEL);
+  }
+  const flatbuffers::Vector<int32_t> *embedding_dim_per_channel() const {  // pointer to the vector, obtained via GetPointer
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_EMBEDDING_DIM_PER_CHANNEL);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check passes; && stops at the first failing check
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NUM_CHANNELS, 4) &&
+           VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) &&
+           verifier.VerifyVector(num_columns_per_channel()) &&
+           VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) &&
+           verifier.VerifyVector(embedding_dim_per_channel()) &&
+           verifier.EndTable();
+  }
+  ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; defined outside this section
+  static flatbuffers::Offset<ConcatEmbeddingsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; defined outside this section
+};
+
+struct ConcatEmbeddingsOptionsBuilder {  // Incremental writer for one ConcatEmbeddingsOptions table: construct, call add_* as needed, then Finish().
+  typedef ConcatEmbeddingsOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_num_channels(int32_t num_channels) {
+    fbb_.AddElement<int32_t>(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0);  // last argument matches the default used by the num_channels() accessor
+  }
+  void add_num_columns_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel) {  // takes the offset of an already-serialized vector
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel);
+  }
+  void add_embedding_dim_per_channel(flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel) {  // takes the offset of an already-serialized vector
+    fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel);
+  }
+  explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ConcatEmbeddingsOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatEmbeddingsOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(  // One-shot helper: writes a ConcatEmbeddingsOptions table from a scalar and two pre-built vector offsets.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    int32_t num_channels = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> num_columns_per_channel = 0,  // offsets default to 0
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> embedding_dim_per_channel = 0) {
+  ConcatEmbeddingsOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_embedding_dim_per_channel(embedding_dim_per_channel);  // fields are added in reverse parameter order, as generated - keep as is
+  builder_.add_num_columns_per_channel(num_columns_per_channel);
+  builder_.add_num_channels(num_channels);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptionsDirect(  // Variant that accepts std::vector pointers and serializes them itself before building the table.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the vectors and the table
+    int32_t num_channels = 0,
+    const std::vector<int32_t> *num_columns_per_channel = nullptr,  // nullptr is allowed: the field offset becomes 0
+    const std::vector<int32_t> *embedding_dim_per_channel = nullptr) {  // nullptr is allowed: the field offset becomes 0
+  auto num_columns_per_channel__ = num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;  // vectors are written before the table is started below
+  auto embedding_dim_per_channel__ = embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+  return tflite::CreateConcatEmbeddingsOptions(  // delegate to the offset-based helper
+      _fbb,
+      num_channels,
+      num_columns_per_channel__,
+      embedding_dim_per_channel__);
+}
+
+flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native ConcatEmbeddingsOptionsT; only declared here, definition is outside this view.
+
+struct LSHProjectionOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the LSHProjectionOptions table.
+  typedef LSHProjectionOptions TableType;  // the flatbuffer table type this object corresponds to
+  tflite::LSHProjectionType type = tflite::LSHProjectionType_UNKNOWN;  // defaults to the UNKNOWN enumerator
+};
+
+struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized LSHProjectionOptions table.
+  typedef LSHProjectionOptionsT NativeTableType;  // matching object-API type
+  typedef LSHProjectionOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offset identifying the single field
+    VT_TYPE = 4
+  };
+  tflite::LSHProjectionType type() const {
+    return static_cast<tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_TYPE, 1) &&
+           verifier.EndTable();
+  }
+  LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<LSHProjectionOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct LSHProjectionOptionsBuilder {  // Incremental writer for one LSHProjectionOptions table: construct, call add_type() if needed, then Finish().
+  typedef LSHProjectionOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_type(tflite::LSHProjectionType type) {
+    fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);  // enum is written as an int8_t; last argument matches the default used by the type() accessor
+  }
+  explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LSHProjectionOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSHProjectionOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(  // One-shot helper: writes an LSHProjectionOptions table into _fbb.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    tflite::LSHProjectionType type = tflite::LSHProjectionType_UNKNOWN) {
+  LSHProjectionOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_type(type);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native LSHProjectionOptionsT; only declared here, definition is outside this view.
+
+struct SVDFOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the SVDFOptions table.
+  typedef SVDFOptions TableType;  // the flatbuffer table type this object corresponds to
+  int32_t rank = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized SVDFOptions table.
+  typedef SVDFOptionsT NativeTableType;  // matching object-API type
+  typedef SVDFOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_RANK = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  int32_t rank() const {
+    return GetField<int32_t>(VT_RANK, 0);  // 0 is the default passed to GetField
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t
+  }
+  bool asymmetric_quantize_inputs() const {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;  // bool is read as a uint8_t; any non-zero value is true
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_RANK, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<SVDFOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct SVDFOptionsBuilder {  // Incremental writer for one SVDFOptions table: construct, call add_* as needed, then Finish().
+  typedef SVDFOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_rank(int32_t rank) {
+    fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0);  // last argument matches the default used by the rank() accessor
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);  // bool is written as a uint8_t
+  }
+  explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SVDFOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SVDFOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(  // One-shot helper: writes an SVDFOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    int32_t rank = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool asymmetric_quantize_inputs = false) {
+  SVDFOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_rank(rank);  // NOTE(review): the int32 field is added first, then the two byte-sized fields in reverse parameter order; presumably flatc's size-sorted layout - keep as generated
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native SVDFOptionsT; only declared here, definition is outside this view.
+
+struct RNNOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the RNNOptions table.
+  typedef RNNOptions TableType;  // the flatbuffer table type this object corresponds to
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized RNNOptions table.
+  typedef RNNOptionsT NativeTableType;  // matching object-API type
+  typedef RNNOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  bool asymmetric_quantize_inputs() const {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;  // bool is read as a uint8_t; any non-zero value is true
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<RNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct RNNOptionsBuilder {  // Incremental writer for one RNNOptions table: construct, call add_* as needed, then Finish().
+  typedef RNNOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t; last argument matches the accessor's default
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);  // bool is written as a uint8_t
+  }
+  explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RNNOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RNNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(  // One-shot helper: writes an RNNOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool asymmetric_quantize_inputs = false) {
+  RNNOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);  // fields are added in reverse parameter order, as generated - keep as is
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native RNNOptionsT; only declared here, definition is outside this view.
+
+struct SequenceRNNOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the SequenceRNNOptions table.
+  typedef SequenceRNNOptions TableType;  // the flatbuffer table type this object corresponds to
+  bool time_major = false;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized SequenceRNNOptions table.
+  typedef SequenceRNNOptionsT NativeTableType;  // matching object-API type
+  typedef SequenceRNNOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_TIME_MAJOR = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  bool time_major() const {
+    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;  // bool is read as a uint8_t; any non-zero value is true
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  bool asymmetric_quantize_inputs() const {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;  // same uint8_t-to-bool pattern as time_major()
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR, 1) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  SequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<SequenceRNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct SequenceRNNOptionsBuilder {  // Incremental writer for one SequenceRNNOptions table: construct, call add_* as needed, then Finish().
+  typedef SequenceRNNOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_time_major(bool time_major) {
+    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);  // bool is written as a uint8_t; last argument matches the accessor's default
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);  // bool is written as a uint8_t
+  }
+  explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SequenceRNNOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SequenceRNNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(  // One-shot helper: writes a SequenceRNNOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    bool time_major = false,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool asymmetric_quantize_inputs = false) {
+  SequenceRNNOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);  // fields are added in reverse parameter order, as generated - keep as is
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_time_major(time_major);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native SequenceRNNOptionsT; only declared here, definition is outside this view.
+
+struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the BidirectionalSequenceRNNOptions table.
+  typedef BidirectionalSequenceRNNOptions TableType;  // the flatbuffer table type this object corresponds to
+  bool time_major = false;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  bool merge_outputs = false;
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized BidirectionalSequenceRNNOptions table.
+  typedef BidirectionalSequenceRNNOptionsT NativeTableType;  // matching object-API type
+  typedef BidirectionalSequenceRNNOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_TIME_MAJOR = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6,
+    VT_MERGE_OUTPUTS = 8,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+  };
+  bool time_major() const {
+    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;  // bool is read as a uint8_t; any non-zero value is true
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  bool merge_outputs() const {
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;  // same uint8_t-to-bool pattern as time_major()
+  }
+  bool asymmetric_quantize_inputs() const {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;  // same uint8_t-to-bool pattern as time_major()
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR, 1) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<BidirectionalSequenceRNNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct BidirectionalSequenceRNNOptionsBuilder {  // Incremental writer for one BidirectionalSequenceRNNOptions table: construct, call add_* as needed, then Finish().
+  typedef BidirectionalSequenceRNNOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_time_major(bool time_major) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);  // bool is written as a uint8_t; last argument matches the accessor's default
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t
+  }
+  void add_merge_outputs(bool merge_outputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);  // bool is written as a uint8_t
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);  // bool is written as a uint8_t
+  }
+  explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceRNNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(  // One-shot helper: writes a BidirectionalSequenceRNNOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    bool time_major = false,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool merge_outputs = false,
+    bool asymmetric_quantize_inputs = false) {
+  BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);  // fields are added in reverse parameter order, as generated - keep as is
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_time_major(time_major);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native BidirectionalSequenceRNNOptionsT; only declared here, definition is outside this view.
+
+struct FullyConnectedOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the FullyConnectedOptions table.
+  typedef FullyConnectedOptions TableType;  // the flatbuffer table type this object corresponds to
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  tflite::FullyConnectedOptionsWeightsFormat weights_format = tflite::FullyConnectedOptionsWeightsFormat_DEFAULT;  // defaults to the DEFAULT enumerator
+  bool keep_num_dims = false;
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized FullyConnectedOptions table.
+  typedef FullyConnectedOptionsT NativeTableType;  // matching object-API type
+  typedef FullyConnectedOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_WEIGHTS_FORMAT = 6,
+    VT_KEEP_NUM_DIMS = 8,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  tflite::FullyConnectedOptionsWeightsFormat weights_format() const {
+    return static_cast<tflite::FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));  // enum is read as an int8_t
+  }
+  bool keep_num_dims() const {
+    return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0;  // bool is read as a uint8_t; any non-zero value is true
+  }
+  bool asymmetric_quantize_inputs() const {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;  // same uint8_t-to-bool pattern as keep_num_dims()
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<int8_t>(verifier, VT_WEIGHTS_FORMAT, 1) &&
+           VerifyField<uint8_t>(verifier, VT_KEEP_NUM_DIMS, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<FullyConnectedOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct FullyConnectedOptionsBuilder {  // Incremental writer for one FullyConnectedOptions table: construct, call add_* as needed, then Finish().
+  typedef FullyConnectedOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t; last argument matches the accessor's default
+  }
+  void add_weights_format(tflite::FullyConnectedOptionsWeightsFormat weights_format) {
+    fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT, static_cast<int8_t>(weights_format), 0);  // enum is written as an int8_t
+  }
+  void add_keep_num_dims(bool keep_num_dims) {
+    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_KEEP_NUM_DIMS, static_cast<uint8_t>(keep_num_dims), 0);  // bool is written as a uint8_t
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);  // bool is written as a uint8_t
+  }
+  explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<FullyConnectedOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FullyConnectedOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(  // One-shot helper: writes a FullyConnectedOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    tflite::FullyConnectedOptionsWeightsFormat weights_format = tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+    bool keep_num_dims = false,
+    bool asymmetric_quantize_inputs = false) {
+  FullyConnectedOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);  // fields are added in reverse parameter order, as generated - keep as is
+  builder_.add_keep_num_dims(keep_num_dims);
+  builder_.add_weights_format(weights_format);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native FullyConnectedOptionsT; only declared here, definition is outside this view.
+
+struct SoftmaxOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the SoftmaxOptions table.
+  typedef SoftmaxOptions TableType;  // the flatbuffer table type this object corresponds to
+  float beta = 0.0f;  // same default the table accessor falls back to
+};
+
+struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized SoftmaxOptions table.
+  typedef SoftmaxOptionsT NativeTableType;  // matching object-API type
+  typedef SoftmaxOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offset identifying the single field
+    VT_BETA = 4
+  };
+  float beta() const {
+    return GetField<float>(VT_BETA, 0.0f);  // 0.0f is the default passed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<float>(verifier, VT_BETA, 4) &&
+           verifier.EndTable();
+  }
+  SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<SoftmaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct SoftmaxOptionsBuilder {  // Incremental writer for one SoftmaxOptions table: construct, call add_beta() if needed, then Finish().
+  typedef SoftmaxOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_beta(float beta) {
+    fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f);  // last argument matches the default used by the beta() accessor
+  }
+  explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SoftmaxOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SoftmaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(  // One-shot helper: writes a SoftmaxOptions table into _fbb.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    float beta = 0.0f) {
+  SoftmaxOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_beta(beta);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native SoftmaxOptionsT; only declared here, definition is outside this view.
+
+struct ConcatenationOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the ConcatenationOptions table.
+  typedef ConcatenationOptions TableType;  // the flatbuffer table type this object corresponds to
+  int32_t axis = 0;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+};
+
+struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized ConcatenationOptions table.
+  typedef ConcatenationOptionsT NativeTableType;  // matching object-API type
+  typedef ConcatenationOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_AXIS = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6
+  };
+  int32_t axis() const {
+    return GetField<int32_t>(VT_AXIS, 0);  // 0 is the default passed to GetField
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           verifier.EndTable();
+  }
+  ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<ConcatenationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct ConcatenationOptionsBuilder {  // Incremental writer for one ConcatenationOptions table: construct, call add_* as needed, then Finish().
+  typedef ConcatenationOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0);  // last argument matches the default used by the axis() accessor
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t
+  }
+  explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ConcatenationOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ConcatenationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(  // One-shot helper: writes a ConcatenationOptions table into _fbb from individual field values.
+    flatbuffers::FlatBufferBuilder &_fbb,  // builder that receives the table
+    int32_t axis = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  ConcatenationOptionsBuilder builder_(_fbb);  // the builder's constructor calls StartTable()
+  builder_.add_axis(axis);  // the int32 field is added before the byte-sized enum field, as generated - keep as is
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native ConcatenationOptionsT; only declared here, definition is outside this view.
+
+struct AddOptionsT : public flatbuffers::NativeTable {  // Native (object-API) counterpart of the AddOptions table.
+  typedef AddOptions TableType;  // the flatbuffer table type this object corresponds to
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // defaults to the NONE enumerator
+  bool pot_scale_int16 = true;  // note: defaults to true, matching the default of 1 used by the table accessor
+};
+
+struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized AddOptions table.
+  typedef AddOptionsT NativeTableType;  // matching object-API type
+  typedef AddOptionsBuilder Builder;  // matching incremental writer
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // vtable offsets identifying each field
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // enum is read as an int8_t; 0 is the default passed to GetField
+  }
+  bool pot_scale_int16() const {
+    return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;  // default passed to GetField is 1, so this field's default reads as true
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain below passes
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16, 1) &&
+           verifier.EndTable();
+  }
+  AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; NOTE(review): presumably returns a newly allocated native object the caller owns - confirm at the definition
+  void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared only; takes the native object to fill via _o
+  static flatbuffers::Offset<AddOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared only; native object in, table offset out
+};
+
+struct AddOptionsBuilder {  // Incremental writer for one AddOptions table: construct, call add_* as needed, then Finish().
+  typedef AddOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // underlying builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), handed back to EndTable()
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is written as an int8_t; last argument matches the accessor's default
+  }
+  void add_pot_scale_int16(bool pot_scale_int16) {
+    fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), 1);  // bool is written as a uint8_t; default argument is 1 (true), matching the pot_scale_int16() accessor
+  }
+  explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AddOptions> Finish() {  // ends the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AddOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(  // Convenience helper: writes a complete AddOptions table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool pot_scale_int16 = true) {
+  AddOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_pot_scale_int16(pot_scale_int16);  // Fields are added in descending vtable-offset order (6, then 4).
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking an AddOptionsT pointer; _rehasher defaults to nullptr.
+
+struct MulOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with the MulOptions table through TableType.
+  typedef MulOptions TableType;  // Table type this struct corresponds to.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Starts as ActivationFunctionType_NONE.
+};
+
+struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a MulOptions table; privately derives from flatbuffers::Table.
+  typedef MulOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef MulOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_FUSED_ACTIVATION_FUNCTION = 4
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t (fallback 0) and casts it to the enum type.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, the single field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // Last argument equals the scalar's byte size.
+           verifier.EndTable();
+  }
+  MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MulOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MulOptionsBuilder {  // Field-by-field writer for one MulOptions table on a FlatBufferBuilder.
+  typedef MulOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<MulOptions> Finish() {  // Ends the table and returns the result typed as Offset<MulOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MulOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(  // Convenience helper: writes a complete MulOptions table from its single scalar argument.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  MulOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a MulOptionsT pointer; _rehasher defaults to nullptr.
+
+struct L2NormOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with the L2NormOptions table through TableType.
+  typedef L2NormOptions TableType;  // Table type this struct corresponds to.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Starts as ActivationFunctionType_NONE.
+};
+
+struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over an L2NormOptions table; privately derives from flatbuffers::Table.
+  typedef L2NormOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef L2NormOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_FUSED_ACTIVATION_FUNCTION = 4
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t (fallback 0) and casts it to the enum type.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, the single field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // Last argument equals the scalar's byte size.
+           verifier.EndTable();
+  }
+  L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<L2NormOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct L2NormOptionsBuilder {  // Field-by-field writer for one L2NormOptions table on a FlatBufferBuilder.
+  typedef L2NormOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<L2NormOptions> Finish() {  // Ends the table and returns the result typed as Offset<L2NormOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<L2NormOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(  // Convenience helper: writes a complete L2NormOptions table from its single scalar argument.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  L2NormOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking an L2NormOptionsT pointer; _rehasher defaults to nullptr.
+
+struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with LocalResponseNormalizationOptions through TableType.
+  typedef LocalResponseNormalizationOptions TableType;  // Table type this struct corresponds to.
+  int32_t radius = 0;  // All four members start at zero, matching the fallbacks used by the table accessors.
+  float bias = 0.0f;
+  float alpha = 0.0f;
+  float beta = 0.0f;
+};
+
+struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a LocalResponseNormalizationOptions table; privately derives from flatbuffers::Table.
+  typedef LocalResponseNormalizationOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef LocalResponseNormalizationOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offsets passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_RADIUS = 4,
+    VT_BIAS = 6,
+    VT_ALPHA = 8,
+    VT_BETA = 10
+  };
+  int32_t radius() const {  // Reads the int32_t field with fallback 0.
+    return GetField<int32_t>(VT_RADIUS, 0);
+  }
+  float bias() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_BIAS, 0.0f);
+  }
+  float alpha() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_ALPHA, 0.0f);
+  }
+  float beta() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_BETA, 0.0f);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_RADIUS, 4) &&  // Every field here is verified with 4 as the last argument.
+           VerifyField<float>(verifier, VT_BIAS, 4) &&
+           VerifyField<float>(verifier, VT_ALPHA, 4) &&
+           VerifyField<float>(verifier, VT_BETA, 4) &&
+           verifier.EndTable();
+  }
+  LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LocalResponseNormalizationOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LocalResponseNormalizationOptionsBuilder {  // Field-by-field writer for one LocalResponseNormalizationOptions table on a FlatBufferBuilder.
+  typedef LocalResponseNormalizationOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_radius(int32_t radius) {  // Adds radius as int32_t; 0 is passed as the field default.
+    fbb_.AddElement<int32_t>(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0);
+  }
+  void add_bias(float bias) {  // Adds bias as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f);
+  }
+  void add_alpha(float alpha) {  // Adds alpha as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f);
+  }
+  void add_beta(float beta) {  // Adds beta as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f);
+  }
+  explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LocalResponseNormalizationOptions> Finish() {  // Ends the table and returns the result typed as Offset<LocalResponseNormalizationOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LocalResponseNormalizationOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(  // Convenience helper: writes a complete table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t radius = 0,
+    float bias = 0.0f,
+    float alpha = 0.0f,
+    float beta = 0.0f) {
+  LocalResponseNormalizationOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_beta(beta);  // Fields are added in descending vtable-offset order (10, 8, 6, 4).
+  builder_.add_alpha(alpha);
+  builder_.add_bias(bias);
+  builder_.add_radius(radius);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a LocalResponseNormalizationOptionsT pointer; _rehasher defaults to nullptr.
+
+struct LSTMOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with the LSTMOptions table through TableType.
+  typedef LSTMOptions TableType;  // Table type this struct corresponds to.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Starts as ActivationFunctionType_NONE.
+  float cell_clip = 0.0f;
+  float proj_clip = 0.0f;
+  tflite::LSTMKernelType kernel_type = tflite::LSTMKernelType_FULL;  // Starts as LSTMKernelType_FULL.
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over an LSTMOptions table; privately derives from flatbuffers::Table.
+  typedef LSTMOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef LSTMOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offsets passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_KERNEL_TYPE = 10,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t (fallback 0) and casts it to the enum type.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  tflite::LSTMKernelType kernel_type() const {  // Reads the field as int8_t (fallback 0) and casts it to LSTMKernelType.
+    return static_cast<tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+  }
+  bool asymmetric_quantize_inputs() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // Last argument is 1 for the one-byte fields and 4 for the floats.
+           VerifyField<float>(verifier, VT_CELL_CLIP, 4) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP, 4) &&
+           VerifyField<int8_t>(verifier, VT_KERNEL_TYPE, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LSTMOptionsBuilder {  // Field-by-field writer for one LSTMOptions table on a FlatBufferBuilder.
+  typedef LSTMOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {  // Adds cell_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {  // Adds proj_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_kernel_type(tflite::LSTMKernelType kernel_type) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LSTMOptions> Finish() {  // Ends the table and returns the result typed as Offset<LSTMOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(  // Convenience helper: writes a complete LSTMOptions table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f,
+    tflite::LSTMKernelType kernel_type = tflite::LSTMKernelType_FULL,
+    bool asymmetric_quantize_inputs = false) {
+  LSTMOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_proj_clip(proj_clip);  // The two float fields are added first, then the one-byte fields; each group is in descending vtable-offset order.
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_kernel_type(kernel_type);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking an LSTMOptionsT pointer; _rehasher defaults to nullptr.
+
+struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with UnidirectionalSequenceLSTMOptions through TableType.
+  typedef UnidirectionalSequenceLSTMOptions TableType;  // Table type this struct corresponds to.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Starts as ActivationFunctionType_NONE.
+  float cell_clip = 0.0f;
+  float proj_clip = 0.0f;
+  bool time_major = false;  // All three bool members start as false.
+  bool asymmetric_quantize_inputs = false;
+  bool diagonal_recurrent_tensors = false;
+};
+
+struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a UnidirectionalSequenceLSTMOptions table; privately derives from flatbuffers::Table.
+  typedef UnidirectionalSequenceLSTMOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offsets passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_TIME_MAJOR = 10,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 12,
+    VT_DIAGONAL_RECURRENT_TENSORS = 14
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t (fallback 0) and casts it to the enum type.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool time_major() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0;
+  }
+  bool asymmetric_quantize_inputs() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool diagonal_recurrent_tensors() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_DIAGONAL_RECURRENT_TENSORS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // Last argument is 1 for the one-byte fields and 4 for the floats.
+           VerifyField<float>(verifier, VT_CELL_CLIP, 4) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP, 4) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           VerifyField<uint8_t>(verifier, VT_DIAGONAL_RECURRENT_TENSORS, 1) &&
+           verifier.EndTable();
+  }
+  UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnidirectionalSequenceLSTMOptionsBuilder {  // Field-by-field writer for one UnidirectionalSequenceLSTMOptions table on a FlatBufferBuilder.
+  typedef UnidirectionalSequenceLSTMOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {  // Adds cell_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {  // Adds proj_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_time_major(bool time_major) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  void add_diagonal_recurrent_tensors(bool diagonal_recurrent_tensors) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(UnidirectionalSequenceLSTMOptions::VT_DIAGONAL_RECURRENT_TENSORS, static_cast<uint8_t>(diagonal_recurrent_tensors), 0);
+  }
+  explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish() {  // Ends the table and returns the result typed as Offset<UnidirectionalSequenceLSTMOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(  // Convenience helper: writes a complete table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f,
+    bool time_major = false,
+    bool asymmetric_quantize_inputs = false,
+    bool diagonal_recurrent_tensors = false) {
+  UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_proj_clip(proj_clip);  // The two float fields are added first, then the one-byte fields; each group is in descending vtable-offset order.
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_diagonal_recurrent_tensors(diagonal_recurrent_tensors);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_time_major(time_major);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a UnidirectionalSequenceLSTMOptionsT pointer; _rehasher defaults to nullptr.
+
+struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with BidirectionalSequenceLSTMOptions through TableType.
+  typedef BidirectionalSequenceLSTMOptions TableType;  // Table type this struct corresponds to.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Starts as ActivationFunctionType_NONE.
+  float cell_clip = 0.0f;
+  float proj_clip = 0.0f;
+  bool merge_outputs = false;
+  bool time_major = true;  // The only bool here that starts as true; matches the fallback of 1 in the table accessor.
+  bool asymmetric_quantize_inputs = false;
+};
+
+struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a BidirectionalSequenceLSTMOptions table; privately derives from flatbuffers::Table.
+  typedef BidirectionalSequenceLSTMOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef BidirectionalSequenceLSTMOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offsets passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_CELL_CLIP = 6,
+    VT_PROJ_CLIP = 8,
+    VT_MERGE_OUTPUTS = 10,
+    VT_TIME_MAJOR = 12,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t (fallback 0) and casts it to the enum type.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  float cell_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_CELL_CLIP, 0.0f);
+  }
+  float proj_clip() const {  // Reads the float field with fallback 0.0f.
+    return GetField<float>(VT_PROJ_CLIP, 0.0f);
+  }
+  bool merge_outputs() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
+  }
+  bool time_major() const {  // Reads the field as uint8_t with fallback 1, so the fallback result is true.
+    return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0;
+  }
+  bool asymmetric_quantize_inputs() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // Last argument is 1 for the one-byte fields and 4 for the floats.
+           VerifyField<float>(verifier, VT_CELL_CLIP, 4) &&
+           VerifyField<float>(verifier, VT_PROJ_CLIP, 4) &&
+           VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS, 1) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR, 1) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) &&
+           verifier.EndTable();
+  }
+  BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BidirectionalSequenceLSTMOptionsBuilder {  // Field-by-field writer for one BidirectionalSequenceLSTMOptions table on a FlatBufferBuilder.
+  typedef BidirectionalSequenceLSTMOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the enum as int8_t; 0 is passed as the field default.
+    fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_cell_clip(float cell_clip) {  // Adds cell_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f);
+  }
+  void add_proj_clip(float proj_clip) {  // Adds proj_clip as float; 0.0f is passed as the field default.
+    fbb_.AddElement<float>(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
+  }
+  void add_merge_outputs(bool merge_outputs) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
+  }
+  void add_time_major(bool time_major) {  // Adds the bool as uint8_t; 1 is passed as the field default.
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 1);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish() {  // Ends the table and returns the result typed as Offset<BidirectionalSequenceLSTMOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BidirectionalSequenceLSTMOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(  // Convenience helper: writes a complete table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    float cell_clip = 0.0f,
+    float proj_clip = 0.0f,
+    bool merge_outputs = false,
+    bool time_major = true,
+    bool asymmetric_quantize_inputs = false) {
+  BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_proj_clip(proj_clip);  // The two float fields are added first, then the one-byte fields; each group is in descending vtable-offset order.
+  builder_.add_cell_clip(cell_clip);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_time_major(time_major);
+  builder_.add_merge_outputs(merge_outputs);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a BidirectionalSequenceLSTMOptionsT pointer; _rehasher defaults to nullptr.
+
+struct ResizeBilinearOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with the ResizeBilinearOptions table through TableType.
+  typedef ResizeBilinearOptions TableType;  // Table type this struct corresponds to.
+  bool align_corners = false;  // Both members start as false.
+  bool half_pixel_centers = false;
+};
+
+struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a ResizeBilinearOptions table; privately derives from flatbuffers::Table.
+  typedef ResizeBilinearOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef ResizeBilinearOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // NOTE(review): offsets begin at 8, not 4 — presumably slots 4 and 6 belong to fields no longer exposed by the schema; confirm against schema.fbs.
+    VT_ALIGN_CORNERS = 8,
+    VT_HALF_PIXEL_CENTERS = 10
+  };
+  bool align_corners() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0;
+  }
+  bool half_pixel_centers() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS, 1) &&  // Last argument equals the scalar's byte size.
+           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS, 1) &&
+           verifier.EndTable();
+  }
+  ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ResizeBilinearOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ResizeBilinearOptionsBuilder {  // Field-by-field writer for one ResizeBilinearOptions table on a FlatBufferBuilder.
+  typedef ResizeBilinearOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_align_corners(bool align_corners) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0);
+  }
+  void add_half_pixel_centers(bool half_pixel_centers) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS, static_cast<uint8_t>(half_pixel_centers), 0);
+  }
+  explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ResizeBilinearOptions> Finish() {  // Ends the table and returns the result typed as Offset<ResizeBilinearOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ResizeBilinearOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(  // Convenience helper: writes a complete ResizeBilinearOptions table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool align_corners = false,
+    bool half_pixel_centers = false) {
+  ResizeBilinearOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_half_pixel_centers(half_pixel_centers);  // Fields are added in descending vtable-offset order (10, then 8).
+  builder_.add_align_corners(align_corners);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a ResizeBilinearOptionsT pointer; _rehasher defaults to nullptr.
+
+struct ResizeNearestNeighborOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with ResizeNearestNeighborOptions through TableType.
+  typedef ResizeNearestNeighborOptions TableType;  // Table type this struct corresponds to.
+  bool align_corners = false;  // Both members start as false.
+  bool half_pixel_centers = false;
+};
+
+struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a ResizeNearestNeighborOptions table; privately derives from flatbuffers::Table.
+  typedef ResizeNearestNeighborOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef ResizeNearestNeighborOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offsets passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_ALIGN_CORNERS = 4,
+    VT_HALF_PIXEL_CENTERS = 6
+  };
+  bool align_corners() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0;
+  }
+  bool half_pixel_centers() const {  // Reads the field as uint8_t with fallback 0, so the fallback result is false.
+    return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, each listed field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS, 1) &&  // Last argument equals the scalar's byte size.
+           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS, 1) &&
+           verifier.EndTable();
+  }
+  ResizeNearestNeighborOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ResizeNearestNeighborOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ResizeNearestNeighborOptionsBuilder {  // Field-by-field writer for one ResizeNearestNeighborOptions table on a FlatBufferBuilder.
+  typedef ResizeNearestNeighborOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_align_corners(bool align_corners) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast<uint8_t>(align_corners), 0);
+  }
+  void add_half_pixel_centers(bool half_pixel_centers) {  // Adds the bool as uint8_t; 0 is passed as the field default.
+    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, static_cast<uint8_t>(half_pixel_centers), 0);
+  }
+  explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ResizeNearestNeighborOptions> Finish() {  // Ends the table and returns the result typed as Offset<ResizeNearestNeighborOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ResizeNearestNeighborOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(  // Convenience helper: writes a complete ResizeNearestNeighborOptions table from scalar arguments.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool align_corners = false,
+    bool half_pixel_centers = false) {
+  ResizeNearestNeighborOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_half_pixel_centers(half_pixel_centers);  // Fields are added in descending vtable-offset order (6, then 4).
+  builder_.add_align_corners(align_corners);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a ResizeNearestNeighborOptionsT pointer; _rehasher defaults to nullptr.
+
+struct CallOptionsT : public flatbuffers::NativeTable {  // Plain-data struct derived from flatbuffers::NativeTable; paired with the CallOptions table through TableType.
+  typedef CallOptions TableType;  // Table type this struct corresponds to.
+  uint32_t subgraph = 0;  // Starts as 0; NOTE(review): presumably an index identifying a subgraph — confirm against the schema.
+};
+
+struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Const accessor view over a CallOptions table; privately derives from flatbuffers::Table.
+  typedef CallOptionsT NativeTableType;  // Companion NativeTable-derived struct.
+  typedef CallOptionsBuilder Builder;  // Companion builder type.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset passed to GetField/VerifyField here and to AddElement in the builder.
+    VT_SUBGRAPH = 4
+  };
+  uint32_t subgraph() const {  // Reads the uint32_t field with fallback 0.
+    return GetField<uint32_t>(VT_SUBGRAPH, 0);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table start, the single field, and EndTable() all pass (short-circuit &&).
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH, 4) &&  // Last argument equals the scalar's byte size.
+           verifier.EndTable();
+  }
+  CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared only; their bodies are not in this struct.
+  void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CallOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct CallOptionsBuilder {  // Field-by-field writer for one CallOptions table on a FlatBufferBuilder.
+  typedef CallOptions Table;  // Table type this builder produces.
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); passed back to EndTable() in Finish().
+  void add_subgraph(uint32_t subgraph) {  // Adds subgraph as uint32_t; 0 is passed as the field default.
+    fbb_.AddElement<uint32_t>(CallOptions::VT_SUBGRAPH, subgraph, 0);
+  }
+  explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // Binds to _fbb and starts a new table on it.
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CallOptions> Finish() {  // Ends the table and returns the result typed as Offset<CallOptions>.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CallOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(  // Convenience helper: writes a complete CallOptions table from its single scalar argument.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t subgraph = 0) {
+  CallOptionsBuilder builder_(_fbb);  // The builder's constructor calls StartTable() on _fbb.
+  builder_.add_subgraph(subgraph);
+  return builder_.Finish();  // Ends the table and yields its offset.
+}
+
+flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declaration only: overload taking a CallOptionsT pointer; _rehasher defaults to nullptr.
+
+struct PadOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of PadOptions; carries no data members.
+  typedef PadOptions TableType;  // links this struct back to its table type
+};
+
+struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a PadOptions table; it declares no fields.
+  typedef PadOptionsT NativeTableType;
+  typedef PadOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  PadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<PadOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadOptionsBuilder {  // Writer for one PadOptions table; has no add_* methods because the table has no fields.
+  typedef PadOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<PadOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<PadOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(  // Builds an empty PadOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  PadOptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct PadV2OptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of PadV2Options; carries no data members.
+  typedef PadV2Options TableType;  // links this struct back to its table type
+};
+
+struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a PadV2Options table; it declares no fields.
+  typedef PadV2OptionsT NativeTableType;
+  typedef PadV2OptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  PadV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<PadV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct PadV2OptionsBuilder {  // Writer for one PadV2Options table; has no add_* methods because the table has no fields.
+  typedef PadV2Options Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<PadV2Options> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<PadV2Options>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(  // Builds an empty PadV2Options table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  PadV2OptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReshapeOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the ReshapeOptions table (linked via TableType).
+  typedef ReshapeOptions TableType;
+  std::vector<int32_t> new_shape{};  // value-initialised, i.e. starts out empty
+};
+
+struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a ReshapeOptions table; exposes one vector field, `new_shape`.
+  typedef ReshapeOptionsT NativeTableType;
+  typedef ReshapeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_NEW_SHAPE = 4  // vtable offset constant used by new_shape(), Verify() and ReshapeOptionsBuilder::add_new_shape()
+  };
+  const flatbuffers::Vector<int32_t> *new_shape() const {  // returns a pointer, so callers should be prepared for nullptr - TODO confirm GetPointer's absent-field result
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_NEW_SHAPE);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_NEW_SHAPE) &&  // check the offset stored in the slot first...
+           verifier.VerifyVector(new_shape()) &&  // ...then the vector it refers to
+           verifier.EndTable();
+  }
+  ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReshapeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReshapeOptionsBuilder {  // Step-by-step writer for one ReshapeOptions table.
+  typedef ReshapeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape) {  // takes an offset to an already-created vector, not the vector data itself
+    fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape);
+  }
+  explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<ReshapeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<ReshapeOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(  // Builds a complete ReshapeOptions table from a pre-built vector offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape = 0) {
+  ReshapeOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_new_shape(new_shape);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptionsDirect(  // Convenience overload: creates the vector from a std::vector, then the table.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<int32_t> *new_shape = nullptr) {
+  auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;  // null pointer yields offset 0, the same value CreateReshapeOptions defaults to
+  return tflite::CreateReshapeOptions(  // the vector is created before this call opens the table
+      _fbb,
+      new_shape__);
+}
+
+flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of SpaceToBatchNDOptions; carries no data members.
+  typedef SpaceToBatchNDOptions TableType;  // links this struct back to its table type
+};
+
+struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a SpaceToBatchNDOptions table; it declares no fields.
+  typedef SpaceToBatchNDOptionsT NativeTableType;
+  typedef SpaceToBatchNDOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  SpaceToBatchNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SpaceToBatchNDOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SpaceToBatchNDOptionsBuilder {  // Writer for one SpaceToBatchNDOptions table; has no add_* methods because the table has no fields.
+  typedef SpaceToBatchNDOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<SpaceToBatchNDOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<SpaceToBatchNDOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(  // Builds an empty SpaceToBatchNDOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  SpaceToBatchNDOptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of BatchToSpaceNDOptions; carries no data members.
+  typedef BatchToSpaceNDOptions TableType;  // links this struct back to its table type
+};
+
+struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a BatchToSpaceNDOptions table; it declares no fields.
+  typedef BatchToSpaceNDOptionsT NativeTableType;
+  typedef BatchToSpaceNDOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  BatchToSpaceNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BatchToSpaceNDOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BatchToSpaceNDOptionsBuilder {  // Writer for one BatchToSpaceNDOptions table; has no add_* methods because the table has no fields.
+  typedef BatchToSpaceNDOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<BatchToSpaceNDOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<BatchToSpaceNDOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(  // Builds an empty BatchToSpaceNDOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  BatchToSpaceNDOptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SkipGramOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the SkipGramOptions table (linked via TableType).
+  typedef SkipGramOptions TableType;
+  int32_t ngram_size = 0;  // each member starts at the same default its SkipGramOptions getter hands to GetField
+  int32_t max_skip_size = 0;
+  bool include_all_ngrams = false;
+};
+
+struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a SkipGramOptions table with three scalar fields.
+  typedef SkipGramOptionsT NativeTableType;
+  typedef SkipGramOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // one vtable offset constant per field, spaced 2 apart
+    VT_NGRAM_SIZE = 4,
+    VT_MAX_SKIP_SIZE = 6,
+    VT_INCLUDE_ALL_NGRAMS = 8
+  };
+  int32_t ngram_size() const {
+    return GetField<int32_t>(VT_NGRAM_SIZE, 0);  // 0 is the default value handed to GetField
+  }
+  int32_t max_skip_size() const {
+    return GetField<int32_t>(VT_MAX_SKIP_SIZE, 0);  // 0 is the default value handed to GetField
+  }
+  bool include_all_ngrams() const {
+    return GetField<uint8_t>(VT_INCLUDE_ALL_NGRAMS, 0) != 0;  // flag is read as uint8_t and compared against 0 to produce the bool
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NGRAM_SIZE, 4) &&  // NOTE(review): trailing 4/4/1 are presumably field alignments - confirm
+           VerifyField<int32_t>(verifier, VT_MAX_SKIP_SIZE, 4) &&
+           VerifyField<uint8_t>(verifier, VT_INCLUDE_ALL_NGRAMS, 1) &&
+           verifier.EndTable();
+  }
+  SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SkipGramOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SkipGramOptionsBuilder {  // Step-by-step writer for one SkipGramOptions table.
+  typedef SkipGramOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_ngram_size(int32_t ngram_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0);  // third argument matches the getter's default
+  }
+  void add_max_skip_size(int32_t max_skip_size) {
+    fbb_.AddElement<int32_t>(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0);  // third argument matches the getter's default
+  }
+  void add_include_all_ngrams(bool include_all_ngrams) {
+    fbb_.AddElement<uint8_t>(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast<uint8_t>(include_all_ngrams), 0);  // bool is stored as a uint8_t
+  }
+  explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<SkipGramOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<SkipGramOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(  // Builds a complete SkipGramOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t ngram_size = 0,
+    int32_t max_skip_size = 0,
+    bool include_all_ngrams = false) {
+  SkipGramOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_max_skip_size(max_skip_size);  // NOTE(review): add order differs from parameter order (int32 fields first, 1-byte flag last); presumably the generator's size-sorted order - keep as is
+  builder_.add_ngram_size(ngram_size);
+  builder_.add_include_all_ngrams(include_all_ngrams);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SpaceToDepthOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the SpaceToDepthOptions table (linked via TableType).
+  typedef SpaceToDepthOptions TableType;
+  int32_t block_size = 0;  // initialised to 0, the same default SpaceToDepthOptions::block_size() hands to GetField
+};
+
+struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a SpaceToDepthOptions table; exposes one field, `block_size`.
+  typedef SpaceToDepthOptionsT NativeTableType;
+  typedef SpaceToDepthOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BLOCK_SIZE = 4  // vtable offset constant used by block_size(), Verify() and SpaceToDepthOptionsBuilder::add_block_size()
+  };
+  int32_t block_size() const {
+    return GetField<int32_t>(VT_BLOCK_SIZE, 0);  // 0 is the default value handed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE, 4) &&  // NOTE(review): trailing 4 is presumably the field alignment - confirm
+           verifier.EndTable();
+  }
+  SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SpaceToDepthOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SpaceToDepthOptionsBuilder {  // Step-by-step writer for one SpaceToDepthOptions table.
+  typedef SpaceToDepthOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_block_size(int32_t block_size) {
+    fbb_.AddElement<int32_t>(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0);  // third argument matches the getter's default
+  }
+  explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<SpaceToDepthOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<SpaceToDepthOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(  // Builds a complete SpaceToDepthOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t block_size = 0) {
+  SpaceToDepthOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_block_size(block_size);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DepthToSpaceOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the DepthToSpaceOptions table (linked via TableType).
+  typedef DepthToSpaceOptions TableType;
+  int32_t block_size = 0;  // initialised to 0, the same default DepthToSpaceOptions::block_size() hands to GetField
+};
+
+struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a DepthToSpaceOptions table; exposes one field, `block_size`.
+  typedef DepthToSpaceOptionsT NativeTableType;
+  typedef DepthToSpaceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BLOCK_SIZE = 4  // vtable offset constant used by block_size(), Verify() and DepthToSpaceOptionsBuilder::add_block_size()
+  };
+  int32_t block_size() const {
+    return GetField<int32_t>(VT_BLOCK_SIZE, 0);  // 0 is the default value handed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_BLOCK_SIZE, 4) &&  // NOTE(review): trailing 4 is presumably the field alignment - confirm
+           verifier.EndTable();
+  }
+  DepthToSpaceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DepthToSpaceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DepthToSpaceOptionsBuilder {  // Step-by-step writer for one DepthToSpaceOptions table.
+  typedef DepthToSpaceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_block_size(int32_t block_size) {
+    fbb_.AddElement<int32_t>(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0);  // third argument matches the getter's default
+  }
+  explicit DepthToSpaceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<DepthToSpaceOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<DepthToSpaceOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(  // Builds a complete DepthToSpaceOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t block_size = 0) {
+  DepthToSpaceOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_block_size(block_size);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SubOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the SubOptions table (linked via TableType).
+  typedef SubOptions TableType;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;
+  bool pot_scale_int16 = true;  // note: defaults to true, matching the default of 1 used by SubOptions::pot_scale_int16()
+};
+
+struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a SubOptions table with two one-byte fields.
+  typedef SubOptionsT NativeTableType;
+  typedef SubOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // one vtable offset constant per field
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // read as int8_t, then cast to the enum; presumably 0 == ActivationFunctionType_NONE - confirm at the enum
+  }
+  bool pot_scale_int16() const {
+    return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;  // default handed to GetField is 1, i.e. true
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // NOTE(review): trailing 1s are presumably field alignments - confirm
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16, 1) &&
+           verifier.EndTable();
+  }
+  SubOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SubOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SubOptionsBuilder {  // Step-by-step writer for one SubOptions table.
+  typedef SubOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is stored as an int8_t
+  }
+  void add_pot_scale_int16(bool pot_scale_int16) {
+    fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16), 1);  // third argument is 1 here, matching the getter's default of true
+  }
+  explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<SubOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<SubOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(  // Builds a complete SubOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE,
+    bool pot_scale_int16 = true) {
+  SubOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_pot_scale_int16(pot_scale_int16);  // fields are added in reverse parameter order, as emitted; order left untouched
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DivOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the DivOptions table (linked via TableType).
+  typedef DivOptions TableType;
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // same default as CreateDivOptions' parameter
+};
+
+struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a DivOptions table; exposes one field, `fused_activation_function`.
+  typedef DivOptionsT NativeTableType;
+  typedef DivOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_FUSED_ACTIVATION_FUNCTION = 4  // vtable offset constant used by the getter, Verify() and DivOptionsBuilder
+  };
+  tflite::ActivationFunctionType fused_activation_function() const {
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));  // read as int8_t, then cast to the enum; presumably 0 == ActivationFunctionType_NONE - confirm at the enum
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&  // NOTE(review): trailing 1 is presumably the field alignment - confirm
+           verifier.EndTable();
+  }
+  DivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DivOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DivOptionsBuilder {  // Step-by-step writer for one DivOptions table.
+  typedef DivOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {
+    fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);  // enum is stored as an int8_t
+  }
+  explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<DivOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<DivOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(  // Builds a complete DivOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  DivOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<DivOptions> CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TopKV2OptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of TopKV2Options; carries no data members.
+  typedef TopKV2Options TableType;  // links this struct back to its table type
+};
+
+struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a TopKV2Options table; it declares no fields.
+  typedef TopKV2OptionsT NativeTableType;
+  typedef TopKV2OptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  TopKV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<TopKV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TopKV2OptionsBuilder {  // Writer for one TopKV2Options table; has no add_* methods because the table has no fields.
+  typedef TopKV2Options Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<TopKV2Options> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<TopKV2Options>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(  // Builds an empty TopKV2Options table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  TopKV2OptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the EmbeddingLookupSparseOptions table (linked via TableType).
+  typedef EmbeddingLookupSparseOptions TableType;
+  tflite::CombinerType combiner = tflite::CombinerType_SUM;  // same default as CreateEmbeddingLookupSparseOptions' parameter
+};
+
+struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for an EmbeddingLookupSparseOptions table; exposes one field, `combiner`.
+  typedef EmbeddingLookupSparseOptionsT NativeTableType;
+  typedef EmbeddingLookupSparseOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_COMBINER = 4  // vtable offset constant used by combiner(), Verify() and the builder's add_combiner()
+  };
+  tflite::CombinerType combiner() const {
+    return static_cast<tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));  // read as int8_t, then cast to the enum; presumably 0 == CombinerType_SUM - confirm at the enum
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_COMBINER, 1) &&  // NOTE(review): trailing 1 is presumably the field alignment - confirm
+           verifier.EndTable();
+  }
+  EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<EmbeddingLookupSparseOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct EmbeddingLookupSparseOptionsBuilder {  // Step-by-step writer for one EmbeddingLookupSparseOptions table.
+  typedef EmbeddingLookupSparseOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_combiner(tflite::CombinerType combiner) {
+    fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast<int8_t>(combiner), 0);  // enum is stored as an int8_t
+  }
+  explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<EmbeddingLookupSparseOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(  // Builds a complete EmbeddingLookupSparseOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::CombinerType combiner = tflite::CombinerType_SUM) {
+  EmbeddingLookupSparseOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_combiner(combiner);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GatherOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of the GatherOptions table (linked via TableType).
+  typedef GatherOptions TableType;
+  int32_t axis = 0;  // both members start at 0, the same default their GatherOptions getters hand to GetField
+  int32_t batch_dims = 0;
+};
+
+struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a GatherOptions table with two int32 fields.
+  typedef GatherOptionsT NativeTableType;
+  typedef GatherOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // one vtable offset constant per field
+    VT_AXIS = 4,
+    VT_BATCH_DIMS = 6
+  };
+  int32_t axis() const {
+    return GetField<int32_t>(VT_AXIS, 0);  // 0 is the default value handed to GetField
+  }
+  int32_t batch_dims() const {
+    return GetField<int32_t>(VT_BATCH_DIMS, 0);  // 0 is the default value handed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the && chain succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS, 4) &&  // NOTE(review): trailing 4s are presumably field alignments - confirm
+           VerifyField<int32_t>(verifier, VT_BATCH_DIMS, 4) &&
+           verifier.EndTable();
+  }
+  GatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GatherOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherOptionsBuilder {  // Step-by-step writer for one GatherOptions table.
+  typedef GatherOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0);  // third argument matches the getter's default
+  }
+  void add_batch_dims(int32_t batch_dims) {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);  // third argument matches the getter's default
+  }
+  explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<GatherOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<GatherOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(  // Builds a complete GatherOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t axis = 0,
+    int32_t batch_dims = 0) {
+  GatherOptionsBuilder builder_(_fbb);  // constructor starts the table
+  builder_.add_batch_dims(batch_dims);  // fields are added in reverse parameter order, as emitted; order left untouched
+  builder_.add_axis(axis);
+  return builder_.Finish();  // typed offset of the finished table
+}
+
+flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TransposeOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of TransposeOptions; carries no data members.
+  typedef TransposeOptions TableType;  // links this struct back to its table type
+};
+
+struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a TransposeOptions table; it declares no fields.
+  typedef TransposeOptionsT NativeTableType;
+  typedef TransposeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<TransposeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct TransposeOptionsBuilder {  // Writer for one TransposeOptions table; has no add_* methods because the table has no fields.
+  typedef TransposeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<TransposeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<TransposeOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(  // Builds an empty TransposeOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  TransposeOptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ExpOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of ExpOptions; carries no data members.
+  typedef ExpOptions TableType;  // links this struct back to its table type
+};
+
+struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for an ExpOptions table; it declares no fields.
+  typedef ExpOptionsT NativeTableType;
+  typedef ExpOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  ExpOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ExpOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ExpOptionsBuilder {  // Writer for one ExpOptions table; has no add_* methods because the table has no fields.
+  typedef ExpOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // destination builder, held by reference
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); consumed by Finish()
+  explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // the table is opened as soon as the builder is constructed
+  }
+  flatbuffers::Offset<ExpOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);  // closes the table opened in the constructor
+    auto o = flatbuffers::Offset<ExpOptions>(end);  // wrap the raw end offset in the typed Offset
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ExpOptions> CreateExpOptions(  // Builds an empty ExpOptions table in one call.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ExpOptionsBuilder builder_(_fbb);  // constructor starts the table
+  return builder_.Finish();  // nothing to add; finish immediately
+}
+
+flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CosOptionsT : public flatbuffers::NativeTable {  // Plain-struct counterpart of CosOptions; carries no data members.
+  typedef CosOptions TableType;  // links this struct back to its table type
+};
+
+struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Accessor type for a CosOptions table; it declares no fields.
+  typedef CosOptionsT NativeTableType;
+  typedef CosOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // with no fields, only the table start/end checks run
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  CosOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack/UnPackTo/Pack are declared only; no bodies appear inside this struct
+  void UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CosOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for CosOptions: the table is started in the constructor and ended by Finish().
+struct CosOptionsBuilder {
+  using Table = CosOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<CosOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<CosOptions>(table_end);
+  }
+};
+
+// Serializes a field-less CosOptions table into _fbb: the builder is constructed and
+// finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return CosOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ReducerOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with ReducerOptions
+  using TableType = ReducerOptions;
+  bool keep_dims{false};
+};
+
+// Accessor for a serialized ReducerOptions table with a single boolean field, keep_dims.
+struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ReducerOptionsT;
+  using Builder = ReducerOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_KEEP_DIMS = 4 };
+  // Reads the field as uint8_t (0 is passed as the fallback) and reports non-zero as true.
+  bool keep_dims() const { return GetField<uint8_t>(VT_KEEP_DIMS, 0) != 0; }
+  // Succeeds only if the table start, the keep_dims field and the table end all verify.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_KEEP_DIMS, 1)) return false;
+    return verifier.EndTable();
+  }
+  // Conversions to and from the native ReducerOptionsT object; only declared here.
+  ReducerOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReducerOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for ReducerOptions: started in the constructor, filled via add_keep_dims(), ended by Finish().
+struct ReducerOptionsBuilder {
+  using Table = ReducerOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // The bool is converted to uint8_t before being added; 0 is passed as the default value.
+  void add_keep_dims(bool keep_dims) {
+    const uint8_t encoded = static_cast<uint8_t>(keep_dims);
+    fbb_.AddElement<uint8_t>(ReducerOptions::VT_KEEP_DIMS, encoded, 0);
+  }
+  explicit ReducerOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<ReducerOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ReducerOptions>(table_end);
+  }
+};
+
+// Builds a ReducerOptions table in _fbb carrying keep_dims and returns its typed offset.
+inline flatbuffers::Offset<ReducerOptions> CreateReducerOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false) {
+  ReducerOptionsBuilder table_builder(_fbb);
+  table_builder.add_keep_dims(keep_dims);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<ReducerOptions> CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SqueezeOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with SqueezeOptions
+  using TableType = SqueezeOptions;
+  std::vector<int32_t> squeeze_dims{};  // value-initialized, i.e. starts out empty
+};
+
+// Accessor for a serialized SqueezeOptions table with a single int32 vector field, squeeze_dims.
+struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SqueezeOptionsT;
+  using Builder = SqueezeOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_SQUEEZE_DIMS = 4 };
+  const flatbuffers::Vector<int32_t> *squeeze_dims() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SQUEEZE_DIMS);
+  }
+  // Verifies, in order: table start, the squeeze_dims offset, the vector itself, table end.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyOffset(verifier, VT_SQUEEZE_DIMS)) return false;
+    if (!verifier.VerifyVector(squeeze_dims())) return false;
+    return verifier.EndTable();
+  }
+  SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SqueezeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for SqueezeOptions: started in the constructor, filled via add_squeeze_dims(), ended by Finish().
+struct SqueezeOptionsBuilder {
+  using Table = SqueezeOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Stores the given vector offset in the squeeze_dims slot.
+  void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims) {
+    fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims);
+  }
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<SqueezeOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SqueezeOptions>(table_end);
+  }
+};
+
+// Builds a SqueezeOptions table in _fbb whose squeeze_dims slot holds the given vector offset.
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims = 0) {
+  SqueezeOptionsBuilder table_builder(_fbb);
+  table_builder.add_squeeze_dims(squeeze_dims);
+  return table_builder.Finish();
+}
+
+// Convenience wrapper: serializes *squeeze_dims into _fbb when the pointer is non-null
+// (otherwise offset 0 is used) and then delegates to tflite::CreateSqueezeOptions.
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptionsDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *squeeze_dims = nullptr) {
+  flatbuffers::Offset<flatbuffers::Vector<int32_t>> dims_offset = 0;
+  if (squeeze_dims) dims_offset = _fbb.CreateVector<int32_t>(*squeeze_dims);
+  return tflite::CreateSqueezeOptions(_fbb, dims_offset);
+}
+
+flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SplitOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with SplitOptions
+  using TableType = SplitOptions;
+  int32_t num_splits{0};
+};
+
+// Accessor for a serialized SplitOptions table with a single int32 field, num_splits.
+struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SplitOptionsT;
+  using Builder = SplitOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM_SPLITS = 4 };
+  // Reads the field as int32_t, passing 0 to GetField as the fallback value.
+  int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+  // Succeeds only if the table start, the num_splits field and the table end all verify.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_NUM_SPLITS, 4)) return false;
+    return verifier.EndTable();
+  }
+  // Conversions to and from the native SplitOptionsT object; only declared here.
+  SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SplitOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for SplitOptions: started in the constructor, filled via add_num_splits(), ended by Finish().
+struct SplitOptionsBuilder {
+  using Table = SplitOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Adds num_splits as an int32 element; 0 is passed as the default value.
+  void add_num_splits(int32_t num_splits) {
+    fbb_.AddElement<int32_t>(SplitOptions::VT_NUM_SPLITS, num_splits, 0);
+  }
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<SplitOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SplitOptions>(table_end);
+  }
+};
+
+// Builds a SplitOptions table in _fbb carrying num_splits and returns its typed offset.
+inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t num_splits = 0) {
+  SplitOptionsBuilder table_builder(_fbb);
+  table_builder.add_num_splits(num_splits);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SplitVOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with SplitVOptions
+  using TableType = SplitVOptions;
+  int32_t num_splits{0};
+};
+
+// Accessor for a serialized SplitVOptions table with a single int32 field, num_splits.
+struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SplitVOptionsT;
+  using Builder = SplitVOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_NUM_SPLITS = 4 };
+  // Reads the field as int32_t, passing 0 to GetField as the fallback value.
+  int32_t num_splits() const { return GetField<int32_t>(VT_NUM_SPLITS, 0); }
+  // Succeeds only if the table start, the num_splits field and the table end all verify.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_NUM_SPLITS, 4)) return false;
+    return verifier.EndTable();
+  }
+  // Conversions to and from the native SplitVOptionsT object; only declared here.
+  SplitVOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SplitVOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for SplitVOptions: started in the constructor, filled via add_num_splits(), ended by Finish().
+struct SplitVOptionsBuilder {
+  using Table = SplitVOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Adds num_splits as an int32 element; 0 is passed as the default value.
+  void add_num_splits(int32_t num_splits) {
+    fbb_.AddElement<int32_t>(SplitVOptions::VT_NUM_SPLITS, num_splits, 0);
+  }
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit SplitVOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<SplitVOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SplitVOptions>(table_end);
+  }
+};
+
+// Builds a SplitVOptions table in _fbb carrying num_splits and returns its typed offset.
+inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t num_splits = 0) {
+  SplitVOptionsBuilder table_builder(_fbb);
+  table_builder.add_num_splits(num_splits);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct StridedSliceOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with StridedSliceOptions
+  using TableType = StridedSliceOptions;
+  int32_t begin_mask{0};
+  int32_t end_mask{0};
+  int32_t ellipsis_mask{0};
+  int32_t new_axis_mask{0};
+  int32_t shrink_axis_mask{0};
+};
+
+// Accessor for a serialized StridedSliceOptions table with five int32 fields:
+// begin_mask, end_mask, ellipsis_mask, new_axis_mask and shrink_axis_mask.
+struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = StridedSliceOptionsT;
+  using Builder = StridedSliceOptionsBuilder;
+
+  // One vtable slot per field, in declaration order (4, 6, 8, 10, 12).
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_BEGIN_MASK = 4,
+    VT_END_MASK = 6,
+    VT_ELLIPSIS_MASK = 8,
+    VT_NEW_AXIS_MASK = 10,
+    VT_SHRINK_AXIS_MASK = 12
+  };
+
+  // Field readers; each passes 0 to GetField as the fallback value.
+  int32_t begin_mask() const { return GetField<int32_t>(VT_BEGIN_MASK, 0); }
+  int32_t end_mask() const { return GetField<int32_t>(VT_END_MASK, 0); }
+  int32_t ellipsis_mask() const { return GetField<int32_t>(VT_ELLIPSIS_MASK, 0); }
+  int32_t new_axis_mask() const { return GetField<int32_t>(VT_NEW_AXIS_MASK, 0); }
+  int32_t shrink_axis_mask() const { return GetField<int32_t>(VT_SHRINK_AXIS_MASK, 0); }
+
+  // Succeeds only if the table start, all five int32 fields and the table end verify.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_BEGIN_MASK, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_END_MASK, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_ELLIPSIS_MASK, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_NEW_AXIS_MASK, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_SHRINK_AXIS_MASK, 4)) return false;
+    return verifier.EndTable();
+  }
+
+  // Conversions to and from the native StridedSliceOptionsT object; only declared here.
+  StridedSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<StridedSliceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for StridedSliceOptions: started in the constructor, filled via the add_*() setters, ended by Finish().
+struct StridedSliceOptionsBuilder {
+  using Table = StridedSliceOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Setters: each adds one int32 mask to its vtable slot, passing 0 as the default value.
+  void add_begin_mask(int32_t begin_mask) {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0);
+  }
+  void add_end_mask(int32_t end_mask) {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_END_MASK, end_mask, 0);
+  }
+  void add_ellipsis_mask(int32_t ellipsis_mask) {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0);
+  }
+  void add_new_axis_mask(int32_t new_axis_mask) {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0);
+  }
+  void add_shrink_axis_mask(int32_t shrink_axis_mask) {
+    fbb_.AddElement<int32_t>(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0);
+  }
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<StridedSliceOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<StridedSliceOptions>(table_end);
+  }
+};
+
+// Builds a StridedSliceOptions table in _fbb from the five mask values and returns
+// its typed offset. The fields are added in reverse declaration order
+// (shrink_axis_mask first, begin_mask last); that order is kept exactly as it was.
+inline flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t begin_mask = 0, int32_t end_mask = 0, int32_t ellipsis_mask = 0,
+    int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0) {
+  StridedSliceOptionsBuilder table_builder(_fbb);
+  table_builder.add_shrink_axis_mask(shrink_axis_mask);
+  table_builder.add_new_axis_mask(new_axis_mask);
+  table_builder.add_ellipsis_mask(ellipsis_mask);
+  table_builder.add_end_mask(end_mask);
+  table_builder.add_begin_mask(begin_mask);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LogSoftmaxOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with LogSoftmaxOptions; holds no data
+  using TableType = LogSoftmaxOptions;
+};
+
+// Accessor for a serialized LogSoftmaxOptions table. The table declares no fields.
+struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = LogSoftmaxOptionsT;
+  using Builder = LogSoftmaxOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  LogSoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LogSoftmaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for LogSoftmaxOptions: the table is started in the constructor and ended by Finish().
+struct LogSoftmaxOptionsBuilder {
+  using Table = LogSoftmaxOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<LogSoftmaxOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<LogSoftmaxOptions>(table_end);
+  }
+};
+
+// Serializes a field-less LogSoftmaxOptions table into _fbb: the builder is constructed
+// and finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return LogSoftmaxOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct CastOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with CastOptions
+  using TableType = CastOptions;
+  tflite::TensorType in_data_type{tflite::TensorType_FLOAT32};
+  tflite::TensorType out_data_type{tflite::TensorType_FLOAT32};
+};
+
+// Accessor for a serialized CastOptions table with two int8-encoded TensorType fields.
+struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = CastOptionsT;
+  using Builder = CastOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_IN_DATA_TYPE = 4, VT_OUT_DATA_TYPE = 6 };
+  tflite::TensorType in_data_type() const {
+    const int8_t raw = GetField<int8_t>(VT_IN_DATA_TYPE, 0);
+    return static_cast<tflite::TensorType>(raw);
+  }
+  tflite::TensorType out_data_type() const {
+    const int8_t raw = GetField<int8_t>(VT_OUT_DATA_TYPE, 0);
+    return static_cast<tflite::TensorType>(raw);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int8_t>(verifier, VT_IN_DATA_TYPE, 1)) return false;
+    if (!VerifyField<int8_t>(verifier, VT_OUT_DATA_TYPE, 1)) return false;
+    return verifier.EndTable();
+  }
+  CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CastOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for CastOptions: started in the constructor, filled via the add_*() setters, ended by Finish().
+struct CastOptionsBuilder {
+  using Table = CastOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_in_data_type(tflite::TensorType in_data_type) {
+    const int8_t encoded = static_cast<int8_t>(in_data_type);
+    fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, encoded, 0);
+  }
+  void add_out_data_type(tflite::TensorType out_data_type) {
+    const int8_t encoded = static_cast<int8_t>(out_data_type);
+    fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, encoded, 0);
+  }
+  explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<CastOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<CastOptions>(table_end);
+  }
+};
+
+// Builds a CastOptions table in _fbb; out_data_type is added before in_data_type, as before.
+inline flatbuffers::Offset<CastOptions> CreateCastOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, tflite::TensorType in_data_type = tflite::TensorType_FLOAT32,
+    tflite::TensorType out_data_type = tflite::TensorType_FLOAT32) {
+  CastOptionsBuilder table_builder(_fbb);
+  table_builder.add_out_data_type(out_data_type);
+  table_builder.add_in_data_type(in_data_type);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<CastOptions> CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct DequantizeOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with DequantizeOptions; holds no data
+  using TableType = DequantizeOptions;
+};
+
+// Accessor for a serialized DequantizeOptions table. The table declares no fields.
+struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = DequantizeOptionsT;
+  using Builder = DequantizeOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  DequantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DequantizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for DequantizeOptions: the table is started in the constructor and ended by Finish().
+struct DequantizeOptionsBuilder {
+  using Table = DequantizeOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<DequantizeOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<DequantizeOptions>(table_end);
+  }
+};
+
+// Serializes a field-less DequantizeOptions table into _fbb: the builder is constructed
+// and finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return DequantizeOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct MaximumMinimumOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with MaximumMinimumOptions; holds no data
+  using TableType = MaximumMinimumOptions;
+};
+
+// Accessor for a serialized MaximumMinimumOptions table. The table declares no fields.
+struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = MaximumMinimumOptionsT;
+  using Builder = MaximumMinimumOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  MaximumMinimumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MaximumMinimumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for MaximumMinimumOptions: the table is started in the constructor and ended by Finish().
+struct MaximumMinimumOptionsBuilder {
+  using Table = MaximumMinimumOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<MaximumMinimumOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<MaximumMinimumOptions>(table_end);
+  }
+};
+
+// Serializes a field-less MaximumMinimumOptions table into _fbb: the builder is constructed
+// and finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return MaximumMinimumOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct TileOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with TileOptions; holds no data
+  using TableType = TileOptions;
+};
+
+// Accessor for a serialized TileOptions table. The table declares no fields.
+struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = TileOptionsT;
+  using Builder = TileOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  TileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<TileOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for TileOptions: the table is started in the constructor and ended by Finish().
+struct TileOptionsBuilder {
+  using Table = TileOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<TileOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<TileOptions>(table_end);
+  }
+};
+
+// Serializes a field-less TileOptions table into _fbb: the builder is constructed and
+// finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return TileOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ArgMaxOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with ArgMaxOptions
+  using TableType = ArgMaxOptions;
+  tflite::TensorType output_type{tflite::TensorType_FLOAT32};
+};
+
+// Accessor for a serialized ArgMaxOptions table with one int8-encoded TensorType field, output_type.
+struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ArgMaxOptionsT;
+  using Builder = ArgMaxOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OUTPUT_TYPE = 4 };
+  tflite::TensorType output_type() const {
+    const int8_t raw = GetField<int8_t>(VT_OUTPUT_TYPE, 0);
+    return static_cast<tflite::TensorType>(raw);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE, 1)) return false;
+    return verifier.EndTable();
+  }
+  ArgMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ArgMaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for ArgMaxOptions: started in the constructor, filled via add_output_type(), ended by Finish().
+struct ArgMaxOptionsBuilder {
+  using Table = ArgMaxOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // The enum is cast to int8_t before being added; 0 is passed as the default value.
+  void add_output_type(tflite::TensorType output_type) {
+    const int8_t encoded = static_cast<int8_t>(output_type);
+    fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, encoded, 0);
+  }
+  explicit ArgMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<ArgMaxOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ArgMaxOptions>(table_end);
+  }
+};
+
+// Builds an ArgMaxOptions table in _fbb carrying output_type and returns its typed offset.
+inline flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, tflite::TensorType output_type = tflite::TensorType_FLOAT32) {
+  ArgMaxOptionsBuilder table_builder(_fbb);
+  table_builder.add_output_type(output_type);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ArgMinOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with ArgMinOptions
+  using TableType = ArgMinOptions;
+  tflite::TensorType output_type{tflite::TensorType_FLOAT32};
+};
+
+// Accessor for a serialized ArgMinOptions table with one int8-encoded TensorType field, output_type.
+struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ArgMinOptionsT;
+  using Builder = ArgMinOptionsBuilder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_OUTPUT_TYPE = 4 };
+  tflite::TensorType output_type() const {
+    const int8_t raw = GetField<int8_t>(VT_OUTPUT_TYPE, 0);
+    return static_cast<tflite::TensorType>(raw);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int8_t>(verifier, VT_OUTPUT_TYPE, 1)) return false;
+    return verifier.EndTable();
+  }
+  ArgMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ArgMinOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for ArgMinOptions: started in the constructor, filled via add_output_type(), ended by Finish().
+struct ArgMinOptionsBuilder {
+  using Table = ArgMinOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // The enum is cast to int8_t before being added; 0 is passed as the default value.
+  void add_output_type(tflite::TensorType output_type) {
+    const int8_t encoded = static_cast<int8_t>(output_type);
+    fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, encoded, 0);
+  }
+  explicit ArgMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  flatbuffers::Offset<ArgMinOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ArgMinOptions>(table_end);
+  }
+};
+
+// Builds an ArgMinOptions table in _fbb carrying output_type and returns its typed offset.
+inline flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, tflite::TensorType output_type = tflite::TensorType_FLOAT32) {
+  ArgMinOptionsBuilder table_builder(_fbb);
+  table_builder.add_output_type(output_type);
+  return table_builder.Finish();
+}
+
+flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GreaterOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with GreaterOptions; holds no data
+  using TableType = GreaterOptions;
+};
+
+// Accessor for a serialized GreaterOptions table. The table declares no fields.
+struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = GreaterOptionsT;
+  using Builder = GreaterOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  GreaterOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(GreaterOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GreaterOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for GreaterOptions: the table is started in the constructor and ended by Finish().
+struct GreaterOptionsBuilder {
+  using Table = GreaterOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<GreaterOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<GreaterOptions>(table_end);
+  }
+};
+
+// Serializes a field-less GreaterOptions table into _fbb: the builder is constructed and
+// finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return GreaterOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct GreaterEqualOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with GreaterEqualOptions; holds no data
+  using TableType = GreaterEqualOptions;
+};
+
+// Accessor for a serialized GreaterEqualOptions table. The table declares no fields.
+struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = GreaterEqualOptionsT;
+  using Builder = GreaterEqualOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  GreaterEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GreaterEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for GreaterEqualOptions: the table is started in the constructor and ended by Finish().
+struct GreaterEqualOptionsBuilder {
+  using Table = GreaterEqualOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<GreaterEqualOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<GreaterEqualOptions>(table_end);
+  }
+};
+
+// Serializes a field-less GreaterEqualOptions table into _fbb: the builder is constructed
+// and finished immediately, and the resulting typed offset is returned.
+inline flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb) {
+  return GreaterEqualOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct LessOptionsT : flatbuffers::NativeTable {  // native (unpacked) object paired with LessOptions; holds no data
+  using TableType = LessOptions;
+};
+
+// Accessor for a serialized LessOptions table. The table declares no fields.
+struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = LessOptionsT;
+  using Builder = LessOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  LessOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LessOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for LessOptions: the table is started in the constructor and ended by Finish().
+struct LessOptionsBuilder {
+  using Table = LessOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // start_ records what StartTable() returned so Finish() can hand it to EndTable().
+  explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and returns the result as a typed offset.
+  flatbuffers::Offset<LessOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<LessOptions>(table_end);
+  }
+};
+
+inline flatbuffers::Offset<LessOptions> CreateLessOptions(  // Builds a LessOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  LessOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a LessOptionsT; declaration only, definition is not in this view.
+
+struct LessEqualOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of LessEqualOptions; declares no data members.
+  typedef LessEqualOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for LessEqualOptions; exposes no field accessors.
+  typedef LessEqualOptionsT NativeTableType;
+  typedef LessEqualOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  LessEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<LessEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct LessEqualOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef LessEqualOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<LessEqualOptions> Finish() {  // Closes the table and returns its offset typed as LessEqualOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LessEqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(  // Builds a LessEqualOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  LessEqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a LessEqualOptionsT; declaration only, definition is not in this view.
+
+struct NegOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of NegOptions; declares no data members.
+  typedef NegOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for NegOptions; exposes no field accessors.
+  typedef NegOptionsT NativeTableType;
+  typedef NegOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  NegOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<NegOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct NegOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef NegOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<NegOptions> Finish() {  // Closes the table and returns its offset typed as NegOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NegOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<NegOptions> CreateNegOptions(  // Builds a NegOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  NegOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a NegOptionsT; declaration only, definition is not in this view.
+
+struct SelectOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of SelectOptions; declares no data members.
+  typedef SelectOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for SelectOptions; exposes no field accessors.
+  typedef SelectOptionsT NativeTableType;
+  typedef SelectOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  SelectOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<SelectOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct SelectOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef SelectOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<SelectOptions> Finish() {  // Closes the table and returns its offset typed as SelectOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SelectOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(  // Builds a SelectOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  SelectOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a SelectOptionsT; declaration only, definition is not in this view.
+
+struct SliceOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of SliceOptions; declares no data members.
+  typedef SliceOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for SliceOptions; exposes no field accessors.
+  typedef SliceOptionsT NativeTableType;
+  typedef SliceOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  SliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<SliceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct SliceOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef SliceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<SliceOptions> Finish() {  // Closes the table and returns its offset typed as SliceOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SliceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(  // Builds a SliceOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  SliceOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a SliceOptionsT; declaration only, definition is not in this view.
+
+struct TransposeConvOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of TransposeConvOptions holding its four values as plain members.
+  typedef TransposeConvOptions TableType;  // Names the table type this native struct pairs with.
+  tflite::Padding padding = tflite::Padding_SAME;  // Initialised to Padding_SAME.
+  int32_t stride_w = 0;  // Initialised to 0.
+  int32_t stride_h = 0;  // Initialised to 0.
+  tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE;  // Initialised to ActivationFunctionType_NONE.
+};
+
+struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for TransposeConvOptions with read accessors for padding, both strides and the fused activation.
+  typedef TransposeConvOptionsT NativeTableType;
+  typedef TransposeConvOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the accessors, Verify() and the builder's add_* methods.
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FUSED_ACTIVATION_FUNCTION = 10
+  };
+  tflite::Padding padding() const {  // Reads the field as int8_t and casts it to tflite::Padding.
+    return static_cast<tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));  // Second argument 0 is presumably the value used when the field is absent - confirm against GetField.
+  }
+  int32_t stride_w() const {  // Reads the int32_t field at VT_STRIDE_W.
+    return GetField<int32_t>(VT_STRIDE_W, 0);
+  }
+  int32_t stride_h() const {  // Reads the int32_t field at VT_STRIDE_H.
+    return GetField<int32_t>(VT_STRIDE_H, 0);
+  }
+  tflite::ActivationFunctionType fused_activation_function() const {  // Reads the field as int8_t and casts it to tflite::ActivationFunctionType.
+    return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, all four VerifyField checks and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_PADDING, 1) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W, 4) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H, 4) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) &&
+           verifier.EndTable();
+  }
+  TransposeConvOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<TransposeConvOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct TransposeConvOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers one add_* per field, and closes the table in Finish().
+  typedef TransposeConvOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_padding(tflite::Padding padding) {  // Adds padding as an int8_t element at VT_PADDING.
+    fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);  // Trailing 0 is presumably the field default passed to AddElement - confirm.
+  }
+  void add_stride_w(int32_t stride_w) {  // Adds stride_w as an int32_t element at VT_STRIDE_W.
+    fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h) {  // Adds stride_h as an int32_t element at VT_STRIDE_H.
+    fbb_.AddElement<int32_t>(TransposeConvOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(tflite::ActivationFunctionType fused_activation_function) {  // Adds the activation as an int8_t element at VT_FUSED_ACTIVATION_FUNCTION.
+    fbb_.AddElement<int8_t>(TransposeConvOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit TransposeConvOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<TransposeConvOptions> Finish() {  // Closes the table and returns its offset typed as TransposeConvOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TransposeConvOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(  // Builds a TransposeConvOptions table in _fbb from the given values and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::Padding padding = tflite::Padding_SAME,
+    int32_t stride_w = 0,
+    int32_t stride_h = 0,
+    tflite::ActivationFunctionType fused_activation_function = tflite::ActivationFunctionType_NONE) {
+  TransposeConvOptionsBuilder builder_(_fbb);
+  builder_.add_stride_h(stride_h);  // NOTE(review): int32_t fields are added before the int8_t ones, not in declaration order - presumably deliberate generator output; confirm before reordering.
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a TransposeConvOptionsT; declaration only, definition is not in this view.
+
+struct ExpandDimsOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of ExpandDimsOptions; declares no data members.
+  typedef ExpandDimsOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for ExpandDimsOptions; exposes no field accessors.
+  typedef ExpandDimsOptionsT NativeTableType;
+  typedef ExpandDimsOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  ExpandDimsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<ExpandDimsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct ExpandDimsOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef ExpandDimsOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<ExpandDimsOptions> Finish() {  // Closes the table and returns its offset typed as ExpandDimsOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ExpandDimsOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(  // Builds an ExpandDimsOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ExpandDimsOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking an ExpandDimsOptionsT; declaration only, definition is not in this view.
+
+struct SparseToDenseOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of SparseToDenseOptions holding its single flag as a plain member.
+  typedef SparseToDenseOptions TableType;  // Names the table type this native struct pairs with.
+  bool validate_indices = false;  // Initialised to false.
+};
+
+struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for SparseToDenseOptions with a read accessor for validate_indices.
+  typedef SparseToDenseOptionsT NativeTableType;
+  typedef SparseToDenseOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset used by the accessor, Verify() and the builder's add_validate_indices().
+    VT_VALIDATE_INDICES = 4
+  };
+  bool validate_indices() const {  // Reads the field as uint8_t; any non-zero value is reported as true.
+    return GetField<uint8_t>(VT_VALIDATE_INDICES, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, the VerifyField check and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_VALIDATE_INDICES, 1) &&
+           verifier.EndTable();
+  }
+  SparseToDenseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<SparseToDenseOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct SparseToDenseOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers add_validate_indices(), and closes the table in Finish().
+  typedef SparseToDenseOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_validate_indices(bool validate_indices) {  // Adds the flag as a uint8_t element at VT_VALIDATE_INDICES.
+    fbb_.AddElement<uint8_t>(SparseToDenseOptions::VT_VALIDATE_INDICES, static_cast<uint8_t>(validate_indices), 0);  // Trailing 0 is presumably the field default passed to AddElement - confirm.
+  }
+  explicit SparseToDenseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<SparseToDenseOptions> Finish() {  // Closes the table and returns its offset typed as SparseToDenseOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SparseToDenseOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(  // Builds a SparseToDenseOptions table in _fbb from validate_indices and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool validate_indices = false) {
+  SparseToDenseOptionsBuilder builder_(_fbb);
+  builder_.add_validate_indices(validate_indices);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a SparseToDenseOptionsT; declaration only, definition is not in this view.
+
+struct EqualOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of EqualOptions; declares no data members.
+  typedef EqualOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for EqualOptions; exposes no field accessors.
+  typedef EqualOptionsT NativeTableType;
+  typedef EqualOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  EqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<EqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct EqualOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef EqualOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<EqualOptions> Finish() {  // Closes the table and returns its offset typed as EqualOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<EqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(  // Builds an EqualOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  EqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking an EqualOptionsT; declaration only, definition is not in this view.
+
+struct NotEqualOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of NotEqualOptions; declares no data members.
+  typedef NotEqualOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for NotEqualOptions; exposes no field accessors.
+  typedef NotEqualOptionsT NativeTableType;
+  typedef NotEqualOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  NotEqualOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<NotEqualOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct NotEqualOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef NotEqualOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<NotEqualOptions> Finish() {  // Closes the table and returns its offset typed as NotEqualOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<NotEqualOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(  // Builds a NotEqualOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  NotEqualOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a NotEqualOptionsT; declaration only, definition is not in this view.
+
+struct ShapeOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of ShapeOptions holding its single value as a plain member.
+  typedef ShapeOptions TableType;  // Names the table type this native struct pairs with.
+  tflite::TensorType out_type = tflite::TensorType_FLOAT32;  // Initialised to TensorType_FLOAT32.
+};
+
+struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for ShapeOptions with a read accessor for out_type.
+  typedef ShapeOptionsT NativeTableType;
+  typedef ShapeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset used by the accessor, Verify() and the builder's add_out_type().
+    VT_OUT_TYPE = 4
+  };
+  tflite::TensorType out_type() const {  // Reads the field as int8_t and casts it to tflite::TensorType.
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));  // Second argument 0 is presumably the value used when the field is absent - confirm against GetField.
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, the VerifyField check and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_OUT_TYPE, 1) &&
+           verifier.EndTable();
+  }
+  ShapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<ShapeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct ShapeOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers add_out_type(), and closes the table in Finish().
+  typedef ShapeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_out_type(tflite::TensorType out_type) {  // Adds out_type as an int8_t element at VT_OUT_TYPE.
+    fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);  // Trailing 0 is presumably the field default passed to AddElement - confirm.
+  }
+  explicit ShapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<ShapeOptions> Finish() {  // Closes the table and returns its offset typed as ShapeOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ShapeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ShapeOptions> CreateShapeOptions(  // Builds a ShapeOptions table in _fbb from out_type and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::TensorType out_type = tflite::TensorType_FLOAT32) {
+  ShapeOptionsBuilder builder_(_fbb);
+  builder_.add_out_type(out_type);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ShapeOptions> CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a ShapeOptionsT; declaration only, definition is not in this view.
+
+struct RankOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of RankOptions; declares no data members.
+  typedef RankOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for RankOptions; exposes no field accessors.
+  typedef RankOptionsT NativeTableType;
+  typedef RankOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<RankOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct RankOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef RankOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<RankOptions> Finish() {  // Closes the table and returns its offset typed as RankOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RankOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(  // Builds a RankOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  RankOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a RankOptionsT; declaration only, definition is not in this view.
+
+struct PowOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of PowOptions; declares no data members.
+  typedef PowOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for PowOptions; exposes no field accessors.
+  typedef PowOptionsT NativeTableType;
+  typedef PowOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  PowOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<PowOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct PowOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef PowOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<PowOptions> Finish() {  // Closes the table and returns its offset typed as PowOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PowOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(  // Builds a PowOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  PowOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a PowOptionsT; declaration only, definition is not in this view.
+
+struct FakeQuantOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of FakeQuantOptions holding its four values as plain members.
+  typedef FakeQuantOptions TableType;  // Names the table type this native struct pairs with.
+  float min = 0.0f;  // Initialised to 0.0f.
+  float max = 0.0f;  // Initialised to 0.0f.
+  int32_t num_bits = 0;  // Initialised to 0.
+  bool narrow_range = false;  // Initialised to false.
+};
+
+struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for FakeQuantOptions with read accessors for min, max, num_bits and narrow_range.
+  typedef FakeQuantOptionsT NativeTableType;
+  typedef FakeQuantOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the accessors, Verify() and the builder's add_* methods.
+    VT_MIN = 4,
+    VT_MAX = 6,
+    VT_NUM_BITS = 8,
+    VT_NARROW_RANGE = 10
+  };
+  float min() const {  // Reads the float field at VT_MIN.
+    return GetField<float>(VT_MIN, 0.0f);  // Second argument 0.0f is presumably the value used when the field is absent - confirm against GetField.
+  }
+  float max() const {  // Reads the float field at VT_MAX.
+    return GetField<float>(VT_MAX, 0.0f);
+  }
+  int32_t num_bits() const {  // Reads the int32_t field at VT_NUM_BITS.
+    return GetField<int32_t>(VT_NUM_BITS, 0);
+  }
+  bool narrow_range() const {  // Reads the field as uint8_t; any non-zero value is reported as true.
+    return GetField<uint8_t>(VT_NARROW_RANGE, 0) != 0;
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, all four VerifyField checks and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<float>(verifier, VT_MIN, 4) &&
+           VerifyField<float>(verifier, VT_MAX, 4) &&
+           VerifyField<int32_t>(verifier, VT_NUM_BITS, 4) &&
+           VerifyField<uint8_t>(verifier, VT_NARROW_RANGE, 1) &&
+           verifier.EndTable();
+  }
+  FakeQuantOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<FakeQuantOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct FakeQuantOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers one add_* per field, and closes the table in Finish().
+  typedef FakeQuantOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_min(float min) {  // Adds min as a float element at VT_MIN.
+    fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f);  // Trailing 0.0f is presumably the field default passed to AddElement - confirm.
+  }
+  void add_max(float max) {  // Adds max as a float element at VT_MAX.
+    fbb_.AddElement<float>(FakeQuantOptions::VT_MAX, max, 0.0f);
+  }
+  void add_num_bits(int32_t num_bits) {  // Adds num_bits as an int32_t element at VT_NUM_BITS.
+    fbb_.AddElement<int32_t>(FakeQuantOptions::VT_NUM_BITS, num_bits, 0);
+  }
+  void add_narrow_range(bool narrow_range) {  // Adds the flag as a uint8_t element at VT_NARROW_RANGE.
+    fbb_.AddElement<uint8_t>(FakeQuantOptions::VT_NARROW_RANGE, static_cast<uint8_t>(narrow_range), 0);
+  }
+  explicit FakeQuantOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<FakeQuantOptions> Finish() {  // Closes the table and returns its offset typed as FakeQuantOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FakeQuantOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(  // Builds a FakeQuantOptions table in _fbb from the given values and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    float min = 0.0f,
+    float max = 0.0f,
+    int32_t num_bits = 0,
+    bool narrow_range = false) {
+  FakeQuantOptionsBuilder builder_(_fbb);
+  builder_.add_num_bits(num_bits);  // NOTE(review): fields are added in reverse declaration order, with the uint8_t flag last - presumably deliberate generator output; confirm before reordering.
+  builder_.add_max(max);
+  builder_.add_min(min);
+  builder_.add_narrow_range(narrow_range);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a FakeQuantOptionsT; declaration only, definition is not in this view.
+
+struct PackOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of PackOptions holding its two values as plain members.
+  typedef PackOptions TableType;  // Names the table type this native struct pairs with.
+  int32_t values_count = 0;  // Initialised to 0.
+  int32_t axis = 0;  // Initialised to 0.
+};
+
+struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for PackOptions with read accessors for values_count and axis.
+  typedef PackOptionsT NativeTableType;
+  typedef PackOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the accessors, Verify() and the builder's add_* methods.
+    VT_VALUES_COUNT = 4,
+    VT_AXIS = 6
+  };
+  int32_t values_count() const {  // Reads the int32_t field at VT_VALUES_COUNT.
+    return GetField<int32_t>(VT_VALUES_COUNT, 0);  // Second argument 0 is presumably the value used when the field is absent - confirm against GetField.
+  }
+  int32_t axis() const {  // Reads the int32_t field at VT_AXIS.
+    return GetField<int32_t>(VT_AXIS, 0);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, both VerifyField checks and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_VALUES_COUNT, 4) &&
+           VerifyField<int32_t>(verifier, VT_AXIS, 4) &&
+           verifier.EndTable();
+  }
+  PackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<PackOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct PackOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers one add_* per field, and closes the table in Finish().
+  typedef PackOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_values_count(int32_t values_count) {  // Adds values_count as an int32_t element at VT_VALUES_COUNT.
+    fbb_.AddElement<int32_t>(PackOptions::VT_VALUES_COUNT, values_count, 0);  // Trailing 0 is presumably the field default passed to AddElement - confirm.
+  }
+  void add_axis(int32_t axis) {  // Adds axis as an int32_t element at VT_AXIS.
+    fbb_.AddElement<int32_t>(PackOptions::VT_AXIS, axis, 0);
+  }
+  explicit PackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<PackOptions> Finish() {  // Closes the table and returns its offset typed as PackOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<PackOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<PackOptions> CreatePackOptions(  // Builds a PackOptions table in _fbb from values_count and axis and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t values_count = 0,
+    int32_t axis = 0) {
+  PackOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);  // NOTE(review): axis is added before values_count, the reverse of declaration order - presumably deliberate generator output; confirm before reordering.
+  builder_.add_values_count(values_count);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<PackOptions> CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a PackOptionsT; declaration only, definition is not in this view.
+
+struct LogicalOrOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of LogicalOrOptions; declares no data members.
+  typedef LogicalOrOptions TableType;  // Names the table type this native struct pairs with.
+};
+
+struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for LogicalOrOptions; exposes no field accessors.
+  typedef LogicalOrOptionsT NativeTableType;
+  typedef LogicalOrOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when both the table-start and end-table checks pass; no per-field checks are made.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  LogicalOrOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<LogicalOrOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct LogicalOrOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction and closes it in Finish(); has no add_* setters.
+  typedef LogicalOrOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<LogicalOrOptions> Finish() {  // Closes the table and returns its offset typed as LogicalOrOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalOrOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(  // Builds a LogicalOrOptions table in _fbb with no fields added and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  LogicalOrOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a LogicalOrOptionsT; declaration only, definition is not in this view.
+
+struct OneHotOptionsT : public flatbuffers::NativeTable {  // NativeTable-derived companion of OneHotOptions holding its single value as a plain member.
+  typedef OneHotOptions TableType;  // Names the table type this native struct pairs with.
+  int32_t axis = 0;  // Initialised to 0.
+};
+
+struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for OneHotOptions with a read accessor for axis.
+  typedef OneHotOptionsT NativeTableType;
+  typedef OneHotOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset used by the accessor, Verify() and the builder's add_axis().
+    VT_AXIS = 4
+  };
+  int32_t axis() const {  // Reads the int32_t field at VT_AXIS.
+    return GetField<int32_t>(VT_AXIS, 0);  // Second argument 0 is presumably the value used when the field is absent - confirm against GetField.
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only when the table-start check, the VerifyField check and the end-table check pass.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_AXIS, 4) &&
+           verifier.EndTable();
+  }
+  OneHotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  void UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared only; definition is not in this view.
+  static flatbuffers::Offset<OneHotOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared only; definition is not in this view.
+};
+
+struct OneHotOptionsBuilder {  // Opens a table on a FlatBufferBuilder at construction, offers add_axis(), and closes the table in Finish().
+  typedef OneHotOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Builder being written to; held by reference.
+  flatbuffers::uoffset_t start_;  // Value returned by StartTable(); handed back to EndTable() in Finish().
+  void add_axis(int32_t axis) {  // Adds axis as an int32_t element at VT_AXIS.
+    fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0);  // Trailing 0 is presumably the field default passed to AddElement - confirm.
+  }
+  explicit OneHotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();  // The table is opened as soon as this builder is constructed.
+  }
+  flatbuffers::Offset<OneHotOptions> Finish() {  // Closes the table and returns its offset typed as OneHotOptions.
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OneHotOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(  // Builds a OneHotOptions table in _fbb from axis and returns its offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t axis = 0) {
+  OneHotOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a OneHotOptionsT; declaration only, definition is not in this view.
+
+struct AbsOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of AbsOptions; declares no data members
+  typedef AbsOptions TableType;  // names the matching table type
+};
+
+struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef AbsOptionsT NativeTableType;
+  typedef AbsOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  AbsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AbsOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AbsOptionsBuilder {  // helper that starts and finishes an AbsOptions table; has no add_* methods
+  typedef AbsOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AbsOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AbsOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(  // builds an AbsOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  AbsOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native AbsOptionsT; declaration only
+
+struct HardSwishOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of HardSwishOptions; declares no data members
+  typedef HardSwishOptions TableType;  // names the matching table type
+};
+
+struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef HardSwishOptionsT NativeTableType;
+  typedef HardSwishOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  HardSwishOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(HardSwishOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<HardSwishOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HardSwishOptionsBuilder {  // helper that starts and finishes a HardSwishOptions table; has no add_* methods
+  typedef HardSwishOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HardSwishOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HardSwishOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(  // builds a HardSwishOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  HardSwishOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native HardSwishOptionsT; declaration only
+
+struct LogicalAndOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of LogicalAndOptions; declares no data members
+  typedef LogicalAndOptions TableType;  // names the matching table type
+};
+
+struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef LogicalAndOptionsT NativeTableType;
+  typedef LogicalAndOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  LogicalAndOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LogicalAndOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogicalAndOptionsBuilder {  // helper that starts and finishes a LogicalAndOptions table; has no add_* methods
+  typedef LogicalAndOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LogicalAndOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalAndOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(  // builds a LogicalAndOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  LogicalAndOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native LogicalAndOptionsT; declaration only
+
+struct LogicalNotOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of LogicalNotOptions; declares no data members
+  typedef LogicalNotOptions TableType;  // names the matching table type
+};
+
+struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef LogicalNotOptionsT NativeTableType;
+  typedef LogicalNotOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  LogicalNotOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LogicalNotOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LogicalNotOptionsBuilder {  // helper that starts and finishes a LogicalNotOptions table; has no add_* methods
+  typedef LogicalNotOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LogicalNotOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LogicalNotOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(  // builds a LogicalNotOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  LogicalNotOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native LogicalNotOptionsT; declaration only
+
+struct UnpackOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of UnpackOptions
+  typedef UnpackOptions TableType;  // names the matching table type
+  int32_t num = 0;  // defaults to 0
+  int32_t axis = 0;  // defaults to 0
+};
+
+struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with two int32 fields, num and axis
+  typedef UnpackOptionsT NativeTableType;
+  typedef UnpackOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_NUM = 4,  // offset constant used by num() and Verify() below
+    VT_AXIS = 6  // offset constant used by axis() and Verify() below
+  };
+  int32_t num() const {
+    return GetField<int32_t>(VT_NUM, 0);  // 0 is the default handed to GetField
+  }
+  int32_t axis() const {
+    return GetField<int32_t>(VT_AXIS, 0);  // 0 is the default handed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_NUM, 4) &&  // last argument 4: presumably the field's alignment in bytes - confirm against Table::VerifyField
+           VerifyField<int32_t>(verifier, VT_AXIS, 4) &&
+           verifier.EndTable();
+  }
+  UnpackOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnpackOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnpackOptionsBuilder {  // helper that starts, fills and finishes an UnpackOptions table
+  typedef UnpackOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  void add_num(int32_t num) {
+    fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0);  // trailing 0 is the default-value argument of AddElement
+  }
+  void add_axis(int32_t axis) {
+    fbb_.AddElement<int32_t>(UnpackOptions::VT_AXIS, axis, 0);  // trailing 0 is the default-value argument of AddElement
+  }
+  explicit UnpackOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<UnpackOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnpackOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(  // builds an UnpackOptions table from plain num and axis values
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t num = 0,
+    int32_t axis = 0) {
+  UnpackOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);  // axis is added before num, i.e. the reverse of the parameter order
+  builder_.add_num(num);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native UnpackOptionsT; declaration only
+
+struct FloorDivOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of FloorDivOptions; declares no data members
+  typedef FloorDivOptions TableType;  // names the matching table type
+};
+
+struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef FloorDivOptionsT NativeTableType;
+  typedef FloorDivOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  FloorDivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FloorDivOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorDivOptionsBuilder {  // helper that starts and finishes a FloorDivOptions table; has no add_* methods
+  typedef FloorDivOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<FloorDivOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorDivOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(  // builds a FloorDivOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FloorDivOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native FloorDivOptionsT; declaration only
+
+struct SquareOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of SquareOptions; declares no data members
+  typedef SquareOptions TableType;  // names the matching table type
+};
+
+struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef SquareOptionsT NativeTableType;
+  typedef SquareOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  SquareOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SquareOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SquareOptionsBuilder {  // helper that starts and finishes a SquareOptions table; has no add_* methods
+  typedef SquareOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SquareOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SquareOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(  // builds a SquareOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  SquareOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native SquareOptionsT; declaration only
+
+struct ZerosLikeOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of ZerosLikeOptions; declares no data members
+  typedef ZerosLikeOptions TableType;  // names the matching table type
+};
+
+struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef ZerosLikeOptionsT NativeTableType;
+  typedef ZerosLikeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  ZerosLikeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ZerosLikeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ZerosLikeOptionsBuilder {  // helper that starts and finishes a ZerosLikeOptions table; has no add_* methods
+  typedef ZerosLikeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ZerosLikeOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ZerosLikeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(  // builds a ZerosLikeOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ZerosLikeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native ZerosLikeOptionsT; declaration only
+
+struct FillOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of FillOptions; declares no data members
+  typedef FillOptions TableType;  // names the matching table type
+};
+
+struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef FillOptionsT NativeTableType;
+  typedef FillOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  FillOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FillOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FillOptionsBuilder {  // helper that starts and finishes a FillOptions table; has no add_* methods
+  typedef FillOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<FillOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FillOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(  // builds a FillOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FillOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native FillOptionsT; declaration only
+
+struct FloorModOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of FloorModOptions; declares no data members
+  typedef FloorModOptions TableType;  // names the matching table type
+};
+
+struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef FloorModOptionsT NativeTableType;
+  typedef FloorModOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<FloorModOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct FloorModOptionsBuilder {  // helper that starts and finishes a FloorModOptions table; has no add_* methods
+  typedef FloorModOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<FloorModOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<FloorModOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(  // builds a FloorModOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  FloorModOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native FloorModOptionsT; declaration only
+
+struct RangeOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of RangeOptions; declares no data members
+  typedef RangeOptions TableType;  // names the matching table type
+};
+
+struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef RangeOptionsT NativeTableType;
+  typedef RangeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RangeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RangeOptionsBuilder {  // helper that starts and finishes a RangeOptions table; has no add_* methods
+  typedef RangeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RangeOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RangeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(  // builds a RangeOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  RangeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native RangeOptionsT; declaration only
+
+struct LeakyReluOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of LeakyReluOptions
+  typedef LeakyReluOptions TableType;  // names the matching table type
+  float alpha = 0.0f;  // defaults to 0.0f
+};
+
+struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with a single float field, alpha
+  typedef LeakyReluOptionsT NativeTableType;
+  typedef LeakyReluOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_ALPHA = 4  // offset constant used by alpha() and Verify() below
+  };
+  float alpha() const {
+    return GetField<float>(VT_ALPHA, 0.0f);  // 0.0f is the default handed to GetField
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<float>(verifier, VT_ALPHA, 4) &&  // last argument 4: presumably the field's alignment in bytes - confirm against Table::VerifyField
+           verifier.EndTable();
+  }
+  LeakyReluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<LeakyReluOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct LeakyReluOptionsBuilder {  // helper that starts, fills and finishes a LeakyReluOptions table
+  typedef LeakyReluOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  void add_alpha(float alpha) {
+    fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f);  // trailing 0.0f is the default-value argument of AddElement
+  }
+  explicit LeakyReluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<LeakyReluOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<LeakyReluOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(  // builds a LeakyReluOptions table from a plain alpha value
+    flatbuffers::FlatBufferBuilder &_fbb,
+    float alpha = 0.0f) {
+  LeakyReluOptionsBuilder builder_(_fbb);
+  builder_.add_alpha(alpha);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native LeakyReluOptionsT; declaration only
+
+struct SquaredDifferenceOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of SquaredDifferenceOptions; declares no data members
+  typedef SquaredDifferenceOptions TableType;  // names the matching table type
+};
+
+struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef SquaredDifferenceOptionsT NativeTableType;
+  typedef SquaredDifferenceOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  SquaredDifferenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SquaredDifferenceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct SquaredDifferenceOptionsBuilder {  // helper that starts and finishes a SquaredDifferenceOptions table; has no add_* methods
+  typedef SquaredDifferenceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SquaredDifferenceOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SquaredDifferenceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(  // builds a SquaredDifferenceOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  SquaredDifferenceOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native SquaredDifferenceOptionsT; declaration only
+
+struct MirrorPadOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of MirrorPadOptions
+  typedef MirrorPadOptions TableType;  // names the matching table type
+  tflite::MirrorPadMode mode = tflite::MirrorPadMode_REFLECT;  // defaults to MirrorPadMode_REFLECT
+};
+
+struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with a single enum field, mode, read as int8_t
+  typedef MirrorPadOptionsT NativeTableType;
+  typedef MirrorPadOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_MODE = 4  // offset constant used by mode() and Verify() below
+  };
+  tflite::MirrorPadMode mode() const {
+    return static_cast<tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));  // default 0 is presumably MirrorPadMode_REFLECT - confirm against the enum definition
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_MODE, 1) &&  // last argument 1: presumably the field's alignment in bytes - confirm against Table::VerifyField
+           verifier.EndTable();
+  }
+  MirrorPadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MirrorPadOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct MirrorPadOptionsBuilder {  // helper that starts, fills and finishes a MirrorPadOptions table
+  typedef MirrorPadOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  void add_mode(tflite::MirrorPadMode mode) {
+    fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);  // enum is narrowed to int8_t; trailing 0 is the default-value argument of AddElement
+  }
+  explicit MirrorPadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<MirrorPadOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<MirrorPadOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(  // builds a MirrorPadOptions table from a plain mode value
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::MirrorPadMode mode = tflite::MirrorPadMode_REFLECT) {
+  MirrorPadOptionsBuilder builder_(_fbb);
+  builder_.add_mode(mode);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native MirrorPadOptionsT; declaration only
+
+struct UniqueOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of UniqueOptions
+  typedef UniqueOptions TableType;  // names the matching table type
+  tflite::TensorType idx_out_type = tflite::TensorType_INT32;  // defaults to TensorType_INT32
+};
+
+struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with a single enum field, idx_out_type, read as int8_t
+  typedef UniqueOptionsT NativeTableType;
+  typedef UniqueOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_IDX_OUT_TYPE = 4  // offset constant used by idx_out_type() and Verify() below
+  };
+  tflite::TensorType idx_out_type() const {
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));  // default 2 is presumably TensorType_INT32 - confirm against the enum definition
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check below succeeds
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_IDX_OUT_TYPE, 1) &&  // last argument 1: presumably the field's alignment in bytes - confirm against Table::VerifyField
+           verifier.EndTable();
+  }
+  UniqueOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UniqueOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UniqueOptionsBuilder {  // helper that starts, fills and finishes a UniqueOptions table
+  typedef UniqueOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  void add_idx_out_type(tflite::TensorType idx_out_type) {
+    fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);  // enum is narrowed to int8_t; trailing 2 is the default-value argument of AddElement
+  }
+  explicit UniqueOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<UniqueOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UniqueOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(  // builds a UniqueOptions table from a plain idx_out_type value
+    flatbuffers::FlatBufferBuilder &_fbb,
+    tflite::TensorType idx_out_type = tflite::TensorType_INT32) {
+  UniqueOptionsBuilder builder_(_fbb);
+  builder_.add_idx_out_type(idx_out_type);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native UniqueOptionsT; declaration only
+
+struct ReverseV2OptionsT : public flatbuffers::NativeTable {  // native-table counterpart of ReverseV2Options; declares no data members
+  typedef ReverseV2Options TableType;  // names the matching table type
+};
+
+struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef ReverseV2OptionsT NativeTableType;
+  typedef ReverseV2OptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  ReverseV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReverseV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReverseV2OptionsBuilder {  // helper that starts and finishes a ReverseV2Options table; has no add_* methods
+  typedef ReverseV2Options Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ReverseV2Options> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReverseV2Options>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(  // builds a ReverseV2Options table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ReverseV2OptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native ReverseV2OptionsT; declaration only
+
+struct AddNOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of AddNOptions; declares no data members
+  typedef AddNOptions TableType;  // names the matching table type
+};
+
+struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef AddNOptionsT NativeTableType;
+  typedef AddNOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  AddNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AddNOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AddNOptionsBuilder {  // helper that starts and finishes an AddNOptions table; has no add_* methods
+  typedef AddNOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AddNOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AddNOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(  // builds an AddNOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  AddNOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native AddNOptionsT; declaration only
+
+struct GatherNdOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of GatherNdOptions; declares no data members
+  typedef GatherNdOptions TableType;  // names the matching table type
+};
+
+struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // table type with no field accessors
+  typedef GatherNdOptionsT NativeTableType;
+  typedef GatherNdOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if both checks below succeed
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();  // no per-field checks are performed
+  }
+  GatherNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are only declared in this struct
+  void UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GatherNdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GatherNdOptionsBuilder {  // helper that starts and finishes a GatherNdOptions table; has no add_* methods
+  typedef GatherNdOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder being written to (held by reference)
+  flatbuffers::uoffset_t start_;  // value returned by StartTable(), passed back to EndTable()
+  explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<GatherNdOptions> Finish() {  // closes the table and returns its typed offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GatherNdOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(  // builds a GatherNdOptions table; takes no field values
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  GatherNdOptionsBuilder builder_(_fbb);
+  return builder_.Finish();  // no add_* calls precede Finish()
+}
+
+flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // overload taking the native GatherNdOptionsT; declaration only
+
+struct WhereOptionsT : public flatbuffers::NativeTable {  // native-table counterpart of WhereOptions; declares no data members
+  typedef WhereOptions TableType;  // names the matching table type
+};
+
+// Table type for WhereOptions; it declares no fields.
+struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = WhereOptionsT;
+  using Builder = WhereOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from WhereOptionsT are defined outside this struct.
+  WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<WhereOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a WhereOptions table (there are no fields to add).
+struct WhereOptionsBuilder {
+  using Table = WhereOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<WhereOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<WhereOptions>(table_end);
+  }
+};
+
+// Builds a WhereOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return WhereOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native WhereOptionsT.
+
+struct ReverseSequenceOptionsT : flatbuffers::NativeTable {
+  using TableType = ReverseSequenceOptions;  // table type paired with this native struct
+  int32_t seq_dim{0};
+  int32_t batch_dim{0};
+};
+
+// Table type for ReverseSequenceOptions with two int32 fields: seq_dim and batch_dim.
+struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ReverseSequenceOptionsT;
+  using Builder = ReverseSequenceOptionsBuilder;
+  // Vtable offsets identifying each field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_SEQ_DIM = 4,
+    VT_BATCH_DIM = 6
+  };
+  // Field getters; each passes 0 to GetField as the default value.
+  int32_t seq_dim() const { return GetField<int32_t>(VT_SEQ_DIM, 0); }
+  int32_t batch_dim() const { return GetField<int32_t>(VT_BATCH_DIM, 0); }
+  // Checks the table start, then each field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_SEQ_DIM, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_BATCH_DIM, 4)) return false;
+    return verifier.EndTable();
+  }
+  ReverseSequenceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReverseSequenceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a ReverseSequenceOptions table: call the add_* methods for
+// the fields to set, then Finish().
+struct ReverseSequenceOptionsBuilder {
+  using Table = ReverseSequenceOptions;
+  // Builder being written to, and the value StartTable() returned for this table.
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Field adders; each passes 0 to AddElement as the field's default value.
+  void add_seq_dim(int32_t seq_dim) { fbb_.AddElement<int32_t>(Table::VT_SEQ_DIM, seq_dim, 0); }
+  void add_batch_dim(int32_t batch_dim) { fbb_.AddElement<int32_t>(Table::VT_BATCH_DIM, batch_dim, 0); }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit ReverseSequenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb),
+        start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<ReverseSequenceOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ReverseSequenceOptions>(table_end);
+  }
+};
+
+// Builds a ReverseSequenceOptions table in _fbb from the given field values.
+inline flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_dim = 0, int32_t batch_dim = 0) {
+  ReverseSequenceOptionsBuilder table(_fbb);
+  // Adders run in a fixed order: batch_dim, then seq_dim.
+  table.add_batch_dim(batch_dim);
+  table.add_seq_dim(seq_dim);
+  return table.Finish();
+}
+
+flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native ReverseSequenceOptionsT.
+
+struct MatrixDiagOptionsT : flatbuffers::NativeTable {
+  using TableType = MatrixDiagOptions;  // table type paired with this native struct
+};
+
+// Table type for MatrixDiagOptions; it declares no fields.
+struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = MatrixDiagOptionsT;
+  using Builder = MatrixDiagOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from MatrixDiagOptionsT are defined outside this struct.
+  MatrixDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MatrixDiagOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a MatrixDiagOptions table (there are no fields to add).
+struct MatrixDiagOptionsBuilder {
+  using Table = MatrixDiagOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<MatrixDiagOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<MatrixDiagOptions>(table_end);
+  }
+};
+
+// Builds a MatrixDiagOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return MatrixDiagOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native MatrixDiagOptionsT.
+
+struct QuantizeOptionsT : flatbuffers::NativeTable {
+  using TableType = QuantizeOptions;  // table type paired with this native struct
+};
+
+// Table type for QuantizeOptions; it declares no fields.
+struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = QuantizeOptionsT;
+  using Builder = QuantizeOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from QuantizeOptionsT are defined outside this struct.
+  QuantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<QuantizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a QuantizeOptions table (there are no fields to add).
+struct QuantizeOptionsBuilder {
+  using Table = QuantizeOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<QuantizeOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<QuantizeOptions>(table_end);
+  }
+};
+
+// Builds a QuantizeOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return QuantizeOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native QuantizeOptionsT.
+
+struct MatrixSetDiagOptionsT : flatbuffers::NativeTable {
+  using TableType = MatrixSetDiagOptions;  // table type paired with this native struct
+};
+
+// Table type for MatrixSetDiagOptions; it declares no fields.
+struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = MatrixSetDiagOptionsT;
+  using Builder = MatrixSetDiagOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from MatrixSetDiagOptionsT are defined outside this struct.
+  MatrixSetDiagOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<MatrixSetDiagOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a MatrixSetDiagOptions table (there are no fields to add).
+struct MatrixSetDiagOptionsBuilder {
+  using Table = MatrixSetDiagOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<MatrixSetDiagOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<MatrixSetDiagOptions>(table_end);
+  }
+};
+
+// Builds a MatrixSetDiagOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return MatrixSetDiagOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native MatrixSetDiagOptionsT.
+
+struct IfOptionsT : flatbuffers::NativeTable {
+  using TableType = IfOptions;  // table type paired with this native struct
+  int32_t then_subgraph_index{0};
+  int32_t else_subgraph_index{0};
+};
+
+// Table type for IfOptions with two int32 fields: then_subgraph_index and else_subgraph_index.
+struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = IfOptionsT;
+  using Builder = IfOptionsBuilder;
+  // Vtable offsets identifying each field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_THEN_SUBGRAPH_INDEX = 4,
+    VT_ELSE_SUBGRAPH_INDEX = 6
+  };
+  // Field getters; each passes 0 to GetField as the default value.
+  int32_t then_subgraph_index() const { return GetField<int32_t>(VT_THEN_SUBGRAPH_INDEX, 0); }
+  int32_t else_subgraph_index() const { return GetField<int32_t>(VT_ELSE_SUBGRAPH_INDEX, 0); }
+  // Checks the table start, then each field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_THEN_SUBGRAPH_INDEX, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_ELSE_SUBGRAPH_INDEX, 4)) return false;
+    return verifier.EndTable();
+  }
+  IfOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<IfOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes an IfOptions table: call the add_* methods for
+// the fields to set, then Finish().
+struct IfOptionsBuilder {
+  using Table = IfOptions;
+  // Builder being written to, and the value StartTable() returned for this table.
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Field adders; each passes 0 to AddElement as the field's default value.
+  void add_then_subgraph_index(int32_t then_subgraph_index) { fbb_.AddElement<int32_t>(Table::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0); }
+  void add_else_subgraph_index(int32_t else_subgraph_index) { fbb_.AddElement<int32_t>(Table::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0); }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit IfOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb),
+        start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<IfOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<IfOptions>(table_end);
+  }
+};
+
+// Builds an IfOptions table in _fbb from the given field values.
+inline flatbuffers::Offset<IfOptions> CreateIfOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t then_subgraph_index = 0, int32_t else_subgraph_index = 0) {
+  IfOptionsBuilder table(_fbb);
+  // Adders run in a fixed order: else_subgraph_index, then then_subgraph_index.
+  table.add_else_subgraph_index(else_subgraph_index);
+  table.add_then_subgraph_index(then_subgraph_index);
+  return table.Finish();
+}
+
+flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native IfOptionsT.
+
+struct CallOnceOptionsT : flatbuffers::NativeTable {
+  using TableType = CallOnceOptions;  // table type paired with this native struct
+  int32_t init_subgraph_index{0};
+};
+
+// Table type for CallOnceOptions with one int32 field: init_subgraph_index.
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = CallOnceOptionsT;
+  using Builder = CallOnceOptionsBuilder;
+  // Vtable offset identifying the field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_INIT_SUBGRAPH_INDEX = 4 };
+  // Field getter; passes 0 to GetField as the default value.
+  int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+  // Checks the table start, then the field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX, 4)) return false;
+    return verifier.EndTable();
+  }
+  CallOnceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CallOnceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a CallOnceOptions table: add the field if needed, then Finish().
+struct CallOnceOptionsBuilder {
+  using Table = CallOnceOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Field adder; passes 0 to AddElement as the field's default value.
+  void add_init_subgraph_index(int32_t init_subgraph_index) { fbb_.AddElement<int32_t>(Table::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0); }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb),
+        start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<CallOnceOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<CallOnceOptions>(table_end);
+  }
+};
+
+// Builds a CallOnceOptions table in _fbb holding init_subgraph_index.
+inline flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0) {
+  CallOnceOptionsBuilder table(_fbb);
+  table.add_init_subgraph_index(init_subgraph_index);
+  return table.Finish();
+}
+
+flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native CallOnceOptionsT.
+
+struct WhileOptionsT : flatbuffers::NativeTable {
+  using TableType = WhileOptions;  // table type paired with this native struct
+  int32_t cond_subgraph_index{0};
+  int32_t body_subgraph_index{0};
+};
+
+// Table type for WhileOptions with two int32 fields: cond_subgraph_index and body_subgraph_index.
+struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = WhileOptionsT;
+  using Builder = WhileOptionsBuilder;
+  // Vtable offsets identifying each field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_COND_SUBGRAPH_INDEX = 4,
+    VT_BODY_SUBGRAPH_INDEX = 6
+  };
+  // Field getters; each passes 0 to GetField as the default value.
+  int32_t cond_subgraph_index() const { return GetField<int32_t>(VT_COND_SUBGRAPH_INDEX, 0); }
+  int32_t body_subgraph_index() const { return GetField<int32_t>(VT_BODY_SUBGRAPH_INDEX, 0); }
+  // Checks the table start, then each field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_COND_SUBGRAPH_INDEX, 4)) return false;
+    if (!VerifyField<int32_t>(verifier, VT_BODY_SUBGRAPH_INDEX, 4)) return false;
+    return verifier.EndTable();
+  }
+  WhileOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<WhileOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a WhileOptions table: call the add_* methods for
+// the fields to set, then Finish().
+struct WhileOptionsBuilder {
+  using Table = WhileOptions;
+  // Builder being written to, and the value StartTable() returned for this table.
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Field adders; each passes 0 to AddElement as the field's default value.
+  void add_cond_subgraph_index(int32_t cond_subgraph_index) { fbb_.AddElement<int32_t>(Table::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0); }
+  void add_body_subgraph_index(int32_t body_subgraph_index) { fbb_.AddElement<int32_t>(Table::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0); }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit WhileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb),
+        start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<WhileOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<WhileOptions>(table_end);
+  }
+};
+
+// Builds a WhileOptions table in _fbb from the given field values.
+inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, int32_t cond_subgraph_index = 0, int32_t body_subgraph_index = 0) {
+  WhileOptionsBuilder table(_fbb);
+  // Adders run in a fixed order: body_subgraph_index, then cond_subgraph_index.
+  table.add_body_subgraph_index(body_subgraph_index);
+  table.add_cond_subgraph_index(cond_subgraph_index);
+  return table.Finish();
+}
+
+flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native WhileOptionsT.
+
+struct NonMaxSuppressionV4OptionsT : flatbuffers::NativeTable {
+  using TableType = NonMaxSuppressionV4Options;  // table type paired with this native struct
+};
+
+// Table type for NonMaxSuppressionV4Options; it declares no fields.
+struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = NonMaxSuppressionV4OptionsT;
+  using Builder = NonMaxSuppressionV4OptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from NonMaxSuppressionV4OptionsT are defined outside this struct.
+  NonMaxSuppressionV4OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<NonMaxSuppressionV4Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a NonMaxSuppressionV4Options table (there are no fields to add).
+struct NonMaxSuppressionV4OptionsBuilder {
+  using Table = NonMaxSuppressionV4Options;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<NonMaxSuppressionV4Options> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<NonMaxSuppressionV4Options>(table_end);
+  }
+};
+
+// Builds a NonMaxSuppressionV4Options table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return NonMaxSuppressionV4OptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native NonMaxSuppressionV4OptionsT.
+
+struct NonMaxSuppressionV5OptionsT : flatbuffers::NativeTable {
+  using TableType = NonMaxSuppressionV5Options;  // table type paired with this native struct
+};
+
+// Table type for NonMaxSuppressionV5Options; it declares no fields.
+struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = NonMaxSuppressionV5OptionsT;
+  using Builder = NonMaxSuppressionV5OptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from NonMaxSuppressionV5OptionsT are defined outside this struct.
+  NonMaxSuppressionV5OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<NonMaxSuppressionV5Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a NonMaxSuppressionV5Options table (there are no fields to add).
+struct NonMaxSuppressionV5OptionsBuilder {
+  using Table = NonMaxSuppressionV5Options;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<NonMaxSuppressionV5Options> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<NonMaxSuppressionV5Options>(table_end);
+  }
+};
+
+// Builds a NonMaxSuppressionV5Options table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return NonMaxSuppressionV5OptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native NonMaxSuppressionV5OptionsT.
+
+struct ScatterNdOptionsT : flatbuffers::NativeTable {
+  using TableType = ScatterNdOptions;  // table type paired with this native struct
+};
+
+// Table type for ScatterNdOptions; it declares no fields.
+struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ScatterNdOptionsT;
+  using Builder = ScatterNdOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from ScatterNdOptionsT are defined outside this struct.
+  ScatterNdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ScatterNdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a ScatterNdOptions table (there are no fields to add).
+struct ScatterNdOptionsBuilder {
+  using Table = ScatterNdOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<ScatterNdOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ScatterNdOptions>(table_end);
+  }
+};
+
+// Builds a ScatterNdOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return ScatterNdOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native ScatterNdOptionsT.
+
+struct SelectV2OptionsT : flatbuffers::NativeTable {
+  using TableType = SelectV2Options;  // table type paired with this native struct
+};
+
+// Table type for SelectV2Options; it declares no fields.
+struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SelectV2OptionsT;
+  using Builder = SelectV2OptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from SelectV2OptionsT are defined outside this struct.
+  SelectV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SelectV2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a SelectV2Options table (there are no fields to add).
+struct SelectV2OptionsBuilder {
+  using Table = SelectV2Options;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<SelectV2Options> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SelectV2Options>(table_end);
+  }
+};
+
+// Builds a SelectV2Options table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return SelectV2OptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native SelectV2OptionsT.
+
+struct DensifyOptionsT : flatbuffers::NativeTable {
+  using TableType = DensifyOptions;  // table type paired with this native struct
+};
+
+// Table type for DensifyOptions; it declares no fields.
+struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = DensifyOptionsT;
+  using Builder = DensifyOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from DensifyOptionsT are defined outside this struct.
+  DensifyOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DensifyOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a DensifyOptions table (there are no fields to add).
+struct DensifyOptionsBuilder {
+  using Table = DensifyOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<DensifyOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<DensifyOptions>(table_end);
+  }
+};
+
+// Builds a DensifyOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return DensifyOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native DensifyOptionsT.
+
+struct SegmentSumOptionsT : flatbuffers::NativeTable {
+  using TableType = SegmentSumOptions;  // table type paired with this native struct
+};
+
+// Table type for SegmentSumOptions; it declares no fields.
+struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SegmentSumOptionsT;
+  using Builder = SegmentSumOptionsBuilder;
+  // Nothing field-specific to verify: only the table start and end checks run.
+  bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && verifier.EndTable(); }
+  // Declarations only: conversions to and from SegmentSumOptionsT are defined outside this struct.
+  SegmentSumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SegmentSumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a SegmentSumOptions table (there are no fields to add).
+struct SegmentSumOptionsBuilder {
+  using Table = SegmentSumOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<SegmentSumOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SegmentSumOptions>(table_end);
+  }
+};
+
+// Builds a SegmentSumOptions table with no fields in _fbb and returns its offset.
+inline flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return SegmentSumOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native SegmentSumOptionsT.
+
+struct BatchMatMulOptionsT : flatbuffers::NativeTable {
+  using TableType = BatchMatMulOptions;  // table type paired with this native struct
+  bool adj_x{false};
+  bool adj_y{false};
+  bool asymmetric_quantize_inputs{false};
+};
+
+// Table type for BatchMatMulOptions with three bool fields (read and written as uint8_t):
+// adj_x, adj_y and asymmetric_quantize_inputs.
+struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = BatchMatMulOptionsT;
+  using Builder = BatchMatMulOptionsBuilder;
+  // Vtable offsets identifying each field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_ADJ_X = 4,
+    VT_ADJ_Y = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
+  };
+  // Field getters; each reads a uint8_t (default 0) and reports whether it is non-zero.
+  bool adj_x() const { return GetField<uint8_t>(VT_ADJ_X, 0) != 0; }
+  bool adj_y() const { return GetField<uint8_t>(VT_ADJ_Y, 0) != 0; }
+  bool asymmetric_quantize_inputs() const { return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; }
+  // Checks the table start, then each field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_ADJ_X, 1)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_ADJ_Y, 1)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1)) return false;
+    return verifier.EndTable();
+  }
+  // Declarations only: conversions to and from BatchMatMulOptionsT are defined outside this struct.
+  BatchMatMulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BatchMatMulOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a BatchMatMulOptions table: call add_* for the fields to set, then Finish().
+struct BatchMatMulOptionsBuilder {
+  using Table = BatchMatMulOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_adj_x(bool adj_x) {
+    fbb_.AddElement<uint8_t>(Table::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0);
+  }
+  void add_adj_y(bool adj_y) {
+    fbb_.AddElement<uint8_t>(Table::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) {
+    fbb_.AddElement<uint8_t>(Table::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<BatchMatMulOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<BatchMatMulOptions>(table_end);
+  }
+};
+
+// Builds a BatchMatMulOptions table in _fbb from the given field values.
+inline flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool adj_x = false, bool adj_y = false, bool asymmetric_quantize_inputs = false) {
+  BatchMatMulOptionsBuilder table(_fbb);
+  // Adders run in a fixed order: asymmetric_quantize_inputs, adj_y, then adj_x.
+  table.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  table.add_adj_y(adj_y);
+  table.add_adj_x(adj_x);
+  return table.Finish();
+}
+
+flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Declared here only; overload that takes a native BatchMatMulOptionsT.
+
+struct CumsumOptionsT : flatbuffers::NativeTable {
+  using TableType = CumsumOptions;  // table type paired with this native struct
+  bool exclusive{false};
+  bool reverse{false};
+};
+
+// Table type for CumsumOptions with two bool fields (read and written as uint8_t): exclusive and reverse.
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = CumsumOptionsT;
+  using Builder = CumsumOptionsBuilder;
+  // Vtable offsets identifying each field.
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_EXCLUSIVE = 4,
+    VT_REVERSE = 6
+  };
+  // Field getters; each reads a uint8_t (default 0) and reports whether it is non-zero.
+  bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+  bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
+  // Checks the table start, then each field, then the table end; stops at the first failure.
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    if (!VerifyTableStart(verifier)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_EXCLUSIVE, 1)) return false;
+    if (!VerifyField<uint8_t>(verifier, VT_REVERSE, 1)) return false;
+    return verifier.EndTable();
+  }
+  CumsumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<CumsumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder that writes a CumsumOptions table: call the add_* methods for
+// the fields to set, then Finish().
+struct CumsumOptionsBuilder {
+  using Table = CumsumOptions;
+  // Builder being written to, and the value StartTable() returned for this table.
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Field adders; each casts the bool to uint8_t and passes 0 to AddElement as the default.
+  void add_exclusive(bool exclusive) { fbb_.AddElement<uint8_t>(Table::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0); }
+  void add_reverse(bool reverse) { fbb_.AddElement<uint8_t>(Table::VT_REVERSE, static_cast<uint8_t>(reverse), 0); }
+  // Starts the table on _fbb and records the value StartTable() returns.
+  explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb),
+        start_(_fbb.StartTable()) {}
+  // Ends the table begun in the constructor and returns it as a typed offset.
+  flatbuffers::Offset<CumsumOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<CumsumOptions>(table_end);
+  }
+};
+
+// Builds a CumsumOptions table in _fbb from the given field values.
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(
+    flatbuffers::FlatBufferBuilder &_fbb, bool exclusive = false, bool reverse = false) {
+  CumsumOptionsBuilder table(_fbb);
+  // Adders run in a fixed order: reverse, then exclusive.
+  table.add_reverse(reverse);
+  table.add_exclusive(exclusive);
+  return table.Finish();
+}
+
+flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native CumsumOptionsT pointer; declaration only.
+
+struct BroadcastToOptionsT : public flatbuffers::NativeTable {  // Native struct paired with BroadcastToOptions; it declares no data members.
+  typedef BroadcastToOptions TableType;
+};
+
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for BroadcastToOptions; no field accessors are declared.
+  typedef BroadcastToOptionsT NativeTableType;
+  typedef BroadcastToOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BroadcastToOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BroadcastToOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef BroadcastToOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BroadcastToOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(  // Starts and immediately finishes a BroadcastToOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  BroadcastToOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native BroadcastToOptionsT pointer; declaration only.
+
+struct Rfft2dOptionsT : public flatbuffers::NativeTable {  // Native struct paired with Rfft2dOptions; it declares no data members.
+  typedef Rfft2dOptions TableType;
+};
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for Rfft2dOptions; no field accessors are declared.
+  typedef Rfft2dOptionsT NativeTableType;
+  typedef Rfft2dOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  Rfft2dOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<Rfft2dOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct Rfft2dOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef Rfft2dOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Rfft2dOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(  // Starts and immediately finishes an Rfft2dOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  Rfft2dOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native Rfft2dOptionsT pointer; declaration only.
+
+struct HashtableOptionsT : public flatbuffers::NativeTable {  // Native struct paired with HashtableOptions; each member has an in-class default.
+  typedef HashtableOptions TableType;
+  int32_t table_id = 0;
+  tflite::TensorType key_dtype = tflite::TensorType_FLOAT32;
+  tflite::TensorType value_dtype = tflite::TensorType_FLOAT32;
+};
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type exposing table_id, key_dtype and value_dtype.
+  typedef HashtableOptionsT NativeTableType;
+  typedef HashtableOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the GetField/VerifyField calls below.
+    VT_TABLE_ID = 4,
+    VT_KEY_DTYPE = 6,
+    VT_VALUE_DTYPE = 8
+  };
+  int32_t table_id() const {
+    return GetField<int32_t>(VT_TABLE_ID, 0);
+  }
+  tflite::TensorType key_dtype() const {
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));  // Read as int8_t, then cast to the enum. NOTE(review): default 0 presumably equals TensorType_FLOAT32 - confirm.
+  }
+  tflite::TensorType value_dtype() const {
+    return static_cast<tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));  // Read as int8_t, then cast to the enum.
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only if every chained check passes; && stops at the first failure.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int32_t>(verifier, VT_TABLE_ID, 4) &&
+           VerifyField<int8_t>(verifier, VT_KEY_DTYPE, 1) &&
+           VerifyField<int8_t>(verifier, VT_VALUE_DTYPE, 1) &&
+           verifier.EndTable();
+  }
+  HashtableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<HashtableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableOptionsBuilder {  // Opens a table on construction; add_* write fields; Finish() closes it and returns the typed offset.
+  typedef HashtableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  void add_table_id(int32_t table_id) {
+    fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+  }
+  void add_key_dtype(tflite::TensorType key_dtype) {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);  // Enum is narrowed to int8_t before being written.
+  }
+  void add_value_dtype(tflite::TensorType value_dtype) {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);  // Enum is narrowed to int8_t before being written.
+  }
+  explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(  // Builds a HashtableOptions table from the three values via HashtableOptionsBuilder.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int32_t table_id = 0,
+    tflite::TensorType key_dtype = tflite::TensorType_FLOAT32,
+    tflite::TensorType value_dtype = tflite::TensorType_FLOAT32) {
+  HashtableOptionsBuilder builder_(_fbb);
+  builder_.add_table_id(table_id);  // table_id is added first, then value_dtype, then key_dtype; keep this order as written.
+  builder_.add_value_dtype(value_dtype);
+  builder_.add_key_dtype(key_dtype);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native HashtableOptionsT pointer; declaration only.
+
+struct HashtableFindOptionsT : public flatbuffers::NativeTable {  // Native struct paired with HashtableFindOptions; it declares no data members.
+  typedef HashtableFindOptions TableType;
+};
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for HashtableFindOptions; no field accessors are declared.
+  typedef HashtableFindOptionsT NativeTableType;
+  typedef HashtableFindOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  HashtableFindOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<HashtableFindOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableFindOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef HashtableFindOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableFindOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(  // Starts and immediately finishes a HashtableFindOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  HashtableFindOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native HashtableFindOptionsT pointer; declaration only.
+
+struct HashtableImportOptionsT : public flatbuffers::NativeTable {  // Native struct paired with HashtableImportOptions; it declares no data members.
+  typedef HashtableImportOptions TableType;
+};
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for HashtableImportOptions; no field accessors are declared.
+  typedef HashtableImportOptionsT NativeTableType;
+  typedef HashtableImportOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  HashtableImportOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<HashtableImportOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableImportOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef HashtableImportOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableImportOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(  // Starts and immediately finishes a HashtableImportOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  HashtableImportOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native HashtableImportOptionsT pointer; declaration only.
+
+struct HashtableSizeOptionsT : public flatbuffers::NativeTable {  // Native struct paired with HashtableSizeOptions; it declares no data members.
+  typedef HashtableSizeOptions TableType;
+};
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for HashtableSizeOptions; no field accessors are declared.
+  typedef HashtableSizeOptionsT NativeTableType;
+  typedef HashtableSizeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  HashtableSizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<HashtableSizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct HashtableSizeOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef HashtableSizeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableSizeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(  // Starts and immediately finishes a HashtableSizeOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  HashtableSizeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native HashtableSizeOptionsT pointer; declaration only.
+
+struct VarHandleOptionsT : public flatbuffers::NativeTable {  // Native struct paired with VarHandleOptions; holds two value-initialized strings.
+  typedef VarHandleOptions TableType;
+  std::string container{};
+  std::string shared_name{};
+};
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type exposing two string fields: container and shared_name.
+  typedef VarHandleOptionsT NativeTableType;
+  typedef VarHandleOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the GetPointer/VerifyOffset calls below.
+    VT_CONTAINER = 4,
+    VT_SHARED_NAME = 6
+  };
+  const flatbuffers::String *container() const {  // Returns a pointer to the stored string rather than a copy.
+    return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+  }
+  const flatbuffers::String *shared_name() const {  // Returns a pointer to the stored string rather than a copy.
+    return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Each string field gets an offset check followed by a string check; && stops at the first failure.
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_CONTAINER) &&
+           verifier.VerifyString(container()) &&
+           VerifyOffset(verifier, VT_SHARED_NAME) &&
+           verifier.VerifyString(shared_name()) &&
+           verifier.EndTable();
+  }
+  VarHandleOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(VarHandleOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<VarHandleOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct VarHandleOptionsBuilder {  // Opens a table on construction; add_* record string offsets; Finish() closes it and returns the typed offset.
+  typedef VarHandleOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  void add_container(flatbuffers::Offset<flatbuffers::String> container) {  // Takes an already-created string offset, not raw characters.
+    fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+  }
+  void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name) {  // Takes an already-created string offset, not raw characters.
+    fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+  }
+  explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<VarHandleOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<VarHandleOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<VarHandleOptions> CreateVarHandleOptions(  // Builds a VarHandleOptions table from two string offsets (both default to 0).
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::String> container = 0,
+    flatbuffers::Offset<flatbuffers::String> shared_name = 0) {
+  VarHandleOptionsBuilder builder_(_fbb);
+  builder_.add_shared_name(shared_name);  // shared_name is added before container; keep this order as written.
+  builder_.add_container(container);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions> CreateVarHandleOptionsDirect(  // Variant taking C strings; creates the strings, then forwards to CreateVarHandleOptions.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const char *container = nullptr,
+    const char *shared_name = nullptr) {
+  auto container__ = container ? _fbb.CreateString(container) : 0;  // A null pointer yields offset 0 instead of creating a string.
+  auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;  // A null pointer yields offset 0 instead of creating a string.
+  return tflite::CreateVarHandleOptions(
+      _fbb,
+      container__,
+      shared_name__);
+}
+
+flatbuffers::Offset<VarHandleOptions> CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native VarHandleOptionsT pointer; declaration only.
+
+struct ReadVariableOptionsT : public flatbuffers::NativeTable {  // Native struct paired with ReadVariableOptions; it declares no data members.
+  typedef ReadVariableOptions TableType;
+};
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for ReadVariableOptions; no field accessors are declared.
+  typedef ReadVariableOptionsT NativeTableType;
+  typedef ReadVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  ReadVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(ReadVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ReadVariableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct ReadVariableOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef ReadVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ReadVariableOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions> CreateReadVariableOptions(  // Starts and immediately finishes a ReadVariableOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  ReadVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<ReadVariableOptions> CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native ReadVariableOptionsT pointer; declaration only.
+
+struct AssignVariableOptionsT : public flatbuffers::NativeTable {  // Native struct paired with AssignVariableOptions; it declares no data members.
+  typedef AssignVariableOptions TableType;
+};
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for AssignVariableOptions; no field accessors are declared.
+  typedef AssignVariableOptionsT NativeTableType;
+  typedef AssignVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  AssignVariableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(AssignVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<AssignVariableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct AssignVariableOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef AssignVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AssignVariableOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(  // Starts and immediately finishes an AssignVariableOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  AssignVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native AssignVariableOptionsT pointer; declaration only.
+
+struct RandomOptionsT : public flatbuffers::NativeTable {  // Native struct paired with RandomOptions; both 64-bit seeds default to 0.
+  typedef RandomOptions TableType;
+  int64_t seed = 0;
+  int64_t seed2 = 0;
+};
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type exposing two int64_t fields: seed and seed2.
+  typedef RandomOptionsT NativeTableType;
+  typedef RandomOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Per-field offsets used by the GetField/VerifyField calls below.
+    VT_SEED = 4,
+    VT_SEED2 = 6
+  };
+  int64_t seed() const {
+    return GetField<int64_t>(VT_SEED, 0);
+  }
+  int64_t seed2() const {
+    return GetField<int64_t>(VT_SEED2, 0);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only if every chained check passes; && stops at the first failure.
+    return VerifyTableStart(verifier) &&
+           VerifyField<int64_t>(verifier, VT_SEED, 8) &&
+           VerifyField<int64_t>(verifier, VT_SEED2, 8) &&
+           verifier.EndTable();
+  }
+  RandomOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(RandomOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<RandomOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct RandomOptionsBuilder {  // Opens a table on construction; add_* write fields; Finish() closes it and returns the typed offset.
+  typedef RandomOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  void add_seed(int64_t seed) {
+    fbb_.AddElement<int64_t>(RandomOptions::VT_SEED, seed, 0);  // 0 is passed as the default argument.
+  }
+  void add_seed2(int64_t seed2) {
+    fbb_.AddElement<int64_t>(RandomOptions::VT_SEED2, seed2, 0);  // 0 is passed as the default argument.
+  }
+  explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RandomOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RandomOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(  // Builds a RandomOptions table from the two seeds via RandomOptionsBuilder.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    int64_t seed = 0,
+    int64_t seed2 = 0) {
+  RandomOptionsBuilder builder_(_fbb);
+  builder_.add_seed2(seed2);  // seed2 is added before seed; keep this order as written.
+  builder_.add_seed(seed);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native RandomOptionsT pointer; declaration only.
+
+struct BucketizeOptionsT : public flatbuffers::NativeTable {  // Native struct paired with BucketizeOptions; holds a value-initialized float vector.
+  typedef BucketizeOptions TableType;
+  std::vector<float> boundaries{};
+};
+
+struct BucketizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type exposing one float-vector field: boundaries.
+  typedef BucketizeOptionsT NativeTableType;
+  typedef BucketizeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset used by the GetPointer/VerifyOffset calls below.
+    VT_BOUNDARIES = 4
+  };
+  const flatbuffers::Vector<float> *boundaries() const {  // Returns a pointer to the stored vector rather than a copy.
+    return GetPointer<const flatbuffers::Vector<float> *>(VT_BOUNDARIES);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Offset check then vector check for boundaries; && stops at the first failure.
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_BOUNDARIES) &&
+           verifier.VerifyVector(boundaries()) &&
+           verifier.EndTable();
+  }
+  BucketizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(BucketizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<BucketizeOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct BucketizeOptionsBuilder {  // Opens a table on construction; add_boundaries records the vector offset; Finish() closes the table.
+  typedef BucketizeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  void add_boundaries(flatbuffers::Offset<flatbuffers::Vector<float>> boundaries) {  // Takes an already-created vector offset, not raw floats.
+    fbb_.AddOffset(BucketizeOptions::VT_BOUNDARIES, boundaries);
+  }
+  explicit BucketizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BucketizeOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BucketizeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BucketizeOptions> CreateBucketizeOptions(  // Builds a BucketizeOptions table from a vector offset (defaults to 0).
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<float>> boundaries = 0) {
+  BucketizeOptionsBuilder builder_(_fbb);
+  builder_.add_boundaries(boundaries);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<BucketizeOptions> CreateBucketizeOptionsDirect(  // Variant taking a std::vector pointer; creates the vector, then forwards to CreateBucketizeOptions.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<float> *boundaries = nullptr) {
+  auto boundaries__ = boundaries ? _fbb.CreateVector<float>(*boundaries) : 0;  // A null pointer yields offset 0 instead of creating a vector.
+  return tflite::CreateBucketizeOptions(
+      _fbb,
+      boundaries__);
+}
+
+flatbuffers::Offset<BucketizeOptions> CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native BucketizeOptionsT pointer; declaration only.
+
+struct GeluOptionsT : public flatbuffers::NativeTable {  // Native struct paired with GeluOptions; approximate defaults to false.
+  typedef GeluOptions TableType;
+  bool approximate = false;
+};
+
+struct GeluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type exposing one boolean field: approximate.
+  typedef GeluOptionsT NativeTableType;
+  typedef GeluOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // Field offset used by the GetField/VerifyField calls below.
+    VT_APPROXIMATE = 4
+  };
+  bool approximate() const {
+    return GetField<uint8_t>(VT_APPROXIMATE, 0) != 0;  // Read as uint8_t; any non-zero value yields true.
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only if every chained check passes; && stops at the first failure.
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint8_t>(verifier, VT_APPROXIMATE, 1) &&
+           verifier.EndTable();
+  }
+  GeluOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(GeluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<GeluOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct GeluOptionsBuilder {  // Opens a table on construction; add_approximate writes the flag; Finish() closes the table.
+  typedef GeluOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  void add_approximate(bool approximate) {
+    fbb_.AddElement<uint8_t>(GeluOptions::VT_APPROXIMATE, static_cast<uint8_t>(approximate), 0);  // bool is written as uint8_t; 0 is passed as the default argument.
+  }
+  explicit GeluOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<GeluOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<GeluOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<GeluOptions> CreateGeluOptions(  // Builds a GeluOptions table from the approximate flag via GeluOptionsBuilder.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    bool approximate = false) {
+  GeluOptionsBuilder builder_(_fbb);
+  builder_.add_approximate(approximate);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<GeluOptions> CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native GeluOptionsT pointer; declaration only.
+
+struct DynamicUpdateSliceOptionsT : public flatbuffers::NativeTable {  // Native struct paired with DynamicUpdateSliceOptions; it declares no data members.
+  typedef DynamicUpdateSliceOptions TableType;
+};
+
+struct DynamicUpdateSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for DynamicUpdateSliceOptions; no field accessors are declared.
+  typedef DynamicUpdateSliceOptionsT NativeTableType;
+  typedef DynamicUpdateSliceOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  DynamicUpdateSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(DynamicUpdateSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<DynamicUpdateSliceOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct DynamicUpdateSliceOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef DynamicUpdateSliceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit DynamicUpdateSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<DynamicUpdateSliceOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<DynamicUpdateSliceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<DynamicUpdateSliceOptions> CreateDynamicUpdateSliceOptions(  // Starts and immediately finishes a DynamicUpdateSliceOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  DynamicUpdateSliceOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<DynamicUpdateSliceOptions> CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native DynamicUpdateSliceOptionsT pointer; declaration only.
+
+struct UnsortedSegmentProdOptionsT : public flatbuffers::NativeTable {  // Native struct paired with UnsortedSegmentProdOptions; it declares no data members.
+  typedef UnsortedSegmentProdOptions TableType;
+};
+
+struct UnsortedSegmentProdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for UnsortedSegmentProdOptions; no field accessors are declared.
+  typedef UnsortedSegmentProdOptionsT NativeTableType;
+  typedef UnsortedSegmentProdOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  UnsortedSegmentProdOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(UnsortedSegmentProdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnsortedSegmentProdOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnsortedSegmentProdOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef UnsortedSegmentProdOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit UnsortedSegmentProdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<UnsortedSegmentProdOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnsortedSegmentProdOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentProdOptions> CreateUnsortedSegmentProdOptions(  // Starts and immediately finishes an UnsortedSegmentProdOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  UnsortedSegmentProdOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UnsortedSegmentProdOptions> CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native UnsortedSegmentProdOptionsT pointer; declaration only.
+
+struct UnsortedSegmentMaxOptionsT : public flatbuffers::NativeTable {  // Native struct paired with UnsortedSegmentMaxOptions; it declares no data members.
+  typedef UnsortedSegmentMaxOptions TableType;
+};
+
+struct UnsortedSegmentMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for UnsortedSegmentMaxOptions; no field accessors are declared.
+  typedef UnsortedSegmentMaxOptionsT NativeTableType;
+  typedef UnsortedSegmentMaxOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  UnsortedSegmentMaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(UnsortedSegmentMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnsortedSegmentMaxOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+struct UnsortedSegmentMaxOptionsBuilder {  // Opens a table on construction; Finish() closes it and returns the typed offset.
+  typedef UnsortedSegmentMaxOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // Reference to the builder passed to the constructor.
+  flatbuffers::uoffset_t start_;  // Result of StartTable(), handed back to EndTable() in Finish().
+  explicit UnsortedSegmentMaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<UnsortedSegmentMaxOptions> Finish() {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<UnsortedSegmentMaxOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<UnsortedSegmentMaxOptions> CreateUnsortedSegmentMaxOptions(  // Starts and immediately finishes an UnsortedSegmentMaxOptions table; no fields are added.
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  UnsortedSegmentMaxOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+flatbuffers::Offset<UnsortedSegmentMaxOptions> CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Overload taking a native UnsortedSegmentMaxOptionsT pointer; declaration only.
+
+struct UnsortedSegmentSumOptionsT : public flatbuffers::NativeTable {  // Native struct paired with UnsortedSegmentSumOptions; it declares no data members.
+  typedef UnsortedSegmentSumOptions TableType;
+};
+
+struct UnsortedSegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Table type for UnsortedSegmentSumOptions; no field accessors are declared.
+  typedef UnsortedSegmentSumOptionsT NativeTableType;
+  typedef UnsortedSegmentSumOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const {  // Passes only if both the table-start and end-table checks succeed.
+    return VerifyTableStart(verifier) &&
+           verifier.EndTable();
+  }
+  UnsortedSegmentSumOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // UnPack, UnPackTo and Pack are declared here without bodies.
+  void UnPackTo(UnsortedSegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnsortedSegmentSumOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for UnsortedSegmentSumOptions; there are no fields to add, so it only opens and closes the table.
+struct UnsortedSegmentSumOptionsBuilder {
+  using Table = UnsortedSegmentSumOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table in _fbb and remembers where it began.
+  explicit UnsortedSegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and wraps the resulting offset in its typed form.
+  flatbuffers::Offset<UnsortedSegmentSumOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<UnsortedSegmentSumOptions>(table_end);
+  }
+};
+
+// Writes an empty UnsortedSegmentSumOptions table into _fbb and returns its offset.
+inline flatbuffers::Offset<UnsortedSegmentSumOptions> CreateUnsortedSegmentSumOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return UnsortedSegmentSumOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<UnsortedSegmentSumOptions> CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct ATan2OptionsT : flatbuffers::NativeTable {  // native object form; no data members
+  using TableType = ATan2Options;
+};
+
+// Accessor for a serialized ATan2Options table; with no fields, Verify only checks table framing.
+struct ATan2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = ATan2OptionsT;
+  using Builder = ATan2OptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  ATan2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(ATan2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<ATan2Options> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for ATan2Options; there are no fields to add, so it only opens and closes the table.
+struct ATan2OptionsBuilder {
+  using Table = ATan2Options;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table in _fbb and remembers where it began.
+  explicit ATan2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and wraps the resulting offset in its typed form.
+  flatbuffers::Offset<ATan2Options> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<ATan2Options>(table_end);
+  }
+};
+
+// Writes an empty ATan2Options table into _fbb and returns its offset.
+inline flatbuffers::Offset<ATan2Options> CreateATan2Options(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return ATan2OptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<ATan2Options> CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct UnsortedSegmentMinOptionsT : flatbuffers::NativeTable {  // native object form; no data members
+  using TableType = UnsortedSegmentMinOptions;
+};
+
+// Accessor for a serialized UnsortedSegmentMinOptions table; with no fields, Verify only checks table framing.
+struct UnsortedSegmentMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = UnsortedSegmentMinOptionsT;
+  using Builder = UnsortedSegmentMinOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  UnsortedSegmentMinOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(UnsortedSegmentMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<UnsortedSegmentMinOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for UnsortedSegmentMinOptions; there are no fields to add, so it only opens and closes the table.
+struct UnsortedSegmentMinOptionsBuilder {
+  using Table = UnsortedSegmentMinOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table in _fbb and remembers where it began.
+  explicit UnsortedSegmentMinOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and wraps the resulting offset in its typed form.
+  flatbuffers::Offset<UnsortedSegmentMinOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<UnsortedSegmentMinOptions>(table_end);
+  }
+};
+
+// Writes an empty UnsortedSegmentMinOptions table into _fbb and returns its offset.
+inline flatbuffers::Offset<UnsortedSegmentMinOptions> CreateUnsortedSegmentMinOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return UnsortedSegmentMinOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<UnsortedSegmentMinOptions> CreateUnsortedSegmentMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct SignOptionsT : flatbuffers::NativeTable {  // native object form; no data members
+  using TableType = SignOptions;
+};
+
+// Accessor for a serialized SignOptions table; with no fields, Verify only checks table framing.
+struct SignOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  using NativeTableType = SignOptionsT;
+  using Builder = SignOptionsBuilder;
+  bool Verify(flatbuffers::Verifier &verifier) const {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+  SignOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  void UnPackTo(SignOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
+  static flatbuffers::Offset<SignOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+};
+
+// Builder for SignOptions; there are no fields to add, so it only opens and closes the table.
+struct SignOptionsBuilder {
+  using Table = SignOptions;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  // Starts the table in _fbb and remembers where it began.
+  explicit SignOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Ends the table and wraps the resulting offset in its typed form.
+  flatbuffers::Offset<SignOptions> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<SignOptions>(table_end);
+  }
+};
+
+// Writes an empty SignOptions table into _fbb and returns its offset.
+inline flatbuffers::Offset<SignOptions> CreateSignOptions(
+    flatbuffers::FlatBufferBuilder &_fbb) {
+  return SignOptionsBuilder(_fbb).Finish();
+}
+
+flatbuffers::Offset<SignOptions> CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+
+
+
+
+// Writes the fields of an OperatorCode table into a FlatBufferBuilder, one add_* call per field.
+struct OperatorCodeBuilder {
+  using Table = OperatorCode;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
+      : fbb_(_fbb), start_(_fbb.StartTable()) {}
+  // Per-field writers; for scalar fields the third AddElement argument is the field's default value.
+  void add_deprecated_builtin_code(int8_t deprecated_builtin_code) {
+    fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
+  }
+  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code) {
+    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+  }
+  void add_version(int32_t version) {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+  }
+  void add_builtin_code(tflite::BuiltinOperator builtin_code) {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+  }
+  // Ends the table and wraps the resulting offset in its typed form.
+  flatbuffers::Offset<OperatorCode> Finish() {
+    const auto table_end = fbb_.EndTable(start_);
+    return flatbuffers::Offset<OperatorCode>(table_end);
+  }
+};
+
+// Writes a complete OperatorCode table into _fbb from already-serialized values and returns its offset.
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(
+    flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+    flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+    tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) {
+  OperatorCodeBuilder code_builder(_fbb);
+  // Keep this add_* sequence as is: it is the order in which fields are handed to the builder.
+  code_builder.add_builtin_code(builtin_code);
+  code_builder.add_version(version);
+  code_builder.add_custom_code(custom_code);
+  code_builder.add_deprecated_builtin_code(deprecated_builtin_code);
+  return code_builder.Finish();
+}
+
+// Variant of CreateOperatorCode that takes custom_code as a C string and serializes it itself;
+// a null custom_code is passed on as a zero offset instead of a string.
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
+    flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+    const char *custom_code = nullptr, int32_t version = 1,
+    tflite::BuiltinOperator builtin_code = tflite::BuiltinOperator_ADD) {
+  // Serialize the string first, before CreateOperatorCode opens the table.
+  flatbuffers::Offset<flatbuffers::String> code_offset = 0;
+  if (custom_code != nullptr) {
+    code_offset = _fbb.CreateString(custom_code);
+  }
+  return tflite::CreateOperatorCode(_fbb, deprecated_builtin_code, code_offset, version,
+                                    builtin_code);
+}
+
+flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
+
+struct OperatorT : public flatbuffers::NativeTable {  // native (object-API) form of the Operator table
+  typedef Operator TableType;  // the serialized table type this object corresponds to
+  uint32_t opcode_index = 0;  // same default (0) that Operator::opcode_index() falls back to
+  std::vector<int32_t> inputs{};  // members follow the order of Operator's VT_* field offsets
+  std::vector<int32_t> outputs{};
+  tflite::BuiltinOptionsUnion builtin_options{};  // counterpart of Operator's builtin_options_type/builtin_options pair
+  std::vector<uint8_t> custom_options{};
+  tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS;  // default format
+  std::vector<bool> mutating_variable_inputs{};
+  std::vector<int32_t> intermediates{};
+};
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
+  typedef OperatorT NativeTableType;
+  typedef OperatorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
+    VT_OPCODE_INDEX = 4,
+    VT_INPUTS = 6,
+    VT_OUTPUTS = 8,
+    VT_BUILTIN_OPTIONS_TYPE = 10,
+    VT_BUILTIN_OPTIONS = 12,
+    VT_CUSTOM_OPTIONS = 14,
+    VT_CUSTOM_OPTIONS_FORMAT = 16,
+    VT_MUTATING_VARIABLE_INPUTS = 18,
+    VT_INTERMEDIATES = 20
+  };
+  uint32_t opcode_index() const {
+    return GetField<uint32_t>(VT_OPCODE_INDEX, 0);
+  }
+  const flatbuffers::Vector<int32_t> *inputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+  }
+  const flatbuffers::Vector<int32_t> *outputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+  }
+  tflite::BuiltinOptions builtin_options_type() const {
+    return static_cast<tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+  }
+  const void *builtin_options() const {
+    return GetPointer<const void *>(VT_BUILTIN_OPTIONS);
+  }
+  template<typename T> const T *builtin_options_as() const;
+  const tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_Conv2DOptions ? static_cast<const tflite::Conv2DOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_DepthwiseConv2DOptions ? static_cast<const tflite::DepthwiseConv2DOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ConcatEmbeddingsOptions ? static_cast<const tflite::ConcatEmbeddingsOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LSHProjectionOptions ? static_cast<const tflite::LSHProjectionOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_Pool2DOptions ? static_cast<const tflite::Pool2DOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SVDFOptions *builtin_options_as_SVDFOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SVDFOptions ? static_cast<const tflite::SVDFOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::RNNOptions *builtin_options_as_RNNOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_RNNOptions ? static_cast<const tflite::RNNOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_FullyConnectedOptions ? static_cast<const tflite::FullyConnectedOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SoftmaxOptions ? static_cast<const tflite::SoftmaxOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ConcatenationOptions ? static_cast<const tflite::ConcatenationOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::AddOptions *builtin_options_as_AddOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_AddOptions ? static_cast<const tflite::AddOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::L2NormOptions *builtin_options_as_L2NormOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_L2NormOptions ? static_cast<const tflite::L2NormOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LocalResponseNormalizationOptions ? static_cast<const tflite::LocalResponseNormalizationOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LSTMOptions *builtin_options_as_LSTMOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LSTMOptions ? static_cast<const tflite::LSTMOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ResizeBilinearOptions ? static_cast<const tflite::ResizeBilinearOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::CallOptions *builtin_options_as_CallOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_CallOptions ? static_cast<const tflite::CallOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ReshapeOptions ? static_cast<const tflite::ReshapeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SkipGramOptions ? static_cast<const tflite::SkipGramOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SpaceToDepthOptions ? static_cast<const tflite::SpaceToDepthOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast<const tflite::EmbeddingLookupSparseOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::MulOptions *builtin_options_as_MulOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_MulOptions ? static_cast<const tflite::MulOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::PadOptions *builtin_options_as_PadOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_PadOptions ? static_cast<const tflite::PadOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::GatherOptions *builtin_options_as_GatherOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_GatherOptions ? static_cast<const tflite::GatherOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_BatchToSpaceNDOptions ? static_cast<const tflite::BatchToSpaceNDOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SpaceToBatchNDOptions ? static_cast<const tflite::SpaceToBatchNDOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::TransposeOptions *builtin_options_as_TransposeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_TransposeOptions ? static_cast<const tflite::TransposeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ReducerOptions *builtin_options_as_ReducerOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ReducerOptions ? static_cast<const tflite::ReducerOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SubOptions *builtin_options_as_SubOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SubOptions ? static_cast<const tflite::SubOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::DivOptions *builtin_options_as_DivOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_DivOptions ? static_cast<const tflite::DivOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SqueezeOptions ? static_cast<const tflite::SqueezeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SequenceRNNOptions ? static_cast<const tflite::SequenceRNNOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_StridedSliceOptions ? static_cast<const tflite::StridedSliceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ExpOptions *builtin_options_as_ExpOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ExpOptions ? static_cast<const tflite::ExpOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::TopKV2Options *builtin_options_as_TopKV2Options() const {
+    return builtin_options_type() == tflite::BuiltinOptions_TopKV2Options ? static_cast<const tflite::TopKV2Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::SplitOptions *builtin_options_as_SplitOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SplitOptions ? static_cast<const tflite::SplitOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LogSoftmaxOptions ? static_cast<const tflite::LogSoftmaxOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::CastOptions *builtin_options_as_CastOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_CastOptions ? static_cast<const tflite::CastOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_DequantizeOptions ? static_cast<const tflite::DequantizeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_MaximumMinimumOptions ? static_cast<const tflite::MaximumMinimumOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ArgMaxOptions ? static_cast<const tflite::ArgMaxOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LessOptions *builtin_options_as_LessOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LessOptions ? static_cast<const tflite::LessOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::NegOptions *builtin_options_as_NegOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_NegOptions ? static_cast<const tflite::NegOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::PadV2Options *builtin_options_as_PadV2Options() const {
+    return builtin_options_type() == tflite::BuiltinOptions_PadV2Options ? static_cast<const tflite::PadV2Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::GreaterOptions *builtin_options_as_GreaterOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_GreaterOptions ? static_cast<const tflite::GreaterOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_GreaterEqualOptions ? static_cast<const tflite::GreaterEqualOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LessEqualOptions ? static_cast<const tflite::LessEqualOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SelectOptions *builtin_options_as_SelectOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SelectOptions ? static_cast<const tflite::SelectOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SliceOptions *builtin_options_as_SliceOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SliceOptions ? static_cast<const tflite::SliceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_TransposeConvOptions ? static_cast<const tflite::TransposeConvOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SparseToDenseOptions ? static_cast<const tflite::SparseToDenseOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::TileOptions *builtin_options_as_TileOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_TileOptions ? static_cast<const tflite::TileOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ExpandDimsOptions ? static_cast<const tflite::ExpandDimsOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::EqualOptions *builtin_options_as_EqualOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_EqualOptions ? static_cast<const tflite::EqualOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_NotEqualOptions ? static_cast<const tflite::NotEqualOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ShapeOptions *builtin_options_as_ShapeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ShapeOptions ? static_cast<const tflite::ShapeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::PowOptions *builtin_options_as_PowOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_PowOptions ? static_cast<const tflite::PowOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ArgMinOptions ? static_cast<const tflite::ArgMinOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_FakeQuantOptions ? static_cast<const tflite::FakeQuantOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::PackOptions *builtin_options_as_PackOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_PackOptions ? static_cast<const tflite::PackOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LogicalOrOptions ? static_cast<const tflite::LogicalOrOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::OneHotOptions *builtin_options_as_OneHotOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_OneHotOptions ? static_cast<const tflite::OneHotOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LogicalAndOptions ? static_cast<const tflite::LogicalAndOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LogicalNotOptions ? static_cast<const tflite::LogicalNotOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnpackOptions *builtin_options_as_UnpackOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_UnpackOptions ? static_cast<const tflite::UnpackOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_FloorDivOptions ? static_cast<const tflite::FloorDivOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SquareOptions *builtin_options_as_SquareOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SquareOptions ? static_cast<const tflite::SquareOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ZerosLikeOptions ? static_cast<const tflite::ZerosLikeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::FillOptions *builtin_options_as_FillOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_FillOptions ? static_cast<const tflite::FillOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast<const tflite::BidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast<const tflite::BidirectionalSequenceRNNOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::FloorModOptions *builtin_options_as_FloorModOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_FloorModOptions ? static_cast<const tflite::FloorModOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::RangeOptions *builtin_options_as_RangeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_RangeOptions ? static_cast<const tflite::RangeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ResizeNearestNeighborOptions ? static_cast<const tflite::ResizeNearestNeighborOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_LeakyReluOptions ? static_cast<const tflite::LeakyReluOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SquaredDifferenceOptions ? static_cast<const tflite::SquaredDifferenceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_MirrorPadOptions ? static_cast<const tflite::MirrorPadOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::AbsOptions *builtin_options_as_AbsOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_AbsOptions ? static_cast<const tflite::AbsOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SplitVOptions *builtin_options_as_SplitVOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_SplitVOptions ? static_cast<const tflite::SplitVOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UniqueOptions *builtin_options_as_UniqueOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_UniqueOptions ? static_cast<const tflite::UniqueOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ReverseV2Options ? static_cast<const tflite::ReverseV2Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::AddNOptions *builtin_options_as_AddNOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_AddNOptions ? static_cast<const tflite::AddNOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_GatherNdOptions ? static_cast<const tflite::GatherNdOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::CosOptions *builtin_options_as_CosOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_CosOptions ? static_cast<const tflite::CosOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::WhereOptions *builtin_options_as_WhereOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_WhereOptions ? static_cast<const tflite::WhereOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::RankOptions *builtin_options_as_RankOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_RankOptions ? static_cast<const tflite::RankOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_ReverseSequenceOptions ? static_cast<const tflite::ReverseSequenceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_MatrixDiagOptions ? static_cast<const tflite::MatrixDiagOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_QuantizeOptions ? static_cast<const tflite::QuantizeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_MatrixSetDiagOptions ? static_cast<const tflite::MatrixSetDiagOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const {
+    return builtin_options_type() == tflite::BuiltinOptions_HardSwishOptions ? static_cast<const tflite::HardSwishOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::IfOptions *builtin_options_as_IfOptions() const {  // Yields builtin_options() as IfOptions when the type tag is BuiltinOptions_IfOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_IfOptions ? static_cast<const tflite::IfOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::WhileOptions *builtin_options_as_WhileOptions() const {  // Yields builtin_options() as WhileOptions when the type tag is BuiltinOptions_WhileOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_WhileOptions ? static_cast<const tflite::WhileOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const {  // Yields builtin_options() as DepthToSpaceOptions when the type tag is BuiltinOptions_DepthToSpaceOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_DepthToSpaceOptions ? static_cast<const tflite::DepthToSpaceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const {  // Yields builtin_options() as NonMaxSuppressionV4Options when the type tag is BuiltinOptions_NonMaxSuppressionV4Options; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV4Options ? static_cast<const tflite::NonMaxSuppressionV4Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const {  // Yields builtin_options() as NonMaxSuppressionV5Options when the type tag is BuiltinOptions_NonMaxSuppressionV5Options; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_NonMaxSuppressionV5Options ? static_cast<const tflite::NonMaxSuppressionV5Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const {  // Yields builtin_options() as ScatterNdOptions when the type tag is BuiltinOptions_ScatterNdOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_ScatterNdOptions ? static_cast<const tflite::ScatterNdOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SelectV2Options *builtin_options_as_SelectV2Options() const {  // Yields builtin_options() as SelectV2Options when the type tag is BuiltinOptions_SelectV2Options; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_SelectV2Options ? static_cast<const tflite::SelectV2Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::DensifyOptions *builtin_options_as_DensifyOptions() const {  // Yields builtin_options() as DensifyOptions when the type tag is BuiltinOptions_DensifyOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_DensifyOptions ? static_cast<const tflite::DensifyOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const {  // Yields builtin_options() as SegmentSumOptions when the type tag is BuiltinOptions_SegmentSumOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_SegmentSumOptions ? static_cast<const tflite::SegmentSumOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const {  // Yields builtin_options() as BatchMatMulOptions when the type tag is BuiltinOptions_BatchMatMulOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_BatchMatMulOptions ? static_cast<const tflite::BatchMatMulOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::CumsumOptions *builtin_options_as_CumsumOptions() const {  // Yields builtin_options() as CumsumOptions when the type tag is BuiltinOptions_CumsumOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_CumsumOptions ? static_cast<const tflite::CumsumOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const {  // Yields builtin_options() as CallOnceOptions when the type tag is BuiltinOptions_CallOnceOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_CallOnceOptions ? static_cast<const tflite::CallOnceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const {  // Yields builtin_options() as BroadcastToOptions when the type tag is BuiltinOptions_BroadcastToOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_BroadcastToOptions ? static_cast<const tflite::BroadcastToOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const {  // Yields builtin_options() as Rfft2dOptions when the type tag is BuiltinOptions_Rfft2dOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_Rfft2dOptions ? static_cast<const tflite::Rfft2dOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const {  // Yields builtin_options() as Conv3DOptions when the type tag is BuiltinOptions_Conv3DOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_Conv3DOptions ? static_cast<const tflite::Conv3DOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::HashtableOptions *builtin_options_as_HashtableOptions() const {  // Yields builtin_options() as HashtableOptions when the type tag is BuiltinOptions_HashtableOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_HashtableOptions ? static_cast<const tflite::HashtableOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const {  // Yields builtin_options() as HashtableFindOptions when the type tag is BuiltinOptions_HashtableFindOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_HashtableFindOptions ? static_cast<const tflite::HashtableFindOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const {  // Yields builtin_options() as HashtableImportOptions when the type tag is BuiltinOptions_HashtableImportOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_HashtableImportOptions ? static_cast<const tflite::HashtableImportOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const {  // Yields builtin_options() as HashtableSizeOptions when the type tag is BuiltinOptions_HashtableSizeOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_HashtableSizeOptions ? static_cast<const tflite::HashtableSizeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const {  // Yields builtin_options() as VarHandleOptions when the type tag is BuiltinOptions_VarHandleOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_VarHandleOptions ? static_cast<const tflite::VarHandleOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const {  // Yields builtin_options() as ReadVariableOptions when the type tag is BuiltinOptions_ReadVariableOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_ReadVariableOptions ? static_cast<const tflite::ReadVariableOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const {  // Yields builtin_options() as AssignVariableOptions when the type tag is BuiltinOptions_AssignVariableOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_AssignVariableOptions ? static_cast<const tflite::AssignVariableOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::RandomOptions *builtin_options_as_RandomOptions() const {  // Yields builtin_options() as RandomOptions when the type tag is BuiltinOptions_RandomOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_RandomOptions ? static_cast<const tflite::RandomOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::BucketizeOptions *builtin_options_as_BucketizeOptions() const {  // Yields builtin_options() as BucketizeOptions when the type tag is BuiltinOptions_BucketizeOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_BucketizeOptions ? static_cast<const tflite::BucketizeOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::GeluOptions *builtin_options_as_GeluOptions() const {  // Yields builtin_options() as GeluOptions when the type tag is BuiltinOptions_GeluOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_GeluOptions ? static_cast<const tflite::GeluOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::DynamicUpdateSliceOptions *builtin_options_as_DynamicUpdateSliceOptions() const {  // Yields builtin_options() as DynamicUpdateSliceOptions when the type tag is BuiltinOptions_DynamicUpdateSliceOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_DynamicUpdateSliceOptions ? static_cast<const tflite::DynamicUpdateSliceOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnsortedSegmentProdOptions *builtin_options_as_UnsortedSegmentProdOptions() const {  // Yields builtin_options() as UnsortedSegmentProdOptions when the type tag is BuiltinOptions_UnsortedSegmentProdOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentProdOptions ? static_cast<const tflite::UnsortedSegmentProdOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnsortedSegmentMaxOptions *builtin_options_as_UnsortedSegmentMaxOptions() const {  // Yields builtin_options() as UnsortedSegmentMaxOptions when the type tag is BuiltinOptions_UnsortedSegmentMaxOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentMaxOptions ? static_cast<const tflite::UnsortedSegmentMaxOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnsortedSegmentMinOptions *builtin_options_as_UnsortedSegmentMinOptions() const {  // Yields builtin_options() as UnsortedSegmentMinOptions when the type tag is BuiltinOptions_UnsortedSegmentMinOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentMinOptions ? static_cast<const tflite::UnsortedSegmentMinOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::UnsortedSegmentSumOptions *builtin_options_as_UnsortedSegmentSumOptions() const {  // Yields builtin_options() as UnsortedSegmentSumOptions when the type tag is BuiltinOptions_UnsortedSegmentSumOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_UnsortedSegmentSumOptions ? static_cast<const tflite::UnsortedSegmentSumOptions *>(builtin_options()) : nullptr;
+  }
+  const tflite::ATan2Options *builtin_options_as_ATan2Options() const {  // Yields builtin_options() as ATan2Options when the type tag is BuiltinOptions_ATan2Options; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_ATan2Options ? static_cast<const tflite::ATan2Options *>(builtin_options()) : nullptr;
+  }
+  const tflite::SignOptions *builtin_options_as_SignOptions() const {  // Yields builtin_options() as SignOptions when the type tag is BuiltinOptions_SignOptions; nullptr otherwise.
+    return builtin_options_type() == tflite::BuiltinOptions_SignOptions ? static_cast<const tflite::SignOptions *>(builtin_options()) : nullptr;
+  }
+  const flatbuffers::Vector<uint8_t> *custom_options() const {  // Accessor for the byte vector stored at VT_CUSTOM_OPTIONS.
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);  // NOTE(review): presumably null when the field is absent — confirm against flatbuffers Table::GetPointer
+  }
+  tflite::CustomOptionsFormat custom_options_format() const {  // Reads the int8_t field at VT_CUSTOM_OPTIONS_FORMAT and casts it to CustomOptionsFormat.
+    return static_cast<tflite::CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));  // 0 is passed to GetField as the default-value argument
+  }
+  const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const {  // Accessor for the uint8_t vector stored at VT_MUTATING_VARIABLE_INPUTS.
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_MUTATING_VARIABLE_INPUTS);  // NOTE(review): presumably null when the field is absent — confirm against flatbuffers Table::GetPointer
+  }
+  const flatbuffers::Vector<int32_t> *intermediates() const {  // Accessor for the int32_t vector stored at VT_INTERMEDIATES.
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INTERMEDIATES);  // NOTE(review): presumably null when the field is absent — confirm against flatbuffers Table::GetPointer
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // True only if every check in the && chain below passes; evaluation stops at the first failing check.
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_OPCODE_INDEX, 4) &&  // scalar field; the trailing 4 is presumably its alignment — confirm against flatbuffers Table::VerifyField
+           VerifyOffset(verifier, VT_INPUTS) &&  // each vector field is checked in two steps: the offset slot, then the vector it refers to
+           verifier.VerifyVector(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) &&
+           verifier.VerifyVector(outputs()) &&
+           VerifyField<uint8_t>(verifier, VT_BUILTIN_OPTIONS_TYPE, 1) &&  // union type tag, stored as uint8_t
+           VerifyOffset(verifier, VT_BUILTIN_OPTIONS) &&
+           VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) &&  // union payload and its type tag are handed to VerifyBuiltinOptions together
+           VerifyOffset(verifier, VT_CUSTOM_OPTIONS) &&
+           verifier.VerifyVector(custom_options()) &&
+           VerifyField<int8_t>(verifier, VT_CUSTOM_OPTIONS_FORMAT, 1) &&  // enum stored as int8_t
+           VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) &&
+           verifier.VerifyVector(mutating_variable_inputs()) &&
+           VerifyOffset(verifier, VT_INTERMEDIATES) &&
+           verifier.VerifyVector(intermediates()) &&
+           verifier.EndTable();  // last step of the chain
+  }
+  OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared here without a body; returns an OperatorT pointer, _resolver defaults to nullptr.
+  void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // Declared here without a body; takes the destination OperatorT as _o.
+  static flatbuffers::Offset<Operator> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // Static; declared here without a body; returns an Offset<Operator>.
+};
+
+template<> inline const tflite::Conv2DOptions *Operator::builtin_options_as<tflite::Conv2DOptions>() const {  // builtin_options_as<T>() for T = Conv2DOptions: forwards to the named accessor.
+  return builtin_options_as_Conv2DOptions();
+}
+
+template<> inline const tflite::DepthwiseConv2DOptions *Operator::builtin_options_as<tflite::DepthwiseConv2DOptions>() const {  // builtin_options_as<T>() for T = DepthwiseConv2DOptions: forwards to the named accessor.
+  return builtin_options_as_DepthwiseConv2DOptions();
+}
+
+template<> inline const tflite::ConcatEmbeddingsOptions *Operator::builtin_options_as<tflite::ConcatEmbeddingsOptions>() const {  // builtin_options_as<T>() for T = ConcatEmbeddingsOptions: forwards to the named accessor.
+  return builtin_options_as_ConcatEmbeddingsOptions();
+}
+
+template<> inline const tflite::LSHProjectionOptions *Operator::builtin_options_as<tflite::LSHProjectionOptions>() const {  // builtin_options_as<T>() for T = LSHProjectionOptions: forwards to the named accessor.
+  return builtin_options_as_LSHProjectionOptions();
+}
+
+template<> inline const tflite::Pool2DOptions *Operator::builtin_options_as<tflite::Pool2DOptions>() const {  // builtin_options_as<T>() for T = Pool2DOptions: forwards to the named accessor.
+  return builtin_options_as_Pool2DOptions();
+}
+
+template<> inline const tflite::SVDFOptions *Operator::builtin_options_as<tflite::SVDFOptions>() const {  // builtin_options_as<T>() for T = SVDFOptions: forwards to the named accessor.
+  return builtin_options_as_SVDFOptions();
+}
+
+template<> inline const tflite::RNNOptions *Operator::builtin_options_as<tflite::RNNOptions>() const {  // builtin_options_as<T>() for T = RNNOptions: forwards to the named accessor.
+  return builtin_options_as_RNNOptions();
+}
+
+template<> inline const tflite::FullyConnectedOptions *Operator::builtin_options_as<tflite::FullyConnectedOptions>() const {  // builtin_options_as<T>() for T = FullyConnectedOptions: forwards to the named accessor.
+  return builtin_options_as_FullyConnectedOptions();
+}
+
+template<> inline const tflite::SoftmaxOptions *Operator::builtin_options_as<tflite::SoftmaxOptions>() const {  // builtin_options_as<T>() for T = SoftmaxOptions: forwards to the named accessor.
+  return builtin_options_as_SoftmaxOptions();
+}
+
+template<> inline const tflite::ConcatenationOptions *Operator::builtin_options_as<tflite::ConcatenationOptions>() const {  // builtin_options_as<T>() for T = ConcatenationOptions: forwards to the named accessor.
+  return builtin_options_as_ConcatenationOptions();
+}
+
+template<> inline const tflite::AddOptions *Operator::builtin_options_as<tflite::AddOptions>() const {  // builtin_options_as<T>() for T = AddOptions: forwards to the named accessor.
+  return builtin_options_as_AddOptions();
+}
+
+template<> inline const tflite::L2NormOptions *Operator::builtin_options_as<tflite::L2NormOptions>() const {  // builtin_options_as<T>() for T = L2NormOptions: forwards to the named accessor.
+  return builtin_options_as_L2NormOptions();
+}
+
+template<> inline const tflite::LocalResponseNormalizationOptions *Operator::builtin_options_as<tflite::LocalResponseNormalizationOptions>() const {  // builtin_options_as<T>() for T = LocalResponseNormalizationOptions: forwards to the named accessor.
+  return builtin_options_as_LocalResponseNormalizationOptions();
+}
+
+template<> inline const tflite::LSTMOptions *Operator::builtin_options_as<tflite::LSTMOptions>() const {  // builtin_options_as<T>() for T = LSTMOptions: forwards to the named accessor.
+  return builtin_options_as_LSTMOptions();
+}
+
+template<> inline const tflite::ResizeBilinearOptions *Operator::builtin_options_as<tflite::ResizeBilinearOptions>() const {  // builtin_options_as<T>() for T = ResizeBilinearOptions: forwards to the named accessor.
+  return builtin_options_as_ResizeBilinearOptions();
+}
+
+template<> inline const tflite::CallOptions *Operator::builtin_options_as<tflite::CallOptions>() const {  // builtin_options_as<T>() for T = CallOptions: forwards to the named accessor.
+  return builtin_options_as_CallOptions();
+}
+
+template<> inline const tflite::ReshapeOptions *Operator::builtin_options_as<tflite::ReshapeOptions>() const {  // builtin_options_as<T>() for T = ReshapeOptions: forwards to the named accessor.
+  return builtin_options_as_ReshapeOptions();
+}
+
+template<> inline const tflite::SkipGramOptions *Operator::builtin_options_as<tflite::SkipGramOptions>() const {  // builtin_options_as<T>() for T = SkipGramOptions: forwards to the named accessor.
+  return builtin_options_as_SkipGramOptions();
+}
+
+template<> inline const tflite::SpaceToDepthOptions *Operator::builtin_options_as<tflite::SpaceToDepthOptions>() const {  // builtin_options_as<T>() for T = SpaceToDepthOptions: forwards to the named accessor.
+  return builtin_options_as_SpaceToDepthOptions();
+}
+
+template<> inline const tflite::EmbeddingLookupSparseOptions *Operator::builtin_options_as<tflite::EmbeddingLookupSparseOptions>() const {  // builtin_options_as<T>() for T = EmbeddingLookupSparseOptions: forwards to the named accessor.
+  return builtin_options_as_EmbeddingLookupSparseOptions();
+}
+
+template<> inline const tflite::MulOptions *Operator::builtin_options_as<tflite::MulOptions>() const {  // builtin_options_as<T>() for T = MulOptions: forwards to the named accessor.
+  return builtin_options_as_MulOptions();
+}
+
+template<> inline const tflite::PadOptions *Operator::builtin_options_as<tflite::PadOptions>() const {  // builtin_options_as<T>() for T = PadOptions: forwards to the named accessor.
+  return builtin_options_as_PadOptions();
+}
+
+template<> inline const tflite::GatherOptions *Operator::builtin_options_as<tflite::GatherOptions>() const {  // builtin_options_as<T>() for T = GatherOptions: forwards to the named accessor.
+  return builtin_options_as_GatherOptions();
+}
+
+template<> inline const tflite::BatchToSpaceNDOptions *Operator::builtin_options_as<tflite::BatchToSpaceNDOptions>() const {  // builtin_options_as<T>() for T = BatchToSpaceNDOptions: forwards to the named accessor.
+  return builtin_options_as_BatchToSpaceNDOptions();
+}
+
+template<> inline const tflite::SpaceToBatchNDOptions *Operator::builtin_options_as<tflite::SpaceToBatchNDOptions>() const {  // builtin_options_as<T>() for T = SpaceToBatchNDOptions: forwards to the named accessor.
+  return builtin_options_as_SpaceToBatchNDOptions();
+}
+
+template<> inline const tflite::TransposeOptions *Operator::builtin_options_as<tflite::TransposeOptions>() const {  // builtin_options_as<T>() for T = TransposeOptions: forwards to the named accessor.
+  return builtin_options_as_TransposeOptions();
+}
+
+template<> inline const tflite::ReducerOptions *Operator::builtin_options_as<tflite::ReducerOptions>() const {  // builtin_options_as<T>() for T = ReducerOptions: forwards to the named accessor.
+  return builtin_options_as_ReducerOptions();
+}
+
+template<> inline const tflite::SubOptions *Operator::builtin_options_as<tflite::SubOptions>() const {  // builtin_options_as<T>() for T = SubOptions: forwards to the named accessor.
+  return builtin_options_as_SubOptions();
+}
+
+template<> inline const tflite::DivOptions *Operator::builtin_options_as<tflite::DivOptions>() const {  // builtin_options_as<T>() for T = DivOptions: forwards to the named accessor.
+  return builtin_options_as_DivOptions();
+}
+
+template<> inline const tflite::SqueezeOptions *Operator::builtin_options_as<tflite::SqueezeOptions>() const {  // builtin_options_as<T>() for T = SqueezeOptions: forwards to the named accessor.
+  return builtin_options_as_SqueezeOptions();
+}
+
+template<> inline const tflite::SequenceRNNOptions *Operator::builtin_options_as<tflite::SequenceRNNOptions>() const {  // builtin_options_as<T>() for T = SequenceRNNOptions: forwards to the named accessor.
+  return builtin_options_as_SequenceRNNOptions();
+}
+
+template<> inline const tflite::StridedSliceOptions *Operator::builtin_options_as<tflite::StridedSliceOptions>() const {  // builtin_options_as<T>() for T = StridedSliceOptions: forwards to the named accessor.
+  return builtin_options_as_StridedSliceOptions();
+}
+
+template<> inline const tflite::ExpOptions *Operator::builtin_options_as<tflite::ExpOptions>() const {  // builtin_options_as<T>() for T = ExpOptions: forwards to the named accessor.
+  return builtin_options_as_ExpOptions();
+}
+
+template<> inline const tflite::TopKV2Options *Operator::builtin_options_as<tflite::TopKV2Options>() const {  // builtin_options_as<T>() for T = TopKV2Options: forwards to the named accessor.
+  return builtin_options_as_TopKV2Options();
+}
+
+template<> inline const tflite::SplitOptions *Operator::builtin_options_as<tflite::SplitOptions>() const {  // builtin_options_as<T>() for T = SplitOptions: forwards to the named accessor.
+  return builtin_options_as_SplitOptions();
+}
+
+template<> inline const tflite::LogSoftmaxOptions *Operator::builtin_options_as<tflite::LogSoftmaxOptions>() const {  // builtin_options_as<T>() for T = LogSoftmaxOptions: forwards to the named accessor.
+  return builtin_options_as_LogSoftmaxOptions();
+}
+
+template<> inline const tflite::CastOptions *Operator::builtin_options_as<tflite::CastOptions>() const {  // builtin_options_as<T>() for T = CastOptions: forwards to the named accessor.
+  return builtin_options_as_CastOptions();
+}
+
+template<> inline const tflite::DequantizeOptions *Operator::builtin_options_as<tflite::DequantizeOptions>() const {  // builtin_options_as<T>() for T = DequantizeOptions: forwards to the named accessor.
+  return builtin_options_as_DequantizeOptions();
+}
+
+template<> inline const tflite::MaximumMinimumOptions *Operator::builtin_options_as<tflite::MaximumMinimumOptions>() const {  // builtin_options_as<T>() for T = MaximumMinimumOptions: forwards to the named accessor.
+  return builtin_options_as_MaximumMinimumOptions();
+}
+
+template<> inline const tflite::ArgMaxOptions *Operator::builtin_options_as<tflite::ArgMaxOptions>() const {  // builtin_options_as<T>() for T = ArgMaxOptions: forwards to the named accessor.
+  return builtin_options_as_ArgMaxOptions();
+}
+
+template<> inline const tflite::LessOptions *Operator::builtin_options_as<tflite::LessOptions>() const {  // builtin_options_as<T>() for T = LessOptions: forwards to the named accessor.
+  return builtin_options_as_LessOptions();
+}
+
+template<> inline const tflite::NegOptions *Operator::builtin_options_as<tflite::NegOptions>() const {  // builtin_options_as<T>() for T = NegOptions: forwards to the named accessor.
+  return builtin_options_as_NegOptions();
+}
+
+template<> inline const tflite::PadV2Options *Operator::builtin_options_as<tflite::PadV2Options>() const {  // builtin_options_as<T>() for T = PadV2Options: forwards to the named accessor.
+  return builtin_options_as_PadV2Options();
+}
+
+template<> inline const tflite::GreaterOptions *Operator::builtin_options_as<tflite::GreaterOptions>() const {  // builtin_options_as<T>() for T = GreaterOptions: forwards to the named accessor.
+  return builtin_options_as_GreaterOptions();
+}
+
+template<> inline const tflite::GreaterEqualOptions *Operator::builtin_options_as<tflite::GreaterEqualOptions>() const {  // builtin_options_as<T>() for T = GreaterEqualOptions: forwards to the named accessor.
+  return builtin_options_as_GreaterEqualOptions();
+}
+
+template<> inline const tflite::LessEqualOptions *Operator::builtin_options_as<tflite::LessEqualOptions>() const {  // builtin_options_as<T>() for T = LessEqualOptions: forwards to the named accessor.
+  return builtin_options_as_LessEqualOptions();
+}
+
+template<> inline const tflite::SelectOptions *Operator::builtin_options_as<tflite::SelectOptions>() const {  // builtin_options_as<T>() for T = SelectOptions: forwards to the named accessor.
+  return builtin_options_as_SelectOptions();
+}
+
+template<> inline const tflite::SliceOptions *Operator::builtin_options_as<tflite::SliceOptions>() const {  // builtin_options_as<T>() for T = SliceOptions: forwards to the named accessor.
+  return builtin_options_as_SliceOptions();
+}
+
+template<> inline const tflite::TransposeConvOptions *Operator::builtin_options_as<tflite::TransposeConvOptions>() const {  // builtin_options_as<T>() for T = TransposeConvOptions: forwards to the named accessor.
+  return builtin_options_as_TransposeConvOptions();
+}
+
+template<> inline const tflite::SparseToDenseOptions *Operator::builtin_options_as<tflite::SparseToDenseOptions>() const {  // builtin_options_as<T>() for T = SparseToDenseOptions: forwards to the named accessor.
+  return builtin_options_as_SparseToDenseOptions();
+}
+
+template<> inline const tflite::TileOptions *Operator::builtin_options_as<tflite::TileOptions>() const {  // builtin_options_as<T>() for T = TileOptions: forwards to the named accessor.
+  return builtin_options_as_TileOptions();
+}
+
+template<> inline const tflite::ExpandDimsOptions *Operator::builtin_options_as<tflite::ExpandDimsOptions>() const {  // builtin_options_as<T>() for T = ExpandDimsOptions: forwards to the named accessor.
+  return builtin_options_as_ExpandDimsOptions();
+}
+
+template<> inline const tflite::EqualOptions *Operator::builtin_options_as<tflite::EqualOptions>() const {  // builtin_options_as<T>() for T = EqualOptions: forwards to the named accessor.
+  return builtin_options_as_EqualOptions();
+}
+
+template<> inline const tflite::NotEqualOptions *Operator::builtin_options_as<tflite::NotEqualOptions>() const {  // builtin_options_as<T>() for T = NotEqualOptions: forwards to the named accessor.
+  return builtin_options_as_NotEqualOptions();
+}
+
+template<> inline const tflite::ShapeOptions *Operator::builtin_options_as<tflite::ShapeOptions>() const {  // builtin_options_as<T>() for T = ShapeOptions: forwards to the named accessor.
+  return builtin_options_as_ShapeOptions();
+}
+
+template<> inline const tflite::PowOptions *Operator::builtin_options_as<tflite::PowOptions>() const {  // builtin_options_as<T>() for T = PowOptions: forwards to the named accessor.
+  return builtin_options_as_PowOptions();
+}
+
+template<> inline const tflite::ArgMinOptions *Operator::builtin_options_as<tflite::ArgMinOptions>() const {  // builtin_options_as<T>() for T = ArgMinOptions: forwards to the named accessor.
+  return builtin_options_as_ArgMinOptions();
+}
+
+template<> inline const tflite::FakeQuantOptions *Operator::builtin_options_as<tflite::FakeQuantOptions>() const {  // builtin_options_as<T>() for T = FakeQuantOptions: forwards to the named accessor.
+  return builtin_options_as_FakeQuantOptions();
+}
+
+template<> inline const tflite::PackOptions *Operator::builtin_options_as<tflite::PackOptions>() const {  // builtin_options_as<T>() for T = PackOptions: forwards to the named accessor.
+  return builtin_options_as_PackOptions();
+}
+
+template<> inline const tflite::LogicalOrOptions *Operator::builtin_options_as<tflite::LogicalOrOptions>() const {  // builtin_options_as<T>() for T = LogicalOrOptions: forwards to the named accessor.
+  return builtin_options_as_LogicalOrOptions();
+}
+
+template<> inline const tflite::OneHotOptions *Operator::builtin_options_as<tflite::OneHotOptions>() const {  // builtin_options_as<T>() for T = OneHotOptions: forwards to the named accessor.
+  return builtin_options_as_OneHotOptions();
+}
+
+template<> inline const tflite::LogicalAndOptions *Operator::builtin_options_as<tflite::LogicalAndOptions>() const {  // builtin_options_as<T>() for T = LogicalAndOptions: forwards to the named accessor.
+  return builtin_options_as_LogicalAndOptions();
+}
+
+template<> inline const tflite::LogicalNotOptions *Operator::builtin_options_as<tflite::LogicalNotOptions>() const {  // builtin_options_as<T>() for T = LogicalNotOptions: forwards to the named accessor.
+  return builtin_options_as_LogicalNotOptions();
+}
+
+template<> inline const tflite::UnpackOptions *Operator::builtin_options_as<tflite::UnpackOptions>() const {  // builtin_options_as<T>() for T = UnpackOptions: forwards to the named accessor.
+  return builtin_options_as_UnpackOptions();
+}
+
+template<> inline const tflite::FloorDivOptions *Operator::builtin_options_as<tflite::FloorDivOptions>() const {  // builtin_options_as<T>() for T = FloorDivOptions: forwards to the named accessor.
+  return builtin_options_as_FloorDivOptions();
+}
+
+template<> inline const tflite::SquareOptions *Operator::builtin_options_as<tflite::SquareOptions>() const {  // builtin_options_as<T>() for T = SquareOptions: forwards to the named accessor.
+  return builtin_options_as_SquareOptions();
+}
+
+template<> inline const tflite::ZerosLikeOptions *Operator::builtin_options_as<tflite::ZerosLikeOptions>() const {  // builtin_options_as<T>() for T = ZerosLikeOptions: forwards to the named accessor.
+  return builtin_options_as_ZerosLikeOptions();
+}
+
+template<> inline const tflite::FillOptions *Operator::builtin_options_as<tflite::FillOptions>() const {  // builtin_options_as<T>() for T = FillOptions: forwards to the named accessor.
+  return builtin_options_as_FillOptions();
+}
+
+template<> inline const tflite::BidirectionalSequenceLSTMOptions *Operator::builtin_options_as<tflite::BidirectionalSequenceLSTMOptions>() const {  // builtin_options_as<T>() for T = BidirectionalSequenceLSTMOptions: forwards to the named accessor.
+  return builtin_options_as_BidirectionalSequenceLSTMOptions();
+}
+
+template<> inline const tflite::BidirectionalSequenceRNNOptions *Operator::builtin_options_as<tflite::BidirectionalSequenceRNNOptions>() const {  // builtin_options_as<T>() for T = BidirectionalSequenceRNNOptions: forwards to the named accessor.
+  return builtin_options_as_BidirectionalSequenceRNNOptions();
+}
+
+template<> inline const tflite::UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as<tflite::UnidirectionalSequenceLSTMOptions>() const {  // builtin_options_as<T>() for T = UnidirectionalSequenceLSTMOptions: forwards to the named accessor.
+  return builtin_options_as_UnidirectionalSequenceLSTMOptions();
+}
+
+template<> inline const tflite::FloorModOptions *Operator::builtin_options_as<tflite::FloorModOptions>() const {  // builtin_options_as<T>() for T = FloorModOptions: forwards to the named accessor.
+  return builtin_options_as_FloorModOptions();
+}
+
+template<> inline const tflite::RangeOptions *Operator::builtin_options_as<tflite::RangeOptions>() const {  // builtin_options_as<T>() for T = RangeOptions: forwards to the named accessor.
+  return builtin_options_as_RangeOptions();
+}
+
+template<> inline const tflite::ResizeNearestNeighborOptions *Operator::builtin_options_as<tflite::ResizeNearestNeighborOptions>() const {  // builtin_options_as<T>() for T = ResizeNearestNeighborOptions: forwards to the named accessor.
+  return builtin_options_as_ResizeNearestNeighborOptions();
+}
+
+template<> inline const tflite::LeakyReluOptions *Operator::builtin_options_as<tflite::LeakyReluOptions>() const {  // builtin_options_as<T>() for T = LeakyReluOptions: forwards to the named accessor.
+  return builtin_options_as_LeakyReluOptions();
+}
+
+template<> inline const tflite::SquaredDifferenceOptions *Operator::builtin_options_as<tflite::SquaredDifferenceOptions>() const {  // builtin_options_as<T>() for T = SquaredDifferenceOptions: forwards to the named accessor.
+  return builtin_options_as_SquaredDifferenceOptions();
+}
+
+template<> inline const tflite::MirrorPadOptions *Operator::builtin_options_as<tflite::MirrorPadOptions>() const {  // builtin_options_as<T>() for T = MirrorPadOptions: forwards to the named accessor.
+  return builtin_options_as_MirrorPadOptions();
+}
+
+template<> inline const tflite::AbsOptions *Operator::builtin_options_as<tflite::AbsOptions>() const {  // builtin_options_as<T>() for T = AbsOptions: forwards to the named accessor.
+  return builtin_options_as_AbsOptions();
+}
+
+template<> inline const tflite::SplitVOptions *Operator::builtin_options_as<tflite::SplitVOptions>() const {  // builtin_options_as<T>() for T = SplitVOptions: forwards to the named accessor.
+  return builtin_options_as_SplitVOptions();
+}
+
+template<> inline const tflite::UniqueOptions *Operator::builtin_options_as<tflite::UniqueOptions>() const {  // builtin_options_as<T>() for T = UniqueOptions: forwards to the named accessor.
+  return builtin_options_as_UniqueOptions();
+}
+
+template<> inline const tflite::ReverseV2Options *Operator::builtin_options_as<tflite::ReverseV2Options>() const {  // builtin_options_as<T>() for T = ReverseV2Options: forwards to the named accessor.
+  return builtin_options_as_ReverseV2Options();
+}
+
+template<> inline const tflite::AddNOptions *Operator::builtin_options_as<tflite::AddNOptions>() const {  // builtin_options_as<T>() for T = AddNOptions: forwards to the named accessor.
+  return builtin_options_as_AddNOptions();
+}
+
+template<> inline const tflite::GatherNdOptions *Operator::builtin_options_as<tflite::GatherNdOptions>() const {  // builtin_options_as<T>() for T = GatherNdOptions: forwards to the named accessor.
+  return builtin_options_as_GatherNdOptions();
+}
+
+template<> inline const tflite::CosOptions *Operator::builtin_options_as<tflite::CosOptions>() const {  // builtin_options_as<T>() for T = CosOptions: forwards to the named accessor.
+  return builtin_options_as_CosOptions();
+}
+
+template<> inline const tflite::WhereOptions *Operator::builtin_options_as<tflite::WhereOptions>() const {  // builtin_options_as<T>() for T = WhereOptions: forwards to the named accessor.
+  return builtin_options_as_WhereOptions();
+}
+
+template<> inline const tflite::RankOptions *Operator::builtin_options_as<tflite::RankOptions>() const {  // builtin_options_as<T>() for T = RankOptions: forwards to the named accessor.
+  return builtin_options_as_RankOptions();
+}
+
+template<> inline const tflite::ReverseSequenceOptions *Operator::builtin_options_as<tflite::ReverseSequenceOptions>() const {  // builtin_options_as<T>() for T = ReverseSequenceOptions: forwards to the named accessor.
+  return builtin_options_as_ReverseSequenceOptions();
+}
+
+template<> inline const tflite::MatrixDiagOptions *Operator::builtin_options_as<tflite::MatrixDiagOptions>() const {  // builtin_options_as<T>() for T = MatrixDiagOptions: forwards to the named accessor.
+  return builtin_options_as_MatrixDiagOptions();
+}
+
+template<> inline const tflite::QuantizeOptions *Operator::builtin_options_as<tflite::QuantizeOptions>() const {  // builtin_options_as<T>() for T = QuantizeOptions: forwards to the named accessor.
+  return builtin_options_as_QuantizeOptions();
+}
+
+template<> inline const tflite::MatrixSetDiagOptions *Operator::builtin_options_as<tflite::MatrixSetDiagOptions>() const {  // builtin_options_as<T>() for T = MatrixSetDiagOptions: forwards to the named accessor.
+  return builtin_options_as_MatrixSetDiagOptions();
+}
+
+template<> inline const tflite::HardSwishOptions *Operator::builtin_options_as<tflite::HardSwishOptions>() const {  // builtin_options_as<T>() for T = HardSwishOptions: forwards to the named accessor.
+  return builtin_options_as_HardSwishOptions();
+}
+
+template<> inline const tflite::IfOptions *Operator::builtin_options_as<tflite::IfOptions>() const {  // builtin_options_as<T>() for T = IfOptions: forwards to the named accessor.
+  return builtin_options_as_IfOptions();
+}
+
+template<> inline const tflite::WhileOptions *Operator::builtin_options_as<tflite::WhileOptions>() const {  // builtin_options_as<T>() for T = WhileOptions: forwards to the named accessor.
+  return builtin_options_as_WhileOptions();
+}
+
+template<> inline const tflite::DepthToSpaceOptions *Operator::builtin_options_as<tflite::DepthToSpaceOptions>() const {  // builtin_options_as<T>() for T = DepthToSpaceOptions: forwards to the named accessor.
+  return builtin_options_as_DepthToSpaceOptions();
+}
+
+template<> inline const tflite::NonMaxSuppressionV4Options *Operator::builtin_options_as<tflite::NonMaxSuppressionV4Options>() const {  // builtin_options_as<T>() for T = NonMaxSuppressionV4Options: forwards to the named accessor.
+  return builtin_options_as_NonMaxSuppressionV4Options();
+}
+
+template<> inline const tflite::NonMaxSuppressionV5Options *Operator::builtin_options_as<tflite::NonMaxSuppressionV5Options>() const {  // builtin_options_as<T>() for T = NonMaxSuppressionV5Options: forwards to the named accessor.
+  return builtin_options_as_NonMaxSuppressionV5Options();
+}
+
+template<> inline const tflite::ScatterNdOptions *Operator::builtin_options_as<tflite::ScatterNdOptions>() const {  // builtin_options_as<T>() for T = ScatterNdOptions: forwards to the named accessor.
+  return builtin_options_as_ScatterNdOptions();
+}
+
+template<> inline const tflite::SelectV2Options *Operator::builtin_options_as<tflite::SelectV2Options>() const {  // builtin_options_as<T>() for T = SelectV2Options: forwards to the named accessor.
+  return builtin_options_as_SelectV2Options();
+}
+
+template<> inline const tflite::DensifyOptions *Operator::builtin_options_as<tflite::DensifyOptions>() const {  // builtin_options_as<T>() for T = DensifyOptions: forwards to the named accessor.
+  return builtin_options_as_DensifyOptions();
+}
+
+template<> inline const tflite::SegmentSumOptions *Operator::builtin_options_as<tflite::SegmentSumOptions>() const {  // builtin_options_as<T>() for T = SegmentSumOptions: forwards to the named accessor.
+  return builtin_options_as_SegmentSumOptions();
+}
+
+template<> inline const tflite::BatchMatMulOptions *Operator::builtin_options_as<tflite::BatchMatMulOptions>() const {  // builtin_options_as<T>() for T = BatchMatMulOptions: forwards to the named accessor.
+  return builtin_options_as_BatchMatMulOptions();
+}
+
+template<> inline const tflite::CumsumOptions *Operator::builtin_options_as<tflite::CumsumOptions>() const {  // builtin_options_as<T>() for T = CumsumOptions: forwards to the named accessor.
+  return builtin_options_as_CumsumOptions();
+}
+
+template<> inline const tflite::CallOnceOptions *Operator::builtin_options_as<tflite::CallOnceOptions>() const {  // builtin_options_as<T>() for T = CallOnceOptions: forwards to the named accessor.
+  return builtin_options_as_CallOnceOptions();
+}
+
+template<> inline const tflite::BroadcastToOptions *Operator::builtin_options_as<tflite::BroadcastToOptions>() const {  // builtin_options_as<T>() for T = BroadcastToOptions: forwards to the named accessor.
+  return builtin_options_as_BroadcastToOptions();
+}
+
+template<> inline const tflite::Rfft2dOptions *Operator::builtin_options_as<tflite::Rfft2dOptions>() const {  // builtin_options_as<T>() for T = Rfft2dOptions: forwards to the named accessor.
+  return builtin_options_as_Rfft2dOptions();
+}
+
+template<> inline const tflite::Conv3DOptions *Operator::builtin_options_as<tflite::Conv3DOptions>() const {  // builtin_options_as<T>() for T = Conv3DOptions: forwards to the named accessor.
+  return builtin_options_as_Conv3DOptions();
+}
+
+template<> inline const tflite::HashtableOptions *Operator::builtin_options_as<tflite::HashtableOptions>() const {  // builtin_options_as<T>() for T = HashtableOptions: forwards to the named accessor.
+  return builtin_options_as_HashtableOptions();
+}
+
+template<> inline const tflite::HashtableFindOptions *Operator::builtin_options_as<tflite::HashtableFindOptions>() const {  // builtin_options_as<T>() for T = HashtableFindOptions: forwards to the named accessor.
+  return builtin_options_as_HashtableFindOptions();
+}
+
+template<> inline const tflite::HashtableImportOptions *Operator::builtin_options_as<tflite::HashtableImportOptions>() const {  // builtin_options_as<T>() for T = HashtableImportOptions: forwards to the named accessor.
+  return builtin_options_as_HashtableImportOptions();
+}
+
+template<> inline const tflite::HashtableSizeOptions *Operator::builtin_options_as<tflite::HashtableSizeOptions>() const {  // builtin_options_as<T>() for T = HashtableSizeOptions: forwards to the named accessor.
+  return builtin_options_as_HashtableSizeOptions();
+}
+
+template<> inline const tflite::VarHandleOptions *Operator::builtin_options_as<tflite::VarHandleOptions>() const {  // builtin_options_as<T>() for T = VarHandleOptions: forwards to the named accessor.
+  return builtin_options_as_VarHandleOptions();
+}
+
+template<> inline const tflite::ReadVariableOptions *Operator::builtin_options_as<tflite::ReadVariableOptions>() const {  // builtin_options_as<T>() for T = ReadVariableOptions: forwards to the named accessor.
+  return builtin_options_as_ReadVariableOptions();
+}
+
+template<> inline const tflite::AssignVariableOptions *Operator::builtin_options_as<tflite::AssignVariableOptions>() const {  // builtin_options_as<T>() for T = AssignVariableOptions: forwards to the named accessor.
+  return builtin_options_as_AssignVariableOptions();
+}
+
+template<> inline const tflite::RandomOptions *Operator::builtin_options_as<tflite::RandomOptions>() const {  // builtin_options_as<T>() for T = RandomOptions: forwards to the named accessor.
+  return builtin_options_as_RandomOptions();
+}
+
+template<> inline const tflite::BucketizeOptions *Operator::builtin_options_as<tflite::BucketizeOptions>() const {  // builtin_options_as<T>() for T = BucketizeOptions: forwards to the named accessor.
+  return builtin_options_as_BucketizeOptions();
+}
+
+template<> inline const tflite::GeluOptions *Operator::builtin_options_as<tflite::GeluOptions>() const {  // builtin_options_as<T>() for T = GeluOptions: forwards to the named accessor.
+  return builtin_options_as_GeluOptions();
+}
+
+template<> inline const tflite::DynamicUpdateSliceOptions *Operator::builtin_options_as<tflite::DynamicUpdateSliceOptions>() const {  // builtin_options_as<T>() for T = DynamicUpdateSliceOptions: forwards to the named accessor.
+  return builtin_options_as_DynamicUpdateSliceOptions();
+}
+
+template<> inline const tflite::UnsortedSegmentProdOptions *Operator::builtin_options_as<tflite::UnsortedSegmentProdOptions>() const {  // builtin_options_as<T>() for T = UnsortedSegmentProdOptions: forwards to the named accessor.
+  return builtin_options_as_UnsortedSegmentProdOptions();
+}
+
+template<> inline const tflite::UnsortedSegmentMaxOptions *Operator::builtin_options_as<tflite::UnsortedSegmentMaxOptions>() const {  // builtin_options_as<T>() for T = UnsortedSegmentMaxOptions: forwards to the named accessor.
+  return builtin_options_as_UnsortedSegmentMaxOptions();
+}
+
+template<> inline const tflite::UnsortedSegmentMinOptions *Operator::builtin_options_as<tflite::UnsortedSegmentMinOptions>() const {  // builtin_options_as<T>() for T = UnsortedSegmentMinOptions: forwards to the named accessor.
+  return builtin_options_as_UnsortedSegmentMinOptions();
+}
+
+template<> inline const tflite::UnsortedSegmentSumOptions *Operator::builtin_options_as<tflite::UnsortedSegmentSumOptions>() const {  // builtin_options_as<T>() for T = UnsortedSegmentSumOptions: forwards to the named accessor.
+  return builtin_options_as_UnsortedSegmentSumOptions();
+}
+
+template<> inline const tflite::ATan2Options *Operator::builtin_options_as<tflite::ATan2Options>() const {  // builtin_options_as<T>() for T = ATan2Options: forwards to the named accessor.
+  return builtin_options_as_ATan2Options();
+}
+
+template<> inline const tflite::SignOptions *Operator::builtin_options_as<tflite::SignOptions>() const {  // builtin_options_as<T>() for T = SignOptions: forwards to the named accessor.
+  return builtin_options_as_SignOptions();
+}
+
+struct OperatorBuilder {  // Field-by-field writer for an Operator table: the constructor calls StartTable(), each add_* writes one field, Finish() calls EndTable().
+  typedef Operator Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that every add_* call writes into; held by reference, not owned
+  flatbuffers::uoffset_t start_;  // value returned by fbb_.StartTable(); handed back to EndTable() in Finish()
+  void add_opcode_index(uint32_t opcode_index) {
+    fbb_.AddElement<uint32_t>(Operator::VT_OPCODE_INDEX, opcode_index, 0);  // trailing 0 is presumably the field default — confirm against FlatBufferBuilder::AddElement
+  }
+  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
+    fbb_.AddOffset(Operator::VT_INPUTS, inputs);
+  }
+  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
+    fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
+  }
+  void add_builtin_options_type(tflite::BuiltinOptions builtin_options_type) {
+    fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast<uint8_t>(builtin_options_type), 0);  // union type tag is written as a uint8_t
+  }
+  void add_builtin_options(flatbuffers::Offset<void> builtin_options) {  // untyped offset: the concrete options type is carried separately by add_builtin_options_type()
+    fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options);
+  }
+  void add_custom_options(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options) {
+    fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
+  }
+  void add_custom_options_format(tflite::CustomOptionsFormat custom_options_format) {
+    fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast<int8_t>(custom_options_format), 0);  // enum is written as an int8_t
+  }
+  void add_mutating_variable_inputs(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs) {
+    fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs);
+  }
+  void add_intermediates(flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates) {
+    fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates);
+  }
+  explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // binds to _fbb and opens the table immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Operator> Finish() {  // closes the table opened in the constructor and returns its offset typed as Offset<Operator>
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Operator>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Operator> CreateOperator(  // Builds one Operator table from scalars and already-serialized offsets; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t opcode_index = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0) {
+  OperatorBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_intermediates(intermediates);  // NOTE(review): add order differs from parameter order (offset/uint32 fields first, the two 8-bit fields last); presumably intentional for packing -- do not reorder without checking
+  builder_.add_mutating_variable_inputs(mutating_variable_inputs);
+  builder_.add_custom_options(custom_options);
+  builder_.add_builtin_options(builtin_options);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  builder_.add_opcode_index(opcode_index);
+  builder_.add_custom_options_format(custom_options_format);  // written as int8_t by the builder
+  builder_.add_builtin_options_type(builtin_options_type);  // written as uint8_t by the builder
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t opcode_index = 0,
+    const std::vector<int32_t> *inputs = nullptr,
+    const std::vector<int32_t> *outputs = nullptr,
+    tflite::BuiltinOptions builtin_options_type = tflite::BuiltinOptions_NONE,
+    flatbuffers::Offset<void> builtin_options = 0,
+    const std::vector<uint8_t> *custom_options = nullptr,
+    tflite::CustomOptionsFormat custom_options_format = tflite::CustomOptionsFormat_FLEXBUFFERS,
+    const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+    const std::vector<int32_t> *intermediates = nullptr) {
+  // Serialize each caller-supplied vector up front, in parameter order. A null
+  // pointer skips serialization and leaves that offset at 0.
+  const auto inputs_off = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+  const auto outputs_off = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+  const auto custom_options_off = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+  const auto mutating_inputs_off = mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+  const auto intermediates_off = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+  // Hand the resulting offsets, plus the scalar and pre-built arguments, to the
+  // offset-based CreateOperator overload, which writes the table itself.
+  return tflite::CreateOperator(
+      _fbb, opcode_index,
+      inputs_off, outputs_off,
+      builtin_options_type, builtin_options,
+      custom_options_off, custom_options_format,
+      mutating_inputs_off,
+      intermediates_off);
+}
+
+flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking an OperatorT; its definition is not in this section
+
+struct SubGraphT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the SubGraph table.
+  typedef SubGraph TableType;
+  std::vector<std::unique_ptr<tflite::TensorT>> tensors{};  // each element is exclusively owned via unique_ptr
+  std::vector<int32_t> inputs{};
+  std::vector<int32_t> outputs{};
+  std::vector<std::unique_ptr<tflite::OperatorT>> operators{};  // each element is exclusively owned via unique_ptr
+  std::string name{};
+  SubGraphT() = default;
+  SubGraphT(const SubGraphT &o);  // declared here, defined elsewhere (unique_ptr members rule out a defaulted copy)
+  SubGraphT(SubGraphT&&) FLATBUFFERS_NOEXCEPT = default;
+  SubGraphT &operator=(SubGraphT o) FLATBUFFERS_NOEXCEPT;  // takes its argument by value; definition not in this section
+};
+
+struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized SubGraph table.
+  typedef SubGraphT NativeTableType;
+  typedef SubGraphBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field ids passed to GetPointer/VerifyOffset below
+    VT_TENSORS = 4,
+    VT_INPUTS = 6,
+    VT_OUTPUTS = 8,
+    VT_OPERATORS = 10,
+    VT_NAME = 12
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>> *tensors() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>> *>(VT_TENSORS);
+  }
+  const flatbuffers::Vector<int32_t> *inputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_INPUTS);
+  }
+  const flatbuffers::Vector<int32_t> *outputs() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>> *operators() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>> *>(VT_OPERATORS);
+  }
+  const flatbuffers::String *name() const {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_TENSORS) &&
+           verifier.VerifyVector(tensors()) &&
+           verifier.VerifyVectorOfTables(tensors()) &&  // vectors of tables get an additional per-element check
+           VerifyOffset(verifier, VT_INPUTS) &&
+           verifier.VerifyVector(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) &&
+           verifier.VerifyVector(outputs()) &&
+           VerifyOffset(verifier, VT_OPERATORS) &&
+           verifier.VerifyVector(operators()) &&
+           verifier.VerifyVectorOfTables(operators()) &&
+           VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) &&
+           verifier.EndTable();
+  }
+  SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<SubGraph> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct SubGraphBuilder {  // Field-by-field writer for one SubGraph table on top of a caller-supplied FlatBufferBuilder.
+  typedef SubGraph Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives every field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>> tensors) {
+    fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
+  }
+  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs) {
+    fbb_.AddOffset(SubGraph::VT_INPUTS, inputs);
+  }
+  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs) {
+    fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
+  }
+  void add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>> operators) {
+    fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
+  }
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(SubGraph::VT_NAME, name);
+  }
+  explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SubGraph> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SubGraph>(end);  // re-type the raw end offset as Offset<SubGraph>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(  // Builds one SubGraph table from already-serialized offsets; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>> tensors = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>> operators = 0,
+    flatbuffers::Offset<flatbuffers::String> name = 0) {
+  SubGraphBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_name(name);  // fields are added in reverse parameter order
+  builder_.add_operators(operators);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  builder_.add_tensors(tensors);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<flatbuffers::Offset<tflite::Tensor>> *tensors = nullptr,
+    const std::vector<int32_t> *inputs = nullptr,
+    const std::vector<int32_t> *outputs = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::Operator>> *operators = nullptr,
+    const char *name = nullptr) {
+  // Serialize whichever vectors/string were supplied; a null argument leaves its offset at 0.
+  const auto tensors_off = tensors ? _fbb.CreateVector<flatbuffers::Offset<tflite::Tensor>>(*tensors) : 0;
+  const auto inputs_off = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+  const auto outputs_off = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+  const auto operators_off = operators ? _fbb.CreateVector<flatbuffers::Offset<tflite::Operator>>(*operators) : 0;
+  const auto name_off = name ? _fbb.CreateString(name) : 0;
+  // Delegate table construction to the offset-based overload.
+  return tflite::CreateSubGraph(
+      _fbb,
+      tensors_off, inputs_off, outputs_off,
+      operators_off,
+      name_off);
+}
+
+flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a SubGraphT; its definition is not in this section
+
+struct BufferT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the Buffer table.
+  typedef Buffer TableType;
+  std::vector<uint8_t> data{};  // raw bytes; starts empty
+};
+
+struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized Buffer table.
+  typedef BufferT NativeTableType;
+  typedef BufferBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field id passed to GetPointer/VerifyOffset below
+    VT_DATA = 4
+  };
+  const flatbuffers::Vector<uint8_t> *data() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DATA);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_DATA) &&
+           verifier.VerifyVector(data()) &&
+           verifier.EndTable();
+  }
+  BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<Buffer> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct BufferBuilder {  // Writer for one Buffer table on top of a caller-supplied FlatBufferBuilder.
+  typedef Buffer Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives the field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data) {
+    fbb_.AddOffset(Buffer::VT_DATA, data);
+  }
+  explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Buffer> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Buffer>(end);  // re-type the raw end offset as Offset<Buffer>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Buffer> CreateBuffer(  // Builds one Buffer table from an already-serialized byte vector; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data = 0) {
+  BufferBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_data(data);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Buffer> CreateBufferDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<uint8_t> *data = nullptr) {
+  // When data is supplied, request 16-byte alignment for the vector before creating it.
+  if (data) { _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); }
+  const auto data_off = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+  // A null data pointer reaches the offset-based overload as offset 0.
+  return tflite::CreateBuffer(_fbb, data_off);
+}
+
+flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a BufferT; its definition is not in this section
+
+struct MetadataT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the Metadata table.
+  typedef Metadata TableType;
+  std::string name{};
+  uint32_t buffer = 0;  // presumably an index into the model's buffers vector -- confirm against the schema
+};
+
+struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized Metadata table.
+  typedef MetadataT NativeTableType;
+  typedef MetadataBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field ids passed to GetPointer/GetField/Verify* below
+    VT_NAME = 4,
+    VT_BUFFER = 6
+  };
+  const flatbuffers::String *name() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  uint32_t buffer() const {
+    return GetField<uint32_t>(VT_BUFFER, 0);  // second argument 0 is presumably the value returned when the field is absent -- confirm
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) &&
+           VerifyField<uint32_t>(verifier, VT_BUFFER, 4) &&  // trailing 4 is presumably the required alignment in bytes -- confirm
+           verifier.EndTable();
+  }
+  MetadataT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<Metadata> Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct MetadataBuilder {  // Field-by-field writer for one Metadata table on top of a caller-supplied FlatBufferBuilder.
+  typedef Metadata Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives every field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(Metadata::VT_NAME, name);
+  }
+  void add_buffer(uint32_t buffer) {
+    fbb_.AddElement<uint32_t>(Metadata::VT_BUFFER, buffer, 0);  // trailing 0 is presumably the schema default -- confirm against FlatBufferBuilder::AddElement
+  }
+  explicit MetadataBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Metadata> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Metadata>(end);  // re-type the raw end offset as Offset<Metadata>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Metadata> CreateMetadata(  // Builds one Metadata table from a serialized name and a buffer value; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::String> name = 0,
+    uint32_t buffer = 0) {
+  MetadataBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_buffer(buffer);  // fields are added in reverse parameter order
+  builder_.add_name(name);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Metadata> CreateMetadataDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const char *name = nullptr,
+    uint32_t buffer = 0) {
+  // A null name yields offset 0; no string is created in that case.
+  flatbuffers::Offset<flatbuffers::String> name_off = 0;
+  if (name) { name_off = _fbb.CreateString(name); }
+  // Delegate table construction to the offset-based overload.
+  return tflite::CreateMetadata(_fbb, name_off, buffer);
+}
+
+flatbuffers::Offset<Metadata> CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a MetadataT; its definition is not in this section
+
+struct TensorMapT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the TensorMap table.
+  typedef TensorMap TableType;
+  std::string name{};
+  uint32_t tensor_index = 0;  // presumably an index into a subgraph's tensors vector -- confirm against the schema
+};
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized TensorMap table.
+  typedef TensorMapT NativeTableType;
+  typedef TensorMapBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field ids passed to GetPointer/GetField/Verify* below
+    VT_NAME = 4,
+    VT_TENSOR_INDEX = 6
+  };
+  const flatbuffers::String *name() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  uint32_t tensor_index() const {
+    return GetField<uint32_t>(VT_TENSOR_INDEX, 0);  // second argument 0 is presumably the value returned when the field is absent -- confirm
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) &&
+           VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX, 4) &&  // trailing 4 is presumably the required alignment in bytes -- confirm
+           verifier.EndTable();
+  }
+  TensorMapT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<TensorMap> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct TensorMapBuilder {  // Field-by-field writer for one TensorMap table on top of a caller-supplied FlatBufferBuilder.
+  typedef TensorMap Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives every field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
+    fbb_.AddOffset(TensorMap::VT_NAME, name);
+  }
+  void add_tensor_index(uint32_t tensor_index) {
+    fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);  // trailing 0 is presumably the schema default -- confirm against FlatBufferBuilder::AddElement
+  }
+  explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<TensorMap> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TensorMap>(end);  // re-type the raw end offset as Offset<TensorMap>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMap(  // Builds one TensorMap table from a serialized name and a tensor index; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::String> name = 0,
+    uint32_t tensor_index = 0) {
+  TensorMapBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_tensor_index(tensor_index);  // fields are added in reverse parameter order
+  builder_.add_name(name);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const char *name = nullptr,
+    uint32_t tensor_index = 0) {
+  // A null name yields offset 0; no string is created in that case.
+  flatbuffers::Offset<flatbuffers::String> name_off = 0;
+  if (name) { name_off = _fbb.CreateString(name); }
+  // Delegate table construction to the offset-based overload.
+  return tflite::CreateTensorMap(_fbb, name_off, tensor_index);
+}
+
+flatbuffers::Offset<TensorMap> CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a TensorMapT; its definition is not in this section
+
+struct SignatureDefT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the SignatureDef table.
+  typedef SignatureDef TableType;
+  std::vector<std::unique_ptr<tflite::TensorMapT>> inputs{};  // each element is exclusively owned via unique_ptr
+  std::vector<std::unique_ptr<tflite::TensorMapT>> outputs{};  // each element is exclusively owned via unique_ptr
+  std::string signature_key{};
+  uint32_t subgraph_index = 0;
+  SignatureDefT() = default;
+  SignatureDefT(const SignatureDefT &o);  // declared here, defined elsewhere (unique_ptr members rule out a defaulted copy)
+  SignatureDefT(SignatureDefT&&) FLATBUFFERS_NOEXCEPT = default;
+  SignatureDefT &operator=(SignatureDefT o) FLATBUFFERS_NOEXCEPT;  // takes its argument by value; definition not in this section
+};
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized SignatureDef table.
+  typedef SignatureDefT NativeTableType;
+  typedef SignatureDefBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field ids passed to GetPointer/GetField/Verify* below
+    VT_INPUTS = 4,
+    VT_OUTPUTS = 6,
+    VT_SIGNATURE_KEY = 8,
+    VT_SUBGRAPH_INDEX = 12  // NOTE(review): 10 is skipped; presumably a deprecated schema field occupies it -- confirm against the schema, do not renumber
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *inputs() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *>(VT_INPUTS);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *outputs() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>> *>(VT_OUTPUTS);
+  }
+  const flatbuffers::String *signature_key() const {
+    return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+  }
+  uint32_t subgraph_index() const {
+    return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0);  // second argument 0 is presumably the value returned when the field is absent -- confirm
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyOffset(verifier, VT_INPUTS) &&
+           verifier.VerifyVector(inputs()) &&
+           verifier.VerifyVectorOfTables(inputs()) &&  // vectors of tables get an additional per-element check
+           VerifyOffset(verifier, VT_OUTPUTS) &&
+           verifier.VerifyVector(outputs()) &&
+           verifier.VerifyVectorOfTables(outputs()) &&
+           VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+           verifier.VerifyString(signature_key()) &&
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX, 4) &&  // trailing 4 is presumably the required alignment in bytes -- confirm
+           verifier.EndTable();
+  }
+  SignatureDefT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<SignatureDef> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct SignatureDefBuilder {  // Field-by-field writer for one SignatureDef table on top of a caller-supplied FlatBufferBuilder.
+  typedef SignatureDef Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives every field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_inputs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> inputs) {
+    fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+  }
+  void add_outputs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> outputs) {
+    fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+  }
+  void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key) {
+    fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+  }
+  void add_subgraph_index(uint32_t subgraph_index) {
+    fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);  // trailing 0 is presumably the schema default -- confirm against FlatBufferBuilder::AddElement
+  }
+  explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SignatureDef> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SignatureDef>(end);  // re-type the raw end offset as Offset<SignatureDef>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(  // Builds one SignatureDef table from already-serialized offsets and a subgraph index; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> inputs = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::TensorMap>>> outputs = 0,
+    flatbuffers::Offset<flatbuffers::String> signature_key = 0,
+    uint32_t subgraph_index = 0) {
+  SignatureDefBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_subgraph_index(subgraph_index);  // fields are added in reverse parameter order
+  builder_.add_signature_key(signature_key);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    const std::vector<flatbuffers::Offset<tflite::TensorMap>> *inputs = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::TensorMap>> *outputs = nullptr,
+    const char *signature_key = nullptr,
+    uint32_t subgraph_index = 0) {
+  // Serialize the supplied vectors/string; null arguments leave their offsets at 0.
+  const auto inputs_off = inputs ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(*inputs) : 0;
+  const auto outputs_off = outputs ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(*outputs) : 0;
+  const auto signature_key_off = signature_key ? _fbb.CreateString(signature_key) : 0;
+  // Delegate table construction to the offset-based overload.
+  return tflite::CreateSignatureDef(
+      _fbb,
+      inputs_off, outputs_off,
+      signature_key_off, subgraph_index);
+}
+
+flatbuffers::Offset<SignatureDef> CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a SignatureDefT; its definition is not in this section
+
+struct ModelT : public flatbuffers::NativeTable {  // Native (std container based) counterpart of the Model table.
+  typedef Model TableType;
+  uint32_t version = 0;
+  std::vector<std::unique_ptr<tflite::OperatorCodeT>> operator_codes{};  // each element is exclusively owned via unique_ptr
+  std::vector<std::unique_ptr<tflite::SubGraphT>> subgraphs{};
+  std::string description{};
+  std::vector<std::unique_ptr<tflite::BufferT>> buffers{};
+  std::vector<int32_t> metadata_buffer{};
+  std::vector<std::unique_ptr<tflite::MetadataT>> metadata{};
+  std::vector<std::unique_ptr<tflite::SignatureDefT>> signature_defs{};
+  ModelT() = default;
+  ModelT(const ModelT &o);  // declared here, defined elsewhere (unique_ptr members rule out a defaulted copy)
+  ModelT(ModelT&&) FLATBUFFERS_NOEXCEPT = default;
+  ModelT &operator=(ModelT o) FLATBUFFERS_NOEXCEPT;  // takes its argument by value; definition not in this section
+};
+
+struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {  // Read-only accessor view over a serialized Model table.
+  typedef ModelT NativeTableType;
+  typedef ModelBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {  // per-field ids passed to GetPointer/GetField/Verify* below
+    VT_VERSION = 4,
+    VT_OPERATOR_CODES = 6,
+    VT_SUBGRAPHS = 8,
+    VT_DESCRIPTION = 10,
+    VT_BUFFERS = 12,
+    VT_METADATA_BUFFER = 14,
+    VT_METADATA = 16,
+    VT_SIGNATURE_DEFS = 18
+  };
+  uint32_t version() const {
+    return GetField<uint32_t>(VT_VERSION, 0);  // second argument 0 is presumably the value returned when the field is absent -- confirm
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *operator_codes() const {  // presumably nullptr when the field is absent -- confirm against flatbuffers::Table::GetPointer
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>> *>(VT_OPERATOR_CODES);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>> *subgraphs() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>> *>(VT_SUBGRAPHS);
+  }
+  const flatbuffers::String *description() const {
+    return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>> *buffers() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>> *>(VT_BUFFERS);
+  }
+  const flatbuffers::Vector<int32_t> *metadata_buffer() const {
+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>> *metadata() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>> *>(VT_METADATA);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>> *signature_defs() const {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>> *>(VT_SIGNATURE_DEFS);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const {  // true only if every check in the chain passes; && stops at the first failure
+    return VerifyTableStart(verifier) &&
+           VerifyField<uint32_t>(verifier, VT_VERSION, 4) &&  // trailing 4 is presumably the required alignment in bytes -- confirm
+           VerifyOffset(verifier, VT_OPERATOR_CODES) &&
+           verifier.VerifyVector(operator_codes()) &&
+           verifier.VerifyVectorOfTables(operator_codes()) &&  // vectors of tables get an additional per-element check
+           VerifyOffset(verifier, VT_SUBGRAPHS) &&
+           verifier.VerifyVector(subgraphs()) &&
+           verifier.VerifyVectorOfTables(subgraphs()) &&
+           VerifyOffset(verifier, VT_DESCRIPTION) &&
+           verifier.VerifyString(description()) &&
+           VerifyOffset(verifier, VT_BUFFERS) &&
+           verifier.VerifyVector(buffers()) &&
+           verifier.VerifyVectorOfTables(buffers()) &&
+           VerifyOffset(verifier, VT_METADATA_BUFFER) &&
+           verifier.VerifyVector(metadata_buffer()) &&
+           VerifyOffset(verifier, VT_METADATA) &&
+           verifier.VerifyVector(metadata()) &&
+           verifier.VerifyVectorOfTables(metadata()) &&
+           VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+           verifier.VerifyVector(signature_defs()) &&
+           verifier.VerifyVectorOfTables(signature_defs()) &&
+           verifier.EndTable();
+  }
+  ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;  // declared here; definition not in this section
+  static flatbuffers::Offset<Model> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declared here; definition not in this section
+};
+
+struct ModelBuilder {  // Field-by-field writer for one Model table on top of a caller-supplied FlatBufferBuilder.
+  typedef Model Table;
+  flatbuffers::FlatBufferBuilder &fbb_;  // builder that receives every field; held by reference
+  flatbuffers::uoffset_t start_;  // value returned by StartTable() in the constructor; passed to EndTable() in Finish()
+  void add_version(uint32_t version) {
+    fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0);  // trailing 0 is presumably the schema default -- confirm against FlatBufferBuilder::AddElement
+  }
+  void add_operator_codes(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>>> operator_codes) {
+    fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
+  }
+  void add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>> subgraphs) {
+    fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
+  }
+  void add_description(flatbuffers::Offset<flatbuffers::String> description) {
+    fbb_.AddOffset(Model::VT_DESCRIPTION, description);
+  }
+  void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>> buffers) {
+    fbb_.AddOffset(Model::VT_BUFFERS, buffers);
+  }
+  void add_metadata_buffer(flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer) {
+    fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
+  }
+  void add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>> metadata) {
+    fbb_.AddOffset(Model::VT_METADATA, metadata);
+  }
+  void add_signature_defs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>> signature_defs) {
+    fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+  }
+  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)  // starts a new table on _fbb immediately
+        : fbb_(_fbb) {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Model> Finish() {  // ends the table begun in the constructor and returns its offset
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Model>(end);  // re-type the raw end offset as Offset<Model>
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Model> CreateModel(  // Builds one Model table from a version number and already-serialized offsets; returns the table's offset.
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t version = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::OperatorCode>>> operator_codes = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SubGraph>>> subgraphs = 0,
+    flatbuffers::Offset<flatbuffers::String> description = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>> buffers = 0,
+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>> metadata = 0,
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>> signature_defs = 0) {
+  ModelBuilder builder_(_fbb);  // constructor starts the table on _fbb
+  builder_.add_signature_defs(signature_defs);  // fields are added in reverse parameter order
+  builder_.add_metadata(metadata);
+  builder_.add_metadata_buffer(metadata_buffer);
+  builder_.add_buffers(buffers);
+  builder_.add_description(description);
+  builder_.add_subgraphs(subgraphs);
+  builder_.add_operator_codes(operator_codes);
+  builder_.add_version(version);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<Model> CreateModelDirect(
+    flatbuffers::FlatBufferBuilder &_fbb,
+    uint32_t version = 0,
+    const std::vector<flatbuffers::Offset<tflite::OperatorCode>> *operator_codes = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::SubGraph>> *subgraphs = nullptr,
+    const char *description = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::Buffer>> *buffers = nullptr,
+    const std::vector<int32_t> *metadata_buffer = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::Metadata>> *metadata = nullptr,
+    const std::vector<flatbuffers::Offset<tflite::SignatureDef>> *signature_defs = nullptr) {
+  // Serialize each caller-supplied vector/string up front, in parameter order.
+  // A null pointer skips serialization and leaves that offset at 0.
+  const auto operator_codes_off = operator_codes ? _fbb.CreateVector<flatbuffers::Offset<tflite::OperatorCode>>(*operator_codes) : 0;
+  const auto subgraphs_off = subgraphs ? _fbb.CreateVector<flatbuffers::Offset<tflite::SubGraph>>(*subgraphs) : 0;
+  const auto description_off = description ? _fbb.CreateString(description) : 0;
+  const auto buffers_off = buffers ? _fbb.CreateVector<flatbuffers::Offset<tflite::Buffer>>(*buffers) : 0;
+  const auto metadata_buffer_off = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+  const auto metadata_off = metadata ? _fbb.CreateVector<flatbuffers::Offset<tflite::Metadata>>(*metadata) : 0;
+  const auto signature_defs_off = signature_defs ? _fbb.CreateVector<flatbuffers::Offset<tflite::SignatureDef>>(*signature_defs) : 0;
+  // Delegate table construction to the offset-based overload.
+  return tflite::CreateModel(
+      _fbb,
+      version,
+      operator_codes_off, subgraphs_off,
+      description_off, buffers_off,
+      metadata_buffer_off, metadata_off,
+      signature_defs_off);
+}
+
+flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);  // declaration only: overload taking a ModelT; its definition is not in this section
+
+inline CustomQuantizationT *CustomQuantization::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<CustomQuantizationT> unpacked(new CustomQuantizationT());  // owned here until released below
+  UnPackTo(unpacked.get(), _resolver);  // fill the new object from this table
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+inline void CustomQuantization::UnPackTo(CustomQuantizationT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // not needed for this table
+  const auto *serialized = custom();  // null when there is nothing to copy
+  if (serialized) { _o->custom.resize(serialized->size()); std::copy(serialized->begin(), serialized->end(), _o->custom.begin()); }
+}
+
+inline flatbuffers::Offset<CustomQuantization> CustomQuantization::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // static convenience wrapper
+  return CreateCustomQuantization(_fbb, _o, _rehasher);  // forwards all three arguments unchanged
+}
+
+inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Serializes a CustomQuantizationT into _fbb and returns the table's offset.
+  (void)_rehasher;  // silence unused-parameter warnings
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CustomQuantizationT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // argument bundle that is not used further in this function; the cast silences the unused-variable warning
+  _fbb.ForceVectorAlignment(_o->custom.size(), sizeof(uint8_t), 16);  // request 16-byte alignment for the byte vector; NOTE(review): _o is dereferenced without a null check
+  auto _custom = _o->custom.size() ? _fbb.CreateVector(_o->custom) : 0;  // an empty vector is passed on as offset 0; no vector is created
+  return tflite::CreateCustomQuantization(  // delegate to the offset-based overload
+      _fbb,
+      _custom);
+}
+
+inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<QuantizationParametersT> unpacked(new QuantizationParametersT());  // allocate a new QuantizationParametersT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the fields of this table into *_o: vectors are resized and copied element by element, the `details` union is unpacked only when present.
+inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = min()) { const flatbuffers::uoffset_t count = src->size(); _o->min.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->min[idx] = src->Get(idx); } }
+  if (const auto *src = max()) { const flatbuffers::uoffset_t count = src->size(); _o->max.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->max[idx] = src->Get(idx); } }
+  if (const auto *src = scale()) { const flatbuffers::uoffset_t count = src->size(); _o->scale.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->scale[idx] = src->Get(idx); } }
+  if (const auto *src = zero_point()) { const flatbuffers::uoffset_t count = src->size(); _o->zero_point.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->zero_point[idx] = src->Get(idx); } }
+  _o->details.type = details_type();
+  if (const auto payload = details()) { _o->details.value = tflite::QuantizationDetailsUnion::UnPack(payload, details_type(), _resolver); }
+  _o->quantized_dimension = quantized_dimension();
+}
+
+inline flatbuffers::Offset<QuantizationParameters> QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateQuantizationParameters(_fbb, _o, _rehasher);  // Pack only forwards to the CreateQuantizationParameters overload that takes a QuantizationParametersT
+}
+
+// Writes *_o into _fbb as a QuantizationParameters table and returns its offset; empty vectors are passed on as offset 0. Creation order below matches field order.
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const QuantizationParametersT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto min_off = _o->min.empty() ? 0 : _fbb.CreateVector(_o->min);
+  const auto max_off = _o->max.empty() ? 0 : _fbb.CreateVector(_o->max);
+  const auto scale_off = _o->scale.empty() ? 0 : _fbb.CreateVector(_o->scale);
+  const auto zero_point_off = _o->zero_point.empty() ? 0 : _fbb.CreateVector(_o->zero_point);
+  const auto details_type_val = _o->details.type;
+  const auto details_off = _o->details.Pack(_fbb);
+  const auto quantized_dimension_val = _o->quantized_dimension;
+  return tflite::CreateQuantizationParameters(
+      _fbb,
+      min_off,
+      max_off,
+      scale_off,
+      zero_point_off,
+      details_type_val,
+      details_off,
+      quantized_dimension_val);
+}
+
+inline Int32VectorT *Int32Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Int32VectorT> unpacked(new Int32VectorT());  // allocate a new Int32VectorT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the `values` vector of this table into _o->values element by element; when the vector is absent, _o->values is left untouched.
+inline void Int32Vector::UnPackTo(Int32VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = values()) { const flatbuffers::uoffset_t count = src->size(); _o->values.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->values[idx] = src->Get(idx); } }
+}
+
+inline flatbuffers::Offset<Int32Vector> Int32Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateInt32Vector(_fbb, _o, _rehasher);  // Pack only forwards to the CreateInt32Vector overload that takes an Int32VectorT
+}
+
+// Writes *_o into _fbb as an Int32Vector table and returns its offset; an empty `values` vector is passed on as offset 0.
+inline flatbuffers::Offset<Int32Vector> CreateInt32Vector(flatbuffers::FlatBufferBuilder &_fbb, const Int32VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Int32VectorT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto values_off = _o->values.empty() ? 0 : _fbb.CreateVector(_o->values);
+  return tflite::CreateInt32Vector(
+      _fbb,
+      values_off);
+}
+
+inline Uint16VectorT *Uint16Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Uint16VectorT> unpacked(new Uint16VectorT());  // allocate a new Uint16VectorT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the `values` vector of this table into _o->values element by element; when the vector is absent, _o->values is left untouched.
+inline void Uint16Vector::UnPackTo(Uint16VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = values()) { const flatbuffers::uoffset_t count = src->size(); _o->values.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->values[idx] = src->Get(idx); } }
+}
+
+inline flatbuffers::Offset<Uint16Vector> Uint16Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUint16Vector(_fbb, _o, _rehasher);  // Pack only forwards to the CreateUint16Vector overload that takes a Uint16VectorT
+}
+
+// Writes *_o into _fbb as a Uint16Vector table and returns its offset; an empty `values` vector is passed on as offset 0.
+inline flatbuffers::Offset<Uint16Vector> CreateUint16Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint16VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Uint16VectorT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint16_t), 4);  // must run before the vector is created; requests alignment 4
+  const auto values_off = _o->values.empty() ? 0 : _fbb.CreateVector(_o->values);
+  return tflite::CreateUint16Vector(
+      _fbb,
+      values_off);
+}
+
+inline Uint8VectorT *Uint8Vector::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Uint8VectorT> unpacked(new Uint8VectorT());  // allocate a new Uint8VectorT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the `values` vector of this table into _o->values with std::copy; when the vector is absent, _o->values is left untouched.
+inline void Uint8Vector::UnPackTo(Uint8VectorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = values()) { _o->values.resize(src->size()); std::copy(src->begin(), src->end(), _o->values.begin()); }
+}
+
+inline flatbuffers::Offset<Uint8Vector> Uint8Vector::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUint8Vector(_fbb, _o, _rehasher);  // Pack only forwards to the CreateUint8Vector overload that takes a Uint8VectorT
+}
+
+// Writes *_o into _fbb as a Uint8Vector table and returns its offset; an empty `values` vector is passed on as offset 0.
+inline flatbuffers::Offset<Uint8Vector> CreateUint8Vector(flatbuffers::FlatBufferBuilder &_fbb, const Uint8VectorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Uint8VectorT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  _fbb.ForceVectorAlignment(_o->values.size(), sizeof(uint8_t), 4);  // must run before the vector is created; requests alignment 4
+  const auto values_off = _o->values.empty() ? 0 : _fbb.CreateVector(_o->values);
+  return tflite::CreateUint8Vector(
+      _fbb,
+      values_off);
+}
+
+inline DimensionMetadataT *DimensionMetadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<DimensionMetadataT> unpacked(new DimensionMetadataT());  // allocate a new DimensionMetadataT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the fields of this table into *_o; each of the two unions is unpacked only when its payload pointer is non-null.
+inline void DimensionMetadata::UnPackTo(DimensionMetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->format = format();
+  _o->dense_size = dense_size();
+  _o->array_segments.type = array_segments_type();
+  if (const auto payload = array_segments()) { _o->array_segments.value = tflite::SparseIndexVectorUnion::UnPack(payload, array_segments_type(), _resolver); }
+  _o->array_indices.type = array_indices_type();
+  if (const auto payload = array_indices()) { _o->array_indices.value = tflite::SparseIndexVectorUnion::UnPack(payload, array_indices_type(), _resolver); }
+}
+
+inline flatbuffers::Offset<DimensionMetadata> DimensionMetadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateDimensionMetadata(_fbb, _o, _rehasher);  // Pack only forwards to the CreateDimensionMetadata overload that takes a DimensionMetadataT
+}
+
+// Writes *_o into _fbb as a DimensionMetadata table and returns its offset; both unions are packed before the table itself is created.
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb, const DimensionMetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const DimensionMetadataT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto format_val = _o->format;
+  const auto dense_size_val = _o->dense_size;
+  const auto array_segments_type_val = _o->array_segments.type;
+  const auto array_segments_off = _o->array_segments.Pack(_fbb);
+  const auto array_indices_type_val = _o->array_indices.type;
+  const auto array_indices_off = _o->array_indices.Pack(_fbb);
+  return tflite::CreateDimensionMetadata(
+      _fbb,
+      format_val,
+      dense_size_val,
+      array_segments_type_val,
+      array_segments_off,
+      array_indices_type_val,
+      array_indices_off);
+}
+
+inline SparsityParametersT::SparsityParametersT(const SparsityParametersT &o)  // copy constructor: dim_metadata entries are duplicated, not shared
+      : traversal_order(o.traversal_order),
+        block_map(o.block_map) {
+  dim_metadata.reserve(o.dim_metadata.size());  // one allocation up front for the copies appended below
+  for (const auto &src : o.dim_metadata) { dim_metadata.emplace_back(src ? new tflite::DimensionMetadataT(*src) : nullptr); }  // null entries stay null
+}
+
+inline SparsityParametersT &SparsityParametersT::operator=(SparsityParametersT o) FLATBUFFERS_NOEXCEPT {
+  std::swap(o.dim_metadata, dim_metadata);  // `o` is a by-value copy, so the previous members end up in `o` after the swaps
+  std::swap(o.block_map, block_map);
+  std::swap(o.traversal_order, traversal_order);
+  return *this;
+}
+
+inline SparsityParametersT *SparsityParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SparsityParametersT> unpacked(new SparsityParametersT());  // allocate a new SparsityParametersT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the fields of this table into *_o; dim_metadata entries already present in *_o are refilled in place, missing ones are newly unpacked.
+inline void SparsityParameters::UnPackTo(SparsityParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = traversal_order()) { const flatbuffers::uoffset_t count = src->size(); _o->traversal_order.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->traversal_order[idx] = src->Get(idx); } }
+  if (const auto *src = block_map()) { const flatbuffers::uoffset_t count = src->size(); _o->block_map.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->block_map[idx] = src->Get(idx); } }
+  if (const auto *src = dim_metadata()) { const flatbuffers::uoffset_t count = src->size(); _o->dim_metadata.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { auto &slot = _o->dim_metadata[idx]; if (slot) { src->Get(idx)->UnPackTo(slot.get(), _resolver); } else { slot = std::unique_ptr<tflite::DimensionMetadataT>(src->Get(idx)->UnPack(_resolver)); } } }
+}
+
+inline flatbuffers::Offset<SparsityParameters> SparsityParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSparsityParameters(_fbb, _o, _rehasher);  // Pack only forwards to the CreateSparsityParameters overload that takes a SparsityParametersT
+}
+
+// Writes *_o into _fbb as a SparsityParameters table and returns its offset; empty vectors are passed on as offset 0. Creation order below matches field order.
+inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(flatbuffers::FlatBufferBuilder &_fbb, const SparsityParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const SparsityParametersT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto traversal_order_off = _o->traversal_order.empty() ? 0 : _fbb.CreateVector(_o->traversal_order);
+  const auto block_map_off = _o->block_map.empty() ? 0 : _fbb.CreateVector(_o->block_map);
+  const auto dim_metadata_off = _o->dim_metadata.empty() ? 0 : _fbb.CreateVector<flatbuffers::Offset<tflite::DimensionMetadata>> (_o->dim_metadata.size(), [](size_t idx, PackCtx *state) { return CreateDimensionMetadata(*state->fbb, state->obj->dim_metadata[idx].get(), state->rehasher); }, &ctx );
+  return tflite::CreateSparsityParameters(
+      _fbb,
+      traversal_order_off,
+      block_map_off,
+      dim_metadata_off);
+}
+
+inline VariantSubTypeT *VariantSubType::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<VariantSubTypeT> unpacked(new VariantSubTypeT());  // allocate a new VariantSubTypeT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies shape, type and has_rank from this table into *_o; when the shape vector is absent, _o->shape is left untouched.
+inline void VariantSubType::UnPackTo(VariantSubTypeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = shape()) { const flatbuffers::uoffset_t count = src->size(); _o->shape.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->shape[idx] = src->Get(idx); } }
+  _o->type = type();
+  _o->has_rank = has_rank();
+}
+
+inline flatbuffers::Offset<VariantSubType> VariantSubType::Pack(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateVariantSubType(_fbb, _o, _rehasher);  // Pack only forwards to the CreateVariantSubType overload that takes a VariantSubTypeT
+}
+
+// Writes *_o into _fbb as a VariantSubType table and returns its offset; an empty `shape` vector is passed on as offset 0.
+inline flatbuffers::Offset<VariantSubType> CreateVariantSubType(flatbuffers::FlatBufferBuilder &_fbb, const VariantSubTypeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const VariantSubTypeT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto shape_off = _o->shape.empty() ? 0 : _fbb.CreateVector(_o->shape);
+  const auto type_val = _o->type;
+  const auto has_rank_val = _o->has_rank;
+  return tflite::CreateVariantSubType(
+      _fbb,
+      shape_off,
+      type_val,
+      has_rank_val);
+}
+
+inline TensorT::TensorT(const TensorT &o)  // copy constructor: pointed-to sub-objects are duplicated, not shared
+      : shape(o.shape),
+        type(o.type),
+        buffer(o.buffer),
+        name(o.name),
+        quantization(o.quantization ? new tflite::QuantizationParametersT(*o.quantization) : nullptr),  // new copy when present, otherwise null
+        is_variable(o.is_variable),
+        sparsity(o.sparsity ? new tflite::SparsityParametersT(*o.sparsity) : nullptr),  // new copy when present, otherwise null
+        shape_signature(o.shape_signature),
+        has_rank(o.has_rank) {
+  variant_tensors.reserve(o.variant_tensors.size());  // variant_tensors is filled here rather than in the initializer list
+  for (const auto &src : o.variant_tensors) { variant_tensors.emplace_back(src ? new tflite::VariantSubTypeT(*src) : nullptr); }
+}
+
+inline TensorT &TensorT::operator=(TensorT o) FLATBUFFERS_NOEXCEPT {
+  std::swap(o.shape, shape);  // `o` is a by-value copy, so the previous members end up in `o` after the swaps
+  std::swap(o.type, type);
+  std::swap(o.buffer, buffer);
+  std::swap(o.name, name);
+  std::swap(o.quantization, quantization);
+  std::swap(o.is_variable, is_variable);
+  std::swap(o.sparsity, sparsity);
+  std::swap(o.shape_signature, shape_signature);
+  std::swap(o.has_rank, has_rank);
+  std::swap(o.variant_tensors, variant_tensors);
+  return *this;
+}
+
+inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<TensorT> unpacked(new TensorT());  // allocate a new TensorT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the fields of this table into *_o; sub-objects already present in *_o are refilled in place, missing ones are newly unpacked, absent fields are skipped.
+inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  if (const auto *src = shape()) { const flatbuffers::uoffset_t count = src->size(); _o->shape.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->shape[idx] = src->Get(idx); } }
+  _o->type = type();
+  _o->buffer = buffer();
+  if (const auto *src = name()) { _o->name = src->str(); }
+  if (const auto *src = quantization()) { if (_o->quantization) { src->UnPackTo(_o->quantization.get(), _resolver); } else { _o->quantization = std::unique_ptr<tflite::QuantizationParametersT>(src->UnPack(_resolver)); } }
+  _o->is_variable = is_variable();
+  if (const auto *src = sparsity()) { if (_o->sparsity) { src->UnPackTo(_o->sparsity.get(), _resolver); } else { _o->sparsity = std::unique_ptr<tflite::SparsityParametersT>(src->UnPack(_resolver)); } }
+  if (const auto *src = shape_signature()) { const flatbuffers::uoffset_t count = src->size(); _o->shape_signature.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->shape_signature[idx] = src->Get(idx); } }
+  _o->has_rank = has_rank();
+  if (const auto *src = variant_tensors()) { const flatbuffers::uoffset_t count = src->size(); _o->variant_tensors.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { auto &slot = _o->variant_tensors[idx]; if (slot) { src->Get(idx)->UnPackTo(slot.get(), _resolver); } else { slot = std::unique_ptr<tflite::VariantSubTypeT>(src->Get(idx)->UnPack(_resolver)); } } }
+}
+
+inline flatbuffers::Offset<Tensor> Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateTensor(_fbb, _o, _rehasher);  // Pack only forwards to the CreateTensor overload that takes a TensorT
+}
+
+// Writes *_o into _fbb as a Tensor table and returns its offset; empty vectors, an empty name and null sub-objects are passed on as offset 0. Creation order below matches field order.
+inline flatbuffers::Offset<Tensor> CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const TensorT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto shape_off = _o->shape.empty() ? 0 : _fbb.CreateVector(_o->shape);
+  const auto type_val = _o->type;
+  const auto buffer_val = _o->buffer;
+  const auto name_off = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  const auto quantization_off = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0;
+  const auto is_variable_val = _o->is_variable;
+  const auto sparsity_off = _o->sparsity ? CreateSparsityParameters(_fbb, _o->sparsity.get(), _rehasher) : 0;
+  const auto shape_signature_off = _o->shape_signature.empty() ? 0 : _fbb.CreateVector(_o->shape_signature);
+  const auto has_rank_val = _o->has_rank;
+  const auto variant_tensors_off = _o->variant_tensors.empty() ? 0 : _fbb.CreateVector<flatbuffers::Offset<tflite::VariantSubType>> (_o->variant_tensors.size(), [](size_t idx, PackCtx *state) { return CreateVariantSubType(*state->fbb, state->obj->variant_tensors[idx].get(), state->rehasher); }, &ctx );
+  return tflite::CreateTensor(
+      _fbb,
+      shape_off,
+      type_val,
+      buffer_val,
+      name_off,
+      quantization_off,
+      is_variable_val,
+      sparsity_off,
+      shape_signature_off,
+      has_rank_val,
+      variant_tensors_off);
+}
+
+inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Conv2DOptionsT> unpacked(new Conv2DOptionsT());  // allocate a new Conv2DOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this Conv2DOptions table into the member of the same name in *_o.
+inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->padding = padding();
+  _o->stride_w = stride_w();
+  _o->stride_h = stride_h();
+  _o->fused_activation_function = fused_activation_function();
+  _o->dilation_w_factor = dilation_w_factor();
+  _o->dilation_h_factor = dilation_h_factor();
+}
+
+inline flatbuffers::Offset<Conv2DOptions> Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConv2DOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateConv2DOptions overload that takes a Conv2DOptionsT
+}
+
+// Writes the fields of *_o into _fbb as a Conv2DOptions table and returns its offset.
+inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Conv2DOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto padding_val = _o->padding;
+  const auto stride_w_val = _o->stride_w;
+  const auto stride_h_val = _o->stride_h;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  const auto dilation_w_factor_val = _o->dilation_w_factor;
+  const auto dilation_h_factor_val = _o->dilation_h_factor;
+  return tflite::CreateConv2DOptions(
+      _fbb,
+      padding_val,
+      stride_w_val,
+      stride_h_val,
+      fused_activation_function_val,
+      dilation_w_factor_val,
+      dilation_h_factor_val);
+}
+
+inline Conv3DOptionsT *Conv3DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Conv3DOptionsT> unpacked(new Conv3DOptionsT());  // allocate a new Conv3DOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this Conv3DOptions table into the member of the same name in *_o.
+inline void Conv3DOptions::UnPackTo(Conv3DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->padding = padding();
+  _o->stride_d = stride_d();
+  _o->stride_w = stride_w();
+  _o->stride_h = stride_h();
+  _o->fused_activation_function = fused_activation_function();
+  _o->dilation_d_factor = dilation_d_factor();
+  _o->dilation_w_factor = dilation_w_factor();
+  _o->dilation_h_factor = dilation_h_factor();
+}
+
+inline flatbuffers::Offset<Conv3DOptions> Conv3DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConv3DOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateConv3DOptions overload that takes a Conv3DOptionsT
+}
+
+// Writes the fields of *_o into _fbb as a Conv3DOptions table and returns its offset.
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv3DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Conv3DOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto padding_val = _o->padding;
+  const auto stride_d_val = _o->stride_d;
+  const auto stride_w_val = _o->stride_w;
+  const auto stride_h_val = _o->stride_h;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  const auto dilation_d_factor_val = _o->dilation_d_factor;
+  const auto dilation_w_factor_val = _o->dilation_w_factor;
+  const auto dilation_h_factor_val = _o->dilation_h_factor;
+  return tflite::CreateConv3DOptions(
+      _fbb,
+      padding_val,
+      stride_d_val,
+      stride_w_val,
+      stride_h_val,
+      fused_activation_function_val,
+      dilation_d_factor_val,
+      dilation_w_factor_val,
+      dilation_h_factor_val);
+}
+
+inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<Pool2DOptionsT> unpacked(new Pool2DOptionsT());  // allocate a new Pool2DOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this Pool2DOptions table into the member of the same name in *_o.
+inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->padding = padding();
+  _o->stride_w = stride_w();
+  _o->stride_h = stride_h();
+  _o->filter_width = filter_width();
+  _o->filter_height = filter_height();
+  _o->fused_activation_function = fused_activation_function();
+}
+
+inline flatbuffers::Offset<Pool2DOptions> Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePool2DOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreatePool2DOptions overload that takes a Pool2DOptionsT
+}
+
+// Writes the fields of *_o into _fbb as a Pool2DOptions table and returns its offset.
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const Pool2DOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto padding_val = _o->padding;
+  const auto stride_w_val = _o->stride_w;
+  const auto stride_h_val = _o->stride_h;
+  const auto filter_width_val = _o->filter_width;
+  const auto filter_height_val = _o->filter_height;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  return tflite::CreatePool2DOptions(
+      _fbb,
+      padding_val,
+      stride_w_val,
+      stride_h_val,
+      filter_width_val,
+      filter_height_val,
+      fused_activation_function_val);
+}
+
+inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<DepthwiseConv2DOptionsT> unpacked(new DepthwiseConv2DOptionsT());  // allocate a new DepthwiseConv2DOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this DepthwiseConv2DOptions table into the member of the same name in *_o.
+inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->padding = padding();
+  _o->stride_w = stride_w();
+  _o->stride_h = stride_h();
+  _o->depth_multiplier = depth_multiplier();
+  _o->fused_activation_function = fused_activation_function();
+  _o->dilation_w_factor = dilation_w_factor();
+  _o->dilation_h_factor = dilation_h_factor();
+}
+
+inline flatbuffers::Offset<DepthwiseConv2DOptions> DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateDepthwiseConv2DOptions overload that takes a DepthwiseConv2DOptionsT
+}
+
+// Writes the fields of *_o into _fbb as a DepthwiseConv2DOptions table and returns its offset.
+inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const DepthwiseConv2DOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto padding_val = _o->padding;
+  const auto stride_w_val = _o->stride_w;
+  const auto stride_h_val = _o->stride_h;
+  const auto depth_multiplier_val = _o->depth_multiplier;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  const auto dilation_w_factor_val = _o->dilation_w_factor;
+  const auto dilation_h_factor_val = _o->dilation_h_factor;
+  return tflite::CreateDepthwiseConv2DOptions(
+      _fbb,
+      padding_val,
+      stride_w_val,
+      stride_h_val,
+      depth_multiplier_val,
+      fused_activation_function_val,
+      dilation_w_factor_val,
+      dilation_h_factor_val);
+}
+
+inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ConcatEmbeddingsOptionsT> unpacked(new ConcatEmbeddingsOptionsT());  // allocate a new ConcatEmbeddingsOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies num_channels and the two per-channel vectors from this table into *_o; an absent vector leaves its destination untouched.
+inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->num_channels = num_channels();
+  if (const auto *src = num_columns_per_channel()) { const flatbuffers::uoffset_t count = src->size(); _o->num_columns_per_channel.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->num_columns_per_channel[idx] = src->Get(idx); } }
+  if (const auto *src = embedding_dim_per_channel()) { const flatbuffers::uoffset_t count = src->size(); _o->embedding_dim_per_channel.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->embedding_dim_per_channel[idx] = src->Get(idx); } }
+}
+
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateConcatEmbeddingsOptions overload that takes a ConcatEmbeddingsOptionsT
+}
+
+// Writes *_o into _fbb as a ConcatEmbeddingsOptions table and returns its offset; empty vectors are passed on as offset 0.
+inline flatbuffers::Offset<ConcatEmbeddingsOptions> CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const ConcatEmbeddingsOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto num_channels_val = _o->num_channels;
+  const auto num_columns_per_channel_off = _o->num_columns_per_channel.empty() ? 0 : _fbb.CreateVector(_o->num_columns_per_channel);
+  const auto embedding_dim_per_channel_off = _o->embedding_dim_per_channel.empty() ? 0 : _fbb.CreateVector(_o->embedding_dim_per_channel);
+  return tflite::CreateConcatEmbeddingsOptions(
+      _fbb,
+      num_channels_val,
+      num_columns_per_channel_off,
+      embedding_dim_per_channel_off);
+}
+
+inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LSHProjectionOptionsT> unpacked(new LSHProjectionOptionsT());  // allocate a new LSHProjectionOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies the `type` field of this LSHProjectionOptions table into _o->type.
+inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->type = type();
+}
+
+inline flatbuffers::Offset<LSHProjectionOptions> LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLSHProjectionOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateLSHProjectionOptions overload that takes an LSHProjectionOptionsT
+}
+
+// Writes the `type` field of *_o into _fbb as an LSHProjectionOptions table and returns its offset.
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const LSHProjectionOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto type_val = _o->type;
+  return tflite::CreateLSHProjectionOptions(
+      _fbb,
+      type_val);
+}
+
+inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SVDFOptionsT> unpacked(new SVDFOptionsT());  // allocate a new SVDFOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this SVDFOptions table into the member of the same name in *_o.
+inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->rank = rank();
+  _o->fused_activation_function = fused_activation_function();
+  _o->asymmetric_quantize_inputs = asymmetric_quantize_inputs();
+}
+
+inline flatbuffers::Offset<SVDFOptions> SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSVDFOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateSVDFOptions overload that takes an SVDFOptionsT
+}
+
+// Writes the fields of *_o into _fbb as an SVDFOptions table and returns its offset.
+inline flatbuffers::Offset<SVDFOptions> CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const SVDFOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto rank_val = _o->rank;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  const auto asymmetric_quantize_inputs_val = _o->asymmetric_quantize_inputs;
+  return tflite::CreateSVDFOptions(
+      _fbb,
+      rank_val,
+      fused_activation_function_val,
+      asymmetric_quantize_inputs_val);
+}
+
+inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<RNNOptionsT> unpacked(new RNNOptionsT());  // allocate a new RNNOptionsT for UnPackTo to fill
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership passes to the caller as a raw pointer
+}
+
+// Copies each field of this RNNOptions table into the member of the same name in *_o.
+inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o; (void)_resolver;
+  _o->fused_activation_function = fused_activation_function();
+  _o->asymmetric_quantize_inputs = asymmetric_quantize_inputs();
+}
+
+inline flatbuffers::Offset<RNNOptions> RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRNNOptions(_fbb, _o, _rehasher);  // Pack only forwards to the CreateRNNOptions overload that takes an RNNOptionsT
+}
+
+// Writes the fields of *_o into _fbb as an RNNOptions table and returns its offset.
+inline flatbuffers::Offset<RNNOptions> CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher; (void)_o;
+  struct PackCtx { flatbuffers::FlatBufferBuilder *fbb; const RNNOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } ctx = { &_fbb, _o, _rehasher }; (void)ctx;
+  const auto fused_activation_function_val = _o->fused_activation_function;
+  const auto asymmetric_quantize_inputs_val = _o->asymmetric_quantize_inputs;
+  return tflite::CreateRNNOptions(
+      _fbb,
+      fused_activation_function_val,
+      asymmetric_quantize_inputs_val);
+}
+
+inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated SequenceRNNOptionsT filled from this table.
+  auto _o = std::unique_ptr<SequenceRNNOptionsT>(new SequenceRNNOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void SequenceRNNOptions::UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the three fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = time_major(); _o->time_major = _e; }
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions> SequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateSequenceRNNOptions.
+  return CreateSequenceRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the three members of *_o and passes them, with _fbb, to tflite::CreateSequenceRNNOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _time_major = _o->time_major;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateSequenceRNNOptions(
+      _fbb,
+      _time_major,
+      _fused_activation_function,
+      _asymmetric_quantize_inputs);
+}
+
+inline BidirectionalSequenceRNNOptionsT *BidirectionalSequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated BidirectionalSequenceRNNOptionsT filled from this table.
+  auto _o = std::unique_ptr<BidirectionalSequenceRNNOptionsT>(new BidirectionalSequenceRNNOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the four fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = time_major(); _o->time_major = _e; }
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateBidirectionalSequenceRNNOptions.
+  return CreateBidirectionalSequenceRNNOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the four members of *_o and passes them, with _fbb, to tflite::CreateBidirectionalSequenceRNNOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _time_major = _o->time_major;
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _merge_outputs = _o->merge_outputs;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateBidirectionalSequenceRNNOptions(
+      _fbb,
+      _time_major,
+      _fused_activation_function,
+      _merge_outputs,
+      _asymmetric_quantize_inputs);
+}
+
+inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated FullyConnectedOptionsT filled from this table.
+  auto _o = std::unique_ptr<FullyConnectedOptionsT>(new FullyConnectedOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the four fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = weights_format(); _o->weights_format = _e; }
+  { auto _e = keep_num_dims(); _o->keep_num_dims = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions> FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateFullyConnectedOptions.
+  return CreateFullyConnectedOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the four members of *_o and passes them, with _fbb, to tflite::CreateFullyConnectedOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _weights_format = _o->weights_format;
+  auto _keep_num_dims = _o->keep_num_dims;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateFullyConnectedOptions(
+      _fbb,
+      _fused_activation_function,
+      _weights_format,
+      _keep_num_dims,
+      _asymmetric_quantize_inputs);
+}
+
+inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated SoftmaxOptionsT filled from this table.
+  auto _o = std::unique_ptr<SoftmaxOptionsT>(new SoftmaxOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies beta from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = beta(); _o->beta = _e; }
+}
+
+inline flatbuffers::Offset<SoftmaxOptions> SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateSoftmaxOptions.
+  return CreateSoftmaxOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SoftmaxOptions> CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads _o->beta and passes it, with _fbb, to tflite::CreateSoftmaxOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _beta = _o->beta;
+  return tflite::CreateSoftmaxOptions(
+      _fbb,
+      _beta);
+}
+
+inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated ConcatenationOptionsT filled from this table.
+  auto _o = std::unique_ptr<ConcatenationOptionsT>(new ConcatenationOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies axis and fused_activation_function from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = axis(); _o->axis = _e; }
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+}
+
+inline flatbuffers::Offset<ConcatenationOptions> ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateConcatenationOptions.
+  return CreateConcatenationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the two members of *_o and passes them, with _fbb, to tflite::CreateConcatenationOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _axis = _o->axis;
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateConcatenationOptions(
+      _fbb,
+      _axis,
+      _fused_activation_function);
+}
+
+inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated AddOptionsT filled from this table.
+  auto _o = std::unique_ptr<AddOptionsT>(new AddOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies fused_activation_function and pot_scale_int16 from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = pot_scale_int16(); _o->pot_scale_int16 = _e; }
+}
+
+inline flatbuffers::Offset<AddOptions> AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateAddOptions.
+  return CreateAddOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<AddOptions> CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the two members of *_o and passes them, with _fbb, to tflite::CreateAddOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _pot_scale_int16 = _o->pot_scale_int16;
+  return tflite::CreateAddOptions(
+      _fbb,
+      _fused_activation_function,
+      _pot_scale_int16);
+}
+
+inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated MulOptionsT filled from this table.
+  auto _o = std::unique_ptr<MulOptionsT>(new MulOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies fused_activation_function from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+}
+
+inline flatbuffers::Offset<MulOptions> MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateMulOptions.
+  return CreateMulOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<MulOptions> CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads _o->fused_activation_function and passes it, with _fbb, to tflite::CreateMulOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateMulOptions(
+      _fbb,
+      _fused_activation_function);
+}
+
+inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated L2NormOptionsT filled from this table.
+  auto _o = std::unique_ptr<L2NormOptionsT>(new L2NormOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies fused_activation_function from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+}
+
+inline flatbuffers::Offset<L2NormOptions> L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateL2NormOptions.
+  return CreateL2NormOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<L2NormOptions> CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads _o->fused_activation_function and passes it, with _fbb, to tflite::CreateL2NormOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateL2NormOptions(
+      _fbb,
+      _fused_activation_function);
+}
+
+inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated LocalResponseNormalizationOptionsT filled from this table.
+  auto _o = std::unique_ptr<LocalResponseNormalizationOptionsT>(new LocalResponseNormalizationOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies radius, bias, alpha and beta from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = radius(); _o->radius = _e; }
+  { auto _e = bias(); _o->bias = _e; }
+  { auto _e = alpha(); _o->alpha = _e; }
+  { auto _e = beta(); _o->beta = _e; }
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions> LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateLocalResponseNormalizationOptions.
+  return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LocalResponseNormalizationOptions> CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the four members of *_o and passes them, with _fbb, to tflite::CreateLocalResponseNormalizationOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _radius = _o->radius;
+  auto _bias = _o->bias;
+  auto _alpha = _o->alpha;
+  auto _beta = _o->beta;
+  return tflite::CreateLocalResponseNormalizationOptions(
+      _fbb,
+      _radius,
+      _bias,
+      _alpha,
+      _beta);
+}
+
+inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated LSTMOptionsT filled from this table.
+  auto _o = std::unique_ptr<LSTMOptionsT>(new LSTMOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the five fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = cell_clip(); _o->cell_clip = _e; }
+  { auto _e = proj_clip(); _o->proj_clip = _e; }
+  { auto _e = kernel_type(); _o->kernel_type = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+}
+
+inline flatbuffers::Offset<LSTMOptions> LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateLSTMOptions.
+  return CreateLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<LSTMOptions> CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the five members of *_o and passes them, with _fbb, to tflite::CreateLSTMOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  auto _kernel_type = _o->kernel_type;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip,
+      _kernel_type,
+      _asymmetric_quantize_inputs);
+}
+
+inline UnidirectionalSequenceLSTMOptionsT *UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated UnidirectionalSequenceLSTMOptionsT filled from this table.
+  auto _o = std::unique_ptr<UnidirectionalSequenceLSTMOptionsT>(new UnidirectionalSequenceLSTMOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the six fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = cell_clip(); _o->cell_clip = _e; }
+  { auto _e = proj_clip(); _o->proj_clip = _e; }
+  { auto _e = time_major(); _o->time_major = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+  { auto _e = diagonal_recurrent_tensors(); _o->diagonal_recurrent_tensors = _e; }
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateUnidirectionalSequenceLSTMOptions.
+  return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the six members of *_o and passes them, with _fbb, to tflite::CreateUnidirectionalSequenceLSTMOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  auto _time_major = _o->time_major;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  auto _diagonal_recurrent_tensors = _o->diagonal_recurrent_tensors;
+  return tflite::CreateUnidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip,
+      _time_major,
+      _asymmetric_quantize_inputs,
+      _diagonal_recurrent_tensors);
+}
+
+inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated BidirectionalSequenceLSTMOptionsT filled from this table.
+  auto _o = std::unique_ptr<BidirectionalSequenceLSTMOptionsT>(new BidirectionalSequenceLSTMOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the six fields read below from this table into the same-named members of *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }
+  { auto _e = cell_clip(); _o->cell_clip = _e; }
+  { auto _e = proj_clip(); _o->proj_clip = _e; }
+  { auto _e = merge_outputs(); _o->merge_outputs = _e; }
+  { auto _e = time_major(); _o->time_major = _e; }
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateBidirectionalSequenceLSTMOptions.
+  return CreateBidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the six members of *_o and passes them, with _fbb, to tflite::CreateBidirectionalSequenceLSTMOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _fused_activation_function = _o->fused_activation_function;
+  auto _cell_clip = _o->cell_clip;
+  auto _proj_clip = _o->proj_clip;
+  auto _merge_outputs = _o->merge_outputs;
+  auto _time_major = _o->time_major;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateBidirectionalSequenceLSTMOptions(
+      _fbb,
+      _fused_activation_function,
+      _cell_clip,
+      _proj_clip,
+      _merge_outputs,
+      _time_major,
+      _asymmetric_quantize_inputs);
+}
+
+inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated ResizeBilinearOptionsT filled from this table.
+  auto _o = std::unique_ptr<ResizeBilinearOptionsT>(new ResizeBilinearOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies align_corners and half_pixel_centers from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = align_corners(); _o->align_corners = _e; }
+  { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; }
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateResizeBilinearOptions.
+  return CreateResizeBilinearOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeBilinearOptions> CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the two members of *_o and passes them, with _fbb, to tflite::CreateResizeBilinearOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _align_corners = _o->align_corners;
+  auto _half_pixel_centers = _o->half_pixel_centers;
+  return tflite::CreateResizeBilinearOptions(
+      _fbb,
+      _align_corners,
+      _half_pixel_centers);
+}
+
+inline ResizeNearestNeighborOptionsT *ResizeNearestNeighborOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated ResizeNearestNeighborOptionsT filled from this table.
+  auto _o = std::unique_ptr<ResizeNearestNeighborOptionsT>(new ResizeNearestNeighborOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void ResizeNearestNeighborOptions::UnPackTo(ResizeNearestNeighborOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies align_corners and half_pixel_centers from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = align_corners(); _o->align_corners = _e; }
+  { auto _e = half_pixel_centers(); _o->half_pixel_centers = _e; }
+}
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions> ResizeNearestNeighborOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateResizeNearestNeighborOptions.
+  return CreateResizeNearestNeighborOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ResizeNearestNeighborOptions> CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeNearestNeighborOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the two members of *_o and passes them, with _fbb, to tflite::CreateResizeNearestNeighborOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeNearestNeighborOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _align_corners = _o->align_corners;
+  auto _half_pixel_centers = _o->half_pixel_centers;
+  return tflite::CreateResizeNearestNeighborOptions(
+      _fbb,
+      _align_corners,
+      _half_pixel_centers);
+}
+
+inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated CallOptionsT filled from this table.
+  auto _o = std::unique_ptr<CallOptionsT>(new CallOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies subgraph from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = subgraph(); _o->subgraph = _e; }
+}
+
+inline flatbuffers::Offset<CallOptions> CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateCallOptions.
+  return CreateCallOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads _o->subgraph and passes it, with _fbb, to tflite::CreateCallOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _subgraph = _o->subgraph;
+  return tflite::CreateCallOptions(
+      _fbb,
+      _subgraph);
+}
+
+inline PadOptionsT *PadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated PadOptionsT after passing it through UnPackTo.
+  auto _o = std::unique_ptr<PadOptionsT>(new PadOptionsT());  // unique_ptr holds the object until it is released below
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void PadOptions::UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies nothing: the body only void-casts its two parameters.
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<PadOptions> PadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreatePadOptions.
+  return CreatePadOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads no members of *_o; calls tflite::CreatePadOptions(_fbb) and returns its result.
+  (void)_rehasher;
+  (void)_o;  // _o is never dereferenced in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  return tflite::CreatePadOptions(
+      _fbb);
+}
+
+inline PadV2OptionsT *PadV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated PadV2OptionsT after passing it through UnPackTo.
+  auto _o = std::unique_ptr<PadV2OptionsT>(new PadV2OptionsT());  // unique_ptr holds the object until it is released below
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void PadV2Options::UnPackTo(PadV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies nothing: the body only void-casts its two parameters.
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<PadV2Options> PadV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreatePadV2Options.
+  return CreatePadV2Options(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBufferBuilder &_fbb, const PadV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads no members of *_o; calls tflite::CreatePadV2Options(_fbb) and returns its result.
+  (void)_rehasher;
+  (void)_o;  // _o is never dereferenced in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  return tflite::CreatePadV2Options(
+      _fbb);
+}
+
+inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated ReshapeOptionsT filled from this table.
+  auto _o = std::unique_ptr<ReshapeOptionsT>(new ReshapeOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies the new_shape vector from this table into _o->new_shape.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } }  // only when new_shape() is non-null: resize, then copy element by element; otherwise _o->new_shape is left untouched
+}
+
+inline flatbuffers::Offset<ReshapeOptions> ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateReshapeOptions.
+  return CreateReshapeOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<ReshapeOptions> CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Writes _o->new_shape into _fbb when non-empty and passes the result to tflite::CreateReshapeOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0;  // an empty new_shape yields 0 instead of a CreateVector call
+  return tflite::CreateReshapeOptions(
+      _fbb,
+      _new_shape);
+}
+
+inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated SpaceToBatchNDOptionsT after passing it through UnPackTo.
+  auto _o = std::unique_ptr<SpaceToBatchNDOptionsT>(new SpaceToBatchNDOptionsT());  // unique_ptr holds the object until it is released below
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void SpaceToBatchNDOptions::UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies nothing: the body only void-casts its two parameters.
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions> SpaceToBatchNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateSpaceToBatchNDOptions.
+  return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SpaceToBatchNDOptions> CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads no members of *_o; calls tflite::CreateSpaceToBatchNDOptions(_fbb) and returns its result.
+  (void)_rehasher;
+  (void)_o;  // _o is never dereferenced in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToBatchNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  return tflite::CreateSpaceToBatchNDOptions(
+      _fbb);
+}
+
+inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated BatchToSpaceNDOptionsT after passing it through UnPackTo.
+  auto _o = std::unique_ptr<BatchToSpaceNDOptionsT>(new BatchToSpaceNDOptionsT());  // unique_ptr holds the object until it is released below
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void BatchToSpaceNDOptions::UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies nothing: the body only void-casts its two parameters.
+  (void)_o;
+  (void)_resolver;
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> BatchToSpaceNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateBatchToSpaceNDOptions.
+  return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<BatchToSpaceNDOptions> CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads no members of *_o; calls tflite::CreateBatchToSpaceNDOptions(_fbb) and returns its result.
+  (void)_rehasher;
+  (void)_o;  // _o is never dereferenced in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchToSpaceNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  return tflite::CreateBatchToSpaceNDOptions(
+      _fbb);
+}
+
+inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated SkipGramOptionsT filled from this table.
+  auto _o = std::unique_ptr<SkipGramOptionsT>(new SkipGramOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies ngram_size, max_skip_size and include_all_ngrams from this table into *_o.
+  (void)_o;
+  (void)_resolver;  // _resolver is otherwise unused in this function
+  { auto _e = ngram_size(); _o->ngram_size = _e; }
+  { auto _e = max_skip_size(); _o->max_skip_size = _e; }
+  { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; }
+}
+
+inline flatbuffers::Offset<SkipGramOptions> SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Thin wrapper: forwards all three arguments to CreateSkipGramOptions.
+  return CreateSkipGramOptions(_fbb, _o, _rehasher);
+}
+
+inline flatbuffers::Offset<SkipGramOptions> CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Reads the three members of *_o and passes them, with _fbb, to tflite::CreateSkipGramOptions; returns that call's result.
+  (void)_rehasher;
+  (void)_o;  // _o is dereferenced below without a null check
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // _va bundles the arguments; it is not read again in this function
+  auto _ngram_size = _o->ngram_size;
+  auto _max_skip_size = _o->max_skip_size;
+  auto _include_all_ngrams = _o->include_all_ngrams;
+  return tflite::CreateSkipGramOptions(
+      _fbb,
+      _ngram_size,
+      _max_skip_size,
+      _include_all_ngrams);
+}
+
+inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Returns a newly allocated SpaceToDepthOptionsT filled from this table.
+  auto _o = std::unique_ptr<SpaceToDepthOptionsT>(new SpaceToDepthOptionsT());  // unique_ptr holds the object while it is populated
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // release() detaches the object from the unique_ptr, so it is not freed here
+}
+
+inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->block_size = block_size();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions> SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateSpaceToDepthOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<SpaceToDepthOptions> CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto block_size = _o->block_size;
+  return tflite::CreateSpaceToDepthOptions(
+      _fbb,
+      block_size);
+}
+
+inline DepthToSpaceOptionsT *DepthToSpaceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<DepthToSpaceOptionsT> unpacked(new DepthToSpaceOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void DepthToSpaceOptions::UnPackTo(DepthToSpaceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->block_size = block_size();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<DepthToSpaceOptions> DepthToSpaceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateDepthToSpaceOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthToSpaceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto block_size = _o->block_size;
+  return tflite::CreateDepthToSpaceOptions(
+      _fbb,
+      block_size);
+}
+
+inline SubOptionsT *SubOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SubOptionsT> unpacked(new SubOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void SubOptions::UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->fused_activation_function = fused_activation_function();  // each accessor value goes into the same-named member
+  _o->pot_scale_int16 = pot_scale_int16();
+}
+
+inline flatbuffers::Offset<SubOptions> SubOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateSubOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<SubOptions> CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read each field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto fused_activation_function = _o->fused_activation_function;
+  const auto pot_scale_int16 = _o->pot_scale_int16;
+  return tflite::CreateSubOptions(
+      _fbb,
+      fused_activation_function,
+      pot_scale_int16);
+}
+
+inline DivOptionsT *DivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<DivOptionsT> unpacked(new DivOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void DivOptions::UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->fused_activation_function = fused_activation_function();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<DivOptions> DivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateDivOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<DivOptions> CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto fused_activation_function = _o->fused_activation_function;
+  return tflite::CreateDivOptions(
+      _fbb,
+      fused_activation_function);
+}
+
+inline TopKV2OptionsT *TopKV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<TopKV2OptionsT> unpacked(new TopKV2OptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void TopKV2Options::UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<TopKV2Options> TopKV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateTopKV2Options(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateTopKV2Options(_fbb);
+}
+
+inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<EmbeddingLookupSparseOptionsT> unpacked(new EmbeddingLookupSparseOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->combiner = combiner();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto combiner = _o->combiner;
+  return tflite::CreateEmbeddingLookupSparseOptions(
+      _fbb,
+      combiner);
+}
+
+inline GatherOptionsT *GatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<GatherOptionsT> unpacked(new GatherOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void GatherOptions::UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->axis = axis();  // each accessor value goes into the same-named member
+  _o->batch_dims = batch_dims();
+}
+
+inline flatbuffers::Offset<GatherOptions> GatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateGatherOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read each field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto axis = _o->axis;
+  const auto batch_dims = _o->batch_dims;
+  return tflite::CreateGatherOptions(
+      _fbb,
+      axis,
+      batch_dims);
+}
+
+inline TransposeOptionsT *TransposeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<TransposeOptionsT> unpacked(new TransposeOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void TransposeOptions::UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<TransposeOptions> TransposeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateTransposeOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<TransposeOptions> CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateTransposeOptions(_fbb);
+}
+
+inline ExpOptionsT *ExpOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ExpOptionsT> unpacked(new ExpOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void ExpOptions::UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<ExpOptions> ExpOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateExpOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateExpOptions(_fbb);
+}
+
+inline CosOptionsT *CosOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<CosOptionsT> unpacked(new CosOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void CosOptions::UnPackTo(CosOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<CosOptions> CosOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateCosOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferBuilder &_fbb, const CosOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateCosOptions(_fbb);
+}
+
+inline ReducerOptionsT *ReducerOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ReducerOptionsT> unpacked(new ReducerOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void ReducerOptions::UnPackTo(ReducerOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->keep_dims = keep_dims();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<ReducerOptions> ReducerOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateReducerOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<ReducerOptions> CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReducerOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto keep_dims = _o->keep_dims;
+  return tflite::CreateReducerOptions(
+      _fbb,
+      keep_dims);
+}
+
+inline SqueezeOptionsT *SqueezeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SqueezeOptionsT> unpacked(new SqueezeOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void SqueezeOptions::UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  if (const auto *dims = squeeze_dims()) { const flatbuffers::uoffset_t count = dims->size(); _o->squeeze_dims.resize(count); for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) { _o->squeeze_dims[idx] = dims->Get(idx); } }  // an absent vector leaves _o->squeeze_dims untouched
+}
+
+inline flatbuffers::Offset<SqueezeOptions> SqueezeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateSqueezeOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<SqueezeOptions> CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // An empty squeeze_dims is passed on as offset 0 instead of serializing an empty vector.
+  (void)_o;
+  (void)_rehasher;
+  const auto dims_offset = _o->squeeze_dims.empty() ? 0 : _fbb.CreateVector(_o->squeeze_dims);
+  return tflite::CreateSqueezeOptions(
+      _fbb,
+      dims_offset);
+}
+
+inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SplitOptionsT> unpacked(new SplitOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void SplitOptions::UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->num_splits = num_splits();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<SplitOptions> SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateSplitOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto num_splits = _o->num_splits;
+  return tflite::CreateSplitOptions(
+      _fbb,
+      num_splits);
+}
+
+inline SplitVOptionsT *SplitVOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SplitVOptionsT> unpacked(new SplitVOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void SplitVOptions::UnPackTo(SplitVOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->num_splits = num_splits();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<SplitVOptions> SplitVOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateSplitVOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitVOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto num_splits = _o->num_splits;
+  return tflite::CreateSplitVOptions(
+      _fbb,
+      num_splits);
+}
+
+inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<StridedSliceOptionsT> unpacked(new StridedSliceOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void StridedSliceOptions::UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->begin_mask = begin_mask();  // each accessor value goes into the same-named member
+  _o->end_mask = end_mask();
+  _o->ellipsis_mask = ellipsis_mask();
+  _o->new_axis_mask = new_axis_mask();
+  _o->shrink_axis_mask = shrink_axis_mask();
+}
+
+inline flatbuffers::Offset<StridedSliceOptions> StridedSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateStridedSliceOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<StridedSliceOptions> CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read each mask from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto begin_mask = _o->begin_mask;
+  const auto end_mask = _o->end_mask;
+  const auto ellipsis_mask = _o->ellipsis_mask;
+  const auto new_axis_mask = _o->new_axis_mask;
+  const auto shrink_axis_mask = _o->shrink_axis_mask;
+  return tflite::CreateStridedSliceOptions(
+      _fbb,
+      begin_mask,
+      end_mask,
+      ellipsis_mask,
+      new_axis_mask,
+      shrink_axis_mask);
+}
+
+inline LogSoftmaxOptionsT *LogSoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LogSoftmaxOptionsT> unpacked(new LogSoftmaxOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void LogSoftmaxOptions::UnPackTo(LogSoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<LogSoftmaxOptions> LogSoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateLogSoftmaxOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<LogSoftmaxOptions> CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateLogSoftmaxOptions(_fbb);
+}
+
+inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<CastOptionsT> unpacked(new CastOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void CastOptions::UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->in_data_type = in_data_type();  // each accessor value goes into the same-named member
+  _o->out_data_type = out_data_type();
+}
+
+inline flatbuffers::Offset<CastOptions> CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateCastOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<CastOptions> CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read each field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto in_data_type = _o->in_data_type;
+  const auto out_data_type = _o->out_data_type;
+  return tflite::CreateCastOptions(
+      _fbb,
+      in_data_type,
+      out_data_type);
+}
+
+inline DequantizeOptionsT *DequantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<DequantizeOptionsT> unpacked(new DequantizeOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void DequantizeOptions::UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<DequantizeOptions> DequantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateDequantizeOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<DequantizeOptions> CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateDequantizeOptions(_fbb);
+}
+
+inline MaximumMinimumOptionsT *MaximumMinimumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<MaximumMinimumOptionsT> unpacked(new MaximumMinimumOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void MaximumMinimumOptions::UnPackTo(MaximumMinimumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<MaximumMinimumOptions> MaximumMinimumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateMaximumMinimumOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<MaximumMinimumOptions> CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb, const MaximumMinimumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateMaximumMinimumOptions(_fbb);
+}
+
+inline TileOptionsT *TileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<TileOptionsT> unpacked(new TileOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void TileOptions::UnPackTo(TileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<TileOptions> TileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateTileOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBufferBuilder &_fbb, const TileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateTileOptions(_fbb);
+}
+
+inline ArgMaxOptionsT *ArgMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ArgMaxOptionsT> unpacked(new ArgMaxOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void ArgMaxOptions::UnPackTo(ArgMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->output_type = output_type();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<ArgMaxOptions> ArgMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateArgMaxOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<ArgMaxOptions> CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto output_type = _o->output_type;
+  return tflite::CreateArgMaxOptions(
+      _fbb,
+      output_type);
+}
+
+inline ArgMinOptionsT *ArgMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ArgMinOptionsT> unpacked(new ArgMinOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void ArgMinOptions::UnPackTo(ArgMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // the resolver is not consulted for this table
+  (void)_o;
+  _o->output_type = output_type();  // the accessor value goes into the same-named member
+}
+
+inline flatbuffers::Offset<ArgMinOptions> ArgMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateArgMinOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<ArgMinOptions> CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const ArgMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Read the field from the object-API struct, then forward to the field-wise overload.
+  (void)_o;
+  (void)_rehasher;
+  const auto output_type = _o->output_type;
+  return tflite::CreateArgMinOptions(
+      _fbb,
+      output_type);
+}
+
+inline GreaterOptionsT *GreaterOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<GreaterOptionsT> unpacked(new GreaterOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void GreaterOptions::UnPackTo(GreaterOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<GreaterOptions> GreaterOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateGreaterOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<GreaterOptions> CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateGreaterOptions(_fbb);
+}
+
+inline GreaterEqualOptionsT *GreaterEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<GreaterEqualOptionsT> unpacked(new GreaterEqualOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void GreaterEqualOptions::UnPackTo(GreaterEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<GreaterEqualOptions> GreaterEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateGreaterEqualOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<GreaterEqualOptions> CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const GreaterEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateGreaterEqualOptions(_fbb);
+}
+
+inline LessOptionsT *LessOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LessOptionsT> unpacked(new LessOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void LessOptions::UnPackTo(LessOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<LessOptions> LessOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateLessOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateLessOptions(_fbb);
+}
+
+inline LessEqualOptionsT *LessEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LessEqualOptionsT> unpacked(new LessEqualOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void LessEqualOptions::UnPackTo(LessEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<LessEqualOptions> LessEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateLessEqualOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<LessEqualOptions> CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const LessEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No field values are read from _o here; the table is built from the builder alone.
+  // The two remaining parameters are referenced only so they do not count as unused.
+  (void)_o;
+  (void)_rehasher;
+  return tflite::CreateLessEqualOptions(_fbb);
+}
+
+inline NegOptionsT *NegOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<NegOptionsT> unpacked(new NegOptionsT());  // owned here until it has been filled in
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // the caller receives ownership of the raw pointer
+}
+
+inline void NegOptions::UnPackTo(NegOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_resolver;  // neither argument is used: nothing is copied into _o
+  (void)_o;
+}
+
+inline flatbuffers::Offset<NegOptions> NegOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return tflite::CreateNegOptions(_fbb, _o, _rehasher);  // thin wrapper over the object-API Create overload
+}
+
+inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferBuilder &_fbb, const NegOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const NegOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateNegOptions(_fbb);
+}
+
+inline SelectOptionsT *SelectOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SelectOptionsT> unpacked(new SelectOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void SelectOptions::UnPackTo(SelectOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<SelectOptions> SelectOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSelectOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const SelectOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateSelectOptions(_fbb);
+}
+
+inline SliceOptionsT *SliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SliceOptionsT> unpacked(new SliceOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void SliceOptions::UnPackTo(SliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<SliceOptions> SliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSliceOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const SliceOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateSliceOptions(_fbb);
+}
+
+inline TransposeConvOptionsT *TransposeConvOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<TransposeConvOptionsT> unpacked(new TransposeConvOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void TransposeConvOptions::UnPackTo(TransposeConvOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->padding = padding();  // each accessor value is stored in the same-named member of *_o
+  _o->stride_w = stride_w();
+  _o->stride_h = stride_h();
+  _o->fused_activation_function = fused_activation_function();
+}
+
+inline flatbuffers::Offset<TransposeConvOptions> TransposeConvOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateTransposeConvOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<TransposeConvOptions> CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeConvOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads padding, stride_w, stride_h and fused_activation_function from *_o
+  // and passes them, in that order, after the builder to the field-wise overload.
+  // _o is dereferenced, so it must not be null; _rehasher is not consulted.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const TransposeConvOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher };
+  static_cast<void>(vector_args);
+  return tflite::CreateTransposeConvOptions(
+      _fbb,
+      _o->padding,
+      _o->stride_w,
+      _o->stride_h,
+      _o->fused_activation_function);
+}
+
+inline ExpandDimsOptionsT *ExpandDimsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ExpandDimsOptionsT> unpacked(new ExpandDimsOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void ExpandDimsOptions::UnPackTo(ExpandDimsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<ExpandDimsOptions> ExpandDimsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateExpandDimsOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<ExpandDimsOptions> CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpandDimsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const ExpandDimsOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateExpandDimsOptions(_fbb);
+}
+
+inline SparseToDenseOptionsT *SparseToDenseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SparseToDenseOptionsT> unpacked(new SparseToDenseOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void SparseToDenseOptions::UnPackTo(SparseToDenseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->validate_indices = validate_indices();  // the accessor value is stored in the same-named member of *_o
+}
+
+inline flatbuffers::Offset<SparseToDenseOptions> SparseToDenseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSparseToDenseOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<SparseToDenseOptions> CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, const SparseToDenseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads validate_indices from *_o (dereferenced, so _o must not be null) and passes it on after the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const SparseToDenseOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateSparseToDenseOptions(
+      _fbb,
+      _o->validate_indices);
+}
+
+inline EqualOptionsT *EqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<EqualOptionsT> unpacked(new EqualOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void EqualOptions::UnPackTo(EqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<EqualOptions> EqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateEqualOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const EqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const EqualOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateEqualOptions(_fbb);
+}
+
+inline NotEqualOptionsT *NotEqualOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<NotEqualOptionsT> unpacked(new NotEqualOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void NotEqualOptions::UnPackTo(NotEqualOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<NotEqualOptions> NotEqualOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateNotEqualOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<NotEqualOptions> CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb, const NotEqualOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const NotEqualOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateNotEqualOptions(_fbb);
+}
+
+inline ShapeOptionsT *ShapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ShapeOptionsT> unpacked(new ShapeOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void ShapeOptions::UnPackTo(ShapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->out_type = out_type();  // the accessor value is stored in the same-named member of *_o
+}
+
+inline flatbuffers::Offset<ShapeOptions> ShapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateShapeOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<ShapeOptions> CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads out_type from *_o (dereferenced, so _o must not be null) and passes it on after the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const ShapeOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateShapeOptions(
+      _fbb,
+      _o->out_type);
+}
+
+inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<RankOptionsT> unpacked(new RankOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void RankOptions::UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<RankOptions> RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRankOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const RankOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateRankOptions(_fbb);
+}
+
+inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<PowOptionsT> unpacked(new PowOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void PowOptions::UnPackTo(PowOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<PowOptions> PowOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePowOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferBuilder &_fbb, const PowOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const PowOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreatePowOptions(_fbb);
+}
+
+inline FakeQuantOptionsT *FakeQuantOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<FakeQuantOptionsT> unpacked(new FakeQuantOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void FakeQuantOptions::UnPackTo(FakeQuantOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->min = min();  // each accessor value is stored in the same-named member of *_o
+  _o->max = max();
+  _o->num_bits = num_bits();
+  _o->narrow_range = narrow_range();
+}
+
+inline flatbuffers::Offset<FakeQuantOptions> FakeQuantOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFakeQuantOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<FakeQuantOptions> CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, const FakeQuantOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads min, max, num_bits and narrow_range from *_o and passes them,
+  // in that order, after the builder to the field-wise overload.
+  // _o is dereferenced, so it must not be null; _rehasher is not consulted.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const FakeQuantOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher };
+  static_cast<void>(vector_args);
+  return tflite::CreateFakeQuantOptions(
+      _fbb,
+      _o->min,
+      _o->max,
+      _o->num_bits,
+      _o->narrow_range);
+}
+
+inline PackOptionsT *PackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<PackOptionsT> unpacked(new PackOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void PackOptions::UnPackTo(PackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->values_count = values_count();  // each accessor value is stored in the same-named member of *_o
+  _o->axis = axis();
+}
+
+inline flatbuffers::Offset<PackOptions> PackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreatePackOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<PackOptions> CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, const PackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads values_count and axis from *_o and passes them, in that order, after the builder.
+  // _o is dereferenced, so it must not be null; _rehasher is not consulted.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const PackOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreatePackOptions(
+      _fbb,
+      _o->values_count,
+      _o->axis);
+}
+
+inline LogicalOrOptionsT *LogicalOrOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LogicalOrOptionsT> unpacked(new LogicalOrOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void LogicalOrOptions::UnPackTo(LogicalOrOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<LogicalOrOptions> LogicalOrOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLogicalOrOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<LogicalOrOptions> CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalOrOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const LogicalOrOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateLogicalOrOptions(_fbb);
+}
+
+inline OneHotOptionsT *OneHotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<OneHotOptionsT> unpacked(new OneHotOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void OneHotOptions::UnPackTo(OneHotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->axis = axis();  // the accessor value is stored in the same-named member of *_o
+}
+
+inline flatbuffers::Offset<OneHotOptions> OneHotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateOneHotOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatBufferBuilder &_fbb, const OneHotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads axis from *_o (dereferenced, so _o must not be null) and passes it on after the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const OneHotOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateOneHotOptions(
+      _fbb,
+      _o->axis);
+}
+
+inline AbsOptionsT *AbsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<AbsOptionsT> unpacked(new AbsOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void AbsOptions::UnPackTo(AbsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<AbsOptions> AbsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateAbsOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferBuilder &_fbb, const AbsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const AbsOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateAbsOptions(_fbb);
+}
+
+inline HardSwishOptionsT *HardSwishOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<HardSwishOptionsT> unpacked(new HardSwishOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void HardSwishOptions::UnPackTo(HardSwishOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<HardSwishOptions> HardSwishOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateHardSwishOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<HardSwishOptions> CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb, const HardSwishOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const HardSwishOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateHardSwishOptions(_fbb);
+}
+
+inline LogicalAndOptionsT *LogicalAndOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LogicalAndOptionsT> unpacked(new LogicalAndOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void LogicalAndOptions::UnPackTo(LogicalAndOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<LogicalAndOptions> LogicalAndOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLogicalAndOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<LogicalAndOptions> CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalAndOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const LogicalAndOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateLogicalAndOptions(_fbb);
+}
+
+inline LogicalNotOptionsT *LogicalNotOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LogicalNotOptionsT> unpacked(new LogicalNotOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void LogicalNotOptions::UnPackTo(LogicalNotOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<LogicalNotOptions> LogicalNotOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLogicalNotOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<LogicalNotOptions> CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogicalNotOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const LogicalNotOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateLogicalNotOptions(_fbb);
+}
+
+inline UnpackOptionsT *UnpackOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<UnpackOptionsT> unpacked(new UnpackOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void UnpackOptions::UnPackTo(UnpackOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->num = num();  // each accessor value is stored in the same-named member of *_o
+  _o->axis = axis();
+}
+
+inline flatbuffers::Offset<UnpackOptions> UnpackOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUnpackOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnpackOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads num and axis from *_o and passes them, in that order, after the builder.
+  // _o is dereferenced, so it must not be null; _rehasher is not consulted.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const UnpackOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateUnpackOptions(
+      _fbb,
+      _o->num,
+      _o->axis);
+}
+
+inline FloorDivOptionsT *FloorDivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<FloorDivOptionsT> unpacked(new FloorDivOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void FloorDivOptions::UnPackTo(FloorDivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<FloorDivOptions> FloorDivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFloorDivOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<FloorDivOptions> CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorDivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const FloorDivOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateFloorDivOptions(_fbb);
+}
+
+inline SquareOptionsT *SquareOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SquareOptionsT> unpacked(new SquareOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void SquareOptions::UnPackTo(SquareOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<SquareOptions> SquareOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSquareOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquareOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const SquareOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateSquareOptions(_fbb);
+}
+
+inline ZerosLikeOptionsT *ZerosLikeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<ZerosLikeOptionsT> unpacked(new ZerosLikeOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void ZerosLikeOptions::UnPackTo(ZerosLikeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions> ZerosLikeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateZerosLikeOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<ZerosLikeOptions> CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ZerosLikeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const ZerosLikeOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateZerosLikeOptions(_fbb);
+}
+
+inline FillOptionsT *FillOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<FillOptionsT> unpacked(new FillOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void FillOptions::UnPackTo(FillOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<FillOptions> FillOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFillOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const FillOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateFillOptions(_fbb);
+}
+
+inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<FloorModOptionsT> unpacked(new FloorModOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<FloorModOptions> FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateFloorModOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<FloorModOptions> CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const FloorModOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateFloorModOptions(_fbb);
+}
+
+inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<RangeOptionsT> unpacked(new RangeOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<RangeOptions> RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRangeOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const RangeOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateRangeOptions(_fbb);
+}
+
+inline LeakyReluOptionsT *LeakyReluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<LeakyReluOptionsT> unpacked(new LeakyReluOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void LeakyReluOptions::UnPackTo(LeakyReluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // the resolver is not consulted by this function
+  static_cast<void>(_o);
+  _o->alpha = alpha();  // the accessor value is stored in the same-named member of *_o
+}
+
+inline flatbuffers::Offset<LeakyReluOptions> LeakyReluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateLeakyReluOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<LeakyReluOptions> CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, const LeakyReluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // Reads alpha from *_o (dereferenced, so _o must not be null) and passes it on after the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const LeakyReluOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateLeakyReluOptions(
+      _fbb,
+      _o->alpha);
+}
+
+inline SquaredDifferenceOptionsT *SquaredDifferenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<SquaredDifferenceOptionsT> unpacked(new SquaredDifferenceOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void SquaredDifferenceOptions::UnPackTo(SquaredDifferenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);  // nothing is copied into *_o here; both parameters
+  static_cast<void>(_o);         // are referenced only so they count as used
+}
+
+inline flatbuffers::Offset<SquaredDifferenceOptions> SquaredDifferenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSquaredDifferenceOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged and returns the callee's result
+}
+
+inline flatbuffers::Offset<SquaredDifferenceOptions> CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const SquaredDifferenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // No member of *_o is read: the call below receives only the builder.
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  struct VectorArgs { flatbuffers::FlatBufferBuilder *fbb; const SquaredDifferenceOptionsT *obj; const flatbuffers::rehasher_function_t *rehasher; } vector_args = { &_fbb, _o, _rehasher }; static_cast<void>(vector_args);
+  return tflite::CreateSquaredDifferenceOptions(_fbb);
+}
+
+inline MirrorPadOptionsT *MirrorPadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  std::unique_ptr<MirrorPadOptionsT> unpacked(new MirrorPadOptionsT());  // owns the new object while UnPackTo runs
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();  // ownership of the raw pointer passes to the caller
+}
+
+inline void MirrorPadOptions::UnPackTo(MirrorPadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = mode(); _o->mode = _e; }  // copy the mode() accessor value into _o->mode
+}
+
+inline flatbuffers::Offset<MirrorPadOptions> MirrorPadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMirrorPadOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateMirrorPadOptions
+}
+
+inline flatbuffers::Offset<MirrorPadOptions> CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb, const MirrorPadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MirrorPadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _mode = _o->mode;  // reads through _o, so _o must not be null
+  return tflite::CreateMirrorPadOptions(
+      _fbb,
+      _mode);  // call the tflite::CreateMirrorPadOptions overload that takes the field value
+}
+
+inline UniqueOptionsT *UniqueOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<UniqueOptionsT>(new UniqueOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void UniqueOptions::UnPackTo(UniqueOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = idx_out_type(); _o->idx_out_type = _e; }  // copy the idx_out_type() accessor value into _o->idx_out_type
+}
+
+inline flatbuffers::Offset<UniqueOptions> UniqueOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateUniqueOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateUniqueOptions
+}
+
+inline flatbuffers::Offset<UniqueOptions> CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb, const UniqueOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UniqueOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _idx_out_type = _o->idx_out_type;  // reads through _o, so _o must not be null
+  return tflite::CreateUniqueOptions(
+      _fbb,
+      _idx_out_type);  // call the tflite::CreateUniqueOptions overload that takes the field value
+}
+
+inline ReverseV2OptionsT *ReverseV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<ReverseV2OptionsT>(new ReverseV2OptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void ReverseV2Options::UnPackTo(ReverseV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<ReverseV2Options> ReverseV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateReverseV2Options(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateReverseV2Options
+}
+
+inline flatbuffers::Offset<ReverseV2Options> CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb, const ReverseV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateReverseV2Options(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline AddNOptionsT *AddNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<AddNOptionsT>(new AddNOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void AddNOptions::UnPackTo(AddNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<AddNOptions> AddNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateAddNOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateAddNOptions
+}
+
+inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateAddNOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline GatherNdOptionsT *GatherNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<GatherNdOptionsT>(new GatherNdOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void GatherNdOptions::UnPackTo(GatherNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<GatherNdOptions> GatherNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateGatherNdOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateGatherNdOptions
+}
+
+inline flatbuffers::Offset<GatherNdOptions> CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateGatherNdOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<WhereOptionsT>(new WhereOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void WhereOptions::UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<WhereOptions> WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateWhereOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateWhereOptions
+}
+
+inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhereOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateWhereOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline ReverseSequenceOptionsT *ReverseSequenceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<ReverseSequenceOptionsT>(new ReverseSequenceOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void ReverseSequenceOptions::UnPackTo(ReverseSequenceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = seq_dim(); _o->seq_dim = _e; }  // copy the seq_dim() accessor value into _o->seq_dim
+  { auto _e = batch_dim(); _o->batch_dim = _e; }  // copy the batch_dim() accessor value into _o->batch_dim
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions> ReverseSequenceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateReverseSequenceOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateReverseSequenceOptions
+}
+
+inline flatbuffers::Offset<ReverseSequenceOptions> CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReverseSequenceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReverseSequenceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _seq_dim = _o->seq_dim;  // reads through _o, so _o must not be null
+  auto _batch_dim = _o->batch_dim;
+  return tflite::CreateReverseSequenceOptions(
+      _fbb,
+      _seq_dim,
+      _batch_dim);  // call the tflite::CreateReverseSequenceOptions overload that takes the field values
+}
+
+inline MatrixDiagOptionsT *MatrixDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<MatrixDiagOptionsT>(new MatrixDiagOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void MatrixDiagOptions::UnPackTo(MatrixDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions> MatrixDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMatrixDiagOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateMatrixDiagOptions
+}
+
+inline flatbuffers::Offset<MatrixDiagOptions> CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateMatrixDiagOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline QuantizeOptionsT *QuantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<QuantizeOptionsT>(new QuantizeOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void QuantizeOptions::UnPackTo(QuantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<QuantizeOptions> QuantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateQuantizeOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateQuantizeOptions
+}
+
+inline flatbuffers::Offset<QuantizeOptions> CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const QuantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateQuantizeOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline MatrixSetDiagOptionsT *MatrixSetDiagOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<MatrixSetDiagOptionsT>(new MatrixSetDiagOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void MatrixSetDiagOptions::UnPackTo(MatrixSetDiagOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<MatrixSetDiagOptions> MatrixSetDiagOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateMatrixSetDiagOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateMatrixSetDiagOptions
+}
+
+inline flatbuffers::Offset<MatrixSetDiagOptions> CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb, const MatrixSetDiagOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MatrixSetDiagOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateMatrixSetDiagOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline IfOptionsT *IfOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<IfOptionsT>(new IfOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void IfOptions::UnPackTo(IfOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = then_subgraph_index(); _o->then_subgraph_index = _e; }  // copy the then_subgraph_index() accessor value into the object
+  { auto _e = else_subgraph_index(); _o->else_subgraph_index = _e; }  // copy the else_subgraph_index() accessor value into the object
+}
+
+inline flatbuffers::Offset<IfOptions> IfOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateIfOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateIfOptions
+}
+
+inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBuilder &_fbb, const IfOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const IfOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _then_subgraph_index = _o->then_subgraph_index;  // reads through _o, so _o must not be null
+  auto _else_subgraph_index = _o->else_subgraph_index;
+  return tflite::CreateIfOptions(
+      _fbb,
+      _then_subgraph_index,
+      _else_subgraph_index);  // call the tflite::CreateIfOptions overload that takes the field values
+}
+
+inline CallOnceOptionsT *CallOnceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<CallOnceOptionsT>(new CallOnceOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void CallOnceOptions::UnPackTo(CallOnceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = init_subgraph_index(); _o->init_subgraph_index = _e; }  // copy the init_subgraph_index() accessor value into the object
+}
+
+inline flatbuffers::Offset<CallOnceOptions> CallOnceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateCallOnceOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateCallOnceOptions
+}
+
+inline flatbuffers::Offset<CallOnceOptions> CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOnceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOnceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _init_subgraph_index = _o->init_subgraph_index;  // reads through _o, so _o must not be null
+  return tflite::CreateCallOnceOptions(
+      _fbb,
+      _init_subgraph_index);  // call the tflite::CreateCallOnceOptions overload that takes the field value
+}
+
+inline WhileOptionsT *WhileOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<WhileOptionsT>(new WhileOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void WhileOptions::UnPackTo(WhileOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = cond_subgraph_index(); _o->cond_subgraph_index = _e; }  // copy the cond_subgraph_index() accessor value into the object
+  { auto _e = body_subgraph_index(); _o->body_subgraph_index = _e; }  // copy the body_subgraph_index() accessor value into the object
+}
+
+inline flatbuffers::Offset<WhileOptions> WhileOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateWhileOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateWhileOptions
+}
+
+inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhileOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhileOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _cond_subgraph_index = _o->cond_subgraph_index;  // reads through _o, so _o must not be null
+  auto _body_subgraph_index = _o->body_subgraph_index;
+  return tflite::CreateWhileOptions(
+      _fbb,
+      _cond_subgraph_index,
+      _body_subgraph_index);  // call the tflite::CreateWhileOptions overload that takes the field values
+}
+
+inline NonMaxSuppressionV4OptionsT *NonMaxSuppressionV4Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<NonMaxSuppressionV4OptionsT>(new NonMaxSuppressionV4OptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void NonMaxSuppressionV4Options::UnPackTo(NonMaxSuppressionV4OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options> NonMaxSuppressionV4Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateNonMaxSuppressionV4Options(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateNonMaxSuppressionV4Options
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV4Options> CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV4OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV4OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateNonMaxSuppressionV4Options(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline NonMaxSuppressionV5OptionsT *NonMaxSuppressionV5Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<NonMaxSuppressionV5OptionsT>(new NonMaxSuppressionV5OptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void NonMaxSuppressionV5Options::UnPackTo(NonMaxSuppressionV5OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options> NonMaxSuppressionV5Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateNonMaxSuppressionV5Options(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateNonMaxSuppressionV5Options
+}
+
+inline flatbuffers::Offset<NonMaxSuppressionV5Options> CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb, const NonMaxSuppressionV5OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const NonMaxSuppressionV5OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateNonMaxSuppressionV5Options(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline ScatterNdOptionsT *ScatterNdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<ScatterNdOptionsT>(new ScatterNdOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void ScatterNdOptions::UnPackTo(ScatterNdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<ScatterNdOptions> ScatterNdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateScatterNdOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateScatterNdOptions
+}
+
+inline flatbuffers::Offset<ScatterNdOptions> CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const ScatterNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ScatterNdOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateScatterNdOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline SelectV2OptionsT *SelectV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<SelectV2OptionsT>(new SelectV2OptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void SelectV2Options::UnPackTo(SelectV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<SelectV2Options> SelectV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSelectV2Options(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateSelectV2Options
+}
+
+inline flatbuffers::Offset<SelectV2Options> CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb, const SelectV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateSelectV2Options(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline DensifyOptionsT *DensifyOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<DensifyOptionsT>(new DensifyOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void DensifyOptions::UnPackTo(DensifyOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<DensifyOptions> DensifyOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateDensifyOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateDensifyOptions
+}
+
+inline flatbuffers::Offset<DensifyOptions> CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb, const DensifyOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DensifyOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateDensifyOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline SegmentSumOptionsT *SegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<SegmentSumOptionsT>(new SegmentSumOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void SegmentSumOptions::UnPackTo(SegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<SegmentSumOptions> SegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateSegmentSumOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateSegmentSumOptions
+}
+
+inline flatbuffers::Offset<SegmentSumOptions> CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const SegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SegmentSumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateSegmentSumOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline BatchMatMulOptionsT *BatchMatMulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<BatchMatMulOptionsT>(new BatchMatMulOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void BatchMatMulOptions::UnPackTo(BatchMatMulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = adj_x(); _o->adj_x = _e; }  // copy the adj_x() accessor value into _o->adj_x
+  { auto _e = adj_y(); _o->adj_y = _e; }  // copy the adj_y() accessor value into _o->adj_y
+  { auto _e = asymmetric_quantize_inputs(); _o->asymmetric_quantize_inputs = _e; }  // copy the asymmetric_quantize_inputs() accessor value into the object
+}
+
+inline flatbuffers::Offset<BatchMatMulOptions> BatchMatMulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBatchMatMulOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateBatchMatMulOptions
+}
+
+inline flatbuffers::Offset<BatchMatMulOptions> CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchMatMulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _adj_x = _o->adj_x;  // reads through _o, so _o must not be null
+  auto _adj_y = _o->adj_y;
+  auto _asymmetric_quantize_inputs = _o->asymmetric_quantize_inputs;
+  return tflite::CreateBatchMatMulOptions(
+      _fbb,
+      _adj_x,
+      _adj_y,
+      _asymmetric_quantize_inputs);  // call the tflite::CreateBatchMatMulOptions overload that takes the field values
+}
+
+inline CumsumOptionsT *CumsumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<CumsumOptionsT>(new CumsumOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void CumsumOptions::UnPackTo(CumsumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = exclusive(); _o->exclusive = _e; }  // copy the exclusive() accessor value into _o->exclusive
+  { auto _e = reverse(); _o->reverse = _e; }  // copy the reverse() accessor value into _o->reverse
+}
+
+inline flatbuffers::Offset<CumsumOptions> CumsumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateCumsumOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateCumsumOptions
+}
+
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb, const CumsumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CumsumOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _exclusive = _o->exclusive;  // reads through _o, so _o must not be null
+  auto _reverse = _o->reverse;
+  return tflite::CreateCumsumOptions(
+      _fbb,
+      _exclusive,
+      _reverse);  // call the tflite::CreateCumsumOptions overload that takes the field values
+}
+
+inline BroadcastToOptionsT *BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<BroadcastToOptionsT>(new BroadcastToOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<BroadcastToOptions> BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateBroadcastToOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateBroadcastToOptions
+}
+
+inline flatbuffers::Offset<BroadcastToOptions> CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BroadcastToOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateBroadcastToOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline Rfft2dOptionsT *Rfft2dOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<Rfft2dOptionsT>(new Rfft2dOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void Rfft2dOptions::UnPackTo(Rfft2dOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<Rfft2dOptions> Rfft2dOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateRfft2dOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateRfft2dOptions
+}
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb, const Rfft2dOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Rfft2dOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateRfft2dOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline HashtableOptionsT *HashtableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<HashtableOptionsT>(new HashtableOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void HashtableOptions::UnPackTo(HashtableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast has no effect; _o is written through below
+  (void)_resolver;  // the resolver is not used by this function
+  { auto _e = table_id(); _o->table_id = _e; }  // copy the table_id() accessor value into _o->table_id
+  { auto _e = key_dtype(); _o->key_dtype = _e; }  // copy the key_dtype() accessor value into _o->key_dtype
+  { auto _e = value_dtype(); _o->value_dtype = _e; }  // copy the value_dtype() accessor value into _o->value_dtype
+}
+
+inline flatbuffers::Offset<HashtableOptions> HashtableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateHashtableOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateHashtableOptions
+}
+
+inline flatbuffers::Offset<HashtableOptions> CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // void cast has no effect; _o is dereferenced below
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  auto _table_id = _o->table_id;  // reads through _o, so _o must not be null
+  auto _key_dtype = _o->key_dtype;
+  auto _value_dtype = _o->value_dtype;
+  return tflite::CreateHashtableOptions(
+      _fbb,
+      _table_id,
+      _key_dtype,
+      _value_dtype);  // call the tflite::CreateHashtableOptions overload that takes the field values
+}
+
+inline HashtableFindOptionsT *HashtableFindOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<HashtableFindOptionsT>(new HashtableFindOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void HashtableFindOptions::UnPackTo(HashtableFindOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<HashtableFindOptions> HashtableFindOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateHashtableFindOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateHashtableFindOptions
+}
+
+inline flatbuffers::Offset<HashtableFindOptions> CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableFindOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableFindOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateHashtableFindOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline HashtableImportOptionsT *HashtableImportOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<HashtableImportOptionsT>(new HashtableImportOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void HashtableImportOptions::UnPackTo(HashtableImportOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<HashtableImportOptions> HashtableImportOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateHashtableImportOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateHashtableImportOptions
+}
+
+inline flatbuffers::Offset<HashtableImportOptions> CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableImportOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  (void)_rehasher;  // only stored in _va below, never called
+  (void)_o;  // no member of *_o is read in this function
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const HashtableImportOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // local bundle of the three arguments; not used afterwards
+  return tflite::CreateHashtableImportOptions(
+      _fbb);  // the builder is the only argument passed on
+}
+
+inline HashtableSizeOptionsT *HashtableSizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  auto _o = std::unique_ptr<HashtableSizeOptionsT>(new HashtableSizeOptionsT());  // heap-allocate the result; the unique_ptr holds it while UnPackTo runs
+  UnPackTo(_o.get(), _resolver);  // delegate the actual unpacking to UnPackTo
+  return _o.release();  // release ownership: the caller receives the raw pointer
+}
+
+inline void HashtableSizeOptions::UnPackTo(HashtableSizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  (void)_o;  // void cast only; nothing is written to *_o in this function
+  (void)_resolver;  // the resolver is not used here either
+}
+
+inline flatbuffers::Offset<HashtableSizeOptions> HashtableSizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  return CreateHashtableSizeOptions(_fbb, _o, _rehasher);  // forwards all three arguments unchanged to CreateHashtableSizeOptions
+}
+
+// Object API: writes a HashtableSizeOptions table into _fbb. No field values
+// are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<HashtableSizeOptions> CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const HashtableSizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateHashtableSizeOptions(_fbb);
+}
+
+// Object API: allocates a new VarHandleOptionsT, fills it through UnPackTo(),
+// and returns the raw pointer; the caller takes over ownership.
+inline VarHandleOptionsT *VarHandleOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<VarHandleOptionsT> unpacked(new VarHandleOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies the `container` and `shared_name` strings into *_o.
+// When an accessor returns null the matching member of *_o is left as it was.
+inline void VarHandleOptions::UnPackTo(VarHandleOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  if (auto container_str = container()) {
+    _o->container = container_str->str();
+  }
+  if (auto shared_name_str = shared_name()) {
+    _o->shared_name = shared_name_str->str();
+  }
+}
+
+// Object API: forwards all three arguments to CreateVarHandleOptions() and
+// returns the offset it produces.
+inline flatbuffers::Offset<VarHandleOptions> VarHandleOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<VarHandleOptions> packed = CreateVarHandleOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a VarHandleOptions table in _fbb.
+// An empty string member is passed on as a zero offset instead of being written.
+inline flatbuffers::Offset<VarHandleOptions> CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb, const VarHandleOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  // Strings are written first, `container` before `shared_name`.
+  const auto container_off =
+      _o->container.empty() ? 0 : _fbb.CreateString(_o->container);
+  const auto shared_name_off =
+      _o->shared_name.empty() ? 0 : _fbb.CreateString(_o->shared_name);
+  return tflite::CreateVarHandleOptions(_fbb, container_off, shared_name_off);
+}
+
+// Object API: allocates a new ReadVariableOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline ReadVariableOptionsT *ReadVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<ReadVariableOptionsT> unpacked(new ReadVariableOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void ReadVariableOptions::UnPackTo(ReadVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to CreateReadVariableOptions() and
+// returns the offset it produces.
+inline flatbuffers::Offset<ReadVariableOptions> ReadVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<ReadVariableOptions> packed = CreateReadVariableOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes a ReadVariableOptions table into _fbb. No field values
+// are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<ReadVariableOptions> CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReadVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateReadVariableOptions(_fbb);
+}
+
+// Object API: allocates a new AssignVariableOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline AssignVariableOptionsT *AssignVariableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<AssignVariableOptionsT> unpacked(new AssignVariableOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void AssignVariableOptions::UnPackTo(AssignVariableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to CreateAssignVariableOptions()
+// and returns the offset it produces.
+inline flatbuffers::Offset<AssignVariableOptions> AssignVariableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<AssignVariableOptions> packed = CreateAssignVariableOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an AssignVariableOptions table into _fbb. No field values
+// are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateAssignVariableOptions(_fbb);
+}
+
+// Object API: allocates a new RandomOptionsT, fills it through UnPackTo(),
+// and returns the raw pointer; the caller takes over ownership.
+inline RandomOptionsT *RandomOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<RandomOptionsT> unpacked(new RandomOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies the `seed` and `seed2` values of this table into *_o.
+inline void RandomOptions::UnPackTo(RandomOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  _o->seed = seed();
+  _o->seed2 = seed2();
+}
+
+// Object API: forwards all three arguments to CreateRandomOptions() and
+// returns the offset it produces.
+inline flatbuffers::Offset<RandomOptions> RandomOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<RandomOptions> packed = CreateRandomOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a RandomOptions table in _fbb, passing `seed`
+// and `seed2` straight through. _rehasher is not used.
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb, const RandomOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  return tflite::CreateRandomOptions(_fbb, _o->seed, _o->seed2);
+}
+
+// Object API: allocates a new BucketizeOptionsT, fills it through UnPackTo(),
+// and returns the raw pointer; the caller takes over ownership.
+inline BucketizeOptionsT *BucketizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<BucketizeOptionsT> unpacked(new BucketizeOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies the `boundaries` vector into *_o element by element.
+// When the accessor returns null, _o->boundaries is left as it was.
+inline void BucketizeOptions::UnPackTo(BucketizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  if (auto src = boundaries()) {
+    _o->boundaries.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->boundaries[idx] = src->Get(idx);
+    }
+  }
+}
+
+// Object API: forwards all three arguments to CreateBucketizeOptions() and
+// returns the offset it produces.
+inline flatbuffers::Offset<BucketizeOptions> BucketizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<BucketizeOptions> packed = CreateBucketizeOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a BucketizeOptions table in _fbb.
+// An empty `boundaries` vector is passed on as a zero offset.
+inline flatbuffers::Offset<BucketizeOptions> CreateBucketizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const BucketizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  const auto boundaries_off =
+      _o->boundaries.size() ? _fbb.CreateVector(_o->boundaries) : 0;
+  return tflite::CreateBucketizeOptions(_fbb, boundaries_off);
+}
+
+// Object API: allocates a new GeluOptionsT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline GeluOptionsT *GeluOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<GeluOptionsT> unpacked(new GeluOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies the `approximate` value of this table into *_o.
+inline void GeluOptions::UnPackTo(GeluOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  _o->approximate = approximate();
+}
+
+// Object API: forwards all three arguments to CreateGeluOptions() and returns
+// the offset it produces.
+inline flatbuffers::Offset<GeluOptions> GeluOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<GeluOptions> packed = CreateGeluOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a GeluOptions table in _fbb, passing
+// `approximate` straight through. _rehasher is not used.
+inline flatbuffers::Offset<GeluOptions> CreateGeluOptions(flatbuffers::FlatBufferBuilder &_fbb, const GeluOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  return tflite::CreateGeluOptions(_fbb, _o->approximate);
+}
+
+// Object API: allocates a new DynamicUpdateSliceOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline DynamicUpdateSliceOptionsT *DynamicUpdateSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<DynamicUpdateSliceOptionsT> unpacked(new DynamicUpdateSliceOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void DynamicUpdateSliceOptions::UnPackTo(DynamicUpdateSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to CreateDynamicUpdateSliceOptions()
+// and returns the offset it produces.
+inline flatbuffers::Offset<DynamicUpdateSliceOptions> DynamicUpdateSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<DynamicUpdateSliceOptions> packed = CreateDynamicUpdateSliceOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes a DynamicUpdateSliceOptions table into _fbb. No field
+// values are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<DynamicUpdateSliceOptions> CreateDynamicUpdateSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const DynamicUpdateSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateDynamicUpdateSliceOptions(_fbb);
+}
+
+// Object API: allocates a new UnsortedSegmentProdOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline UnsortedSegmentProdOptionsT *UnsortedSegmentProdOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<UnsortedSegmentProdOptionsT> unpacked(new UnsortedSegmentProdOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void UnsortedSegmentProdOptions::UnPackTo(UnsortedSegmentProdOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to
+// CreateUnsortedSegmentProdOptions() and returns the offset it produces.
+inline flatbuffers::Offset<UnsortedSegmentProdOptions> UnsortedSegmentProdOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<UnsortedSegmentProdOptions> packed = CreateUnsortedSegmentProdOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an UnsortedSegmentProdOptions table into _fbb. No field
+// values are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<UnsortedSegmentProdOptions> CreateUnsortedSegmentProdOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentProdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateUnsortedSegmentProdOptions(_fbb);
+}
+
+// Object API: allocates a new UnsortedSegmentMaxOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline UnsortedSegmentMaxOptionsT *UnsortedSegmentMaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<UnsortedSegmentMaxOptionsT> unpacked(new UnsortedSegmentMaxOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void UnsortedSegmentMaxOptions::UnPackTo(UnsortedSegmentMaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to
+// CreateUnsortedSegmentMaxOptions() and returns the offset it produces.
+inline flatbuffers::Offset<UnsortedSegmentMaxOptions> UnsortedSegmentMaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<UnsortedSegmentMaxOptions> packed = CreateUnsortedSegmentMaxOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an UnsortedSegmentMaxOptions table into _fbb. No field
+// values are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<UnsortedSegmentMaxOptions> CreateUnsortedSegmentMaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateUnsortedSegmentMaxOptions(_fbb);
+}
+
+// Object API: allocates a new UnsortedSegmentSumOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline UnsortedSegmentSumOptionsT *UnsortedSegmentSumOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<UnsortedSegmentSumOptionsT> unpacked(new UnsortedSegmentSumOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void UnsortedSegmentSumOptions::UnPackTo(UnsortedSegmentSumOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to
+// CreateUnsortedSegmentSumOptions() and returns the offset it produces.
+inline flatbuffers::Offset<UnsortedSegmentSumOptions> UnsortedSegmentSumOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<UnsortedSegmentSumOptions> packed = CreateUnsortedSegmentSumOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an UnsortedSegmentSumOptions table into _fbb. No field
+// values are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<UnsortedSegmentSumOptions> CreateUnsortedSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentSumOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateUnsortedSegmentSumOptions(_fbb);
+}
+
+// Object API: allocates a new ATan2OptionsT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline ATan2OptionsT *ATan2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<ATan2OptionsT> unpacked(new ATan2OptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void ATan2Options::UnPackTo(ATan2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to CreateATan2Options() and
+// returns the offset it produces.
+inline flatbuffers::Offset<ATan2Options> ATan2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<ATan2Options> packed = CreateATan2Options(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an ATan2Options table into _fbb. No field values are
+// passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<ATan2Options> CreateATan2Options(flatbuffers::FlatBufferBuilder &_fbb, const ATan2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateATan2Options(_fbb);
+}
+
+// Object API: allocates a new UnsortedSegmentMinOptionsT, fills it through
+// UnPackTo(), and returns the raw pointer; the caller takes over ownership.
+inline UnsortedSegmentMinOptionsT *UnsortedSegmentMinOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<UnsortedSegmentMinOptionsT> unpacked(new UnsortedSegmentMinOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void UnsortedSegmentMinOptions::UnPackTo(UnsortedSegmentMinOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to
+// CreateUnsortedSegmentMinOptions() and returns the offset it produces.
+inline flatbuffers::Offset<UnsortedSegmentMinOptions> UnsortedSegmentMinOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<UnsortedSegmentMinOptions> packed = CreateUnsortedSegmentMinOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes an UnsortedSegmentMinOptions table into _fbb. No field
+// values are passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<UnsortedSegmentMinOptions> CreateUnsortedSegmentMinOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnsortedSegmentMinOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateUnsortedSegmentMinOptions(_fbb);
+}
+
+// Object API: allocates a new SignOptionsT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline SignOptionsT *SignOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<SignOptionsT> unpacked(new SignOptionsT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: nothing is copied here; the body only marks both parameters as
+// unused so that the compiler does not warn about them.
+inline void SignOptions::UnPackTo(SignOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  static_cast<void>(_o);
+}
+
+// Object API: forwards all three arguments to CreateSignOptions() and returns
+// the offset it produces.
+inline flatbuffers::Offset<SignOptions> SignOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<SignOptions> packed = CreateSignOptions(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: writes a SignOptions table into _fbb. No field values are
+// passed on, so _o and _rehasher are never dereferenced.
+inline flatbuffers::Offset<SignOptions> CreateSignOptions(flatbuffers::FlatBufferBuilder &_fbb, const SignOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_o);
+  static_cast<void>(_rehasher);
+  return tflite::CreateSignOptions(_fbb);
+}
+
+// Object API: allocates a new OperatorCodeT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<OperatorCodeT> unpacked(new OperatorCodeT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies every field of this table into *_o.
+// `custom_code` is only assigned when its accessor returns a non-null string.
+inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  _o->deprecated_builtin_code = deprecated_builtin_code();
+  if (auto custom_code_str = custom_code()) {
+    _o->custom_code = custom_code_str->str();
+  }
+  _o->version = version();
+  _o->builtin_code = builtin_code();
+}
+
+// Object API: forwards all three arguments to CreateOperatorCode() and
+// returns the offset it produces.
+inline flatbuffers::Offset<OperatorCode> OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<OperatorCode> packed = CreateOperatorCode(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as an OperatorCode table in _fbb.
+// An empty `custom_code` is passed on as a zero offset; the scalar members are
+// forwarded unchanged. _rehasher is not used.
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  const auto custom_code_off =
+      _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code);
+  return tflite::CreateOperatorCode(
+      _fbb,
+      _o->deprecated_builtin_code,
+      custom_code_off,
+      _o->version,
+      _o->builtin_code);
+}
+
+// Object API: allocates a new OperatorT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<OperatorT> unpacked(new OperatorT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies every field of this table into *_o.
+// Scalar fields are always assigned. Vector and union fields are only touched
+// when their accessor returns a non-null pointer.
+inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  _o->opcode_index = opcode_index();
+  if (auto src = inputs()) {
+    _o->inputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->inputs[idx] = src->Get(idx);
+    }
+  }
+  if (auto src = outputs()) {
+    _o->outputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->outputs[idx] = src->Get(idx);
+    }
+  }
+  // The union is unpacked in two steps: first the type tag, then the value.
+  _o->builtin_options.type = builtin_options_type();
+  if (auto src = builtin_options()) {
+    _o->builtin_options.value = tflite::BuiltinOptionsUnion::UnPack(src, builtin_options_type(), _resolver);
+  }
+  if (auto src = custom_options()) {
+    _o->custom_options.resize(src->size());
+    std::copy(src->begin(), src->end(), _o->custom_options.begin());
+  }
+  _o->custom_options_format = custom_options_format();
+  if (auto src = mutating_variable_inputs()) {
+    _o->mutating_variable_inputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      // Stored elements are compared against zero to obtain a bool.
+      _o->mutating_variable_inputs[idx] = src->Get(idx) != 0;
+    }
+  }
+  if (auto src = intermediates()) {
+    _o->intermediates.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->intermediates[idx] = src->Get(idx);
+    }
+  }
+}
+
+// Object API: forwards all three arguments to CreateOperator() and returns
+// the offset it produces.
+inline flatbuffers::Offset<Operator> Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<Operator> packed = CreateOperator(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as an Operator table in _fbb.
+// Vectors and the builtin-options union are written first, in the same order
+// as the fields are listed; an empty vector becomes a zero offset. Scalar
+// members are forwarded unchanged. _rehasher is not used.
+inline flatbuffers::Offset<Operator> CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  const auto inputs_off =
+      _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+  const auto outputs_off =
+      _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+  const auto builtin_options_off = _o->builtin_options.Pack(_fbb);
+  const auto custom_options_off =
+      _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0;
+  const auto mutating_variable_inputs_off =
+      _o->mutating_variable_inputs.size() ? _fbb.CreateVector(_o->mutating_variable_inputs) : 0;
+  const auto intermediates_off =
+      _o->intermediates.size() ? _fbb.CreateVector(_o->intermediates) : 0;
+  return tflite::CreateOperator(
+      _fbb,
+      _o->opcode_index,
+      inputs_off,
+      outputs_off,
+      _o->builtin_options.type,
+      builtin_options_off,
+      custom_options_off,
+      _o->custom_options_format,
+      mutating_variable_inputs_off,
+      intermediates_off);
+}
+
+// Deep-copy constructor: value members are copied directly, while every
+// non-null element of `tensors` and `operators` is cloned into a new object.
+// Null elements stay null in the copy.
+inline SubGraphT::SubGraphT(const SubGraphT &o)
+      : inputs(o.inputs),
+        outputs(o.outputs),
+        name(o.name) {
+  tensors.reserve(o.tensors.size());
+  for (const auto &src_tensor : o.tensors) {
+    if (src_tensor) {
+      tensors.emplace_back(new tflite::TensorT(*src_tensor));
+    } else {
+      tensors.emplace_back(nullptr);
+    }
+  }
+  operators.reserve(o.operators.size());
+  for (const auto &src_operator : o.operators) {
+    if (src_operator) {
+      operators.emplace_back(new tflite::OperatorT(*src_operator));
+    } else {
+      operators.emplace_back(nullptr);
+    }
+  }
+}
+
+// Assignment by swapping: `o` arrives by value, each member is exchanged with
+// it, and the previous contents are released when `o` goes out of scope.
+inline SubGraphT &SubGraphT::operator=(SubGraphT o) FLATBUFFERS_NOEXCEPT {
+  // Plain value members.
+  std::swap(inputs, o.inputs);
+  std::swap(outputs, o.outputs);
+  std::swap(name, o.name);
+  // Members holding owned objects.
+  std::swap(tensors, o.tensors);
+  std::swap(operators, o.operators);
+  return *this;
+}
+
+// Object API: allocates a new SubGraphT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<SubGraphT> unpacked(new SubGraphT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies every field of this table into *_o.
+// For `tensors` and `operators`, an element already present in *_o is refilled
+// in place; an empty slot receives a newly unpacked object. A field whose
+// accessor returns null leaves the matching member of *_o as it was.
+inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  if (auto src = tensors()) {
+    _o->tensors.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      auto &slot = _o->tensors[idx];
+      if (slot) {
+        src->Get(idx)->UnPackTo(slot.get(), _resolver);
+      } else {
+        slot = std::unique_ptr<tflite::TensorT>(src->Get(idx)->UnPack(_resolver));
+      }
+    }
+  }
+  if (auto src = inputs()) {
+    _o->inputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->inputs[idx] = src->Get(idx);
+    }
+  }
+  if (auto src = outputs()) {
+    _o->outputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      _o->outputs[idx] = src->Get(idx);
+    }
+  }
+  if (auto src = operators()) {
+    _o->operators.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      auto &slot = _o->operators[idx];
+      if (slot) {
+        src->Get(idx)->UnPackTo(slot.get(), _resolver);
+      } else {
+        slot = std::unique_ptr<tflite::OperatorT>(src->Get(idx)->UnPack(_resolver));
+      }
+    }
+  }
+  if (auto name_str = name()) {
+    _o->name = name_str->str();
+  }
+}
+
+// Object API: forwards all three arguments to CreateSubGraph() and returns
+// the offset it produces.
+inline flatbuffers::Offset<SubGraph> SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<SubGraph> packed = CreateSubGraph(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a SubGraph table in _fbb.
+// Child tables, vectors and the name string are written first, in field order;
+// an empty vector or string becomes a zero offset.
+inline flatbuffers::Offset<SubGraph> CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // State handed to the per-element callbacks below, which cannot capture.
+  struct VectorArgs {
+    flatbuffers::FlatBufferBuilder *builder;
+    const SubGraphT *object;
+    const flatbuffers::rehasher_function_t *rehasher;
+  };
+  VectorArgs args = { &_fbb, _o, _rehasher };
+  const auto tensors_off = _o->tensors.size()
+      ? _fbb.CreateVector<flatbuffers::Offset<tflite::Tensor>>(
+            _o->tensors.size(),
+            [](size_t idx, VectorArgs *state) {
+              return CreateTensor(*state->builder, state->object->tensors[idx].get(), state->rehasher);
+            },
+            &args)
+      : 0;
+  const auto inputs_off =
+      _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0;
+  const auto outputs_off =
+      _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0;
+  const auto operators_off = _o->operators.size()
+      ? _fbb.CreateVector<flatbuffers::Offset<tflite::Operator>>(
+            _o->operators.size(),
+            [](size_t idx, VectorArgs *state) {
+              return CreateOperator(*state->builder, state->object->operators[idx].get(), state->rehasher);
+            },
+            &args)
+      : 0;
+  const auto name_off =
+      _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  return tflite::CreateSubGraph(
+      _fbb,
+      tensors_off,
+      inputs_off,
+      outputs_off,
+      operators_off,
+      name_off);
+}
+
+// Object API: allocates a new BufferT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<BufferT> unpacked(new BufferT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies the `data` bytes of this table into _o->data.
+// When the accessor returns null, _o->data is left as it was.
+inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  if (auto src = data()) {
+    _o->data.resize(src->size());
+    std::copy(src->begin(), src->end(), _o->data.begin());
+  }
+}
+
+// Object API: forwards all three arguments to CreateBuffer() and returns the
+// offset it produces.
+inline flatbuffers::Offset<Buffer> Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<Buffer> packed = CreateBuffer(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a Buffer table in _fbb.
+// An empty `data` vector is passed on as a zero offset. _rehasher is not used.
+inline flatbuffers::Offset<Buffer> CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  // The alignment request (16, one byte per element) is issued before the
+  // vector is written and also runs when `data` is empty.
+  _fbb.ForceVectorAlignment(_o->data.size(), sizeof(uint8_t), 16);
+  const auto data_off =
+      _o->data.size() ? _fbb.CreateVector(_o->data) : 0;
+  return tflite::CreateBuffer(_fbb, data_off);
+}
+
+// Object API: allocates a new MetadataT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline MetadataT *Metadata::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<MetadataT> unpacked(new MetadataT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies `name` (when its accessor returns non-null) and `buffer`
+// from this table into *_o.
+inline void Metadata::UnPackTo(MetadataT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  if (auto name_str = name()) {
+    _o->name = name_str->str();
+  }
+  _o->buffer = buffer();
+}
+
+// Object API: forwards all three arguments to CreateMetadata() and returns
+// the offset it produces.
+inline flatbuffers::Offset<Metadata> Metadata::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<Metadata> packed = CreateMetadata(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a Metadata table in _fbb.
+// An empty `name` is passed on as a zero offset; `buffer` is forwarded
+// unchanged. _rehasher is not used.
+inline flatbuffers::Offset<Metadata> CreateMetadata(flatbuffers::FlatBufferBuilder &_fbb, const MetadataT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  const auto name_off =
+      _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  return tflite::CreateMetadata(_fbb, name_off, _o->buffer);
+}
+
+// Object API: allocates a new TensorMapT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline TensorMapT *TensorMap::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<TensorMapT> unpacked(new TensorMapT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies `name` (when its accessor returns non-null) and
+// `tensor_index` from this table into *_o.
+inline void TensorMap::UnPackTo(TensorMapT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  static_cast<void>(_resolver);
+  if (auto name_str = name()) {
+    _o->name = name_str->str();
+  }
+  _o->tensor_index = tensor_index();
+}
+
+// Object API: forwards all three arguments to CreateTensorMap() and returns
+// the offset it produces.
+inline flatbuffers::Offset<TensorMap> TensorMap::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<TensorMap> packed = CreateTensorMap(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a TensorMap table in _fbb.
+// An empty `name` is passed on as a zero offset; `tensor_index` is forwarded
+// unchanged. _rehasher is not used.
+inline flatbuffers::Offset<TensorMap> CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb, const TensorMapT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  static_cast<void>(_rehasher);
+  const auto name_off =
+      _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
+  return tflite::CreateTensorMap(_fbb, name_off, _o->tensor_index);
+}
+
+// Deep-copy constructor: `signature_key` and `subgraph_index` are copied
+// directly, while every non-null element of `inputs` and `outputs` is cloned
+// into a new TensorMapT. Null elements stay null in the copy.
+inline SignatureDefT::SignatureDefT(const SignatureDefT &o)
+      : signature_key(o.signature_key),
+        subgraph_index(o.subgraph_index) {
+  inputs.reserve(o.inputs.size());
+  for (const auto &src_input : o.inputs) {
+    if (src_input) {
+      inputs.emplace_back(new tflite::TensorMapT(*src_input));
+    } else {
+      inputs.emplace_back(nullptr);
+    }
+  }
+  outputs.reserve(o.outputs.size());
+  for (const auto &src_output : o.outputs) {
+    if (src_output) {
+      outputs.emplace_back(new tflite::TensorMapT(*src_output));
+    } else {
+      outputs.emplace_back(nullptr);
+    }
+  }
+}
+
+// Assignment by swapping: `o` arrives by value, each member is exchanged with
+// it, and the previous contents are released when `o` goes out of scope.
+inline SignatureDefT &SignatureDefT::operator=(SignatureDefT o) FLATBUFFERS_NOEXCEPT {
+  // Plain value members.
+  std::swap(signature_key, o.signature_key);
+  std::swap(subgraph_index, o.subgraph_index);
+  // Members holding owned objects.
+  std::swap(inputs, o.inputs);
+  std::swap(outputs, o.outputs);
+  return *this;
+}
+
+// Object API: allocates a new SignatureDefT, fills it through UnPackTo(), and
+// returns the raw pointer; the caller takes over ownership.
+inline SignatureDefT *SignatureDef::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
+  // The unique_ptr owns the object until UnPackTo() has returned.
+  std::unique_ptr<SignatureDefT> unpacked(new SignatureDefT());
+  UnPackTo(unpacked.get(), _resolver);
+  return unpacked.release();
+}
+
+// Object API: copies every field of this table into *_o.
+// For `inputs` and `outputs`, an element already present in *_o is refilled in
+// place; an empty slot receives a newly unpacked TensorMapT. A field whose
+// accessor returns null leaves the matching member of *_o as it was.
+inline void SignatureDef::UnPackTo(SignatureDefT *_o, const flatbuffers::resolver_function_t *_resolver) const {
+  if (auto src = inputs()) {
+    _o->inputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      auto &slot = _o->inputs[idx];
+      if (slot) {
+        src->Get(idx)->UnPackTo(slot.get(), _resolver);
+      } else {
+        slot = std::unique_ptr<tflite::TensorMapT>(src->Get(idx)->UnPack(_resolver));
+      }
+    }
+  }
+  if (auto src = outputs()) {
+    _o->outputs.resize(src->size());
+    for (flatbuffers::uoffset_t idx = 0; idx < src->size(); ++idx) {
+      auto &slot = _o->outputs[idx];
+      if (slot) {
+        src->Get(idx)->UnPackTo(slot.get(), _resolver);
+      } else {
+        slot = std::unique_ptr<tflite::TensorMapT>(src->Get(idx)->UnPack(_resolver));
+      }
+    }
+  }
+  if (auto key_str = signature_key()) {
+    _o->signature_key = key_str->str();
+  }
+  _o->subgraph_index = subgraph_index();
+}
+
+// Object API: forwards all three arguments to CreateSignatureDef() and
+// returns the offset it produces.
+inline flatbuffers::Offset<SignatureDef> SignatureDef::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
+  const flatbuffers::Offset<SignatureDef> packed = CreateSignatureDef(_fbb, _o, _rehasher);
+  return packed;
+}
+
+// Object API: serializes *_o as a SignatureDef table in _fbb.
+// The TensorMap child tables and the key string are written first, in field
+// order; an empty vector or string becomes a zero offset.
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(flatbuffers::FlatBufferBuilder &_fbb, const SignatureDefT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
+  // State handed to the per-element callbacks below, which cannot capture.
+  struct VectorArgs {
+    flatbuffers::FlatBufferBuilder *builder;
+    const SignatureDefT *object;
+    const flatbuffers::rehasher_function_t *rehasher;
+  };
+  VectorArgs args = { &_fbb, _o, _rehasher };
+  const auto inputs_off = _o->inputs.size()
+      ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(
+            _o->inputs.size(),
+            [](size_t idx, VectorArgs *state) {
+              return CreateTensorMap(*state->builder, state->object->inputs[idx].get(), state->rehasher);
+            },
+            &args)
+      : 0;
+  const auto outputs_off = _o->outputs.size()
+      ? _fbb.CreateVector<flatbuffers::Offset<tflite::TensorMap>>(
+            _o->outputs.size(),
+            [](size_t idx, VectorArgs *state) {
+              return CreateTensorMap(*state->builder, state->object->outputs[idx].get(), state->rehasher);
+            },
+            &args)
+      : 0;
+  const auto signature_key_off =
+      _o->signature_key.empty() ? 0 : _fbb.CreateString(_o->signature_key);
+  return tflite::CreateSignatureDef(
+      _fbb,
+      inputs_off,
+      outputs_off,
+      signature_key_off,
+      _o->subgraph_index);
+}
+
+inline ModelT::ModelT(const ModelT &o)  // Copy constructor: value members are copied directly, pointer-held elements are cloned one by one.
+      : version(o.version),
+        description(o.description),
+        metadata_buffer(o.metadata_buffer) {
+  operator_codes.reserve(o.operator_codes.size());  // Reserve up front so the loop below does not reallocate.
+  for (const auto &operator_codes_ : o.operator_codes) { operator_codes.emplace_back((operator_codes_) ? new tflite::OperatorCodeT(*operator_codes_) : nullptr); }  // Non-null entries are copy-constructed; null entries stay null.
+  subgraphs.reserve(o.subgraphs.size());
+  for (const auto &subgraphs_ : o.subgraphs) { subgraphs.emplace_back((subgraphs_) ? new tflite::SubGraphT(*subgraphs_) : nullptr); }  // Same cloning pattern for subgraphs.
+  buffers.reserve(o.buffers.size());
+  for (const auto &buffers_ : o.buffers) { buffers.emplace_back((buffers_) ? new tflite::BufferT(*buffers_) : nullptr); }  // Same cloning pattern for buffers.
+  metadata.reserve(o.metadata.size());
+  for (const auto &metadata_ : o.metadata) { metadata.emplace_back((metadata_) ? new tflite::MetadataT(*metadata_) : nullptr); }  // Same cloning pattern for metadata.
+  signature_defs.reserve(o.signature_defs.size());
+  for (const auto &signature_defs_ : o.signature_defs) { signature_defs.emplace_back((signature_defs_) ? new tflite::SignatureDefT(*signature_defs_) : nullptr); }  // Same cloning pattern for signature_defs.
+}
+
+inline ModelT &ModelT::operator=(ModelT o) FLATBUFFERS_NOEXCEPT {  // Assignment via a by-value parameter: `o` is already a private copy (or moved-from source) on entry.
+  std::swap(version, o.version);  // Each member is exchanged with the temporary's member.
+  std::swap(operator_codes, o.operator_codes);
+  std::swap(subgraphs, o.subgraphs);
+  std::swap(description, o.description);
+  std::swap(buffers, o.buffers);
+  std::swap(metadata_buffer, o.metadata_buffer);
+  std::swap(metadata, o.metadata);
+  std::swap(signature_defs, o.signature_defs);
+  return *this;  // The previous contents now live in `o` and are destroyed when it goes out of scope.
+}
+
+inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const {  // Creates a new ModelT populated from this table and returns it as a raw pointer.
+  auto _o = std::unique_ptr<ModelT>(new ModelT());  // Held in a unique_ptr while UnPackTo runs.
+  UnPackTo(_o.get(), _resolver);
+  return _o.release();  // Ownership is released to the caller.
+}
+
+inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {  // Copies every field of this table into the existing object `_o`; absent (null) vector/string fields leave `_o` untouched.
+  (void)_o;  // Silences unused-parameter warnings.
+  (void)_resolver;
+  { auto _e = version(); _o->version = _e; }  // Scalar: always overwritten.
+  { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->operator_codes[_i]) { _e->Get(_i)->UnPackTo(_o->operator_codes[_i].get(), _resolver); } else { _o->operator_codes[_i] = std::unique_ptr<tflite::OperatorCodeT>(_e->Get(_i)->UnPack(_resolver)); }; } } }  // Resize to match; reuse an existing element if present, otherwise allocate a new one.
+  { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->subgraphs[_i]) { _e->Get(_i)->UnPackTo(_o->subgraphs[_i].get(), _resolver); } else { _o->subgraphs[_i] = std::unique_ptr<tflite::SubGraphT>(_e->Get(_i)->UnPack(_resolver)); }; } } }  // Same reuse-or-allocate pattern.
+  { auto _e = description(); if (_e) _o->description = _e->str(); }  // String: assigned only when present.
+  { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->buffers[_i]) { _e->Get(_i)->UnPackTo(_o->buffers[_i].get(), _resolver); } else { _o->buffers[_i] = std::unique_ptr<tflite::BufferT>(_e->Get(_i)->UnPack(_resolver)); }; } } }  // Same reuse-or-allocate pattern.
+  { auto _e = metadata_buffer(); if (_e) { _o->metadata_buffer.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->metadata_buffer[_i] = _e->Get(_i); } } }  // Plain element-wise copy of the values.
+  { auto _e = metadata(); if (_e) { _o->metadata.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->metadata[_i]) { _e->Get(_i)->UnPackTo(_o->metadata[_i].get(), _resolver); } else { _o->metadata[_i] = std::unique_ptr<tflite::MetadataT>(_e->Get(_i)->UnPack(_resolver)); }; } } }  // Same reuse-or-allocate pattern.
+  { auto _e = signature_defs(); if (_e) { _o->signature_defs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { if(_o->signature_defs[_i]) { _e->Get(_i)->UnPackTo(_o->signature_defs[_i].get(), _resolver); } else { _o->signature_defs[_i] = std::unique_ptr<tflite::SignatureDefT>(_e->Get(_i)->UnPack(_resolver)); }; } } }  // Same reuse-or-allocate pattern.
+}
+
+inline flatbuffers::Offset<Model> Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {  // Serializes the object-API struct `_o` into `_fbb` and returns the resulting table offset.
+  return CreateModel(_fbb, _o, _rehasher);  // Pure forwarder to the CreateModel overload that takes a ModelT.
+}
+
+inline flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {  // Builds a Model table in `_fbb` from the fields of `_o`; `_o` is dereferenced unconditionally.
+  (void)_rehasher;  // Silences unused-parameter warnings.
+  (void)_o;
+  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;  // State bundle passed to the capture-less per-element lambdas below.
+  auto _version = _o->version;  // Scalar field, copied as-is.
+  auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::OperatorCode>> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0;  // One child table per element; 0 when the vector is empty.
+  auto _subgraphs = _o->subgraphs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::SubGraph>> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0;  // Same pattern for subgraphs.
+  auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description);  // 0 when the string is empty.
+  auto _buffers = _o->buffers.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Buffer>> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0;  // Same pattern for buffers.
+  auto _metadata_buffer = _o->metadata_buffer.size() ? _fbb.CreateVector(_o->metadata_buffer) : 0;  // Value vector written directly; 0 when empty.
+  auto _metadata = _o->metadata.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::Metadata>> (_o->metadata.size(), [](size_t i, _VectorArgs *__va) { return CreateMetadata(*__va->__fbb, __va->__o->metadata[i].get(), __va->__rehasher); }, &_va ) : 0;  // Same pattern for metadata.
+  auto _signature_defs = _o->signature_defs.size() ? _fbb.CreateVector<flatbuffers::Offset<tflite::SignatureDef>> (_o->signature_defs.size(), [](size_t i, _VectorArgs *__va) { return CreateSignatureDef(*__va->__fbb, __va->__o->signature_defs[i].get(), __va->__rehasher); }, &_va ) : 0;  // Same pattern for signature_defs.
+  return tflite::CreateModel(  // Delegates to the offset-based overload with the pieces built above.
+      _fbb,
+      _version,
+      _operator_codes,
+      _subgraphs,
+      _description,
+      _buffers,
+      _metadata_buffer,
+      _metadata,
+      _signature_defs);
+}
+
+inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type) {  // Verifies the union payload `obj` as the table type selected by `type`.
+  switch (type) {
+    case QuantizationDetails_NONE: {
+      return true;  // NONE: `obj` is not inspected.
+    }
+    case QuantizationDetails_CustomQuantization: {
+      auto ptr = reinterpret_cast<const tflite::CustomQuantization *>(obj);
+      return verifier.VerifyTable(ptr);  // Result comes straight from the verifier.
+    }
+    default: return true;  // Type values not listed above are accepted without inspecting `obj`.
+  }
+}
+
+inline bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {  // Verifies parallel vectors of union payloads and their type tags.
+  if (!values || !types) return !values && !types;  // Both absent passes; exactly one absent fails.
+  if (values->size() != types->size()) return false;  // Each value needs a matching type tag.
+  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+    if (!VerifyQuantizationDetails(
+        verifier,  values->Get(i), types->GetEnum<QuantizationDetails>(i))) {
+      return false;  // Stop at the first element that fails verification.
+    }
+  }
+  return true;
+}
+
+inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetails type, const flatbuffers::resolver_function_t *resolver) {  // Unpacks the payload `obj` as the table type selected by `type` and returns it type-erased.
+  (void)resolver;  // Silences unused-parameter warnings.
+  switch (type) {
+    case QuantizationDetails_CustomQuantization: {
+      auto ptr = reinterpret_cast<const tflite::CustomQuantization *>(obj);
+      return ptr->UnPack(resolver);  // Returned as void*; the caller must track the concrete type via `type`.
+    }
+    default: return nullptr;  // NONE and unlisted type values yield no object.
+  }
+}
+
+inline flatbuffers::Offset<void> QuantizationDetailsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {  // Serializes the currently held member (selected by `type`) into `_fbb`.
+  (void)_rehasher;  // Silences unused-parameter warnings.
+  switch (type) {
+    case QuantizationDetails_CustomQuantization: {
+      auto ptr = reinterpret_cast<const tflite::CustomQuantizationT *>(value);  // `value` is interpreted according to `type`.
+      return CreateCustomQuantization(_fbb, ptr, _rehasher).Union();
+    }
+    default: return 0;  // NONE and unlisted type values produce offset 0.
+  }
+}
+
+inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDetailsUnion &u) : type(u.type), value(nullptr) {  // Copy constructor: duplicates the held object rather than sharing the pointer.
+  switch (type) {
+    case QuantizationDetails_CustomQuantization: {
+      value = new tflite::CustomQuantizationT(*reinterpret_cast<tflite::CustomQuantizationT *>(u.value));  // Copy-constructs a fresh object from the source's payload.
+      break;
+    }
+    default:
+      break;  // Other type values leave `value` as nullptr.
+  }
+}
+
+inline void QuantizationDetailsUnion::Reset() {  // Destroys the held object (if any) and returns the union to the NONE state.
+  switch (type) {
+    case QuantizationDetails_CustomQuantization: {
+      auto ptr = reinterpret_cast<tflite::CustomQuantizationT *>(value);  // Cast back to the concrete type so `delete` runs the right destructor.
+      delete ptr;
+      break;
+    }
+    default: break;  // Nothing is deleted for other type values.
+  }
+  value = nullptr;
+  type = QuantizationDetails_NONE;
+}
+
+inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type) {  // Verifies the union payload `obj` as the table type selected by `type`.
+  switch (type) {
+    case SparseIndexVector_NONE: {
+      return true;  // NONE: `obj` is not inspected.
+    }
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<const tflite::Int32Vector *>(obj);
+      return verifier.VerifyTable(ptr);  // Result comes straight from the verifier.
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    default: return true;  // Type values not listed above are accepted without inspecting `obj`.
+  }
+}
+
+inline bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {  // Verifies parallel vectors of union payloads and their type tags.
+  if (!values || !types) return !values && !types;  // Both absent passes; exactly one absent fails.
+  if (values->size() != types->size()) return false;  // Each value needs a matching type tag.
+  for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) {
+    if (!VerifySparseIndexVector(
+        verifier,  values->Get(i), types->GetEnum<SparseIndexVector>(i))) {
+      return false;  // Stop at the first element that fails verification.
+    }
+  }
+  return true;
+}
+
+inline void *SparseIndexVectorUnion::UnPack(const void *obj, SparseIndexVector type, const flatbuffers::resolver_function_t *resolver) {  // Unpacks the payload `obj` as the table type selected by `type` and returns it type-erased.
+  (void)resolver;  // Silences unused-parameter warnings.
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<const tflite::Int32Vector *>(obj);
+      return ptr->UnPack(resolver);  // Returned as void*; the caller must track the concrete type via `type`.
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16Vector *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8Vector *>(obj);
+      return ptr->UnPack(resolver);
+    }
+    default: return nullptr;  // NONE and unlisted type values yield no object.
+  }
+}
+
+inline flatbuffers::Offset<void> SparseIndexVectorUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {  // Serializes the currently held member (selected by `type`) into `_fbb`.
+  (void)_rehasher;  // Silences unused-parameter warnings.
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<const tflite::Int32VectorT *>(value);  // `value` is interpreted according to `type`.
+      return CreateInt32Vector(_fbb, ptr, _rehasher).Union();
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint16VectorT *>(value);
+      return CreateUint16Vector(_fbb, ptr, _rehasher).Union();
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<const tflite::Uint8VectorT *>(value);
+      return CreateUint8Vector(_fbb, ptr, _rehasher).Union();
+    }
+    default: return 0;  // NONE and unlisted type values produce offset 0.
+  }
+}
+
+inline SparseIndexVectorUnion::SparseIndexVectorUnion(const SparseIndexVectorUnion &u) : type(u.type), value(nullptr) {  // Copy constructor: duplicates the held object rather than sharing the pointer.
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      value = new tflite::Int32VectorT(*reinterpret_cast<tflite::Int32VectorT *>(u.value));  // Copy-constructs a fresh object from the source's payload.
+      break;
+    }
+    case SparseIndexVector_Uint16Vector: {
+      value = new tflite::Uint16VectorT(*reinterpret_cast<tflite::Uint16VectorT *>(u.value));
+      break;
+    }
+    case SparseIndexVector_Uint8Vector: {
+      value = new tflite::Uint8VectorT(*reinterpret_cast<tflite::Uint8VectorT *>(u.value));
+      break;
+    }
+    default:
+      break;  // Other type values leave `value` as nullptr.
+  }
+}
+
+inline void SparseIndexVectorUnion::Reset() {  // Destroys the held object (if any) and returns the union to the NONE state.
+  switch (type) {
+    case SparseIndexVector_Int32Vector: {
+      auto ptr = reinterpret_cast<tflite::Int32VectorT *>(value);  // Cast back to the concrete type so `delete` runs the right destructor.
+      delete ptr;
+      break;
+    }
+    case SparseIndexVector_Uint16Vector: {
+      auto ptr = reinterpret_cast<tflite::Uint16VectorT *>(value);
+      delete ptr;
+      break;
+    }
+    case SparseIndexVector_Uint8Vector: {
+      auto ptr = reinterpret_cast<tflite::Uint8VectorT *>(value);
+      delete ptr;
+      break;
+    }
+    default: break;  // Nothing is deleted for other type values.
+  }
+  value = nullptr;
+  type = SparseIndexVector_NONE;
+}
+
+inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) {  // Verifies the union payload `obj` as the options table selected by `type`; every listed case follows the same cast-then-VerifyTable shape.
+  switch (type) {
+    case BuiltinOptions_NONE: {
+      return true;  // NONE: `obj` is not inspected.
+    }
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Conv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);  // Result comes straight from the verifier.
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Pool2DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<const tflite::SVDFOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::RNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<const tflite::FullyConnectedOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::SoftmaxOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<const tflite::ConcatenationOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<const tflite::AddOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<const tflite::L2NormOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr = reinterpret_cast<const tflite::LocalResponseNormalizationOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::LSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<const tflite::ResizeBilinearOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<const tflite::CallOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReshapeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<const tflite::SkipGramOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<const tflite::SpaceToDepthOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<const tflite::EmbeddingLookupSparseOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<const tflite::MulOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const tflite::PadOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const tflite::GatherOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const tflite::BatchToSpaceNDOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<const tflite::SpaceToBatchNDOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<const tflite::TransposeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReducerOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReducerOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<const tflite::SubOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<const tflite::DivOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<const tflite::SqueezeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::SequenceRNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_StridedSliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::StridedSliceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ExpOptions: {
+      auto ptr = reinterpret_cast<const tflite::ExpOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_TopKV2Options: {
+      auto ptr = reinterpret_cast<const tflite::TopKV2Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SplitOptions: {
+      auto ptr = reinterpret_cast<const tflite::SplitOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LogSoftmaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogSoftmaxOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CastOptions: {
+      auto ptr = reinterpret_cast<const tflite::CastOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DequantizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::DequantizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MaximumMinimumOptions: {
+      auto ptr = reinterpret_cast<const tflite::MaximumMinimumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ArgMaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::ArgMaxOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LessOptions: {
+      auto ptr = reinterpret_cast<const tflite::LessOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_NegOptions: {
+      auto ptr = reinterpret_cast<const tflite::NegOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_PadV2Options: {
+      auto ptr = reinterpret_cast<const tflite::PadV2Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GreaterOptions: {
+      auto ptr = reinterpret_cast<const tflite::GreaterOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GreaterEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::GreaterEqualOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LessEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::LessEqualOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SelectOptions: {
+      auto ptr = reinterpret_cast<const tflite::SelectOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::SliceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_TransposeConvOptions: {
+      auto ptr = reinterpret_cast<const tflite::TransposeConvOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SparseToDenseOptions: {
+      auto ptr = reinterpret_cast<const tflite::SparseToDenseOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_TileOptions: {
+      auto ptr = reinterpret_cast<const tflite::TileOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ExpandDimsOptions: {
+      auto ptr = reinterpret_cast<const tflite::ExpandDimsOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_EqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::EqualOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_NotEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::NotEqualOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ShapeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ShapeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_PowOptions: {
+      auto ptr = reinterpret_cast<const tflite::PowOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ArgMinOptions: {
+      auto ptr = reinterpret_cast<const tflite::ArgMinOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FakeQuantOptions: {
+      auto ptr = reinterpret_cast<const tflite::FakeQuantOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_PackOptions: {
+      auto ptr = reinterpret_cast<const tflite::PackOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LogicalOrOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalOrOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_OneHotOptions: {
+      auto ptr = reinterpret_cast<const tflite::OneHotOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LogicalAndOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalAndOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LogicalNotOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalNotOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnpackOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnpackOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FloorDivOptions: {
+      auto ptr = reinterpret_cast<const tflite::FloorDivOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SquareOptions: {
+      auto ptr = reinterpret_cast<const tflite::SquareOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ZerosLikeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<const tflite::FillOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceRNNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const tflite::FloorModOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<const tflite::RangeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ResizeNearestNeighborOptions: {
+      auto ptr = reinterpret_cast<const tflite::ResizeNearestNeighborOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_LeakyReluOptions: {
+      auto ptr = reinterpret_cast<const tflite::LeakyReluOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SquaredDifferenceOptions: {
+      auto ptr = reinterpret_cast<const tflite::SquaredDifferenceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MirrorPadOptions: {
+      auto ptr = reinterpret_cast<const tflite::MirrorPadOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AbsOptions: {
+      auto ptr = reinterpret_cast<const tflite::AbsOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SplitVOptions: {
+      auto ptr = reinterpret_cast<const tflite::SplitVOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UniqueOptions: {
+      auto ptr = reinterpret_cast<const tflite::UniqueOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReverseV2Options: {
+      auto ptr = reinterpret_cast<const tflite::ReverseV2Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AddNOptions: {
+      auto ptr = reinterpret_cast<const tflite::AddNOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<const tflite::GatherNdOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<const tflite::CosOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<const tflite::WhereOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<const tflite::RankOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReverseSequenceOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReverseSequenceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MatrixDiagOptions: {
+      auto ptr = reinterpret_cast<const tflite::MatrixDiagOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_QuantizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::QuantizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_MatrixSetDiagOptions: {
+      auto ptr = reinterpret_cast<const tflite::MatrixSetDiagOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HardSwishOptions: {
+      auto ptr = reinterpret_cast<const tflite::HardSwishOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_IfOptions: {
+      auto ptr = reinterpret_cast<const tflite::IfOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_WhileOptions: {
+      auto ptr = reinterpret_cast<const tflite::WhileOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DepthToSpaceOptions: {
+      auto ptr = reinterpret_cast<const tflite::DepthToSpaceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_NonMaxSuppressionV4Options: {
+      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV4Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_NonMaxSuppressionV5Options: {
+      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV5Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ScatterNdOptions: {
+      auto ptr = reinterpret_cast<const tflite::ScatterNdOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SelectV2Options: {
+      auto ptr = reinterpret_cast<const tflite::SelectV2Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DensifyOptions: {
+      auto ptr = reinterpret_cast<const tflite::DensifyOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SegmentSumOptions: {
+      auto ptr = reinterpret_cast<const tflite::SegmentSumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BatchMatMulOptions: {
+      auto ptr = reinterpret_cast<const tflite::BatchMatMulOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CumsumOptions: {
+      auto ptr = reinterpret_cast<const tflite::CumsumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CallOnceOptions: {
+      auto ptr = reinterpret_cast<const tflite::CallOnceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BroadcastToOptions: {
+      auto ptr = reinterpret_cast<const tflite::BroadcastToOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Rfft2dOptions: {
+      auto ptr = reinterpret_cast<const tflite::Rfft2dOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Conv3DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Conv3DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableFindOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableFindOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableImportOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableImportOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableSizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableSizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_VarHandleOptions: {
+      auto ptr = reinterpret_cast<const tflite::VarHandleOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReadVariableOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReadVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AssignVariableOptions: {
+      auto ptr = reinterpret_cast<const tflite::AssignVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RandomOptions: {
+      auto ptr = reinterpret_cast<const tflite::RandomOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BucketizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::BucketizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_GeluOptions: {
+      auto ptr = reinterpret_cast<const tflite::GeluOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_DynamicUpdateSliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::DynamicUpdateSliceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnsortedSegmentProdOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentProdOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnsortedSegmentMaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentMaxOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnsortedSegmentMinOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentMinOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_UnsortedSegmentSumOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentSumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ATan2Options: {
+      auto ptr = reinterpret_cast<const tflite::ATan2Options *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_SignOptions: {
+      auto ptr = reinterpret_cast<const tflite::SignOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    default: return true;  // Type values not listed above are accepted without inspecting `obj`.
+  }
+}
+
+inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector<flatbuffers::Offset<void>> *values, const flatbuffers::Vector<uint8_t> *types) {
+  // Valid when both vectors are absent, or both are present with equal length and every (value, type) pair passes VerifyBuiltinOptions.
+  if (values == nullptr || types == nullptr) return values == nullptr && types == nullptr;
+  const flatbuffers::uoffset_t count = values->size();
+  if (count != types->size()) return false;
+  for (flatbuffers::uoffset_t idx = 0; idx < count; ++idx) {
+    const bool element_ok = VerifyBuiltinOptions(verifier, values->Get(idx), types->GetEnum<BuiltinOptions>(idx));
+    if (!element_ok) return false;  // Stop at the first element that fails verification.
+  }
+  return true;
+}
+
+inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) {
+  (void)resolver;  // resolver is forwarded unchanged by every case below.
+  switch (type) {  // Dispatch on the union tag: view obj as the matching table type and return that table's UnPack(resolver) result as void*.
+    case BuiltinOptions_Conv2DOptions: {
+      const auto *table = static_cast<const tflite::Conv2DOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      const auto *table = static_cast<const tflite::DepthwiseConv2DOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      const auto *table = static_cast<const tflite::ConcatEmbeddingsOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      const auto *table = static_cast<const tflite::LSHProjectionOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      const auto *table = static_cast<const tflite::Pool2DOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SVDFOptions: {
+      const auto *table = static_cast<const tflite::SVDFOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_RNNOptions: {
+      const auto *table = static_cast<const tflite::RNNOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      const auto *table = static_cast<const tflite::FullyConnectedOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      const auto *table = static_cast<const tflite::SoftmaxOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      const auto *table = static_cast<const tflite::ConcatenationOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_AddOptions: {
+      const auto *table = static_cast<const tflite::AddOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_L2NormOptions: {
+      const auto *table = static_cast<const tflite::L2NormOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      const auto *table = static_cast<const tflite::LocalResponseNormalizationOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LSTMOptions: {
+      const auto *table = static_cast<const tflite::LSTMOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      const auto *table = static_cast<const tflite::ResizeBilinearOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_CallOptions: {
+      const auto *table = static_cast<const tflite::CallOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      const auto *table = static_cast<const tflite::ReshapeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      const auto *table = static_cast<const tflite::SkipGramOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      const auto *table = static_cast<const tflite::SpaceToDepthOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      const auto *table = static_cast<const tflite::EmbeddingLookupSparseOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_MulOptions: {
+      const auto *table = static_cast<const tflite::MulOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_PadOptions: {
+      const auto *table = static_cast<const tflite::PadOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_GatherOptions: {
+      const auto *table = static_cast<const tflite::GatherOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      const auto *table = static_cast<const tflite::BatchToSpaceNDOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      const auto *table = static_cast<const tflite::SpaceToBatchNDOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_TransposeOptions: {
+      const auto *table = static_cast<const tflite::TransposeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ReducerOptions: {
+      const auto *table = static_cast<const tflite::ReducerOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SubOptions: {
+      const auto *table = static_cast<const tflite::SubOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DivOptions: {
+      const auto *table = static_cast<const tflite::DivOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      const auto *table = static_cast<const tflite::SqueezeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      const auto *table = static_cast<const tflite::SequenceRNNOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_StridedSliceOptions: {
+      const auto *table = static_cast<const tflite::StridedSliceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ExpOptions: {
+      const auto *table = static_cast<const tflite::ExpOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_TopKV2Options: {
+      const auto *table = static_cast<const tflite::TopKV2Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SplitOptions: {
+      const auto *table = static_cast<const tflite::SplitOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LogSoftmaxOptions: {
+      const auto *table = static_cast<const tflite::LogSoftmaxOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_CastOptions: {
+      const auto *table = static_cast<const tflite::CastOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DequantizeOptions: {
+      const auto *table = static_cast<const tflite::DequantizeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_MaximumMinimumOptions: {
+      const auto *table = static_cast<const tflite::MaximumMinimumOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ArgMaxOptions: {
+      const auto *table = static_cast<const tflite::ArgMaxOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LessOptions: {
+      const auto *table = static_cast<const tflite::LessOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_NegOptions: {
+      const auto *table = static_cast<const tflite::NegOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_PadV2Options: {
+      const auto *table = static_cast<const tflite::PadV2Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_GreaterOptions: {
+      const auto *table = static_cast<const tflite::GreaterOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_GreaterEqualOptions: {
+      const auto *table = static_cast<const tflite::GreaterEqualOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LessEqualOptions: {
+      const auto *table = static_cast<const tflite::LessEqualOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SelectOptions: {
+      const auto *table = static_cast<const tflite::SelectOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SliceOptions: {
+      const auto *table = static_cast<const tflite::SliceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_TransposeConvOptions: {
+      const auto *table = static_cast<const tflite::TransposeConvOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SparseToDenseOptions: {
+      const auto *table = static_cast<const tflite::SparseToDenseOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_TileOptions: {
+      const auto *table = static_cast<const tflite::TileOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ExpandDimsOptions: {
+      const auto *table = static_cast<const tflite::ExpandDimsOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_EqualOptions: {
+      const auto *table = static_cast<const tflite::EqualOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_NotEqualOptions: {
+      const auto *table = static_cast<const tflite::NotEqualOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ShapeOptions: {
+      const auto *table = static_cast<const tflite::ShapeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_PowOptions: {
+      const auto *table = static_cast<const tflite::PowOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ArgMinOptions: {
+      const auto *table = static_cast<const tflite::ArgMinOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_FakeQuantOptions: {
+      const auto *table = static_cast<const tflite::FakeQuantOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_PackOptions: {
+      const auto *table = static_cast<const tflite::PackOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LogicalOrOptions: {
+      const auto *table = static_cast<const tflite::LogicalOrOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_OneHotOptions: {
+      const auto *table = static_cast<const tflite::OneHotOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LogicalAndOptions: {
+      const auto *table = static_cast<const tflite::LogicalAndOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LogicalNotOptions: {
+      const auto *table = static_cast<const tflite::LogicalNotOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnpackOptions: {
+      const auto *table = static_cast<const tflite::UnpackOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_FloorDivOptions: {
+      const auto *table = static_cast<const tflite::FloorDivOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SquareOptions: {
+      const auto *table = static_cast<const tflite::SquareOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ZerosLikeOptions: {
+      const auto *table = static_cast<const tflite::ZerosLikeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_FillOptions: {
+      const auto *table = static_cast<const tflite::FillOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      const auto *table = static_cast<const tflite::BidirectionalSequenceLSTMOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      const auto *table = static_cast<const tflite::BidirectionalSequenceRNNOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      const auto *table = static_cast<const tflite::UnidirectionalSequenceLSTMOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_FloorModOptions: {
+      const auto *table = static_cast<const tflite::FloorModOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_RangeOptions: {
+      const auto *table = static_cast<const tflite::RangeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ResizeNearestNeighborOptions: {
+      const auto *table = static_cast<const tflite::ResizeNearestNeighborOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_LeakyReluOptions: {
+      const auto *table = static_cast<const tflite::LeakyReluOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SquaredDifferenceOptions: {
+      const auto *table = static_cast<const tflite::SquaredDifferenceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_MirrorPadOptions: {
+      const auto *table = static_cast<const tflite::MirrorPadOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_AbsOptions: {
+      const auto *table = static_cast<const tflite::AbsOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SplitVOptions: {
+      const auto *table = static_cast<const tflite::SplitVOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UniqueOptions: {
+      const auto *table = static_cast<const tflite::UniqueOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ReverseV2Options: {
+      const auto *table = static_cast<const tflite::ReverseV2Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_AddNOptions: {
+      const auto *table = static_cast<const tflite::AddNOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_GatherNdOptions: {
+      const auto *table = static_cast<const tflite::GatherNdOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_CosOptions: {
+      const auto *table = static_cast<const tflite::CosOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_WhereOptions: {
+      const auto *table = static_cast<const tflite::WhereOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_RankOptions: {
+      const auto *table = static_cast<const tflite::RankOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ReverseSequenceOptions: {
+      const auto *table = static_cast<const tflite::ReverseSequenceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_MatrixDiagOptions: {
+      const auto *table = static_cast<const tflite::MatrixDiagOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_QuantizeOptions: {
+      const auto *table = static_cast<const tflite::QuantizeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_MatrixSetDiagOptions: {
+      const auto *table = static_cast<const tflite::MatrixSetDiagOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_HardSwishOptions: {
+      const auto *table = static_cast<const tflite::HardSwishOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_IfOptions: {
+      const auto *table = static_cast<const tflite::IfOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_WhileOptions: {
+      const auto *table = static_cast<const tflite::WhileOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DepthToSpaceOptions: {
+      const auto *table = static_cast<const tflite::DepthToSpaceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_NonMaxSuppressionV4Options: {
+      const auto *table = static_cast<const tflite::NonMaxSuppressionV4Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_NonMaxSuppressionV5Options: {
+      const auto *table = static_cast<const tflite::NonMaxSuppressionV5Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ScatterNdOptions: {
+      const auto *table = static_cast<const tflite::ScatterNdOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SelectV2Options: {
+      const auto *table = static_cast<const tflite::SelectV2Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DensifyOptions: {
+      const auto *table = static_cast<const tflite::DensifyOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SegmentSumOptions: {
+      const auto *table = static_cast<const tflite::SegmentSumOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BatchMatMulOptions: {
+      const auto *table = static_cast<const tflite::BatchMatMulOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_CumsumOptions: {
+      const auto *table = static_cast<const tflite::CumsumOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_CallOnceOptions: {
+      const auto *table = static_cast<const tflite::CallOnceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BroadcastToOptions: {
+      const auto *table = static_cast<const tflite::BroadcastToOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_Rfft2dOptions: {
+      const auto *table = static_cast<const tflite::Rfft2dOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_Conv3DOptions: {
+      const auto *table = static_cast<const tflite::Conv3DOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_HashtableOptions: {
+      const auto *table = static_cast<const tflite::HashtableOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_HashtableFindOptions: {
+      const auto *table = static_cast<const tflite::HashtableFindOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_HashtableImportOptions: {
+      const auto *table = static_cast<const tflite::HashtableImportOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_HashtableSizeOptions: {
+      const auto *table = static_cast<const tflite::HashtableSizeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_VarHandleOptions: {
+      const auto *table = static_cast<const tflite::VarHandleOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ReadVariableOptions: {
+      const auto *table = static_cast<const tflite::ReadVariableOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_AssignVariableOptions: {
+      const auto *table = static_cast<const tflite::AssignVariableOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_RandomOptions: {
+      const auto *table = static_cast<const tflite::RandomOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_BucketizeOptions: {
+      const auto *table = static_cast<const tflite::BucketizeOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_GeluOptions: {
+      const auto *table = static_cast<const tflite::GeluOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_DynamicUpdateSliceOptions: {
+      const auto *table = static_cast<const tflite::DynamicUpdateSliceOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnsortedSegmentProdOptions: {
+      const auto *table = static_cast<const tflite::UnsortedSegmentProdOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnsortedSegmentMaxOptions: {
+      const auto *table = static_cast<const tflite::UnsortedSegmentMaxOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnsortedSegmentMinOptions: {
+      const auto *table = static_cast<const tflite::UnsortedSegmentMinOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_UnsortedSegmentSumOptions: {
+      const auto *table = static_cast<const tflite::UnsortedSegmentSumOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_ATan2Options: {
+      const auto *table = static_cast<const tflite::ATan2Options *>(obj);
+      return table->UnPack(resolver);
+    }
+    case BuiltinOptions_SignOptions: {
+      const auto *table = static_cast<const tflite::SignOptions *>(obj);
+      return table->UnPack(resolver);
+    }
+    default: return nullptr;  // Any tag without a case above yields no object.
+  }
+}
+
+inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const {
+  (void)_rehasher;
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Conv2DOptionsT *>(value);
+      return CreateConv2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::DepthwiseConv2DOptionsT *>(value);
+      return CreateDepthwiseConv2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<const tflite::ConcatEmbeddingsOptionsT *>(value);
+      return CreateConcatEmbeddingsOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<const tflite::LSHProjectionOptionsT *>(value);
+      return CreateLSHProjectionOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Pool2DOptionsT *>(value);
+      return CreatePool2DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<const tflite::SVDFOptionsT *>(value);
+      return CreateSVDFOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::RNNOptionsT *>(value);
+      return CreateRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<const tflite::FullyConnectedOptionsT *>(value);
+      return CreateFullyConnectedOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::SoftmaxOptionsT *>(value);
+      return CreateSoftmaxOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<const tflite::ConcatenationOptionsT *>(value);
+      return CreateConcatenationOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<const tflite::AddOptionsT *>(value);
+      return CreateAddOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<const tflite::L2NormOptionsT *>(value);
+      return CreateL2NormOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr = reinterpret_cast<const tflite::LocalResponseNormalizationOptionsT *>(value);
+      return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::LSTMOptionsT *>(value);
+      return CreateLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<const tflite::ResizeBilinearOptionsT *>(value);
+      return CreateResizeBilinearOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<const tflite::CallOptionsT *>(value);
+      return CreateCallOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReshapeOptionsT *>(value);
+      return CreateReshapeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<const tflite::SkipGramOptionsT *>(value);
+      return CreateSkipGramOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<const tflite::SpaceToDepthOptionsT *>(value);
+      return CreateSpaceToDepthOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<const tflite::EmbeddingLookupSparseOptionsT *>(value);
+      return CreateEmbeddingLookupSparseOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<const tflite::MulOptionsT *>(value);
+      return CreateMulOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<const tflite::PadOptionsT *>(value);
+      return CreatePadOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<const tflite::GatherOptionsT *>(value);
+      return CreateGatherOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<const tflite::BatchToSpaceNDOptionsT *>(value);
+      return CreateBatchToSpaceNDOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<const tflite::SpaceToBatchNDOptionsT *>(value);
+      return CreateSpaceToBatchNDOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<const tflite::TransposeOptionsT *>(value);
+      return CreateTransposeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReducerOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReducerOptionsT *>(value);
+      return CreateReducerOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<const tflite::SubOptionsT *>(value);
+      return CreateSubOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<const tflite::DivOptionsT *>(value);
+      return CreateDivOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<const tflite::SqueezeOptionsT *>(value);
+      return CreateSqueezeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::SequenceRNNOptionsT *>(value);
+      return CreateSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_StridedSliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::StridedSliceOptionsT *>(value);
+      return CreateStridedSliceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ExpOptions: {
+      auto ptr = reinterpret_cast<const tflite::ExpOptionsT *>(value);
+      return CreateExpOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_TopKV2Options: {
+      auto ptr = reinterpret_cast<const tflite::TopKV2OptionsT *>(value);
+      return CreateTopKV2Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SplitOptions: {
+      auto ptr = reinterpret_cast<const tflite::SplitOptionsT *>(value);
+      return CreateSplitOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LogSoftmaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogSoftmaxOptionsT *>(value);
+      return CreateLogSoftmaxOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CastOptions: {
+      auto ptr = reinterpret_cast<const tflite::CastOptionsT *>(value);
+      return CreateCastOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DequantizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::DequantizeOptionsT *>(value);
+      return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MaximumMinimumOptions: {
+      auto ptr = reinterpret_cast<const tflite::MaximumMinimumOptionsT *>(value);
+      return CreateMaximumMinimumOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ArgMaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::ArgMaxOptionsT *>(value);
+      return CreateArgMaxOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LessOptions: {
+      auto ptr = reinterpret_cast<const tflite::LessOptionsT *>(value);
+      return CreateLessOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_NegOptions: {
+      auto ptr = reinterpret_cast<const tflite::NegOptionsT *>(value);
+      return CreateNegOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_PadV2Options: {
+      auto ptr = reinterpret_cast<const tflite::PadV2OptionsT *>(value);
+      return CreatePadV2Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GreaterOptions: {
+      auto ptr = reinterpret_cast<const tflite::GreaterOptionsT *>(value);
+      return CreateGreaterOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GreaterEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::GreaterEqualOptionsT *>(value);
+      return CreateGreaterEqualOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LessEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::LessEqualOptionsT *>(value);
+      return CreateLessEqualOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SelectOptions: {
+      auto ptr = reinterpret_cast<const tflite::SelectOptionsT *>(value);
+      return CreateSelectOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::SliceOptionsT *>(value);
+      return CreateSliceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_TransposeConvOptions: {
+      auto ptr = reinterpret_cast<const tflite::TransposeConvOptionsT *>(value);
+      return CreateTransposeConvOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SparseToDenseOptions: {
+      auto ptr = reinterpret_cast<const tflite::SparseToDenseOptionsT *>(value);
+      return CreateSparseToDenseOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_TileOptions: {
+      auto ptr = reinterpret_cast<const tflite::TileOptionsT *>(value);
+      return CreateTileOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ExpandDimsOptions: {
+      auto ptr = reinterpret_cast<const tflite::ExpandDimsOptionsT *>(value);
+      return CreateExpandDimsOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_EqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::EqualOptionsT *>(value);
+      return CreateEqualOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_NotEqualOptions: {
+      auto ptr = reinterpret_cast<const tflite::NotEqualOptionsT *>(value);
+      return CreateNotEqualOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ShapeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ShapeOptionsT *>(value);
+      return CreateShapeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_PowOptions: {
+      auto ptr = reinterpret_cast<const tflite::PowOptionsT *>(value);
+      return CreatePowOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ArgMinOptions: {
+      auto ptr = reinterpret_cast<const tflite::ArgMinOptionsT *>(value);
+      return CreateArgMinOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FakeQuantOptions: {
+      auto ptr = reinterpret_cast<const tflite::FakeQuantOptionsT *>(value);
+      return CreateFakeQuantOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_PackOptions: {
+      auto ptr = reinterpret_cast<const tflite::PackOptionsT *>(value);
+      return CreatePackOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LogicalOrOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalOrOptionsT *>(value);
+      return CreateLogicalOrOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_OneHotOptions: {
+      auto ptr = reinterpret_cast<const tflite::OneHotOptionsT *>(value);
+      return CreateOneHotOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LogicalAndOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalAndOptionsT *>(value);
+      return CreateLogicalAndOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LogicalNotOptions: {
+      auto ptr = reinterpret_cast<const tflite::LogicalNotOptionsT *>(value);
+      return CreateLogicalNotOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnpackOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnpackOptionsT *>(value);
+      return CreateUnpackOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FloorDivOptions: {
+      auto ptr = reinterpret_cast<const tflite::FloorDivOptionsT *>(value);
+      return CreateFloorDivOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SquareOptions: {
+      auto ptr = reinterpret_cast<const tflite::SquareOptionsT *>(value);
+      return CreateSquareOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<const tflite::ZerosLikeOptionsT *>(value);
+      return CreateZerosLikeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<const tflite::FillOptionsT *>(value);
+      return CreateFillOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateBidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<const tflite::BidirectionalSequenceRNNOptionsT *>(value);
+      return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
+      return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<const tflite::FloorModOptionsT *>(value);
+      return CreateFloorModOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<const tflite::RangeOptionsT *>(value);
+      return CreateRangeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ResizeNearestNeighborOptions: {
+      auto ptr = reinterpret_cast<const tflite::ResizeNearestNeighborOptionsT *>(value);
+      return CreateResizeNearestNeighborOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_LeakyReluOptions: {
+      auto ptr = reinterpret_cast<const tflite::LeakyReluOptionsT *>(value);
+      return CreateLeakyReluOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SquaredDifferenceOptions: {
+      auto ptr = reinterpret_cast<const tflite::SquaredDifferenceOptionsT *>(value);
+      return CreateSquaredDifferenceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MirrorPadOptions: {
+      auto ptr = reinterpret_cast<const tflite::MirrorPadOptionsT *>(value);
+      return CreateMirrorPadOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_AbsOptions: {
+      auto ptr = reinterpret_cast<const tflite::AbsOptionsT *>(value);
+      return CreateAbsOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SplitVOptions: {
+      auto ptr = reinterpret_cast<const tflite::SplitVOptionsT *>(value);
+      return CreateSplitVOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UniqueOptions: {
+      auto ptr = reinterpret_cast<const tflite::UniqueOptionsT *>(value);
+      return CreateUniqueOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReverseV2Options: {
+      auto ptr = reinterpret_cast<const tflite::ReverseV2OptionsT *>(value);
+      return CreateReverseV2Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_AddNOptions: {
+      auto ptr = reinterpret_cast<const tflite::AddNOptionsT *>(value);
+      return CreateAddNOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<const tflite::GatherNdOptionsT *>(value);
+      return CreateGatherNdOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<const tflite::CosOptionsT *>(value);
+      return CreateCosOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<const tflite::WhereOptionsT *>(value);
+      return CreateWhereOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<const tflite::RankOptionsT *>(value);
+      return CreateRankOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReverseSequenceOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReverseSequenceOptionsT *>(value);
+      return CreateReverseSequenceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MatrixDiagOptions: {
+      auto ptr = reinterpret_cast<const tflite::MatrixDiagOptionsT *>(value);
+      return CreateMatrixDiagOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_QuantizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::QuantizeOptionsT *>(value);
+      return CreateQuantizeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_MatrixSetDiagOptions: {
+      auto ptr = reinterpret_cast<const tflite::MatrixSetDiagOptionsT *>(value);
+      return CreateMatrixSetDiagOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_HardSwishOptions: {
+      auto ptr = reinterpret_cast<const tflite::HardSwishOptionsT *>(value);
+      return CreateHardSwishOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_IfOptions: {
+      auto ptr = reinterpret_cast<const tflite::IfOptionsT *>(value);
+      return CreateIfOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_WhileOptions: {
+      auto ptr = reinterpret_cast<const tflite::WhileOptionsT *>(value);
+      return CreateWhileOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DepthToSpaceOptions: {
+      auto ptr = reinterpret_cast<const tflite::DepthToSpaceOptionsT *>(value);
+      return CreateDepthToSpaceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_NonMaxSuppressionV4Options: {
+      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV4OptionsT *>(value);
+      return CreateNonMaxSuppressionV4Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_NonMaxSuppressionV5Options: {
+      auto ptr = reinterpret_cast<const tflite::NonMaxSuppressionV5OptionsT *>(value);
+      return CreateNonMaxSuppressionV5Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ScatterNdOptions: {
+      auto ptr = reinterpret_cast<const tflite::ScatterNdOptionsT *>(value);
+      return CreateScatterNdOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SelectV2Options: {
+      auto ptr = reinterpret_cast<const tflite::SelectV2OptionsT *>(value);
+      return CreateSelectV2Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DensifyOptions: {
+      auto ptr = reinterpret_cast<const tflite::DensifyOptionsT *>(value);
+      return CreateDensifyOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SegmentSumOptions: {
+      auto ptr = reinterpret_cast<const tflite::SegmentSumOptionsT *>(value);
+      return CreateSegmentSumOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BatchMatMulOptions: {
+      auto ptr = reinterpret_cast<const tflite::BatchMatMulOptionsT *>(value);
+      return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CumsumOptions: {
+      auto ptr = reinterpret_cast<const tflite::CumsumOptionsT *>(value);
+      return CreateCumsumOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_CallOnceOptions: {
+      auto ptr = reinterpret_cast<const tflite::CallOnceOptionsT *>(value);
+      return CreateCallOnceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BroadcastToOptions: {
+      auto ptr = reinterpret_cast<const tflite::BroadcastToOptionsT *>(value);
+      return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_Rfft2dOptions: {
+      auto ptr = reinterpret_cast<const tflite::Rfft2dOptionsT *>(value);
+      return CreateRfft2dOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_Conv3DOptions: {
+      auto ptr = reinterpret_cast<const tflite::Conv3DOptionsT *>(value);
+      return CreateConv3DOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_HashtableOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableOptionsT *>(value);
+      return CreateHashtableOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_HashtableFindOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableFindOptionsT *>(value);
+      return CreateHashtableFindOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_HashtableImportOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableImportOptionsT *>(value);
+      return CreateHashtableImportOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_HashtableSizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::HashtableSizeOptionsT *>(value);
+      return CreateHashtableSizeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_VarHandleOptions: {
+      auto ptr = reinterpret_cast<const tflite::VarHandleOptionsT *>(value);
+      return CreateVarHandleOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ReadVariableOptions: {
+      auto ptr = reinterpret_cast<const tflite::ReadVariableOptionsT *>(value);
+      return CreateReadVariableOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_AssignVariableOptions: {
+      auto ptr = reinterpret_cast<const tflite::AssignVariableOptionsT *>(value);
+      return CreateAssignVariableOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_RandomOptions: {
+      auto ptr = reinterpret_cast<const tflite::RandomOptionsT *>(value);
+      return CreateRandomOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_BucketizeOptions: {
+      auto ptr = reinterpret_cast<const tflite::BucketizeOptionsT *>(value);
+      return CreateBucketizeOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_GeluOptions: {
+      auto ptr = reinterpret_cast<const tflite::GeluOptionsT *>(value);
+      return CreateGeluOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_DynamicUpdateSliceOptions: {
+      auto ptr = reinterpret_cast<const tflite::DynamicUpdateSliceOptionsT *>(value);
+      return CreateDynamicUpdateSliceOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnsortedSegmentProdOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentProdOptionsT *>(value);
+      return CreateUnsortedSegmentProdOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnsortedSegmentMaxOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentMaxOptionsT *>(value);
+      return CreateUnsortedSegmentMaxOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnsortedSegmentMinOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentMinOptionsT *>(value);
+      return CreateUnsortedSegmentMinOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_UnsortedSegmentSumOptions: {
+      auto ptr = reinterpret_cast<const tflite::UnsortedSegmentSumOptionsT *>(value);
+      return CreateUnsortedSegmentSumOptions(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_ATan2Options: {
+      auto ptr = reinterpret_cast<const tflite::ATan2OptionsT *>(value);
+      return CreateATan2Options(_fbb, ptr, _rehasher).Union();
+    }
+    case BuiltinOptions_SignOptions: {
+      auto ptr = reinterpret_cast<const tflite::SignOptionsT *>(value);
+      return CreateSignOptions(_fbb, ptr, _rehasher).Union();
+    }
+    default: return 0;
+  }
+}
+
+inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) : type(u.type), value(nullptr) {
+  switch (type) {
+    case BuiltinOptions_Conv2DOptions: {
+      value = new tflite::Conv2DOptionsT(*reinterpret_cast<tflite::Conv2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      value = new tflite::DepthwiseConv2DOptionsT(*reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      value = new tflite::ConcatEmbeddingsOptionsT(*reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      value = new tflite::LSHProjectionOptionsT(*reinterpret_cast<tflite::LSHProjectionOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      value = new tflite::Pool2DOptionsT(*reinterpret_cast<tflite::Pool2DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SVDFOptions: {
+      value = new tflite::SVDFOptionsT(*reinterpret_cast<tflite::SVDFOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RNNOptions: {
+      value = new tflite::RNNOptionsT(*reinterpret_cast<tflite::RNNOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      value = new tflite::FullyConnectedOptionsT(*reinterpret_cast<tflite::FullyConnectedOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      value = new tflite::SoftmaxOptionsT(*reinterpret_cast<tflite::SoftmaxOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      value = new tflite::ConcatenationOptionsT(*reinterpret_cast<tflite::ConcatenationOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_AddOptions: {
+      value = new tflite::AddOptionsT(*reinterpret_cast<tflite::AddOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_L2NormOptions: {
+      value = new tflite::L2NormOptionsT(*reinterpret_cast<tflite::L2NormOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      value = new tflite::LocalResponseNormalizationOptionsT(*reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LSTMOptions: {
+      value = new tflite::LSTMOptionsT(*reinterpret_cast<tflite::LSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      value = new tflite::ResizeBilinearOptionsT(*reinterpret_cast<tflite::ResizeBilinearOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CallOptions: {
+      value = new tflite::CallOptionsT(*reinterpret_cast<tflite::CallOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      value = new tflite::ReshapeOptionsT(*reinterpret_cast<tflite::ReshapeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      value = new tflite::SkipGramOptionsT(*reinterpret_cast<tflite::SkipGramOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      value = new tflite::SpaceToDepthOptionsT(*reinterpret_cast<tflite::SpaceToDepthOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      value = new tflite::EmbeddingLookupSparseOptionsT(*reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MulOptions: {
+      value = new tflite::MulOptionsT(*reinterpret_cast<tflite::MulOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_PadOptions: {
+      value = new tflite::PadOptionsT(*reinterpret_cast<tflite::PadOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GatherOptions: {
+      value = new tflite::GatherOptionsT(*reinterpret_cast<tflite::GatherOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      value = new tflite::BatchToSpaceNDOptionsT(*reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      value = new tflite::SpaceToBatchNDOptionsT(*reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_TransposeOptions: {
+      value = new tflite::TransposeOptionsT(*reinterpret_cast<tflite::TransposeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReducerOptions: {
+      value = new tflite::ReducerOptionsT(*reinterpret_cast<tflite::ReducerOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SubOptions: {
+      value = new tflite::SubOptionsT(*reinterpret_cast<tflite::SubOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DivOptions: {
+      value = new tflite::DivOptionsT(*reinterpret_cast<tflite::DivOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      value = new tflite::SqueezeOptionsT(*reinterpret_cast<tflite::SqueezeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      value = new tflite::SequenceRNNOptionsT(*reinterpret_cast<tflite::SequenceRNNOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_StridedSliceOptions: {
+      value = new tflite::StridedSliceOptionsT(*reinterpret_cast<tflite::StridedSliceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ExpOptions: {
+      value = new tflite::ExpOptionsT(*reinterpret_cast<tflite::ExpOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_TopKV2Options: {
+      value = new tflite::TopKV2OptionsT(*reinterpret_cast<tflite::TopKV2OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SplitOptions: {
+      value = new tflite::SplitOptionsT(*reinterpret_cast<tflite::SplitOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LogSoftmaxOptions: {
+      value = new tflite::LogSoftmaxOptionsT(*reinterpret_cast<tflite::LogSoftmaxOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CastOptions: {
+      value = new tflite::CastOptionsT(*reinterpret_cast<tflite::CastOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DequantizeOptions: {
+      value = new tflite::DequantizeOptionsT(*reinterpret_cast<tflite::DequantizeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MaximumMinimumOptions: {
+      value = new tflite::MaximumMinimumOptionsT(*reinterpret_cast<tflite::MaximumMinimumOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ArgMaxOptions: {
+      value = new tflite::ArgMaxOptionsT(*reinterpret_cast<tflite::ArgMaxOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LessOptions: {
+      value = new tflite::LessOptionsT(*reinterpret_cast<tflite::LessOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_NegOptions: {
+      value = new tflite::NegOptionsT(*reinterpret_cast<tflite::NegOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_PadV2Options: {
+      value = new tflite::PadV2OptionsT(*reinterpret_cast<tflite::PadV2OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GreaterOptions: {
+      value = new tflite::GreaterOptionsT(*reinterpret_cast<tflite::GreaterOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GreaterEqualOptions: {
+      value = new tflite::GreaterEqualOptionsT(*reinterpret_cast<tflite::GreaterEqualOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LessEqualOptions: {
+      value = new tflite::LessEqualOptionsT(*reinterpret_cast<tflite::LessEqualOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SelectOptions: {
+      value = new tflite::SelectOptionsT(*reinterpret_cast<tflite::SelectOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SliceOptions: {
+      value = new tflite::SliceOptionsT(*reinterpret_cast<tflite::SliceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_TransposeConvOptions: {
+      value = new tflite::TransposeConvOptionsT(*reinterpret_cast<tflite::TransposeConvOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SparseToDenseOptions: {
+      value = new tflite::SparseToDenseOptionsT(*reinterpret_cast<tflite::SparseToDenseOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_TileOptions: {
+      value = new tflite::TileOptionsT(*reinterpret_cast<tflite::TileOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ExpandDimsOptions: {
+      value = new tflite::ExpandDimsOptionsT(*reinterpret_cast<tflite::ExpandDimsOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_EqualOptions: {
+      value = new tflite::EqualOptionsT(*reinterpret_cast<tflite::EqualOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_NotEqualOptions: {
+      value = new tflite::NotEqualOptionsT(*reinterpret_cast<tflite::NotEqualOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ShapeOptions: {
+      value = new tflite::ShapeOptionsT(*reinterpret_cast<tflite::ShapeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_PowOptions: {
+      value = new tflite::PowOptionsT(*reinterpret_cast<tflite::PowOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ArgMinOptions: {
+      value = new tflite::ArgMinOptionsT(*reinterpret_cast<tflite::ArgMinOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FakeQuantOptions: {
+      value = new tflite::FakeQuantOptionsT(*reinterpret_cast<tflite::FakeQuantOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_PackOptions: {
+      value = new tflite::PackOptionsT(*reinterpret_cast<tflite::PackOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LogicalOrOptions: {
+      value = new tflite::LogicalOrOptionsT(*reinterpret_cast<tflite::LogicalOrOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_OneHotOptions: {
+      value = new tflite::OneHotOptionsT(*reinterpret_cast<tflite::OneHotOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LogicalAndOptions: {
+      value = new tflite::LogicalAndOptionsT(*reinterpret_cast<tflite::LogicalAndOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LogicalNotOptions: {
+      value = new tflite::LogicalNotOptionsT(*reinterpret_cast<tflite::LogicalNotOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnpackOptions: {
+      value = new tflite::UnpackOptionsT(*reinterpret_cast<tflite::UnpackOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FloorDivOptions: {
+      value = new tflite::FloorDivOptionsT(*reinterpret_cast<tflite::FloorDivOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SquareOptions: {
+      value = new tflite::SquareOptionsT(*reinterpret_cast<tflite::SquareOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ZerosLikeOptions: {
+      value = new tflite::ZerosLikeOptionsT(*reinterpret_cast<tflite::ZerosLikeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FillOptions: {
+      value = new tflite::FillOptionsT(*reinterpret_cast<tflite::FillOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      value = new tflite::BidirectionalSequenceLSTMOptionsT(*reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      value = new tflite::BidirectionalSequenceRNNOptionsT(*reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      value = new tflite::UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_FloorModOptions: {
+      value = new tflite::FloorModOptionsT(*reinterpret_cast<tflite::FloorModOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RangeOptions: {
+      value = new tflite::RangeOptionsT(*reinterpret_cast<tflite::RangeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ResizeNearestNeighborOptions: {
+      value = new tflite::ResizeNearestNeighborOptionsT(*reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_LeakyReluOptions: {
+      value = new tflite::LeakyReluOptionsT(*reinterpret_cast<tflite::LeakyReluOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SquaredDifferenceOptions: {
+      value = new tflite::SquaredDifferenceOptionsT(*reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MirrorPadOptions: {
+      value = new tflite::MirrorPadOptionsT(*reinterpret_cast<tflite::MirrorPadOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_AbsOptions: {
+      value = new tflite::AbsOptionsT(*reinterpret_cast<tflite::AbsOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SplitVOptions: {
+      value = new tflite::SplitVOptionsT(*reinterpret_cast<tflite::SplitVOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UniqueOptions: {
+      value = new tflite::UniqueOptionsT(*reinterpret_cast<tflite::UniqueOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReverseV2Options: {
+      value = new tflite::ReverseV2OptionsT(*reinterpret_cast<tflite::ReverseV2OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_AddNOptions: {
+      value = new tflite::AddNOptionsT(*reinterpret_cast<tflite::AddNOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GatherNdOptions: {
+      value = new tflite::GatherNdOptionsT(*reinterpret_cast<tflite::GatherNdOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CosOptions: {
+      value = new tflite::CosOptionsT(*reinterpret_cast<tflite::CosOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_WhereOptions: {
+      value = new tflite::WhereOptionsT(*reinterpret_cast<tflite::WhereOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RankOptions: {
+      value = new tflite::RankOptionsT(*reinterpret_cast<tflite::RankOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReverseSequenceOptions: {
+      value = new tflite::ReverseSequenceOptionsT(*reinterpret_cast<tflite::ReverseSequenceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MatrixDiagOptions: {
+      value = new tflite::MatrixDiagOptionsT(*reinterpret_cast<tflite::MatrixDiagOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_QuantizeOptions: {
+      value = new tflite::QuantizeOptionsT(*reinterpret_cast<tflite::QuantizeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_MatrixSetDiagOptions: {
+      value = new tflite::MatrixSetDiagOptionsT(*reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_HardSwishOptions: {
+      value = new tflite::HardSwishOptionsT(*reinterpret_cast<tflite::HardSwishOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_IfOptions: {
+      value = new tflite::IfOptionsT(*reinterpret_cast<tflite::IfOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_WhileOptions: {
+      value = new tflite::WhileOptionsT(*reinterpret_cast<tflite::WhileOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DepthToSpaceOptions: {
+      value = new tflite::DepthToSpaceOptionsT(*reinterpret_cast<tflite::DepthToSpaceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_NonMaxSuppressionV4Options: {
+      value = new tflite::NonMaxSuppressionV4OptionsT(*reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_NonMaxSuppressionV5Options: {
+      value = new tflite::NonMaxSuppressionV5OptionsT(*reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ScatterNdOptions: {
+      value = new tflite::ScatterNdOptionsT(*reinterpret_cast<tflite::ScatterNdOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SelectV2Options: {
+      value = new tflite::SelectV2OptionsT(*reinterpret_cast<tflite::SelectV2OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DensifyOptions: {
+      value = new tflite::DensifyOptionsT(*reinterpret_cast<tflite::DensifyOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SegmentSumOptions: {
+      value = new tflite::SegmentSumOptionsT(*reinterpret_cast<tflite::SegmentSumOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BatchMatMulOptions: {
+      value = new tflite::BatchMatMulOptionsT(*reinterpret_cast<tflite::BatchMatMulOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CumsumOptions: {
+      value = new tflite::CumsumOptionsT(*reinterpret_cast<tflite::CumsumOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_CallOnceOptions: {
+      value = new tflite::CallOnceOptionsT(*reinterpret_cast<tflite::CallOnceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BroadcastToOptions: {
+      value = new tflite::BroadcastToOptionsT(*reinterpret_cast<tflite::BroadcastToOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_Rfft2dOptions: {
+      value = new tflite::Rfft2dOptionsT(*reinterpret_cast<tflite::Rfft2dOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_Conv3DOptions: {
+      value = new tflite::Conv3DOptionsT(*reinterpret_cast<tflite::Conv3DOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_HashtableOptions: {
+      value = new tflite::HashtableOptionsT(*reinterpret_cast<tflite::HashtableOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_HashtableFindOptions: {
+      value = new tflite::HashtableFindOptionsT(*reinterpret_cast<tflite::HashtableFindOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_HashtableImportOptions: {
+      value = new tflite::HashtableImportOptionsT(*reinterpret_cast<tflite::HashtableImportOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_HashtableSizeOptions: {
+      value = new tflite::HashtableSizeOptionsT(*reinterpret_cast<tflite::HashtableSizeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_VarHandleOptions: {
+      value = new tflite::VarHandleOptionsT(*reinterpret_cast<tflite::VarHandleOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ReadVariableOptions: {
+      value = new tflite::ReadVariableOptionsT(*reinterpret_cast<tflite::ReadVariableOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_AssignVariableOptions: {
+      value = new tflite::AssignVariableOptionsT(*reinterpret_cast<tflite::AssignVariableOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_RandomOptions: {
+      value = new tflite::RandomOptionsT(*reinterpret_cast<tflite::RandomOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_BucketizeOptions: {
+      value = new tflite::BucketizeOptionsT(*reinterpret_cast<tflite::BucketizeOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_GeluOptions: {
+      value = new tflite::GeluOptionsT(*reinterpret_cast<tflite::GeluOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_DynamicUpdateSliceOptions: {
+      value = new tflite::DynamicUpdateSliceOptionsT(*reinterpret_cast<tflite::DynamicUpdateSliceOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentProdOptions: {
+      value = new tflite::UnsortedSegmentProdOptionsT(*reinterpret_cast<tflite::UnsortedSegmentProdOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentMaxOptions: {
+      value = new tflite::UnsortedSegmentMaxOptionsT(*reinterpret_cast<tflite::UnsortedSegmentMaxOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentMinOptions: {
+      value = new tflite::UnsortedSegmentMinOptionsT(*reinterpret_cast<tflite::UnsortedSegmentMinOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentSumOptions: {
+      value = new tflite::UnsortedSegmentSumOptionsT(*reinterpret_cast<tflite::UnsortedSegmentSumOptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_ATan2Options: {
+      value = new tflite::ATan2OptionsT(*reinterpret_cast<tflite::ATan2OptionsT *>(u.value));
+      break;
+    }
+    case BuiltinOptions_SignOptions: {
+      value = new tflite::SignOptionsT(*reinterpret_cast<tflite::SignOptionsT *>(u.value));
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+inline void BuiltinOptionsUnion::Reset() {  // Deletes the held *OptionsT object (if any) and sets the union back to NONE.
+  switch (type) {  // `type` picks the concrete *OptionsT that `value` is cast to before delete
+    case BuiltinOptions_Conv2DOptions: {
+      auto ptr = reinterpret_cast<tflite::Conv2DOptionsT *>(value);
+      delete ptr;  // same cast-then-delete pattern is repeated for every tag below
+      break;
+    }
+    case BuiltinOptions_DepthwiseConv2DOptions: {
+      auto ptr = reinterpret_cast<tflite::DepthwiseConv2DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ConcatEmbeddingsOptions: {
+      auto ptr = reinterpret_cast<tflite::ConcatEmbeddingsOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LSHProjectionOptions: {
+      auto ptr = reinterpret_cast<tflite::LSHProjectionOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_Pool2DOptions: {
+      auto ptr = reinterpret_cast<tflite::Pool2DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SVDFOptions: {
+      auto ptr = reinterpret_cast<tflite::SVDFOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RNNOptions: {
+      auto ptr = reinterpret_cast<tflite::RNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FullyConnectedOptions: {
+      auto ptr = reinterpret_cast<tflite::FullyConnectedOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SoftmaxOptions: {
+      auto ptr = reinterpret_cast<tflite::SoftmaxOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ConcatenationOptions: {
+      auto ptr = reinterpret_cast<tflite::ConcatenationOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_AddOptions: {
+      auto ptr = reinterpret_cast<tflite::AddOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_L2NormOptions: {
+      auto ptr = reinterpret_cast<tflite::L2NormOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LocalResponseNormalizationOptions: {
+      auto ptr = reinterpret_cast<tflite::LocalResponseNormalizationOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LSTMOptions: {
+      auto ptr = reinterpret_cast<tflite::LSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ResizeBilinearOptions: {
+      auto ptr = reinterpret_cast<tflite::ResizeBilinearOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CallOptions: {
+      auto ptr = reinterpret_cast<tflite::CallOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReshapeOptions: {
+      auto ptr = reinterpret_cast<tflite::ReshapeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SkipGramOptions: {
+      auto ptr = reinterpret_cast<tflite::SkipGramOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SpaceToDepthOptions: {
+      auto ptr = reinterpret_cast<tflite::SpaceToDepthOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_EmbeddingLookupSparseOptions: {
+      auto ptr = reinterpret_cast<tflite::EmbeddingLookupSparseOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MulOptions: {
+      auto ptr = reinterpret_cast<tflite::MulOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_PadOptions: {
+      auto ptr = reinterpret_cast<tflite::PadOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GatherOptions: {
+      auto ptr = reinterpret_cast<tflite::GatherOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BatchToSpaceNDOptions: {
+      auto ptr = reinterpret_cast<tflite::BatchToSpaceNDOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SpaceToBatchNDOptions: {
+      auto ptr = reinterpret_cast<tflite::SpaceToBatchNDOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_TransposeOptions: {
+      auto ptr = reinterpret_cast<tflite::TransposeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReducerOptions: {
+      auto ptr = reinterpret_cast<tflite::ReducerOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SubOptions: {
+      auto ptr = reinterpret_cast<tflite::SubOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DivOptions: {
+      auto ptr = reinterpret_cast<tflite::DivOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SqueezeOptions: {
+      auto ptr = reinterpret_cast<tflite::SqueezeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SequenceRNNOptions: {
+      auto ptr = reinterpret_cast<tflite::SequenceRNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_StridedSliceOptions: {
+      auto ptr = reinterpret_cast<tflite::StridedSliceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ExpOptions: {
+      auto ptr = reinterpret_cast<tflite::ExpOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_TopKV2Options: {
+      auto ptr = reinterpret_cast<tflite::TopKV2OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SplitOptions: {
+      auto ptr = reinterpret_cast<tflite::SplitOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LogSoftmaxOptions: {
+      auto ptr = reinterpret_cast<tflite::LogSoftmaxOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CastOptions: {
+      auto ptr = reinterpret_cast<tflite::CastOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DequantizeOptions: {
+      auto ptr = reinterpret_cast<tflite::DequantizeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MaximumMinimumOptions: {
+      auto ptr = reinterpret_cast<tflite::MaximumMinimumOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ArgMaxOptions: {
+      auto ptr = reinterpret_cast<tflite::ArgMaxOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LessOptions: {
+      auto ptr = reinterpret_cast<tflite::LessOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_NegOptions: {
+      auto ptr = reinterpret_cast<tflite::NegOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_PadV2Options: {
+      auto ptr = reinterpret_cast<tflite::PadV2OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GreaterOptions: {
+      auto ptr = reinterpret_cast<tflite::GreaterOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GreaterEqualOptions: {
+      auto ptr = reinterpret_cast<tflite::GreaterEqualOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LessEqualOptions: {
+      auto ptr = reinterpret_cast<tflite::LessEqualOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SelectOptions: {
+      auto ptr = reinterpret_cast<tflite::SelectOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SliceOptions: {
+      auto ptr = reinterpret_cast<tflite::SliceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_TransposeConvOptions: {
+      auto ptr = reinterpret_cast<tflite::TransposeConvOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SparseToDenseOptions: {
+      auto ptr = reinterpret_cast<tflite::SparseToDenseOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_TileOptions: {
+      auto ptr = reinterpret_cast<tflite::TileOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ExpandDimsOptions: {
+      auto ptr = reinterpret_cast<tflite::ExpandDimsOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_EqualOptions: {
+      auto ptr = reinterpret_cast<tflite::EqualOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_NotEqualOptions: {
+      auto ptr = reinterpret_cast<tflite::NotEqualOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ShapeOptions: {
+      auto ptr = reinterpret_cast<tflite::ShapeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_PowOptions: {
+      auto ptr = reinterpret_cast<tflite::PowOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ArgMinOptions: {
+      auto ptr = reinterpret_cast<tflite::ArgMinOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FakeQuantOptions: {
+      auto ptr = reinterpret_cast<tflite::FakeQuantOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_PackOptions: {
+      auto ptr = reinterpret_cast<tflite::PackOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LogicalOrOptions: {
+      auto ptr = reinterpret_cast<tflite::LogicalOrOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_OneHotOptions: {
+      auto ptr = reinterpret_cast<tflite::OneHotOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LogicalAndOptions: {
+      auto ptr = reinterpret_cast<tflite::LogicalAndOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LogicalNotOptions: {
+      auto ptr = reinterpret_cast<tflite::LogicalNotOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnpackOptions: {
+      auto ptr = reinterpret_cast<tflite::UnpackOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FloorDivOptions: {
+      auto ptr = reinterpret_cast<tflite::FloorDivOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SquareOptions: {
+      auto ptr = reinterpret_cast<tflite::SquareOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ZerosLikeOptions: {
+      auto ptr = reinterpret_cast<tflite::ZerosLikeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FillOptions: {
+      auto ptr = reinterpret_cast<tflite::FillOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<tflite::BidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BidirectionalSequenceRNNOptions: {
+      auto ptr = reinterpret_cast<tflite::BidirectionalSequenceRNNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnidirectionalSequenceLSTMOptions: {
+      auto ptr = reinterpret_cast<tflite::UnidirectionalSequenceLSTMOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_FloorModOptions: {
+      auto ptr = reinterpret_cast<tflite::FloorModOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RangeOptions: {
+      auto ptr = reinterpret_cast<tflite::RangeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ResizeNearestNeighborOptions: {
+      auto ptr = reinterpret_cast<tflite::ResizeNearestNeighborOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_LeakyReluOptions: {
+      auto ptr = reinterpret_cast<tflite::LeakyReluOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SquaredDifferenceOptions: {
+      auto ptr = reinterpret_cast<tflite::SquaredDifferenceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MirrorPadOptions: {
+      auto ptr = reinterpret_cast<tflite::MirrorPadOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_AbsOptions: {
+      auto ptr = reinterpret_cast<tflite::AbsOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SplitVOptions: {
+      auto ptr = reinterpret_cast<tflite::SplitVOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UniqueOptions: {
+      auto ptr = reinterpret_cast<tflite::UniqueOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReverseV2Options: {
+      auto ptr = reinterpret_cast<tflite::ReverseV2OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_AddNOptions: {
+      auto ptr = reinterpret_cast<tflite::AddNOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GatherNdOptions: {
+      auto ptr = reinterpret_cast<tflite::GatherNdOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CosOptions: {
+      auto ptr = reinterpret_cast<tflite::CosOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_WhereOptions: {
+      auto ptr = reinterpret_cast<tflite::WhereOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RankOptions: {
+      auto ptr = reinterpret_cast<tflite::RankOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReverseSequenceOptions: {
+      auto ptr = reinterpret_cast<tflite::ReverseSequenceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MatrixDiagOptions: {
+      auto ptr = reinterpret_cast<tflite::MatrixDiagOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_QuantizeOptions: {
+      auto ptr = reinterpret_cast<tflite::QuantizeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_MatrixSetDiagOptions: {
+      auto ptr = reinterpret_cast<tflite::MatrixSetDiagOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_HardSwishOptions: {
+      auto ptr = reinterpret_cast<tflite::HardSwishOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_IfOptions: {
+      auto ptr = reinterpret_cast<tflite::IfOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_WhileOptions: {
+      auto ptr = reinterpret_cast<tflite::WhileOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DepthToSpaceOptions: {
+      auto ptr = reinterpret_cast<tflite::DepthToSpaceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_NonMaxSuppressionV4Options: {
+      auto ptr = reinterpret_cast<tflite::NonMaxSuppressionV4OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_NonMaxSuppressionV5Options: {
+      auto ptr = reinterpret_cast<tflite::NonMaxSuppressionV5OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ScatterNdOptions: {
+      auto ptr = reinterpret_cast<tflite::ScatterNdOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SelectV2Options: {
+      auto ptr = reinterpret_cast<tflite::SelectV2OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DensifyOptions: {
+      auto ptr = reinterpret_cast<tflite::DensifyOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SegmentSumOptions: {
+      auto ptr = reinterpret_cast<tflite::SegmentSumOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BatchMatMulOptions: {
+      auto ptr = reinterpret_cast<tflite::BatchMatMulOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CumsumOptions: {
+      auto ptr = reinterpret_cast<tflite::CumsumOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_CallOnceOptions: {
+      auto ptr = reinterpret_cast<tflite::CallOnceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BroadcastToOptions: {
+      auto ptr = reinterpret_cast<tflite::BroadcastToOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_Rfft2dOptions: {
+      auto ptr = reinterpret_cast<tflite::Rfft2dOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_Conv3DOptions: {
+      auto ptr = reinterpret_cast<tflite::Conv3DOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_HashtableOptions: {
+      auto ptr = reinterpret_cast<tflite::HashtableOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_HashtableFindOptions: {
+      auto ptr = reinterpret_cast<tflite::HashtableFindOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_HashtableImportOptions: {
+      auto ptr = reinterpret_cast<tflite::HashtableImportOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_HashtableSizeOptions: {
+      auto ptr = reinterpret_cast<tflite::HashtableSizeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_VarHandleOptions: {
+      auto ptr = reinterpret_cast<tflite::VarHandleOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ReadVariableOptions: {
+      auto ptr = reinterpret_cast<tflite::ReadVariableOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_AssignVariableOptions: {
+      auto ptr = reinterpret_cast<tflite::AssignVariableOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_RandomOptions: {
+      auto ptr = reinterpret_cast<tflite::RandomOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_BucketizeOptions: {
+      auto ptr = reinterpret_cast<tflite::BucketizeOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_GeluOptions: {
+      auto ptr = reinterpret_cast<tflite::GeluOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_DynamicUpdateSliceOptions: {
+      auto ptr = reinterpret_cast<tflite::DynamicUpdateSliceOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentProdOptions: {
+      auto ptr = reinterpret_cast<tflite::UnsortedSegmentProdOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentMaxOptions: {
+      auto ptr = reinterpret_cast<tflite::UnsortedSegmentMaxOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentMinOptions: {
+      auto ptr = reinterpret_cast<tflite::UnsortedSegmentMinOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_UnsortedSegmentSumOptions: {
+      auto ptr = reinterpret_cast<tflite::UnsortedSegmentSumOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_ATan2Options: {
+      auto ptr = reinterpret_cast<tflite::ATan2OptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    case BuiltinOptions_SignOptions: {
+      auto ptr = reinterpret_cast<tflite::SignOptionsT *>(value);
+      delete ptr;
+      break;
+    }
+    default: break;  // any tag without a case above: nothing is deleted
+  }
+  value = nullptr;  // cleared on every path, including `default`
+  type = BuiltinOptions_NONE;  // tag reset so a later Reset() falls through to `default`, assuming NONE has no case above
+}
+// Returns the tflite::Model root obtained from flatbuffers::GetRoot on `buf`; no Verifier is run here.
+inline const tflite::Model *GetModel(const void *buf) {
+  return flatbuffers::GetRoot<tflite::Model>(buf);
+}
+// Returns the tflite::Model root obtained from flatbuffers::GetSizePrefixedRoot on `buf`; no Verifier is run here.
+inline const tflite::Model *GetSizePrefixedModel(const void *buf) {
+  return flatbuffers::GetSizePrefixedRoot<tflite::Model>(buf);
+}
+// Returns the identifier string "TFL3" that the identifier-check, verify and finish helpers in this file pass on.
+inline const char *ModelIdentifier() {
+  return "TFL3";
+}
+// Returns flatbuffers::BufferHasIdentifier(buf, ModelIdentifier()), i.e. whether `buf` carries the model identifier.
+inline bool ModelBufferHasIdentifier(const void *buf) {
+  return flatbuffers::BufferHasIdentifier(
+      buf, ModelIdentifier());
+}
+// Identifier check that passes `true` as BufferHasIdentifier's third argument (presumably size_prefixed — confirm in flatbuffers).
+inline bool SizePrefixedModelBufferHasIdentifier(const void *buf) {
+  return flatbuffers::BufferHasIdentifier(
+      buf, ModelIdentifier(), true);
+}
+// Returns the result of verifier.VerifyBuffer<tflite::Model>, called with ModelIdentifier().
+inline bool VerifyModelBuffer(
+    flatbuffers::Verifier &verifier) {
+  return verifier.VerifyBuffer<tflite::Model>(ModelIdentifier());
+}
+// Returns the result of verifier.VerifySizePrefixedBuffer<tflite::Model>, called with ModelIdentifier().
+inline bool VerifySizePrefixedModelBuffer(
+    flatbuffers::Verifier &verifier) {
+  return verifier.VerifySizePrefixedBuffer<tflite::Model>(ModelIdentifier());
+}
+// Returns the string "tflite" (the model file extension, going by the function name).
+inline const char *ModelExtension() {
+  return "tflite";
+}
+// Calls fbb.Finish(root, ModelIdentifier()): finishes the builder's buffer with `root` and the model identifier.
+inline void FinishModelBuffer(
+    flatbuffers::FlatBufferBuilder &fbb,
+    flatbuffers::Offset<tflite::Model> root) {
+  fbb.Finish(root, ModelIdentifier());
+}
+// Calls fbb.FinishSizePrefixed(root, ModelIdentifier()): size-prefixed counterpart of finishing with `root`.
+inline void FinishSizePrefixedModelBuffer(
+    flatbuffers::FlatBufferBuilder &fbb,
+    flatbuffers::Offset<tflite::Model> root) {
+  fbb.FinishSizePrefixed(root, ModelIdentifier());
+}
+// Wraps the ModelT* returned by GetModel(buf)->UnPack(res) in a std::unique_ptr; `res` defaults to nullptr.
+inline std::unique_ptr<tflite::ModelT> UnPackModel(
+    const void *buf,
+    const flatbuffers::resolver_function_t *res = nullptr) {
+  return std::unique_ptr<tflite::ModelT>(GetModel(buf)->UnPack(res));
+}
+// Wraps the ModelT* returned by GetSizePrefixedModel(buf)->UnPack(res) in a std::unique_ptr; `res` defaults to nullptr.
+inline std::unique_ptr<tflite::ModelT> UnPackSizePrefixedModel(
+    const void *buf,
+    const flatbuffers::resolver_function_t *res = nullptr) {
+  return std::unique_ptr<tflite::ModelT>(GetSizePrefixedModel(buf)->UnPack(res));
+}
+
+}  // namespace tflite
+
+#endif  // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h
new file mode 100644
index 0000000..9274c98
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_ALLOCATOR_H_
+#define FLATBUFFERS_ALLOCATOR_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+
+namespace flatbuffers {
+
+// Allocator interface. This is flatbuffers-specific and meant only for
+// `vector_downward` usage; `allocate` and `deallocate` are pure virtual.
+class Allocator {
+ public:
+  virtual ~Allocator() {}
+
+  // Allocate `size` bytes of memory and return a pointer to the region.
+  virtual uint8_t *allocate(size_t size) = 0;
+
+  // Deallocate `size` bytes of memory at `p` allocated by this allocator.
+  virtual void deallocate(uint8_t *p, size_t size) = 0;
+
+  // Reallocate to `new_size` bytes, replacing the old region of `old_size`
+  // bytes at `old_p`. In contrast to a normal realloc, this grows downwards,
+  // and is intended specifically for `vector_downward` use. `in_use_back` and
+  // `in_use_front` say how many bytes at each end of the old region are in use
+  // and must be copied. This default allocates, copies, then frees `old_p`.
+  virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size,
+                                       size_t new_size, size_t in_use_back,
+                                       size_t in_use_front) {
+    FLATBUFFERS_ASSERT(new_size > old_size);  // vector_downward only grows
+    uint8_t *new_p = allocate(new_size);  // NOTE(review): result is used below without a null check
+    memcpy_downward(old_p, old_size, new_p, new_size, in_use_back,
+                    in_use_front);
+    deallocate(old_p, old_size);  // old region is released only after the copy
+    return new_p;
+  }
+
+ protected:
+  // Called by `reallocate_downward` to copy memory from `old_p` of `old_size`
+  // to `new_p` of `new_size`. Only `in_use_front` bytes from the front and
+  // `in_use_back` bytes from the back of the old allocation are copied; the
+  // back bytes are placed so that they end at the end of the new region.
+  void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p,
+                       size_t new_size, size_t in_use_back,
+                       size_t in_use_front) {
+    memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back,
+           in_use_back);  // tail of the old region -> tail of the new region
+    memcpy(new_p, old_p, in_use_front);  // head of the old region -> head of the new region
+  }
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_ALLOCATOR_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h
new file mode 100644
index 0000000..286db9d
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_ARRAY_H_
+#define FLATBUFFERS_ARRAY_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"
+
+namespace flatbuffers {
+
+// Helper type for accessing fixed-length arrays of `length` elements of T.
+template<typename T, uint16_t length> class Array {
+  // Array<T> can carry only POD data types (scalars or structs).
+  typedef typename flatbuffers::bool_constant<flatbuffers::is_scalar<T>::value>
+      scalar_tag;
+  typedef
+      typename flatbuffers::conditional<scalar_tag::value, T, const T *>::type
+          IndirectHelperType;
+
+ public:
+  typedef uint16_t size_type;
+  typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
+  typedef VectorIterator<T, return_type> const_iterator;
+  typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
+
+  // If T is a LE-scalar or a struct (!scalar_tag::value).
+  static FLATBUFFERS_CONSTEXPR bool is_span_observable =
+      (scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1)) ||
+      !scalar_tag::value;
+
+  FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; }
+
+  return_type Get(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return IndirectHelper<IndirectHelperType>::Read(Data(), i);
+  }
+
+  return_type operator[](uoffset_t i) const { return Get(i); }
+
+  // If this is an Array of enums, T will be its storage type, not the enum
+  // type. This function makes it convenient to retrieve the value with enum
+  // type E.
+  template<typename E> E GetEnum(uoffset_t i) const {
+    return static_cast<E>(Get(i));
+  }
+
+  const_iterator begin() const { return const_iterator(Data(), 0); }
+  const_iterator end() const { return const_iterator(Data(), size()); }
+
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(end());
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
+
+  const_iterator cbegin() const { return begin(); }
+  const_iterator cend() const { return end(); }
+
+  const_reverse_iterator crbegin() const { return rbegin(); }
+  const_reverse_iterator crend() const { return rend(); }
+
+  // Get a mutable pointer to elements inside this array.
+  // This method is used to mutate arrays of structs, followed by a @p Mutate
+  // operation. For primitive types use @p Mutate directly.
+  // @warning Assignments and reads to/from the dereferenced pointer are not
+  //  automatically converted to the correct endianness.
+  typename flatbuffers::conditional<scalar_tag::value, void, T *>::type
+  GetMutablePointer(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return const_cast<T *>(&data()[i]);
+  }
+
+  // Change elements if you have a non-const pointer to this object.
+  void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); }
+
+  // The raw data in little endian format. Use with care.
+  const uint8_t *Data() const { return data_; }
+
+  uint8_t *Data() { return data_; }
+
+  // Similarly, but typed, much like std::vector::data
+  const T *data() const { return reinterpret_cast<const T *>(Data()); }
+  T *data() { return reinterpret_cast<T *>(Data()); }
+
+  // Copy data from a span with endian conversion. The two byte ranges must
+  // not overlap (asserted below); otherwise the behavior is undefined.
+  void CopyFromSpan(flatbuffers::span<const T, length> src) {
+    const auto p1 = reinterpret_cast<const uint8_t *>(src.data());
+    const auto p2 = Data();
+    FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length * sizeof(T))) &&
+                       !(p2 >= p1 && p2 < (p1 + length * sizeof(T))));
+    (void)p1;
+    (void)p2;
+    CopyFromSpanImpl(flatbuffers::bool_constant<is_span_observable>(), src);
+  }
+
+ protected:
+  void MutateImpl(flatbuffers::true_type, uoffset_t i, const T &val) {
+    FLATBUFFERS_ASSERT(i < size());
+    WriteScalar(data() + i, val);
+  }
+
+  void MutateImpl(flatbuffers::false_type, uoffset_t i, const T &val) {
+    *(GetMutablePointer(i)) = val;
+  }
+
+  void CopyFromSpanImpl(flatbuffers::true_type,
+                        flatbuffers::span<const T, length> src) {
+    // Use std::memcpy() instead of std::copy() to avoid performance degradation
+    // due to aliasing if T is char or unsigned char.
+    // The size is known at compile time, so memcpy would be inlined.
+    std::memcpy(data(), src.data(), length * sizeof(T));
+  }
+
+  // Copy data from flatbuffers::span with endian conversion.
+  void CopyFromSpanImpl(flatbuffers::false_type,
+                        flatbuffers::span<const T, length> src) {
+    for (size_type k = 0; k < length; k++) { Mutate(k, src[k]); }
+  }
+
+  // This class is only used to access pre-existing data. Don't ever
+  // try to construct these manually.
+  // 'constexpr' allows us to use 'size()' at compile time.
+  // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on
+  //  a constructor.
+#if defined(__cpp_constexpr)
+  constexpr Array();
+#else
+  Array();
+#endif
+
+  uint8_t data_[length * sizeof(T)];
+
+ private:
+  // This class is a pointer. Copying will therefore create an invalid object.
+  // Private and unimplemented copy constructor.
+  Array(const Array &);
+  Array &operator=(const Array &);
+};
+
+// Specialization for Array[struct] with access using Offset<void> pointer.
+// This specialization is used by idl_gen_text.cpp.
+template<typename T, uint16_t length> class Array<Offset<T>, length> {
+  static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");
+
+ public:
+  typedef const void *return_type;
+
+  const uint8_t *Data() const { return data_; }
+
+  // Make idl_gen_text.cpp::PrintContainer happy.
+  return_type operator[](uoffset_t) const {
+    FLATBUFFERS_ASSERT(false);
+    return nullptr;
+  }
+
+ private:
+  // This class is only used to access pre-existing data.
+  Array();
+  Array(const Array &);
+  Array &operator=(const Array &);
+
+  uint8_t data_[1];
+};
+
+// Wraps the N elements of a mutable `arr` in a flatbuffers::span.
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<U, N> make_span(Array<U, N> &arr)
+    FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+      "wrong type U, only plain struct, LE-scalar, or byte types are allowed");
+  return flatbuffers::span<U, N>(arr.data(), N);
+}
+
+// Read-only overload: wraps the N elements of a const `arr` in a span.
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const U, N> make_span(
+    const Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+      "wrong type U, only plain struct, LE-scalar, or byte types are allowed");
+  return flatbuffers::span<const U, N>(arr.data(), N);
+}
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<uint8_t, sizeof(U) * N>
+make_bytes_span(Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+                "internal error, Array<T> might hold only scalars or structs");
+  return span<uint8_t, sizeof(U) * N>(arr.Data(), sizeof(U) * N);
+}
+
+template<class U, uint16_t N>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const uint8_t, sizeof(U) * N>
+make_bytes_span(const Array<U, N> &arr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Array<U, N>::is_span_observable,
+                "internal error, Array<T> might hold only scalars or structs");
+  return span<const uint8_t, sizeof(U) * N>(arr.Data(), sizeof(U) * N);
+}
+
+// Cast a raw T[length] to a raw flatbuffers::Array<T, length>
+// without endian conversion. Use with care.
+// TODO: move these Cast-methods to `internal` namespace.
+template<typename T, uint16_t length>
+Array<T, length> &CastToArray(T (&arr)[length]) {
+  return *reinterpret_cast<Array<T, length> *>(arr);
+}
+
+template<typename T, uint16_t length>
+const Array<T, length> &CastToArray(const T (&arr)[length]) {
+  return *reinterpret_cast<const Array<T, length> *>(arr);
+}
+
+template<typename E, typename T, uint16_t length>
+Array<E, length> &CastToArrayOfEnum(T (&arr)[length]) {
+  static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+  return *reinterpret_cast<Array<E, length> *>(arr);
+}
+
+template<typename E, typename T, uint16_t length>
+const Array<E, length> &CastToArrayOfEnum(const T (&arr)[length]) {
+  static_assert(sizeof(E) == sizeof(T), "invalid enum type E");
+  return *reinterpret_cast<const Array<E, length> *>(arr);
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_ARRAY_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h
similarity index 80%
rename from edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h
rename to edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h
index ecbc6cf..525a8e5 100644
--- a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h
@@ -1,6 +1,16 @@
 #ifndef FLATBUFFERS_BASE_H_
 #define FLATBUFFERS_BASE_H_
 
+// For TFLM, we always want FLATBUFFERS_LOCALE_INDEPENDENT to be defined as 0.
+// We could achieve this by adding -DFLATBUFFERS_LOCALE_INDEPENDENT=0 to the
+// TFLM Makefile. However, for (at least) the Arduino, adding additional build
+// flags during the compilation can be a bit awkward. As such, we have instead
+// made a decision to change the default to be FLATBUFFERS_LOCALE_INDEPENDENT=0
+// for TFLM to make it easier for external IDE integration.
+#ifndef FLATBUFFERS_LOCALE_INDEPENDENT
+#define FLATBUFFERS_LOCALE_INDEPENDENT 0
+#endif
+
 // clang-format off
 
 // If activate should be declared and included first.
@@ -38,14 +48,6 @@
   #include <utility>
 #endif
 
-// Patched by Edge Impulse, remove these declarations for Eta Compute
-#ifdef ECM3532
-#undef _GLIBCXX_HAVE_ENOTSUP
-#undef _GLIBCXX_HAVE_ECANCELED
-#undef _GLIBCXX_HAVE_EOWNERDEAD
-#undef _GLIBCXX_HAVE_ENOTRECOVERABLE
-#endif
-
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -58,10 +60,6 @@
   #include <unistd.h>
 #endif
 
-#ifdef _STLPORT_VERSION
-  #define FLATBUFFERS_CPP98_STL
-#endif
-
 #ifdef __ANDROID__
   #include <android/api-level.h>
 #endif
@@ -150,9 +148,9 @@
   #endif
 #endif // !defined(FLATBUFFERS_LITTLEENDIAN)
 
-#define FLATBUFFERS_VERSION_MAJOR 1
-#define FLATBUFFERS_VERSION_MINOR 12
-#define FLATBUFFERS_VERSION_REVISION 0
+#define FLATBUFFERS_VERSION_MAJOR 2
+#define FLATBUFFERS_VERSION_MINOR 0
+#define FLATBUFFERS_VERSION_REVISION 6
 #define FLATBUFFERS_STRING_EXPAND(X) #X
 #define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X)
 namespace flatbuffers {
@@ -185,10 +183,9 @@ namespace flatbuffers {
   #define FLATBUFFERS_CONSTEXPR_CPP11
 #endif
 
-// This macro is never used in code!
 #if (defined(__cplusplus) && __cplusplus >= 201402L) || \
     (defined(__cpp_constexpr) && __cpp_constexpr >= 201304)
-  #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR
+  #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR_CPP11
 #else
   #define FLATBUFFERS_CONSTEXPR_CPP14
 #endif
@@ -206,9 +203,15 @@ namespace flatbuffers {
 #if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \
     (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \
     defined(__clang__)
-  #define FLATBUFFERS_DELETE_FUNC(func) func = delete;
+  #define FLATBUFFERS_DELETE_FUNC(func) func = delete
 #else
-  #define FLATBUFFERS_DELETE_FUNC(func) private: func;
+  #define FLATBUFFERS_DELETE_FUNC(func) private: func
+#endif
+
+#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \
+    (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \
+    defined(__clang__)
+  #define FLATBUFFERS_DEFAULT_DECLARATION
 #endif
 
 // Check if we can use template aliases
@@ -250,6 +253,11 @@ namespace flatbuffers {
   #endif // __has_include
 #endif // !FLATBUFFERS_HAS_STRING_VIEW
 
+#ifndef FLATBUFFERS_GENERAL_HEAP_ALLOC_OK
+  // Allow heap allocations to be used
+  #define FLATBUFFERS_GENERAL_HEAP_ALLOC_OK 1
+#endif // !FLATBUFFERS_GENERAL_HEAP_ALLOC_OK
+
 #ifndef FLATBUFFERS_HAS_NEW_STRTOD
   // Modern (C++11) strtod and strtof functions are available for use.
   // 1) nan/inf strings as argument of strtod;
@@ -262,9 +270,12 @@ namespace flatbuffers {
 #endif // !FLATBUFFERS_HAS_NEW_STRTOD
 
 #ifndef FLATBUFFERS_LOCALE_INDEPENDENT
-  // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, strtoull_l}.
-  #if ((defined(_MSC_VER) && _MSC_VER >= 1800)            || \
-       (defined(_XOPEN_VERSION) && (_XOPEN_VERSION>=700)) && (!defined(__ANDROID_API__) || (defined(__ANDROID_API__) && (__ANDROID_API__>=21))))
+  // Enable locale independent functions {strtof_l, strtod_l,strtoll_l,
+  // strtoull_l}.
+  #if (defined(_MSC_VER) && _MSC_VER >= 1800) || \
+      (defined(__ANDROID_API__) && __ANDROID_API__>= 21) || \
+      (defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 700)) && \
+        (!defined(__Fuchsia__) && !defined(__ANDROID_API__))
     #define FLATBUFFERS_LOCALE_INDEPENDENT 1
   #else
     #define FLATBUFFERS_LOCALE_INDEPENDENT 0
@@ -292,7 +303,7 @@ template<typename T> FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) {
 #if ((__cplusplus >= 201703L) \
     || (defined(_MSVC_LANG) &&  (_MSVC_LANG >= 201703L)))
   // All attributes unknown to an implementation are ignored without causing an error.
-  #define FLATBUFFERS_ATTRIBUTE(attr) [[attr]]
+  #define FLATBUFFERS_ATTRIBUTE(attr) attr
 
   #define FLATBUFFERS_FALLTHROUGH() [[fallthrough]]
 #else
@@ -330,8 +341,25 @@ typedef uintmax_t largest_scalar_t;
 // In 32bits, this evaluates to 2GB - 1
 #define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1)
 
+// The minimum size of a buffer that can be a valid flatbuffer.
+// Includes the offset to the root table (uoffset_t), the offset to the vtable
+// of the root table (soffset_t), the size of the vtable (uint16_t), and the
+// size of the referring table (uint16_t). Parenthesized so it expands safely.
+#define FLATBUFFERS_MIN_BUFFER_SIZE (sizeof(uoffset_t) + sizeof(soffset_t) + \
+   sizeof(uint16_t) + sizeof(uint16_t))
+
 // We support aligning the contents of buffers up to this size.
-#define FLATBUFFERS_MAX_ALIGNMENT 16
+#ifndef FLATBUFFERS_MAX_ALIGNMENT
+  #define FLATBUFFERS_MAX_ALIGNMENT 32
+#endif
+
+/// @brief The length of a FlatBuffer file header.
+static const size_t kFileIdentifierLength = 4;
+
+inline bool VerifyAlignmentRequirements(size_t align, size_t min_align = 1) {
+  if (align < min_align || align > (FLATBUFFERS_MAX_ALIGNMENT)) return false;
+  return (align & (align - 1)) == 0;  // must be power of 2
+}
 
 #if defined(_MSC_VER)
   #pragma warning(disable: 4351) // C4351: new behavior: elements of array ... will be default initialized
@@ -431,5 +459,38 @@ inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) {
   return ((~buf_size) + 1) & (scalar_size - 1);
 }
 
+// Generic 'operator==' with conditional specialisations.
+// T e - new value of a scalar field.
+// T def - default of scalar (is known at compile-time).
+template<typename T> inline bool IsTheSameAs(T e, T def) { return e == def; }
+
+#if defined(FLATBUFFERS_NAN_DEFAULTS) && \
+    defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0)
+// Like `operator==(e, def)` with weak NaN if T=(float|double).
+template<typename T> inline bool IsFloatTheSameAs(T e, T def) {
+  return (e == def) || ((def != def) && (e != e));
+}
+template<> inline bool IsTheSameAs<float>(float e, float def) {
+  return IsFloatTheSameAs(e, def);
+}
+template<> inline bool IsTheSameAs<double>(double e, double def) {
+  return IsFloatTheSameAs(e, def);
+}
+#endif
+
+// Returns true when 'v' lies outside the closed range [low; high]. Exists as
+// a workaround for GCC's [-Werror=type-limits] "always true" comparison warning.
+template<typename T>
+inline bool IsOutRange(const T &v, const T &low, const T &high) {
+  if (v < low) return true;
+  return high < v;
+}
+
+// Returns true when 'v' lies inside the closed range [low; high].
+template<typename T>
+inline bool IsInRange(const T &v, const T &low, const T &high) {
+  return !(v < low) && !(high < v);
+}
+
 }  // namespace flatbuffers
 #endif  // FLATBUFFERS_BASE_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h
new file mode 100644
index 0000000..fba7de2
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_BUFFER_H_
+#define FLATBUFFERS_BUFFER_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+
+namespace flatbuffers {
+
+// Wrapper for uoffset_t to allow safe template specialization.
+// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset).
+template<typename T> struct Offset {
+  uoffset_t o;
+  Offset() : o(0) {}
+  Offset(uoffset_t _o) : o(_o) {}
+  Offset<void> Union() const { return Offset<void>(o); }
+  bool IsNull() const { return !o; }
+};
+
+inline void EndianCheck() {
+  int endiantest = 1;
+  // If this fails, see FLATBUFFERS_LITTLEENDIAN above.
+  FLATBUFFERS_ASSERT(*reinterpret_cast<char *>(&endiantest) ==
+                     FLATBUFFERS_LITTLEENDIAN);
+  (void)endiantest;
+}
+
+template<typename T> FLATBUFFERS_CONSTEXPR size_t AlignOf() {
+  // clang-format off
+  #ifdef _MSC_VER
+    return __alignof(T);
+  #else
+    #ifndef alignof
+      return __alignof__(T);
+    #else
+      return alignof(T);
+    #endif
+  #endif
+  // clang-format on
+}
+
+// Lexicographic "less than" for two length-delimited strings (which may hold
+// embedded nulls): true when the first one orders before the second.
+static inline bool StringLessThan(const char *a_data, uoffset_t a_size,
+                                  const char *b_data, uoffset_t b_size) {
+  const int order = memcmp(a_data, b_data, (std::min)(a_size, b_size));
+  return order != 0 ? order < 0 : a_size < b_size;
+}
+
+// When we read serialized data from memory, in the case of most scalars,
+// we want to just read T, but in the case of Offset, we want to actually
+// perform the indirection and return a pointer.
+// The template specialization below does just that.
+// It is wrapped in a struct since function templates can't overload on the
+// return type like this.
+// The typedef is for the convenience of callers of this function
+// (avoiding the need for a trailing return decltype)
+template<typename T> struct IndirectHelper {
+  typedef T return_type;
+  typedef T mutable_return_type;
+  static const size_t element_stride = sizeof(T);
+  static return_type Read(const uint8_t *p, uoffset_t i) {
+    return EndianScalar((reinterpret_cast<const T *>(p))[i]);
+  }
+};
+template<typename T> struct IndirectHelper<Offset<T>> {
+  typedef const T *return_type;
+  typedef T *mutable_return_type;
+  static const size_t element_stride = sizeof(uoffset_t);
+  static return_type Read(const uint8_t *p, uoffset_t i) {
+    p += i * sizeof(uoffset_t);
+    return reinterpret_cast<return_type>(p + ReadScalar<uoffset_t>(p));
+  }
+};
+template<typename T> struct IndirectHelper<const T *> {
+  typedef const T *return_type;
+  typedef T *mutable_return_type;
+  static const size_t element_stride = sizeof(T);
+  static return_type Read(const uint8_t *p, uoffset_t i) {
+    return reinterpret_cast<const T *>(p + i * sizeof(T));
+  }
+};
+
+/// @brief Get a pointer to the file_identifier section of the buffer.
+/// @return Returns a const char pointer to the start of the file_identifier
+/// characters in the buffer.  The returned char * has length
+/// 'flatbuffers::kFileIdentifierLength'.
+/// This function is UNDEFINED for FlatBuffers whose schema does not include
+/// a file_identifier (likely points at padding or the start of the root
+/// vtable).
+inline const char *GetBufferIdentifier(const void *buf,
+                                       bool size_prefixed = false) {
+  return reinterpret_cast<const char *>(buf) +
+         ((size_prefixed) ? 2 * sizeof(uoffset_t) : sizeof(uoffset_t));
+}
+
+// Helper to see if the identifier in a buffer has the expected value.
+inline bool BufferHasIdentifier(const void *buf, const char *identifier,
+                                bool size_prefixed = false) {
+  return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier,
+                 flatbuffers::kFileIdentifierLength) == 0;
+}
+
+/// @cond FLATBUFFERS_INTERNAL
+// Helpers to get a typed pointer to the root object contained in the buffer.
+template<typename T> T *GetMutableRoot(void *buf) {
+  EndianCheck();
+  return reinterpret_cast<T *>(
+      reinterpret_cast<uint8_t *>(buf) +
+      EndianScalar(*reinterpret_cast<uoffset_t *>(buf)));
+}
+
+template<typename T> T *GetMutableSizePrefixedRoot(void *buf) {
+  return GetMutableRoot<T>(reinterpret_cast<uint8_t *>(buf) +
+                           sizeof(uoffset_t));
+}
+
+template<typename T> const T *GetRoot(const void *buf) {
+  return GetMutableRoot<T>(const_cast<void *>(buf));
+}
+
+template<typename T> const T *GetSizePrefixedRoot(const void *buf) {
+  return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(uoffset_t));
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_BUFFER_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h
new file mode 100644
index 0000000..56eb281
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_BUFFER_REF_H_
+#define FLATBUFFERS_BUFFER_REF_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h"
+
+namespace flatbuffers {
+
+// Convenient way to bundle a buffer and its length, to pass it around
+// typed by its root.
+// A BufferRef frees its buffer on destruction only if `must_free` is set.
+struct BufferRefBase {};  // for std::is_base_of
+
+template<typename T> struct BufferRef : BufferRefBase {
+  BufferRef() : buf(nullptr), len(0), must_free(false) {}
+  BufferRef(uint8_t *_buf, uoffset_t _len)
+      : buf(_buf), len(_len), must_free(false) {}
+
+  ~BufferRef() {
+    if (must_free) free(buf);
+  }
+
+  const T *GetRoot() const { return flatbuffers::GetRoot<T>(buf); }
+
+  bool Verify() {
+    Verifier verifier(buf, len);
+    return verifier.VerifyBuffer<T>(nullptr);
+  }
+
+  uint8_t *buf;
+  uoffset_t len;
+  bool must_free;
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_BUFFER_REF_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h
new file mode 100644
index 0000000..fff5db7
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_DEFAULT_ALLOCATOR_H_
+#define FLATBUFFERS_DEFAULT_ALLOCATOR_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+
+namespace flatbuffers {
+
+// DefaultAllocator uses new/delete to allocate memory regions
+class DefaultAllocator : public Allocator {
+ public:
+  uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE {
+    return new uint8_t[size];
+  }
+
+  void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; }
+
+  static void dealloc(void *p, size_t) { delete[] static_cast<uint8_t *>(p); }
+};
+
+// Forwarding helpers, as used by DetachedBuffer and vector_downward.
+// NOTE(review): these were described as letting a null allocator mean "use
+// the default allocator", but the code below dereferences `allocator`
+// unconditionally, so callers must pass a non-null allocator.
+inline uint8_t *Allocate(Allocator *allocator, size_t size) {
+  return allocator->allocate(size);
+}
+
+inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size) {
+  allocator->deallocate(p, size);
+}
+
+inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p,
+                                   size_t old_size, size_t new_size,
+                                   size_t in_use_back, size_t in_use_front) {
+  return allocator->reallocate_downward(old_p, old_size, new_size, in_use_back,
+                                        in_use_front);
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_DEFAULT_ALLOCATOR_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h
new file mode 100644
index 0000000..2d8ebac
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_DETACHED_BUFFER_H_
+#define FLATBUFFERS_DETACHED_BUFFER_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h"
+
+namespace flatbuffers {
+
+// DetachedBuffer is a finished flatbuffer memory region, detached from its
+// builder. The original memory region and allocator are also stored so that
+// the DetachedBuffer can manage the memory lifetime.
+class DetachedBuffer {
+ public:
+  DetachedBuffer()
+      : allocator_(nullptr),
+        own_allocator_(false),
+        buf_(nullptr),
+        reserved_(0),
+        cur_(nullptr),
+        size_(0) {}
+
+  DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf,
+                 size_t reserved, uint8_t *cur, size_t sz)
+      : allocator_(allocator),
+        own_allocator_(own_allocator),
+        buf_(buf),
+        reserved_(reserved),
+        cur_(cur),
+        size_(sz) {}
+
+  DetachedBuffer(DetachedBuffer &&other)
+      : allocator_(other.allocator_),
+        own_allocator_(other.own_allocator_),
+        buf_(other.buf_),
+        reserved_(other.reserved_),
+        cur_(other.cur_),
+        size_(other.size_) {
+    other.reset();
+  }
+
+  DetachedBuffer &operator=(DetachedBuffer &&other) {
+    if (this == &other) return *this;
+
+    destroy();
+
+    allocator_ = other.allocator_;
+    own_allocator_ = other.own_allocator_;
+    buf_ = other.buf_;
+    reserved_ = other.reserved_;
+    cur_ = other.cur_;
+    size_ = other.size_;
+
+    other.reset();
+
+    return *this;
+  }
+
+  ~DetachedBuffer() { destroy(); }
+
+  const uint8_t *data() const { return cur_; }
+
+  uint8_t *data() { return cur_; }
+
+  size_t size() const { return size_; }
+
+  // These may change access mode, leave these at end of public section
+  FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other));
+  FLATBUFFERS_DELETE_FUNC(
+      DetachedBuffer &operator=(const DetachedBuffer &other));
+
+ protected:
+  Allocator *allocator_;
+  bool own_allocator_;
+  uint8_t *buf_;
+  size_t reserved_;
+  uint8_t *cur_;
+  size_t size_;
+
+  inline void destroy() {
+    if (buf_) Deallocate(allocator_, buf_, reserved_);
+    if (own_allocator_ && allocator_) { delete allocator_; }
+    reset();
+  }
+
+  inline void reset() {
+    allocator_ = nullptr;
+    own_allocator_ = false;
+    buf_ = nullptr;
+    reserved_ = 0;
+    cur_ = nullptr;
+    size_ = 0;
+  }
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_DETACHED_BUFFER_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h
new file mode 100644
index 0000000..9aedf7b
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_FLATBUFFER_BUILDER_H_
+#define FLATBUFFERS_FLATBUFFER_BUILDER_H_
+
+#include <functional>
+#include <initializer_list>
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_allocator.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h"
+
+namespace flatbuffers {
+
+// Maps a field ID to the byte offset of its slot inside a vtable.
+inline voffset_t FieldIndexToOffset(voffset_t field_id) {
+  // Keep in sync with the vtable layout that EndTable() below writes.
+  const int header_slots = 2;  // Vtable size, then object size.
+  return static_cast<voffset_t>((header_slots + field_id) * sizeof(voffset_t));
+}
+
+template<typename T, typename Alloc = std::allocator<T>>
+const T *data(const std::vector<T, Alloc> &v) {
+  // The result eventually reaches memcpy, which must not be given a null
+  // pointer, so an empty vector yields the address of a dummy static instead.
+  static uint8_t t;
+  return v.empty() ? reinterpret_cast<const T *>(&t) : &v.front();
+}
+template<typename T, typename Alloc = std::allocator<T>>
+T *data(std::vector<T, Alloc> &v) {
+  // The result eventually reaches memcpy, which must not be given a null
+  // pointer, so an empty vector yields the address of a dummy static instead.
+  static uint8_t t;
+  return v.empty() ? reinterpret_cast<T *>(&t) : &v.front();
+}
+
+/// @addtogroup flatbuffers_cpp_api
+/// @{
+/// @class FlatBufferBuilder
+/// @brief Helper class to hold data needed in creation of a FlatBuffer.
+/// To serialize data, you typically call one of the `Create*()` functions in
+/// the generated code, which in turn call a sequence of `StartTable`/
+/// `PushElement`/`AddElement`/`EndTable`, or the builtin `CreateString`/
+/// `CreateVector` functions. Do this in depth-first order to build up a tree to
+/// the root. `Finish()` wraps up the buffer ready for transport.
+class FlatBufferBuilder {
+ public:
+  /// @brief Default constructor for FlatBufferBuilder.
+  /// @param[in] initial_size The initial size of the buffer, in bytes. Defaults
+  /// to `1024`.
+  /// @param[in] allocator An `Allocator` to use. If null will use
+  /// `DefaultAllocator`.
+  /// @param[in] own_allocator Whether the builder/vector should own the
+  /// allocator. Defaults to `false`.
+  /// @param[in] buffer_minalign Force the buffer to be aligned to the given
+  /// minimum alignment upon reallocation. Only needed if you intend to store
+  /// types with custom alignment AND you wish to read the buffer in-place
+  /// directly after creation.
+  explicit FlatBufferBuilder(
+      size_t initial_size = 1024, Allocator *allocator = nullptr,
+      bool own_allocator = false,
+      size_t buffer_minalign = AlignOf<largest_scalar_t>())
+      : buf_(initial_size, allocator, own_allocator, buffer_minalign),
+        num_field_loc(0),
+        max_voffset_(0),
+        nested(false),
+        finished(false),
+        minalign_(1),
+        force_defaults_(false),
+        dedup_vtables_(true),
+        string_pool(nullptr) {
+    EndianCheck();
+  }
+
+  /// @brief Move constructor for FlatBufferBuilder.
+  FlatBufferBuilder(FlatBufferBuilder &&other)
+      : buf_(1024, nullptr, false, AlignOf<largest_scalar_t>()),
+        num_field_loc(0),
+        max_voffset_(0),
+        nested(false),
+        finished(false),
+        minalign_(1),
+        force_defaults_(false),
+        dedup_vtables_(true),
+        string_pool(nullptr) {
+    EndianCheck();
+    // Default construct and swap idiom.
+    // Lack of delegating constructors in vs2010 makes it more verbose than
+    // needed.
+    Swap(other);  // `other` is left holding the default-initialized state.
+  }
+
+  /// @brief Move assignment operator for FlatBufferBuilder.
+  FlatBufferBuilder &operator=(FlatBufferBuilder &&other) {
+    // Move-construct a temporary, then swap with it.
+    FlatBufferBuilder temp(std::move(other));
+    Swap(temp);  // `temp` now carries this builder's previous state.
+    return *this;
+  }
+
+  void Swap(FlatBufferBuilder &other) {  // Exchange builder state with `other`.
+    using std::swap;
+    swap(string_pool, other.string_pool);
+    swap(dedup_vtables_, other.dedup_vtables_);
+    swap(force_defaults_, other.force_defaults_);
+    swap(minalign_, other.minalign_);
+    swap(finished, other.finished);
+    swap(nested, other.nested);
+    swap(max_voffset_, other.max_voffset_);
+    swap(num_field_loc, other.num_field_loc);
+    buf_.swap(other.buf_);
+  }
+
+  ~FlatBufferBuilder() {
+    delete string_pool;  // Deleting a null pointer is a no-op.
+  }
+
+  void Reset() {  // Clear() followed by resetting the underlying buffer.
+    Clear();       // clear builder state
+    buf_.reset();  // deallocate buffer
+  }
+
+  /// @brief Reset the construction state in this FlatBufferBuilder so it can be
+  /// reused to construct another buffer.
+  void Clear() {
+    ClearOffsets();
+    buf_.clear();
+    nested = false;
+    finished = false;
+    minalign_ = 1;
+    if (string_pool) string_pool->clear();  // The pool object itself is kept.
+  }
+
+  /// @brief The current size of the serialized buffer, counting from the end.
+  /// @return Returns a `uoffset_t` with the current size of the buffer.
+  uoffset_t GetSize() const { return buf_.size(); }
+
+  /// @brief Get the serialized buffer (after you call `Finish()`).
+  /// @return Returns a `uint8_t` pointer to the FlatBuffer data inside the
+  /// buffer.
+  uint8_t *GetBufferPointer() const {
+    Finished();  // Asserts that the buffer has been finished.
+    return buf_.data();
+  }
+
+  /// @brief Get the serialized buffer (after you call `Finish()`) as a span.
+  /// @return Returns a constructed flatbuffers::span that is a view over the
+  /// FlatBuffer data inside the buffer.
+  flatbuffers::span<uint8_t> GetBufferSpan() const {
+    Finished();  // Asserts that the buffer has been finished.
+    return flatbuffers::span<uint8_t>(buf_.data(), buf_.size());
+  }
+
+  /// @brief Get a pointer to the buffer without the `Finished()` check.
+  /// @return Returns a `uint8_t` pointer to the (possibly unfinished) buffer.
+  uint8_t *GetCurrentBufferPointer() const { return buf_.data(); }
+
+  /// @brief Get the released pointer to the serialized buffer.
+  /// @warning Do NOT attempt to use this FlatBufferBuilder afterwards!
+  /// @return A `DetachedBuffer` that owns the buffer and its allocator and
+  /// behaves similar to a `unique_ptr` with a deleter.
+  FLATBUFFERS_ATTRIBUTE([[deprecated("use Release() instead")]])
+  DetachedBuffer ReleaseBufferPointer() {
+    Finished();  // Asserts that the buffer has been finished.
+    return buf_.release();
+  }
+
+  /// @brief Get the released DetachedBuffer.
+  /// @return A `DetachedBuffer` that owns the buffer and its allocator.
+  DetachedBuffer Release() {
+    Finished();  // Asserts that the buffer has been finished.
+    return buf_.release();
+  }
+
+  /// @brief Get the released pointer to the serialized buffer.
+  /// @param size The size of the memory block containing
+  /// the serialized `FlatBuffer`.
+  /// @param offset The offset from the released pointer where the finished
+  /// `FlatBuffer` starts.
+  /// @return A raw pointer to the start of the memory block containing
+  /// the serialized `FlatBuffer`.
+  /// @remark If the allocator is owned, it gets deleted when the destructor is
+  /// called.
+  uint8_t *ReleaseRaw(size_t &size, size_t &offset) {
+    Finished();  // Asserts that the buffer has been finished.
+    return buf_.release_raw(size, offset);
+  }
+
+  /// @brief Get the minimum alignment this buffer needs to be accessed
+  /// properly. This is only known once all elements have been written (after
+  /// you call Finish()). You can use this information if you need to embed
+  /// a FlatBuffer in some other buffer, such that you can later read it
+  /// without first having to copy it into its own buffer.
+  size_t GetBufferMinAlignment() const {
+    Finished();  // Asserts that the buffer has been finished.
+    return minalign_;
+  }
+
+  /// @cond FLATBUFFERS_INTERNAL
+  void Finished() const {
+    // If you get this assert, you're attempting to access a buffer
+    // which hasn't been finished yet. Be sure to call
+    // FlatBufferBuilder::Finish with your root table.
+    // If you really need to access an unfinished buffer, call
+    // GetCurrentBufferPointer instead.
+    FLATBUFFERS_ASSERT(finished);
+  }
+  /// @endcond
+
+  /// @brief In order to save space, fields that are set to their default value
+  /// don't get serialized into the buffer.
+  /// @param[in] fd When set to `true`, always serializes default values that
+  /// are set. Optional fields which are not set explicitly will still not be
+  /// serialized.
+  void ForceDefaults(bool fd) { force_defaults_ = fd; }
+
+  /// @brief By default vtables are deduped in order to save space.
+  /// @param[in] dedup When `true`, `EndTable()` reuses an identical earlier vtable.
+  void DedupVtables(bool dedup) { dedup_vtables_ = dedup; }
+
+  /// @cond FLATBUFFERS_INTERNAL
+  void Pad(size_t num_bytes) { buf_.fill(num_bytes); }  // Forwards to fill().
+
+  void TrackMinAlign(size_t elem_size) {
+    if (minalign_ < elem_size) { minalign_ = elem_size; }  // Keep the maximum.
+  }
+
+  void Align(size_t elem_size) {
+    TrackMinAlign(elem_size);  // Remember the largest alignment requested.
+    buf_.fill(PaddingBytes(buf_.size(), elem_size));
+  }
+
+  void PushFlatBuffer(const uint8_t *bytes, size_t size) {
+    PushBytes(bytes, size);
+    finished = true;  // Marks the builder finished without calling Finish().
+  }
+
+  void PushBytes(const uint8_t *bytes, size_t size) { buf_.push(bytes, size); }  // No Align() here.
+
+  void PopBytes(size_t amount) { buf_.pop(amount); }  // Forwards to buf_.pop().
+
+  template<typename T> void AssertScalarT() {
+    // Compile-time guard: the code assumes power of 2 sizes and
+    // endian-swap-ability.
+    static_assert(flatbuffers::is_scalar<T>::value, "T must be a scalar type");
+  }
+
+  // Aligns to sizeof(T), writes one scalar, and returns the new buffer size.
+  template<typename T> uoffset_t PushElement(T element) {
+    AssertScalarT<T>();
+    Align(sizeof(T));
+    buf_.push_small(EndianScalar(element));
+    return GetSize();
+  }
+
+  template<typename T> uoffset_t PushElement(Offset<T> off) {
+    // Offsets are converted by ReferTo() before being written: see below.
+    return PushElement(ReferTo(off.o));
+  }
+
+  // Records where a field was written so that the matching vtable entry can
+  // be filled in later.
+  void TrackField(voffset_t field, uoffset_t off) {
+    FieldLoc loc = { off, field };
+    buf_.scratch_push_small(loc);
+    ++num_field_loc;
+    if (max_voffset_ < field) { max_voffset_ = field; }
+  }
+
+  // Like PushElement, but additionally tracks the field this represents.
+  template<typename T> void AddElement(voffset_t field, T e, T def) {
+    // Values equal to the default are skipped unless force_defaults_ is set.
+    if (IsTheSameAs(e, def) && !force_defaults_) return;
+    TrackField(field, PushElement(e));
+  }
+
+  template<typename T> void AddElement(voffset_t field, T e) {
+    TrackField(field, PushElement(e));  // No default given: always written.
+  }
+
+  template<typename T> void AddOffset(voffset_t field, Offset<T> off) {
+    if (off.IsNull()) return;  // Null offsets are not stored.
+    AddElement(field, ReferTo(off.o), static_cast<uoffset_t>(0));
+  }
+
+  template<typename T> void AddStruct(voffset_t field, const T *structptr) {
+    if (!structptr) return;  // Default, don't store.
+    Align(AlignOf<T>());
+    buf_.push_small(*structptr);  // Pushes the struct value itself.
+    TrackField(field, GetSize());
+  }
+
+  void AddStructOffset(voffset_t field, uoffset_t off) {
+    TrackField(field, off);  // `off` is recorded as-is, without ReferTo().
+  }
+
+  // Offsets initially are relative to the end of the buffer (downwards).
+  // This function converts them to be relative to the current location
+  // in the buffer (when stored here), pointing upwards.
+  uoffset_t ReferTo(uoffset_t off) {
+    // Align to ensure GetSize() below is correct.
+    Align(sizeof(uoffset_t));
+    // Offset must be non-zero and refer to something already in the buffer.
+    const uoffset_t size = GetSize();
+    FLATBUFFERS_ASSERT(off && off <= size);
+    return size - off + static_cast<uoffset_t>(sizeof(uoffset_t));
+  }
+
+  void NotNested() {
+    // If you hit this, you're trying to construct a Table/Vector/String
+    // during the construction of its parent table (between the MyTableBuilder
+    // and table.Finish()).
+    // Move the creation of these sub-objects to above the MyTableBuilder to
+    // not get this assert.
+    // Ignoring this assert may appear to work in simple cases, but the reason
+    // it is here is that storing objects in-line may cause vtable offsets
+    // to not fit anymore. It also leads to vtable duplication.
+    FLATBUFFERS_ASSERT(!nested);
+    // If you hit this, fields were added outside the scope of a table.
+    FLATBUFFERS_ASSERT(!num_field_loc);
+  }
+
+  // From generated code (or from the parser), we call StartTable/EndTable
+  // with a sequence of AddElement calls in between.
+  uoffset_t StartTable() {
+    NotNested();
+    nested = true;
+    return GetSize();  // Presumably the `start` later given to EndTable().
+  }
+
+  // This finishes one serialized object by generating the vtable if it's a
+  // table, comparing it against existing vtables, and writing the
+  // resulting vtable offset.
+  uoffset_t EndTable(uoffset_t start) {
+    // If you get this assert, a corresponding StartTable wasn't called.
+    FLATBUFFERS_ASSERT(nested);
+    // Write the vtable offset, which is the start of any Table.
+    // We fill its value later.
+    auto vtableoffsetloc = PushElement<soffset_t>(0);
+    // Write a vtable, which consists entirely of voffset_t elements.
+    // It starts with the vtable size in bytes, followed by the object size,
+    // followed by the field offsets themselves. In reverse:
+    // Include space for the last offset and ensure empty tables have a
+    // minimum size.
+    max_voffset_ =
+        (std::max)(static_cast<voffset_t>(max_voffset_ + sizeof(voffset_t)),
+                   FieldIndexToOffset(0));
+    buf_.fill_big(max_voffset_);
+    auto table_object_size = vtableoffsetloc - start;
+    // Vtable use 16bit offsets.
+    FLATBUFFERS_ASSERT(table_object_size < 0x10000);
+    WriteScalar<voffset_t>(buf_.data() + sizeof(voffset_t),
+                           static_cast<voffset_t>(table_object_size));
+    WriteScalar<voffset_t>(buf_.data(), max_voffset_);
+    // Write the offsets into the table
+    for (auto it = buf_.scratch_end() - num_field_loc * sizeof(FieldLoc);
+         it < buf_.scratch_end(); it += sizeof(FieldLoc)) {
+      auto field_location = reinterpret_cast<FieldLoc *>(it);
+      auto pos = static_cast<voffset_t>(vtableoffsetloc - field_location->off);
+      // If this asserts, it means you've set a field twice.
+      FLATBUFFERS_ASSERT(
+          !ReadScalar<voffset_t>(buf_.data() + field_location->id));
+      WriteScalar<voffset_t>(buf_.data() + field_location->id, pos);
+    }
+    ClearOffsets();
+    auto vt1 = reinterpret_cast<voffset_t *>(buf_.data());
+    auto vt1_size = ReadScalar<voffset_t>(vt1);
+    auto vt_use = GetSize();
+    // See if we already have generated a vtable with this exact same
+    // layout before. If so, make it point to the old one, remove this one.
+    if (dedup_vtables_) {
+      for (auto it = buf_.scratch_data(); it < buf_.scratch_end();
+           it += sizeof(uoffset_t)) {
+        auto vt_offset_ptr = reinterpret_cast<uoffset_t *>(it);
+        auto vt2 = reinterpret_cast<voffset_t *>(buf_.data_at(*vt_offset_ptr));
+        auto vt2_size = ReadScalar<voffset_t>(vt2);
+        if (vt1_size != vt2_size || 0 != memcmp(vt2, vt1, vt1_size)) continue;
+        vt_use = *vt_offset_ptr;
+        buf_.pop(GetSize() - vtableoffsetloc);
+        break;
+      }
+    }
+    // If this is a new vtable, remember it.
+    if (vt_use == GetSize()) { buf_.scratch_push_small(vt_use); }
+    // Fill the vtable offset we created above.
+    // The offset points from the beginning of the object to where the
+    // vtable is stored.
+    // Offsets default direction is downward in memory for future format
+    // flexibility (storing all vtables at the start of the file).
+    WriteScalar(buf_.data_at(vtableoffsetloc),
+                static_cast<soffset_t>(vt_use) -
+                    static_cast<soffset_t>(vtableoffsetloc));
+
+    nested = false;
+    return vtableoffsetloc;
+  }
+
+  FLATBUFFERS_ATTRIBUTE([[deprecated("call the version above instead")]])
+  uoffset_t EndTable(uoffset_t start, voffset_t /*numfields*/) {
+    return EndTable(start);  // The second argument is ignored.
+  }
+
+  // This checks a required field has been set in a given table that has
+  // just been constructed.
+  template<typename T> void Required(Offset<T> table, voffset_t field);
+
+  uoffset_t StartStruct(size_t alignment) {
+    Align(alignment);
+    return GetSize();  // Buffer size after the Align() call.
+  }
+
+  uoffset_t EndStruct() { return GetSize(); }  // Current buffer size.
+
+  void ClearOffsets() {  // Drop the field locations recorded by TrackField().
+    buf_.scratch_pop(num_field_loc * sizeof(FieldLoc));
+    max_voffset_ = 0;
+    num_field_loc = 0;
+  }
+
+  // Aligns such that when "len" bytes are written, an object can be written
+  // after it with "alignment" without padding.
+  void PreAlign(size_t len, size_t alignment) {
+    if (len == 0) return;  // Zero length: no tracking and no padding.
+    TrackMinAlign(alignment);
+    buf_.fill(PaddingBytes(GetSize() + len, alignment));
+  }
+  template<typename T> void PreAlign(size_t len) {
+    AssertScalarT<T>();
+    PreAlign(len, sizeof(T));  // Use the scalar's size as the alignment.
+  }
+  /// @endcond
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// @param[in] str A const char pointer to the data to be stored as a string.
+  /// @param[in] len The number of bytes that should be stored from `str`.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateString(const char *str, size_t len) {
+    NotNested();
+    PreAlign<uoffset_t>(len + 1);  // Always 0-terminated.
+    buf_.fill(1);  // The one extra byte counted in `len + 1` above.
+    PushBytes(reinterpret_cast<const uint8_t *>(str), len);
+    PushElement(static_cast<uoffset_t>(len));  // The length is pushed last.
+    return Offset<String>(GetSize());
+  }
+
+  /// @brief Store a string in the buffer, which is null-terminated.
+  /// @param[in] str A const char pointer to a C-string to add to the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateString(const char *str) {
+    return CreateString(str, strlen(str));  // strlen() requires non-null str.
+  }
+
+  /// @brief Store a string in the buffer, which is null-terminated.
+  /// @param[in] str A char pointer to a C-string to add to the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateString(char *str) {
+    return CreateString(str, strlen(str));  // strlen() requires non-null str.
+  }
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// @param[in] str A const reference to a std::string to store in the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateString(const std::string &str) {
+    return CreateString(str.c_str(), str.length());  // Uses length(), not strlen.
+  }
+
+  // clang-format off
+  #ifdef FLATBUFFERS_HAS_STRING_VIEW
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// @param[in] str A const string_view to copy into the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateString(flatbuffers::string_view str) {
+    return CreateString(str.data(), str.size());
+  }
+  #endif // FLATBUFFERS_HAS_STRING_VIEW
+  // clang-format on
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// @param[in] str A const pointer to a `String` struct to add to the buffer.
+  /// @return The offset where the string starts, or 0 when `str` is null.
+  Offset<String> CreateString(const String *str) {
+    return str ? CreateString(str->c_str(), str->size()) : 0;
+  }
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// @param[in] str A const reference to a std::string-like type that supports
+  /// T::c_str() and T::length(), to store in the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  template<typename T> Offset<String> CreateString(const T &str) {
+    return CreateString(str.c_str(), str.length());
+  }
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// If a string with this exact contents has already been serialized before,
+  /// instead simply returns the offset of the existing string. This uses a map
+  /// stored on the heap, but only stores the numerical offsets.
+  /// @param[in] str A const char pointer to the data to be stored as a string.
+  /// @param[in] len The number of bytes that should be stored from `str`.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateSharedString(const char *str, size_t len) {
+    FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK);
+    if (!string_pool)  // The pool is created with `new` on first use.
+      string_pool = new StringOffsetMap(StringOffsetCompare(buf_));
+    auto size_before_string = buf_.size();
+    // Must first serialize the string, since the set is all offsets into
+    // buffer.
+    auto off = CreateString(str, len);
+    auto it = string_pool->find(off);
+    // If it exists we reuse existing serialized data!
+    if (it != string_pool->end()) {
+      // We can remove the string we serialized.
+      buf_.pop(buf_.size() - size_before_string);
+      return *it;
+    }
+    // Record this string for future use.
+    string_pool->insert(off);
+    return off;
+  }
+
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// If a string with this exact contents has already been serialized before,
+  /// instead simply returns the offset of the existing string. This uses a map
+  /// stored on the heap, but only stores the numerical offsets.
+  /// @param[in] str A const flatbuffers::string_view to store in the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateSharedString(const flatbuffers::string_view str) {
+    return CreateSharedString(str.data(), str.size());
+  }
+#else
+  /// @brief Store a string in the buffer, which is null-terminated.
+  /// If a string with this exact contents has already been serialized before,
+  /// instead simply returns the offset of the existing string. This uses a map
+  /// stored on the heap, but only stores the numerical offsets.
+  /// @param[in] str A const char pointer to a C-string to add to the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateSharedString(const char *str) {
+    return CreateSharedString(str, strlen(str));
+  }
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// If a string with this exact contents has already been serialized before,
+  /// instead simply returns the offset of the existing string. This uses a map
+  /// stored on the heap, but only stores the numerical offsets.
+  /// @param[in] str A const reference to a std::string to store in the buffer.
+  /// @return Returns the offset in the buffer where the string starts.
+  Offset<String> CreateSharedString(const std::string &str) {
+    return CreateSharedString(str.c_str(), str.length());  // Uses length().
+  }
+#endif
+
+  /// @brief Store a string in the buffer, which can contain any binary data.
+  /// If a string with this exact contents has already been serialized before,
+  /// instead simply returns the offset of the existing string. This uses a map
+  /// stored on the heap, but only stores the numerical offsets.
+  /// @param[in] str A const pointer to a `String` struct; may be null.
+  /// @return The offset where the string starts, or 0 when `str` is null.
+  Offset<String> CreateSharedString(const String *str) {
+    return str ? CreateSharedString(str->c_str(), str->size()) : 0;
+  }
+
+  /// @cond FLATBUFFERS_INTERNAL
+  uoffset_t EndVector(size_t len) {
+    FLATBUFFERS_ASSERT(nested);  // Hit if no corresponding StartVector.
+    nested = false;
+    return PushElement(static_cast<uoffset_t>(len));  // Push `len` as uoffset_t.
+  }
+
+  void StartVector(size_t len, size_t elemsize) {
+    NotNested();
+    nested = true;  // Cleared again by EndVector().
+    PreAlign<uoffset_t>(len * elemsize);
+    PreAlign(len * elemsize, elemsize);  // Just in case elemsize > uoffset_t.
+  }
+
+  // Call this right before StartVector/CreateVector if you want to force the
+  // alignment to be something different than what the element size would
+  // normally dictate.
+  // This is useful when storing a nested_flatbuffer in a vector of bytes,
+  // or when storing SIMD floats, etc.
+  void ForceVectorAlignment(size_t len, size_t elemsize, size_t alignment) {
+    if (len == 0) return;  // Empty vector: nothing to align.
+    FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment));
+    PreAlign(len * elemsize, alignment);
+  }
+
+  // Similar to ForceVectorAlignment but for String fields.
+  void ForceStringAlignment(size_t len, size_t alignment) {
+    if (len == 0) return;  // Empty string: nothing to align.
+    FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment));
+    PreAlign((len + 1) * sizeof(char), alignment);  // +1 as in CreateString().
+  }
+
+  /// @endcond
+
+  /// @brief Serialize an array into a FlatBuffer `vector`.
+  /// @tparam T The data type of the array elements.
+  /// @param[in] v A pointer to the array of type `T` to serialize into the
+  /// buffer as a `vector`.
+  /// @param[in] len The number of elements to serialize.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T> Offset<Vector<T>> CreateVector(const T *v, size_t len) {
+    // If this assert hits, you're specifying a template argument that is
+    // causing the wrong overload to be selected, remove it.
+    AssertScalarT<T>();
+    StartVector(len, sizeof(T));
+    if (len == 0) { return Offset<Vector<T>>(EndVector(len)); }  // `v` unused.
+    // clang-format off
+    #if FLATBUFFERS_LITTLEENDIAN
+      PushBytes(reinterpret_cast<const uint8_t *>(v), len * sizeof(T));
+    #else
+      if (sizeof(T) == 1) {
+        PushBytes(reinterpret_cast<const uint8_t *>(v), len);
+      } else {
+        for (auto i = len; i > 0; ) {
+          PushElement(v[--i]);
+        }
+      }
+    #endif
+    // clang-format on
+    return Offset<Vector<T>>(EndVector(len));
+  }
+
+  /// @brief Serialize an array-like object into a FlatBuffer `vector`.
+  /// @tparam T The data type of the array elements.
+  /// @tparam C The type of the array.
+  /// @param[in] array A reference to an array-like object of type `C`, which
+  /// must provide `data()` and `size()`, to serialize as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, class C> Offset<Vector<T>> CreateVector(const C &array) {
+    return CreateVector(array.data(), array.size());
+  }
+
+  /// @brief Serialize an initializer list into a FlatBuffer `vector`.
+  /// @tparam T The data type of the initializer list elements.
+  /// @param[in] v The value of the initializer list.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T>
+  Offset<Vector<T>> CreateVector(std::initializer_list<T> v) {
+    return CreateVector(v.begin(), v.size());  // Pointer + length overload.
+  }
+
+  template<typename T>
+  Offset<Vector<Offset<T>>> CreateVector(const Offset<T> *v, size_t len) {
+    StartVector(len, sizeof(Offset<T>));
+    for (auto i = len; i > 0;) { PushElement(v[--i]); }  // Last element first.
+    return Offset<Vector<Offset<T>>>(EndVector(len));
+  }
+
+  /// @brief Serialize a `std::vector` into a FlatBuffer `vector`.
+  /// @tparam T The data type of the `std::vector` elements.
+  /// @param v A const reference to the `std::vector` to serialize into the
+  /// buffer as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename Alloc = std::allocator<T>>
+  Offset<Vector<T>> CreateVector(const std::vector<T, Alloc> &v) {
+    return CreateVector(data(v), v.size());  // data() is non-null even if empty.
+  }
+
+  // vector<bool> may be implemented using a bit-set, so we can't access it as
+  // an array. Instead, read elements manually.
+  // Background: https://isocpp.org/blog/2012/11/on-vectorbool
+  Offset<Vector<uint8_t>> CreateVector(const std::vector<bool> &v) {
+    StartVector(v.size(), sizeof(uint8_t));
+    for (auto i = v.size(); i > 0;) {  // Last element first.
+      PushElement(static_cast<uint8_t>(v[--i]));
+    }
+    return Offset<Vector<uint8_t>>(EndVector(v.size()));
+  }
+
+  /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
+  /// This is a convenience function that takes care of iteration for you.
+  /// @tparam T The data type of the `std::vector` elements.
+  /// @param f A function that takes the current iteration 0..vector_size-1 and
+  /// returns any type that you can construct a FlatBuffers vector out of.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T>
+  Offset<Vector<T>> CreateVector(size_t vector_size,
+                                 const std::function<T(size_t i)> &f) {
+    FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK);
+    std::vector<T> elems(vector_size);  // Intermediate results, then serialized.
+    for (size_t i = 0; i < vector_size; i++) elems[i] = f(i);
+    return CreateVector(elems);
+  }
+
+  /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
+  /// This is a convenience function that takes care of iteration for you. This
+  /// uses a vector stored on the heap to store the intermediate results of the
+  /// iteration.
+  /// @tparam T The data type of the `std::vector` elements.
+  /// @param f A function that takes the current iteration 0..vector_size-1,
+  /// and the state parameter returning any type that you can construct a
+  /// FlatBuffers vector out of.
+  /// @param state State passed to f.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename F, typename S>
+  Offset<Vector<T>> CreateVector(size_t vector_size, F f, S *state) {
+    FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK);
+    std::vector<T> elems(vector_size);  // Intermediate results, then serialized.
+    for (size_t i = 0; i < vector_size; i++) elems[i] = f(i, state);
+    return CreateVector(elems);
+  }
+
+  /// @brief Serialize a `std::vector<StringType>` into a FlatBuffer `vector`,
+  /// where StringType is any type that is accepted by the CreateString()
+  /// overloads.
+  /// This is a convenience function for a common case.
+  /// @param v A const reference to the `std::vector` to serialize into the
+  /// buffer as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename StringType = std::string,
+           typename Alloc = std::allocator<StringType>>
+  Offset<Vector<Offset<String>>> CreateVectorOfStrings(
+      const std::vector<StringType, Alloc> &v) {
+    return CreateVectorOfStrings(v.cbegin(), v.cend());
+  }
+
+  /// @brief Serialize a collection of Strings into a FlatBuffer `vector`.
+  /// This is a convenience function for a common case.
+  /// @param begin The beginning iterator of the collection
+  /// @param end The ending iterator of the collection
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<class It>
+  Offset<Vector<Offset<String>>> CreateVectorOfStrings(It begin, It end) {
+    auto size = std::distance(begin, end);
+    auto scratch_buffer_usage = size * sizeof(Offset<String>);
+    // If there is not enough space to store the offsets, there definitely won't
+    // be enough space to store all the strings. So ensuring space for the
+    // scratch region is OK, for if it fails, it would have failed later.
+    buf_.ensure_space(scratch_buffer_usage);
+    for (auto it = begin; it != end; ++it) {
+      buf_.scratch_push_small(CreateString(*it));
+    }
+    StartVector(size, sizeof(Offset<String>));
+    for (auto i = 1; i <= size; i++) {
+      // Note we re-evaluate the buf location each iteration to account for any
+      // underlying buffer resizing that may occur.
+      PushElement(*reinterpret_cast<Offset<String> *>(
+          buf_.scratch_end() - i * sizeof(Offset<String>)));
+    }
+    buf_.scratch_pop(scratch_buffer_usage);
+    return Offset<Vector<Offset<String>>>(EndVector(size));
+  }
+
+  /// @brief Serialize an array of structs into a FlatBuffer `vector`.
+  /// @tparam T The data type of the struct array elements.
+  /// @param[in] v A pointer to the array of type `T` to serialize into the
+  /// buffer as a `vector`. Only read when `len` is greater than zero.
+  /// @param[in] len The number of elements to serialize.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T>
+  Offset<Vector<const T *>> CreateVectorOfStructs(const T *v, size_t len) {
+    StartVector(len * sizeof(T) / AlignOf<T>(), AlignOf<T>());
+    if (len > 0) {  // nothing to copy for an empty array
+      PushBytes(reinterpret_cast<const uint8_t *>(v), sizeof(T) * len);
+    }
+    return Offset<Vector<const T *>>(EndVector(len));
+  }
+
+  /// @brief Convert an array of native structs and store the results as a
+  /// FlatBuffer `vector` of structs.
+  /// @tparam T The FlatBuffer struct type written to the buffer.
+  /// @tparam S The native struct type of the input array.
+  /// @param[in] v A pointer to the array of type `S` to convert; element i is
+  /// passed to `pack_func` to produce element i of the vector.
+  /// @param[in] len The number of elements to serialize.
+  /// @param[in] pack_func Conversion function from `S` to `T`; asserted set.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename S>
+  Offset<Vector<const T *>> CreateVectorOfNativeStructs(
+      const S *v, size_t len, T (*const pack_func)(const S &)) {
+    FLATBUFFERS_ASSERT(pack_func);
+    T *const packed = StartVectorOfStructs<T>(len);
+    for (size_t n = 0; n != len; ++n) { packed[n] = pack_func(v[n]); }
+    return EndVectorOfStructs<T>(len);
+  }
+
+  /// @brief Serialize an array of native structs into a FlatBuffer `vector`.
+  /// @tparam T The FlatBuffer struct type; the result type of `Pack`.
+  /// @tparam S The data type of the native struct array elements.
+  /// @param[in] v A pointer to the array of type `S` to serialize into the
+  /// buffer as a `vector`.
+  /// @param[in] len The number of elements to serialize.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename S>
+  Offset<Vector<const T *>> CreateVectorOfNativeStructs(const S *v,
+                                                        size_t len) {
+    extern T Pack(const S &);  // declared here only; defined elsewhere
+    return CreateVectorOfNativeStructs(v, len, Pack);
+  }
+
+  /// @brief Build a FlatBuffer `vector` of structs by letting a callback fill
+  /// each element in place.
+  /// @tparam T The data type of the struct array elements.
+  /// @param[in] vector_size The number of structs in the vector.
+  /// @param[in] filler Called as `filler(i, ptr)` for i in 0..vector_size-1,
+  /// where `ptr` points at the struct for index i that must be filled.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored. Mostly useful with mutation accessors.
+  template<typename T>
+  Offset<Vector<const T *>> CreateVectorOfStructs(
+      size_t vector_size, const std::function<void(size_t i, T *)> &filler) {
+    T *const slots = StartVectorOfStructs<T>(vector_size);
+    // Walk the reserved storage front to back, one callback per element.
+    for (size_t n = 0; n != vector_size; ++n) {
+      filler(n, slots + n);
+    }
+    return EndVectorOfStructs<T>(vector_size);
+  }
+
+  /// @brief Build a FlatBuffer `vector` of structs by letting a callback fill
+  /// each element in place, passing extra caller state along.
+  /// @tparam T The data type of the struct array elements.
+  /// @param[in] vector_size The number of structs in the vector.
+  /// @param[in] f Called as `f(i, ptr, state)` for i in 0..vector_size-1,
+  /// where `ptr` points at the struct for index i that must be filled.
+  /// @param[in] state Arbitrary state to pass to f.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored. Mostly useful with mutation accessors.
+  template<typename T, typename F, typename S>
+  Offset<Vector<const T *>> CreateVectorOfStructs(size_t vector_size, F f,
+                                                  S *state) {
+    T *const slots = StartVectorOfStructs<T>(vector_size);
+    // Walk the reserved storage front to back, one callback per element.
+    for (size_t n = 0; n != vector_size; ++n) {
+      f(n, slots + n, state);
+    }
+    return EndVectorOfStructs<T>(vector_size);
+  }
+
+  /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`.
+  /// @tparam T The data type of the `std::vector` struct elements.
+  /// @param[in] v A const reference to the `std::vector` of structs to
+  /// serialize into the buffer as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename Alloc = std::allocator<T>>
+  Offset<Vector<const T *>> CreateVectorOfStructs(
+      const std::vector<T, Alloc> &v) {
+    return CreateVectorOfStructs(data(v), v.size());  // pointer + length form
+  }
+
+  /// @brief Serialize a `std::vector` of native structs into a `vector`.
+  /// @tparam T The FlatBuffer struct type produced by `pack_func`.
+  /// @tparam S The data type of the `std::vector` native struct elements.
+  /// @param[in] v A const reference to the `std::vector` of native structs
+  /// to serialize into the buffer as a `vector`.
+  /// @param[in] pack_func Pointer to a function to convert the native struct
+  /// to the FlatBuffer struct.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  /// NOTE(review): Alloc defaults to allocator<T> although v holds S; confirm.
+  template<typename T, typename S, typename Alloc = std::allocator<T>>
+  Offset<Vector<const T *>> CreateVectorOfNativeStructs(
+      const std::vector<S, Alloc> &v, T (*const pack_func)(const S &)) {
+    return CreateVectorOfNativeStructs<T, S>(data(v), v.size(), pack_func);
+  }
+
+  /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
+  /// `vector`.
+  /// @tparam T The FlatBuffer struct type stored in the resulting `vector`.
+  /// @tparam S The data type of the `std::vector` native struct elements.
+  /// @param[in] v A const reference to the `std::vector` of native structs
+  /// to serialize into the buffer as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename S, typename Alloc = std::allocator<S>>
+  Offset<Vector<const T *>> CreateVectorOfNativeStructs(
+      const std::vector<S, Alloc> &v) {
+    return CreateVectorOfNativeStructs<T, S>(data(v), v.size());
+  }
+
+  /// @cond FLATBUFFERS_INTERNAL
+  template<typename T> struct StructKeyComparator {  // less-than for sorting
+    bool operator()(const T &a, const T &b) const {
+      return a.KeyCompareLessThan(&b);  // T itself defines the key ordering
+    }
+  };
+  /// @endcond
+
+  /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`
+  /// in sorted order.
+  /// @tparam T The data type of the `std::vector` struct elements.
+  /// @param[in] v A pointer to the `std::vector` of structs to serialize;
+  /// forwarded to the pointer/length overload, which sorts it in place.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename Alloc = std::allocator<T>>
+  Offset<Vector<const T *>> CreateVectorOfSortedStructs(
+      std::vector<T, Alloc> *v) {
+    return CreateVectorOfSortedStructs(data(*v), v->size());
+  }
+
+  /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
+  /// `vector` in sorted order.
+  /// @tparam T The FlatBuffer struct type stored in the resulting `vector`.
+  /// @tparam S The data type of the `std::vector` native struct elements.
+  /// @param[in] v A pointer to the `std::vector` of native structs to
+  /// serialize into the buffer as a `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename S, typename Alloc = std::allocator<T>>
+  Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(
+      std::vector<S, Alloc> *v) {
+    return CreateVectorOfSortedNativeStructs<T, S>(data(*v), v->size());
+  }
+
+  /// @brief Serialize an array of structs into a FlatBuffer `vector` in sorted
+  /// order.
+  /// @tparam T The data type of the struct array elements.
+  /// @param[in] v A pointer to the array of type `T` to serialize into the
+  /// buffer as a `vector`. The array itself is stable-sorted in place first.
+  /// @param[in] len The number of elements to serialize.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T>
+  Offset<Vector<const T *>> CreateVectorOfSortedStructs(T *v, size_t len) {
+    std::stable_sort(v, v + len, StructKeyComparator<T>());
+    return CreateVectorOfStructs(v, len);
+  }
+
+  /// @brief Serialize an array of native structs into a FlatBuffer `vector` in
+  /// sorted order.
+  /// @tparam T The data type of the struct array elements.
+  /// @tparam S The data type of the native struct array elements.
+  /// @param[in] v A pointer to the array of type `S` to serialize into the
+  /// buffer as a `vector`. Only read; the packed copies are what get sorted.
+  /// @param[in] len The number of elements to serialize.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename S>
+  Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(S *v,
+                                                              size_t len) {
+    extern T Pack(const S &);  // declared here only; defined elsewhere
+    auto structs = StartVectorOfStructs<T>(len);
+    for (size_t i = 0; i < len; i++) { structs[i] = Pack(v[i]); }
+    std::stable_sort(structs, structs + len, StructKeyComparator<T>());
+    return EndVectorOfStructs<T>(len);
+  }
+
+  /// @cond FLATBUFFERS_INTERNAL
+  template<typename T> struct TableKeyComparator {
+    TableKeyComparator(vector_downward &buf) : buf_(buf) {}
+    TableKeyComparator(const TableKeyComparator &other) : buf_(other.buf_) {}
+    bool operator()(const Offset<T> &a, const Offset<T> &b) const {
+      auto table_a = reinterpret_cast<T *>(buf_.data_at(a.o));  // resolve a
+      auto table_b = reinterpret_cast<T *>(buf_.data_at(b.o));  // resolve b
+      return table_a->KeyCompareLessThan(table_b);
+    }
+    vector_downward &buf_;  // buffer the offsets are resolved against
+
+   private:
+    FLATBUFFERS_DELETE_FUNC(
+        TableKeyComparator &operator=(const TableKeyComparator &other));
+  };
+  /// @endcond
+
+  /// @brief Serialize an array of `table` offsets as a `vector` in the buffer
+  /// in sorted order.
+  /// @tparam T The data type that the offset refers to.
+  /// @param[in] v An array of type `Offset<T>` that contains the `table`
+  /// offsets to store; the array is stable-sorted in place by table key.
+  /// @param[in] len The number of elements to store in the `vector`.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T>
+  Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(Offset<T> *v,
+                                                       size_t len) {
+    std::stable_sort(v, v + len, TableKeyComparator<T>(buf_));
+    return CreateVector(v, len);
+  }
+
+  /// @brief Serialize a `std::vector` of `table` offsets as a `vector` in the
+  /// buffer in sorted order.
+  /// @tparam T The data type that the offset refers to.
+  /// @param[in] v A pointer to a `std::vector` of `Offset<T>`; forwarded to
+  /// the pointer/length overload, which sorts the elements in place.
+  /// @return Returns a typed `Offset` into the serialized data indicating
+  /// where the vector is stored.
+  template<typename T, typename Alloc = std::allocator<T>>
+  Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(
+      std::vector<Offset<T>, Alloc> *v) {
+    return CreateVectorOfSortedTables(data(*v), v->size());
+  }
+
+  /// @brief Specialized version of `CreateVector` for non-copying use cases.
+  /// Write the data any time later to the returned buffer pointer `buf`.
+  /// @param[in] len The number of elements to store in the `vector`.
+  /// @param[in] elemsize The size of each element in the `vector`.
+  /// @param[out] buf Receives a `uint8_t` pointer to the reserved element
+  /// storage, which can be written to at a later time.
+  /// @return The value returned by `EndVector(len)` for the new vector.
+  uoffset_t CreateUninitializedVector(size_t len, size_t elemsize,
+                                      uint8_t **buf) {
+    NotNested();
+    StartVector(len, elemsize);
+    buf_.make_space(len * elemsize);  // reserve the payload, leave it unwritten
+    auto vec_start = GetSize();  // captured before EndVector() is called
+    auto vec_end = EndVector(len);
+    *buf = buf_.data_at(vec_start);
+    return vec_end;
+  }
+
+  /// @brief Specialized version of `CreateVector` for non-copying use cases.
+  /// Write the data any time later to the returned buffer pointer `buf`.
+  /// @tparam T The data type of the data that will be stored in the buffer
+  /// as a `vector`; `AssertScalarT<T>()` is invoked on it.
+  /// @param[in] len The number of elements to store in the `vector`.
+  /// @param[out] buf Receives a `T` pointer to the reserved element storage,
+  /// which can be written to at a later time.
+  /// @return A typed `Offset` for the new, not yet filled-in `vector`.
+  template<typename T>
+  Offset<Vector<T>> CreateUninitializedVector(size_t len, T **buf) {
+    AssertScalarT<T>();
+    return CreateUninitializedVector(len, sizeof(T),
+                                     reinterpret_cast<uint8_t **>(buf));
+  }
+
+  template<typename T>  // struct flavour: no AssertScalarT<T>() call here
+  Offset<Vector<const T *>> CreateUninitializedVectorOfStructs(size_t len,
+                                                               T **buf) {
+    return CreateUninitializedVector(len, sizeof(T),
+                                     reinterpret_cast<uint8_t **>(buf));
+  }
+
+  // @brief Build a vector of scalar type T from an array of scalar type U,
+  // converting every element with static_cast. Useful with e.g. pre
+  // "enum class" enums, or any existing scalar data of the wrong type.
+  template<typename T, typename U>
+  Offset<Vector<T>> CreateVectorScalarCast(const U *v, size_t len) {
+    AssertScalarT<T>();
+    AssertScalarT<U>();
+    StartVector(len, sizeof(T));
+    for (size_t k = len; k != 0; --k) { PushElement(static_cast<T>(v[k - 1])); }
+    return Offset<Vector<T>>(EndVector(len));
+  }
+
+  /// @brief Write a struct by itself, typically to be part of a union.
+  template<typename T> Offset<const T *> CreateStruct(const T &structobj) {
+    NotNested();
+    Align(AlignOf<T>());
+    buf_.push_small(structobj);
+    return Offset<const T *>(GetSize());  // size right after the push
+  }
+
+  /// @brief Finish serializing a buffer by writing the offset of `root`.
+  /// @param[in] file_identifier If a `file_identifier` is given, the buffer
+  /// will be prefixed with a standard FlatBuffers file header.
+  template<typename T>
+  void Finish(Offset<T> root, const char *file_identifier = nullptr) {
+    Finish(root.o, file_identifier, false);  // false: no size prefix
+  }
+
+  /// @brief Finish a buffer with a 32 bit size field prefixed (size of the
+  /// buffer following the size field). These buffers are NOT compatible
+  /// with standard buffers created by Finish, i.e. you can't call GetRoot
+  /// on them, you have to use GetSizePrefixedRoot instead.
+  /// All >32 bit quantities in this buffer will be aligned when the whole
+  /// size-prefixed buffer is aligned.
+  /// These kinds of buffers are useful for creating a stream of FlatBuffers.
+  template<typename T>
+  void FinishSizePrefixed(Offset<T> root,
+                          const char *file_identifier = nullptr) {
+    Finish(root.o, file_identifier, true);  // true: also push a size prefix
+  }
+
+  void SwapBufAllocator(FlatBufferBuilder &other) {  // delegates to buf_
+    buf_.swap_allocator(other.buf_);
+  }
+
+  /// @brief The length of a FlatBuffer file header.
+  static const size_t kFileIdentifierLength =
+      ::flatbuffers::kFileIdentifierLength;  // mirrors the namespace constant
+
+ protected:
+  // You shouldn't really be copying instances of this class.
+  FlatBufferBuilder(const FlatBufferBuilder &);  // kept out of public API
+  FlatBufferBuilder &operator=(const FlatBufferBuilder &);
+
+  void Finish(uoffset_t root, const char *file_identifier, bool size_prefix) {
+    NotNested();
+    buf_.clear_scratch();
+    // Account for all bytes pushed below so the whole buffer ends up aligned.
+    PreAlign((size_prefix ? sizeof(uoffset_t) : 0) + sizeof(uoffset_t) +
+                 (file_identifier ? kFileIdentifierLength : 0),
+             minalign_);
+    if (file_identifier) {
+      FLATBUFFERS_ASSERT(strlen(file_identifier) == kFileIdentifierLength);
+      PushBytes(reinterpret_cast<const uint8_t *>(file_identifier),
+                kFileIdentifierLength);
+    }
+    PushElement(ReferTo(root));  // Location of root.
+    if (size_prefix) { PushElement(GetSize()); }  // size of data pushed so far
+    finished = true;  // shared tail of Finish() and FinishSizePrefixed()
+  }
+
+  struct FieldLoc {  // NOTE(review): looks like one record per table field
+    uoffset_t off;   // presumably the field's buffer offset — confirm at use
+    voffset_t id;    // presumably the field's vtable slot — confirm at use
+  };
+
+  vector_downward buf_;  // storage that the builder methods push data into
+
+  // Accumulating offsets of table members while it is being built.
+  // We store these in the scratch pad of buf_, after the vtable offsets.
+  uoffset_t num_field_loc;
+  // Track how much of the vtable is in use, so we can output the most compact
+  // possible vtable.
+  voffset_t max_voffset_;
+
+  // Ensure objects are not nested.
+  bool nested;
+
+  // Set to true by Finish(); ensures the buffer is finished before access.
+  bool finished;
+
+  size_t minalign_;  // alignment handed to PreAlign() in Finish()
+
+  bool force_defaults_;  // Serialize values equal to their defaults anyway.
+
+  bool dedup_vtables_;  // NOTE(review): presumably vtable de-duplication
+
+  struct StringOffsetCompare {  // orders string offsets by string content
+    StringOffsetCompare(const vector_downward &buf) : buf_(&buf) {}
+    bool operator()(const Offset<String> &a, const Offset<String> &b) const {
+      auto stra = reinterpret_cast<const String *>(buf_->data_at(a.o));
+      auto strb = reinterpret_cast<const String *>(buf_->data_at(b.o));
+      return StringLessThan(stra->data(), stra->size(), strb->data(),
+                            strb->size());
+    }
+    const vector_downward *buf_;  // buffer the offsets are resolved against
+  };
+
+  // For use with CreateSharedString. Instantiated on first use only.
+  typedef std::set<Offset<String>, StringOffsetCompare> StringOffsetMap;
+  StringOffsetMap *string_pool;  // a std::set despite the "Map" in its type
+
+ private:
+  // Allocates space for `vector_size` structures and returns a pointer to it.
+  // Must be completed with EndVectorOfStructs().
+  template<typename T> T *StartVectorOfStructs(size_t vector_size) {
+    StartVector(vector_size * sizeof(T) / AlignOf<T>(), AlignOf<T>());
+    return reinterpret_cast<T *>(buf_.make_space(vector_size * sizeof(T)));
+  }
+
+  // End the vector of structures in the flatbuffers.
+  // Vector should have previously been started with StartVectorOfStructs().
+  template<typename T>
+  Offset<Vector<const T *>> EndVectorOfStructs(size_t vector_size) {
+    return Offset<Vector<const T *>>(EndVector(vector_size));
+  }
+};
+/// @}
+
+/// Helpers to get a typed pointer to objects that are currently being built.
+/// @warning Creating new objects will lead to reallocations and invalidates
+/// the pointer!
+template<typename T>
+T *GetMutableTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset) {
+  return reinterpret_cast<T *>(fbb.GetCurrentBufferPointer() + fbb.GetSize() -
+                               offset.o);  // offset.o bytes before data end
+}
+
+template<typename T>  // const view of GetMutableTemporaryPointer()
+const T *GetTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset) {
+  return GetMutableTemporaryPointer<T>(fbb, offset);
+}
+
+template<typename T>  // asserts that `field` has a non-zero offset in `table`
+void FlatBufferBuilder::Required(Offset<T> table, voffset_t field) {
+  auto table_ptr = reinterpret_cast<const Table *>(buf_.data_at(table.o));
+  bool ok = table_ptr->GetOptionalFieldOffset(field) != 0;
+  // If this fails, the caller will show what field needs to be set.
+  FLATBUFFERS_ASSERT(ok);
+  (void)ok;  // presumably avoids an unused warning if the assert is a no-op
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_VECTOR_DOWNWARD_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h
new file mode 100644
index 0000000..7166d4f
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2017 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_STL_EMULATION_H_
+#define FLATBUFFERS_STL_EMULATION_H_
+
+// clang-format off
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+
+#include <string>
+#include <type_traits>
+#include <vector>
+#include <memory>
+#include <limits>
+
+#ifndef FLATBUFFERS_USE_STD_OPTIONAL
+  // Not set by the user: default to std::optional on a C++17 compiler.
+  // __cplusplus >= 201703L - a compiler has support of 'static inline' variables.
+  #if (defined(__cplusplus) && __cplusplus >= 201703L) \
+      || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+    #define FLATBUFFERS_USE_STD_OPTIONAL 1
+  #else
+    #define FLATBUFFERS_USE_STD_OPTIONAL 0
+  #endif // (defined(__cplusplus) && __cplusplus >= 201703L) ...
+#endif // FLATBUFFERS_USE_STD_OPTIONAL
+
+#if FLATBUFFERS_USE_STD_OPTIONAL
+  #include <optional>
+#endif
+
+// __cpp_lib_span is the library feature-test macro for std::span.
+#if defined(FLATBUFFERS_USE_STD_SPAN)
+    #include <span>
+#elif defined(__cpp_lib_span) && defined(__has_include)
+  #if __has_include(<span>)
+    #include <span>
+    #define FLATBUFFERS_USE_STD_SPAN
+  #endif
+#else
+  // Disable non-trivial ctors if FLATBUFFERS_SPAN_MINIMAL defined.
+  #if !defined(FLATBUFFERS_TEMPLATES_ALIASES)
+    #define FLATBUFFERS_SPAN_MINIMAL
+  #else
+    // Enable implicit construction of a span<T,N> from a std::array<T,N>.
+    #include <array>
+  #endif
+#endif // defined(FLATBUFFERS_USE_STD_SPAN)
+
+// This header provides backwards compatibility for older versions of the STL.
+namespace flatbuffers {
+
+#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+  template <typename T>
+  using numeric_limits = std::numeric_limits<T>;  // plain alias
+#else
+  template <typename T> class numeric_limits :
+    public std::numeric_limits<T> {};  // inheritance stands in for the alias
+#endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+
+#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+  template <typename T> using is_scalar = std::is_scalar<T>;  // direct aliases
+  template <typename T, typename U> using is_same = std::is_same<T,U>;
+  template <typename T> using is_floating_point = std::is_floating_point<T>;
+  template <typename T> using is_unsigned = std::is_unsigned<T>;
+  template <typename T> using is_enum = std::is_enum<T>;
+  template <typename T> using make_unsigned = std::make_unsigned<T>;
+  template<bool B, class T, class F>
+  using conditional = std::conditional<B, T, F>;
+  template<class T, T v>
+  using integral_constant = std::integral_constant<T, v>;
+  template <bool B>
+  using bool_constant = integral_constant<bool, B>;
+  using true_type  = std::true_type;
+  using false_type = std::false_type;
+#else
+  // MSVC 2010 doesn't support C++11 aliases; derive from the std traits.
+  template <typename T> struct is_scalar : public std::is_scalar<T> {};
+  template <typename T, typename U> struct is_same : public std::is_same<T,U> {};
+  template <typename T> struct is_floating_point :
+        public std::is_floating_point<T> {};
+  template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
+  template <typename T> struct is_enum : public std::is_enum<T> {};
+  template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
+  template<bool B, class T, class F>
+  struct conditional : public std::conditional<B, T, F> {};
+  template<class T, T v>
+  struct integral_constant : public std::integral_constant<T, v> {};
+  template <bool B>
+  struct bool_constant : public integral_constant<bool, B> {};
+  typedef bool_constant<true>  true_type;   // built on the local bool_constant
+  typedef bool_constant<false> false_type;
+#endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+
+#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
+  template <class T> using unique_ptr = std::unique_ptr<T>;
+#else
+  // MSVC 2010 doesn't support C++11 aliases, so we "alias" manually by
+  // deriving from std::unique_ptr to bring unique_ptr into the flatbuffers
+  // namespace. Only construction and assignment are re-declared here;
+  // everything else is inherited from std::unique_ptr.
+  template <class T> class unique_ptr : public std::unique_ptr<T> {
+    public:
+    unique_ptr() {}
+    explicit unique_ptr(T* p) : std::unique_ptr<T>(p) {}
+    unique_ptr(std::unique_ptr<T>&& u) { *this = std::move(u); }
+    unique_ptr(unique_ptr&& u) { *this = std::move(u); }
+    unique_ptr& operator=(std::unique_ptr<T>&& u) {
+      std::unique_ptr<T>::reset(u.release());
+      return *this;
+    }
+    unique_ptr& operator=(unique_ptr&& u) {
+      std::unique_ptr<T>::reset(u.release());
+      return *this;
+    }
+    unique_ptr& operator=(T* p) {
+      std::unique_ptr<T>::reset(p);  // std::unique_ptr has no operator=(T*)
+      return *this;
+    }
+  };
+#endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
+
+#if FLATBUFFERS_USE_STD_OPTIONAL
+template<class T>  // C++17 path: everything maps straight onto <optional>
+using Optional = std::optional<T>;
+using nullopt_t = std::nullopt_t;
+inline constexpr nullopt_t nullopt = std::nullopt;
+
+#else
+// Limited implementation of Optional<T> type for a scalar T.
+// This implementation is limited to trivial types compatible with the
+// std::is_arithmetic<T> or std::is_enum<T> type traits.
+
+// A tag to indicate an empty flatbuffers::Optional<T>.
+struct nullopt_t {
+  explicit FLATBUFFERS_CONSTEXPR_CPP11 nullopt_t(int) {}  // no default ctor
+};
+
+#if defined(FLATBUFFERS_CONSTEXPR_DEFINED)
+  namespace internal {
+    template <class> struct nullopt_holder {  // holds the one tag instance
+      static constexpr nullopt_t instance_ = nullopt_t(0);
+    };
+    template<class Dummy>
+    constexpr nullopt_t nullopt_holder<Dummy>::instance_;  // out-of-class def
+  }
+  static constexpr const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
+
+#else
+  namespace internal {
+    template <class> struct nullopt_holder {  // holds the one tag instance
+      static const nullopt_t instance_;
+    };
+    template<class Dummy>
+    const nullopt_t nullopt_holder<Dummy>::instance_  = nullopt_t(0);
+  }
+  static const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
+
+#endif
+
+template<class T>
+class Optional FLATBUFFERS_FINAL_CLASS {
+  // Non-scalar 'T' would extremely complicate Optional<T>.
+  // Use is_scalar<T> checking because flatbuffers::is_arithmetic<T>
+  // isn't implemented.
+  static_assert(flatbuffers::is_scalar<T>::value, "unexpected type T");
+
+ public:
+  ~Optional() {}
+
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional() FLATBUFFERS_NOEXCEPT
+    : value_(), has_value_(false) {}  // empty; value_ is value-initialized
+
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(nullopt_t) FLATBUFFERS_NOEXCEPT
+    : value_(), has_value_(false) {}  // empty, same as default construction
+
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(T val) FLATBUFFERS_NOEXCEPT
+    : value_(val), has_value_(true) {}  // engaged; implicit from T
+
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(const Optional &other) FLATBUFFERS_NOEXCEPT
+    : value_(other.value_), has_value_(other.has_value_) {}
+
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(const Optional &other) FLATBUFFERS_NOEXCEPT {
+    value_ = other.value_;
+    has_value_ = other.has_value_;
+    return *this;
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(nullopt_t) FLATBUFFERS_NOEXCEPT {
+    value_ = T();  // also clear the stored value, not just the flag
+    has_value_ = false;
+    return *this;
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(T val) FLATBUFFERS_NOEXCEPT {
+    value_ = val;
+    has_value_ = true;
+    return *this;
+  }
+
+  void reset() FLATBUFFERS_NOEXCEPT {
+    *this = nullopt;  // reuses operator=(nullopt_t)
+  }
+
+  void swap(Optional &other) FLATBUFFERS_NOEXCEPT {
+    std::swap(value_, other.value_);
+    std::swap(has_value_, other.has_value_);
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP11 FLATBUFFERS_EXPLICIT_CPP11 operator bool() const FLATBUFFERS_NOEXCEPT {
+    return has_value_;
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP11 bool has_value() const FLATBUFFERS_NOEXCEPT {
+    return has_value_;
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP11 const T& operator*() const FLATBUFFERS_NOEXCEPT {
+    return value_;  // unchecked: no has_value() test here
+  }
+
+  const T& value() const {
+    FLATBUFFERS_ASSERT(has_value());  // checked access, unlike operator*
+    return value_;
+  }
+
+  T value_or(T default_value) const FLATBUFFERS_NOEXCEPT {
+    return has_value() ? value_ : default_value;
+  }
+
+ private:
+  T value_;         // stored value; T() while empty
+  bool has_value_;  // true when a value has been set
+};
+
+template<class T>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& opt, nullopt_t) FLATBUFFERS_NOEXCEPT {
+  return !opt.has_value();  // equal to nullopt exactly when empty
+}
+template<class T>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(nullopt_t, const Optional<T>& opt) FLATBUFFERS_NOEXCEPT {
+  return !opt.has_value();  // equal to nullopt exactly when empty
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const U& rhs) FLATBUFFERS_NOEXCEPT {
+  return lhs.has_value() && (*lhs == rhs);  // empty never equals a value
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const T& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
+  return rhs.has_value() && (lhs == *rhs);  // empty never equals a value
+}
+
+template<class T, class U>
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
+  return static_cast<bool>(lhs) != static_cast<bool>(rhs)
+              ? false  // exactly one side is empty
+              : !static_cast<bool>(lhs) ? true : (*lhs == *rhs);  // both empty are equal, as in std::optional
+}
+#endif // FLATBUFFERS_USE_STD_OPTIONAL
+
+
+// Use std::span if enabled; else a very limited and naive partial implementation of C++20 std::span<T,Extent>.
+#if defined(FLATBUFFERS_USE_STD_SPAN)
+  inline constexpr std::size_t dynamic_extent = std::dynamic_extent;
+  template<class T, std::size_t Extent = std::dynamic_extent>
+  using span = std::span<T, Extent>;
+
+#else // !defined(FLATBUFFERS_USE_STD_SPAN)
+FLATBUFFERS_CONSTEXPR std::size_t dynamic_extent = static_cast<std::size_t>(-1);
+
+// Exclude this code if MSVC2010 or non-STL Android is active.
+// The non-STL Android doesn't have `std::is_convertible` required for SFINAE.
+#if !defined(FLATBUFFERS_SPAN_MINIMAL)
+namespace internal {
+  // SFINAE helper class for checking a common overload-resolution condition:
+  // `type` is `int` when a pointer to an array of From can be converted to
+  // a pointer to an array of To and the extents agree (Extent is dynamic or
+  // N == Extent); otherwise `type` is `void`.
+  // This helper is used for checking of 'From -> const From'.
+  template<class To, std::size_t Extent, class From, std::size_t N>
+  struct is_span_convertable {
+    using type =
+      typename std::conditional<std::is_convertible<From (*)[], To (*)[]>::value
+                                && (Extent == dynamic_extent || N == Extent),
+                                int, void>::type;
+  };
+
+  template<typename T>  // thin iterator wrapper around a raw T pointer
+  struct SpanIterator {
+    // TODO: upgrade to std::random_access_iterator_tag.
+    using iterator_category = std::forward_iterator_tag;
+    using difference_type  = std::ptrdiff_t;
+    using value_type = typename std::remove_cv<T>::type;
+    using reference = T&;
+    using pointer   = T*;
+
+    // Convince MSVC compiler that this iterator is trusted (it is verified).
+    #ifdef _MSC_VER
+      using _Unchecked_type = pointer;
+    #endif // _MSC_VER
+
+    SpanIterator(pointer ptr) : ptr_(ptr) {}
+    reference operator*() const { return *ptr_; }
+    pointer operator->() { return ptr_; }
+    SpanIterator& operator++() { ptr_++; return *this; }  // pre-increment
+    SpanIterator  operator++(int) { auto tmp = *this; ++(*this); return tmp; }
+
+    friend bool operator== (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ == rhs.ptr_; }
+    friend bool operator!= (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ != rhs.ptr_; }
+
+   private:
+    pointer ptr_;  // current position
+  };
+}  // namespace internal
+#endif  // !defined(FLATBUFFERS_SPAN_MINIMAL)
+
+// T - element type; must be a complete type that is not an abstract
+// class type.
+// Extent - the number of elements in the sequence, or dynamic.
+template<class T, std::size_t Extent = dynamic_extent>
+class span FLATBUFFERS_FINAL_CLASS {
+ public:
+  typedef T element_type;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef std::size_t size_type;
+
+  static FLATBUFFERS_CONSTEXPR size_type extent = Extent;
+
+  // Returns the number of elements in the span.
+  FLATBUFFERS_CONSTEXPR_CPP11 size_type size() const FLATBUFFERS_NOEXCEPT {
+    return count_;
+  }
+
+  // Returns the size of the sequence in bytes.
+  FLATBUFFERS_CONSTEXPR_CPP11
+  size_type size_bytes() const FLATBUFFERS_NOEXCEPT {
+    return size() * sizeof(element_type);
+  }
+
+  // Checks if the span is empty.
+  FLATBUFFERS_CONSTEXPR_CPP11 bool empty() const FLATBUFFERS_NOEXCEPT {
+    return size() == 0;
+  }
+
+  // Returns a pointer to the beginning of the sequence.
+  FLATBUFFERS_CONSTEXPR_CPP11 pointer data() const FLATBUFFERS_NOEXCEPT {
+    return data_;
+  }
+
+  #if !defined(FLATBUFFERS_SPAN_MINIMAL)
+    using Iterator = internal::SpanIterator<T>;
+
+    Iterator begin() const { return Iterator(data()); }
+    Iterator end() const   { return Iterator(data() + size()); }
+  #endif
+
+  // Returns a reference to the idx-th element of the sequence.
+  // The behavior is undefined if the idx is greater than or equal to size().
+  FLATBUFFERS_CONSTEXPR_CPP11 reference operator[](size_type idx) const {
+    return data()[idx];
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP11 span(const span &other) FLATBUFFERS_NOEXCEPT
+      : data_(other.data_), count_(other.count_) {}
+
+  // Rebinds this span to view the same sequence as `other`; the elements
+  // themselves are not copied. Returns *this so assignments can be chained.
+  FLATBUFFERS_CONSTEXPR_CPP14 span &operator=(const span &other)
+      FLATBUFFERS_NOEXCEPT {
+    data_ = other.data_;
+    count_ = other.count_;
+    return *this;
+  }
+
+  // Limited implementation of
+  // `template <class It> constexpr std::span(It first, size_type count);`.
+  //
+  // Constructs a span that is a view over the range [first, first + count);
+  // the resulting span has: data() == first and size() == count.
+  // The behavior is undefined if [first, first + count) is not a valid range,
+  // or if (extent != flatbuffers::dynamic_extent && count != extent).
+  FLATBUFFERS_CONSTEXPR_CPP11
+  explicit span(pointer first, size_type count) FLATBUFFERS_NOEXCEPT
+    : data_ (Extent == dynamic_extent ? first : (Extent == count ? first : nullptr)),
+      count_(Extent == dynamic_extent ? count : (Extent == count ? Extent : 0)) {
+      // Make span empty if the count argument is incompatible with span<T,N>.
+  }
+
+  // Exclude this code if MSVC2010 is active. The MSVC2010 isn't C++11
+  // compliant, it doesn't support default template arguments for functions.
+  #if defined(FLATBUFFERS_SPAN_MINIMAL)
+  FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr),
+                                                            count_(0) {
+    static_assert(extent == 0 || extent == dynamic_extent, "invalid span");
+  }
+
+  #else
+  // Constructs an empty span whose data() == nullptr and size() == 0.
+  // This overload only participates in overload resolution if
+  // extent == 0 || extent == flatbuffers::dynamic_extent.
+  // A dummy template argument N is needed to make the check dependent for SFINAE.
+  template<std::size_t N = 0,
+    typename internal::is_span_convertable<element_type, Extent, element_type, (N - N)>::type = 0>
+  FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr),
+                                                            count_(0) {
+    static_assert(extent == 0 || extent == dynamic_extent, "invalid span");
+  }
+
+  // Constructs a span that is a view over the array arr; the resulting span
+  // has size() == N and data() == std::data(arr). These overloads only
+  // participate in overload resolution if
+  // extent == std::dynamic_extent || N == extent is true and
+  // std::remove_pointer_t<decltype(std::data(arr))>(*)[]
+  // is convertible to element_type (*)[].
+  template<std::size_t N,
+    typename internal::is_span_convertable<element_type, Extent, element_type, N>::type = 0>
+  FLATBUFFERS_CONSTEXPR_CPP11 span(element_type (&arr)[N]) FLATBUFFERS_NOEXCEPT
+      : data_(arr), count_(N) {}
+
+  template<class U, std::size_t N,
+    typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+  FLATBUFFERS_CONSTEXPR_CPP11 span(std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT
+     : data_(arr.data()), count_(N) {}
+
+  template<class U, std::size_t N,
+    typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+  FLATBUFFERS_CONSTEXPR_CPP11 span(const std::array<U, N> &arr) FLATBUFFERS_NOEXCEPT
+    : data_(arr.data()), count_(N) {}
+
+  // Converting constructor from another span s;
+  // the resulting span has size() == s.size() and data() == s.data().
+  // This overload only participates in overload resolution
+  // if extent == std::dynamic_extent || N == extent is true and U (*)[]
+  // is convertible to element_type (*)[].
+  template<class U, std::size_t N,
+    typename internal::is_span_convertable<element_type, Extent, U, N>::type = 0>
+  FLATBUFFERS_CONSTEXPR_CPP11 span(const flatbuffers::span<U, N> &s) FLATBUFFERS_NOEXCEPT
+      : span(s.data(), s.size()) {
+  }
+
+  #endif  // !defined(FLATBUFFERS_SPAN_MINIMAL)
+
+ private:
+  // This is a naive implementation with 'count_' member even if (Extent != dynamic_extent).
+  // `data_` must stay a non-const pointer member: copy assignment rebinds
+  // it, which a `pointer const` member would make ill-formed.
+  pointer data_;
+  size_type count_;
+};
+#endif  // defined(FLATBUFFERS_USE_STD_SPAN)
+
+#if !defined(FLATBUFFERS_SPAN_MINIMAL)
+template<class ElementType, std::size_t Extent>
+FLATBUFFERS_CONSTEXPR_CPP11
+flatbuffers::span<ElementType, Extent> make_span(ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT {
+  return span<ElementType, Extent>(arr);
+}
+
+template<class ElementType, std::size_t Extent>
+FLATBUFFERS_CONSTEXPR_CPP11
+flatbuffers::span<const ElementType, Extent> make_span(const ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT {
+  return span<const ElementType, Extent>(arr);
+}
+
+template<class ElementType, std::size_t Extent>
+FLATBUFFERS_CONSTEXPR_CPP11
+flatbuffers::span<ElementType, Extent> make_span(std::array<ElementType, Extent> &arr) FLATBUFFERS_NOEXCEPT {
+  return span<ElementType, Extent>(arr);
+}
+
+template<class ElementType, std::size_t Extent>
+FLATBUFFERS_CONSTEXPR_CPP11
+flatbuffers::span<const ElementType, Extent> make_span(const std::array<ElementType, Extent> &arr) FLATBUFFERS_NOEXCEPT {
+  return span<const ElementType, Extent>(arr);
+}
+
+// Extent cannot be deduced from a pointer, so it defaults to dynamic_extent.
+template<class ElementType, std::size_t Extent = dynamic_extent>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<ElementType, dynamic_extent> make_span(ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT {
+  return span<ElementType, dynamic_extent>(first, count);
+}
+
+// Extent cannot be deduced from a pointer, so it defaults to dynamic_extent.
+template<class ElementType, std::size_t Extent = dynamic_extent>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const ElementType, dynamic_extent> make_span(const ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT {
+  return span<const ElementType, dynamic_extent>(first, count);
+}
+#endif // !defined(FLATBUFFERS_SPAN_MINIMAL)
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_STL_EMULATION_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h
new file mode 100644
index 0000000..45cecf2
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_STRING_H_
+#define FLATBUFFERS_STRING_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"
+
+namespace flatbuffers {
+
+struct String : public Vector<char> {
+  const char *c_str() const { return reinterpret_cast<const char *>(Data()); }
+  std::string str() const { return std::string(c_str(), size()); }
+
+  // clang-format off
+  #ifdef FLATBUFFERS_HAS_STRING_VIEW
+  flatbuffers::string_view string_view() const {
+    return flatbuffers::string_view(c_str(), size());
+  }
+  #endif // FLATBUFFERS_HAS_STRING_VIEW
+  // clang-format on
+
+  bool operator<(const String &o) const {
+    return StringLessThan(this->data(), this->size(), o.data(), o.size());
+  }
+};
+
+// Convenience function to get std::string from a String returning an empty
+// string on null pointer.
+static inline std::string GetString(const String *str) {
+  return str ? str->str() : "";
+}
+
+// Convenience function to get char* from a String returning an empty string on
+// null pointer.
+static inline const char *GetCstring(const String *str) {
+  return str ? str->c_str() : "";
+}
+
+#ifdef FLATBUFFERS_HAS_STRING_VIEW
+// Convenience function to get string_view from a String returning an empty
+// string_view on null pointer.
+static inline flatbuffers::string_view GetStringView(const String *str) {
+  return str ? str->string_view() : flatbuffers::string_view();
+}
+#endif  // FLATBUFFERS_HAS_STRING_VIEW
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_STRING_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h
new file mode 100644
index 0000000..385d648
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_STRUCT_H_
+#define FLATBUFFERS_STRUCT_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+
+namespace flatbuffers {
+
+// "structs" are flat structures that do not have an offset table, thus
+// always have all members present and do not support forwards/backwards
+// compatible extensions.
+
+class Struct FLATBUFFERS_FINAL_CLASS {
+ public:
+  template<typename T> T GetField(uoffset_t o) const {
+    return ReadScalar<T>(&data_[o]);
+  }
+
+  template<typename T> T GetStruct(uoffset_t o) const {
+    return reinterpret_cast<T>(&data_[o]);
+  }
+
+  const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; }
+  uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; }
+
+ private:
+  // private constructor & copy constructor: you obtain instances of this
+  // class by pointing to existing data only
+  Struct();
+  Struct(const Struct &);
+  Struct &operator=(const Struct &);
+
+  uint8_t data_[1];
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_STRUCT_H_
\ No newline at end of file
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h
new file mode 100644
index 0000000..3aca63f
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_TABLE_H_
+#define FLATBUFFERS_TABLE_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h"
+
+namespace flatbuffers {
+
+// "tables" use an offset table (possibly shared) that allows fields to be
+// omitted and added at will, but uses an extra indirection to read.
+class Table {
+ public:
+  const uint8_t *GetVTable() const {
+    return data_ - ReadScalar<soffset_t>(data_);
+  }
+
+  // This gets the field offset for any of the functions below it, or 0
+  // if the field was not present.
+  voffset_t GetOptionalFieldOffset(voffset_t field) const {
+    // The vtable offset is always at the start.
+    auto vtable = GetVTable();
+    // The first element is the size of the vtable (fields + type id + itself).
+    auto vtsize = ReadScalar<voffset_t>(vtable);
+    // If the field we're accessing is outside the vtable, we're reading older
+    // data, so it's the same as if the offset was 0 (not present).
+    return field < vtsize ? ReadScalar<voffset_t>(vtable + field) : 0;
+  }
+
+  template<typename T> T GetField(voffset_t field, T defaultval) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    return field_offset ? ReadScalar<T>(data_ + field_offset) : defaultval;
+  }
+
+  template<typename P> P GetPointer(voffset_t field) {
+    auto field_offset = GetOptionalFieldOffset(field);
+    auto p = data_ + field_offset;
+    return field_offset ? reinterpret_cast<P>(p + ReadScalar<uoffset_t>(p))
+                        : nullptr;
+  }
+  template<typename P> P GetPointer(voffset_t field) const {
+    return const_cast<Table *>(this)->GetPointer<P>(field);
+  }
+
+  template<typename P> P GetStruct(voffset_t field) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    auto p = const_cast<uint8_t *>(data_ + field_offset);
+    return field_offset ? reinterpret_cast<P>(p) : nullptr;
+  }
+
+  template<typename Raw, typename Face>
+  flatbuffers::Optional<Face> GetOptional(voffset_t field) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    auto p = data_ + field_offset;
+    return field_offset ? Optional<Face>(static_cast<Face>(ReadScalar<Raw>(p)))
+                        : Optional<Face>();
+  }
+
+  template<typename T> bool SetField(voffset_t field, T val, T def) {
+    auto field_offset = GetOptionalFieldOffset(field);
+    if (!field_offset) return IsTheSameAs(val, def);
+    WriteScalar(data_ + field_offset, val);
+    return true;
+  }
+  template<typename T> bool SetField(voffset_t field, T val) {
+    auto field_offset = GetOptionalFieldOffset(field);
+    if (!field_offset) return false;
+    WriteScalar(data_ + field_offset, val);
+    return true;
+  }
+
+  bool SetPointer(voffset_t field, const uint8_t *val) {
+    auto field_offset = GetOptionalFieldOffset(field);
+    if (!field_offset) return false;
+    WriteScalar(data_ + field_offset,
+                static_cast<uoffset_t>(val - (data_ + field_offset)));
+    return true;
+  }
+
+  uint8_t *GetAddressOf(voffset_t field) {
+    auto field_offset = GetOptionalFieldOffset(field);
+    return field_offset ? data_ + field_offset : nullptr;
+  }
+  const uint8_t *GetAddressOf(voffset_t field) const {
+    return const_cast<Table *>(this)->GetAddressOf(field);
+  }
+
+  bool CheckField(voffset_t field) const {
+    return GetOptionalFieldOffset(field) != 0;
+  }
+
+  // Verify the vtable of this table.
+  // Call this once per table, followed by VerifyField once per field.
+  bool VerifyTableStart(Verifier &verifier) const {
+    return verifier.VerifyTableStart(data_);
+  }
+
+  // Verify a particular field.
+  template<typename T>
+  bool VerifyField(const Verifier &verifier, voffset_t field,
+                   size_t align) const {
+    // Calling GetOptionalFieldOffset should be safe now thanks to
+    // VerifyTableStart().
+    auto field_offset = GetOptionalFieldOffset(field);
+    // Check the actual field.
+    return !field_offset || verifier.VerifyField<T>(data_, field_offset, align);
+  }
+
+  // VerifyField for required fields.
+  template<typename T>
+  bool VerifyFieldRequired(const Verifier &verifier, voffset_t field,
+                           size_t align) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    return verifier.Check(field_offset != 0) &&
+           verifier.VerifyField<T>(data_, field_offset, align);
+  }
+
+  // Versions for offsets.
+  bool VerifyOffset(const Verifier &verifier, voffset_t field) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    return !field_offset || verifier.VerifyOffset(data_, field_offset);
+  }
+
+  bool VerifyOffsetRequired(const Verifier &verifier, voffset_t field) const {
+    auto field_offset = GetOptionalFieldOffset(field);
+    return verifier.Check(field_offset != 0) &&
+           verifier.VerifyOffset(data_, field_offset);
+  }
+
+ private:
+  // private constructor & copy constructor: you obtain instances of this
+  // class by pointing to existing data only
+  Table();
+  Table(const Table &other);
+  Table &operator=(const Table &);
+
+  uint8_t data_[1];
+};
+
+// This specialization allows avoiding warnings like:
+// MSVC C4800: type: forcing value to bool 'true' or 'false'.
+template<>
+inline flatbuffers::Optional<bool> Table::GetOptional<uint8_t, bool>(
+    voffset_t field) const {
+  auto field_offset = GetOptionalFieldOffset(field);
+  auto p = data_ + field_offset;
+  return field_offset ? Optional<bool>(ReadScalar<uint8_t>(p) != 0)
+                      : Optional<bool>();
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_TABLE_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/util.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h
similarity index 93%
rename from edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/util.h
rename to edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h
index ca92965..10138be 100644
--- a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/util.h
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h
@@ -17,19 +17,26 @@
 #ifndef FLATBUFFERS_UTIL_H_
 #define FLATBUFFERS_UTIL_H_
 
+#include <ctype.h>
 #include <errno.h>
 
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h"
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/stl_emulation.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h"
 
+// For TFLM we always want to use FLATBUFFERS_PREFER_PRINTF=1. See
+// http://b/211811553 for more context.
 #ifndef FLATBUFFERS_PREFER_PRINTF
+#define FLATBUFFERS_PREFER_PRINTF 1
+#endif
+
+#ifndef FLATBUFFERS_PREFER_PRINTF
+#  include <iomanip>
 #  include <sstream>
 #else  // FLATBUFFERS_PREFER_PRINTF
 #  include <float.h>
 #  include <stdio.h>
 #endif  // FLATBUFFERS_PREFER_PRINTF
 
-#include <iomanip>
 #include <string>
 
 namespace flatbuffers {
@@ -51,6 +58,9 @@ inline bool is_alpha(char c) {
   return check_ascii_range(c & 0xDF, 'a' & 0xDF, 'z' & 0xDF);
 }
 
+// Check for uppercase alpha
+inline bool is_alpha_upper(char c) { return check_ascii_range(c, 'A', 'Z'); }
+
 // Check (case-insensitive) that `c` is equal to alpha.
 inline bool is_alpha_char(char c, char alpha) {
   FLATBUFFERS_ASSERT(is_alpha(alpha));
@@ -91,7 +101,7 @@ template<typename T> size_t IntToDigitCount(T t) {
   // Count a single 0 left of the dot for fractional numbers
   if (-1 < t && t < 1) digit_count++;
   // Count digits until fractional part
-  T eps = std::numeric_limits<float>::epsilon();
+  T eps = std::numeric_limits<T>::epsilon();
   while (t <= (-1 + eps) || (1 - eps) <= t) {
     t /= 10;
     digit_count++;
@@ -142,20 +152,6 @@ template<> inline std::string NumToString<unsigned char>(unsigned char t) {
 template<> inline std::string NumToString<char>(char t) {
   return NumToString(static_cast<int>(t));
 }
-#if defined(FLATBUFFERS_CPP98_STL)
-template<> inline std::string NumToString<long long>(long long t) {
-  char buf[21];  // (log((1 << 63) - 1) / log(10)) + 2
-  snprintf(buf, sizeof(buf), "%lld", t);
-  return std::string(buf);
-}
-
-template<>
-inline std::string NumToString<unsigned long long>(unsigned long long t) {
-  char buf[22];  // (log((1 << 63) - 1) / log(10)) + 1
-  snprintf(buf, sizeof(buf), "%llu", t);
-  return std::string(buf);
-}
-#endif  // defined(FLATBUFFERS_CPP98_STL)
 
 // Special versions for floats/doubles.
 template<typename T> std::string FloatToString(T t, int precision) {
@@ -332,6 +328,9 @@ inline bool StringToFloatImpl(T *val, const char *const str) {
 // - If the converted value falls out of range of corresponding return type, a
 // range error occurs. In this case value MAX(T)/MIN(T) is returned.
 template<typename T> inline bool StringToNumber(const char *s, T *val) {
+  // Assert on `unsigned long` and `signed long` on LP64.
+  // If it is necessary, it could be solved with flatbuffers::enable_if<B,T>.
+  static_assert(sizeof(T) < sizeof(int64_t), "unexpected type T");
   FLATBUFFERS_ASSERT(s && val);
   int64_t i64;
   // The errno check isn't needed, will return MAX/MIN on overflow.
@@ -455,6 +454,9 @@ std::string StripPath(const std::string &filepath);
 // Strip the last component of the path + separator.
 std::string StripFileName(const std::string &filepath);
 
+std::string StripPrefix(const std::string &filepath,
+                        const std::string &prefix_to_remove);
+
 // Concatenates a path with a filename, regardless of whether the path
 // ends in a separator or not.
 std::string ConCatPathFileName(const std::string &path,
@@ -462,6 +464,7 @@ std::string ConCatPathFileName(const std::string &path,
 
 // Replaces any '\\' separators with '/'
 std::string PosixPath(const char *path);
+std::string PosixPath(const std::string &path);
 
 // This function ensure a directory exists, by recursively
 // creating dirs for any parts of the path that don't exist yet.
@@ -471,6 +474,10 @@ void EnsureDirExists(const std::string &filepath);
 // Returns the input path if the absolute path couldn't be resolved.
 std::string AbsolutePath(const std::string &filepath);
 
+// Returns files relative to the --project_root path, prefixed with `//`.
+std::string RelativeToRootPath(const std::string &project,
+                               const std::string &filepath);
+
 // To and from UTF-8 unicode conversion functions
 
 // Convert a unicode code point into a UTF-8 representation by appending it
@@ -687,6 +694,32 @@ bool ReadEnvironmentVariable(const char *var_name,
 // MSVC specific: Send all assert reports to STDOUT to prevent CI hangs.
 void SetupDefaultCRTReportMode();
 
+enum class Case {
+  kUnknown = 0,
+  // TheQuickBrownFox
+  kUpperCamel = 1,
+  // theQuickBrownFox
+  kLowerCamel = 2,
+  // the_quick_brown_fox
+  kSnake = 3,
+  // THE_QUICK_BROWN_FOX
+  kScreamingSnake = 4,
+  // THEQUICKBROWNFOX
+  kAllUpper = 5,
+  // thequickbrownfox
+  kAllLower = 6,
+  // the-quick-brown-fox
+  kDasher = 7,
+  // THEQuiCKBr_ownFox (or whatever you want, we won't change it)
+  kKeep = 8,
+  // the_quick_brown_fox123 (as opposed to the_quick_brown_fox_123)
+  kSnake2 = 9,
+};
+
+// Convert the `input` string of case `input_case` to the specified `output_case`.
+std::string ConvertCase(const std::string &input, Case output_case,
+                    Case input_case = Case::kSnake);
+
 }  // namespace flatbuffers
 
 #endif  // FLATBUFFERS_UTIL_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h
new file mode 100644
index 0000000..81f583b
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_VECTOR_H_
+#define FLATBUFFERS_VECTOR_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h"
+
+namespace flatbuffers {
+
+struct String;
+
+// An STL compatible iterator implementation for Vector below, effectively
+// calling Get() for every element.
+template<typename T, typename IT> struct VectorIterator {
+  typedef std::random_access_iterator_tag iterator_category;
+  typedef IT value_type;
+  typedef ptrdiff_t difference_type;
+  typedef IT *pointer;
+  typedef IT &reference;
+
+  VectorIterator(const uint8_t *data, uoffset_t i)
+      : data_(data + IndirectHelper<T>::element_stride * i) {}
+  VectorIterator(const VectorIterator &other) : data_(other.data_) {}
+  VectorIterator() : data_(nullptr) {}
+
+  VectorIterator &operator=(const VectorIterator &other) {
+    data_ = other.data_;
+    return *this;
+  }
+
+  VectorIterator &operator=(VectorIterator &&other) {
+    data_ = other.data_;
+    return *this;
+  }
+
+  bool operator==(const VectorIterator &other) const {
+    return data_ == other.data_;
+  }
+
+  bool operator<(const VectorIterator &other) const {
+    return data_ < other.data_;
+  }
+
+  bool operator!=(const VectorIterator &other) const {
+    return data_ != other.data_;
+  }
+
+  difference_type operator-(const VectorIterator &other) const {  // signed element distance
+    return (data_ - other.data_) / static_cast<difference_type>(IndirectHelper<T>::element_stride);
+  }
+
+  // Note: return type is incompatible with the standard
+  // `reference operator*()`.
+  IT operator*() const { return IndirectHelper<T>::Read(data_, 0); }
+
+  // Note: return type is incompatible with the standard
+  // `pointer operator->()`.
+  IT operator->() const { return IndirectHelper<T>::Read(data_, 0); }
+
+  VectorIterator &operator++() {
+    data_ += IndirectHelper<T>::element_stride;
+    return *this;
+  }
+
+  VectorIterator operator++(int) {
+    VectorIterator temp(data_, 0);
+    data_ += IndirectHelper<T>::element_stride;
+    return temp;
+  }
+
+  VectorIterator operator+(const uoffset_t &offset) const {
+    return VectorIterator(data_ + offset * IndirectHelper<T>::element_stride,
+                          0);
+  }
+
+  VectorIterator &operator+=(const uoffset_t &offset) {
+    data_ += offset * IndirectHelper<T>::element_stride;
+    return *this;
+  }
+
+  VectorIterator &operator--() {
+    data_ -= IndirectHelper<T>::element_stride;
+    return *this;
+  }
+
+  VectorIterator operator--(int) {
+    VectorIterator temp(data_, 0);
+    data_ -= IndirectHelper<T>::element_stride;
+    return temp;
+  }
+
+  VectorIterator operator-(const uoffset_t &offset) const {
+    return VectorIterator(data_ - offset * IndirectHelper<T>::element_stride,
+                          0);
+  }
+
+  VectorIterator &operator-=(const uoffset_t &offset) {
+    data_ -= offset * IndirectHelper<T>::element_stride;
+    return *this;
+  }
+
+ private:
+  const uint8_t *data_;
+};
+
+template<typename Iterator>
+struct VectorReverseIterator : public std::reverse_iterator<Iterator> {
+  explicit VectorReverseIterator(Iterator iter)
+      : std::reverse_iterator<Iterator>(iter) {}
+
+  // Note: return type is incompatible with the standard
+  // `reference operator*()`.
+  typename Iterator::value_type operator*() const {
+    auto tmp = std::reverse_iterator<Iterator>::current;
+    return *--tmp;
+  }
+
+  // Note: return type is incompatible with the standard
+  // `pointer operator->()`.
+  typename Iterator::value_type operator->() const {
+    auto tmp = std::reverse_iterator<Iterator>::current;
+    return *--tmp;
+  }
+};
+
+// This is used as a helper type for accessing vectors.
+// Vector::data() assumes the vector elements start after the length field.
+template<typename T> class Vector {
+ public:
+  typedef VectorIterator<T, typename IndirectHelper<T>::mutable_return_type>
+      iterator;
+  typedef VectorIterator<T, typename IndirectHelper<T>::return_type>
+      const_iterator;
+  typedef VectorReverseIterator<iterator> reverse_iterator;
+  typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
+
+  typedef typename flatbuffers::bool_constant<flatbuffers::is_scalar<T>::value>
+      scalar_tag;
+
+  static FLATBUFFERS_CONSTEXPR bool is_span_observable =
+      scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1);
+
+  uoffset_t size() const { return EndianScalar(length_); }
+
+  // Deprecated: use size(). Here for backwards compatibility.
+  FLATBUFFERS_ATTRIBUTE([[deprecated("use size() instead")]])
+  uoffset_t Length() const { return size(); }
+
+  typedef typename IndirectHelper<T>::return_type return_type;
+  typedef typename IndirectHelper<T>::mutable_return_type mutable_return_type;
+  typedef return_type value_type;
+
+  return_type Get(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return IndirectHelper<T>::Read(Data(), i);
+  }
+
+  return_type operator[](uoffset_t i) const { return Get(i); }
+
+  // If this is a Vector of enums, T will be its storage type, not the enum
+  // type. This function makes it convenient to retrieve value with enum
+  // type E.
+  template<typename E> E GetEnum(uoffset_t i) const {
+    return static_cast<E>(Get(i));
+  }
+
+  // If this is a vector of unions, this does the cast for you. There's no
+  // check to make sure this is the right type!
+  template<typename U> const U *GetAs(uoffset_t i) const {
+    return reinterpret_cast<const U *>(Get(i));
+  }
+
+  // If this is a vector of unions, this does the cast for you. There's no
+  // check to make sure this is actually a string!
+  const String *GetAsString(uoffset_t i) const {
+    return reinterpret_cast<const String *>(Get(i));
+  }
+
+  const void *GetStructFromOffset(size_t o) const {
+    return reinterpret_cast<const void *>(Data() + o);
+  }
+
+  iterator begin() { return iterator(Data(), 0); }
+  const_iterator begin() const { return const_iterator(Data(), 0); }
+
+  iterator end() { return iterator(Data(), size()); }
+  const_iterator end() const { return const_iterator(Data(), size()); }
+
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(end());
+  }
+
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
+
+  const_iterator cbegin() const { return begin(); }
+
+  const_iterator cend() const { return end(); }
+
+  const_reverse_iterator crbegin() const { return rbegin(); }
+
+  const_reverse_iterator crend() const { return rend(); }
+
+  // Change elements if you have a non-const pointer to this object.
+  // Scalars only. See reflection.h, and the documentation.
+  void Mutate(uoffset_t i, const T &val) {
+    FLATBUFFERS_ASSERT(i < size());
+    WriteScalar(data() + i, val);
+  }
+
+  // Change an element of a vector of tables (or strings).
+  // "val" points to the new table/string, as you can obtain from
+  // e.g. reflection::AddFlatBuffer().
+  void MutateOffset(uoffset_t i, const uint8_t *val) {
+    FLATBUFFERS_ASSERT(i < size());
+    static_assert(sizeof(T) == sizeof(uoffset_t), "Unrelated types");
+    WriteScalar(data() + i,
+                static_cast<uoffset_t>(val - (Data() + i * sizeof(uoffset_t))));
+  }
+
+  // Get a mutable pointer to tables/strings inside this vector.
+  mutable_return_type GetMutableObject(uoffset_t i) const {
+    FLATBUFFERS_ASSERT(i < size());
+    return const_cast<mutable_return_type>(IndirectHelper<T>::Read(Data(), i));
+  }
+
+  // The raw data in little endian format. Use with care.
+  const uint8_t *Data() const {
+    return reinterpret_cast<const uint8_t *>(&length_ + 1);
+  }
+
+  uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
+
+  // Similarly, but typed, much like std::vector::data
+  const T *data() const { return reinterpret_cast<const T *>(Data()); }
+  T *data() { return reinterpret_cast<T *>(Data()); }
+
+  template<typename K> return_type LookupByKey(K key) const {
+    void *search_result = std::bsearch(
+        &key, Data(), size(), IndirectHelper<T>::element_stride, KeyCompare<K>);
+
+    if (!search_result) {
+      return nullptr;  // Key not found.
+    }
+
+    const uint8_t *element = reinterpret_cast<const uint8_t *>(search_result);
+
+    return IndirectHelper<T>::Read(element, 0);
+  }
+
+  template<typename K> mutable_return_type MutableLookupByKey(K key) {
+    return const_cast<mutable_return_type>(LookupByKey(key));
+  }
+
+ protected:
+  // This class is only used to access pre-existing data. Don't ever
+  // try to construct these manually.
+  Vector();
+
+  uoffset_t length_;
+
+ private:
+  // This class is a pointer. Copying will therefore create an invalid object.
+  // Private and unimplemented copy constructor.
+  Vector(const Vector &);
+  Vector &operator=(const Vector &);
+
+  template<typename K> static int KeyCompare(const void *ap, const void *bp) {
+    const K *key = reinterpret_cast<const K *>(ap);
+    const uint8_t *data = reinterpret_cast<const uint8_t *>(bp);
+    auto table = IndirectHelper<T>::Read(data, 0);
+
+    // std::bsearch compares with the operands transposed, so we negate the
+    // result here.
+    return -table->KeyCompareWithValue(*key);
+  }
+};
+
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<U> make_span(Vector<U> &vec)
+    FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::is_span_observable,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return span<U>(vec.data(), vec.size());
+}
+
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const U> make_span(
+    const Vector<U> &vec) FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::is_span_observable,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return span<const U>(vec.data(), vec.size());
+}
+
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<uint8_t> make_bytes_span(
+    Vector<U> &vec) FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::scalar_tag::value,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return span<uint8_t>(vec.Data(), vec.size() * sizeof(U));
+}
+
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const uint8_t> make_bytes_span(
+    const Vector<U> &vec) FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::scalar_tag::value,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return span<const uint8_t>(vec.Data(), vec.size() * sizeof(U));
+}
+
+// Convenient helper functions to get a span of any vector, regardless
+// of whether it is null or not (the field is not set).
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<U> make_span(Vector<U> *ptr)
+    FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::is_span_observable,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return ptr ? make_span(*ptr) : span<U>();
+}
+
+template<class U>
+FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span<const U> make_span(
+    const Vector<U> *ptr) FLATBUFFERS_NOEXCEPT {
+  static_assert(Vector<U>::is_span_observable,
+                "wrong type U, only LE-scalar, or byte types are allowed");
+  return ptr ? make_span(*ptr) : span<const U>();
+}
+
+// Represent a vector much like the template above, but in this case we
+// don't know what the element types are (used with reflection.h).
+class VectorOfAny {
+ public:
+  uoffset_t size() const { return EndianScalar(length_); }
+
+  const uint8_t *Data() const {
+    return reinterpret_cast<const uint8_t *>(&length_ + 1);
+  }
+  uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
+
+ protected:
+  VectorOfAny();
+
+  uoffset_t length_;
+
+ private:
+  VectorOfAny(const VectorOfAny &);
+  VectorOfAny &operator=(const VectorOfAny &);
+};
+
+template<typename T, typename U>
+Vector<Offset<T>> *VectorCast(Vector<Offset<U>> *ptr) {
+  static_assert(std::is_base_of<T, U>::value, "Unrelated types");
+  return reinterpret_cast<Vector<Offset<T>> *>(ptr);
+}
+
+template<typename T, typename U>
+const Vector<Offset<T>> *VectorCast(const Vector<Offset<U>> *ptr) {
+  static_assert(std::is_base_of<T, U>::value, "Unrelated types");
+  return reinterpret_cast<const Vector<Offset<T>> *>(ptr);
+}
+
+// Convenient helper function to get the length of any vector, regardless
+// of whether it is null or not (the field is not set).
+template<typename T> static inline size_t VectorLength(const Vector<T> *v) {
+  return v ? v->size() : 0;
+}
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_VECTOR_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h
new file mode 100644
index 0000000..6ff86a9
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_VECTOR_DOWNWARD_H_
+#define FLATBUFFERS_VECTOR_DOWNWARD_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_default_allocator.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h"
+
+namespace flatbuffers {
+
+// This is a minimal replication of std::vector<uint8_t> functionality,
+// except growing from higher to lower addresses. i.e. push_back() inserts data
+// in the lowest address in the vector.
+// Since this vector leaves the lower part unused, we support a "scratch-pad"
+// that can be stored there for temporary data, to share the allocated space.
+// Essentially, this supports 2 std::vectors in a single buffer.
+class vector_downward {
+ public:
+  explicit vector_downward(size_t initial_size, Allocator *allocator,
+                           bool own_allocator, size_t buffer_minalign)
+      : allocator_(allocator),
+        own_allocator_(own_allocator),
+        initial_size_(initial_size),
+        buffer_minalign_(buffer_minalign),
+        reserved_(0),
+        size_(0),
+        buf_(nullptr),
+        cur_(nullptr),
+        scratch_(nullptr) {}
+
+  vector_downward(vector_downward &&other)
+      // clang-format on
+      : allocator_(other.allocator_),
+        own_allocator_(other.own_allocator_),
+        initial_size_(other.initial_size_),
+        buffer_minalign_(other.buffer_minalign_),
+        reserved_(other.reserved_),
+        size_(other.size_),
+        buf_(other.buf_),
+        cur_(other.cur_),
+        scratch_(other.scratch_) {
+    // No change in other.allocator_
+    // No change in other.initial_size_
+    // No change in other.buffer_minalign_
+    other.own_allocator_ = false;
+    other.reserved_ = 0;
+    other.buf_ = nullptr;
+    other.cur_ = nullptr;
+    other.scratch_ = nullptr;
+  }
+
+  vector_downward &operator=(vector_downward &&other) {
+    // Move construct a temporary and swap idiom
+    vector_downward temp(std::move(other));
+    swap(temp);
+    return *this;
+  }
+
+  ~vector_downward() {
+    clear_buffer();
+    clear_allocator();
+  }
+
+  void reset() {
+    clear_buffer();
+    clear();
+  }
+
+  void clear() {
+    if (buf_) {
+      cur_ = buf_ + reserved_;
+    } else {
+      reserved_ = 0;
+      cur_ = nullptr;
+    }
+    size_ = 0;
+    clear_scratch();
+  }
+
+  void clear_scratch() { scratch_ = buf_; }
+
+  void clear_allocator() {
+    if (own_allocator_ && allocator_) { delete allocator_; }
+    allocator_ = nullptr;
+    own_allocator_ = false;
+  }
+
+  void clear_buffer() {
+    if (buf_) Deallocate(allocator_, buf_, reserved_);
+    buf_ = nullptr;
+  }
+
+  // Relinquish the pointer to the caller.
+  uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) {
+    auto *buf = buf_;
+    allocated_bytes = reserved_;
+    offset = static_cast<size_t>(cur_ - buf_);
+
+    // release_raw only relinquishes the buffer ownership.
+    // Does not deallocate or reset the allocator. Destructor will do that.
+    buf_ = nullptr;
+    clear();
+    return buf;
+  }
+
+  // Relinquish the pointer to the caller.
+  DetachedBuffer release() {
+    // allocator ownership (if any) is transferred to DetachedBuffer.
+    DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_,
+                      size());
+    if (own_allocator_) {
+      allocator_ = nullptr;
+      own_allocator_ = false;
+    }
+    buf_ = nullptr;
+    clear();
+    return fb;
+  }
+
+  size_t ensure_space(size_t len) {
+    FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_);
+    if (len > static_cast<size_t>(cur_ - scratch_)) { reallocate(len); }
+    // Beyond this, signed offsets may not have enough range:
+    // (FlatBuffers > 2GB not supported).
+    FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE);
+    return len;
+  }
+
+  inline uint8_t *make_space(size_t len) {
+    if (len) {
+      ensure_space(len);
+      cur_ -= len;
+      size_ += static_cast<uoffset_t>(len);
+    }
+    return cur_;
+  }
+
+  // Returns nullptr if using the DefaultAllocator.
+  Allocator *get_custom_allocator() { return allocator_; }
+
+  inline uoffset_t size() const { return size_; }
+
+  uoffset_t scratch_size() const {
+    return static_cast<uoffset_t>(scratch_ - buf_);
+  }
+
+  size_t capacity() const { return reserved_; }
+
+  uint8_t *data() const {
+    FLATBUFFERS_ASSERT(cur_);
+    return cur_;
+  }
+
+  uint8_t *scratch_data() const {
+    FLATBUFFERS_ASSERT(buf_);
+    return buf_;
+  }
+
+  uint8_t *scratch_end() const {
+    FLATBUFFERS_ASSERT(scratch_);
+    return scratch_;
+  }
+
+  uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; }
+
+  void push(const uint8_t *bytes, size_t num) {
+    if (num > 0) { memcpy(make_space(num), bytes, num); }
+  }
+
+  // Specialized version of push() that avoids memcpy call for small data.
+  template<typename T> void push_small(const T &little_endian_t) {
+    make_space(sizeof(T));
+    *reinterpret_cast<T *>(cur_) = little_endian_t;
+  }
+
+  template<typename T> void scratch_push_small(const T &t) {
+    ensure_space(sizeof(T));
+    *reinterpret_cast<T *>(scratch_) = t;
+    scratch_ += sizeof(T);
+  }
+
+  // fill() is most frequently called with small byte counts (<= 4),
+  // which is why we're using loops rather than calling memset.
+  void fill(size_t zero_pad_bytes) {
+    make_space(zero_pad_bytes);
+    for (size_t i = 0; i < zero_pad_bytes; i++) cur_[i] = 0;
+  }
+
+  // Version for when we know the size is larger.
+  // Precondition: zero_pad_bytes > 0
+  void fill_big(size_t zero_pad_bytes) {
+    memset(make_space(zero_pad_bytes), 0, zero_pad_bytes);
+  }
+
+  void pop(size_t bytes_to_remove) {
+    cur_ += bytes_to_remove;
+    size_ -= static_cast<uoffset_t>(bytes_to_remove);
+  }
+
+  void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; }
+
+  void swap(vector_downward &other) {
+    using std::swap;
+    swap(allocator_, other.allocator_);
+    swap(own_allocator_, other.own_allocator_);
+    swap(initial_size_, other.initial_size_);
+    swap(buffer_minalign_, other.buffer_minalign_);
+    swap(reserved_, other.reserved_);
+    swap(size_, other.size_);
+    swap(buf_, other.buf_);
+    swap(cur_, other.cur_);
+    swap(scratch_, other.scratch_);
+  }
+
+  void swap_allocator(vector_downward &other) {
+    using std::swap;
+    swap(allocator_, other.allocator_);
+    swap(own_allocator_, other.own_allocator_);
+  }
+
+ private:
+  // You shouldn't really be copying instances of this class.
+  FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &));
+  FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &));
+
+  Allocator *allocator_;
+  bool own_allocator_;
+  size_t initial_size_;
+  size_t buffer_minalign_;
+  size_t reserved_;
+  uoffset_t size_;
+  uint8_t *buf_;
+  uint8_t *cur_;  // Points at location between empty (below) and used (above).
+  uint8_t *scratch_;  // Points to the end of the scratchpad in use.
+
+  void reallocate(size_t len) {
+    auto old_reserved = reserved_;
+    auto old_size = size();
+    auto old_scratch_size = scratch_size();
+    reserved_ +=
+        (std::max)(len, old_reserved ? old_reserved / 2 : initial_size_);
+    reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1);
+    if (buf_) {
+      buf_ = ReallocateDownward(allocator_, buf_, old_reserved, reserved_,
+                                old_size, old_scratch_size);
+    } else {
+      buf_ = Allocate(allocator_, reserved_);
+    }
+    cur_ = buf_ + reserved_ - old_size;
+    scratch_ = buf_ + old_scratch_size;
+  }
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_VECTOR_DOWNWARD_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h
new file mode 100644
index 0000000..5f13e27
--- /dev/null
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2021 Google Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FLATBUFFERS_VERIFIER_H_
+#define FLATBUFFERS_VERIFIER_H_
+
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"
+
+namespace flatbuffers {
+
+// Helper class to verify the integrity of a FlatBuffer
+class Verifier FLATBUFFERS_FINAL_CLASS {
+ public:
+  Verifier(const uint8_t *const buf, const size_t buf_len,
+           const uoffset_t _max_depth = 64,
+           const uoffset_t _max_tables = 1000000,
+           const bool _check_alignment = true)
+      : buf_(buf),
+        size_(buf_len),
+        max_depth_(_max_depth),
+        max_tables_(_max_tables),
+        check_alignment_(_check_alignment),
+        upper_bound_(0),
+        depth_(0),
+        num_tables_(0),
+        flex_reuse_tracker_(nullptr) {
+    FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
+  }
+
+  // Central location where any verification failures register.
+  bool Check(const bool ok) const {
+    // clang-format off
+    #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
+      FLATBUFFERS_ASSERT(ok);
+    #endif
+    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+      if (!ok)
+        upper_bound_ = 0;
+    #endif
+    // clang-format on
+    return ok;
+  }
+
+  // Verify any range within the buffer.
+  bool Verify(const size_t elem, const size_t elem_len) const {
+    // clang-format off
+    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+      auto upper_bound = elem + elem_len;
+      if (upper_bound_ < upper_bound)
+        upper_bound_ =  upper_bound;
+    #endif
+    // clang-format on
+    return Check(elem_len < size_ && elem <= size_ - elem_len);
+  }
+
+  bool VerifyAlignment(const size_t elem, const size_t align) const {
+    return Check((elem & (align - 1)) == 0 || !check_alignment_);
+  }
+
+  // Verify a range indicated by sizeof(T).
+  template<typename T> bool Verify(const size_t elem) const {
+    return VerifyAlignment(elem, sizeof(T)) && Verify(elem, sizeof(T));
+  }
+
+  bool VerifyFromPointer(const uint8_t *const p, const size_t len) {
+    return Verify(static_cast<size_t>(p - buf_), len);
+  }
+
+  // Verify relative to a known-good base pointer.
+  bool VerifyFieldStruct(const uint8_t *const base, const voffset_t elem_off,
+                         const size_t elem_len, const size_t align) const {
+    const auto f = static_cast<size_t>(base - buf_) + elem_off;
+    return VerifyAlignment(f, align) && Verify(f, elem_len);
+  }
+
+  template<typename T>
+  bool VerifyField(const uint8_t *const base, const voffset_t elem_off,
+                   const size_t align) const {
+    const auto f = static_cast<size_t>(base - buf_) + elem_off;
+    return VerifyAlignment(f, align) && Verify(f, sizeof(T));
+  }
+
+  // Verify a pointer (may be NULL) of a table type.
+  template<typename T> bool VerifyTable(const T *const table) {
+    return !table || table->Verify(*this);
+  }
+
+  // Verify a pointer (may be NULL) of any vector type.
+  template<typename T> bool VerifyVector(const Vector<T> *const vec) const {
+    return !vec || VerifyVectorOrString(reinterpret_cast<const uint8_t *>(vec),
+                                        sizeof(T));
+  }
+
+  // Verify a pointer (may be NULL) of a vector to struct.
+  template<typename T>
+  bool VerifyVector(const Vector<const T *> *const vec) const {
+    return VerifyVector(reinterpret_cast<const Vector<T> *>(vec));
+  }
+
+  // Verify a pointer (may be NULL) to string.
+  bool VerifyString(const String *const str) const {
+    size_t end;
+    return !str || (VerifyVectorOrString(reinterpret_cast<const uint8_t *>(str),
+                                         1, &end) &&
+                    Verify(end, 1) &&           // Must have terminator
+                    Check(buf_[end] == '\0'));  // Terminating byte must be 0.
+  }
+
+  // Common code between vectors and strings.
+  bool VerifyVectorOrString(const uint8_t *const vec, const size_t elem_size,
+                            size_t *const end = nullptr) const {
+    const auto veco = static_cast<size_t>(vec - buf_);
+    // Check we can read the size field.
+    if (!Verify<uoffset_t>(veco)) return false;
+    // Check the whole array. If this is a string, the byte past the array
+    // must be 0.
+    const auto size = ReadScalar<uoffset_t>(vec);
+    const auto max_elems = FLATBUFFERS_MAX_BUFFER_SIZE / elem_size;
+    if (!Check(size < max_elems))
+      return false;  // Protect against byte_size overflowing.
+    const auto byte_size = sizeof(size) + elem_size * size;
+    if (end) *end = veco + byte_size;
+    return Verify(veco, byte_size);
+  }
+
+  // Special case for string contents, after the above has been called.
+  bool VerifyVectorOfStrings(const Vector<Offset<String>> *const vec) const {
+    if (vec) {
+      for (uoffset_t i = 0; i < vec->size(); i++) {
+        if (!VerifyString(vec->Get(i))) return false;
+      }
+    }
+    return true;
+  }
+
+  // Special case for table contents, after the above has been called.
+  template<typename T>
+  bool VerifyVectorOfTables(const Vector<Offset<T>> *const vec) {
+    if (vec) {
+      for (uoffset_t i = 0; i < vec->size(); i++) {
+        if (!vec->Get(i)->Verify(*this)) return false;
+      }
+    }
+    return true;
+  }
+
+  __supress_ubsan__("unsigned-integer-overflow") bool VerifyTableStart(
+      const uint8_t *const table) {
+    // Check the vtable offset.
+    const auto tableo = static_cast<size_t>(table - buf_);
+    if (!Verify<soffset_t>(tableo)) return false;
+    // This offset may be signed, but doing the subtraction unsigned always
+    // gives the result we want.
+    const auto vtableo =
+        tableo - static_cast<size_t>(ReadScalar<soffset_t>(table));
+    // Check the vtable size field, then check vtable fits in its entirety.
+    if (!(VerifyComplexity() && Verify<voffset_t>(vtableo) &&
+          VerifyAlignment(ReadScalar<voffset_t>(buf_ + vtableo),
+                          sizeof(voffset_t))))
+      return false;
+    const auto vsize = ReadScalar<voffset_t>(buf_ + vtableo);
+    return Check((vsize & 1) == 0) && Verify(vtableo, vsize);
+  }
+
+  template<typename T>
+  bool VerifyBufferFromStart(const char *const identifier, const size_t start) {
+    // Buffers have to be of some size to be valid. The reason it is a runtime
+    // check instead of static_assert, is that nested flatbuffers go through
+    // this call and their size is determined at runtime.
+    if (!Check(size_ >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false;
+
+    // If an identifier is provided, check the buffer can hold it and has it.
+    if (identifier && !Check((size_ >= 2 * sizeof(flatbuffers::uoffset_t) &&
+                              BufferHasIdentifier(buf_ + start, identifier)))) {
+      return false;
+    }
+
+    // Call T::Verify, which must be in the generated code for this type.
+    const auto o = VerifyOffset(start);
+    return Check(o != 0) &&
+           reinterpret_cast<const T *>(buf_ + start + o)->Verify(*this)
+    // clang-format off
+    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+           && GetComputedSize()
+    #endif
+        ;
+    // clang-format on
+  }
+
+  template<typename T>
+  bool VerifyNestedFlatBuffer(const Vector<uint8_t> *const buf,
+                              const char *const identifier) {
+    // A null buffer is OK as it indicates the field is not present.
+    if (!buf) return true;
+
+    // If there is a nested buffer, it must be greater than the min size.
+    if(!Check(buf->size() >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false;
+
+    Verifier nested_verifier(buf->data(), buf->size());
+    return nested_verifier.VerifyBuffer<T>(identifier);
+  }
+
+  // Verify this whole buffer, starting with root type T.
+  template<typename T> bool VerifyBuffer() { return VerifyBuffer<T>(nullptr); }
+
+  template<typename T> bool VerifyBuffer(const char *const identifier) {
+    return VerifyBufferFromStart<T>(identifier, 0);
+  }
+
+  template<typename T>
+  bool VerifySizePrefixedBuffer(const char *const identifier) {
+    return Verify<uoffset_t>(0U) &&
+           Check(ReadScalar<uoffset_t>(buf_) == size_ - sizeof(uoffset_t)) &&
+           VerifyBufferFromStart<T>(identifier, sizeof(uoffset_t));
+  }
+
+  uoffset_t VerifyOffset(const size_t start) const {
+    if (!Verify<uoffset_t>(start)) return 0;
+    const auto o = ReadScalar<uoffset_t>(buf_ + start);
+    // May not point to itself.
+    if (!Check(o != 0)) return 0;
+    // Can't wrap around / buffers are max 2GB.
+    if (!Check(static_cast<soffset_t>(o) >= 0)) return 0;
+    // Must be inside the buffer to create a pointer from it (pointer outside
+    // buffer is UB).
+    if (!Verify(start + o, 1)) return 0;
+    return o;
+  }
+
+  uoffset_t VerifyOffset(const uint8_t *const base,
+                         const voffset_t start) const {
+    return VerifyOffset(static_cast<size_t>(base - buf_) + start);
+  }
+
+  // Called at the start of a table to increase counters measuring data
+  // structure depth and amount, and possibly bails out with false if
+  // limits set by the constructor have been hit. Needs to be balanced
+  // with EndTable().
+  bool VerifyComplexity() {
+    depth_++;
+    num_tables_++;
+    return Check(depth_ <= max_depth_ && num_tables_ <= max_tables_);
+  }
+
+  // Called at the end of a table to pop the depth count.
+  bool EndTable() {
+    depth_--;
+    return true;
+  }
+
+  // Returns the message size in bytes
+  size_t GetComputedSize() const {
+    // clang-format off
+    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
+      uintptr_t size = upper_bound_;
+      // Align the size to uoffset_t
+      size = (size - 1 + sizeof(uoffset_t)) & ~(sizeof(uoffset_t) - 1);
+      return (size > size_) ?  0 : size;
+    #else
+      // Must turn on FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE for this to work.
+      (void)upper_bound_;
+      FLATBUFFERS_ASSERT(false);
+      return 0;
+    #endif
+    // clang-format on
+  }
+
+  std::vector<uint8_t> *GetFlexReuseTracker() { return flex_reuse_tracker_; }
+
+  void SetFlexReuseTracker(std::vector<uint8_t> *const rt) {
+    flex_reuse_tracker_ = rt;
+  }
+
+ private:
+  const uint8_t *buf_;
+  const size_t size_;
+  const uoffset_t max_depth_;
+  const uoffset_t max_tables_;
+  const bool check_alignment_;
+
+  mutable size_t upper_bound_;
+
+  uoffset_t depth_;
+  uoffset_t num_tables_;
+  std::vector<uint8_t> *flex_reuse_tracker_;
+};
+
+}  // namespace flatbuffers
+
+#endif  // FLATBUFFERS_VERIFIER_H_
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h
index 1934169..051a0ed 100644
--- a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flatbuffers.h
@@ -17,2565 +17,23 @@
 #ifndef FLATBUFFERS_H_
 #define FLATBUFFERS_H_
 
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h"
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/stl_emulation.h"
+// TODO: These includes are for mitigating the pains of users editing their
+// source because they relied on flatbuffers.h to include everything for them.
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_array.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_buffer_ref.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_detached_buffer.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_flatbuffer_builder.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_stl_emulation.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_string.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_struct.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_table.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_vector_downward.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_verifier.h"
 
-#ifndef FLATBUFFERS_CPP98_STL
-  #include <functional>
-#endif
-
-#if defined(FLATBUFFERS_NAN_DEFAULTS)
-#  include <cmath>
-#endif
-
-namespace flatbuffers {
-// Generic 'operator==' with conditional specialisations.
-// T e - new value of a scalar field.
-// T def - default of scalar (is known at compile-time).
-template<typename T> inline bool IsTheSameAs(T e, T def) { return e == def; }
-
-#if defined(FLATBUFFERS_NAN_DEFAULTS) && \
-    defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0)
-// Like `operator==(e, def)` with weak NaN if T=(float|double).
-template<typename T> inline bool IsFloatTheSameAs(T e, T def) {
-  return (e == def) || ((def != def) && (e != e));
-}
-template<> inline bool IsTheSameAs<float>(float e, float def) {
-  return IsFloatTheSameAs(e, def);
-}
-template<> inline bool IsTheSameAs<double>(double e, double def) {
-  return IsFloatTheSameAs(e, def);
-}
-#endif
-
-// Check 'v' is out of closed range [low; high].
-// Workaround for GCC warning [-Werror=type-limits]:
-// comparison is always true due to limited range of data type.
-template<typename T>
-inline bool IsOutRange(const T &v, const T &low, const T &high) {
-  return (v < low) || (high < v);
-}
-
-// Check 'v' is in closed range [low; high].
-template<typename T>
-inline bool IsInRange(const T &v, const T &low, const T &high) {
-  return !IsOutRange(v, low, high);
-}
-
-// Wrapper for uoffset_t to allow safe template specialization.
-// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset).
-template<typename T> struct Offset {
-  uoffset_t o;
-  Offset() : o(0) {}
-  Offset(uoffset_t _o) : o(_o) {}
-  Offset<void> Union() const { return Offset<void>(o); }
-  bool IsNull() const { return !o; }
-};
-
-inline void EndianCheck() {
-  int endiantest = 1;
-  // If this fails, see FLATBUFFERS_LITTLEENDIAN above.
-  FLATBUFFERS_ASSERT(*reinterpret_cast<char *>(&endiantest) ==
-                     FLATBUFFERS_LITTLEENDIAN);
-  (void)endiantest;
-}
-
-template<typename T> FLATBUFFERS_CONSTEXPR size_t AlignOf() {
-  // clang-format off
-  #ifdef _MSC_VER
-    return __alignof(T);
-  #else
-    #ifndef alignof
-      return __alignof__(T);
-    #else
-      return alignof(T);
-    #endif
-  #endif
-  // clang-format on
-}
-
-// When we read serialized data from memory, in the case of most scalars,
-// we want to just read T, but in the case of Offset, we want to actually
-// perform the indirection and return a pointer.
-// The template specialization below does just that.
-// It is wrapped in a struct since function templates can't overload on the
-// return type like this.
-// The typedef is for the convenience of callers of this function
-// (avoiding the need for a trailing return decltype)
-template<typename T> struct IndirectHelper {
-  typedef T return_type;
-  typedef T mutable_return_type;
-  static const size_t element_stride = sizeof(T);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
-    return EndianScalar((reinterpret_cast<const T *>(p))[i]);
-  }
-};
-template<typename T> struct IndirectHelper<Offset<T>> {
-  typedef const T *return_type;
-  typedef T *mutable_return_type;
-  static const size_t element_stride = sizeof(uoffset_t);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
-    p += i * sizeof(uoffset_t);
-    return reinterpret_cast<return_type>(p + ReadScalar<uoffset_t>(p));
-  }
-};
-template<typename T> struct IndirectHelper<const T *> {
-  typedef const T *return_type;
-  typedef T *mutable_return_type;
-  static const size_t element_stride = sizeof(T);
-  static return_type Read(const uint8_t *p, uoffset_t i) {
-    return reinterpret_cast<const T *>(p + i * sizeof(T));
-  }
-};
-
-// An STL compatible iterator implementation for Vector below, effectively
-// calling Get() for every element.
-template<typename T, typename IT> struct VectorIterator {
-  typedef std::random_access_iterator_tag iterator_category;
-  typedef IT value_type;
-  typedef ptrdiff_t difference_type;
-  typedef IT *pointer;
-  typedef IT &reference;
-
-  VectorIterator(const uint8_t *data, uoffset_t i)
-      : data_(data + IndirectHelper<T>::element_stride * i) {}
-  VectorIterator(const VectorIterator &other) : data_(other.data_) {}
-  VectorIterator() : data_(nullptr) {}
-
-  VectorIterator &operator=(const VectorIterator &other) {
-    data_ = other.data_;
-    return *this;
-  }
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  VectorIterator &operator=(VectorIterator &&other) {
-    data_ = other.data_;
-    return *this;
-  }
-  #endif  // !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
-  bool operator==(const VectorIterator &other) const {
-    return data_ == other.data_;
-  }
-
-  bool operator<(const VectorIterator &other) const {
-    return data_ < other.data_;
-  }
-
-  bool operator!=(const VectorIterator &other) const {
-    return data_ != other.data_;
-  }
-
-  difference_type operator-(const VectorIterator &other) const {
-    return (data_ - other.data_) / IndirectHelper<T>::element_stride;
-  }
-
-  IT operator*() const { return IndirectHelper<T>::Read(data_, 0); }
-
-  IT operator->() const { return IndirectHelper<T>::Read(data_, 0); }
-
-  VectorIterator &operator++() {
-    data_ += IndirectHelper<T>::element_stride;
-    return *this;
-  }
-
-  VectorIterator operator++(int) {
-    VectorIterator temp(data_, 0);
-    data_ += IndirectHelper<T>::element_stride;
-    return temp;
-  }
-
-  VectorIterator operator+(const uoffset_t &offset) const {
-    return VectorIterator(data_ + offset * IndirectHelper<T>::element_stride,
-                          0);
-  }
-
-  VectorIterator &operator+=(const uoffset_t &offset) {
-    data_ += offset * IndirectHelper<T>::element_stride;
-    return *this;
-  }
-
-  VectorIterator &operator--() {
-    data_ -= IndirectHelper<T>::element_stride;
-    return *this;
-  }
-
-  VectorIterator operator--(int) {
-    VectorIterator temp(data_, 0);
-    data_ -= IndirectHelper<T>::element_stride;
-    return temp;
-  }
-
-  VectorIterator operator-(const uoffset_t &offset) const {
-    return VectorIterator(data_ - offset * IndirectHelper<T>::element_stride,
-                          0);
-  }
-
-  VectorIterator &operator-=(const uoffset_t &offset) {
-    data_ -= offset * IndirectHelper<T>::element_stride;
-    return *this;
-  }
-
- private:
-  const uint8_t *data_;
-};
-
-template<typename Iterator>
-struct VectorReverseIterator : public std::reverse_iterator<Iterator> {
-  explicit VectorReverseIterator(Iterator iter)
-      : std::reverse_iterator<Iterator>(iter) {}
-
-  typename Iterator::value_type operator*() const {
-    return *(std::reverse_iterator<Iterator>::current);
-  }
-
-  typename Iterator::value_type operator->() const {
-    return *(std::reverse_iterator<Iterator>::current);
-  }
-};
-
-struct String;
-
-// This is used as a helper type for accessing vectors.
-// Vector::data() assumes the vector elements start after the length field.
-template<typename T> class Vector {
- public:
-  typedef VectorIterator<T, typename IndirectHelper<T>::mutable_return_type>
-      iterator;
-  typedef VectorIterator<T, typename IndirectHelper<T>::return_type>
-      const_iterator;
-  typedef VectorReverseIterator<iterator> reverse_iterator;
-  typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
-
-  uoffset_t size() const { return EndianScalar(length_); }
-
-  // Deprecated: use size(). Here for backwards compatibility.
-  FLATBUFFERS_ATTRIBUTE(deprecated("use size() instead"))
-  uoffset_t Length() const { return size(); }
-
-  typedef typename IndirectHelper<T>::return_type return_type;
-  typedef typename IndirectHelper<T>::mutable_return_type mutable_return_type;
-
-  return_type Get(uoffset_t i) const {
-    FLATBUFFERS_ASSERT(i < size());
-    return IndirectHelper<T>::Read(Data(), i);
-  }
-
-  return_type operator[](uoffset_t i) const { return Get(i); }
-
-  // If this is a Vector of enums, T will be its storage type, not the enum
-  // type. This function makes it convenient to retrieve value with enum
-  // type E.
-  template<typename E> E GetEnum(uoffset_t i) const {
-    return static_cast<E>(Get(i));
-  }
-
-  // If this a vector of unions, this does the cast for you. There's no check
-  // to make sure this is the right type!
-  template<typename U> const U *GetAs(uoffset_t i) const {
-    return reinterpret_cast<const U *>(Get(i));
-  }
-
-  // If this a vector of unions, this does the cast for you. There's no check
-  // to make sure this is actually a string!
-  const String *GetAsString(uoffset_t i) const {
-    return reinterpret_cast<const String *>(Get(i));
-  }
-
-  const void *GetStructFromOffset(size_t o) const {
-    return reinterpret_cast<const void *>(Data() + o);
-  }
-
-  iterator begin() { return iterator(Data(), 0); }
-  const_iterator begin() const { return const_iterator(Data(), 0); }
-
-  iterator end() { return iterator(Data(), size()); }
-  const_iterator end() const { return const_iterator(Data(), size()); }
-
-  reverse_iterator rbegin() { return reverse_iterator(end() - 1); }
-  const_reverse_iterator rbegin() const {
-    return const_reverse_iterator(end() - 1);
-  }
-
-  reverse_iterator rend() { return reverse_iterator(begin() - 1); }
-  const_reverse_iterator rend() const {
-    return const_reverse_iterator(begin() - 1);
-  }
-
-  const_iterator cbegin() const { return begin(); }
-
-  const_iterator cend() const { return end(); }
-
-  const_reverse_iterator crbegin() const { return rbegin(); }
-
-  const_reverse_iterator crend() const { return rend(); }
-
-  // Change elements if you have a non-const pointer to this object.
-  // Scalars only. See reflection.h, and the documentation.
-  void Mutate(uoffset_t i, const T &val) {
-    FLATBUFFERS_ASSERT(i < size());
-    WriteScalar(data() + i, val);
-  }
-
-  // Change an element of a vector of tables (or strings).
-  // "val" points to the new table/string, as you can obtain from
-  // e.g. reflection::AddFlatBuffer().
-  void MutateOffset(uoffset_t i, const uint8_t *val) {
-    FLATBUFFERS_ASSERT(i < size());
-    static_assert(sizeof(T) == sizeof(uoffset_t), "Unrelated types");
-    WriteScalar(data() + i,
-                static_cast<uoffset_t>(val - (Data() + i * sizeof(uoffset_t))));
-  }
-
-  // Get a mutable pointer to tables/strings inside this vector.
-  mutable_return_type GetMutableObject(uoffset_t i) const {
-    FLATBUFFERS_ASSERT(i < size());
-    return const_cast<mutable_return_type>(IndirectHelper<T>::Read(Data(), i));
-  }
-
-  // The raw data in little endian format. Use with care.
-  const uint8_t *Data() const {
-    return reinterpret_cast<const uint8_t *>(&length_ + 1);
-  }
-
-  uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
-
-  // Similarly, but typed, much like std::vector::data
-  const T *data() const { return reinterpret_cast<const T *>(Data()); }
-  T *data() { return reinterpret_cast<T *>(Data()); }
-
-  template<typename K> return_type LookupByKey(K key) const {
-    void *search_result = std::bsearch(
-        &key, Data(), size(), IndirectHelper<T>::element_stride, KeyCompare<K>);
-
-    if (!search_result) {
-      return nullptr;  // Key not found.
-    }
-
-    const uint8_t *element = reinterpret_cast<const uint8_t *>(search_result);
-
-    return IndirectHelper<T>::Read(element, 0);
-  }
-
- protected:
-  // This class is only used to access pre-existing data. Don't ever
-  // try to construct these manually.
-  Vector();
-
-  uoffset_t length_;
-
- private:
-  // This class is a pointer. Copying will therefore create an invalid object.
-  // Private and unimplemented copy constructor.
-  Vector(const Vector &);
-  Vector &operator=(const Vector &);
-
-  template<typename K> static int KeyCompare(const void *ap, const void *bp) {
-    const K *key = reinterpret_cast<const K *>(ap);
-    const uint8_t *data = reinterpret_cast<const uint8_t *>(bp);
-    auto table = IndirectHelper<T>::Read(data, 0);
-
-    // std::bsearch compares with the operands transposed, so we negate the
-    // result here.
-    return -table->KeyCompareWithValue(*key);
-  }
-};
-
-// Represent a vector much like the template above, but in this case we
-// don't know what the element types are (used with reflection.h).
-class VectorOfAny {
- public:
-  uoffset_t size() const { return EndianScalar(length_); }
-
-  const uint8_t *Data() const {
-    return reinterpret_cast<const uint8_t *>(&length_ + 1);
-  }
-  uint8_t *Data() { return reinterpret_cast<uint8_t *>(&length_ + 1); }
-
- protected:
-  VectorOfAny();
-
-  uoffset_t length_;
-
- private:
-  VectorOfAny(const VectorOfAny &);
-  VectorOfAny &operator=(const VectorOfAny &);
-};
-
-#ifndef FLATBUFFERS_CPP98_STL
-template<typename T, typename U>
-Vector<Offset<T>> *VectorCast(Vector<Offset<U>> *ptr) {
-  static_assert(std::is_base_of<T, U>::value, "Unrelated types");
-  return reinterpret_cast<Vector<Offset<T>> *>(ptr);
-}
-
-template<typename T, typename U>
-const Vector<Offset<T>> *VectorCast(const Vector<Offset<U>> *ptr) {
-  static_assert(std::is_base_of<T, U>::value, "Unrelated types");
-  return reinterpret_cast<const Vector<Offset<T>> *>(ptr);
-}
-#endif
-
-// Convenient helper function to get the length of any vector, regardless
-// of whether it is null or not (the field is not set).
-template<typename T> static inline size_t VectorLength(const Vector<T> *v) {
-  return v ? v->size() : 0;
-}
-
-// This is used as a helper type for accessing arrays.
-template<typename T, uint16_t length> class Array {
-  typedef
-      typename flatbuffers::integral_constant<bool,
-                                              flatbuffers::is_scalar<T>::value>
-          scalar_tag;
-  typedef
-      typename flatbuffers::conditional<scalar_tag::value, T, const T *>::type
-          IndirectHelperType;
-
- public:
-  typedef typename IndirectHelper<IndirectHelperType>::return_type return_type;
-  typedef VectorIterator<T, return_type> const_iterator;
-  typedef VectorReverseIterator<const_iterator> const_reverse_iterator;
-
-  FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; }
-
-  return_type Get(uoffset_t i) const {
-    FLATBUFFERS_ASSERT(i < size());
-    return IndirectHelper<IndirectHelperType>::Read(Data(), i);
-  }
-
-  return_type operator[](uoffset_t i) const { return Get(i); }
-
-  // If this is a Vector of enums, T will be its storage type, not the enum
-  // type. This function makes it convenient to retrieve value with enum
-  // type E.
-  template<typename E> E GetEnum(uoffset_t i) const {
-    return static_cast<E>(Get(i));
-  }
-
-  const_iterator begin() const { return const_iterator(Data(), 0); }
-  const_iterator end() const { return const_iterator(Data(), size()); }
-
-  const_reverse_iterator rbegin() const {
-    return const_reverse_iterator(end());
-  }
-  const_reverse_iterator rend() const { return const_reverse_iterator(end()); }
-
-  const_iterator cbegin() const { return begin(); }
-  const_iterator cend() const { return end(); }
-
-  const_reverse_iterator crbegin() const { return rbegin(); }
-  const_reverse_iterator crend() const { return rend(); }
-
-  // Get a mutable pointer to elements inside this array.
-  // This method used to mutate arrays of structs followed by a @p Mutate
-  // operation. For primitive types use @p Mutate directly.
-  // @warning Assignments and reads to/from the dereferenced pointer are not
-  //  automatically converted to the correct endianness.
-  typename flatbuffers::conditional<scalar_tag::value, void, T *>::type
-  GetMutablePointer(uoffset_t i) const {
-    FLATBUFFERS_ASSERT(i < size());
-    return const_cast<T *>(&data()[i]);
-  }
-
-  // Change elements if you have a non-const pointer to this object.
-  void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); }
-
-  // The raw data in little endian format. Use with care.
-  const uint8_t *Data() const { return data_; }
-
-  uint8_t *Data() { return data_; }
-
-  // Similarly, but typed, much like std::vector::data
-  const T *data() const { return reinterpret_cast<const T *>(Data()); }
-  T *data() { return reinterpret_cast<T *>(Data()); }
-
- protected:
-  void MutateImpl(flatbuffers::integral_constant<bool, true>, uoffset_t i,
-                  const T &val) {
-    FLATBUFFERS_ASSERT(i < size());
-    WriteScalar(data() + i, val);
-  }
-
-  void MutateImpl(flatbuffers::integral_constant<bool, false>, uoffset_t i,
-                  const T &val) {
-    *(GetMutablePointer(i)) = val;
-  }
-
-  // This class is only used to access pre-existing data. Don't ever
-  // try to construct these manually.
-  // 'constexpr' allows us to use 'size()' at compile time.
-  // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on
-  //  a constructor.
-#if defined(__cpp_constexpr)
-  constexpr Array();
-#else
-  Array();
-#endif
-
-  uint8_t data_[length * sizeof(T)];
-
- private:
-  // This class is a pointer. Copying will therefore create an invalid object.
-  // Private and unimplemented copy constructor.
-  Array(const Array &);
-  Array &operator=(const Array &);
-};
-
-// Specialization for Array[struct] with access using Offset<void> pointer.
-// This specialization used by idl_gen_text.cpp.
-template<typename T, uint16_t length> class Array<Offset<T>, length> {
-  static_assert(flatbuffers::is_same<T, void>::value, "unexpected type T");
-
- public:
-  typedef const void *return_type;
-
-  const uint8_t *Data() const { return data_; }
-
-  // Make idl_gen_text.cpp::PrintContainer happy.
-  return_type operator[](uoffset_t) const {
-    FLATBUFFERS_ASSERT(false);
-    return nullptr;
-  }
-
- private:
-  // This class is only used to access pre-existing data.
-  Array();
-  Array(const Array &);
-  Array &operator=(const Array &);
-
-  uint8_t data_[1];
-};
-
-// Lexicographically compare two strings (possibly containing nulls), and
-// return true if the first is less than the second.
-static inline bool StringLessThan(const char *a_data, uoffset_t a_size,
-                                  const char *b_data, uoffset_t b_size) {
-  const auto cmp = memcmp(a_data, b_data, (std::min)(a_size, b_size));
-  return cmp == 0 ? a_size < b_size : cmp < 0;
-}
-
-struct String : public Vector<char> {
-  const char *c_str() const { return reinterpret_cast<const char *>(Data()); }
-  std::string str() const { return std::string(c_str(), size()); }
-
-  // clang-format off
-  #ifdef FLATBUFFERS_HAS_STRING_VIEW
-  flatbuffers::string_view string_view() const {
-    return flatbuffers::string_view(c_str(), size());
-  }
-  #endif // FLATBUFFERS_HAS_STRING_VIEW
-  // clang-format on
-
-  bool operator<(const String &o) const {
-    return StringLessThan(this->data(), this->size(), o.data(), o.size());
-  }
-};
-
-// Convenience function to get std::string from a String returning an empty
-// string on null pointer.
-static inline std::string GetString(const String *str) {
-  return str ? str->str() : "";
-}
-
-// Convenience function to get char* from a String returning an empty string on
-// null pointer.
-static inline const char *GetCstring(const String *str) {
-  return str ? str->c_str() : "";
-}
-
-#ifdef FLATBUFFERS_HAS_STRING_VIEW
-// Convenience function to get string_view from a String returning an empty
-// string_view on null pointer.
-static inline flatbuffers::string_view GetStringView(const String *str) {
-  return str ? str->string_view() : flatbuffers::string_view();
-}
-#endif  // FLATBUFFERS_HAS_STRING_VIEW
-
-// Allocator interface. This is flatbuffers-specific and meant only for
-// `vector_downward` usage.
-class Allocator {
- public:
-  virtual ~Allocator() {}
-
-  // Allocate `size` bytes of memory.
-  virtual uint8_t *allocate(size_t size) = 0;
-
-  // Deallocate `size` bytes of memory at `p` allocated by this allocator.
-  virtual void deallocate(uint8_t *p, size_t size) = 0;
-
-  // Reallocate `new_size` bytes of memory, replacing the old region of size
-  // `old_size` at `p`. In contrast to a normal realloc, this grows downwards,
-  // and is intended specifcally for `vector_downward` use.
-  // `in_use_back` and `in_use_front` indicate how much of `old_size` is
-  // actually in use at each end, and needs to be copied.
-  virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size,
-                                       size_t new_size, size_t in_use_back,
-                                       size_t in_use_front) {
-    FLATBUFFERS_ASSERT(new_size > old_size);  // vector_downward only grows
-    uint8_t *new_p = allocate(new_size);
-    memcpy_downward(old_p, old_size, new_p, new_size, in_use_back,
-                    in_use_front);
-    deallocate(old_p, old_size);
-    return new_p;
-  }
-
- protected:
-  // Called by `reallocate_downward` to copy memory from `old_p` of `old_size`
-  // to `new_p` of `new_size`. Only memory of size `in_use_front` and
-  // `in_use_back` will be copied from the front and back of the old memory
-  // allocation.
-  void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p,
-                       size_t new_size, size_t in_use_back,
-                       size_t in_use_front) {
-    memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back,
-           in_use_back);
-    memcpy(new_p, old_p, in_use_front);
-  }
-};
-
-// DefaultAllocator uses new/delete to allocate memory regions
-class DefaultAllocator : public Allocator {
- public:
-  uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE {
-    return new uint8_t[size];
-  }
-
-  void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; }
-
-  static void dealloc(void *p, size_t) { delete[] static_cast<uint8_t *>(p); }
-};
-
-// These functions allow for a null allocator to mean use the default allocator,
-// as used by DetachedBuffer and vector_downward below.
-// This is to avoid having a statically or dynamically allocated default
-// allocator, or having to move it between the classes that may own it.
-inline uint8_t *Allocate(Allocator *allocator, size_t size) {
-  return allocator ? allocator->allocate(size)
-                   : DefaultAllocator().allocate(size);
-}
-
-inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size) {
-  if (allocator)
-    allocator->deallocate(p, size);
-  else
-    DefaultAllocator().deallocate(p, size);
-}
-
-inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p,
-                                   size_t old_size, size_t new_size,
-                                   size_t in_use_back, size_t in_use_front) {
-  return allocator ? allocator->reallocate_downward(old_p, old_size, new_size,
-                                                    in_use_back, in_use_front)
-                   : DefaultAllocator().reallocate_downward(
-                         old_p, old_size, new_size, in_use_back, in_use_front);
-}
-
-// DetachedBuffer is a finished flatbuffer memory region, detached from its
-// builder. The original memory region and allocator are also stored so that
-// the DetachedBuffer can manage the memory lifetime.
-class DetachedBuffer {
- public:
-  DetachedBuffer()
-      : allocator_(nullptr),
-        own_allocator_(false),
-        buf_(nullptr),
-        reserved_(0),
-        cur_(nullptr),
-        size_(0) {}
-
-  DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf,
-                 size_t reserved, uint8_t *cur, size_t sz)
-      : allocator_(allocator),
-        own_allocator_(own_allocator),
-        buf_(buf),
-        reserved_(reserved),
-        cur_(cur),
-        size_(sz) {}
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-  DetachedBuffer(DetachedBuffer &&other)
-      : allocator_(other.allocator_),
-        own_allocator_(other.own_allocator_),
-        buf_(other.buf_),
-        reserved_(other.reserved_),
-        cur_(other.cur_),
-        size_(other.size_) {
-    other.reset();
-  }
-  // clang-format off
-  #endif  // !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-  DetachedBuffer &operator=(DetachedBuffer &&other) {
-    if (this == &other) return *this;
-
-    destroy();
-
-    allocator_ = other.allocator_;
-    own_allocator_ = other.own_allocator_;
-    buf_ = other.buf_;
-    reserved_ = other.reserved_;
-    cur_ = other.cur_;
-    size_ = other.size_;
-
-    other.reset();
-
-    return *this;
-  }
-  // clang-format off
-  #endif  // !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
-  ~DetachedBuffer() { destroy(); }
-
-  const uint8_t *data() const { return cur_; }
-
-  uint8_t *data() { return cur_; }
-
-  size_t size() const { return size_; }
-
-  // clang-format off
-  #if 0  // disabled for now due to the ordering of classes in this header
-  template <class T>
-  bool Verify() const {
-    Verifier verifier(data(), size());
-    return verifier.Verify<T>(nullptr);
-  }
-
-  template <class T>
-  const T* GetRoot() const {
-    return flatbuffers::GetRoot<T>(data());
-  }
-
-  template <class T>
-  T* GetRoot() {
-    return flatbuffers::GetRoot<T>(data());
-  }
-  #endif
-  // clang-format on
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-  // These may change access mode, leave these at end of public section
-  FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other))
-  FLATBUFFERS_DELETE_FUNC(
-      DetachedBuffer &operator=(const DetachedBuffer &other))
-  // clang-format off
-  #endif  // !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
- protected:
-  Allocator *allocator_;
-  bool own_allocator_;
-  uint8_t *buf_;
-  size_t reserved_;
-  uint8_t *cur_;
-  size_t size_;
-
-  inline void destroy() {
-    if (buf_) Deallocate(allocator_, buf_, reserved_);
-    if (own_allocator_ && allocator_) { delete allocator_; }
-    reset();
-  }
-
-  inline void reset() {
-    allocator_ = nullptr;
-    own_allocator_ = false;
-    buf_ = nullptr;
-    reserved_ = 0;
-    cur_ = nullptr;
-    size_ = 0;
-  }
-};
-
-// This is a minimal replication of std::vector<uint8_t> functionality,
-// except growing from higher to lower addresses. i.e push_back() inserts data
-// in the lowest address in the vector.
-// Since this vector leaves the lower part unused, we support a "scratch-pad"
-// that can be stored there for temporary data, to share the allocated space.
-// Essentially, this supports 2 std::vectors in a single buffer.
-class vector_downward {
- public:
-  explicit vector_downward(size_t initial_size, Allocator *allocator,
-                           bool own_allocator, size_t buffer_minalign)
-      : allocator_(allocator),
-        own_allocator_(own_allocator),
-        initial_size_(initial_size),
-        buffer_minalign_(buffer_minalign),
-        reserved_(0),
-        buf_(nullptr),
-        cur_(nullptr),
-        scratch_(nullptr) {}
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  vector_downward(vector_downward &&other)
-  #else
-  vector_downward(vector_downward &other)
-  #endif  // defined(FLATBUFFERS_CPP98_STL)
-      // clang-format on
-      : allocator_(other.allocator_),
-        own_allocator_(other.own_allocator_),
-        initial_size_(other.initial_size_),
-        buffer_minalign_(other.buffer_minalign_),
-        reserved_(other.reserved_),
-        buf_(other.buf_),
-        cur_(other.cur_),
-        scratch_(other.scratch_) {
-    // No change in other.allocator_
-    // No change in other.initial_size_
-    // No change in other.buffer_minalign_
-    other.own_allocator_ = false;
-    other.reserved_ = 0;
-    other.buf_ = nullptr;
-    other.cur_ = nullptr;
-    other.scratch_ = nullptr;
-  }
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-  vector_downward &operator=(vector_downward &&other) {
-    // Move construct a temporary and swap idiom
-    vector_downward temp(std::move(other));
-    swap(temp);
-    return *this;
-  }
-  // clang-format off
-  #endif  // defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
-  ~vector_downward() {
-    clear_buffer();
-    clear_allocator();
-  }
-
-  void reset() {
-    clear_buffer();
-    clear();
-  }
-
-  void clear() {
-    if (buf_) {
-      cur_ = buf_ + reserved_;
-    } else {
-      reserved_ = 0;
-      cur_ = nullptr;
-    }
-    clear_scratch();
-  }
-
-  void clear_scratch() { scratch_ = buf_; }
-
-  void clear_allocator() {
-    if (own_allocator_ && allocator_) { delete allocator_; }
-    allocator_ = nullptr;
-    own_allocator_ = false;
-  }
-
-  void clear_buffer() {
-    if (buf_) Deallocate(allocator_, buf_, reserved_);
-    buf_ = nullptr;
-  }
-
-  // Relinquish the pointer to the caller.
-  uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) {
-    auto *buf = buf_;
-    allocated_bytes = reserved_;
-    offset = static_cast<size_t>(cur_ - buf_);
-
-    // release_raw only relinquishes the buffer ownership.
-    // Does not deallocate or reset the allocator. Destructor will do that.
-    buf_ = nullptr;
-    clear();
-    return buf;
-  }
-
-  // Relinquish the pointer to the caller.
-  DetachedBuffer release() {
-    // allocator ownership (if any) is transferred to DetachedBuffer.
-    DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_,
-                      size());
-    if (own_allocator_) {
-      allocator_ = nullptr;
-      own_allocator_ = false;
-    }
-    buf_ = nullptr;
-    clear();
-    return fb;
-  }
-
-  size_t ensure_space(size_t len) {
-    FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_);
-    if (len > static_cast<size_t>(cur_ - scratch_)) { reallocate(len); }
-    // Beyond this, signed offsets may not have enough range:
-    // (FlatBuffers > 2GB not supported).
-    FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE);
-    return len;
-  }
-
-  inline uint8_t *make_space(size_t len) {
-    size_t space = ensure_space(len);
-    cur_ -= space;
-    return cur_;
-  }
-
-  // Returns nullptr if using the DefaultAllocator.
-  Allocator *get_custom_allocator() { return allocator_; }
-
-  uoffset_t size() const {
-    return static_cast<uoffset_t>(reserved_ - (cur_ - buf_));
-  }
-
-  uoffset_t scratch_size() const {
-    return static_cast<uoffset_t>(scratch_ - buf_);
-  }
-
-  size_t capacity() const { return reserved_; }
-
-  uint8_t *data() const {
-    FLATBUFFERS_ASSERT(cur_);
-    return cur_;
-  }
-
-  uint8_t *scratch_data() const {
-    FLATBUFFERS_ASSERT(buf_);
-    return buf_;
-  }
-
-  uint8_t *scratch_end() const {
-    FLATBUFFERS_ASSERT(scratch_);
-    return scratch_;
-  }
-
-  uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; }
-
-  void push(const uint8_t *bytes, size_t num) {
-    if (num > 0) { memcpy(make_space(num), bytes, num); }
-  }
-
-  // Specialized version of push() that avoids memcpy call for small data.
-  template<typename T> void push_small(const T &little_endian_t) {
-    make_space(sizeof(T));
-    *reinterpret_cast<T *>(cur_) = little_endian_t;
-  }
-
-  template<typename T> void scratch_push_small(const T &t) {
-    ensure_space(sizeof(T));
-    *reinterpret_cast<T *>(scratch_) = t;
-    scratch_ += sizeof(T);
-  }
-
-  // fill() is most frequently called with small byte counts (<= 4),
-  // which is why we're using loops rather than calling memset.
-  void fill(size_t zero_pad_bytes) {
-    make_space(zero_pad_bytes);
-    for (size_t i = 0; i < zero_pad_bytes; i++) cur_[i] = 0;
-  }
-
-  // Version for when we know the size is larger.
-  // Precondition: zero_pad_bytes > 0
-  void fill_big(size_t zero_pad_bytes) {
-    memset(make_space(zero_pad_bytes), 0, zero_pad_bytes);
-  }
-
-  void pop(size_t bytes_to_remove) { cur_ += bytes_to_remove; }
-  void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; }
-
-  void swap(vector_downward &other) {
-    using std::swap;
-    swap(allocator_, other.allocator_);
-    swap(own_allocator_, other.own_allocator_);
-    swap(initial_size_, other.initial_size_);
-    swap(buffer_minalign_, other.buffer_minalign_);
-    swap(reserved_, other.reserved_);
-    swap(buf_, other.buf_);
-    swap(cur_, other.cur_);
-    swap(scratch_, other.scratch_);
-  }
-
-  void swap_allocator(vector_downward &other) {
-    using std::swap;
-    swap(allocator_, other.allocator_);
-    swap(own_allocator_, other.own_allocator_);
-  }
-
- private:
-  // You shouldn't really be copying instances of this class.
-  FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &))
-  FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &))
-
-  Allocator *allocator_;
-  bool own_allocator_;
-  size_t initial_size_;
-  size_t buffer_minalign_;
-  size_t reserved_;
-  uint8_t *buf_;
-  uint8_t *cur_;  // Points at location between empty (below) and used (above).
-  uint8_t *scratch_;  // Points to the end of the scratchpad in use.
-
-  void reallocate(size_t len) {
-    auto old_reserved = reserved_;
-    auto old_size = size();
-    auto old_scratch_size = scratch_size();
-    reserved_ +=
-        (std::max)(len, old_reserved ? old_reserved / 2 : initial_size_);
-    reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1);
-    if (buf_) {
-      buf_ = ReallocateDownward(allocator_, buf_, old_reserved, reserved_,
-                                old_size, old_scratch_size);
-    } else {
-      buf_ = Allocate(allocator_, reserved_);
-    }
-    cur_ = buf_ + reserved_ - old_size;
-    scratch_ = buf_ + old_scratch_size;
-  }
-};
-
-// Converts a Field ID to a virtual table offset.
-inline voffset_t FieldIndexToOffset(voffset_t field_id) {
-  // Should correspond to what EndTable() below builds up.
-  const int fixed_fields = 2;  // Vtable size and Object Size.
-  return static_cast<voffset_t>((field_id + fixed_fields) * sizeof(voffset_t));
-}
-
-template<typename T, typename Alloc>
-const T *data(const std::vector<T, Alloc> &v) {
-  // Eventually the returned pointer gets passed down to memcpy, so
-  // we need it to be non-null to avoid undefined behavior.
-  static uint8_t t;
-  return v.empty() ? reinterpret_cast<const T *>(&t) : &v.front();
-}
-template<typename T, typename Alloc> T *data(std::vector<T, Alloc> &v) {
-  // Eventually the returned pointer gets passed down to memcpy, so
-  // we need it to be non-null to avoid undefined behavior.
-  static uint8_t t;
-  return v.empty() ? reinterpret_cast<T *>(&t) : &v.front();
-}
-
-/// @endcond
-
-/// @addtogroup flatbuffers_cpp_api
-/// @{
-/// @class FlatBufferBuilder
-/// @brief Helper class to hold data needed in creation of a FlatBuffer.
-/// To serialize data, you typically call one of the `Create*()` functions in
-/// the generated code, which in turn call a sequence of `StartTable`/
-/// `PushElement`/`AddElement`/`EndTable`, or the builtin `CreateString`/
-/// `CreateVector` functions. Do this is depth-first order to build up a tree to
-/// the root. `Finish()` wraps up the buffer ready for transport.
-class FlatBufferBuilder {
- public:
-  /// @brief Default constructor for FlatBufferBuilder.
-  /// @param[in] initial_size The initial size of the buffer, in bytes. Defaults
-  /// to `1024`.
-  /// @param[in] allocator An `Allocator` to use. If null will use
-  /// `DefaultAllocator`.
-  /// @param[in] own_allocator Whether the builder/vector should own the
-  /// allocator. Defaults to / `false`.
-  /// @param[in] buffer_minalign Force the buffer to be aligned to the given
-  /// minimum alignment upon reallocation. Only needed if you intend to store
-  /// types with custom alignment AND you wish to read the buffer in-place
-  /// directly after creation.
-  explicit FlatBufferBuilder(
-      size_t initial_size = 1024, Allocator *allocator = nullptr,
-      bool own_allocator = false,
-      size_t buffer_minalign = AlignOf<largest_scalar_t>())
-      : buf_(initial_size, allocator, own_allocator, buffer_minalign),
-        num_field_loc(0),
-        max_voffset_(0),
-        nested(false),
-        finished(false),
-        minalign_(1),
-        force_defaults_(false),
-        dedup_vtables_(true),
-        string_pool(nullptr) {
-    EndianCheck();
-  }
-
-  // clang-format off
-  /// @brief Move constructor for FlatBufferBuilder.
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  FlatBufferBuilder(FlatBufferBuilder &&other)
-  #else
-  FlatBufferBuilder(FlatBufferBuilder &other)
-  #endif  // #if !defined(FLATBUFFERS_CPP98_STL)
-    : buf_(1024, nullptr, false, AlignOf<largest_scalar_t>()),
-      num_field_loc(0),
-      max_voffset_(0),
-      nested(false),
-      finished(false),
-      minalign_(1),
-      force_defaults_(false),
-      dedup_vtables_(true),
-      string_pool(nullptr) {
-    EndianCheck();
-    // Default construct and swap idiom.
-    // Lack of delegating constructors in vs2010 makes it more verbose than needed.
-    Swap(other);
-  }
-  // clang-format on
-
-  // clang-format off
-  #if !defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-  /// @brief Move assignment operator for FlatBufferBuilder.
-  FlatBufferBuilder &operator=(FlatBufferBuilder &&other) {
-    // Move construct a temporary and swap idiom
-    FlatBufferBuilder temp(std::move(other));
-    Swap(temp);
-    return *this;
-  }
-  // clang-format off
-  #endif  // defined(FLATBUFFERS_CPP98_STL)
-  // clang-format on
-
-  void Swap(FlatBufferBuilder &other) {
-    using std::swap;
-    buf_.swap(other.buf_);
-    swap(num_field_loc, other.num_field_loc);
-    swap(max_voffset_, other.max_voffset_);
-    swap(nested, other.nested);
-    swap(finished, other.finished);
-    swap(minalign_, other.minalign_);
-    swap(force_defaults_, other.force_defaults_);
-    swap(dedup_vtables_, other.dedup_vtables_);
-    swap(string_pool, other.string_pool);
-  }
-
-  ~FlatBufferBuilder() {
-    if (string_pool) delete string_pool;
-  }
-
-  void Reset() {
-    Clear();       // clear builder state
-    buf_.reset();  // deallocate buffer
-  }
-
-  /// @brief Reset all the state in this FlatBufferBuilder so it can be reused
-  /// to construct another buffer.
-  void Clear() {
-    ClearOffsets();
-    buf_.clear();
-    nested = false;
-    finished = false;
-    minalign_ = 1;
-    if (string_pool) string_pool->clear();
-  }
-
-  /// @brief The current size of the serialized buffer, counting from the end.
-  /// @return Returns an `uoffset_t` with the current size of the buffer.
-  uoffset_t GetSize() const { return buf_.size(); }
-
-  /// @brief Get the serialized buffer (after you call `Finish()`).
-  /// @return Returns an `uint8_t` pointer to the FlatBuffer data inside the
-  /// buffer.
-  uint8_t *GetBufferPointer() const {
-    Finished();
-    return buf_.data();
-  }
-
-  /// @brief Get a pointer to an unfinished buffer.
-  /// @return Returns a `uint8_t` pointer to the unfinished buffer.
-  uint8_t *GetCurrentBufferPointer() const { return buf_.data(); }
-
-  /// @brief Get the released pointer to the serialized buffer.
-  /// @warning Do NOT attempt to use this FlatBufferBuilder afterwards!
-  /// @return A `FlatBuffer` that owns the buffer and its allocator and
-  /// behaves similar to a `unique_ptr` with a deleter.
-  FLATBUFFERS_ATTRIBUTE(deprecated("use Release() instead"))
-  DetachedBuffer ReleaseBufferPointer() {
-    Finished();
-    return buf_.release();
-  }
-
-  /// @brief Get the released DetachedBuffer.
-  /// @return A `DetachedBuffer` that owns the buffer and its allocator.
-  DetachedBuffer Release() {
-    Finished();
-    return buf_.release();
-  }
-
-  /// @brief Get the released pointer to the serialized buffer.
-  /// @param size The size of the memory block containing
-  /// the serialized `FlatBuffer`.
-  /// @param offset The offset from the released pointer where the finished
-  /// `FlatBuffer` starts.
-  /// @return A raw pointer to the start of the memory block containing
-  /// the serialized `FlatBuffer`.
-  /// @remark If the allocator is owned, it gets deleted when the destructor is
-  /// called..
-  uint8_t *ReleaseRaw(size_t &size, size_t &offset) {
-    Finished();
-    return buf_.release_raw(size, offset);
-  }
-
-  /// @brief get the minimum alignment this buffer needs to be accessed
-  /// properly. This is only known once all elements have been written (after
-  /// you call Finish()). You can use this information if you need to embed
-  /// a FlatBuffer in some other buffer, such that you can later read it
-  /// without first having to copy it into its own buffer.
-  size_t GetBufferMinAlignment() const {
-    Finished();
-    return minalign_;
-  }
-
-  /// @cond FLATBUFFERS_INTERNAL
-  void Finished() const {
-    // If you get this assert, you're attempting to get access a buffer
-    // which hasn't been finished yet. Be sure to call
-    // FlatBufferBuilder::Finish with your root table.
-    // If you really need to access an unfinished buffer, call
-    // GetCurrentBufferPointer instead.
-    FLATBUFFERS_ASSERT(finished);
-  }
-  /// @endcond
-
-  /// @brief In order to save space, fields that are set to their default value
-  /// don't get serialized into the buffer.
-  /// @param[in] fd When set to `true`, always serializes default values that
-  /// are set. Optional fields which are not set explicitly, will still not be
-  /// serialized.
-  void ForceDefaults(bool fd) { force_defaults_ = fd; }
-
-  /// @brief By default vtables are deduped in order to save space.
-  /// @param[in] dedup When set to `true`, dedup vtables.
-  void DedupVtables(bool dedup) { dedup_vtables_ = dedup; }
-
-  /// @cond FLATBUFFERS_INTERNAL
-  void Pad(size_t num_bytes) { buf_.fill(num_bytes); }
-
-  void TrackMinAlign(size_t elem_size) {
-    if (elem_size > minalign_) minalign_ = elem_size;
-  }
-
-  void Align(size_t elem_size) {
-    TrackMinAlign(elem_size);
-    buf_.fill(PaddingBytes(buf_.size(), elem_size));
-  }
-
-  void PushFlatBuffer(const uint8_t *bytes, size_t size) {
-    PushBytes(bytes, size);
-    finished = true;
-  }
-
-  void PushBytes(const uint8_t *bytes, size_t size) { buf_.push(bytes, size); }
-
-  void PopBytes(size_t amount) { buf_.pop(amount); }
-
-  template<typename T> void AssertScalarT() {
-    // The code assumes power of 2 sizes and endian-swap-ability.
-    static_assert(flatbuffers::is_scalar<T>::value, "T must be a scalar type");
-  }
-
-  // Write a single aligned scalar to the buffer
-  template<typename T> uoffset_t PushElement(T element) {
-    AssertScalarT<T>();
-    T litle_endian_element = EndianScalar(element);
-    Align(sizeof(T));
-    buf_.push_small(litle_endian_element);
-    return GetSize();
-  }
-
-  template<typename T> uoffset_t PushElement(Offset<T> off) {
-    // Special case for offsets: see ReferTo below.
-    return PushElement(ReferTo(off.o));
-  }
-
-  // When writing fields, we track where they are, so we can create correct
-  // vtables later.
-  void TrackField(voffset_t field, uoffset_t off) {
-    FieldLoc fl = { off, field };
-    buf_.scratch_push_small(fl);
-    num_field_loc++;
-    max_voffset_ = (std::max)(max_voffset_, field);
-  }
-
-  // Like PushElement, but additionally tracks the field this represents.
-  template<typename T> void AddElement(voffset_t field, T e, T def) {
-    // We don't serialize values equal to the default.
-    if (IsTheSameAs(e, def) && !force_defaults_) return;
-    auto off = PushElement(e);
-    TrackField(field, off);
-  }
-
-  template<typename T> void AddElement(voffset_t field, T e) {
-    auto off = PushElement(e);
-    TrackField(field, off);
-  }
-
-  template<typename T> void AddOffset(voffset_t field, Offset<T> off) {
-    if (off.IsNull()) return;  // Don't store.
-    AddElement(field, ReferTo(off.o), static_cast<uoffset_t>(0));
-  }
-
-  template<typename T> void AddStruct(voffset_t field, const T *structptr) {
-    if (!structptr) return;  // Default, don't store.
-    Align(AlignOf<T>());
-    buf_.push_small(*structptr);
-    TrackField(field, GetSize());
-  }
-
-  void AddStructOffset(voffset_t field, uoffset_t off) {
-    TrackField(field, off);
-  }
-
-  // Offsets initially are relative to the end of the buffer (downwards).
-  // This function converts them to be relative to the current location
-  // in the buffer (when stored here), pointing upwards.
-  uoffset_t ReferTo(uoffset_t off) {
-    // Align to ensure GetSize() below is correct.
-    Align(sizeof(uoffset_t));
-    // Offset must refer to something already in buffer.
-    FLATBUFFERS_ASSERT(off && off <= GetSize());
-    return GetSize() - off + static_cast<uoffset_t>(sizeof(uoffset_t));
-  }
-
-  void NotNested() {
-    // If you hit this, you're trying to construct a Table/Vector/String
-    // during the construction of its parent table (between the MyTableBuilder
-    // and table.Finish().
-    // Move the creation of these sub-objects to above the MyTableBuilder to
-    // not get this assert.
-    // Ignoring this assert may appear to work in simple cases, but the reason
-    // it is here is that storing objects in-line may cause vtable offsets
-    // to not fit anymore. It also leads to vtable duplication.
-    FLATBUFFERS_ASSERT(!nested);
-    // If you hit this, fields were added outside the scope of a table.
-    FLATBUFFERS_ASSERT(!num_field_loc);
-  }
-
-  // From generated code (or from the parser), we call StartTable/EndTable
-  // with a sequence of AddElement calls in between.
-  uoffset_t StartTable() {
-    NotNested();
-    nested = true;
-    return GetSize();
-  }
-
-  // This finishes one serialized object by generating the vtable if it's a
-  // table, comparing it against existing vtables, and writing the
-  // resulting vtable offset.
-  uoffset_t EndTable(uoffset_t start) {
-    // If you get this assert, a corresponding StartTable wasn't called.
-    FLATBUFFERS_ASSERT(nested);
-    // Write the vtable offset, which is the start of any Table.
-    // We fill it's value later.
-    auto vtableoffsetloc = PushElement<soffset_t>(0);
-    // Write a vtable, which consists entirely of voffset_t elements.
-    // It starts with the number of offsets, followed by a type id, followed
-    // by the offsets themselves. In reverse:
-    // Include space for the last offset and ensure empty tables have a
-    // minimum size.
-    max_voffset_ =
-        (std::max)(static_cast<voffset_t>(max_voffset_ + sizeof(voffset_t)),
-                   FieldIndexToOffset(0));
-    buf_.fill_big(max_voffset_);
-    auto table_object_size = vtableoffsetloc - start;
-    // Vtable use 16bit offsets.
-    FLATBUFFERS_ASSERT(table_object_size < 0x10000);
-    WriteScalar<voffset_t>(buf_.data() + sizeof(voffset_t),
-                           static_cast<voffset_t>(table_object_size));
-    WriteScalar<voffset_t>(buf_.data(), max_voffset_);
-    // Write the offsets into the table
-    for (auto it = buf_.scratch_end() - num_field_loc * sizeof(FieldLoc);
-         it < buf_.scratch_end(); it += sizeof(FieldLoc)) {
-      auto field_location = reinterpret_cast<FieldLoc *>(it);
-      auto pos = static_cast<voffset_t>(vtableoffsetloc - field_location->off);
-      // If this asserts, it means you've set a field twice.
-      FLATBUFFERS_ASSERT(
-          !ReadScalar<voffset_t>(buf_.data() + field_location->id));
-      WriteScalar<voffset_t>(buf_.data() + field_location->id, pos);
-    }
-    ClearOffsets();
-    auto vt1 = reinterpret_cast<voffset_t *>(buf_.data());
-    auto vt1_size = ReadScalar<voffset_t>(vt1);
-    auto vt_use = GetSize();
-    // See if we already have generated a vtable with this exact same
-    // layout before. If so, make it point to the old one, remove this one.
-    if (dedup_vtables_) {
-      for (auto it = buf_.scratch_data(); it < buf_.scratch_end();
-           it += sizeof(uoffset_t)) {
-        auto vt_offset_ptr = reinterpret_cast<uoffset_t *>(it);
-        auto vt2 = reinterpret_cast<voffset_t *>(buf_.data_at(*vt_offset_ptr));
-        auto vt2_size = ReadScalar<voffset_t>(vt2);
-        if (vt1_size != vt2_size || 0 != memcmp(vt2, vt1, vt1_size)) continue;
-        vt_use = *vt_offset_ptr;
-        buf_.pop(GetSize() - vtableoffsetloc);
-        break;
-      }
-    }
-    // If this is a new vtable, remember it.
-    if (vt_use == GetSize()) { buf_.scratch_push_small(vt_use); }
-    // Fill the vtable offset we created above.
-    // The offset points from the beginning of the object to where the
-    // vtable is stored.
-    // Offsets default direction is downward in memory for future format
-    // flexibility (storing all vtables at the start of the file).
-    WriteScalar(buf_.data_at(vtableoffsetloc),
-                static_cast<soffset_t>(vt_use) -
-                    static_cast<soffset_t>(vtableoffsetloc));
-
-    nested = false;
-    return vtableoffsetloc;
-  }
-
-  FLATBUFFERS_ATTRIBUTE(deprecated("call the version above instead"))
-  uoffset_t EndTable(uoffset_t start, voffset_t /*numfields*/) {
-    return EndTable(start);
-  }
-
-  // This checks a required field has been set in a given table that has
-  // just been constructed.
-  template<typename T> void Required(Offset<T> table, voffset_t field);
-
-  uoffset_t StartStruct(size_t alignment) {
-    Align(alignment);
-    return GetSize();
-  }
-
-  uoffset_t EndStruct() { return GetSize(); }
-
-  void ClearOffsets() {
-    buf_.scratch_pop(num_field_loc * sizeof(FieldLoc));
-    num_field_loc = 0;
-    max_voffset_ = 0;
-  }
-
-  // Aligns such that when "len" bytes are written, an object can be written
-  // after it with "alignment" without padding.
-  void PreAlign(size_t len, size_t alignment) {
-    TrackMinAlign(alignment);
-    buf_.fill(PaddingBytes(GetSize() + len, alignment));
-  }
-  template<typename T> void PreAlign(size_t len) {
-    AssertScalarT<T>();
-    PreAlign(len, sizeof(T));
-  }
-  /// @endcond
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// @param[in] str A const char pointer to the data to be stored as a string.
-  /// @param[in] len The number of bytes that should be stored from `str`.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateString(const char *str, size_t len) {
-    NotNested();
-    PreAlign<uoffset_t>(len + 1);  // Always 0-terminated.
-    buf_.fill(1);
-    PushBytes(reinterpret_cast<const uint8_t *>(str), len);
-    PushElement(static_cast<uoffset_t>(len));
-    return Offset<String>(GetSize());
-  }
-
-  /// @brief Store a string in the buffer, which is null-terminated.
-  /// @param[in] str A const char pointer to a C-string to add to the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateString(const char *str) {
-    return CreateString(str, strlen(str));
-  }
-
-  /// @brief Store a string in the buffer, which is null-terminated.
-  /// @param[in] str A char pointer to a C-string to add to the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateString(char *str) {
-    return CreateString(str, strlen(str));
-  }
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// @param[in] str A const reference to a std::string to store in the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateString(const std::string &str) {
-    return CreateString(str.c_str(), str.length());
-  }
-
-  // clang-format off
-  #ifdef FLATBUFFERS_HAS_STRING_VIEW
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// @param[in] str A const string_view to copy in to the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateString(flatbuffers::string_view str) {
-    return CreateString(str.data(), str.size());
-  }
-  #endif // FLATBUFFERS_HAS_STRING_VIEW
-  // clang-format on
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// @param[in] str A const pointer to a `String` struct to add to the buffer.
-  /// @return Returns the offset in the buffer where the string starts
-  Offset<String> CreateString(const String *str) {
-    return str ? CreateString(str->c_str(), str->size()) : 0;
-  }
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// @param[in] str A const reference to a std::string like type with support
-  /// of T::c_str() and T::length() to store in the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  template<typename T> Offset<String> CreateString(const T &str) {
-    return CreateString(str.c_str(), str.length());
-  }
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// If a string with this exact contents has already been serialized before,
-  /// instead simply returns the offset of the existing string.
-  /// @param[in] str A const char pointer to the data to be stored as a string.
-  /// @param[in] len The number of bytes that should be stored from `str`.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateSharedString(const char *str, size_t len) {
-    if (!string_pool)
-      string_pool = new StringOffsetMap(StringOffsetCompare(buf_));
-    auto size_before_string = buf_.size();
-    // Must first serialize the string, since the set is all offsets into
-    // buffer.
-    auto off = CreateString(str, len);
-    auto it = string_pool->find(off);
-    // If it exists we reuse existing serialized data!
-    if (it != string_pool->end()) {
-      // We can remove the string we serialized.
-      buf_.pop(buf_.size() - size_before_string);
-      return *it;
-    }
-    // Record this string for future use.
-    string_pool->insert(off);
-    return off;
-  }
-
-  /// @brief Store a string in the buffer, which null-terminated.
-  /// If a string with this exact contents has already been serialized before,
-  /// instead simply returns the offset of the existing string.
-  /// @param[in] str A const char pointer to a C-string to add to the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateSharedString(const char *str) {
-    return CreateSharedString(str, strlen(str));
-  }
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// If a string with this exact contents has already been serialized before,
-  /// instead simply returns the offset of the existing string.
-  /// @param[in] str A const reference to a std::string to store in the buffer.
-  /// @return Returns the offset in the buffer where the string starts.
-  Offset<String> CreateSharedString(const std::string &str) {
-    return CreateSharedString(str.c_str(), str.length());
-  }
-
-  /// @brief Store a string in the buffer, which can contain any binary data.
-  /// If a string with this exact contents has already been serialized before,
-  /// instead simply returns the offset of the existing string.
-  /// @param[in] str A const pointer to a `String` struct to add to the buffer.
-  /// @return Returns the offset in the buffer where the string starts
-  Offset<String> CreateSharedString(const String *str) {
-    return CreateSharedString(str->c_str(), str->size());
-  }
-
-  /// @cond FLATBUFFERS_INTERNAL
-  uoffset_t EndVector(size_t len) {
-    FLATBUFFERS_ASSERT(nested);  // Hit if no corresponding StartVector.
-    nested = false;
-    return PushElement(static_cast<uoffset_t>(len));
-  }
-
-  void StartVector(size_t len, size_t elemsize) {
-    NotNested();
-    nested = true;
-    PreAlign<uoffset_t>(len * elemsize);
-    PreAlign(len * elemsize, elemsize);  // Just in case elemsize > uoffset_t.
-  }
-
-  // Call this right before StartVector/CreateVector if you want to force the
-  // alignment to be something different than what the element size would
-  // normally dictate.
-  // This is useful when storing a nested_flatbuffer in a vector of bytes,
-  // or when storing SIMD floats, etc.
-  void ForceVectorAlignment(size_t len, size_t elemsize, size_t alignment) {
-    PreAlign(len * elemsize, alignment);
-  }
-
-  // Similar to ForceVectorAlignment but for String fields.
-  void ForceStringAlignment(size_t len, size_t alignment) {
-    PreAlign((len + 1) * sizeof(char), alignment);
-  }
-
-  /// @endcond
-
-  /// @brief Serialize an array into a FlatBuffer `vector`.
-  /// @tparam T The data type of the array elements.
-  /// @param[in] v A pointer to the array of type `T` to serialize into the
-  /// buffer as a `vector`.
-  /// @param[in] len The number of elements to serialize.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T> Offset<Vector<T>> CreateVector(const T *v, size_t len) {
-    // If this assert hits, you're specifying a template argument that is
-    // causing the wrong overload to be selected, remove it.
-    AssertScalarT<T>();
-    StartVector(len, sizeof(T));
-    if (len == 0) {
-      return Offset<Vector<T>>(EndVector(len));
-    }
-    // clang-format off
-    #if FLATBUFFERS_LITTLEENDIAN
-      PushBytes(reinterpret_cast<const uint8_t *>(v), len * sizeof(T));
-    #else
-      if (sizeof(T) == 1) {
-        PushBytes(reinterpret_cast<const uint8_t *>(v), len);
-      } else {
-        for (auto i = len; i > 0; ) {
-          PushElement(v[--i]);
-        }
-      }
-    #endif
-    // clang-format on
-    return Offset<Vector<T>>(EndVector(len));
-  }
-
-  template<typename T>
-  Offset<Vector<Offset<T>>> CreateVector(const Offset<T> *v, size_t len) {
-    StartVector(len, sizeof(Offset<T>));
-    for (auto i = len; i > 0;) { PushElement(v[--i]); }
-    return Offset<Vector<Offset<T>>>(EndVector(len));
-  }
-
-  /// @brief Serialize a `std::vector` into a FlatBuffer `vector`.
-  /// @tparam T The data type of the `std::vector` elements.
-  /// @param v A const reference to the `std::vector` to serialize into the
-  /// buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T> Offset<Vector<T>> CreateVector(const std::vector<T> &v) {
-    return CreateVector(data(v), v.size());
-  }
-
-  // vector<bool> may be implemented using a bit-set, so we can't access it as
-  // an array. Instead, read elements manually.
-  // Background: https://isocpp.org/blog/2012/11/on-vectorbool
-  Offset<Vector<uint8_t>> CreateVector(const std::vector<bool> &v) {
-    StartVector(v.size(), sizeof(uint8_t));
-    for (auto i = v.size(); i > 0;) {
-      PushElement(static_cast<uint8_t>(v[--i]));
-    }
-    return Offset<Vector<uint8_t>>(EndVector(v.size()));
-  }
-
-  // clang-format off
-  #ifndef FLATBUFFERS_CPP98_STL
-  /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
-  /// This is a convenience function that takes care of iteration for you.
-  /// @tparam T The data type of the `std::vector` elements.
-  /// @param f A function that takes the current iteration 0..vector_size-1 and
-  /// returns any type that you can construct a FlatBuffers vector out of.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T> Offset<Vector<T>> CreateVector(size_t vector_size,
-      const std::function<T (size_t i)> &f) {
-    std::vector<T> elems(vector_size);
-    for (size_t i = 0; i < vector_size; i++) elems[i] = f(i);
-    return CreateVector(elems);
-  }
-  #endif
-  // clang-format on
-
-  /// @brief Serialize values returned by a function into a FlatBuffer `vector`.
-  /// This is a convenience function that takes care of iteration for you.
-  /// @tparam T The data type of the `std::vector` elements.
-  /// @param f A function that takes the current iteration 0..vector_size-1,
-  /// and the state parameter returning any type that you can construct a
-  /// FlatBuffers vector out of.
-  /// @param state State passed to f.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename F, typename S>
-  Offset<Vector<T>> CreateVector(size_t vector_size, F f, S *state) {
-    std::vector<T> elems(vector_size);
-    for (size_t i = 0; i < vector_size; i++) elems[i] = f(i, state);
-    return CreateVector(elems);
-  }
-
-  /// @brief Serialize a `std::vector<std::string>` into a FlatBuffer `vector`.
-  /// This is a convenience function for a common case.
-  /// @param v A const reference to the `std::vector` to serialize into the
-  /// buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  Offset<Vector<Offset<String>>> CreateVectorOfStrings(
-      const std::vector<std::string> &v) {
-    std::vector<Offset<String>> offsets(v.size());
-    for (size_t i = 0; i < v.size(); i++) offsets[i] = CreateString(v[i]);
-    return CreateVector(offsets);
-  }
-
-  /// @brief Serialize an array of structs into a FlatBuffer `vector`.
-  /// @tparam T The data type of the struct array elements.
-  /// @param[in] v A pointer to the array of type `T` to serialize into the
-  /// buffer as a `vector`.
-  /// @param[in] len The number of elements to serialize.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T>
-  Offset<Vector<const T *>> CreateVectorOfStructs(const T *v, size_t len) {
-    StartVector(len * sizeof(T) / AlignOf<T>(), AlignOf<T>());
-    PushBytes(reinterpret_cast<const uint8_t *>(v), sizeof(T) * len);
-    return Offset<Vector<const T *>>(EndVector(len));
-  }
-
-  /// @brief Serialize an array of native structs into a FlatBuffer `vector`.
-  /// @tparam T The data type of the struct array elements.
-  /// @tparam S The data type of the native struct array elements.
-  /// @param[in] v A pointer to the array of type `S` to serialize into the
-  /// buffer as a `vector`.
-  /// @param[in] len The number of elements to serialize.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename S>
-  Offset<Vector<const T *>> CreateVectorOfNativeStructs(const S *v,
-                                                        size_t len) {
-    extern T Pack(const S &);
-    std::vector<T> vv(len);
-    std::transform(v, v + len, vv.begin(), Pack);
-    return CreateVectorOfStructs<T>(data(vv), vv.size());
-  }
-
-  // clang-format off
-  #ifndef FLATBUFFERS_CPP98_STL
-  /// @brief Serialize an array of structs into a FlatBuffer `vector`.
-  /// @tparam T The data type of the struct array elements.
-  /// @param[in] filler A function that takes the current iteration 0..vector_size-1
-  /// and a pointer to the struct that must be filled.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  /// This is mostly useful when flatbuffers are generated with mutation
-  /// accessors.
-  template<typename T> Offset<Vector<const T *>> CreateVectorOfStructs(
-      size_t vector_size, const std::function<void(size_t i, T *)> &filler) {
-    T* structs = StartVectorOfStructs<T>(vector_size);
-    for (size_t i = 0; i < vector_size; i++) {
-      filler(i, structs);
-      structs++;
-    }
-    return EndVectorOfStructs<T>(vector_size);
-  }
-  #endif
-  // clang-format on
-
-  /// @brief Serialize an array of structs into a FlatBuffer `vector`.
-  /// @tparam T The data type of the struct array elements.
-  /// @param[in] f A function that takes the current iteration 0..vector_size-1,
-  /// a pointer to the struct that must be filled and the state argument.
-  /// @param[in] state Arbitrary state to pass to f.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  /// This is mostly useful when flatbuffers are generated with mutation
-  /// accessors.
-  template<typename T, typename F, typename S>
-  Offset<Vector<const T *>> CreateVectorOfStructs(size_t vector_size, F f,
-                                                  S *state) {
-    T *structs = StartVectorOfStructs<T>(vector_size);
-    for (size_t i = 0; i < vector_size; i++) {
-      f(i, structs, state);
-      structs++;
-    }
-    return EndVectorOfStructs<T>(vector_size);
-  }
-
-  /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`.
-  /// @tparam T The data type of the `std::vector` struct elements.
-  /// @param[in] v A const reference to the `std::vector` of structs to
-  /// serialize into the buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename Alloc>
-  Offset<Vector<const T *>> CreateVectorOfStructs(
-      const std::vector<T, Alloc> &v) {
-    return CreateVectorOfStructs(data(v), v.size());
-  }
-
-  /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
-  /// `vector`.
-  /// @tparam T The data type of the `std::vector` struct elements.
-  /// @tparam S The data type of the `std::vector` native struct elements.
-  /// @param[in] v A const reference to the `std::vector` of structs to
-  /// serialize into the buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename S>
-  Offset<Vector<const T *>> CreateVectorOfNativeStructs(
-      const std::vector<S> &v) {
-    return CreateVectorOfNativeStructs<T, S>(data(v), v.size());
-  }
-
-  /// @cond FLATBUFFERS_INTERNAL
-  template<typename T> struct StructKeyComparator {
-    bool operator()(const T &a, const T &b) const {
-      return a.KeyCompareLessThan(&b);
-    }
-
-    FLATBUFFERS_DELETE_FUNC(
-        StructKeyComparator &operator=(const StructKeyComparator &))
-  };
-  /// @endcond
-
-  /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`
-  /// in sorted order.
-  /// @tparam T The data type of the `std::vector` struct elements.
-  /// @param[in] v A const reference to the `std::vector` of structs to
-  /// serialize into the buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T>
-  Offset<Vector<const T *>> CreateVectorOfSortedStructs(std::vector<T> *v) {
-    return CreateVectorOfSortedStructs(data(*v), v->size());
-  }
-
-  /// @brief Serialize a `std::vector` of native structs into a FlatBuffer
-  /// `vector` in sorted order.
-  /// @tparam T The data type of the `std::vector` struct elements.
-  /// @tparam S The data type of the `std::vector` native struct elements.
-  /// @param[in] v A const reference to the `std::vector` of structs to
-  /// serialize into the buffer as a `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename S>
-  Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(
-      std::vector<S> *v) {
-    return CreateVectorOfSortedNativeStructs<T, S>(data(*v), v->size());
-  }
-
-  /// @brief Serialize an array of structs into a FlatBuffer `vector` in sorted
-  /// order.
-  /// @tparam T The data type of the struct array elements.
-  /// @param[in] v A pointer to the array of type `T` to serialize into the
-  /// buffer as a `vector`.
-  /// @param[in] len The number of elements to serialize.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T>
-  Offset<Vector<const T *>> CreateVectorOfSortedStructs(T *v, size_t len) {
-    std::sort(v, v + len, StructKeyComparator<T>());
-    return CreateVectorOfStructs(v, len);
-  }
-
-  /// @brief Serialize an array of native structs into a FlatBuffer `vector` in
-  /// sorted order.
-  /// @tparam T The data type of the struct array elements.
-  /// @tparam S The data type of the native struct array elements.
-  /// @param[in] v A pointer to the array of type `S` to serialize into the
-  /// buffer as a `vector`.
-  /// @param[in] len The number of elements to serialize.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T, typename S>
-  Offset<Vector<const T *>> CreateVectorOfSortedNativeStructs(S *v,
-                                                              size_t len) {
-    extern T Pack(const S &);
-    typedef T (*Pack_t)(const S &);
-    std::vector<T> vv(len);
-    std::transform(v, v + len, vv.begin(), static_cast<Pack_t &>(Pack));
-    return CreateVectorOfSortedStructs<T>(vv, len);
-  }
-
-  /// @cond FLATBUFFERS_INTERNAL
-  template<typename T> struct TableKeyComparator {
-    TableKeyComparator(vector_downward &buf) : buf_(buf) {}
-    TableKeyComparator(const TableKeyComparator &other) : buf_(other.buf_) {}
-    bool operator()(const Offset<T> &a, const Offset<T> &b) const {
-      auto table_a = reinterpret_cast<T *>(buf_.data_at(a.o));
-      auto table_b = reinterpret_cast<T *>(buf_.data_at(b.o));
-      return table_a->KeyCompareLessThan(table_b);
-    }
-    vector_downward &buf_;
-
-   private:
-    FLATBUFFERS_DELETE_FUNC(TableKeyComparator &operator=(const TableKeyComparator &other))
-  };
-  /// @endcond
-
-  /// @brief Serialize an array of `table` offsets as a `vector` in the buffer
-  /// in sorted order.
-  /// @tparam T The data type that the offset refers to.
-  /// @param[in] v An array of type `Offset<T>` that contains the `table`
-  /// offsets to store in the buffer in sorted order.
-  /// @param[in] len The number of elements to store in the `vector`.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T>
-  Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(Offset<T> *v,
-                                                       size_t len) {
-    std::sort(v, v + len, TableKeyComparator<T>(buf_));
-    return CreateVector(v, len);
-  }
-
-  /// @brief Serialize an array of `table` offsets as a `vector` in the buffer
-  /// in sorted order.
-  /// @tparam T The data type that the offset refers to.
-  /// @param[in] v An array of type `Offset<T>` that contains the `table`
-  /// offsets to store in the buffer in sorted order.
-  /// @return Returns a typed `Offset` into the serialized data indicating
-  /// where the vector is stored.
-  template<typename T>
-  Offset<Vector<Offset<T>>> CreateVectorOfSortedTables(
-      std::vector<Offset<T>> *v) {
-    return CreateVectorOfSortedTables(data(*v), v->size());
-  }
-
-  /// @brief Specialized version of `CreateVector` for non-copying use cases.
-  /// Write the data any time later to the returned buffer pointer `buf`.
-  /// @param[in] len The number of elements to store in the `vector`.
-  /// @param[in] elemsize The size of each element in the `vector`.
-  /// @param[out] buf A pointer to a `uint8_t` pointer that can be
-  /// written to at a later time to serialize the data into a `vector`
-  /// in the buffer.
-  uoffset_t CreateUninitializedVector(size_t len, size_t elemsize,
-                                      uint8_t **buf) {
-    NotNested();
-    StartVector(len, elemsize);
-    buf_.make_space(len * elemsize);
-    auto vec_start = GetSize();
-    auto vec_end = EndVector(len);
-    *buf = buf_.data_at(vec_start);
-    return vec_end;
-  }
-
-  /// @brief Specialized version of `CreateVector` for non-copying use cases.
-  /// Write the data any time later to the returned buffer pointer `buf`.
-  /// @tparam T The data type of the data that will be stored in the buffer
-  /// as a `vector`.
-  /// @param[in] len The number of elements to store in the `vector`.
-  /// @param[out] buf A pointer to a pointer of type `T` that can be
-  /// written to at a later time to serialize the data into a `vector`
-  /// in the buffer.
-  template<typename T>
-  Offset<Vector<T>> CreateUninitializedVector(size_t len, T **buf) {
-    AssertScalarT<T>();
-    return CreateUninitializedVector(len, sizeof(T),
-                                     reinterpret_cast<uint8_t **>(buf));
-  }
-
-  template<typename T>
-  Offset<Vector<const T *>> CreateUninitializedVectorOfStructs(size_t len,
-                                                               T **buf) {
-    return CreateUninitializedVector(len, sizeof(T),
-                                     reinterpret_cast<uint8_t **>(buf));
-  }
-
-  // @brief Create a vector of scalar type T given as input a vector of scalar
-  // type U, useful with e.g. pre "enum class" enums, or any existing scalar
-  // data of the wrong type.
-  template<typename T, typename U>
-  Offset<Vector<T>> CreateVectorScalarCast(const U *v, size_t len) {
-    AssertScalarT<T>();
-    AssertScalarT<U>();
-    StartVector(len, sizeof(T));
-    for (auto i = len; i > 0;) { PushElement(static_cast<T>(v[--i])); }
-    return Offset<Vector<T>>(EndVector(len));
-  }
-
-  /// @brief Write a struct by itself, typically to be part of a union.
-  template<typename T> Offset<const T *> CreateStruct(const T &structobj) {
-    NotNested();
-    Align(AlignOf<T>());
-    buf_.push_small(structobj);
-    return Offset<const T *>(GetSize());
-  }
-
-  /// @brief The length of a FlatBuffer file header.
-  static const size_t kFileIdentifierLength = 4;
-
-  /// @brief Finish serializing a buffer by writing the root offset.
-  /// @param[in] file_identifier If a `file_identifier` is given, the buffer
-  /// will be prefixed with a standard FlatBuffers file header.
-  template<typename T>
-  void Finish(Offset<T> root, const char *file_identifier = nullptr) {
-    Finish(root.o, file_identifier, false);
-  }
-
-  /// @brief Finish a buffer with a 32 bit size field pre-fixed (size of the
-  /// buffer following the size field). These buffers are NOT compatible
-  /// with standard buffers created by Finish, i.e. you can't call GetRoot
-  /// on them, you have to use GetSizePrefixedRoot instead.
-  /// All >32 bit quantities in this buffer will be aligned when the whole
-  /// size pre-fixed buffer is aligned.
-  /// These kinds of buffers are useful for creating a stream of FlatBuffers.
-  template<typename T>
-  void FinishSizePrefixed(Offset<T> root,
-                          const char *file_identifier = nullptr) {
-    Finish(root.o, file_identifier, true);
-  }
-
-  void SwapBufAllocator(FlatBufferBuilder &other) {
-    buf_.swap_allocator(other.buf_);
-  }
-
- protected:
-  // You shouldn't really be copying instances of this class.
-  FlatBufferBuilder(const FlatBufferBuilder &);
-  FlatBufferBuilder &operator=(const FlatBufferBuilder &);
-
-  void Finish(uoffset_t root, const char *file_identifier, bool size_prefix) {
-    NotNested();
-    buf_.clear_scratch();
-    // This will cause the whole buffer to be aligned.
-    PreAlign((size_prefix ? sizeof(uoffset_t) : 0) + sizeof(uoffset_t) +
-                 (file_identifier ? kFileIdentifierLength : 0),
-             minalign_);
-    if (file_identifier) {
-      FLATBUFFERS_ASSERT(strlen(file_identifier) == kFileIdentifierLength);
-      PushBytes(reinterpret_cast<const uint8_t *>(file_identifier),
-                kFileIdentifierLength);
-    }
-    PushElement(ReferTo(root));  // Location of root.
-    if (size_prefix) { PushElement(GetSize()); }
-    finished = true;
-  }
-
-  struct FieldLoc {
-    uoffset_t off;
-    voffset_t id;
-  };
-
-  vector_downward buf_;
-
-  // Accumulating offsets of table members while it is being built.
-  // We store these in the scratch pad of buf_, after the vtable offsets.
-  uoffset_t num_field_loc;
-  // Track how much of the vtable is in use, so we can output the most compact
-  // possible vtable.
-  voffset_t max_voffset_;
-
-  // Ensure objects are not nested.
-  bool nested;
-
-  // Ensure the buffer is finished before it is being accessed.
-  bool finished;
-
-  size_t minalign_;
-
-  bool force_defaults_;  // Serialize values equal to their defaults anyway.
-
-  bool dedup_vtables_;
-
-  struct StringOffsetCompare {
-    StringOffsetCompare(const vector_downward &buf) : buf_(&buf) {}
-    bool operator()(const Offset<String> &a, const Offset<String> &b) const {
-      auto stra = reinterpret_cast<const String *>(buf_->data_at(a.o));
-      auto strb = reinterpret_cast<const String *>(buf_->data_at(b.o));
-      return StringLessThan(stra->data(), stra->size(), strb->data(),
-                            strb->size());
-    }
-    const vector_downward *buf_;
-  };
-
-  // For use with CreateSharedString. Instantiated on first use only.
-  typedef std::set<Offset<String>, StringOffsetCompare> StringOffsetMap;
-  StringOffsetMap *string_pool;
-
- private:
-  // Allocates space for a vector of structures.
-  // Must be completed with EndVectorOfStructs().
-  template<typename T> T *StartVectorOfStructs(size_t vector_size) {
-    StartVector(vector_size * sizeof(T) / AlignOf<T>(), AlignOf<T>());
-    return reinterpret_cast<T *>(buf_.make_space(vector_size * sizeof(T)));
-  }
-
-  // End the vector of structues in the flatbuffers.
-  // Vector should have previously be started with StartVectorOfStructs().
-  template<typename T>
-  Offset<Vector<const T *>> EndVectorOfStructs(size_t vector_size) {
-    return Offset<Vector<const T *>>(EndVector(vector_size));
-  }
-};
-/// @}
-
-/// @cond FLATBUFFERS_INTERNAL
-// Helpers to get a typed pointer to the root object contained in the buffer.
-template<typename T> T *GetMutableRoot(void *buf) {
-  EndianCheck();
-  return reinterpret_cast<T *>(
-      reinterpret_cast<uint8_t *>(buf) +
-      EndianScalar(*reinterpret_cast<uoffset_t *>(buf)));
-}
-
-template<typename T> const T *GetRoot(const void *buf) {
-  return GetMutableRoot<T>(const_cast<void *>(buf));
-}
-
-template<typename T> const T *GetSizePrefixedRoot(const void *buf) {
-  return GetRoot<T>(reinterpret_cast<const uint8_t *>(buf) + sizeof(uoffset_t));
-}
-
-/// Helpers to get a typed pointer to objects that are currently being built.
-/// @warning Creating new objects will lead to reallocations and invalidates
-/// the pointer!
-template<typename T>
-T *GetMutableTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset) {
-  return reinterpret_cast<T *>(fbb.GetCurrentBufferPointer() + fbb.GetSize() -
-                               offset.o);
-}
-
-template<typename T>
-const T *GetTemporaryPointer(FlatBufferBuilder &fbb, Offset<T> offset) {
-  return GetMutableTemporaryPointer<T>(fbb, offset);
-}
-
-/// @brief Get a pointer to the the file_identifier section of the buffer.
-/// @return Returns a const char pointer to the start of the file_identifier
-/// characters in the buffer.  The returned char * has length
-/// 'flatbuffers::FlatBufferBuilder::kFileIdentifierLength'.
-/// This function is UNDEFINED for FlatBuffers whose schema does not include
-/// a file_identifier (likely points at padding or the start of a the root
-/// vtable).
-inline const char *GetBufferIdentifier(const void *buf,
-                                       bool size_prefixed = false) {
-  return reinterpret_cast<const char *>(buf) +
-         ((size_prefixed) ? 2 * sizeof(uoffset_t) : sizeof(uoffset_t));
-}
-
-// Helper to see if the identifier in a buffer has the expected value.
-inline bool BufferHasIdentifier(const void *buf, const char *identifier,
-                                bool size_prefixed = false) {
-  return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier,
-                 FlatBufferBuilder::kFileIdentifierLength) == 0;
-}
-
-// Helper class to verify the integrity of a FlatBuffer
-class Verifier FLATBUFFERS_FINAL_CLASS {
- public:
-  Verifier(const uint8_t *buf, size_t buf_len, uoffset_t _max_depth = 64,
-           uoffset_t _max_tables = 1000000, bool _check_alignment = true)
-      : buf_(buf),
-        size_(buf_len),
-        depth_(0),
-        max_depth_(_max_depth),
-        num_tables_(0),
-        max_tables_(_max_tables),
-        upper_bound_(0),
-        check_alignment_(_check_alignment) {
-    FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
-  }
-
-  // Central location where any verification failures register.
-  bool Check(bool ok) const {
-    // clang-format off
-    #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
-      FLATBUFFERS_ASSERT(ok);
-    #endif
-    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
-      if (!ok)
-        upper_bound_ = 0;
-    #endif
-    // clang-format on
-    return ok;
-  }
-
-  // Verify any range within the buffer.
-  bool Verify(size_t elem, size_t elem_len) const {
-    // clang-format off
-    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
-      auto upper_bound = elem + elem_len;
-      if (upper_bound_ < upper_bound)
-        upper_bound_ =  upper_bound;
-    #endif
-    // clang-format on
-    return Check(elem_len < size_ && elem <= size_ - elem_len);
-  }
-
-  template<typename T> bool VerifyAlignment(size_t elem) const {
-    return Check((elem & (sizeof(T) - 1)) == 0 || !check_alignment_);
-  }
-
-  // Verify a range indicated by sizeof(T).
-  template<typename T> bool Verify(size_t elem) const {
-    return VerifyAlignment<T>(elem) && Verify(elem, sizeof(T));
-  }
-
-  bool VerifyFromPointer(const uint8_t *p, size_t len) {
-    auto o = static_cast<size_t>(p - buf_);
-    return Verify(o, len);
-  }
-
-  // Verify relative to a known-good base pointer.
-  bool Verify(const uint8_t *base, voffset_t elem_off, size_t elem_len) const {
-    return Verify(static_cast<size_t>(base - buf_) + elem_off, elem_len);
-  }
-
-  template<typename T>
-  bool Verify(const uint8_t *base, voffset_t elem_off) const {
-    return Verify(static_cast<size_t>(base - buf_) + elem_off, sizeof(T));
-  }
-
-  // Verify a pointer (may be NULL) of a table type.
-  template<typename T> bool VerifyTable(const T *table) {
-    return !table || table->Verify(*this);
-  }
-
-  // Verify a pointer (may be NULL) of any vector type.
-  template<typename T> bool VerifyVector(const Vector<T> *vec) const {
-    return !vec || VerifyVectorOrString(reinterpret_cast<const uint8_t *>(vec),
-                                        sizeof(T));
-  }
-
-  // Verify a pointer (may be NULL) of a vector to struct.
-  template<typename T> bool VerifyVector(const Vector<const T *> *vec) const {
-    return VerifyVector(reinterpret_cast<const Vector<T> *>(vec));
-  }
-
-  // Verify a pointer (may be NULL) to string.
-  bool VerifyString(const String *str) const {
-    size_t end;
-    return !str || (VerifyVectorOrString(reinterpret_cast<const uint8_t *>(str),
-                                         1, &end) &&
-                    Verify(end, 1) &&           // Must have terminator
-                    Check(buf_[end] == '\0'));  // Terminating byte must be 0.
-  }
-
-  // Common code between vectors and strings.
-  bool VerifyVectorOrString(const uint8_t *vec, size_t elem_size,
-                            size_t *end = nullptr) const {
-    auto veco = static_cast<size_t>(vec - buf_);
-    // Check we can read the size field.
-    if (!Verify<uoffset_t>(veco)) return false;
-    // Check the whole array. If this is a string, the byte past the array
-    // must be 0.
-    auto size = ReadScalar<uoffset_t>(vec);
-    auto max_elems = FLATBUFFERS_MAX_BUFFER_SIZE / elem_size;
-    if (!Check(size < max_elems))
-      return false;  // Protect against byte_size overflowing.
-    auto byte_size = sizeof(size) + elem_size * size;
-    if (end) *end = veco + byte_size;
-    return Verify(veco, byte_size);
-  }
-
-  // Special case for string contents, after the above has been called.
-  bool VerifyVectorOfStrings(const Vector<Offset<String>> *vec) const {
-    if (vec) {
-      for (uoffset_t i = 0; i < vec->size(); i++) {
-        if (!VerifyString(vec->Get(i))) return false;
-      }
-    }
-    return true;
-  }
-
-  // Special case for table contents, after the above has been called.
-  template<typename T> bool VerifyVectorOfTables(const Vector<Offset<T>> *vec) {
-    if (vec) {
-      for (uoffset_t i = 0; i < vec->size(); i++) {
-        if (!vec->Get(i)->Verify(*this)) return false;
-      }
-    }
-    return true;
-  }
-
-  __supress_ubsan__("unsigned-integer-overflow") bool VerifyTableStart(
-      const uint8_t *table) {
-    // Check the vtable offset.
-    auto tableo = static_cast<size_t>(table - buf_);
-    if (!Verify<soffset_t>(tableo)) return false;
-    // This offset may be signed, but doing the subtraction unsigned always
-    // gives the result we want.
-    auto vtableo = tableo - static_cast<size_t>(ReadScalar<soffset_t>(table));
-    // Check the vtable size field, then check vtable fits in its entirety.
-    return VerifyComplexity() && Verify<voffset_t>(vtableo) &&
-           VerifyAlignment<voffset_t>(ReadScalar<voffset_t>(buf_ + vtableo)) &&
-           Verify(vtableo, ReadScalar<voffset_t>(buf_ + vtableo));
-  }
-
-  template<typename T>
-  bool VerifyBufferFromStart(const char *identifier, size_t start) {
-    if (identifier && !Check((size_ >= 2 * sizeof(flatbuffers::uoffset_t) &&
-                              BufferHasIdentifier(buf_ + start, identifier)))) {
-      return false;
-    }
-
-    // Call T::Verify, which must be in the generated code for this type.
-    auto o = VerifyOffset(start);
-    return o && reinterpret_cast<const T *>(buf_ + start + o)->Verify(*this)
-    // clang-format off
-    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
-           && GetComputedSize()
-    #endif
-        ;
-    // clang-format on
-  }
-
-  // Verify this whole buffer, starting with root type T.
-  template<typename T> bool VerifyBuffer() { return VerifyBuffer<T>(nullptr); }
-
-  template<typename T> bool VerifyBuffer(const char *identifier) {
-    return VerifyBufferFromStart<T>(identifier, 0);
-  }
-
-  template<typename T> bool VerifySizePrefixedBuffer(const char *identifier) {
-    return Verify<uoffset_t>(0U) &&
-           ReadScalar<uoffset_t>(buf_) == size_ - sizeof(uoffset_t) &&
-           VerifyBufferFromStart<T>(identifier, sizeof(uoffset_t));
-  }
-
-  uoffset_t VerifyOffset(size_t start) const {
-    if (!Verify<uoffset_t>(start)) return 0;
-    auto o = ReadScalar<uoffset_t>(buf_ + start);
-    // May not point to itself.
-    if (!Check(o != 0)) return 0;
-    // Can't wrap around / buffers are max 2GB.
-    if (!Check(static_cast<soffset_t>(o) >= 0)) return 0;
-    // Must be inside the buffer to create a pointer from it (pointer outside
-    // buffer is UB).
-    if (!Verify(start + o, 1)) return 0;
-    return o;
-  }
-
-  uoffset_t VerifyOffset(const uint8_t *base, voffset_t start) const {
-    return VerifyOffset(static_cast<size_t>(base - buf_) + start);
-  }
-
-  // Called at the start of a table to increase counters measuring data
-  // structure depth and amount, and possibly bails out with false if
-  // limits set by the constructor have been hit. Needs to be balanced
-  // with EndTable().
-  bool VerifyComplexity() {
-    depth_++;
-    num_tables_++;
-    return Check(depth_ <= max_depth_ && num_tables_ <= max_tables_);
-  }
-
-  // Called at the end of a table to pop the depth count.
-  bool EndTable() {
-    depth_--;
-    return true;
-  }
-
-  // Returns the message size in bytes
-  size_t GetComputedSize() const {
-    // clang-format off
-    #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
-      uintptr_t size = upper_bound_;
-      // Align the size to uoffset_t
-      size = (size - 1 + sizeof(uoffset_t)) & ~(sizeof(uoffset_t) - 1);
-      return (size > size_) ?  0 : size;
-    #else
-      // Must turn on FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE for this to work.
-      (void)upper_bound_;
-      FLATBUFFERS_ASSERT(false);
-      return 0;
-    #endif
-    // clang-format on
-  }
-
- private:
-  const uint8_t *buf_;
-  size_t size_;
-  uoffset_t depth_;
-  uoffset_t max_depth_;
-  uoffset_t num_tables_;
-  uoffset_t max_tables_;
-  mutable size_t upper_bound_;
-  bool check_alignment_;
-};
-
-// Convenient way to bundle a buffer and its length, to pass it around
-// typed by its root.
-// A BufferRef does not own its buffer.
-struct BufferRefBase {};  // for std::is_base_of
-template<typename T> struct BufferRef : BufferRefBase {
-  BufferRef() : buf(nullptr), len(0), must_free(false) {}
-  BufferRef(uint8_t *_buf, uoffset_t _len)
-      : buf(_buf), len(_len), must_free(false) {}
-
-  ~BufferRef() {
-    if (must_free) free(buf);
-  }
-
-  const T *GetRoot() const { return flatbuffers::GetRoot<T>(buf); }
-
-  bool Verify() {
-    Verifier verifier(buf, len);
-    return verifier.VerifyBuffer<T>(nullptr);
-  }
-
-  uint8_t *buf;
-  uoffset_t len;
-  bool must_free;
-};
-
-// "structs" are flat structures that do not have an offset table, thus
-// always have all members present and do not support forwards/backwards
-// compatible extensions.
-
-class Struct FLATBUFFERS_FINAL_CLASS {
- public:
-  template<typename T> T GetField(uoffset_t o) const {
-    return ReadScalar<T>(&data_[o]);
-  }
-
-  template<typename T> T GetStruct(uoffset_t o) const {
-    return reinterpret_cast<T>(&data_[o]);
-  }
-
-  const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; }
-  uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; }
-
- private:
-  // private constructor & copy constructor: you obtain instances of this
-  // class by pointing to existing data only
-  Struct();
-  Struct(const Struct &);
-  Struct &operator=(const Struct &);
-
-  uint8_t data_[1];
-};
-
-// "tables" use an offset table (possibly shared) that allows fields to be
-// omitted and added at will, but uses an extra indirection to read.
-class Table {
- public:
-  const uint8_t *GetVTable() const {
-    return data_ - ReadScalar<soffset_t>(data_);
-  }
-
-  // This gets the field offset for any of the functions below it, or 0
-  // if the field was not present.
-  voffset_t GetOptionalFieldOffset(voffset_t field) const {
-    // The vtable offset is always at the start.
-    auto vtable = GetVTable();
-    // The first element is the size of the vtable (fields + type id + itself).
-    auto vtsize = ReadScalar<voffset_t>(vtable);
-    // If the field we're accessing is outside the vtable, we're reading older
-    // data, so it's the same as if the offset was 0 (not present).
-    return field < vtsize ? ReadScalar<voffset_t>(vtable + field) : 0;
-  }
-
-  template<typename T> T GetField(voffset_t field, T defaultval) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    return field_offset ? ReadScalar<T>(data_ + field_offset) : defaultval;
-  }
-
-  template<typename P> P GetPointer(voffset_t field) {
-    auto field_offset = GetOptionalFieldOffset(field);
-    auto p = data_ + field_offset;
-    return field_offset ? reinterpret_cast<P>(p + ReadScalar<uoffset_t>(p))
-                        : nullptr;
-  }
-  template<typename P> P GetPointer(voffset_t field) const {
-    return const_cast<Table *>(this)->GetPointer<P>(field);
-  }
-
-  template<typename P> P GetStruct(voffset_t field) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    auto p = const_cast<uint8_t *>(data_ + field_offset);
-    return field_offset ? reinterpret_cast<P>(p) : nullptr;
-  }
-
-  template<typename Raw, typename Face>
-  flatbuffers::Optional<Face> GetOptional(voffset_t field) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    auto p = data_ + field_offset;
-    return field_offset ? Optional<Face>(static_cast<Face>(ReadScalar<Raw>(p)))
-                        : Optional<Face>();
-  }
-
-  template<typename T> bool SetField(voffset_t field, T val, T def) {
-    auto field_offset = GetOptionalFieldOffset(field);
-    if (!field_offset) return IsTheSameAs(val, def);
-    WriteScalar(data_ + field_offset, val);
-    return true;
-  }
-  template<typename T> bool SetField(voffset_t field, T val) {
-    auto field_offset = GetOptionalFieldOffset(field);
-    if (!field_offset) return false;
-    WriteScalar(data_ + field_offset, val);
-    return true;
-  }
-
-  bool SetPointer(voffset_t field, const uint8_t *val) {
-    auto field_offset = GetOptionalFieldOffset(field);
-    if (!field_offset) return false;
-    WriteScalar(data_ + field_offset,
-                static_cast<uoffset_t>(val - (data_ + field_offset)));
-    return true;
-  }
-
-  uint8_t *GetAddressOf(voffset_t field) {
-    auto field_offset = GetOptionalFieldOffset(field);
-    return field_offset ? data_ + field_offset : nullptr;
-  }
-  const uint8_t *GetAddressOf(voffset_t field) const {
-    return const_cast<Table *>(this)->GetAddressOf(field);
-  }
-
-  bool CheckField(voffset_t field) const {
-    return GetOptionalFieldOffset(field) != 0;
-  }
-
-  // Verify the vtable of this table.
-  // Call this once per table, followed by VerifyField once per field.
-  bool VerifyTableStart(Verifier &verifier) const {
-    return verifier.VerifyTableStart(data_);
-  }
-
-  // Verify a particular field.
-  template<typename T>
-  bool VerifyField(const Verifier &verifier, voffset_t field) const {
-    // Calling GetOptionalFieldOffset should be safe now thanks to
-    // VerifyTable().
-    auto field_offset = GetOptionalFieldOffset(field);
-    // Check the actual field.
-    return !field_offset || verifier.Verify<T>(data_, field_offset);
-  }
-
-  // VerifyField for required fields.
-  template<typename T>
-  bool VerifyFieldRequired(const Verifier &verifier, voffset_t field) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    return verifier.Check(field_offset != 0) &&
-           verifier.Verify<T>(data_, field_offset);
-  }
-
-  // Versions for offsets.
-  bool VerifyOffset(const Verifier &verifier, voffset_t field) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    return !field_offset || verifier.VerifyOffset(data_, field_offset);
-  }
-
-  bool VerifyOffsetRequired(const Verifier &verifier, voffset_t field) const {
-    auto field_offset = GetOptionalFieldOffset(field);
-    return verifier.Check(field_offset != 0) &&
-           verifier.VerifyOffset(data_, field_offset);
-  }
-
- private:
-  // private constructor & copy constructor: you obtain instances of this
-  // class by pointing to existing data only
-  Table();
-  Table(const Table &other);
-  Table &operator=(const Table &);
-
-  uint8_t data_[1];
-};
-
-// This specialization allows avoiding warnings like:
-// MSVC C4800: type: forcing value to bool 'true' or 'false'.
-template<>
-inline flatbuffers::Optional<bool> Table::GetOptional<uint8_t, bool>(
-    voffset_t field) const {
-  auto field_offset = GetOptionalFieldOffset(field);
-  auto p = data_ + field_offset;
-  return field_offset ? Optional<bool>(ReadScalar<uint8_t>(p) != 0)
-                      : Optional<bool>();
-}
-
-template<typename T>
-void FlatBufferBuilder::Required(Offset<T> table, voffset_t field) {
-  auto table_ptr = reinterpret_cast<const Table *>(buf_.data_at(table.o));
-  bool ok = table_ptr->GetOptionalFieldOffset(field) != 0;
-  // If this fails, the caller will show what field needs to be set.
-  FLATBUFFERS_ASSERT(ok);
-  (void)ok;
-}
+namespace flatbuffers {
 
 /// @brief This can compute the start of a FlatBuffer from a root pointer, i.e.
 /// it is the opposite transformation of GetRoot().
@@ -2598,7 +56,7 @@ inline const uint8_t *GetBufferStartFromRootPointer(const void *root) {
   // file_identifier, and alignment padding) to see which points to the root.
   // None of the other values can "impersonate" the root since they will either
   // be 0 or four ASCII characters.
-  static_assert(FlatBufferBuilder::kFileIdentifierLength == sizeof(uoffset_t),
+  static_assert(flatbuffers::kFileIdentifierLength == sizeof(uoffset_t),
                 "file_identifier is assumed to be the same size as uoffset_t");
   for (auto possible_roots = FLATBUFFERS_MAX_ALIGNMENT / sizeof(uoffset_t) + 1;
        possible_roots; possible_roots--) {
@@ -2634,16 +92,9 @@ struct NativeTable {};
 /// if you wish. The resolver does the opposite lookup, for when the object
 /// is being serialized again.
 typedef uint64_t hash_value_t;
-// clang-format off
-#ifdef FLATBUFFERS_CPP98_STL
-  typedef void (*resolver_function_t)(void **pointer_adr, hash_value_t hash);
-  typedef hash_value_t (*rehasher_function_t)(void *pointer);
-#else
-  typedef std::function<void (void **pointer_adr, hash_value_t hash)>
-          resolver_function_t;
-  typedef std::function<hash_value_t (void *pointer)> rehasher_function_t;
-#endif
-// clang-format on
+typedef std::function<void(void **pointer_adr, hash_value_t hash)>
+    resolver_function_t;
+typedef std::function<hash_value_t(void *pointer)> rehasher_function_t;
 
 // Helper function to test if a field is present, using any of the field
 // enums in the generated code.
@@ -2700,7 +151,7 @@ inline int LookupEnum(const char **names, const char *name) {
 
 // Minimal reflection via code generation.
 // Besides full-fat reflection (see reflection.h) and parsing/printing by
-// loading schemas (see idl.h), we can also have code generation for mimimal
+// loading schemas (see idl.h), we can also have code generation for minimal
 // reflection data which allows pretty-printing and other uses without needing
 // a schema or a parser.
 // Generate code with --reflect-types (types only) or --reflect-names (names
@@ -2745,10 +196,16 @@ inline const char * const *ElementaryTypeNames() {
 // clang-format on
 
 // Basic type info cost just 16bits per field!
+// We're explicitly defining the signedness since the signedness of integer
+// bitfields is otherwise implementation-defined and causes warnings on older
+// GCC compilers.
 struct TypeCode {
-  uint16_t base_type : 4;  // ElementaryType
-  uint16_t is_repeating : 1;  // Either vector (in table) or array (in struct)
-  int16_t sequence_ref : 11;  // Index into type_refs below, or -1 for none.
+  // ElementaryType
+  unsigned short base_type : 4;
+  // Either vector (in table) or array (in struct)
+  unsigned short is_repeating : 1;
+  // Index into type_refs below, or -1 for none.
+  signed short sequence_ref : 11;
 };
 
 static_assert(sizeof(TypeCode) == 2, "TypeCode");
@@ -2769,27 +226,13 @@ struct TypeTable {
 };
 
 // String which identifies the current version of FlatBuffers.
-// flatbuffer_version_string is used by Google developers to identify which
-// applications uploaded to Google Play are using this library.  This allows
-// the development team at Google to determine the popularity of the library.
-// How it works: Applications that are uploaded to the Google Play Store are
-// scanned for this version string.  We track which applications are using it
-// to measure popularity.  You are free to remove it (of course) but we would
-// appreciate if you left it in.
+inline const char *flatbuffers_version_string() {
+  return "FlatBuffers " FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "."
+      FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "."
+      FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION);
+}
 
-// Weak linkage is culled by VS & doesn't work on cygwin.
 // clang-format off
-#if !defined(_WIN32) && !defined(__CYGWIN__)
-
-extern volatile __attribute__((weak)) const char *flatbuffer_version_string;
-volatile __attribute__((weak)) const char *flatbuffer_version_string =
-  "FlatBuffers "
-  FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "."
-  FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "."
-  FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION);
-
-#endif  // !defined(_WIN32) && !defined(__CYGWIN__)
-
 #define FLATBUFFERS_DEFINE_BITMASK_OPERATORS(E, T)\
     inline E operator | (E lhs, E rhs){\
         return E(T(lhs) | T(rhs));\
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h
index b36d306..7930949 100644
--- a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h
+++ b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/flexbuffers.h
@@ -19,9 +19,9 @@
 
 #include <map>
 // Used to select STL variant.
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_base.h"
 // We use the basic binary writing functions from the regular FlatBuffers.
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/util.h"
+#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/fb_util.h"
 
 #ifdef _MSC_VER
 #  include <intrin.h>
@@ -53,7 +53,7 @@ enum Type {
   FBT_INT = 1,
   FBT_UINT = 2,
   FBT_FLOAT = 3,
-  // Types above stored inline, types below store an offset.
+  // Types above stored inline, types below (except FBT_BOOL) store an offset.
   FBT_KEY = 4,
   FBT_STRING = 5,
   FBT_INDIRECT_INT = 6,
@@ -81,6 +81,8 @@ enum Type {
   FBT_BOOL = 26,
   FBT_VECTOR_BOOL =
       36,  // To Allow the same type of conversion of type to vector type
+
+  FBT_MAX_TYPE = 37
 };
 
 inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; }
@@ -154,8 +156,10 @@ inline uint64_t ReadUInt64(const uint8_t *data, uint8_t byte_width) {
   // TODO: GCC apparently replaces memcpy by a rep movsb, but only if count is a
   // constant, which here it isn't. Test if memcpy is still faster than
   // the conditionals in ReadSizedScalar. Can also use inline asm.
+
   // clang-format off
-  #if defined(_MSC_VER) && (defined(_M_X64) || defined _M_IX86)
+  #if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
+  // This is 64-bit Windows only, __movsb does not work on 32-bit Windows.
     uint64_t u = 0;
     __movsb(reinterpret_cast<uint8_t *>(&u),
             reinterpret_cast<const uint8_t *>(data), byte_width);
@@ -319,8 +323,8 @@ class FixedTypedVector : public Object {
     return data_ == FixedTypedVector::EmptyFixedTypedVector().data_;
   }
 
-  Type ElementType() { return type_; }
-  uint8_t size() { return len_; }
+  Type ElementType() const { return type_; }
+  uint8_t size() const { return len_; }
 
  private:
   Type type_;
@@ -368,10 +372,7 @@ void AppendToString(std::string &s, T &&v, bool keys_quoted) {
 class Reference {
  public:
   Reference()
-      : data_(nullptr),
-        parent_width_(0),
-        byte_width_(BIT_WIDTH_8),
-        type_(FBT_NULL) {}
+      : data_(nullptr), parent_width_(0), byte_width_(0), type_(FBT_NULL) {}
 
   Reference(const uint8_t *data, uint8_t parent_width, uint8_t byte_width,
             Type type)
@@ -494,12 +495,17 @@ class Reference {
         case FBT_NULL: return 0.0;
         case FBT_STRING: {
 #if 1
+#if !defined( _MSC_VER)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wnull-dereference"
-          // TODO(b/173239141): Patched via micro/tools/make/flexbuffers_download.sh
+#endif
+          // See b/173239141 for additional context. Patched via
+          // micro/tools/make/flexbuffers_download.sh
           // Introduce a segfault for an unsupported code path for TFLM.
           return *(static_cast<double*>(nullptr));
+#if !defined( _MSC_VER)
 #pragma GCC diagnostic pop
+#endif
 #else
           // This is the original code
           double d;
@@ -582,7 +588,23 @@ class Reference {
       auto keys = m.Keys();
       auto vals = m.Values();
       for (size_t i = 0; i < keys.size(); i++) {
-        keys[i].ToString(true, keys_quoted, s);
+        bool kq = keys_quoted;
+        if (!kq) {
+          // FlexBuffers keys may contain arbitrary characters, only allow
+          // unquoted if it looks like an "identifier":
+          const char *p = keys[i].AsKey();
+          if (!flatbuffers::is_alpha(*p) && *p != '_') {
+            kq = true;
+          } else {
+            while (*++p) {
+              if (!flatbuffers::is_alnum(*p) && *p != '_') {
+                kq = true;
+                break;
+              }
+            }
+          }
+        }
+        keys[i].ToString(true, kq, s);
         s += ": ";
         vals[i].ToString(true, keys_quoted, s);
         if (i < keys.size() - 1) s += ", ";
@@ -766,6 +788,8 @@ class Reference {
     return false;
   }
 
+  friend class Verifier;
+
   const uint8_t *data_;
   uint8_t parent_width_;
   uint8_t byte_width_;
@@ -860,6 +884,7 @@ inline Reference Map::operator[](const char *key) const {
     case 2: comp = KeyCompare<uint16_t>; break;
     case 4: comp = KeyCompare<uint32_t>; break;
     case 8: comp = KeyCompare<uint64_t>; break;
+    default: FLATBUFFERS_ASSERT(false); return Reference();
   }
   auto res = std::bsearch(key, keys.data_, keys.size(), keys.byte_width_, comp);
   if (!res) return Reference(nullptr, 1, NullPackedType());
@@ -882,7 +907,7 @@ inline Reference GetRoot(const uint8_t *buffer, size_t size) {
 }
 
 inline Reference GetRoot(const std::vector<uint8_t> &buffer) {
-  return GetRoot(flatbuffers::vector_data(buffer), buffer.size());
+  return GetRoot(buffer.data(), buffer.size());
 }
 
 // Flags that configure how the Builder behaves.
@@ -910,6 +935,7 @@ class Builder FLATBUFFERS_FINAL_CLASS {
           BuilderFlag flags = BUILDER_FLAG_SHARE_KEYS)
       : buf_(initial_size),
         finished_(false),
+        has_duplicate_keys_(false),
         flags_(flags),
         force_min_bit_width_(BIT_WIDTH_8),
         key_pool(KeyOffsetCompare(buf_)),
@@ -917,6 +943,11 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     buf_.clear();
   }
 
+#ifdef FLATBUFFERS_DEFAULT_DECLARATION
+  Builder(Builder &&) = default;
+  Builder &operator=(Builder &&) = default;
+#endif
+
   /// @brief Get the serialized buffer (after you call `Finish()`).
   /// @return Returns a vector owned by this class.
   const std::vector<uint8_t> &GetBuffer() const {
@@ -1072,7 +1103,16 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     return CreateBlob(data, len, 0, FBT_BLOB);
   }
   size_t Blob(const std::vector<uint8_t> &v) {
-    return CreateBlob(flatbuffers::vector_data(v), v.size(), 0, FBT_BLOB);
+    return CreateBlob(v.data(), v.size(), 0, FBT_BLOB);
+  }
+
+  void Blob(const char *key, const void *data, size_t len) {
+    Key(key);
+    Blob(data, len);
+  }
+  void Blob(const char *key, const std::vector<uint8_t> &v) {
+    Key(key);
+    Blob(v);
   }
 
   // TODO(wvo): support all the FlexBuffer types (like flexbuffers::String),
@@ -1090,7 +1130,7 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     return stack_.size();
   }
 
-  // TODO(wvo): allow this to specify an aligment greater than the natural
+  // TODO(wvo): allow this to specify an alignment greater than the natural
   // alignment.
   size_t EndVector(size_t start, bool typed, bool fixed) {
     auto vec = CreateVector(start, stack_.size() - start, 1, typed, fixed);
@@ -1125,23 +1165,24 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     // step automatically when appliccable, and encourage people to write in
     // sorted fashion.
     // std::sort is typically already a lot faster on sorted data though.
-    auto dict =
-        reinterpret_cast<TwoValue *>(flatbuffers::vector_data(stack_) + start);
-    std::sort(dict, dict + len,
-              [&](const TwoValue &a, const TwoValue &b) -> bool {
-                auto as = reinterpret_cast<const char *>(
-                    flatbuffers::vector_data(buf_) + a.key.u_);
-                auto bs = reinterpret_cast<const char *>(
-                    flatbuffers::vector_data(buf_) + b.key.u_);
-                auto comp = strcmp(as, bs);
-                // If this assertion hits, you've added two keys with the same
-                // value to this map.
-                // TODO: Have to check for pointer equality, as some sort
-                // implementation apparently call this function with the same
-                // element?? Why?
-                FLATBUFFERS_ASSERT(comp || &a == &b);
-                return comp < 0;
-              });
+    auto dict = reinterpret_cast<TwoValue *>(stack_.data() + start);
+    std::sort(
+        dict, dict + len, [&](const TwoValue &a, const TwoValue &b) -> bool {
+          auto as = reinterpret_cast<const char *>(buf_.data() + a.key.u_);
+          auto bs = reinterpret_cast<const char *>(buf_.data() + b.key.u_);
+          auto comp = strcmp(as, bs);
+          // We want to disallow duplicate keys, since this results in a
+          // map where values cannot be found.
+          // But we can't assert here (since we don't want to fail on
+          // random JSON input) or have an error mechanism.
+          // Instead, we set has_duplicate_keys_ in the builder to
+          // signal this.
+          // TODO: Have to check for pointer equality, as some sort
+          // implementation apparently call this function with the same
+          // element?? Why?
+          if (!comp && &a != &b) has_duplicate_keys_ = true;
+          return comp < 0;
+        });
     // First create a vector out of all keys.
     // TODO(wvo): if kBuilderFlagShareKeyVectors is true, see if we can share
     // the first vector.
@@ -1153,6 +1194,10 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     return static_cast<size_t>(vec.u_);
   }
 
+  // Call this after EndMap to see if the map had any duplicate keys.
+  // Any map with such keys won't be able to retrieve all values.
+  bool HasDuplicateKeys() const { return has_duplicate_keys_; }
+
   template<typename F> size_t Vector(F f) {
     auto start = StartVector();
     f();
@@ -1191,7 +1236,7 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     Vector(elems, len);
   }
   template<typename T> void Vector(const std::vector<T> &vec) {
-    Vector(flatbuffers::vector_data(vec), vec.size());
+    Vector(vec.data(), vec.size());
   }
 
   template<typename F> size_t TypedVector(F f) {
@@ -1548,9 +1593,9 @@ class Builder FLATBUFFERS_FINAL_CLASS {
         }
       }
     }
-    // If you get this assert, your fixed types are not one of:
+    // If you get this assert, your typed types are not one of:
     // Int / UInt / Float / Key.
-    FLATBUFFERS_ASSERT(!fixed || IsTypedVectorElementType(vector_type));
+    FLATBUFFERS_ASSERT(!typed || IsTypedVectorElementType(vector_type));
     auto byte_width = Align(bit_width);
     // Write vector. First the keys width/offset if available, and size.
     if (keys) {
@@ -1584,6 +1629,7 @@ class Builder FLATBUFFERS_FINAL_CLASS {
   std::vector<Value> stack_;
 
   bool finished_;
+  bool has_duplicate_keys_;
 
   BuilderFlag flags_;
 
@@ -1592,10 +1638,8 @@ class Builder FLATBUFFERS_FINAL_CLASS {
   struct KeyOffsetCompare {
     explicit KeyOffsetCompare(const std::vector<uint8_t> &buf) : buf_(&buf) {}
     bool operator()(size_t a, size_t b) const {
-      auto stra =
-          reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + a);
-      auto strb =
-          reinterpret_cast<const char *>(flatbuffers::vector_data(*buf_) + b);
+      auto stra = reinterpret_cast<const char *>(buf_->data() + a);
+      auto strb = reinterpret_cast<const char *>(buf_->data() + b);
       return strcmp(stra, strb) < 0;
     }
     const std::vector<uint8_t> *buf_;
@@ -1606,11 +1650,10 @@ class Builder FLATBUFFERS_FINAL_CLASS {
     explicit StringOffsetCompare(const std::vector<uint8_t> &buf)
         : buf_(&buf) {}
     bool operator()(const StringOffset &a, const StringOffset &b) const {
-      auto stra = reinterpret_cast<const char *>(
-          flatbuffers::vector_data(*buf_) + a.first);
-      auto strb = reinterpret_cast<const char *>(
-          flatbuffers::vector_data(*buf_) + b.first);
-      return strncmp(stra, strb, (std::min)(a.second, b.second) + 1) < 0;
+      auto stra = buf_->data() + a.first;
+      auto strb = buf_->data() + b.first;
+      auto cr = memcmp(stra, strb, (std::min)(a.second, b.second) + 1);
+      return cr < 0 || (cr == 0 && a.second < b.second);
     }
     const std::vector<uint8_t> *buf_;
   };
@@ -1620,8 +1663,237 @@ class Builder FLATBUFFERS_FINAL_CLASS {
 
   KeyOffsetMap key_pool;
   StringOffsetMap string_pool;
+
+  friend class Verifier;
 };
 
+// Helper class to verify the integrity of a FlexBuffer
+class Verifier FLATBUFFERS_FINAL_CLASS {
+ public:
+  Verifier(const uint8_t *buf, size_t buf_len,
+           // Supplying this vector likely results in faster verification
+           // of larger buffers with many shared keys/strings, but
+           // comes at the cost of using additional memory the same size of
+           // the buffer being verified, so it is by default off.
+           std::vector<uint8_t> *reuse_tracker = nullptr,
+           bool _check_alignment = true, size_t max_depth = 64)
+      : buf_(buf),
+        size_(buf_len),
+        depth_(0),
+        max_depth_(max_depth),
+        num_vectors_(0),
+        max_vectors_(buf_len),
+        check_alignment_(_check_alignment),
+        reuse_tracker_(reuse_tracker) {
+    FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
+    if (reuse_tracker_) {
+      reuse_tracker_->clear();
+      reuse_tracker_->resize(size_, PackedType(BIT_WIDTH_8, FBT_NULL));
+    }
+  }
+
+ private:
+  // Central location where any verification failures register.
+  bool Check(bool ok) const {
+    // clang-format off
+    #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
+      FLATBUFFERS_ASSERT(ok);
+    #endif
+    // clang-format on
+    return ok;
+  }
+
+  // Verify any range within the buffer.
+  bool VerifyFrom(size_t elem, size_t elem_len) const {
+    return Check(elem_len < size_ && elem <= size_ - elem_len);
+  }
+  bool VerifyBefore(size_t elem, size_t elem_len) const {
+    return Check(elem_len <= elem);
+  }
+
+  bool VerifyFromPointer(const uint8_t *p, size_t len) {
+    auto o = static_cast<size_t>(p - buf_);
+    return VerifyFrom(o, len);
+  }
+  bool VerifyBeforePointer(const uint8_t *p, size_t len) {
+    auto o = static_cast<size_t>(p - buf_);
+    return VerifyBefore(o, len);
+  }
+
+  bool VerifyByteWidth(size_t width) {
+    return Check(width == 1 || width == 2 || width == 4 || width == 8);
+  }
+
+  bool VerifyType(int type) { return Check(type >= 0 && type < FBT_MAX_TYPE); }
+
+  bool VerifyOffset(uint64_t off, const uint8_t *p) {  // p - off >= buf_
+    // Both bounds go through Check() so every failure registers centrally.
+    return Check(off <= static_cast<uint64_t>(size_) &&
+                 off <= static_cast<uint64_t>(p - buf_));
+
+  bool VerifyAlignment(const uint8_t *p, size_t size) const {
+    auto o = static_cast<size_t>(p - buf_);
+    return Check((o & (size - 1)) == 0 || !check_alignment_);
+  }
+
+// Macro, since we want to escape from parent function & use lazy args.
+#define FLEX_CHECK_VERIFIED(P, PACKED_TYPE)                     \
+  if (reuse_tracker_) {                                         \
+    auto packed_type = PACKED_TYPE;                             \
+    auto existing = (*reuse_tracker_)[P - buf_];                \
+    if (existing == packed_type) return true;                   \
+    /* Fail verification if already set with different type! */ \
+    if (!Check(existing == 0)) return false;                    \
+    (*reuse_tracker_)[P - buf_] = packed_type;                  \
+  }
+
+  bool VerifyVector(Reference r, const uint8_t *p, Type elem_type) {
+    auto size_byte_width = r.byte_width_;
+    if (!VerifyBeforePointer(p, size_byte_width)) return false;
+    FLEX_CHECK_VERIFIED(p - size_byte_width,
+                        PackedType(Builder::WidthB(size_byte_width), r.type_));
+    // Any kind of nesting goes thru this function, so guard against that here.
+    // Counted only after the reuse check: its early return must not leak depth.
+    depth_++;
+    num_vectors_++;
+    if (!Check(depth_ <= max_depth_ && num_vectors_ <= max_vectors_))
+      return false;
+    auto sized = Sized(p, size_byte_width);
+    auto num_elems = sized.size();
+    auto elem_byte_width = r.type_ == FBT_STRING || r.type_ == FBT_BLOB
+                               ? uint8_t(1)
+                               : r.byte_width_;
+    auto max_elems = SIZE_MAX / elem_byte_width;
+    if (!Check(num_elems < max_elems))
+      return false;  // Protect against byte_size overflowing.
+    auto byte_size = num_elems * elem_byte_width;
+    if (!VerifyFromPointer(p, byte_size)) return false;
+    if (elem_type == FBT_NULL) {
+      // Verify type bytes after the vector.
+      if (!VerifyFromPointer(p + byte_size, num_elems)) return false;
+      auto v = Vector(p, size_byte_width);
+      for (size_t i = 0; i < num_elems; i++)
+        if (!VerifyRef(v[i])) return false;
+    } else if (elem_type == FBT_KEY) {
+      auto v = TypedVector(p, elem_byte_width, FBT_KEY);
+      for (size_t i = 0; i < num_elems; i++)
+        if (!VerifyRef(v[i])) return false;
+    } else {
+      FLATBUFFERS_ASSERT(IsInline(elem_type));
+    }
+    depth_--;
+    return true;
+  }
+
+  bool VerifyKeys(const uint8_t *p, uint8_t byte_width) {
+    // The vector part of the map has already been verified.
+    const size_t num_prefixed_fields = 3;
+    if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields)) return false;
+    p -= byte_width * num_prefixed_fields;
+    auto off = ReadUInt64(p, byte_width);
+    if (!VerifyOffset(off, p)) return false;
+    auto key_byte_with =
+        static_cast<uint8_t>(ReadUInt64(p + byte_width, byte_width));
+    if (!VerifyByteWidth(key_byte_with)) return false;
+    return VerifyVector(Reference(p, byte_width, key_byte_with, FBT_VECTOR_KEY),
+                        p - off, FBT_KEY);
+  }
+
+  bool VerifyKey(const uint8_t *p) {  // True iff the key is NUL-terminated.
+    FLEX_CHECK_VERIFIED(p, PackedType(BIT_WIDTH_8, FBT_KEY));
+    while (p < buf_ + size_)
+      if (!*p++) return true;  // Found the terminator inside the buffer.
+    return false;  // Ran off the end of the buffer without a terminator.
+  }
+
+#undef FLEX_CHECK_VERIFIED
+
+  bool VerifyTerminator(const String &s) {
+    return VerifyFromPointer(reinterpret_cast<const uint8_t *>(s.c_str()),
+                             s.size() + 1);
+  }
+
+  bool VerifyRef(Reference r) {
+    // r.parent_width_ and r.data_ already verified.
+    if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) {
+      return false;
+    }
+    if (IsInline(r.type_)) {
+      // Inline scalars, don't require further verification.
+      return true;
+    }
+    // All remaining types are an offset.
+    auto off = ReadUInt64(r.data_, r.parent_width_);
+    if (!VerifyOffset(off, r.data_)) return false;
+    auto p = r.Indirect();
+    if (!VerifyAlignment(p, r.byte_width_)) return false;
+    switch (r.type_) {
+      case FBT_INDIRECT_INT:
+      case FBT_INDIRECT_UINT:
+      case FBT_INDIRECT_FLOAT: return VerifyFromPointer(p, r.byte_width_);
+      case FBT_KEY: return VerifyKey(p);
+      case FBT_MAP:
+        return VerifyVector(r, p, FBT_NULL) && VerifyKeys(p, r.byte_width_);
+      case FBT_VECTOR: return VerifyVector(r, p, FBT_NULL);
+      case FBT_VECTOR_INT: return VerifyVector(r, p, FBT_INT);
+      case FBT_VECTOR_BOOL:
+      case FBT_VECTOR_UINT: return VerifyVector(r, p, FBT_UINT);
+      case FBT_VECTOR_FLOAT: return VerifyVector(r, p, FBT_FLOAT);
+      case FBT_VECTOR_KEY: return VerifyVector(r, p, FBT_KEY);
+      case FBT_VECTOR_STRING_DEPRECATED:
+        // Use of FBT_KEY here intentional, see elsewhere.
+        return VerifyVector(r, p, FBT_KEY);
+      case FBT_BLOB: return VerifyVector(r, p, FBT_UINT);
+      case FBT_STRING:
+        return VerifyVector(r, p, FBT_UINT) &&
+               VerifyTerminator(String(p, r.byte_width_));
+      case FBT_VECTOR_INT2:
+      case FBT_VECTOR_UINT2:
+      case FBT_VECTOR_FLOAT2:
+      case FBT_VECTOR_INT3:
+      case FBT_VECTOR_UINT3:
+      case FBT_VECTOR_FLOAT3:
+      case FBT_VECTOR_INT4:
+      case FBT_VECTOR_UINT4:
+      case FBT_VECTOR_FLOAT4: {
+        uint8_t len = 0;
+        auto vtype = ToFixedTypedVectorElementType(r.type_, &len);
+        if (!VerifyType(vtype)) return false;
+        return VerifyFromPointer(p, r.byte_width_ * len);
+      }
+      default: return false;
+    }
+  }
+
+ public:
+  bool VerifyBuffer() {  // Verifies the whole buffer starting at its root.
+    if (!Check(size_ >= 3)) return false;
+    auto end = buf_ + size_;
+    auto byte_width = *--end;   // Last byte: byte width of the root value.
+    auto packed_type = *--end;  // Second-to-last byte: root packed type.
+    return VerifyByteWidth(byte_width) && Check(end - buf_ >= byte_width) &&
+           VerifyRef(Reference(end - byte_width, byte_width, packed_type));
+  }
+
+ private:
+  const uint8_t *buf_;
+  size_t size_;
+  size_t depth_;
+  const size_t max_depth_;
+  size_t num_vectors_;
+  const size_t max_vectors_;
+  bool check_alignment_;
+  std::vector<uint8_t> *reuse_tracker_;
+};
+
+// Utility function that constructs the Verifier for you and runs
+// VerifyBuffer() on it; see the Verifier constructor for the parameters.
+inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len,
+                         std::vector<uint8_t> *reuse_tracker = nullptr) {
+  Verifier verifier(buf, buf_len, reuse_tracker);
+  return verifier.VerifyBuffer();
+}
+
 }  // namespace flexbuffers
 
 #if defined(_MSC_VER)
diff --git a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/stl_emulation.h b/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/stl_emulation.h
deleted file mode 100644
index f745d3a..0000000
--- a/edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/stl_emulation.h
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- * Copyright 2017 Google Inc. All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef FLATBUFFERS_STL_EMULATION_H_
-#define FLATBUFFERS_STL_EMULATION_H_
-
-// clang-format off
-#include "edge-impulse-sdk/third_party/flatbuffers/include/flatbuffers/base.h"
-
-#include <string>
-#include <type_traits>
-#include <vector>
-#include <memory>
-#include <limits>
-
-// Detect C++17 compatible compiler.
-// __cplusplus >= 201703L - a compiler has support of 'static inline' variables.
-#if defined(FLATBUFFERS_USE_STD_OPTIONAL) \
-    || (defined(__cplusplus) && __cplusplus >= 201703L) \
-    || (defined(_MSVC_LANG) &&  (_MSVC_LANG >= 201703L))
-  #include <optional>
-  #ifndef FLATBUFFERS_USE_STD_OPTIONAL
-    #define FLATBUFFERS_USE_STD_OPTIONAL
-  #endif
-#endif
-
-#if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL)
-  #define FLATBUFFERS_CPP98_STL
-#endif  // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL)
-
-#if defined(FLATBUFFERS_CPP98_STL)
-  #include <cctype>
-#endif  // defined(FLATBUFFERS_CPP98_STL)
-
-// This header provides backwards compatibility for C++98 STLs like stlport.
-namespace flatbuffers {
-
-// Retrieve ::back() from a string in a way that is compatible with pre C++11
-// STLs (e.g stlport).
-inline char& string_back(std::string &value) {
-  return value[value.length() - 1];
-}
-
-inline char string_back(const std::string &value) {
-  return value[value.length() - 1];
-}
-
-// Helper method that retrieves ::data() from a vector in a way that is
-// compatible with pre C++11 STLs (e.g stlport).
-template <typename T> inline T *vector_data(std::vector<T> &vector) {
-  // In some debug environments, operator[] does bounds checking, so &vector[0]
-  // can't be used.
-  return vector.empty() ? nullptr : &vector[0];
-}
-
-template <typename T> inline const T *vector_data(
-    const std::vector<T> &vector) {
-  return vector.empty() ? nullptr : &vector[0];
-}
-
-template <typename T, typename V>
-inline void vector_emplace_back(std::vector<T> *vector, V &&data) {
-  #if defined(FLATBUFFERS_CPP98_STL)
-    vector->push_back(data);
-  #else
-    vector->emplace_back(std::forward<V>(data));
-  #endif  // defined(FLATBUFFERS_CPP98_STL)
-}
-
-#ifndef FLATBUFFERS_CPP98_STL
-  #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
-    template <typename T>
-    using numeric_limits = std::numeric_limits<T>;
-  #else
-    template <typename T> class numeric_limits :
-      public std::numeric_limits<T> {};
-  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
-#else
-  template <typename T> class numeric_limits :
-      public std::numeric_limits<T> {
-    public:
-      // Android NDK fix.
-      static T lowest() {
-        return std::numeric_limits<T>::min();
-      }
-  };
-
-  template <> class numeric_limits<float> :
-      public std::numeric_limits<float> {
-    public:
-      static float lowest() { return -FLT_MAX; }
-  };
-
-  template <> class numeric_limits<double> :
-      public std::numeric_limits<double> {
-    public:
-      static double lowest() { return -DBL_MAX; }
-  };
-
-  template <> class numeric_limits<unsigned long long> {
-   public:
-    static unsigned long long min() { return 0ULL; }
-    static unsigned long long max() { return ~0ULL; }
-    static unsigned long long lowest() {
-      return numeric_limits<unsigned long long>::min();
-    }
-  };
-
-  template <> class numeric_limits<long long> {
-   public:
-    static long long min() {
-      return static_cast<long long>(1ULL << ((sizeof(long long) << 3) - 1));
-    }
-    static long long max() {
-      return static_cast<long long>(
-          (1ULL << ((sizeof(long long) << 3) - 1)) - 1);
-    }
-    static long long lowest() {
-      return numeric_limits<long long>::min();
-    }
-  };
-#endif  // FLATBUFFERS_CPP98_STL
-
-#if defined(FLATBUFFERS_TEMPLATES_ALIASES)
-  #ifndef FLATBUFFERS_CPP98_STL
-    template <typename T> using is_scalar = std::is_scalar<T>;
-    template <typename T, typename U> using is_same = std::is_same<T,U>;
-    template <typename T> using is_floating_point = std::is_floating_point<T>;
-    template <typename T> using is_unsigned = std::is_unsigned<T>;
-    template <typename T> using is_enum = std::is_enum<T>;
-    template <typename T> using make_unsigned = std::make_unsigned<T>;
-    template<bool B, class T, class F>
-    using conditional = std::conditional<B, T, F>;
-    template<class T, T v>
-    using integral_constant = std::integral_constant<T, v>;
-  #else
-    // Map C++ TR1 templates defined by stlport.
-    template <typename T> using is_scalar = std::tr1::is_scalar<T>;
-    template <typename T, typename U> using is_same = std::tr1::is_same<T,U>;
-    template <typename T> using is_floating_point =
-        std::tr1::is_floating_point<T>;
-    template <typename T> using is_unsigned = std::tr1::is_unsigned<T>;
-    template <typename T> using is_enum = std::tr1::is_enum<T>;
-    // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned.
-    template<typename T> struct make_unsigned {
-      static_assert(is_unsigned<T>::value, "Specialization not implemented!");
-      using type = T;
-    };
-    template<> struct make_unsigned<char> { using type = unsigned char; };
-    template<> struct make_unsigned<short> { using type = unsigned short; };
-    template<> struct make_unsigned<int> { using type = unsigned int; };
-    template<> struct make_unsigned<long> { using type = unsigned long; };
-    template<>
-    struct make_unsigned<long long> { using type = unsigned long long; };
-    template<bool B, class T, class F>
-    using conditional = std::tr1::conditional<B, T, F>;
-    template<class T, T v>
-    using integral_constant = std::tr1::integral_constant<T, v>;
-  #endif  // !FLATBUFFERS_CPP98_STL
-#else
-  // MSVC 2010 doesn't support C++11 aliases.
-  template <typename T> struct is_scalar : public std::is_scalar<T> {};
-  template <typename T, typename U> struct is_same : public std::is_same<T,U> {};
-  template <typename T> struct is_floating_point :
-        public std::is_floating_point<T> {};
-  template <typename T> struct is_unsigned : public std::is_unsigned<T> {};
-  template <typename T> struct is_enum : public std::is_enum<T> {};
-  template <typename T> struct make_unsigned : public std::make_unsigned<T> {};
-  template<bool B, class T, class F>
-  struct conditional : public std::conditional<B, T, F> {};
-  template<class T, T v>
-  struct integral_constant : public std::integral_constant<T, v> {};
-#endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
-
-#ifndef FLATBUFFERS_CPP98_STL
-  #if defined(FLATBUFFERS_TEMPLATES_ALIASES)
-    template <class T> using unique_ptr = std::unique_ptr<T>;
-  #else
-    // MSVC 2010 doesn't support C++11 aliases.
-    // We're manually "aliasing" the class here as we want to bring unique_ptr
-    // into the flatbuffers namespace.  We have unique_ptr in the flatbuffers
-    // namespace we have a completely independent implementation (see below)
-    // for C++98 STL implementations.
-    template <class T> class unique_ptr : public std::unique_ptr<T> {
-     public:
-      unique_ptr() {}
-      explicit unique_ptr(T* p) : std::unique_ptr<T>(p) {}
-      unique_ptr(std::unique_ptr<T>&& u) { *this = std::move(u); }
-      unique_ptr(unique_ptr&& u) { *this = std::move(u); }
-      unique_ptr& operator=(std::unique_ptr<T>&& u) {
-        std::unique_ptr<T>::reset(u.release());
-        return *this;
-      }
-      unique_ptr& operator=(unique_ptr&& u) {
-        std::unique_ptr<T>::reset(u.release());
-        return *this;
-      }
-      unique_ptr& operator=(T* p) {
-        return std::unique_ptr<T>::operator=(p);
-      }
-    };
-  #endif  // defined(FLATBUFFERS_TEMPLATES_ALIASES)
-#else
-  // Very limited implementation of unique_ptr.
-  // This is provided simply to allow the C++ code generated from the default
-  // settings to function in C++98 environments with no modifications.
-  template <class T> class unique_ptr {
-   public:
-    typedef T element_type;
-
-    unique_ptr() : ptr_(nullptr) {}
-    explicit unique_ptr(T* p) : ptr_(p) {}
-    unique_ptr(unique_ptr&& u) : ptr_(nullptr) { reset(u.release()); }
-    unique_ptr(const unique_ptr& u) : ptr_(nullptr) {
-      reset(const_cast<unique_ptr*>(&u)->release());
-    }
-    ~unique_ptr() { reset(); }
-
-    unique_ptr& operator=(const unique_ptr& u) {
-      reset(const_cast<unique_ptr*>(&u)->release());
-      return *this;
-    }
-
-    unique_ptr& operator=(unique_ptr&& u) {
-      reset(u.release());
-      return *this;
-    }
-
-    unique_ptr& operator=(T* p) {
-      reset(p);
-      return *this;
-    }
-
-    const T& operator*() const { return *ptr_; }
-    T* operator->() const { return ptr_; }
-    T* get() const noexcept { return ptr_; }
-    explicit operator bool() const { return ptr_ != nullptr; }
-
-    // modifiers
-    T* release() {
-      T* value = ptr_;
-      ptr_ = nullptr;
-      return value;
-    }
-
-    void reset(T* p = nullptr) {
-      T* value = ptr_;
-      ptr_ = p;
-      if (value) delete value;
-    }
-
-    void swap(unique_ptr& u) {
-      T* temp_ptr = ptr_;
-      ptr_ = u.ptr_;
-      u.ptr_ = temp_ptr;
-    }
-
-   private:
-    T* ptr_;
-  };
-
-  template <class T> bool operator==(const unique_ptr<T>& x,
-                                     const unique_ptr<T>& y) {
-    return x.get() == y.get();
-  }
-
-  template <class T, class D> bool operator==(const unique_ptr<T>& x,
-                                              const D* y) {
-    return static_cast<D*>(x.get()) == y;
-  }
-
-  template <class T> bool operator==(const unique_ptr<T>& x, intptr_t y) {
-    return reinterpret_cast<intptr_t>(x.get()) == y;
-  }
-
-  template <class T> bool operator!=(const unique_ptr<T>& x, decltype(nullptr)) {
-    return !!x;
-  }
-
-  template <class T> bool operator!=(decltype(nullptr), const unique_ptr<T>& x) {
-    return !!x;
-  }
-
-  template <class T> bool operator==(const unique_ptr<T>& x, decltype(nullptr)) {
-    return !x;
-  }
-
-  template <class T> bool operator==(decltype(nullptr), const unique_ptr<T>& x) {
-    return !x;
-  }
-
-#endif  // !FLATBUFFERS_CPP98_STL
-
-#ifdef FLATBUFFERS_USE_STD_OPTIONAL
-template<class T>
-using Optional = std::optional<T>;
-using nullopt_t = std::nullopt_t;
-inline constexpr nullopt_t nullopt = std::nullopt;
-
-#else
-// Limited implementation of Optional<T> type for a scalar T.
-// This implementation limited by trivial types compatible with
-// std::is_arithmetic<T> or std::is_enum<T> type traits.
-
-// A tag to indicate an empty flatbuffers::optional<T>.
-struct nullopt_t {
-  explicit FLATBUFFERS_CONSTEXPR_CPP11 nullopt_t(int) {}
-};
-
-#if defined(FLATBUFFERS_CONSTEXPR_DEFINED)
-  namespace internal {
-    template <class> struct nullopt_holder {
-      static constexpr nullopt_t instance_ = nullopt_t(0);
-    };
-    template<class Dummy>
-    constexpr nullopt_t nullopt_holder<Dummy>::instance_;
-  }
-  static constexpr const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
-
-#else
-  namespace internal {
-    template <class> struct nullopt_holder {
-      static const nullopt_t instance_;
-    };
-    template<class Dummy>
-    const nullopt_t nullopt_holder<Dummy>::instance_  = nullopt_t(0);
-  }
-  static const nullopt_t &nullopt = internal::nullopt_holder<void>::instance_;
-
-#endif
-
-template<class T>
-class Optional FLATBUFFERS_FINAL_CLASS {
-  // Non-scalar 'T' would extremely complicated Optional<T>.
-  // Use is_scalar<T> checking because flatbuffers flatbuffers::is_arithmetic<T>
-  // isn't implemented.
-  static_assert(flatbuffers::is_scalar<T>::value, "unexpected type T");
-
- public:
-  ~Optional() {}
-
-  FLATBUFFERS_CONSTEXPR_CPP11 Optional() FLATBUFFERS_NOEXCEPT
-    : value_(), has_value_(false) {}
-
-  FLATBUFFERS_CONSTEXPR_CPP11 Optional(nullopt_t) FLATBUFFERS_NOEXCEPT
-    : value_(), has_value_(false) {}
-
-  FLATBUFFERS_CONSTEXPR_CPP11 Optional(T val) FLATBUFFERS_NOEXCEPT
-    : value_(val), has_value_(true) {}
-
-  FLATBUFFERS_CONSTEXPR_CPP11 Optional(const Optional &other) FLATBUFFERS_NOEXCEPT
-    : value_(other.value_), has_value_(other.has_value_) {}
-
-  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(const Optional &other) FLATBUFFERS_NOEXCEPT {
-    value_ = other.value_;
-    has_value_ = other.has_value_;
-    return *this;
-  }
-
-  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(nullopt_t) FLATBUFFERS_NOEXCEPT {
-    value_ = T();
-    has_value_ = false;
-    return *this;
-  }
-
-  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(T val) FLATBUFFERS_NOEXCEPT {
-    value_ = val;
-    has_value_ = true;
-    return *this;
-  }
-
-  void reset() FLATBUFFERS_NOEXCEPT {
-    *this = nullopt;
-  }
-
-  void swap(Optional &other) FLATBUFFERS_NOEXCEPT {
-    std::swap(value_, other.value_);
-    std::swap(has_value_, other.has_value_);
-  }
-
-  FLATBUFFERS_CONSTEXPR_CPP11 FLATBUFFERS_EXPLICIT_CPP11 operator bool() const FLATBUFFERS_NOEXCEPT {
-    return has_value_;
-  }
-
-  FLATBUFFERS_CONSTEXPR_CPP11 bool has_value() const FLATBUFFERS_NOEXCEPT {
-    return has_value_;
-  }
-
-  FLATBUFFERS_CONSTEXPR_CPP11 const T& operator*() const FLATBUFFERS_NOEXCEPT {
-    return value_;
-  }
-
-  const T& value() const {
-    FLATBUFFERS_ASSERT(has_value());
-    return value_;
-  }
-
-  T value_or(T default_value) const FLATBUFFERS_NOEXCEPT {
-    return has_value() ? value_ : default_value;
-  }
-
- private:
-  T value_;
-  bool has_value_;
-};
-
-template<class T>
-FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& opt, nullopt_t) FLATBUFFERS_NOEXCEPT {
-  return !opt;
-}
-template<class T>
-FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(nullopt_t, const Optional<T>& opt) FLATBUFFERS_NOEXCEPT {
-  return !opt;
-}
-
-template<class T, class U>
-FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const U& rhs) FLATBUFFERS_NOEXCEPT {
-  return static_cast<bool>(lhs) && (*lhs == rhs);
-}
-
-template<class T, class U>
-FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const T& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
-  return static_cast<bool>(rhs) && (lhs == *rhs);
-}
-
-template<class T, class U>
-FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional<T>& lhs, const Optional<U>& rhs) FLATBUFFERS_NOEXCEPT {
-  return static_cast<bool>(lhs) != static_cast<bool>(rhs)
-              ? false
-              : !static_cast<bool>(lhs) ? false : (*lhs == *rhs);
-}
-#endif // FLATBUFFERS_USE_STD_OPTIONAL
-
-}  // namespace flatbuffers
-
-#endif  // FLATBUFFERS_STL_EMULATION_H_
diff --git a/edge-impulse/inference/ei_run_audio_impulse.cpp b/edge-impulse/inference/ei_run_audio_impulse.cpp
index 353f89d..6597b14 100644
--- a/edge-impulse/inference/ei_run_audio_impulse.cpp
+++ b/edge-impulse/inference/ei_run_audio_impulse.cpp
@@ -47,22 +47,6 @@ static bool debug_mode = false;
 static float samples_circ_buff[EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE];
 static int samples_wr_index = 0;
 
-static void display_results(ei_impulse_result_t* result)
-{
-    ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms.): \n",
-        result->timing.dsp, result->timing.classification, result->timing.anomaly);
-    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
-        ei_printf("    %s: \t", result->classification[ix].label);
-        ei_printf_float(result->classification[ix].value);
-        ei_printf("\r\n");
-    }
-#if EI_CLASSIFIER_HAS_ANOMALY == 1
-        ei_printf("    anomaly score: ");
-        ei_printf_float(result->anomaly);
-        ei_printf("\r\n");
-#endif
-}
-
 void ei_run_impulse(void)
 {
     switch(state) {
@@ -83,7 +67,7 @@ void ei_run_impulse(void)
             }
             state = INFERENCE_DATA_READY;
             break;
-            // nothing to do, just continue to inference provcessing below
+            // nothing to do, just continue to inference processing below
         case INFERENCE_DATA_READY:
         default:
             break;
diff --git a/edge-impulse/inference/ei_run_fusion_impulse.cpp b/edge-impulse/inference/ei_run_fusion_impulse.cpp
index d4be1f0..51655b8 100644
--- a/edge-impulse/inference/ei_run_fusion_impulse.cpp
+++ b/edge-impulse/inference/ei_run_fusion_impulse.cpp
@@ -78,18 +78,6 @@ bool samples_callback(const void *raw_sample, uint32_t raw_sample_size)
     return false;
 }
 
-static void display_results(ei_impulse_result_t* result)
-{
-    ei_printf("Predictions (DSP: %d ms., Classification: %d ms., Anomaly: %d ms.): \n",
-        result->timing.dsp, result->timing.classification, result->timing.anomaly);
-    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
-        ei_printf("    %s: \t%f\r\n", result->classification[ix].label, result->classification[ix].value);
-    }
-#if EI_CLASSIFIER_HAS_ANOMALY == 1
-    ei_printf("    anomaly score: %f\r\n", result->anomaly);
-#endif
-}
-
 void ei_run_impulse(void)
 {
     switch(state) {
@@ -109,7 +97,7 @@ void ei_run_impulse(void)
             // wait for data to be collected through callback
             return;
         case INFERENCE_DATA_READY:
-            // nothing to do, just continue to inference provcessing below
+            // nothing to do, just continue to inference processing below
             break;
         default:
             break;
diff --git a/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.cpp b/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.cpp
new file mode 100644
index 0000000..6b412bb
--- /dev/null
+++ b/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.cpp
@@ -0,0 +1,46 @@
+/* Include ----------------------------------------------------------------- */
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "ei_motionsensor.h"
+#include <LIS2DH12.h>
+
+#include "ei_device_raspberry_rp2040.h"
+#include "sensor_aq.h"
+
+/* Constant defines -------------------------------------------------------- */
+#define CONVERT_G_TO_MS2 9.80665f
+
+static float motion_data[MOTION_AXIS_SAMPLED];
+
+bool ei_motion_sensor_init(void)
+{
+    uint8_t acc_type = MOTION.begin();
+
+    if ((acc_type == 0) || (acc_type != MOTION_SENSOR_LIS2DH12)) {
+        return false;
+    }
+
+    ei_add_sensor_to_fusion_list(motion_sensor);
+    return true;
+}
+
+float *ei_fusion_motion_sensor_read_data(int n_samples)
+{
+    if (MOTION.accelerationAvailable())
+    {
+        MOTION.readAcceleration(motion_data[0], motion_data[1], motion_data[2]);
+
+        motion_data[0] *= CONVERT_G_TO_MS2;
+        motion_data[1] *= CONVERT_G_TO_MS2;
+        motion_data[2] *= CONVERT_G_TO_MS2;
+    }
+
+#ifdef DEBUG
+    for (int i = 0; i < MOTION_AXIS_SAMPLED; i++) {
+        ei_printf("%f ", motion_data[i]);
+    }
+    ei_printf("\n");
+#endif
+    return motion_data;
+}
diff --git a/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.h b/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.h
new file mode 100644
index 0000000..e51b22e
--- /dev/null
+++ b/edge-impulse/ingestion-sdk-platform/sensors/ei_motionsensor.h
@@ -0,0 +1,32 @@
+#ifndef _EI_MOTIONSENSOR_H
+#define _EI_MOTIONSENSOR_H
+
+/* Include ----------------------------------------------------------------- */
+#include "ei_fusion.h"
+#include "ei_sampler.h"
+
+/** Number of axis used and sample data format */
+#define MOTION_AXIS_SAMPLED 3
+
+/* Function prototypes ----------------------------------------------------- */
+bool ei_motion_sensor_init(void);
+float *ei_fusion_motion_sensor_read_data(int n_samples);
+
+static const ei_device_fusion_sensor_t motion_sensor = {
+    // name of sensor module to be displayed in fusion list
+    "Motion",
+    // number of sensor module axis
+    MOTION_AXIS_SAMPLED,
+    // sampling frequencies
+    { 100.0f, 62.5f, 20.0f },
+    // axis name and units payload (must be same order as read in)
+    {
+        { "accX", "m/s2" },
+        { "accY", "m/s2" },
+        { "accZ", "m/s2" },
+    },
+    // reference to read data function
+    &ei_fusion_motion_sensor_read_data
+};
+
+#endif /* _EI_MOTIONSENSOR_H */
diff --git a/firmware-sdk/sensor_aq.h b/firmware-sdk/sensor_aq.h
index 2d62036..58493da 100644
--- a/firmware-sdk/sensor_aq.h
+++ b/firmware-sdk/sensor_aq.h
@@ -22,10 +22,10 @@
 /* Include ----------------------------------------------------------------- */
 #include "QCBOR/inc/qcbor.h"
 #include <stdio.h>
+#include <time.h>
 
 // detect POSIX, and use FILE* in that case
 #if !defined(EI_SENSOR_AQ_STREAM) && (defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)))
-#include <time.h>
 #define EI_SENSOR_AQ_STREAM     FILE
 #elif !defined(EI_SENSOR_AQ_STREAM)
 // most targets don't need a file handle
diff --git a/model-parameters/model_metadata.h b/model-parameters/model_metadata.h
index 4d91a25..8b467c0 100644
--- a/model-parameters/model_metadata.h
+++ b/model-parameters/model_metadata.h
@@ -24,6 +24,7 @@
 
 #include <stdint.h>
 #include <stdbool.h>
+#include <stddef.h>
 
 #define EI_CLASSIFIER_NONE                       255
 #define EI_CLASSIFIER_UTENSOR                    1
@@ -37,6 +38,7 @@
 #define EI_CLASSIFIER_AKIDA                      9
 #define EI_CLASSIFIER_SYNTIANT                   10
 #define EI_CLASSIFIER_ONNX_TIDL                  11
+#define EI_CLASSIFIER_MEMRYX                     12
 
 #define EI_CLASSIFIER_SENSOR_UNKNOWN             -1
 #define EI_CLASSIFIER_SENSOR_MICROPHONE          1
@@ -46,53 +48,59 @@
 #define EI_CLASSIFIER_SENSOR_ENVIRONMENTAL       5
 #define EI_CLASSIFIER_SENSOR_FUSION              6
 
+#define EI_ANOMALY_TYPE_UNKNOWN                   0
+#define EI_ANOMALY_TYPE_KMEANS                    1
+#define EI_ANOMALY_TYPE_GMM                       2
+#define EI_ANOMALY_TYPE_VISUAL_GMM                3
+
 // These must match the enum values in TensorFlow Lite's "TfLiteType"
 #define EI_CLASSIFIER_DATATYPE_FLOAT32           1
 #define EI_CLASSIFIER_DATATYPE_UINT8             3
 #define EI_CLASSIFIER_DATATYPE_INT8              9
 
-#define EI_CLASSIFIER_PROJECT_ID                 1033
+#define EI_CLASSIFIER_PROJECT_ID                 424952
 #define EI_CLASSIFIER_PROJECT_OWNER              "Edge Impulse Profiling"
-#define EI_CLASSIFIER_PROJECT_NAME               "Continuous motion recognition"
-#define EI_CLASSIFIER_PROJECT_DEPLOY_VERSION     6
-#define EI_CLASSIFIER_NN_INPUT_FRAME_SIZE        27
-#define EI_CLASSIFIER_RAW_SAMPLE_COUNT           125
+#define EI_CLASSIFIER_PROJECT_NAME               "rp2040-project"
+#define EI_CLASSIFIER_PROJECT_DEPLOY_VERSION     2
+#define EI_CLASSIFIER_NN_INPUT_FRAME_SIZE        39
+#define EI_CLASSIFIER_RAW_SAMPLE_COUNT           200
 #define EI_CLASSIFIER_RAW_SAMPLES_PER_FRAME      3
 #define EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE       (EI_CLASSIFIER_RAW_SAMPLE_COUNT * EI_CLASSIFIER_RAW_SAMPLES_PER_FRAME)
 #define EI_CLASSIFIER_INPUT_WIDTH                0
 #define EI_CLASSIFIER_INPUT_HEIGHT               0
 #define EI_CLASSIFIER_INPUT_FRAMES               0
 #define EI_CLASSIFIER_NN_OUTPUT_COUNT            4
-#define EI_CLASSIFIER_INTERVAL_MS                16
+#define EI_CLASSIFIER_INTERVAL_MS                10
 #define EI_CLASSIFIER_LABEL_COUNT                4
-#define EI_CLASSIFIER_HAS_ANOMALY                1
-#define EI_CLASSIFIER_FREQUENCY                  62.5
+#define EI_CLASSIFIER_HAS_ANOMALY                EI_ANOMALY_TYPE_UNKNOWN
+#define EI_CLASSIFIER_HAS_VISUAL_ANOMALY         0
+#define EI_CLASSIFIER_SINGLE_FEATURE_INPUT       1
+#define EI_CLASSIFIER_FREQUENCY                  100
 #define EI_CLASSIFIER_HAS_MODEL_VARIABLES        1
+#define EI_CLASSIFIER_THRESHOLD                  0.6
 
-
-#define EI_CLASSIFIER_OBJECT_DETECTION            0
-#define EI_CLASSIFIER_TFLITE_OUTPUT_DATA_TENSOR   0
-#define EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER EI_CLASSIFIER_LAST_LAYER_UNKNOWN
-
+#define EI_CLASSIFIER_OBJECT_DETECTION             0
+#define EI_CLASSIFIER_TFLITE_OUTPUT_DATA_TENSOR    0
+#define EI_CLASSIFIER_OBJECT_DETECTION_LAST_LAYER  EI_CLASSIFIER_LAST_LAYER_UNKNOWN
 
 #define EI_CLASSIFIER_TFLITE_INPUT_DATATYPE         EI_CLASSIFIER_DATATYPE_INT8
-#define EI_CLASSIFIER_TFLITE_INPUT_QUANTIZED        1
-#define EI_CLASSIFIER_TFLITE_INPUT_SCALE            37.24111557006836
-#define EI_CLASSIFIER_TFLITE_INPUT_ZEROPOINT        -128
 #define EI_CLASSIFIER_TFLITE_OUTPUT_DATATYPE        EI_CLASSIFIER_DATATYPE_INT8
-#define EI_CLASSIFIER_TFLITE_OUTPUT_QUANTIZED       1
-#define EI_CLASSIFIER_TFLITE_OUTPUT_SCALE           0.00390625
-#define EI_CLASSIFIER_TFLITE_OUTPUT_ZEROPOINT       -128
+
 
 #define EI_CLASSIFIER_INFERENCING_ENGINE            EI_CLASSIFIER_TFLITE
+
+#define EI_CLASSIFIER_QUANTIZATION_ENABLED          1
+
 #define EI_CLASSIFIER_COMPILED                      1
 #define EI_CLASSIFIER_HAS_TFLITE_OPS_RESOLVER       1
 
+#define EI_CLASSIFIER_LOAD_IMAGE_SCALING         0
+
 
 #define EI_CLASSIFIER_HAS_FFT_INFO               1
 #define EI_CLASSIFIER_LOAD_FFT_32                0
 #define EI_CLASSIFIER_LOAD_FFT_64                0
-#define EI_CLASSIFIER_LOAD_FFT_128               1
+#define EI_CLASSIFIER_LOAD_FFT_128               0
 #define EI_CLASSIFIER_LOAD_FFT_256               0
 #define EI_CLASSIFIER_LOAD_FFT_512               0
 #define EI_CLASSIFIER_LOAD_FFT_1024              0
@@ -104,12 +112,18 @@
 
 #define EI_CLASSIFIER_SENSOR                     EI_CLASSIFIER_SENSOR_ACCELEROMETER
 #define EI_CLASSIFIER_FUSION_AXES_STRING         "accX + accY + accZ"
+#define EI_CLASSIFIER_CALIBRATION_ENABLED        0
 
 #ifndef EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW
 #define EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW    4
 #endif // EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW
 #define EI_CLASSIFIER_SLICE_SIZE                 (EI_CLASSIFIER_RAW_SAMPLE_COUNT / EI_CLASSIFIER_SLICES_PER_MODEL_WINDOW)
 
+#define EI_STUDIO_VERSION_MAJOR             1
+#define EI_STUDIO_VERSION_MINOR             52
+#define EI_STUDIO_VERSION_PATCH             1
+
+#define EI_CLASSIFIER_HR_ENABLED            0
 
 #if ((EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) ||      (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)) &&      EI_CLASSIFIER_USE_FULL_TFLITE == 1
 
@@ -126,6 +140,11 @@
 #endif
 #endif // ((EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_TFLITE) || (EI_CLASSIFIER_INFERENCING_ENGINE == EI_CLASSIFIER_DRPAI)) && EI_CLASSIFIER_USE_FULL_TFLITE == 1
 
+typedef struct {
+    const char *name;
+    int axis;
+} ei_dsp_named_axis_t;
+
 typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
@@ -138,12 +157,15 @@ typedef struct {
     bool stdev;
     bool skewness;
     bool kurtosis;
+    int moving_avg_num_windows;
 } ei_dsp_config_flatten_t;
 
 typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
     int axes;
+    ei_dsp_named_axis_t * named_axes;
+    size_t named_axes_size;
     const char * channels;
 } ei_dsp_config_image_t;
 
@@ -151,6 +173,8 @@ typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
     int axes;
+    ei_dsp_named_axis_t * named_axes;
+    size_t named_axes_size;
     int num_cepstral;
     float frame_length;
     float frame_stride;
@@ -167,6 +191,8 @@ typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
     int axes;
+    ei_dsp_named_axis_t * named_axes;
+    size_t named_axes_size;
     float frame_length;
     float frame_stride;
     int num_filters;
@@ -209,6 +235,8 @@ typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
     int axes;
+    ei_dsp_named_axis_t * named_axes;
+    size_t named_axes_size;
     float frame_length;
     float frame_stride;
     int fft_length;
@@ -220,6 +248,8 @@ typedef struct {
     uint32_t block_id;
     uint16_t implementation_version;
     int axes;
+    ei_dsp_named_axis_t * named_axes;
+    size_t named_axes_size;
     float frame_length;
     float frame_stride;
     int num_filters;
@@ -235,6 +265,15 @@ typedef struct {
     uint16_t implementation_version;
     int axes;
     bool scaling;
+    bool scaling_raw;
+    bool padding;
 } ei_dsp_config_imu_syntiant_t;
 
+typedef struct {
+    uint32_t block_id;
+    uint16_t implementation_version;
+    int axes;
+    const char * ppg_ecg;
+} ei_dsp_config_hr_t;
+
 #endif // _EI_CLASSIFIER_MODEL_METADATA_H_
diff --git a/model-parameters/model_variables.h b/model-parameters/model_variables.h
index abbb924..7a61e74 100644
--- a/model-parameters/model_variables.h
+++ b/model-parameters/model_variables.h
@@ -24,48 +24,50 @@
 
 #include <stdint.h>
 #include "model_metadata.h"
-#include "anomaly_clusters.h"
+
 #include "tflite-model/trained_model_compiled.h"
 #include "edge-impulse-sdk/classifier/ei_model_types.h"
 #include "edge-impulse-sdk/classifier/inferencing_engines/engines.h"
 
-const char* ei_classifier_inferencing_categories[] = { "idle", "snake", "updown", "wave" };
+const char* ei_classifier_inferencing_categories[] = { "circle", "idle", "left-right", "up-down" };
 
-uint8_t ei_dsp_config_3_axes[] = { 0, 1, 2 };
-const uint32_t ei_dsp_config_3_axes_size = 3;
-ei_dsp_config_spectral_analysis_t ei_dsp_config_3 = {
-    3, // uint32_t blockId
-    2, // int implementationVersion
+uint8_t ei_dsp_config_4_axes[] = { 0, 1, 2 };
+const uint32_t ei_dsp_config_4_axes_size = 3;
+ei_dsp_config_spectral_analysis_t ei_dsp_config_4 = {
+    4, // uint32_t blockId
+    4, // int implementationVersion
     3, // int length of axes
     1.0f, // float scale-axes
     1, // int input-decimation-ratio
-    "low", // select filter-type
+    "none", // select filter-type
     3.0f, // float filter-cutoff
     6, // int filter-order
     "FFT", // select analysis-type
-    128, // int fft-length
+    16, // int fft-length
     3, // int spectral-peaks-count
     0.1f, // float spectral-peaks-threshold
     "0.1, 0.5, 1.0, 2.0, 5.0", // string spectral-power-edges
-    false, // boolean do-log
+    true, // boolean do-log
     true, // boolean do-fft-overlap
-    4, // int wavelet-level
+    1, // int wavelet-level
     "db4", // select wavelet
     false // boolean extra-low-freq
 };
 
 const size_t ei_dsp_blocks_size = 1;
 ei_model_dsp_t ei_dsp_blocks[ei_dsp_blocks_size] = {
-    { // DSP block 3
-        27,
-        &extract_spectral_analysis_features,
-        (void*)&ei_dsp_config_3,
-        ei_dsp_config_3_axes,
-        ei_dsp_config_3_axes_size
+    { // DSP block 4
+        4,
+        39, // output size
+        &extract_spectral_analysis_features, // DSP function pointer
+        (void*)&ei_dsp_config_4, // pointer to config struct
+        ei_dsp_config_4_axes, // array of offsets into the input stream, one for each axis
+        ei_dsp_config_4_axes_size, // number of axes
+        1, // version
+        nullptr, // factory function
     }
 };
-
-const ei_config_tflite_eon_graph_t ei_config_tflite_graph_0 = {
+const ei_config_tflite_eon_graph_t ei_config_tflite_graph_5 = {
     .implementation_version = 1,
     .model_init = &trained_model_init,
     .model_invoke = &trained_model_invoke,
@@ -74,36 +76,34 @@ const ei_config_tflite_eon_graph_t ei_config_tflite_graph_0 = {
     .model_output = &trained_model_output,
 };
 
-const ei_learning_block_config_tflite_graph_t ei_learning_block_config_0 = {
+const ei_learning_block_config_tflite_graph_t ei_learning_block_config_5 = {
     .implementation_version = 1,
-    .block_id = 0,
+    .classification_mode = EI_CLASSIFIER_CLASSIFICATION_MODE_CLASSIFICATION,
+    .block_id = 5,
     .object_detection = 0,
     .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
     .output_data_tensor = 0,
     .output_labels_tensor = 1,
     .output_score_tensor = 2,
-    .graph_config = (void*)&ei_config_tflite_graph_0
-};
-
-const ei_learning_block_config_anomaly_kmeans_t ei_learning_block_config_1 = {
-    .implementation_version = 1,
-    .anom_axis = ei_classifier_anom_axes,
-    .anom_axes_size = 4,
-    .anom_clusters = ei_classifier_anom_clusters,
-    .anom_cluster_count = 32,
-    .anom_scale = ei_classifier_anom_scale,
-    .anom_mean = ei_classifier_anom_mean,
+    .threshold = 0,
+    .quantized = 1,
+    .compiled = 1,
+    .graph_config = (void*)&ei_config_tflite_graph_5
 };
 
-const size_t ei_learning_blocks_size = 2;
+const size_t ei_learning_blocks_size = 1;
+const uint32_t ei_learning_block_5_inputs[1] = { 4 };
+const uint32_t ei_learning_block_5_inputs_size = 1;
 const ei_learning_block_t ei_learning_blocks[ei_learning_blocks_size] = {
     {
+        5,
+        false,
         &run_nn_inference,
-        (void*)&ei_learning_block_config_0,
-    },
-    {
-        &run_kmeans_anomaly,
-        (void*)&ei_learning_block_config_1,
+        (void*)&ei_learning_block_config_5,
+        EI_CLASSIFIER_IMAGE_SCALING_NONE,
+        ei_learning_block_5_inputs,
+        ei_learning_block_5_inputs_size,
+        4
     },
 };
 
@@ -115,30 +115,30 @@ const ei_model_performance_calibration_t ei_calibration = {
     (int32_t)(EI_CLASSIFIER_RAW_SAMPLE_COUNT / ((EI_CLASSIFIER_FREQUENCY > 0) ? EI_CLASSIFIER_FREQUENCY : 1)) * 500, /* Half of model window */
     0   /* Don't use flags */
 };
+const ei_object_detection_nms_config_t ei_object_detection_nms = {
+    0.0f, /* NMS confidence threshold */
+    0.2f  /* NMS IOU threshold */
+};
 
-
-const ei_impulse_t impulse_1033_6 = {
-    .project_id = 1033,
+const ei_impulse_t impulse_424952_0 = {
+    .project_id = 424952,
     .project_owner = "Edge Impulse Profiling",
-    .project_name = "gestures-large",
-    .deploy_version = 6,
+    .project_name = "rp2040-project",
+    .deploy_version = 2,
 
-    .nn_input_frame_size = 27,
-    .raw_sample_count = 125,
+    .nn_input_frame_size = 39,
+    .raw_sample_count = 200,
     .raw_samples_per_frame = 3,
-    .dsp_input_frame_size = 125 * 3,
+    .dsp_input_frame_size = 200 * 3,
     .input_width = 0,
     .input_height = 0,
     .input_frames = 0,
-    .interval_ms = 16,
-    .frequency = 62.5,
+    .interval_ms = 10,
+    .frequency = 100,
     .dsp_blocks_size = ei_dsp_blocks_size,
     .dsp_blocks = ei_dsp_blocks,
     
-    .object_detection = 0,
     .object_detection_count = 0,
-    .object_detection_threshold = 0,
-    .object_detection_last_layer = EI_CLASSIFIER_LAST_LAYER_UNKNOWN,
     .fomo_output_size = 0,
     
     .tflite_output_features_count = 4,
@@ -146,22 +146,20 @@ const ei_impulse_t impulse_1033_6 = {
     .learning_blocks = ei_learning_blocks,
 
     .inferencing_engine = EI_CLASSIFIER_TFLITE,
-    
-    .quantized = 1,
-    
-    .compiled = 1,
 
     .sensor = EI_CLASSIFIER_SENSOR_ACCELEROMETER,
     .fusion_string = "accX + accY + accZ",
-    .slice_size = (125/4),
+    .slice_size = (200/4),
     .slices_per_model_window = 4,
 
-    .has_anomaly = 1,
+    .has_anomaly = EI_ANOMALY_TYPE_UNKNOWN,
     .label_count = 4,
     .calibration = ei_calibration,
-    .categories = ei_classifier_inferencing_categories
+    .categories = ei_classifier_inferencing_categories,
+    .object_detection_nms = ei_object_detection_nms
 };
 
-const ei_impulse_t ei_default_impulse = impulse_1033_6;
+ei_impulse_handle_t impulse_handle_424952_0 = ei_impulse_handle_t( &impulse_424952_0 );
+ei_impulse_handle_t& ei_default_impulse = impulse_handle_424952_0;
 
 #endif // _EI_CLASSIFIER_MODEL_METADATA_H_
diff --git a/src/main.cpp b/src/main.cpp
index ce9c4aa..6844e2e 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -35,6 +35,7 @@
 
 #include "ei_dht11sensor.h"
 #include "ei_inertialsensor.h"
+#include "ei_motionsensor.h"
 #include "ei_analogsensor.h"
 #include "ei_ultrasonicsensor.h"
 
@@ -69,6 +70,11 @@ void ei_init(void)
         ei_printf("Inertial sensor communication error occurred\r\n");
     }
 
+    /* Setup the motion sensor */
+    if (ei_motion_sensor_init() == false) {
+        ei_printf("Motion sensor communication error occurred\r\n");
+    }
+
     /* Setup the temp&humidity sensor */
     if (ei_dht11_sensor_init() == false) {
         ei_printf("DHT11 initialization failed\r\n");
@@ -100,17 +106,16 @@ void ei_init(void)
 
 void ei_main(void *pvParameters)
 {
-
     /* Initialize Edge Impulse sensors and commands */
     ei_init();
 
     while(true) {
-        /* handle command comming from uart */
+        /* handle command coming from uart */
         char data = ei_get_serial_byte();
 
         while (data != 0xFF) {
-            at->handle(data);
-            data = ei_get_serial_byte();
+           at->handle(data);
+           data = ei_get_serial_byte();
         }
     }
 }
@@ -125,11 +130,10 @@ int main(void)
     while (!tud_cdc_connected()) {
         tight_loop_contents();
     }
-    
+
     gpio_put(LED_PIN, 0);
 
-    /* Start the two tasks as described in the comments at the top of this
-    file. */
+    /* Start the two tasks as described in the comments at the top of this file. */
     xTaskCreate(ei_main,		/* The function that implements the task. */
                 "ei_main", 		/* The text name assigned to the task - for debug only as it is not used by the kernel. */
                 1024, 			/* The size of the stack to allocate to the task. */
diff --git a/tflite-model/trained_model_compiled.cpp b/tflite-model/trained_model_compiled.cpp
index c9167de..44dd28f 100644
--- a/tflite-model/trained_model_compiled.cpp
+++ b/tflite-model/trained_model_compiled.cpp
@@ -18,7 +18,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-// Generated on: 12.04.2023 11:45:30
+// Generated on: 13.06.2024 14:08:46
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -43,10 +43,16 @@ extern void ei_printf(const char *format, ...);
 #define ALIGN(X) __declspec(align(X))
 #elif defined __TASKING__
 #define ALIGN(X) __align(X)
+#elif defined __ICCARM__
+#define ALIGN(x) __attribute__((aligned(x)))
 #endif
 
 #ifndef EI_MAX_SCRATCH_BUFFER_COUNT
+#ifndef CONFIG_IDF_TARGET_ESP32S3
 #define EI_MAX_SCRATCH_BUFFER_COUNT 4
+#else
+#define EI_MAX_SCRATCH_BUFFER_COUNT 4
+#endif // CONFIG_IDF_TARGET_ESP32S3
 #endif // EI_MAX_SCRATCH_BUFFER_COUNT
 
 #ifndef EI_MAX_OVERFLOW_BUFFER_COUNT
@@ -60,9 +66,9 @@ using namespace tflite::ops::micro;
 namespace {
 
 #if defined(EI_CLASSIFIER_ALLOCATION_STATIC_HIMAX) || defined(EI_CLASSIFIER_ALLOCATION_STATIC_HIMAX_GNU)
-constexpr int kTensorArenaSize = 1456;
+constexpr int kTensorArenaSize = 1408;
 #else
-constexpr int kTensorArenaSize = 432;
+constexpr int kTensorArenaSize = 384;
 #endif
 
 #if defined(EI_CLASSIFIER_ALLOCATION_STATIC)
@@ -84,9 +90,11 @@ static uint8_t* current_location;
 template <int SZ, class T> struct TfArray {
   int sz; T elem[SZ];
 };
+
 enum used_operators_e {
   OP_FULLY_CONNECTED, OP_SOFTMAX,  OP_LAST
 };
+
 struct TensorInfo_t { // subset of TfLiteTensor used for initialization from constant memory
   TfLiteAllocationType allocation_type;
   TfLiteType type;
@@ -95,12 +103,6 @@ struct TensorInfo_t { // subset of TfLiteTensor used for initialization from con
   size_t bytes;
   TfLiteQuantization quantization;
 };
-struct NodeInfo_t { // subset of TfLiteNode used for initialization from constant memory
-  struct TfLiteIntArray* inputs;
-  struct TfLiteIntArray* outputs;
-  void* builtin_data;
-  used_operators_e used_op_index;
-};
 
 typedef struct {
   TfLiteTensor tensor;
@@ -118,323 +120,162 @@ static TfLiteTensorWithIndex tflTensors[MAX_TFL_TENSOR_COUNT];
 static const int MAX_TFL_EVAL_COUNT = 4;
 static TfLiteEvalTensorWithIndex tflEvalTensors[MAX_TFL_EVAL_COUNT];
 TfLiteRegistration registrations[OP_LAST];
-TfLiteNode tflNodes[6];
 
-const TfArray<2, int> tensor_dimension0 = { 2, { 1,27 } };
-const TfArray<1, float> quant0_scale = { 1, { 37.241115570068359, } };
-const TfArray<1, int> quant0_zero = { 1, { -128 } };
+namespace g0 {
+const TfArray<2, int> tensor_dimension0 = { 2, { 1,39 } };
+const TfArray<1, float> quant0_scale = { 1, { 0.27902525663375854, } };
+const TfArray<1, int> quant0_zero = { 1, { -92 } };
 const TfLiteAffineQuantization quant0 = { (TfLiteFloatArray*)&quant0_scale, (TfLiteIntArray*)&quant0_zero, 0 };
-const ALIGN(16) int8_t tensor_data1[100*27] = { 
-  -44, 9, 38, 39, 37, -30, -36, -27, 26, 14, 8, 8, 5, -7, 42, 41, 28, 13, 34, -10, -53, -45, 6, -4, 32, -51, 36, 
-  -7, 28, -8, -26, -14, -24, 40, 43, -32, -63, 27, -31, 34, -29, -51, 1, 20, -17, 26, 24, -7, 41, -22, -35, 32, -42, -9, 
-  14, 24, 2, -58, 22, 38, -36, 16, 44, 28, 62, 34, 16, -30, 37, 32, 27, 52, 13, -54, 64, 34, -5, -1, 46, -20, 22, 
-  -42, 15, 6, -20, -47, 46, 6, 44, 9, -31, 30, -38, -40, 13, 22, -37, 22, 23, -5, -44, -2, 10, 16, 28, 15, -39, -30, 
-  19, 64, 14, -7, 4, -21, 7, -11, 27, 30, 18, -30, -41, 3, -1, 27, -1, 5, -16, -56, 35, 5, 15, 4, -33, 38, -24, 
-  21, 18, -21, 6, -26, 19, 18, 26, -6, 23, -6, 31, -19, -5, -49, -48, -48, 37, 9, 2, 1, -29, -2, -31, -40, 21, -22, 
-  -39, 10, -49, 9, 8, 1, 20, -23, 36, -6, 21, 23, 21, -44, -43, -27, 20, -29, -20, -21, 34, -44, 36, 22, 26, 3, -29, 
-  1, 67, 42, -3, -2, -55, -56, 13, 3, 61, 19, 15, -7, 17, -34, 35, 57, 20, -23, -76, 5, 6, 37, 23, 9, 49, 1, 
-  12, 20, -28, -32, -11, 37, -25, 10, 18, 10, -72, 21, -10, 37, 14, 42, -16, -1, 36, 16, 37, 17, -6, -36, -24, -38, -19, 
-  -9, -7, 4, -25, 25, -40, -44, 35, -41, -41, -12, -43, -27, 36, 3, 0, -5, 23, -52, -2, 4, -20, -42, -3, 8, -19, 19, 
-  -62, -25, 0, -30, -29, 12, 43, 6, -32, 6, 43, -23, -23, -30, -29, -27, 28, 6, 14, 7, -72, 19, 0, -17, 27, 22, -48, 
-  43, 64, -9, 17, 13, -36, -46, -35, -20, 11, -1, -32, -15, 33, -29, 19, -27, -5, 5, -80, 10, 25, -1, -10, 26, 3, 38, 
-  7, -31, -18, -44, -46, -6, 34, -39, -12, -34, -20, 16, -38, 27, -50, 27, -5, 29, 10, 80, -15, 28, -37, 33, -35, 13, -21, 
-  7, -5, 34, 10, -50, 39, -45, 25, -15, 1, -18, 14, 0, 28, 37, -38, 11, 18, -3, -34, -5, -29, 36, -27, -22, 14, -28, 
-  -31, -7, -46, -13, 6, 0, 6, -49, -2, 1, -28, -32, -44, 8, 34, 16, 20, 28, 12, 3, -34, -37, -52, -24, -16, -41, -38, 
-  -36, 46, -39, -38, 9, -15, -44, -19, 41, 11, 17, 18, 23, 22, 37, 26, -8, -59, -45, -5, -3, 38, 38, -27, 8, -10, -28, 
-  -48, -2, -12, 16, -32, 15, -3, 3, 2, 5, 36, -5, -23, -23, -25, -49, 19, 16, -58, -16, 36, 1, 24, -38, 5, -12, 28, 
-  -7, 8, 31, -23, -50, -10, 25, -38, -34, 63, -25, -20, 31, -15, 35, 41, 47, 1, 24, -79, 97, -15, 30, 20, -8, -22, -18, 
-  -36, 42, -25, 28, 23, -51, 18, 27, -25, 38, 12, -28, -2, 12, 10, -32, -39, -32, -25, 11, 20, 18, -14, 15, 14, -25, 24, 
-  -22, -24, -22, 17, -17, 43, -48, -1, -39, -5, 77, -8, 10, 37, 33, 22, -16, 48, -30, 73, -34, -26, -33, -60, -34, 26, -59, 
-  -45, -46, -32, 43, -44, -49, -47, -32, 37, -30, 38, 15, -17, 12, -38, 22, 11, -29, -56, 27, -48, 14, -12, 18, 21, 23, -73, 
-  5, 41, -14, -27, -29, 9, 41, -17, 6, 5, 64, -37, 2, 8, 20, -41, -39, -40, -11, 8, 44, -45, -4, -46, -7, -56, -39, 
-  -45, -53, -18, -44, -9, 4, 35, 10, -9, 37, 2, 33, -54, -27, 18, -36, -24, -6, 20, -4, 43, -30, 13, 29, 37, -12, -5, 
-  -14, -33, -42, -15, -32, 1, 34, 10, -11, 35, -28, 26, -27, -16, -19, -42, -28, 35, -32, -37, -29, -4, -30, -5, -47, -32, -22, 
-  30, -15, 9, -23, -19, 41, -27, 28, -41, 43, -55, 33, 9, 7, -3, 16, 26, -13, 29, -45, 8, -33, 35, 31, 0, 56, 17, 
-  -29, 4, -25, -25, -42, 1, 39, -9, 7, -47, -6, 9, 1, 27, -17, 17, 5, -15, 3, 49, -27, -3, 33, 2, -13, 13, 16, 
-  33, 21, -13, -19, -13, -33, -27, 4, -11, 38, -17, 0, -15, 12, 23, 12, 7, -22, -19, 34, -12, 20, 17, -52, -22, 28, -48, 
-  39, 4, -19, -3, -44, 18, 22, -57, -54, 19, 12, 7, 22, 13, 31, 1, 40, 25, -47, -41, -7, -11, -5, -6, 33, -29, -49, 
-  20, -32, 22, 7, -16, -40, 16, -19, -37, 38, -29, -57, 32, -50, -37, 18, -13, 21, -6, -3, -2, 4, -15, 36, 10, -20, -23, 
-  6, -15, -37, 14, -32, -13, 18, -8, -15, 23, 88, -49, -16, 44, -23, 27, -49, -12, 7, 20, -4, -36, 5, 36, -19, 7, 30, 
-  -55, 1, 48, -33, -2, -44, -19, -24, -7, 7, -34, 27, 10, 34, -15, -38, 46, 29, -23, 57, -43, 52, 17, -41, -15, -21, -32, 
-  37, 47, 56, 7, -19, -18, 17, 56, -8, -36, 23, 45, -6, 32, -23, -29, -11, 35, 46, -27, 12, -37, -9, -18, 47, 11, 49, 
-  34, 1, -43, 22, -11, -37, -17, -28, 12, 31, -36, 1, -21, 42, -38, 4, -5, -23, 13, 11, -30, 15, 28, 18, -2, 29, -23, 
-  17, 25, 57, -28, -20, 25, 52, 51, 0, -27, -20, 21, 2, 15, -48, 14, -23, 30, 34, 14, 10, 24, -40, -30, 6, -13, 7, 
-  -50, 34, 14, -30, 16, -46, -13, 7, -48, 43, -21, 8, 28, 24, -9, 43, -32, -5, 50, -54, -18, -25, 20, 17, -19, -48, -16, 
-  21, -43, 12, -6, -25, 25, -44, -53, -38, -75, 30, 7, -32, -35, -28, -28, 37, -40, -9, -33, -70, -20, 27, -17, 54, 11, -8, 
-  41, -6, -38, 32, -44, -11, -10, -5, 47, 23, 40, -38, -16, -8, -38, 38, 46, 1, 38, 40, 28, -46, -26, -45, -48, -20, 28, 
-  6, 21, 33, -3, -10, -29, 11, 48, 32, 33, 16, -1, -12, -16, 34, -39, -46, 40, -35, 23, 65, -12, 39, 4, 20, -35, 9, 
-  -9, -5, -33, -16, -41, 37, 19, 18, -47, -37, -58, -7, -14, -37, -25, -25, 24, 3, 35, 20, -45, -33, 42, 39, -13, -48, -46, 
-  19, 5, 27, 10, 26, -1, -34, 36, -19, -19, 38, 31, -38, 41, -34, 14, 29, -14, -10, 25, 3, 0, -48, -28, -8, -37, 35, 
-  -21, -14, -16, -20, 39, 14, 5, 38, 31, 50, -81, 2, -19, 26, -1, 50, 25, 23, 38, -61, 111, -24, -10, 10, 31, -3, 5, 
-  -13, 40, 0, -33, -39, 22, -47, 15, 1, -21, -33, 71, -23, -38, -12, -34, -49, 22, -43, 44, -20, 17, 23, -43, 12, 31, -54, 
-  -30, 11, -22, -20, -32, -13, -39, 14, -3, -5, 6, 21, -51, 3, 0, -5, -33, 24, -38, -46, 92, 39, 12, -30, 32, -41, -12, 
-  -57, -12, -17, -12, 31, -26, -7, -25, -20, -52, -13, -10, -12, -51, -10, 12, -36, -34, -27, -7, -81, -28, -14, 14, 56, 45, 32, 
-  -29, -18, -7, -21, -18, 20, -24, 24, -31, 35, 11, -35, -16, -32, -6, 10, 26, -24, 19, 22, -35, -13, -21, 49, -38, -41, 6, 
-  -11, 4, 32, 17, -9, -12, 9, 14, 44, 17, 10, -34, 41, 17, -29, 42, 3, 9, -48, 56, 2, -31, 28, -22, 9, -29, -25, 
-  -10, -9, -59, 23, 40, 6, -33, 28, -45, 40, 22, 12, 8, -13, -11, 9, -22, 34, -1, -27, 20, -45, 25, -45, 1, -19, -12, 
-  -58, -55, -41, -26, -51, 13, -41, 4, 21, 14, -27, 48, 16, -32, -30, -6, -14, -45, -74, 27, -127, 31, 33, 40, -45, -21, -50, 
-  15, -15, 4, -10, -11, 9, -23, 2, -36, 20, 11, 16, -28, 4, 19, 22, 30, 16, -50, -44, 21, -43, -40, -51, 36, -24, -17, 
-  7, -16, -17, 40, -4, -22, 3, 38, 29, -9, -22, -13, 44, -27, -25, 32, -24, -20, 27, -44, 21, -30, 13, -8, -21, -8, 42, 
-  -52, -3, 21, 27, 24, 24, -43, 8, 38, 21, -20, 62, 30, -16, -32, -48, -35, -30, 11, -7, 2, 44, -36, 23, 34, -21, -13, 
-  -12, 17, -12, 23, -7, 35, -36, 17, -58, -10, 24, 1, 25, -30, -5, 11, 13, 40, 0, 52, -11, 9, -39, 53, 33, -52, 18, 
-  9, -21, -28, -5, 40, 10, -21, -7, 5, -11, -18, 3, 15, 40, 12, 19, 52, -6, -33, 34, -27, -6, 10, 21, -8, -34, -4, 
-  -21, -11, 25, -24, 19, 12, 19, -28, 34, -16, -7, 57, 48, 31, -1, 2, 12, -31, -52, 54, -37, -9, 29, -54, -42, -6, 34, 
-  41, 34, -35, -26, -31, -3, 38, 0, 29, -56, 28, 12, -18, 36, 30, -13, -27, 17, 25, -12, -43, -41, 10, 2, -21, -51, -15, 
-  -17, -21, -24, 30, 9, 15, -45, 28, 36, 47, 5, -28, -38, -44, 15, -18, 32, 25, -2, -26, 21, -3, -55, 40, 28, -12, -27, 
-  31, -8, -21, 23, -21, 6, -2, -16, -29, 39, 0, 9, -12, 16, 29, 14, 27, -30, 67, -16, 19, 19, -23, 20, -1, 26, -15, 
-  -57, 17, -13, -9, 24, 44, 6, 0, -44, 22, -26, -27, 29, -24, -52, -22, -31, -48, -67, 34, -52, 23, 36, -19, 21, -20, 43, 
-  57, -24, 19, -12, -22, -13, 42, 2, 24, 31, -14, -20, 44, 42, -5, -30, -22, 6, 30, 8, 34, -19, 34, 41, -24, -9, -11, 
-  23, -44, -12, -50, 39, 45, 51, -25, 42, 21, -5, -45, 15, 29, -2, 3, -18, 33, 9, 56, 22, -5, 40, 34, 20, -22, -19, 
-  17, -30, 27, 28, -37, -22, 34, 8, 26, -1, 43, -20, -19, 9, 5, 1, 42, -3, 59, -62, 54, 3, -3, -16, 47, 42, -8, 
-  -19, -40, -32, 34, -26, -27, -10, 31, -1, -4, -44, 6, 32, 27, -24, 35, -39, -13, -32, -63, 39, -16, -15, -37, -21, 28, -6, 
-  20, -34, 2, 37, 6, -12, 11, -25, -7, 57, -47, 29, 15, 22, 45, -30, 25, -25, -10, 15, -4, -3, -15, -39, 37, -41, 20, 
-  13, 13, 28, 39, -8, 31, -46, -43, 19, 23, 55, 39, 30, -50, -34, -28, 2, 35, 21, 24, 33, 18, -8, 0, -25, -55, -1, 
-  35, 5, -31, 8, 11, 46, 30, -31, -11, 36, -44, -23, -23, -11, 10, -1, -25, -31, -18, 50, 32, -13, 43, -36, -1, 3, 27, 
-  -12, -3, -30, 25, -18, 26, 44, 47, 23, -12, -26, 16, 2, 8, -45, 14, -2, -40, -4, -17, -9, -22, -11, 25, 34, -27, -32, 
-  -18, -17, -41, 12, -38, 15, -1, 11, -20, 19, -12, -10, -21, -24, 20, 36, -6, 41, 36, 66, -11, -36, 25, 17, 2, -7, -3, 
-  -20, 32, -31, 3, -12, 15, -30, 16, -35, -2, 71, -15, 33, 27, 40, 30, -15, -43, -58, -9, -3, 24, -23, -33, -36, -33, -12, 
-  -47, 23, -7, 32, -29, 34, 15, -45, 25, -28, -48, 41, 50, -1, 42, 39, -11, 23, -13, 30, -34, -9, -22, 21, -42, 21, 8, 
-  26, 32, 19, 37, 36, 39, -37, 1, 25, -2, 39, 50, -28, 35, 11, 29, 24, 16, -27, -46, -8, -38, -41, -29, -36, 4, -47, 
-  -45, 3, 32, -15, -45, 16, 0, -31, 39, -29, -18, -56, -41, -49, -31, 23, -44, 17, -20, 71, -17, 39, 13, 28, 10, -25, -45, 
-  -20, 34, -55, 36, 9, -27, -20, 28, -46, -28, 62, -64, -2, 9, 42, -38, 19, -32, -47, 12, -57, 30, -30, -36, 37, -40, 6, 
-  -42, -6, 49, -45, -11, 38, 38, -40, 30, -18, -42, 47, -4, 36, -5, -23, -13, -10, -9, -1, -62, 26, -24, 28, -32, -41, -6, 
-  26, 14, -28, 10, -25, 39, 17, 19, -19, -4, -19, -53, -7, -52, 14, -19, -11, -36, -13, 42, -57, -48, 1, -38, 1, 21, 28, 
-  -38, -4, -14, -20, 14, -25, 26, -45, -55, 12, 28, 48, -10, -43, -12, -19, 17, -48, -51, 49, -69, 37, 9, -18, 17, -30, -1, 
-  27, -7, -5, 26, -39, 3, 28, -2, 1, -3, 17, -22, -18, -42, -24, -31, -18, 31, -7, -16, -1, 17, 35, 39, 42, -27, 9, 
-  0, -10, -39, -42, 29, 44, 31, -43, -37, 38, 10, 11, 43, 2, -13, 16, -56, 15, 26, 42, -1, 19, -43, 5, 37, -29, 38, 
-  -46, -28, -14, -27, -41, 24, 3, -44, 34, 4, 40, 26, 34, -9, -46, 29, -44, -23, 36, 64, -60, -14, 7, 49, 45, 2, 51, 
-  24, -28, -7, -40, 39, 2, -2, 2, -30, 1, -24, -29, -36, -40, -57, 17, -53, -32, -42, 50, -28, 16, 43, 26, -27, -38, -16, 
-  16, 40, -27, 10, -45, -49, 19, 17, 15, -27, -22, 3, -39, 7, -26, -21, 6, -49, -49, 66, -10, -3, 41, 27, 23, 47, 34, 
-  -14, 12, 30, -46, -33, -21, 11, -11, -41, 4, -2, 21, -23, -41, -14, 28, -4, 34, 13, 34, 16, 15, -14, -7, 40, -29, 41, 
-  10, 35, -25, 22, 16, 4, -41, -21, -38, -28, 29, -32, -7, -7, 26, -41, -19, -45, -58, -29, -46, -29, 32, 2, 17, -26, -16, 
-  -8, 4, 24, -50, -21, -24, -30, -30, -15, -51, 39, 36, 15, 36, -21, -21, 30, 14, 37, 10, 12, -34, 44, -43, -47, -14, 20, 
-  8, 30, -3, 28, -11, 6, -14, -4, -36, 28, -53, -38, -21, 14, -2, 20, 21, -17, 21, 2, 4, -40, 5, -3, -15, -30, -39, 
-  20, 29, -46, 6, -8, 0, -28, -23, -45, 26, 33, -8, 38, -50, 14, 47, 30, 37, 60, -32, 30, 23, -20, 37, 44, 23, 28, 
-  -4, -40, 33, 36, -36, 13, -18, -30, -16, -8, -21, -5, 2, -39, -29, 20, 47, -15, -38, 30, 19, -2, 15, -50, 2, 46, 46, 
-  -21, 27, 44, -20, 10, -16, -20, -21, -10, 19, -46, 9, -23, -9, -21, 8, 21, 1, -46, 40, -4, -11, -31, -15, -34, -21, 28, 
-  -46, -38, -28, -21, -10, 27, 0, 13, -38, -2, 15, -28, -43, -27, 17, 34, -31, 42, -41, 33, -8, -14, -11, 8, -41, 28, 41, 
-  37, -48, -49, 5, -11, -1, 36, 15, -4, 45, -9, 7, 36, -47, -34, -4, 25, -1, 3, -21, -57, 19, 45, 27, 29, 53, 33, 
-  -49, -20, 0, -39, 27, 19, -20, 26, 11, 34, 31, 14, 41, 18, 17, -31, -49, 8, -29, -24, -19, 46, -29, 47, 13, -44, -37, 
-  41, 38, -4, -6, -29, -45, 39, -6, 12, -48, 32, 48, -42, -29, 12, 29, -35, 29, 4, 40, -47, 20, -42, -48, 17, -50, 17, 
-  -4, 41, -45, 27, 26, 24, 37, -25, -45, -5, 57, 1, 2, 22, -17, -16, -1, -43, -54, -27, 4, -6, 18, -33, 42, 24, -3, 
-  37, 45, 42, -10, -40, -20, -41, 44, -4, 20, 38, 20, -12, -39, -40, 19, -28, -14, 4, -38, -16, -5, -5, -47, -13, -5, 41, 
-  -5, 41, -1, 35, -51, -16, 39, -50, -28, 31, 41, -18, -4, 37, 35, 38, 16, 41, 7, 29, 12, 37, 7, -41, -47, -38, -38, 
-  24, -16, 0, -26, 30, 5, 40, -16, -17, 3, -16, 34, -45, -43, -18, -6, -18, -24, 24, -15, -40, 34, -18, -41, -8, 27, 21, 
-  -13, 22, 52, 31, -23, -4, 26, 31, -21, 32, -6, -38, -6, -16, -27, 46, -9, 43, 21, 36, -39, -7, 39, -32, -6, -20, 17, 
-  -6, 27, -33, -7, 22, 40, 6, 12, 15, 15, -19, -22, -16, 42, 8, 48, 19, -19, -9, -11, 87, -6, 5, 36, 26, -30, 44, 
-  28, -18, -25, 36, 36, 21, -44, 2, 42, -63, 63, -50, 40, 8, 18, -61, 6, -47, 26, 86, -58, -15, 25, 21, -25, -29, 46, 
-  46, 8, -32, -23, -33, -2, 24, 35, 52, 12, 27, -12, 27, 25, 42, -28, -43, 4, 26, -34, 29, 16, 8, 37, 50, -27, -27, 
-  4, 59, 57, -10, 27, -51, 12, -12, -31, -53, -15, 3, -35, -37, -47, 37, -53, 19, 31, 8, 91, 25, 8, 13, 19, 29, 19, 
-};
-const TfArray<2, int> tensor_dimension1 = { 2, { 100,27 } };
-const TfArray<1, float> quant1_scale = { 1, { 0.0044608833268284798, } };
+const ALIGN(16) int32_t tensor_data1[4] = { -41, -25, 29, 42, };
+const TfArray<1, int> tensor_dimension1 = { 1, { 4 } };
+const TfArray<1, float> quant1_scale = { 1, { 0.00151285738684237, } };
 const TfArray<1, int> quant1_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant1 = { (TfLiteFloatArray*)&quant1_scale, (TfLiteIntArray*)&quant1_zero, 0 };
-const ALIGN(16) int32_t tensor_data2[100] = { 0, -1, 2, 0, 1, 0, 0, 1, 0, 0, -1, 1, -1, 0, 0, 0, 0, 1, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, -1, 0, 0, 1, -2, 1, 0, 1, 0, 0, 0, 0, -1, 0, -1, 0, 1, -1, -1, -1, 0, -1, 1, 0, 0, -1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1, -1, 1, 0, };
-const TfArray<1, int> tensor_dimension2 = { 1, { 100 } };
-const TfArray<1, float> quant2_scale = { 1, { 0.16612827777862549, } };
+const ALIGN(16) int8_t tensor_data2[4*10] = { 
+  44, 53, 109, -26, 0, -127, -109, -27, 67, 57, 
+  57, -38, -82, -91, -58, -61, 17, 31, -30, 10, 
+  -81, 89, -77, -92, 53, -41, 66, 40, 54, -57, 
+  1, -56, -24, 58, 27, -31, 72, -28, -58, -91, 
+};
+const TfArray<2, int> tensor_dimension2 = { 2, { 4,10 } };
+const TfArray<1, float> quant2_scale = { 1, { 0.0081148594617843628, } };
 const TfArray<1, int> quant2_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant2 = { (TfLiteFloatArray*)&quant2_scale, (TfLiteIntArray*)&quant2_zero, 0 };
-const ALIGN(16) int8_t tensor_data3[50*100] = { 
-  -44, -41, -24, -3, 55, -87, -32, 36, -21, 36, -53, -46, 50, -64, 30, 45, -56, 67, 21, -25, -47, 24, -35, -47, 43, -67, 23, 42, 63, 57, 64, 39, 42, -74, 42, 20, -46, 22, -47, 65, 53, -19, -23, 72, -41, -26, 52, 63, 25, -65, -17, -18, 47, -40, 62, -58, 55, 25, -69, 33, -57, 35, -24, -37, -72, -24, 18, 54, -26, 37, -10, 37, -57, 5, 42, -20, -15, -2, 12, 60, -23, 18, 70, -65, -27, 34, -4, 5, 55, 2, 26, -23, 64, 26, -9, -58, -20, -44, 68, -66, 
-  -54, 15, 3, -69, 13, 70, -56, 18, -71, 52, 69, 65, 63, 29, -57, 50, 39, 49, -12, 52, 23, 1, 37, 62, -47, 31, 37, 48, -15, 61, 26, -53, 20, -29, 59, 12, -66, 6, -84, -61, -1, -28, 78, 51, 22, 52, -22, -78, -30, -68, 59, -38, -39, -60, 46, -43, 58, 59, 44, 36, -12, 68, -39, 49, -17, -61, -1, -20, 62, -41, -41, 18, -44, -37, 18, 36, -36, 20, 56, -68, 25, -32, 23, -44, -32, -77, 0, 49, -32, 25, 21, 60, 18, 29, -65, -37, 56, -4, -9, 28, 
-  -57, -3, -43, -28, 65, 33, 35, 48, 41, -46, -39, 7, -33, 61, 47, 17, 49, -12, 34, -56, -7, 29, 52, -56, -5, -7, -42, 3, -69, -71, 10, -40, -5, 39, 65, 2, 60, 60, 31, -2, 30, -19, 3, 43, -12, 29, -37, 9, -56, -4, -28, 43, 23, -11, 36, -36, 60, 57, 34, 26, -49, 43, -29, 52, 45, 48, -61, -73, 5, 47, 58, -74, -26, 45, -37, -55, -46, -55, -79, 23, 41, 41, 56, 5, -21, 64, 44, -24, -65, -70, -71, 33, 9, 18, -77, -45, 22, -9, 55, -41, 
-  -48, 26, 56, 72, -44, -50, 23, -57, -60, -68, -38, 0, -4, 70, -34, 61, 36, 62, -61, -44, -56, 13, -26, -12, 69, -52, -17, 50, -15, 37, -80, 11, -65, 38, -37, -45, 11, -16, 48, 69, 72, -17, -8, -1, -65, -10, -45, 43, -50, 29, 40, -72, -44, 66, 65, 44, -16, -42, 67, 62, -2, -20, -59, -71, 6, 64, -19, 49, -67, -1, -35, 53, -47, -38, -4, -25, 41, -11, -62, 61, -73, -46, -44, -32, 64, 10, -16, 4, 51, -20, -45, -30, 10, -17, -25, -17, 23, 65, -4, 36, 
-  5, 62, -2, 11, 40, -36, 44, 30, 27, -75, 37, -40, 26, 34, 51, 49, 9, 58, -36, -5, -48, 6, 26, -33, -56, 1, -41, 19, -66, 21, -51, 54, -58, -45, -28, 70, 50, -4, -9, 31, 23, 26, 66, 9, 46, -75, 55, -67, 31, -53, -28, -31, -47, 46, 43, -11, -45, -36, -50, -59, -26, 38, -55, 66, -26, -8, 72, -55, -23, 4, 5, -71, -35, 32, 40, -54, 32, 22, 12, -12, -24, -2, 65, -15, -40, -27, 11, 40, -56, -65, 63, 41, 72, -47, -41, -5, 42, 62, 36, 19, 
-  45, -17, -80, 51, 34, 5, 59, 44, 46, 61, -61, 61, 50, 9, -14, 10, 2, -69, -59, -2, 38, -17, -41, 18, -36, 2, 64, -7, 12, -37, -43, -53, 20, -19, -49, 68, 37, -25, 2, -65, -72, -59, -38, -39, -29, -61, 14, 20, -4, -59, 15, 29, -49, -14, -29, 9, -19, 44, -29, 22, 46, -43, -16, -58, -30, -72, -9, 34, 28, -48, 7, -54, -54, 37, -67, -71, -62, -64, 16, 22, 58, 37, 63, -26, -55, 14, -2, -15, 25, 24, -10, -21, -65, 32, 49, 24, -80, -48, -49, 62, 
-  51, -17, -39, -10, 59, 42, -65, -50, 10, 42, 29, 30, -54, 25, 4, -17, 11, -73, 26, 9, -59, -57, -41, -62, -60, -36, -5, -64, 41, -66, 54, -12, 2, -60, -57, 8, -64, 66, 51, -15, 43, 51, 6, -16, -58, 0, 26, 43, 0, 56, 52, 16, -54, 9, -7, 19, -1, 22, -35, -13, -56, 16, 38, -15, -42, 21, 27, 54, 53, -68, 10, 59, 33, 73, 21, -57, -16, -22, -45, 63, -26, 47, 50, 47, 29, 41, -9, -14, -29, -26, 36, -10, 69, 36, -26, -10, 40, 8, 21, 16, 
-  -45, -1, 46, -24, 54, 41, 8, -54, 16, 57, 13, 34, 40, -8, 10, 51, 69, 63, -46, -22, -22, 19, 52, -23, -36, -35, -22, 56, 36, -21, 2, -48, 38, -10, -50, -13, 47, -42, -12, -30, 48, 62, -70, -62, 9, 24, 54, 65, 60, 1, -50, 66, 33, 57, -25, -50, 47, -16, 2, 76, -62, -22, -31, 3, 52, -57, 46, 69, 38, -19, 68, 67, 15, 73, 45, 26, -47, -43, 0, -48, 40, -18, -33, 62, -19, 13, 54, 39, -19, 41, -5, 35, -6, 36, -71, -21, 39, 69, -32, -20, 
-  -51, -3, 28, 32, -69, -20, 62, -5, -65, -50, 14, 26, -40, -17, -67, -59, -53, 10, -16, -26, -57, 45, -16, -71, -57, -18, 23, 36, 30, 56, -21, -1, -41, 67, 55, -36, 51, -11, -44, -70, 64, 68, 31, 65, -15, 35, -39, 61, 45, 2, 64, -4, 28, 20, -19, -34, 68, -39, -21, 20, -60, -13, -71, -15, -44, 25, -77, -20, -20, 71, -53, 17, 49, 38, -68, 46, -10, 33, -74, 41, -14, 13, 8, 30, 55, 53, -27, -26, -43, -73, -22, 1, 2, -47, 66, 23, -65, 23, -14, -55, 
-  -19, 40, -16, 13, 24, 2, 0, -38, 42, 11, 29, -33, -55, -19, -32, -65, 12, 24, 24, -25, 15, 34, 56, -17, -65, -71, -57, 10, 16, -14, -32, 55, 65, 7, 57, 29, -30, 55, -6, 4, 29, 43, 10, -47, 43, 26, -61, -46, 41, 3, 19, 24, -28, -19, 16, -66, 25, 6, -51, -26, 43, -8, -57, 12, 42, -62, 61, -60, -24, -45, 32, 23, -25, -38, 13, 37, 39, -42, 17, 4, 43, -44, -14, 30, 26, -55, -13, 31, 26, -54, 22, -34, 70, 19, 50, -7, -20, -73, -48, -76, 
-  -5, -29, -46, -69, -22, 63, -30, 76, 51, 28, -8, 6, -24, -71, -29, -2, -50, 1, 41, -4, -28, 60, 20, 31, 30, -28, -41, 39, 20, 40, -66, 30, 31, 13, 67, 43, -48, -32, -41, -41, 56, -23, -59, 0, -48, -8, -66, -32, -65, 44, 17, 24, 52, 64, 16, 8, -4, 61, 23, -9, -66, -23, 57, 63, -65, 66, -43, 34, -55, -28, -57, -22, -18, -28, 29, 70, -30, 47, 59, 3, -70, -28, -25, -19, 62, 32, -37, -20, 25, 27, -65, -50, 15, 14, -20, 58, 35, 12, -55, -70, 
-  10, 49, -53, -18, 61, -31, 6, 29, 26, 13, -25, -2, 12, 80, 78, 32, -21, 21, 61, 10, -32, 54, 9, -9, -70, 8, 30, -38, -44, 66, 7, 43, -44, 50, 33, 42, -18, -70, 49, 19, 3, 20, 38, 7, -1, 44, 59, -66, -3, -86, -28, -22, 11, 50, 72, -64, -75, 72, -43, -36, 20, -80, 51, -6, 6, 70, 36, -22, -21, -41, 41, -26, -2, -79, 39, -37, -13, -8, -13, 30, -45, 63, 62, 32, -11, 35, -30, -6, -9, 2, -74, 64, 15, 72, -68, -55, -40, -46, 69, -20, 
-  39, 29, -42, -21, -48, -37, -69, 68, 38, 19, 8, -15, -67, 4, -44, 62, 69, -56, 6, 21, 55, -49, -56, 7, 15, -40, 11, 12, -58, -47, 47, -14, 10, -26, -29, -56, 46, -52, -14, -35, -12, -75, 3, -21, 29, -51, -50, -9, 56, 18, -37, 59, -17, -57, 50, -81, -77, 23, 2, -21, 63, -59, -77, 49, 16, -37, -11, 54, 62, 14, -34, 28, 25, -65, -51, 70, 26, 1, 3, 63, -38, -38, 16, -12, 50, 63, -26, 39, -55, -37, 36, 34, 36, -45, -18, 50, -78, 63, -11, 17, 
-  -12, 22, 34, 28, 77, -126, 1, 72, -6, -26, -39, 32, -12, -64, 28, 30, 2, 10, 43, -34, -9, 34, 36, 47, 82, -8, -47, 60, -11, 34, -41, 62, -4, -66, -31, -16, 72, 38, 31, -27, 86, 30, 52, -51, -45, 53, -48, -46, 28, -15, -87, 54, -37, -68, -23, 36, 77, -58, -26, -13, 80, 22, -30, -96, -18, -62, -33, 3, -4, 0, -55, -46, -58, -96, -43, -50, 28, 17, -38, -3, 2, 15, -20, 20, 24, 42, -34, 24, -24, 35, 40, -1, 16, -17, 24, 46, 81, -39, 11, -4, 
-  14, -37, -67, 10, 74, 3, -73, 7, -7, -21, 30, -49, 72, 42, 18, 41, -5, 12, 5, -57, 14, -59, -65, 73, 46, 35, -4, -2, -36, -15, -32, 61, 55, -25, -21, -76, 25, 58, -13, -50, 65, -81, 38, -59, -67, -72, -24, -20, -32, -60, -29, 57, 40, -67, -58, 28, -11, -18, 39, -44, -31, 4, -49, 11, 60, -67, -22, -73, -17, -55, 62, 65, 7, -27, -55, 11, -28, -50, -56, -10, -21, -65, -13, 18, 11, 8, 30, 9, 55, 45, -3, 16, 49, 6, 58, -41, 78, -72, 25, 48, 
-  52, -4, 62, -67, 37, -58, 47, 48, 18, 27, 3, -31, 21, -24, -3, 7, 3, -53, 43, 57, -16, -73, -8, -35, 13, -3, 72, -58, 12, 67, 74, -23, -47, 36, 23, -50, -39, -4, -49, 41, -1, -2, -59, 4, -34, -46, 11, -35, 16, 8, -52, -78, -55, 28, -36, 53, -15, -43, 65, 5, 66, -17, -37, 55, -75, -67, 63, -63, -60, 7, 46, -34, 51, -54, -42, 46, 59, 9, -47, 50, 44, -80, -13, 64, -33, 43, 0, 15, 24, -1, -3, 15, 30, -16, -71, 19, -13, 22, -10, 14, 
-  -26, 1, 36, 22, 27, 2, -44, 15, -48, 59, -55, 41, -12, 59, -8, -5, 67, 37, 15, -78, -52, 54, -44, -13, -30, 7, 50, 40, 55, 12, 56, 61, -13, 64, 31, -3, -11, -58, -38, -44, 19, 22, -68, 45, 10, -53, -31, -11, -19, -16, 46, 10, 22, -19, 12, 61, -72, -30, 4, 7, 0, -72, 22, -73, -34, 51, -24, 46, 4, 32, 47, -69, -47, -52, 26, -48, 29, -22, 67, -14, 39, 12, -13, 65, -20, 57, 46, -55, 49, 16, -30, -24, 63, 18, -65, 18, -22, 17, 42, 28, 
-  63, -19, 65, -40, 24, 32, 43, -55, -4, -23, 0, 33, 23, -34, -22, 21, 47, -68, 71, -53, 64, 46, 37, 63, 17, -54, -60, -54, 72, 71, 9, -7, -19, -14, -23, 61, 37, 9, -4, 13, -39, 58, 49, 58, 11, -10, -58, 36, 55, -67, -39, 55, -35, 46, 33, 11, 18, -39, 6, 70, 23, 16, -4, -14, -6, 23, -34, 31, 13, -52, 70, -71, -5, 21, 62, -45, 26, 13, 52, 12, -52, -11, -9, -55, -39, -68, -39, 66, -55, -5, 2, 40, 9, 49, 44, -71, 25, -46, -1, -41, 
-  44, 40, 51, -50, 30, 0, -51, 19, 70, -10, 62, -17, -68, 11, -47, 24, -36, -43, 59, -54, -25, -26, 22, 51, 74, -3, -30, -62, -37, 32, -56, -24, 51, -20, 28, -69, -65, 78, 16, 25, 3, 59, 39, -71, -11, 31, 27, -62, -51, -63, -46, 66, 31, 17, 3, -36, -10, -46, -17, 43, 27, -37, 55, 17, -34, 22, -23, -4, 65, 20, 14, -64, -72, 66, 8, -51, -60, 63, -4, 47, 32, 57, 79, -70, -65, 29, -6, -21, 13, -58, -41, -39, -4, -38, -64, 42, -18, 56, 5, -9, 
-  25, 47, 17, 64, -11, -25, -70, 4, -26, -38, -35, 14, 19, -68, -41, 62, 64, -76, -2, 59, -36, 24, 40, 42, -41, -43, -51, -56, -33, -12, -40, -14, 44, 40, -38, -44, -56, 8, -34, -13, 86, -64, 5, -29, 24, 36, -59, -18, 31, -10, -69, -21, -64, 47, -53, -43, -76, -2, -23, -23, -33, -72, 57, 64, 39, -7, 1, 31, 49, -87, -78, 36, -68, 59, -39, 4, -53, -4, -37, 40, -61, 59, -41, 20, 37, -45, 16, 8, -19, 52, -7, -56, -6, -69, -36, -29, -47, -69, 27, 61, 
-  23, 14, 57, -3, -68, -36, 17, -6, -71, 13, 60, -8, -61, 63, 44, -1, 4, -23, 41, -65, -46, 65, 11, -66, 16, 44, 32, 34, -31, 7, 4, -19, -19, 10, 19, -21, 34, 6, 18, 53, -20, 36, -4, -64, -20, 73, -46, 27, -13, 49, -16, -9, -31, -64, 55, -59, -50, -21, 49, -2, -46, -65, -41, -15, -49, 16, -39, 73, 64, 59, 25, -56, -74, -19, -49, 44, -69, 47, -7, 33, 43, -7, -53, -29, 11, -9, -37, 52, -35, 25, 45, -10, -33, 21, -47, -49, -55, 47, -41, 42, 
-  -35, -15, -25, 39, -1, 48, 48, -33, 24, 56, 11, 23, -26, -47, -47, -79, 67, -60, -26, -24, -47, -51, 69, -62, 37, -44, 37, -15, -48, -11, -15, 32, 56, -28, 20, -17, -69, -68, -10, -49, -73, -50, -82, 39, 55, -2, -64, 55, -12, -19, 14, -59, -37, -3, -14, 53, -9, -54, -62, -10, 43, 75, -8, 59, -32, 47, -23, -6, -44, 53, -76, -72, -27, -18, -47, 69, -61, 48, -52, 67, -34, -24, -40, 9, -20, 63, -23, -86, 66, 67, 27, 3, -70, 54, -30, 73, 5, 26, -28, -25, 
-  18, -29, 20, 61, -16, -43, 13, 52, -3, -66, 27, -75, 14, -72, 36, -13, -4, 18, -44, -41, 74, 13, 28, -1, -6, 40, -2, 61, 43, -58, 2, 41, 27, -62, -44, 52, 57, 45, 67, 61, 43, -1, -67, 52, 23, 44, -19, -45, 6, -41, -61, 48, -73, -68, -65, -40, 7, -46, 22, -12, -9, -2, 17, 47, 22, 41, -56, -11, -69, -25, 23, -4, 6, -70, -43, 30, 10, 56, 42, 20, 32, 58, -8, 21, -20, 64, 54, 54, -30, 9, -21, -13, -38, -2, 16, 52, 17, 31, -23, -13, 
-  -46, 58, 0, -10, 48, 54, -34, 44, -49, 7, -42, -64, 18, 63, -5, -46, 9, 29, -7, 60, -29, 36, 50, -43, -25, -68, 43, -75, 70, -59, 64, 6, -23, 31, -64, 44, 24, 22, -69, 0, 21, -15, 68, 32, -12, 34, 7, -28, 53, -36, 9, -16, 11, 26, -67, -36, 36, 3, 20, -37, 32, -3, -46, -74, 7, 38, -1, -55, -36, -26, -5, 62, 65, -9, -70, -41, -16, 0, 72, -62, 66, -59, 43, 50, 5, -45, -42, -29, -36, 27, -62, 13, 70, -55, 9, 36, -65, 55, -40, -16, 
-  -42, -7, -65, -66, 29, -58, -41, -16, -51, -34, 37, -29, -66, 45, -16, -8, 63, 6, -41, 32, -67, -36, -22, 52, -47, -4, -14, 17, -60, 41, 30, 20, -36, 31, 56, 58, -34, -36, 73, 1, 30, -43, -3, 13, -45, 22, 66, 51, 69, 45, -25, -63, 29, -29, 33, 36, -78, 64, -50, 12, 5, -35, -70, 34, 18, 14, -11, -18, 9, 55, 44, -53, -65, -44, -32, 31, 32, 35, -67, -34, 63, -68, -39, -56, 33, -28, 0, 69, 22, -70, 59, 18, 31, -3, -39, -15, 1, 2, 51, 25, 
-  -57, -21, -60, -12, -15, -68, 44, 48, 29, 57, -21, -44, -41, -31, 11, 37, -13, -1, -31, 25, -41, -69, 50, -44, -35, 6, -41, 21, 33, 25, -18, -77, -63, 53, 12, 57, -19, 31, -28, 45, 3, -20, -18, -20, -51, -73, 34, 24, -26, -14, 52, -51, -49, -18, -65, -9, 47, 5, -43, -19, 16, 4, -41, 64, 36, -52, 29, 70, 9, -38, -55, 3, 7, 71, 21, -14, -37, 66, 44, 22, 32, -73, -77, 2, 26, 2, 24, 29, 26, -79, 33, -51, -74, -31, -1, 5, 17, 49, 28, 50, 
-  5, 11, -69, -14, -52, -63, 42, 31, 45, 57, -60, -34, 8, -25, 30, 0, 63, -22, 60, -43, 16, -3, 12, -35, -50, -28, -29, -59, -42, -12, -20, -57, 48, 9, 10, 50, -75, -26, 38, -68, 45, 74, 25, 6, 6, -71, -48, 8, -2, -17, -69, 4, 41, -53, 38, 70, -53, 48, -47, 49, 15, 21, 60, 12, -30, -15, -13, 29, 4, 6, 34, 48, -10, 43, 4, 26, 21, -65, -63, -67, -60, 3, 15, 61, 29, -48, 60, 4, -104, 31, -45, -27, -69, -68, -14, 45, 0, -8, -43, 5, 
-  -42, -49, -14, 12, 62, -3, 34, 20, -47, -11, -15, 17, 4, -7, -10, 8, 62, 72, 52, -69, -51, 47, -24, 33, -9, 5, 18, 55, -24, 19, 18, 54, 13, -3, -42, -28, 34, 41, 67, -59, -31, 20, -54, -55, 28, 47, 73, 32, -63, -47, -53, -56, -49, 28, -80, 65, -62, -38, 17, 31, -73, -68, -75, 37, 79, -12, 15, -74, -12, 19, 37, -57, -65, -19, -30, 40, 4, 58, -14, 26, 6, -29, -43, -64, -30, -56, -41, 58, -43, -61, 21, 51, -31, 29, 81, -32, -26, -53, 64, -5, 
-  2, -76, 70, 70, 40, 54, -25, -10, 3, -41, -2, 33, -12, -10, -20, -10, 13, -11, -38, 54, -24, 19, -36, -46, -28, 7, 2, -37, 5, 45, -45, 23, 7, 50, -56, -84, 56, 8, -52, 30, -38, 32, 14, -51, -45, 73, -61, -39, 61, 24, -9, -53, -46, 31, -12, -12, 71, -62, 45, -40, -30, 47, 70, -23, 65, -18, 10, 26, 20, -49, -24, -11, -58, -32, -27, 69, 2, 26, 12, 46, -51, -38, 62, -7, 11, -65, 21, -74, 9, 53, -36, -3, -2, 46, 54, -62, -17, 14, 51, -21, 
-  10, -28, -55, 65, -60, -60, 14, 10, 46, -45, 39, -26, -67, -43, 37, -72, -20, -74, -21, -25, -66, -66, 7, 20, 39, 63, -52, -19, 4, 15, 13, -61, 41, 42, 32, -14, -6, 39, -68, -11, -50, 52, -91, 59, -4, 25, -46, -68, 8, -14, 33, 54, -42, 49, -21, -35, -2, 16, 50, 40, -78, 47, -17, -31, -19, 23, 39, -53, 30, -55, -7, -66, -3, 38, 48, 69, 78, -29, 20, 24, -69, -38, 20, 63, 57, -48, 14, -47, -48, -51, -77, -48, 33, -1, -24, 71, 66, 69, -31, -30, 
-  20, -56, 55, -27, -16, -56, -34, 49, 62, -42, -7, 43, -58, -31, -74, -26, 62, 57, -24, -21, 17, -22, -53, 10, -22, 26, 48, 57, -66, -52, 68, 17, -61, -30, 10, -53, 12, -56, 21, -14, 61, 35, -1, -6, -44, -12, 31, 57, 3, -57, 41, -68, 22, 46, 1, -68, 3, -68, 27, 33, -18, 70, 40, 63, -66, -78, -30, 1, 21, -53, 67, -40, 31, 29, 67, 64, -27, 42, -65, 63, 47, -10, -9, -27, -56, -48, -19, -66, -3, -1, 44, -57, 5, -38, 59, -34, 16, -61, -55, 35, 
-  18, -17, -28, 33, 46, -5, 54, -28, 13, 13, 66, 27, -11, -19, 19, 33, 12, -33, 49, 52, 35, -64, 51, -38, 38, 65, 66, 29, -33, -45, 63, 30, -68, 16, -18, -22, -63, -33, -17, -34, -14, 60, -46, -55, -33, 75, -83, -46, -37, 16, 26, -72, 64, -33, 5, -87, -14, -40, 60, -41, -27, 31, -39, 36, -40, -32, 16, 8, 15, 56, 77, -66, 7, 10, 55, 36, 56, 20, 61, -24, -20, -33, -8, -8, 40, -72, 73, 63, -19, 60, 37, 55, 73, -42, -29, -22, -16, 21, -59, 33, 
-  -53, 55, -70, -66, 41, -7, -7, -7, 54, -4, -38, 1, 60, 38, -50, 35, 30, -8, -56, 68, -14, -46, -48, 28, -12, 47, -57, 65, 57, 52, -25, 4, 40, -14, -11, -60, 42, -65, -2, 61, -70, -32, 43, 64, 11, 66, -30, 43, -79, -65, 60, -45, 21, 16, -50, -37, 8, 43, 77, 56, 41, 57, -71, -65, 65, -29, 35, -43, -63, -55, 16, -44, -25, -22, 56, -31, 1, 61, -19, 65, -46, -69, -16, -3, -33, 36, 10, -40, 71, -9, -36, 23, 57, -36, -28, 10, -9, -11, 44, -47, 
-  -66, 71, -43, 13, 68, 17, -10, 24, 53, -4, 8, 21, 6, -28, -17, -52, -53, -69, 5, -30, 55, 31, -7, -73, 64, -32, 76, -51, 55, 18, 73, 32, 10, 80, 40, -39, -41, 20, -63, 49, -31, -17, -2, -47, -67, -36, 58, -8, 27, -67, 18, 12, -53, -24, -22, 16, -44, -43, -31, -19, -36, -30, 32, 71, -58, 38, 63, 24, 22, -36, 26, -8, 57, 23, -31, 3, 51, -74, -7, 43, -8, -28, 18, 30, 9, -8, -68, 57, -32, 13, -6, -57, -17, 46, -34, 71, -18, -23, -52, 65, 
-  -55, -47, 25, 68, 60, 26, -6, 55, 72, -67, -10, 36, -5, -73, -74, -8, 32, -6, -57, -56, 17, 34, 4, 53, 37, -23, -53, 28, -36, -24, -26, 66, -42, 59, 14, -3, 44, 41, -15, 16, 68, -73, -39, 63, -16, -28, -71, 32, -64, 48, -72, 21, -38, -61, 23, -9, -15, -67, 43, -42, -68, 6, 66, 39, -33, 56, -27, -46, -27, 50, -45, -18, -65, 21, 15, -5, 41, -26, -25, -31, 3, 66, 28, -73, -39, -26, 41, 27, -34, 69, 9, -5, 50, -74, -25, 68, -38, -10, -62, 23, 
-  3, 60, 31, -63, 11, -68, -28, 83, 61, -12, 45, -19, -45, 15, 53, -10, -59, -5, -13, -29, -53, -24, 61, 31, -43, -16, -37, 25, 64, 36, 42, -49, 33, -49, -62, -39, -71, -55, 24, 18, -7, 18, -21, 66, -61, 16, -14, -47, 50, -48, -75, 24, 60, -64, 16, 34, -51, 41, 34, -16, 45, 60, 42, 18, 53, -64, -63, -20, 6, -18, -40, 69, 32, -36, 30, 33, -51, 46, -13, 80, -12, -69, -31, 5, -32, -67, 45, -73, -58, 28, 19, -25, -25, -56, 4, 58, 47, -16, 2, -55, 
-  48, 26, -4, 24, 48, 58, 56, 44, -10, 8, -47, -3, 25, -74, 12, 32, -72, 69, 11, 5, -41, -29, 61, -47, -13, -48, -14, 52, 17, -15, 41, -1, -26, 2, 39, -32, -6, 63, 7, 7, -22, -72, 66, -44, -76, 15, -19, -69, -14, 53, 41, -28, -58, 21, 40, -31, -41, -61, -59, 33, 73, 5, -69, -62, 29, -3, 70, 55, 53, -76, -24, 2, -15, -91, -61, -25, 25, -1, -5, -36, -25, 16, 21, 48, 53, -42, 58, -29, 25, -27, 49, -56, 28, -80, -35, -38, -11, 12, 51, -42, 
-  -62, 57, -2, -27, -62, -49, -33, -1, 13, 2, 29, -25, 14, 14, 58, -60, -37, 50, -78, -53, -82, -21, -7, -62, -66, -12, -31, -66, -23, -48, -62, 32, -44, -6, 23, 21, 55, -21, -48, -69, -23, 26, -8, -41, -7, 5, -109, 33, 38, -50, -64, -20, 50, -72, -65, 41, -36, 51, -41, 71, -73, 60, 58, -18, 48, 72, 82, -16, -69, -63, 36, -7, -23, -86, -39, 71, 40, 4, -58, 5, 15, -31, -44, 21, -35, -1, -72, -62, 43, 48, -72, 32, 51, 30, -28, 17, -53, -46, 5, -62, 
-  30, -23, 76, 5, -5, 53, 9, -23, -47, 51, -82, 11, 52, 30, -57, -23, 40, 25, 45, -21, 31, -59, 51, -50, -37, 49, -42, -3, 6, -36, -30, -28, 37, 36, -29, -45, 56, 19, 17, 59, 54, -48, 73, -60, -9, 45, -65, -3, 45, 19, -60, 38, -33, -55, -51, 38, 44, -42, 69, 53, 45, -23, -26, -24, 43, 30, 46, -66, -15, -18, 42, -52, 4, 34, 3, 23, -66, 19, -2, -47, 21, -23, -44, -51, 53, 68, -31, -9, -54, -56, 35, 59, 40, 54, -49, -71, 5, -55, 53, 26, 
-  53, 48, -26, -25, 66, 19, 44, -59, 31, -21, -32, -13, 70, -13, -4, -75, 34, 57, -3, -3, 37, 47, 66, 14, -70, -20, -14, -72, -20, -71, 2, -73, -68, 9, -72, -60, 66, 24, 44, 11, -7, -68, -51, 9, 15, -16, -54, -37, 44, -55, 29, -33, 25, -12, 11, -21, 16, -53, 3, -36, 37, 13, 51, 1, -41, -10, 73, -72, -37, -31, 53, -7, -59, 29, -37, 37, 18, 72, 8, 14, 74, -49, 44, -66, 43, 20, 73, 12, 19, 67, -36, 9, 27, 68, 24, 34, -23, -28, 65, -71, 
-  59, -60, -24, 2, -36, -45, 74, 1, -56, -56, -36, -47, -62, 51, -28, 2, -78, 29, -22, -61, -21, -71, -52, 73, 6, 66, 9, -60, 37, 23, -69, -116, -1, 38, 75, -3, 26, -8, -13, -53, -99, -12, -1, 3, -65, -63, -32, -61, 50, 21, -12, -56, 41, 56, 36, -18, -22, -40, -9, -79, -26, 64, 25, 19, -49, 64, 44, -18, -12, -34, -74, -2, -73, 25, -63, -13, 10, 20, 8, 13, -25, 32, -33, 61, 23, 41, -18, 57, 28, 40, 6, -69, -1, -31, 13, -76, -86, -3, 60, 54, 
-  17, -52, -46, -50, -4, 36, 9, 36, 71, 6, 29, -27, -37, 29, 86, 65, -16, -62, -33, 7, 17, 8, 7, 61, 43, 38, 3, 42, 37, -50, 22, -13, 18, -67, -20, 32, 45, 20, 57, 15, 42, -48, -4, -59, 23, -29, -53, 79, -51, -5, 19, 11, -48, -55, 54, 40, -54, -51, 50, -54, 63, 4, 61, -31, 9, -24, 50, -59, -73, 70, 76, -54, -25, -1, 51, 66, -69, -23, -35, 47, -66, 34, 20, -36, 48, -5, -26, 21, 54, 39, 21, -27, 37, -16, -78, -52, 39, 59, -34, -50, 
-  46, 61, 20, -24, 40, 62, -41, -34, 46, -16, 68, -35, 26, 53, 22, -10, 18, 66, 66, -73, 68, 65, 55, 21, -18, 15, 73, 31, 54, -45, -19, -7, -50, 54, -43, -20, -14, -32, -71, 25, 15, 48, 55, 57, -64, 42, 47, 23, -60, -22, 63, -15, 30, 68, 49, -22, -39, -63, 36, 37, -70, 29, 11, 55, -56, 38, 12, 22, 18, 74, -46, -2, 29, 6, -30, -61, 29, -71, 6, -42, -73, 48, 12, -15, -24, -36, -51, -26, 13, 35, 44, 22, -34, 62, -33, -39, 47, -32, 35, 7, 
-  -15, -56, -15, -51, -7, 32, 11, -61, 34, 58, 27, -13, -29, 18, 6, 0, 44, -2, 63, -54, 23, 14, 21, 44, -38, -63, 23, 53, 7, 41, 4, 38, 62, 31, 22, 0, 37, -46, -47, -43, -27, -60, -62, -10, 71, -58, -23, -27, 53, 39, 36, -4, -32, -62, 54, -2, -68, -28, -33, 51, -78, -67, 60, -15, 67, 37, 39, 23, -54, -5, -51, 69, -22, -73, -71, 13, 1, 37, -43, 18, 13, -29, 58, -25, 63, 3, 27, -41, 14, 0, -12, 55, 30, 63, 3, -50, -48, -2, -22, -18, 
-  -73, 67, -14, 15, 92, -5, -50, 35, 27, 42, 63, -37, -4, -13, 35, -42, -9, 45, -3, -16, 53, 43, -33, 42, 24, 51, 50, -69, 49, -50, -7, 11, 2, 42, -39, 43, 41, -46, -48, -64, 53, 44, 65, 18, -46, -7, 23, -39, 40, 36, -27, 41, -66, -46, 41, 33, -55, 5, 30, -43, 8, 17, 52, 0, 32, -28, -15, 37, -8, -49, 11, 5, 29, -46, 64, 4, 43, 22, -65, -95, -22, -43, -11, 66, -12, -15, -42, 49, -13, -49, 14, -45, -46, -63, -62, -6, 45, 62, -48, -13, 
-  -49, -67, -37, 40, -44, -43, -13, -10, -8, 6, 72, -2, -50, 58, -82, -77, -23, 35, -15, 60, 3, -11, -47, -24, 12, -73, 42, -41, -5, -65, -15, 4, -33, 41, 47, -19, -2, -61, -47, 29, 10, 14, -107, 4, -72, 4, 23, -58, 48, 72, 47, 7, -66, 62, -24, -30, 65, -15, -72, 16, 35, 30, 44, -12, -50, -34, -10, -37, 71, 63, 14, 28, -34, -19, -55, 38, 59, -26, 37, 9, 70, 30, -47, 71, 19, -57, 68, 41, 70, 30, 29, 11, 31, 67, -55, -32, -58, -70, -30, -40, 
-  40, 60, -10, -32, -77, 37, -27, -59, -42, 52, -59, 40, -55, 47, -14, -54, -15, 27, 24, -30, 12, -52, -58, 14, -54, -45, -59, -40, 72, -65, -8, -75, 29, -58, 53, -10, -67, -52, 32, 64, 33, -28, -93, 1, -127, -13, 37, -37, 71, -57, 29, 35, -27, -52, 28, -31, 23, -25, 10, 7, -41, -41, -67, -13, -16, -50, 62, -63, -39, -58, 3, 19, -41, 52, 16, -34, -39, 73, 19, -13, 43, -39, 59, 69, -49, 66, -70, 42, 11, -70, 29, -4, -12, 34, 42, 53, 30, -14, 0, -22, 
-  -15, 49, -12, 37, -68, -50, -50, 47, -64, 18, 78, -14, 31, 2, 31, 23, 36, -26, 43, 47, 18, 16, -24, -24, -8, 67, 73, 10, -64, 41, 68, -24, -54, -5, -9, -19, -60, 68, 52, 44, -62, 4, -37, 4, 49, 8, -40, 80, 56, 29, -70, 47, -54, -23, -27, -49, -14, 44, 73, -66, -29, 8, 35, -10, -18, -55, 75, -35, -30, -23, 69, 71, 65, 42, -18, -29, -55, 15, -9, -2, 72, -16, 8, 13, 37, 57, 9, -52, 82, 39, -56, 12, 39, -60, -65, -18, -34, 69, -64, 69, 
-  63, -51, -15, 3, 42, -8, 38, -87, -3, -50, 1, 1, -27, -55, -62, 17, -55, 30, 34, -23, -6, -51, -16, 29, -41, 34, 44, 5, 59, 33, -31, -51, -72, -71, 14, -49, 18, 63, -26, 18, 13, 55, 64, -4, -34, -68, 28, -39, -30, -62, -12, -40, -46, 41, 22, -57, 38, -30, -6, -16, -67, -12, 59, -11, -38, -62, 9, -10, -72, 25, 23, -76, -3, 53, -15, -44, 61, -57, 32, -20, -60, 70, 54, 50, 62, 7, -16, -42, -43, -73, 13, -47, -55, -40, 4, -72, -27, -62, -43, -34, 
-  -53, -30, -61, 63, -39, -54, 28, -2, 34, 82, 6, -41, -20, -43, 61, -20, 5, -71, -41, -25, -73, -7, 35, 2, 55, -60, -17, 54, 47, 73, 33, -12, -60, -26, -44, -4, 34, 48, 23, 71, -65, -1, 24, -44, -35, 8, 73, 22, -55, -13, -52, -30, -36, -63, -61, -49, 23, -31, 15, 60, 1, -26, -60, 68, 64, 80, -18, -64, 78, -79, 9, 43, 43, 79, 18, 6, -54, -65, 55, 24, 48, 40, 51, 27, -7, 10, -73, -42, -70, -47, -20, -60, -28, 28, 40, 51, -40, -63, 70, -62, 
-};
-const TfArray<2, int> tensor_dimension3 = { 2, { 50,100 } };
-const TfArray<1, float> quant3_scale = { 1, { 0.0026967737358063459, } };
+const ALIGN(16) int32_t tensor_data3[10] = { 34, 138, 123, 167, -30, 143, 97, 72, -164, -79, };
+const TfArray<1, int> tensor_dimension3 = { 1, { 10 } };
+const TfArray<1, float> quant3_scale = { 1, { 0.00055093521950766444, } };
 const TfArray<1, int> quant3_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant3 = { (TfLiteFloatArray*)&quant3_scale, (TfLiteIntArray*)&quant3_zero, 0 };
-const ALIGN(16) int32_t tensor_data4[50] = { -3, 1, 5, 3, -1, -1, -2, -8, 1, -1, 3, -5, -3, 15, 7, 8, 5, -6, 2, 0, 3, -7, -4, -1, 0, 3, -2, -3, 8, -7, 8, 6, -9, 1, 0, 6, 4, 1, 7, 0, -3, -1, -2, 0, 0, 6, 0, -8, 0, 1, };
-const TfArray<1, int> tensor_dimension4 = { 1, { 50 } };
-const TfArray<1, float> quant4_scale = { 1, { 0.022363642230629921, } };
+const ALIGN(16) int8_t tensor_data4[10*20] = { 
+  38, -53, -39, -16, 83, 29, 96, -87, -4, -32, 6, 15, 55, 84, 47, -81, 7, -29, -39, 62, 
+  -46, -98, 71, 119, 56, -53, -33, -54, -59, -32, -21, -32, 38, 35, 74, 106, -23, 65, -40, -56, 
+  9, -64, 17, 61, -45, -8, -22, 40, -38, -68, 13, 30, -23, -74, 13, -89, 63, -81, -111, -67, 
+  64, 33, 86, -63, -59, 89, -28, -11, -21, 96, 57, 40, -49, -87, 3, -77, -29, -74, 94, 7, 
+  -32, 37, -54, -29, -64, 48, -29, 17, -68, 48, 61, 38, -18, 29, 19, -67, -38, 69, 50, 34, 
+  -54, 64, -34, 127, 4, -81, 11, 50, -75, -32, 95, 52, 1, 96, -10, 74, 48, 84, 25, -21, 
+  80, 56, -45, -50, 59, 62, -2, -1, -21, 44, 59, 27, -59, -1, -5, 4, 46, -15, -12, -17, 
+  -13, 51, 22, 97, 3, 30, 49, -73, 87, -49, -24, 78, 55, 15, -76, 73, 85, 30, -10, 107, 
+  30, 11, 19, 5, 50, 5, -87, -36, -25, -6, 62, 11, -9, -74, -46, 51, -70, 40, -42, -25, 
+  24, -69, -6, -10, 65, 69, -3, 6, 54, -115, -60, 34, -51, -44, 58, -32, -9, 64, -2, 97, 
+};
+const TfArray<2, int> tensor_dimension4 = { 2, { 10,20 } };
+const TfArray<1, float> quant4_scale = { 1, { 0.0053707375191152096, } };
 const TfArray<1, int> quant4_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant4 = { (TfLiteFloatArray*)&quant4_scale, (TfLiteIntArray*)&quant4_zero, 0 };
-const ALIGN(16) int8_t tensor_data5[20*50] = { 
-  -11, -9, 12, 0, -28, 32, 11, 2, -18, -36, -3, -25, 12, 14, 20, -24, -7, 8, -25, 8, 22, 29, -15, 1, -5, -1, -37, 40, -24, 15, -2, 31, -14, 13, -30, 5, -2, 28, 35, 12, 43, 23, -5, -14, 11, -9, 36, 35, -38, -12, 
-  23, 13, -32, 14, -14, -7, -6, 31, -17, 29, 0, 38, 24, -5, -25, 22, 6, 35, -36, -32, 34, 10, 36, 9, -31, 20, 19, 3, -22, 1, 9, 36, -10, -25, 4, -37, -13, -35, -31, -15, 38, 13, 32, -14, -39, 3, 27, 24, -31, -36, 
-  -39, 17, 38, 38, 7, 4, -20, 36, 2, -20, -35, -19, -21, 26, 23, 17, 3, -2, -14, 20, -14, 16, 25, -5, -12, -21, 31, -9, 29, 19, -37, -6, 38, 4, 30, 12, 28, 19, -14, -38, 22, -33, -24, -10, -21, -23, 3, -25, 32, -26, 
-  -39, -7, -27, 10, 25, 15, -40, -9, 13, -36, 31, 17, 24, 27, 21, -40, 26, -19, -7, -22, -18, -33, -38, -22, 21, -21, 40, -4, -42, -13, -16, -18, 33, -39, -1, -21, -24, -34, -34, 25, 23, -40, -14, -17, 31, -39, 36, -1, 34, -16, 
-  -3, -31, -7, 23, -7, 35, 2, -18, 8, -35, 37, 29, -1, 31, 17, 11, 12, -32, 37, -34, -23, -34, 14, 7, -8, -10, 24, 29, 20, -36, -27, -27, -16, -20, 13, 16, 28, 6, 41, 24, 16, 42, -31, 35, -37, -30, 7, 34, 26, 18, 
-  -5, 26, -17, -31, 22, -30, 0, 10, 36, 21, 15, -13, 11, 4, -11, 33, 32, -32, 5, 35, 34, 14, -39, -1, 29, 14, 12, -12, 0, -19, 25, 5, 2, 17, -23, 33, 7, -19, 24, 39, 5, -38, 2, -13, 23, 14, 10, -7, 39, 26, 
-  -39, 4, 8, 26, 12, 25, 26, -40, -17, 6, -21, -3, 29, -5, 17, -16, 9, 20, -14, -20, 5, -8, 37, 38, 13, 16, -32, 14, -2, 0, 24, -7, -11, 27, 18, 29, 24, -13, 5, 4, 15, -4, 29, 12, -37, 37, 37, -30, -30, -1, 
-  34, -4, -23, -19, 40, -26, -3, -24, -14, 11, 15, -25, 4, -25, -23, 32, 16, 3, 13, 24, 21, -36, -40, 11, 33, -15, 15, -34, -20, -34, 32, -30, -15, 22, 19, 34, 5, 1, 14, -19, 19, 35, 23, -30, -24, 19, 14, -37, -35, -14, 
-  4, 6, -30, -20, -37, -31, -3, -38, -31, -6, 13, -22, 1, 26, -28, 10, -13, 41, -25, -37, -14, -4, 34, -26, -13, 21, -28, -33, -17, -33, -29, 0, 13, -31, -11, 40, -30, 27, 2, 34, 8, 28, -1, -12, -23, -63, -19, 17, -6, -19, 
-  32, -17, -30, 31, 5, 10, -21, 6, 6, 14, 29, 26, -17, -12, 37, 34, 35, 27, -36, -12, 37, 9, -1, 20, -29, -2, -6, -22, 6, 3, -32, 33, 11, -17, -15, -10, -11, -12, 12, -18, -33, 28, -24, 6, -22, -33, 18, -23, -21, -10, 
-  -34, -9, -39, -38, -50, 7, -4, 16, -8, 37, 10, 19, 14, 30, -67, 35, -27, -21, 26, -20, -2, 26, -36, -13, 20, -14, 24, 14, -8, -24, 26, 15, -51, -73, -40, -74, 3, -40, 2, 11, 4, -27, -38, -10, -62, -3, 12, -31, 34, -3, 
-  21, -35, 16, 4, -11, 33, 24, -13, -15, -34, 4, -25, -36, -14, -4, -21, 14, -12, 27, -35, 4, -11, 33, -13, -13, -10, -18, -24, -42, 43, -15, -21, 38, 16, -5, -13, 17, 37, 1, 37, -38, -24, -3, 31, -12, 34, -18, 14, -35, 28, 
-  16, 4, 29, 5, 37, 21, 2, -12, 19, 23, 1, -32, 36, 31, 29, -4, -36, 17, -15, 24, -32, -36, 25, -4, -17, 32, -27, -35, -4, -18, -31, 27, -15, -39, 22, -17, 17, 10, -5, 31, -14, -37, 15, 12, -35, -2, 33, -34, -34, -16, 
-  20, 23, 37, -5, -10, -1, -19, -11, 33, 3, -5, 1, -21, 42, 1, 28, 30, 20, 19, 31, 36, -36, 3, 14, -35, 32, -39, -32, 7, -27, 26, 38, -38, 35, 23, -8, 19, -7, 34, 29, 9, 0, 23, -15, -29, -8, 12, -16, -28, -16, 
-  24, 11, -26, -38, 5, -37, -38, 25, -28, 18, 21, -36, 33, -23, 18, 8, 32, 14, 6, 18, 9, 1, -1, -28, -30, -21, -30, 27, 30, 42, -24, 40, 24, -9, 41, -26, 15, -30, 3, 32, -18, -4, -12, -19, -3, -37, 33, 7, -9, -30, 
-  -2, -39, 4, -5, 12, -8, -37, -10, 5, 28, -9, -22, -28, 6, 36, -21, 15, 24, 19, -42, 29, 29, 41, -35, 29, 37, 4, -30, -53, -21, -24, -33, 18, -29, 38, 21, -18, -9, 5, -6, 18, 28, -34, -19, -7, -4, -14, 25, 31, -36, 
-  -3, 9, 35, 33, 9, 24, 30, -13, 0, 35, -21, -16, 10, 28, -6, 38, -35, -15, -33, 24, 39, 5, 34, 23, -36, 16, -15, 22, -11, 33, -28, -39, -2, -5, -5, -34, 37, -28, 38, -17, 25, -43, 33, -36, -20, 13, -35, -39, 1, 7, 
-  8, 35, -30, -8, 22, 19, -36, -23, -37, 31, -30, 14, -16, 10, 36, -12, -33, 17, 24, -17, 16, -26, 33, 24, 35, 41, 25, 16, 23, 37, 2, -39, -40, -16, 5, 13, 18, 15, -19, 23, -13, 37, 8, 21, 4, 4, 23, -17, -39, 25, 
-  11, -33, 31, 24, -127, 13, -32, -23, 2, 6, 11, -20, -43, 7, 4, -24, -18, -1, 3, -1, -39, -33, 25, 33, 45, 26, -24, 8, -7, 14, -17, 4, -7, 17, -32, -20, -41, -11, 19, 18, -8, -42, 17, 46, -49, 19, -72, 2, 7, -35, 
-  -2, 36, -7, -35, 39, -19, -2, 10, -30, 0, -28, 39, 24, 8, -21, -36, -28, 29, 15, -20, 8, 7, 25, 8, 27, -2, 6, -7, -8, 1, -3, 9, -18, 3, -25, 17, -37, 14, 40, 35, -15, -11, 6, 33, 21, -37, 3, 23, 5, -24, 
-};
-const TfArray<2, int> tensor_dimension5 = { 2, { 20,50 } };
-const TfArray<1, float> quant5_scale = { 1, { 0.0072257239371538162, } };
+const ALIGN(16) int32_t tensor_data5[20] = { 43, 6, 69, 95, -5, 66, 25, -15, -7, 49, -2, -36, 32, 37, 27, 72, 43, 1, 15, 74, };
+const TfArray<1, int> tensor_dimension5 = { 1, { 20 } };
+const TfArray<1, float> quant5_scale = { 1, { 0.0014028748264536262, } };
 const TfArray<1, int> quant5_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant5 = { (TfLiteFloatArray*)&quant5_scale, (TfLiteIntArray*)&quant5_zero, 0 };
-const ALIGN(16) int32_t tensor_data6[20] = { 2, -3, 0, 0, 2, 2, 0, -1, -2, 3, 0, -3, 2, 4, -4, -2, -1, 2, 3, -3, };
-const TfArray<1, int> tensor_dimension6 = { 1, { 20 } };
-const TfArray<1, float> quant6_scale = { 1, { 0.051554322242736816, } };
+const ALIGN(16) int8_t tensor_data6[20*39] = { 
+  -31, -11, 19, 55, 13, 15, 12, -65, -12, -81, -22, -12, -58, -32, 3, 52, 27, 57, -39, -47, 21, -37, 21, -11, -89, 37, 4, -16, -61, -40, 35, 44, 77, -20, -20, 61, 10, 78, -6, 
+  -76, 1, 49, 7, -21, 9, 26, 20, 29, 53, -38, 22, 10, -1, 16, -17, 50, 32, 65, -37, -16, -8, 46, -13, 27, 32, -2, 45, -16, -46, -11, 71, 77, 77, 67, -5, -27, 11, 28, 
+  62, -7, -61, -24, 49, -9, 60, 76, 81, 16, -29, 31, -54, 51, -42, 31, 41, 59, 24, 44, 77, -54, 42, -46, 15, -26, 72, 15, -26, -27, -10, 61, 49, -28, -1, -8, -44, 8, 27, 
+  20, 31, -56, 71, 34, 85, 26, 116, 99, 9, 86, 122, 14, 57, -10, -10, -23, -7, 51, 48, -27, 7, -43, -42, 32, 4, -69, 20, 65, 58, -31, 19, 16, -57, -26, -48, -27, -127, -18, 
+  78, 8, -1, -36, -42, -50, 43, -18, -31, 34, -27, 9, -44, -29, -49, 42, 30, 32, -13, -23, -13, 50, -18, -17, -26, -2, 32, 48, -53, -58, 18, -54, 39, -58, -34, -44, -36, -70, 8, 
+  -40, 43, 14, 13, -11, -57, 5, 44, -50, -9, -73, -47, -62, 57, 32, 0, -37, 39, -11, 5, -57, -60, -17, -42, 32, -18, 52, -2, -55, 85, 16, 24, 88, -6, 43, -33, 72, -18, 27, 
+  30, -25, -18, -33, -44, -72, -33, 33, -34, -6, -15, -16, -2, 41, 29, 0, -24, -37, 53, 40, -38, 3, -37, 31, -34, -9, 67, -45, 52, 13, 32, -66, -9, -16, -57, 28, -67, 16, -52, 
+  30, -60, 19, 47, 47, 3, 3, 10, -39, 36, -29, 41, -25, 31, -22, -81, -23, -32, -46, 57, 54, -38, -31, 0, -11, 63, 3, 25, -84, -2, 35, 69, 26, -38, -8, -39, 6, 33, 49, 
+  31, 53, 41, -12, -54, 33, -15, -46, -13, -19, -4, 31, 29, -29, 12, -23, 23, -33, 54, 5, 50, 37, 50, 20, 33, 34, 36, -4, 55, -58, 33, -69, -19, -10, -61, -27, -64, 44, -49, 
+  -107, 32, 0, 2, -39, 35, -46, -7, -46, -74, -49, -55, -61, -10, -28, 41, 9, 55, 50, -36, -17, 39, 7, 57, 58, 44, 29, 16, -11, 46, -10, 4, -16, 18, 67, 37, 70, 65, -6, 
+  69, 21, 23, -58, -39, -18, 43, 15, 1, 25, -43, 23, 15, -57, 38, -48, 0, 2, 26, 16, -51, 31, -56, 51, 54, -13, -45, -17, 39, 46, -49, -46, -48, -13, 63, 56, -17, -47, -51, 
+  -19, 23, 51, -29, -56, -66, 53, -34, 39, -20, 48, 53, 37, 47, -13, 28, -36, -32, 50, -1, -13, -10, 64, 6, 24, 26, 31, 32, 60, 14, -13, -22, -5, -30, 14, -15, -45, 15, 16, 
+  10, -68, -4, -7, 55, -44, -42, -55, -45, 34, -22, -54, 12, 43, -41, 57, 15, -17, 53, -23, 27, 28, -43, 32, 27, -24, -14, -38, 61, -15, -36, 1, -82, 42, 22, 24, 30, -57, -57, 
+  19, -62, -24, 2, 54, 53, -63, 47, -20, 3, 0, 4, 48, 6, -12, -27, -57, -9, -63, -37, -62, -64, -17, -35, -41, -72, -58, 50, 9, 1, 2, -74, -61, -75, -23, 0, 13, -20, 41, 
+  89, -77, -53, 26, 25, -29, -34, 32, 37, 31, 51, -53, -62, 95, -38, -52, -47, 19, 61, -21, 92, 80, -2, 59, -37, -45, 12, -50, 29, 0, 86, -21, -31, 62, 3, -15, -65, 29, 42, 
+  26, 81, -57, -38, -37, 44, 19, 38, 53, 49, 24, 53, -42, 6, 2, 47, 29, 45, 32, 20, -84, 26, -77, 3, -9, -9, -65, -31, 13, -27, -1, -27, -47, -3, -41, -24, -36, -92, 7, 
+  0, -17, -6, 2, 51, -12, -13, -52, 16, -68, -56, -31, 18, 8, -35, 39, -42, -28, 5, -48, -83, -63, -5, -52, -2, -19, -31, 56, -19, 42, 6, -14, 35, -62, -42, -58, 0, -77, -25, 
+  36, 32, 40, 53, 62, 7, -13, 85, 59, 24, -7, 98, -23, 43, -73, -20, -44, 15, 67, -37, 45, -30, -38, 60, -27, -51, -51, 11, 1, -40, -16, -61, -25, -6, -25, 34, 19, -64, 41, 
+  -89, -18, 37, -60, 28, 6, -33, -6, -5, -23, -53, -26, -54, -24, -25, -50, 14, -21, -25, 33, -25, 58, -30, 55, -23, -55, 28, -22, -70, -26, 56, -6, -17, -39, 80, 66, 36, -28, -48, 
+  -28, 43, 10, -20, -16, -64, -31, -62, -7, -9, -101, -1, 31, -17, -6, 11, 13, -46, -79, -24, -62, -93, 13, -85, -41, -10, -55, -16, 31, 58, -67, -38, 2, -84, -38, -19, -102, -27, -7, 
+};
+const TfArray<2, int> tensor_dimension6 = { 2, { 20,39 } };
+const TfArray<1, float> quant6_scale = { 1, { 0.0050277700647711754, } };
 const TfArray<1, int> quant6_zero = { 1, { 0 } };
 const TfLiteAffineQuantization quant6 = { (TfLiteFloatArray*)&quant6_scale, (TfLiteIntArray*)&quant6_zero, 0 };
-const ALIGN(16) int8_t tensor_data7[10*20] = { 
-  19, -52, -58, -4, -52, -10, -29, 17, 59, -5, 9, 46, -51, -15, 55, -1, -25, 23, -20, 10, 
-  -22, 23, -61, -27, -40, -47, 41, -1, -51, -41, 41, 14, 20, 50, -44, -20, 25, -44, 87, -56, 
-  -34, -32, -63, 11, 33, 58, 60, -20, -79, -55, 42, 22, 35, 43, -7, -81, -59, 16, 19, -30, 
-  23, -49, 7, 9, -50, -40, -50, -36, 39, 2, -48, -39, -10, -39, 22, -16, 33, 48, 23, -38, 
-  36, -54, 43, 10, 30, 18, -11, -17, -24, 33, -40, -30, 16, 61, -51, -57, 3, 27, 9, -19, 
-  -51, -53, 22, -30, 4, -127, 42, -28, -56, -4, 10, -33, -30, -39, -50, -3, 50, 4, -44, -42, 
-  -21, -18, -55, -35, 10, 57, -19, 24, 0, 58, 8, -60, -52, -52, 10, -50, -39, -48, 35, -48, 
-  54, -50, -9, 4, 44, -53, 9, 8, -15, -22, -38, 30, -22, -50, 59, -4, -12, -24, 21, 38, 
-  -50, -1, 47, -24, -11, -26, -2, 9, -36, 48, 27, -54, -25, -65, -2, -64, -60, 60, -10, 52, 
-  2, 30, 23, -45, -44, -46, 57, 6, -28, -27, -47, 15, 9, -8, 6, -11, 38, -2, -29, 29, 
-};
-const TfArray<2, int> tensor_dimension7 = { 2, { 10,20 } };
-const TfArray<1, float> quant7_scale = { 1, { 0.0073037790134549141, } };
-const TfArray<1, int> quant7_zero = { 1, { 0 } };
+const TfArray<2, int> tensor_dimension7 = { 2, { 1,20 } };
+const TfArray<1, float> quant7_scale = { 1, { 0.10258091986179352, } };
+const TfArray<1, int> quant7_zero = { 1, { -128 } };
 const TfLiteAffineQuantization quant7 = { (TfLiteFloatArray*)&quant7_scale, (TfLiteIntArray*)&quant7_zero, 0 };
-const ALIGN(16) int32_t tensor_data8[10] = { 0, 2, 2, 0, 6, 0, 3, -2, -4, -3, };
-const TfArray<1, int> tensor_dimension8 = { 1, { 10 } };
-const TfArray<1, float> quant8_scale = { 1, { 0.033830270171165466, } };
-const TfArray<1, int> quant8_zero = { 1, { 0 } };
+const TfArray<2, int> tensor_dimension8 = { 2, { 1,10 } };
+const TfArray<1, float> quant8_scale = { 1, { 0.18643051385879517, } };
+const TfArray<1, int> quant8_zero = { 1, { -128 } };
 const TfLiteAffineQuantization quant8 = { (TfLiteFloatArray*)&quant8_scale, (TfLiteIntArray*)&quant8_zero, 0 };
-const ALIGN(16) int8_t tensor_data9[4*10] = { 
-  -92, 43, 68, -34, 50, -62, 14, -111, -127, -47, 
-  -75, -12, 57, 17, 76, -59, 72, 3, -61, -21, 
-  85, -75, 64, 28, -78, -17, 95, 32, 35, 66, 
-  -77, 57, -20, -40, -66, 82, -24, 0, 68, 71, 
-};
-const TfArray<2, int> tensor_dimension9 = { 2, { 4,10 } };
-const TfArray<1, float> quant9_scale = { 1, { 0.0071185319684445858, } };
-const TfArray<1, int> quant9_zero = { 1, { 0 } };
+const TfArray<2, int> tensor_dimension9 = { 2, { 1,4 } };
+const TfArray<1, float> quant9_scale = { 1, { 0.13449952006340027, } };
+const TfArray<1, int> quant9_zero = { 1, { 27 } };
 const TfLiteAffineQuantization quant9 = { (TfLiteFloatArray*)&quant9_scale, (TfLiteIntArray*)&quant9_zero, 0 };
-const ALIGN(16) int32_t tensor_data10[4] = { 105, -107, 18, -79, };
-const TfArray<1, int> tensor_dimension10 = { 1, { 4 } };
-const TfArray<1, float> quant10_scale = { 1, { 0.011597982607781887, } };
-const TfArray<1, int> quant10_zero = { 1, { 0 } };
+const TfArray<2, int> tensor_dimension10 = { 2, { 1,4 } };
+const TfArray<1, float> quant10_scale = { 1, { 0.00390625, } };
+const TfArray<1, int> quant10_zero = { 1, { -128 } };
 const TfLiteAffineQuantization quant10 = { (TfLiteFloatArray*)&quant10_scale, (TfLiteIntArray*)&quant10_zero, 0 };
-const TfArray<2, int> tensor_dimension11 = { 2, { 1,100 } };
-const TfArray<1, float> quant11_scale = { 1, { 8.2927398681640625, } };
-const TfArray<1, int> quant11_zero = { 1, { -128 } };
-const TfLiteAffineQuantization quant11 = { (TfLiteFloatArray*)&quant11_scale, (TfLiteIntArray*)&quant11_zero, 0 };
-const TfArray<2, int> tensor_dimension12 = { 2, { 1,50 } };
-const TfArray<1, float> quant12_scale = { 1, { 7.134831428527832, } };
-const TfArray<1, int> quant12_zero = { 1, { -128 } };
-const TfLiteAffineQuantization quant12 = { (TfLiteFloatArray*)&quant12_scale, (TfLiteIntArray*)&quant12_zero, 0 };
-const TfArray<2, int> tensor_dimension13 = { 2, { 1,20 } };
-const TfArray<1, float> quant13_scale = { 1, { 4.6318860054016113, } };
-const TfArray<1, int> quant13_zero = { 1, { -128 } };
-const TfLiteAffineQuantization quant13 = { (TfLiteFloatArray*)&quant13_scale, (TfLiteIntArray*)&quant13_zero, 0 };
-const TfArray<2, int> tensor_dimension14 = { 2, { 1,10 } };
-const TfArray<1, float> quant14_scale = { 1, { 1.6292660236358643, } };
-const TfArray<1, int> quant14_zero = { 1, { -128 } };
-const TfLiteAffineQuantization quant14 = { (TfLiteFloatArray*)&quant14_scale, (TfLiteIntArray*)&quant14_zero, 0 };
-const TfArray<2, int> tensor_dimension15 = { 2, { 1,4 } };
-const TfArray<1, float> quant15_scale = { 1, { 1.8152707815170288, } };
-const TfArray<1, int> quant15_zero = { 1, { -26 } };
-const TfLiteAffineQuantization quant15 = { (TfLiteFloatArray*)&quant15_scale, (TfLiteIntArray*)&quant15_zero, 0 };
-const TfArray<2, int> tensor_dimension16 = { 2, { 1,4 } };
-const TfArray<1, float> quant16_scale = { 1, { 0.00390625, } };
-const TfArray<1, int> quant16_zero = { 1, { -128 } };
-const TfLiteAffineQuantization quant16 = { (TfLiteFloatArray*)&quant16_scale, (TfLiteIntArray*)&quant16_zero, 0 };
 const TfLiteFullyConnectedParams opdata0 = { kTfLiteActRelu, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
-const TfArray<3, int> inputs0 = { 3, { 0,1,2 } };
-const TfArray<1, int> outputs0 = { 1, { 11 } };
+const TfArray<3, int> inputs0 = { 3, { 0,6,5 } };
+const TfArray<1, int> outputs0 = { 1, { 7 } };
 const TfLiteFullyConnectedParams opdata1 = { kTfLiteActRelu, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
-const TfArray<3, int> inputs1 = { 3, { 11,3,4 } };
-const TfArray<1, int> outputs1 = { 1, { 12 } };
-const TfLiteFullyConnectedParams opdata2 = { kTfLiteActRelu, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
-const TfArray<3, int> inputs2 = { 3, { 12,5,6 } };
-const TfArray<1, int> outputs2 = { 1, { 13 } };
-const TfLiteFullyConnectedParams opdata3 = { kTfLiteActRelu, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
-const TfArray<3, int> inputs3 = { 3, { 13,7,8 } };
-const TfArray<1, int> outputs3 = { 1, { 14 } };
-const TfLiteFullyConnectedParams opdata4 = { kTfLiteActNone, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
-const TfArray<3, int> inputs4 = { 3, { 14,9,10 } };
-const TfArray<1, int> outputs4 = { 1, { 15 } };
-const TfLiteSoftmaxParams opdata5 = { 1 };
-const TfArray<1, int> inputs5 = { 1, { 15 } };
-const TfArray<1, int> outputs5 = { 1, { 16 } };
-const TensorInfo_t tensorData[] = {
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 112, (TfLiteIntArray*)&tensor_dimension0, 27, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant0))}, },
-  { kTfLiteMmapRo, kTfLiteInt8, (void*)tensor_data1, (TfLiteIntArray*)&tensor_dimension1, 2700, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant1))}, },
-  { kTfLiteMmapRo, kTfLiteInt32, (void*)tensor_data2, (TfLiteIntArray*)&tensor_dimension2, 400, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant2))}, },
-  { kTfLiteMmapRo, kTfLiteInt8, (void*)tensor_data3, (TfLiteIntArray*)&tensor_dimension3, 5000, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant3))}, },
-  { kTfLiteMmapRo, kTfLiteInt32, (void*)tensor_data4, (TfLiteIntArray*)&tensor_dimension4, 200, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant4))}, },
-  { kTfLiteMmapRo, kTfLiteInt8, (void*)tensor_data5, (TfLiteIntArray*)&tensor_dimension5, 1000, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant5))}, },
-  { kTfLiteMmapRo, kTfLiteInt32, (void*)tensor_data6, (TfLiteIntArray*)&tensor_dimension6, 80, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant6))}, },
-  { kTfLiteMmapRo, kTfLiteInt8, (void*)tensor_data7, (TfLiteIntArray*)&tensor_dimension7, 200, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant7))}, },
-  { kTfLiteMmapRo, kTfLiteInt32, (void*)tensor_data8, (TfLiteIntArray*)&tensor_dimension8, 40, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant8))}, },
-  { kTfLiteMmapRo, kTfLiteInt8, (void*)tensor_data9, (TfLiteIntArray*)&tensor_dimension9, 40, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant9))}, },
-  { kTfLiteMmapRo, kTfLiteInt32, (void*)tensor_data10, (TfLiteIntArray*)&tensor_dimension10, 16, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant10))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 0, (TfLiteIntArray*)&tensor_dimension11, 100, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant11))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 112, (TfLiteIntArray*)&tensor_dimension12, 50, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant12))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 0, (TfLiteIntArray*)&tensor_dimension13, 20, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant13))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 32, (TfLiteIntArray*)&tensor_dimension14, 10, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant14))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 16, (TfLiteIntArray*)&tensor_dimension15, 4, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant15))}, },
-  { kTfLiteArenaRw, kTfLiteInt8, tensor_arena + 0, (TfLiteIntArray*)&tensor_dimension16, 4, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&quant16))}, },
-};const NodeInfo_t nodeData[] = {
-  { (TfLiteIntArray*)&inputs0, (TfLiteIntArray*)&outputs0, const_cast<void*>(static_cast<const void*>(&opdata0)), OP_FULLY_CONNECTED, },
-  { (TfLiteIntArray*)&inputs1, (TfLiteIntArray*)&outputs1, const_cast<void*>(static_cast<const void*>(&opdata1)), OP_FULLY_CONNECTED, },
-  { (TfLiteIntArray*)&inputs2, (TfLiteIntArray*)&outputs2, const_cast<void*>(static_cast<const void*>(&opdata2)), OP_FULLY_CONNECTED, },
-  { (TfLiteIntArray*)&inputs3, (TfLiteIntArray*)&outputs3, const_cast<void*>(static_cast<const void*>(&opdata3)), OP_FULLY_CONNECTED, },
-  { (TfLiteIntArray*)&inputs4, (TfLiteIntArray*)&outputs4, const_cast<void*>(static_cast<const void*>(&opdata4)), OP_FULLY_CONNECTED, },
-  { (TfLiteIntArray*)&inputs5, (TfLiteIntArray*)&outputs5, const_cast<void*>(static_cast<const void*>(&opdata5)), OP_SOFTMAX, },
+const TfArray<3, int> inputs1 = { 3, { 7,4,3 } };
+const TfArray<1, int> outputs1 = { 1, { 8 } };
+const TfLiteFullyConnectedParams opdata2 = { kTfLiteActNone, kTfLiteFullyConnectedWeightsFormatDefault, false, false };
+const TfArray<3, int> inputs2 = { 3, { 8,2,1 } };
+const TfArray<1, int> outputs2 = { 1, { 9 } };
+const TfLiteSoftmaxParams opdata3 = { 1 };
+const TfArray<1, int> inputs3 = { 1, { 9 } };
+const TfArray<1, int> outputs3 = { 1, { 10 } };
+};
+
+TensorInfo_t tensorData[] = {
+{ kTfLiteArenaRw, kTfLiteInt8, (int32_t*)(tensor_arena + 0), (TfLiteIntArray*)&g0::tensor_dimension0, 39, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant0))}, },
+{ kTfLiteMmapRo, kTfLiteInt32, (int32_t*)g0::tensor_data1, (TfLiteIntArray*)&g0::tensor_dimension1, 16, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant1))}, },
+{ kTfLiteMmapRo, kTfLiteInt8, (int32_t*)g0::tensor_data2, (TfLiteIntArray*)&g0::tensor_dimension2, 40, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant2))}, },
+{ kTfLiteMmapRo, kTfLiteInt32, (int32_t*)g0::tensor_data3, (TfLiteIntArray*)&g0::tensor_dimension3, 40, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant3))}, },
+{ kTfLiteMmapRo, kTfLiteInt8, (int32_t*)g0::tensor_data4, (TfLiteIntArray*)&g0::tensor_dimension4, 200, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant4))}, },
+{ kTfLiteMmapRo, kTfLiteInt32, (int32_t*)g0::tensor_data5, (TfLiteIntArray*)&g0::tensor_dimension5, 80, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant5))}, },
+{ kTfLiteMmapRo, kTfLiteInt8, (int32_t*)g0::tensor_data6, (TfLiteIntArray*)&g0::tensor_dimension6, 780, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant6))}, },
+{ kTfLiteArenaRw, kTfLiteInt8, (int32_t*)(tensor_arena + 48), (TfLiteIntArray*)&g0::tensor_dimension7, 20, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant7))}, },
+{ kTfLiteArenaRw, kTfLiteInt8, (int32_t*)(tensor_arena + 0), (TfLiteIntArray*)&g0::tensor_dimension8, 10, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant8))}, },
+{ kTfLiteArenaRw, kTfLiteInt8, (int32_t*)(tensor_arena + 16), (TfLiteIntArray*)&g0::tensor_dimension9, 4, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant9))}, },
+{ kTfLiteArenaRw, kTfLiteInt8, (int32_t*)(tensor_arena + 0), (TfLiteIntArray*)&g0::tensor_dimension10, 4, {kTfLiteAffineQuantization, const_cast<void*>(static_cast<const void*>(&g0::quant10))}, },
+};
+
+#ifndef TF_LITE_STATIC_MEMORY
+TfLiteNode tflNodes[4] = {
+{ (TfLiteIntArray*)&g0::inputs0, (TfLiteIntArray*)&g0::outputs0, (TfLiteIntArray*)&g0::inputs0, nullptr, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata0)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs1, (TfLiteIntArray*)&g0::outputs1, (TfLiteIntArray*)&g0::inputs1, nullptr, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata1)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs2, (TfLiteIntArray*)&g0::outputs2, (TfLiteIntArray*)&g0::inputs2, nullptr, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata2)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs3, (TfLiteIntArray*)&g0::outputs3, (TfLiteIntArray*)&g0::inputs3, nullptr, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata3)), nullptr, 0, },
+};
+#else
+TfLiteNode tflNodes[4] = {
+{ (TfLiteIntArray*)&g0::inputs0, (TfLiteIntArray*)&g0::outputs0, (TfLiteIntArray*)&g0::inputs0, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata0)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs1, (TfLiteIntArray*)&g0::outputs1, (TfLiteIntArray*)&g0::inputs1, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata1)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs2, (TfLiteIntArray*)&g0::outputs2, (TfLiteIntArray*)&g0::inputs2, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata2)), nullptr, 0, },
+{ (TfLiteIntArray*)&g0::inputs3, (TfLiteIntArray*)&g0::outputs3, (TfLiteIntArray*)&g0::inputs3, nullptr, const_cast<void*>(static_cast<const void*>(&g0::opdata3)), nullptr, 0, },
+};
+#endif
+
+used_operators_e used_ops[] =
+{OP_FULLY_CONNECTED, OP_FULLY_CONNECTED, OP_FULLY_CONNECTED, OP_SOFTMAX, };
+
+
+// Indices into tflTensors and tflNodes for subgraphs
+const size_t tflTensors_subgraph_index[] = {0, 11, };
+const size_t tflNodes_subgraph_index[] = {0, 4, };
+
+// Input/output tensors
+static const int in_tensor_indices[] = {
+  0, 
+};
+
+static const int out_tensor_indices[] = {
+  10, 
 };
 
+
+size_t current_subgraph_index = 0;
+
 static void init_tflite_tensor(size_t i, TfLiteTensor *tensor) {
   tensor->type = tensorData[i].type;
-  tensor->is_variable = 0;
+  tensor->is_variable = false;
 
 #if defined(EI_CLASSIFIER_ALLOCATION_HEAP)
   tensor->allocation_type = tensorData[i].allocation_type;
@@ -465,7 +306,8 @@ static void init_tflite_tensor(size_t i, TfLiteTensor *tensor) {
 
 }
 
-static void init_tflite_eval_tensor(size_t i, TfLiteEvalTensor *tensor) {
+static void init_tflite_eval_tensor(int i, TfLiteEvalTensor *tensor) {
+
   tensor->type = tensorData[i].type;
 
   tensor->dims = tensorData[i].dims;
@@ -487,10 +329,12 @@ static void init_tflite_eval_tensor(size_t i, TfLiteEvalTensor *tensor) {
 
 static void* overflow_buffers[EI_MAX_OVERFLOW_BUFFER_COUNT];
 static size_t overflow_buffers_ix = 0;
-static void * AllocatePersistentBuffer(struct TfLiteContext* ctx,
+static void * AllocatePersistentBufferImpl(struct TfLiteContext* ctx,
                                        size_t bytes) {
   void *ptr;
-  if (current_location - bytes < tensor_boundary) {
+  uint32_t align_bytes = (bytes % 16) ? 16 - (bytes % 16) : 0;
+
+  if (current_location - (bytes + align_bytes) < tensor_boundary) {
     if (overflow_buffers_ix > EI_MAX_OVERFLOW_BUFFER_COUNT - 1) {
       ei_printf("ERR: Failed to allocate persistent buffer of size %d, does not fit in tensor arena and reached EI_MAX_OVERFLOW_BUFFER_COUNT\n",
         (int)bytes);
@@ -510,19 +354,25 @@ static void * AllocatePersistentBuffer(struct TfLiteContext* ctx,
 
   current_location -= bytes;
 
+  // Round down to a 16-byte boundary so the buffer never reaches past the block above it.
+  // (Subtracting 15 and adding 16 - (addr & 15) moved UP one byte when (addr & 15) == 15.)
+  current_location -= ((uintptr_t)(current_location) & 15);
+
   ptr = current_location;
   memset(ptr, 0, bytes);
 
   return ptr;
 }
+
 typedef struct {
   size_t bytes;
   void *ptr;
 } scratch_buffer_t;
+
 static scratch_buffer_t scratch_buffers[EI_MAX_SCRATCH_BUFFER_COUNT];
 static size_t scratch_buffers_ix = 0;
 
-static TfLiteStatus RequestScratchBufferInArena(struct TfLiteContext* ctx, size_t bytes,
+static TfLiteStatus RequestScratchBufferInArenaImpl(struct TfLiteContext* ctx, size_t bytes,
                                                 int* buffer_idx) {
   if (scratch_buffers_ix > EI_MAX_SCRATCH_BUFFER_COUNT - 1) {
     ei_printf("ERR: Failed to allocate scratch buffer of size %d, reached EI_MAX_SCRATCH_BUFFER_COUNT\n",
@@ -533,7 +383,7 @@ static TfLiteStatus RequestScratchBufferInArena(struct TfLiteContext* ctx, size_
   scratch_buffer_t b;
   b.bytes = bytes;
 
-  b.ptr = AllocatePersistentBuffer(ctx, b.bytes);
+  b.ptr = AllocatePersistentBufferImpl(ctx, b.bytes);
   if (!b.ptr) {
     ei_printf("ERR: Failed to allocate scratch buffer of size %d\n",
       (int)bytes);
@@ -548,7 +398,7 @@ static TfLiteStatus RequestScratchBufferInArena(struct TfLiteContext* ctx, size_
   return kTfLiteOk;
 }
 
-static void* GetScratchBuffer(struct TfLiteContext* ctx, int buffer_idx) {
+static void* GetScratchBufferImpl(struct TfLiteContext* ctx, int buffer_idx) {
   if (buffer_idx > (int)scratch_buffers_ix) {
     return NULL;
   }
@@ -566,9 +416,11 @@ static void ResetTensors() {
   }
 }
 
-static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
+static TfLiteTensor* GetTensorImpl(const struct TfLiteContext* context,
                                int tensor_idx) {
 
+  tensor_idx = tflTensors_subgraph_index[current_subgraph_index] + tensor_idx;
+
   for (size_t ix = 0; ix < MAX_TFL_TENSOR_COUNT; ix++) {
     // already used? OK!
     if (tflTensors[ix].index == tensor_idx) {
@@ -587,9 +439,11 @@ static TfLiteTensor* GetTensor(const struct TfLiteContext* context,
   return nullptr;
 }
 
-static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
+static TfLiteEvalTensor* GetEvalTensorImpl(const struct TfLiteContext* context,
                                        int tensor_idx) {
 
+  tensor_idx = tflTensors_subgraph_index[current_subgraph_index] + tensor_idx;
+
   for (size_t ix = 0; ix < MAX_TFL_EVAL_COUNT; ix++) {
     // already used? OK!
     if (tflEvalTensors[ix].index == tensor_idx) {
@@ -608,6 +462,43 @@ static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context,
   return nullptr;
 }
 
+class EonMicroContext : public MicroContext {  // forwards MicroContext calls to the *Impl helpers above
+ public:
+  // The base class is constructed with three null arguments; this subclass adds no data members.
+  EonMicroContext(): MicroContext(nullptr, nullptr, nullptr) { }
+  // Forwards to AllocatePersistentBufferImpl, passing nullptr as the TfLiteContext.
+  void* AllocatePersistentBuffer(size_t bytes) {
+    return AllocatePersistentBufferImpl(nullptr, bytes);
+  }
+  // Forwards to RequestScratchBufferInArenaImpl; buffer_index is handed through unchanged.
+  TfLiteStatus RequestScratchBufferInArena(size_t bytes,
+                                           int* buffer_index) {
+  return RequestScratchBufferInArenaImpl(nullptr, bytes, buffer_index);
+  }
+  // Forwards to GetScratchBufferImpl, passing nullptr as the TfLiteContext.
+  void* GetScratchBuffer(int buffer_index) {
+    return GetScratchBufferImpl(nullptr, buffer_index);
+  }
+  // Served by GetTensorImpl, which offsets tensor_index by the current subgraph's base index.
+  TfLiteTensor* AllocateTempTfLiteTensor(int tensor_index) {
+    return GetTensorImpl(nullptr, tensor_index);
+  }
+  // Intentionally a no-op: nothing is released here.
+  void DeallocateTempTfLiteTensor(TfLiteTensor* tensor) {
+    return;
+  }
+  // Always reports true; temp tensors are not tracked by this class.
+  bool IsAllTempTfLiteTensorDeallocated() {
+    return true;
+  }
+  // Served by GetEvalTensorImpl, which offsets tensor_index by the current subgraph's base index.
+  TfLiteEvalTensor* GetEvalTensor(int tensor_index) {
+    return GetEvalTensorImpl(nullptr, tensor_index);
+  }
+
+};
+
+
 } // namespace
 
 TfLiteStatus trained_model_init( void*(*alloc_fnc)(size_t,size_t) ) {
@@ -622,13 +513,21 @@ TfLiteStatus trained_model_init( void*(*alloc_fnc)(size_t,size_t) ) {
 #endif
   tensor_boundary = tensor_arena;
   current_location = tensor_arena + kTensorArenaSize;
-  ctx.AllocatePersistentBuffer = &AllocatePersistentBuffer;
-  ctx.RequestScratchBufferInArena = &RequestScratchBufferInArena;
-  ctx.GetScratchBuffer = &GetScratchBuffer;
-  ctx.GetTensor = &GetTensor;
-  ctx.GetEvalTensor = &GetEvalTensor;
-  ctx.tensors_size = 17;
-  for (size_t i = 0; i < 17; ++i) {
+
+  static EonMicroContext micro_context_;  // static storage: ctx.impl_ keeps this address after init returns
+  
+  // Set microcontext as the context ptr
+  ctx.impl_ = static_cast<void*>(&micro_context_);
+  // Setup tflitecontext functions
+  ctx.AllocatePersistentBuffer = &AllocatePersistentBufferImpl;
+  ctx.RequestScratchBufferInArena = &RequestScratchBufferInArenaImpl;
+  ctx.GetScratchBuffer = &GetScratchBufferImpl;
+  ctx.GetTensor = &GetTensorImpl;
+  ctx.GetEvalTensor = &GetEvalTensorImpl;
+  ctx.ReportError = &MicroContextReportOpError;
+
+  ctx.tensors_size = 11;
+  for (size_t i = 0; i < 11; ++i) {
     TfLiteTensor tensor;
     init_tflite_tensor(i, &tensor);
     if (tensor.allocation_type == kTfLiteArenaRw) {
@@ -638,57 +537,57 @@ TfLiteStatus trained_model_init( void*(*alloc_fnc)(size_t,size_t) ) {
       }
     }
   }
+
   if (tensor_boundary > current_location /* end of arena size */) {
     ei_printf("ERR: tensor arena is too small, does not fit model - even without scratch buffers\n");
     return kTfLiteError;
   }
+
   registrations[OP_FULLY_CONNECTED] = Register_FULLY_CONNECTED();
   registrations[OP_SOFTMAX] = Register_SOFTMAX();
 
-  for (size_t i = 0; i < 6; ++i) {
-    tflNodes[i].inputs = nodeData[i].inputs;
-    tflNodes[i].outputs = nodeData[i].outputs;
-    tflNodes[i].builtin_data = nodeData[i].builtin_data;
-tflNodes[i].custom_initial_data = nullptr;
-      tflNodes[i].custom_initial_data_size = 0;
-if (registrations[nodeData[i].used_op_index].init) {
-      tflNodes[i].user_data = registrations[nodeData[i].used_op_index].init(&ctx, (const char*)tflNodes[i].builtin_data, 0);
+  for (size_t g = 0; g < 1; ++g) {
+    current_subgraph_index = g;
+    for(size_t i = tflNodes_subgraph_index[g]; i < tflNodes_subgraph_index[g+1]; ++i) {
+      if (registrations[used_ops[i]].init) {
+        tflNodes[i].user_data = registrations[used_ops[i]].init(&ctx, (const char*)tflNodes[i].builtin_data, 0);
+      }
     }
   }
-  for (size_t i = 0; i < 6; ++i) {
-    if (registrations[nodeData[i].used_op_index].prepare) {
-      ResetTensors();
-
-      TfLiteStatus status = registrations[nodeData[i].used_op_index].prepare(&ctx, &tflNodes[i]);
-      if (status != kTfLiteOk) {
-        return status;
+  current_subgraph_index = 0;
+
+  for(size_t g = 0; g < 1; ++g) {
+    current_subgraph_index = g;
+    for(size_t i = tflNodes_subgraph_index[g]; i < tflNodes_subgraph_index[g+1]; ++i) {
+      if (registrations[used_ops[i]].prepare) {
+        ResetTensors();
+        TfLiteStatus status = registrations[used_ops[i]].prepare(&ctx, &tflNodes[i]);
+        if (status != kTfLiteOk) {
+          return status;
+        }
       }
     }
   }
+  current_subgraph_index = 0;
+
   return kTfLiteOk;
 }
 
-static const int inTensorIndices[] = {
-  0, 
-};
 TfLiteStatus trained_model_input(int index, TfLiteTensor *tensor) {
-  init_tflite_tensor(inTensorIndices[index], tensor);
+  init_tflite_tensor(in_tensor_indices[index], tensor);
   return kTfLiteOk;
 }
 
-static const int outTensorIndices[] = {
-  16, 
-};
 TfLiteStatus trained_model_output(int index, TfLiteTensor *tensor) {
-  init_tflite_tensor(outTensorIndices[index], tensor);
+  init_tflite_tensor(out_tensor_indices[index], tensor);
   return kTfLiteOk;
 }
 
 TfLiteStatus trained_model_invoke() {
-  for (size_t i = 0; i < 6; ++i) {
+  for (size_t i = 0; i < 4; ++i) {
     ResetTensors();
 
-    TfLiteStatus status = registrations[nodeData[i].used_op_index].invoke(&ctx, &tflNodes[i]);
+    TfLiteStatus status = registrations[used_ops[i]].invoke(&ctx, &tflNodes[i]);
 
 #if EI_CLASSIFIER_PRINT_STATE
     ei_printf("layer %lu\n", i);
diff --git a/tflite-model/trained_model_compiled.h b/tflite-model/trained_model_compiled.h
index 9e36081..8fd0132 100644
--- a/tflite-model/trained_model_compiled.h
+++ b/tflite-model/trained_model_compiled.h
@@ -18,7 +18,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-// Generated on: 12.04.2023 11:45:30
+// Generated on: 13.06.2024 14:08:46
 
 #ifndef trained_model_GEN_H
 #define trained_model_GEN_H
diff --git a/tflite-model/trained_model_ops_define.h b/tflite-model/trained_model_ops_define.h
index ad39a2b..8490353 100644
--- a/tflite-model/trained_model_ops_define.h
+++ b/tflite-model/trained_model_ops_define.h
@@ -25,46 +25,78 @@
 #define EI_TFLITE_DISABLE_SOFTMAX_IN_U8     1
 #define EI_TFLITE_DISABLE_SOFTMAX_IN_I16    1
 #define EI_TFLITE_DISABLE_SOFTMAX_IN_F32    1
+#define EI_TFLITE_DISABLE_SOFTMAX_IN_BOOL   1
 #define EI_TFLITE_DISABLE_SOFTMAX_OUT_U8    1
 #define EI_TFLITE_DISABLE_SOFTMAX_OUT_I16   1
 #define EI_TFLITE_DISABLE_SOFTMAX_OUT_F32   1
+#define EI_TFLITE_DISABLE_SOFTMAX_OUT_BOOL  1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_U8     1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_I16    1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_F32    1
+#define EI_TFLITE_DISABLE_FULLY_CONNECTED_IN_BOOL   1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_U8    1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_I16   1
 #define EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_F32   1
+#define EI_TFLITE_DISABLE_FULLY_CONNECTED_OUT_BOOL  1
 #define EI_TFLITE_DISABLE_CONV_2D_IN_U8     1
 #define EI_TFLITE_DISABLE_CONV_2D_IN_I8     1
 #define EI_TFLITE_DISABLE_CONV_2D_IN_I16    1
 #define EI_TFLITE_DISABLE_CONV_2D_IN_F32    1
+#define EI_TFLITE_DISABLE_CONV_2D_IN_BOOL   1
 #define EI_TFLITE_DISABLE_CONV_2D_OUT_U8    1
 #define EI_TFLITE_DISABLE_CONV_2D_OUT_I8    1
 #define EI_TFLITE_DISABLE_CONV_2D_OUT_I16   1
 #define EI_TFLITE_DISABLE_CONV_2D_OUT_F32   1
+#define EI_TFLITE_DISABLE_CONV_2D_OUT_BOOL  1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_IN_U8     1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I8     1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_IN_I16    1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_IN_F32    1
+#define EI_TFLITE_DISABLE_MAX_POOL_2D_IN_BOOL   1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_OUT_U8    1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_OUT_I8    1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_OUT_I16   1
 #define EI_TFLITE_DISABLE_MAX_POOL_2D_OUT_F32   1
+#define EI_TFLITE_DISABLE_MAX_POOL_2D_OUT_BOOL  1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_U8     1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I8     1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_I16    1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_F32    1
+#define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_IN_BOOL   1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_OUT_U8    1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_OUT_I8    1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_OUT_I16   1
 #define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_OUT_F32   1
+#define EI_TFLITE_DISABLE_DEPTHWISE_CONV_2D_OUT_BOOL  1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_U8     1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I8     1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_I16    1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_F32    1
+#define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_IN_BOOL   1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_OUT_U8    1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_OUT_I8    1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_OUT_I16   1
 #define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_OUT_F32   1
+#define EI_TFLITE_DISABLE_AVERAGE_POOL_2D_OUT_BOOL  1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_IN_U8     1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_IN_I8     1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_IN_I16    1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_IN_F32    1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_IN_BOOL   1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_U8    1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_I8    1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_I16   1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_F32   1
+#define EI_TFLITE_DISABLE_STRIDED_SLICE_OUT_BOOL  1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_IN_U8     1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_IN_I8     1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_IN_I16    1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_IN_F32    1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_IN_BOOL   1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_OUT_U8    1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_OUT_I8    1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_OUT_I16   1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_OUT_F32   1
+#define EI_TFLITE_DISABLE_TreeEnsembleClassifier_OUT_BOOL  1
 
 #endif // EI_TFLITE_MODEL_OPS_DEFINES_H